Resync the lexers with Scintillua

- Resync the lexers with Scintillua
- Update the lexer readme
- Update `zenburn` theme to fix some highlighting issues
- lexers: redirect print function to vis:info()
- Fix support for custom style names
- As per error message "lexer.delimited_range() is deprecated, use lexer.range()".
- Remove remaining `lexer.delimited_range()` call
- Set syntax to `nil` if the file type has no matching lexer
- Updated Go lexer for Go 1.18.
- lexers/dsv: convert to new lexer format
  (cherry picked from commit 9edbc3cd9ea1d7142b1305840432a3d2739e755a)
- lexers/gemini: disable legacy gemini lexer
  This reverts commit 468f9ee1b027a7ce98b1a249fa1af5888feeb989.
  It is in legacy format and of questionable quality. Ideally it should be contributed upstream from where it will eventually trickle down to us.
- lexers/git-rebase: convert to new lexer format
  (cherry picked from commit 4000a4cc9ac4a4c2869dfae772b977a82aee8d8c)
- lexers/strace: convert to new lexer format
  (cherry picked from commit e420451320d97eb164f5629c1bcfab0b595be29d)
- lexers/typescript: add new upstream lexer revision 28e2b60
  (cherry picked from commit 7326e6deecdaa75fa94ae9ebdb653f9f907b33f2)
- use `package.searchpath` instead of a local `searchpath` function
- Restore `filetype: support filetype detection via hashbang`
- Remove redundant comment
- Restore gemini lexer
author: qiu-x <alex@alexslomka.xyz> 2022-06-29 07:56:51 +0200
committer: Felix Van der Jeugt <felix.vanderjeugt@posteo.net> 2022-11-29 21:57:18 +0100
commit: 8a420ecc4c1ed50111464ec66901bd983eaf2dbd (patch)
tree: f31d2186cafaee6e7f18d32fe99144c3e8148c00 /lua/lexers/markdown.lua
parent: 981b90a203484182feace48471fe2b53dae7676f (diff)
download: vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.gz
vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.xz
1 files changed, 94 insertions, 99 deletions
diff --git a/lua/lexers/markdown.lua b/lua/lexers/markdown.lua
index fe57a1b..cbd4ba2 100644
--- a/lua/lexers/markdown.lua
+++ b/lua/lexers/markdown.lua
@@ -1,109 +1,104 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- Markdown LPeg lexer.
 
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
 
-local M = {_NAME = 'markdown'}
-
--- Whitespace.
-local ws = token(l.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1)
+local lex = lexer.new('markdown')
 
 -- Block elements.
-local header = token('h6', l.starts_line('######') * l.nonnewline^0) +
-               token('h5', l.starts_line('#####') * l.nonnewline^0) +
-               token('h4', l.starts_line('####') * l.nonnewline^0) +
-               token('h3', l.starts_line('###') * l.nonnewline^0) +
-               token('h2', l.starts_line('##') * l.nonnewline^0) +
-               token('h1', l.starts_line('#') * l.nonnewline^0)
-
-local blockquote = token(l.STRING,
-                         lpeg.Cmt(l.starts_line(S(' \t')^0 * '>'),
-                                  function(input, index)
-                                    local _, e = input:find('\n[ \t]*\r?\n',
-                                                            index)
-                                    return (e or #input) + 1
-                                  end))
-
-local blockcode = token('code', l.starts_line(P(' ')^4 + P('\t')) * -P('<') *
-                                l.nonnewline^0)
-
-local hr = token('hr', lpeg.Cmt(l.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))),
-                                function(input, index, c)
-                                  local line = input:match('[^\n]*', index)
-                                  line = line:gsub('[ \t]', '')
-                                  if line:find('[^'..c..']') or #line < 2 then
-                                    return nil
-                                  end
-                                  return (input:find('\n', index) or #input) + 1
-                                end))
+local function h(n) return token('h' .. n, lexer.to_eol(lexer.starts_line(string.rep('#', n)))) end
+lex:add_rule('header', h(6) + h(5) + h(4) + h(3) + h(2) + h(1))
+local font_size = tonumber(lexer.property_expanded['style.default']:match('size:(%d+)')) or 10
+local function add_header_style(n)
+  lex:add_style('h' .. n, {fore = lexer.colors.red, size = (font_size + (6 - n))})
+end
+for i = 1, 6 do add_header_style(i) end
+
+lex:add_rule('blockquote',
+  token(lexer.STRING, lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'), function(input, index)
+    local _, e = input:find('\n[ \t]*\r?\n', index)
+    return (e or #input) + 1
+  end)))
+
+lex:add_rule('list', token('list',
+  lexer.starts_line(S(' \t')^0 * (S('*+-') + lexer.digit^1 * '.')) * S(' \t')))
+lex:add_style('list', lexer.styles.constant)
+
+local hspace = S('\t\v\f\r ')
+local blank_line = '\n' * hspace^0 * ('\n' + P(-1))
+
+local code_line = lexer.to_eol(lexer.starts_line(P(' ')^4 + '\t') * -P('<')) * lexer.newline^-1
+local code_block = lexer.range(lexer.starts_line('```'), '\n```' * hspace^0 * ('\n' + P(-1)))
+local code_inline = lpeg.Cmt(lpeg.C(P('`')^1), function(input, index, bt)
+  -- `foo`, ``foo``, ``foo`bar``, `foo``bar` are all allowed.
+  local _, e = input:find('[^`]' .. bt .. '%f[^`]', index)
+  return (e or #input) + 1
+end)
+lex:add_rule('block_code', token('code', code_line + code_block + code_inline))
+lex:add_style('code', lexer.styles.embedded .. {eolfilled = true})
+
+lex:add_rule('hr',
+  token('hr', lpeg.Cmt(lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))), function(input, index, c)
+    local line = input:match('[^\r\n]*', index):gsub('[ \t]', '')
+    if line:find('[^' .. c .. ']') or #line < 2 then return nil end
+    return (select(2, input:find('\r?\n', index)) or #input) + 1
+  end)))
+lex:add_style('hr', {back = lexer.colors.black, eolfilled = true})
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1)
+lex:add_rule('whitespace', ws)
 
 -- Span elements.
-local dq_str = token(l.STRING, l.delimited_range('"', false, true))
-local sq_str = token(l.STRING, l.delimited_range("'", false, true))
-local paren_str = token(l.STRING, l.delimited_range('()'))
-local link = token('link', P('!')^-1 * l.delimited_range('[]') *
-                           (P('(') * (l.any - S(') \t'))^0 *
-                            (S(' \t')^1 *
-                             l.delimited_range('"', false, true))^-1 * ')' +
-                            S(' \t')^0 * l.delimited_range('[]')) +
-                           P('http://') * (l.any - l.space)^1)
-local link_label = token('link_label', l.delimited_range('[]') * ':') * ws *
-                   token('link_url', (l.any - l.space)^1) *
-                   (ws * (dq_str + sq_str + paren_str))^-1
-
-local strong = token('strong', (P('**') * (l.any - '**')^0 * P('**')^-1) +
-                               (P('__') * (l.any - '__')^0 * P('__')^-1))
-local em = token('em',
-                 l.delimited_range('*', true) + l.delimited_range('_', true))
-local code = token('code', (P('``') * (l.any - '``')^0 * P('``')^-1) +
-                           l.delimited_range('`', true, true))
-
-local escape = token(l.DEFAULT, P('\\') * 1)
-
-local list = token('list',
-                   l.starts_line(S(' \t')^0 * (S('*+-') + R('09')^1 * '.')) *
-                   S(' \t'))
-
-M._rules = {
-  {'header', header},
-  {'list', list},
-  {'blockquote', blockquote},
-  {'blockcode', blockcode},
-  {'hr', hr},
-  {'whitespace', ws},
-  {'link_label', link_label},
-  {'escape', escape},
-  {'link', link},
-  {'strong', strong},
-  {'em', em},
-  {'code', code},
-}
-
-local font_size = 10
-local hstyle = 'fore:red'
-M._tokenstyles = {
-  h6 = hstyle,
-  h5 = hstyle..',size:'..(font_size + 1),
-  h4 = hstyle..',size:'..(font_size + 2),
-  h3 = hstyle..',size:'..(font_size + 3),
-  h2 = hstyle..',size:'..(font_size + 4),
-  h1 = hstyle..',size:'..(font_size + 5),
-  code = l.STYLE_EMBEDDED..',eolfilled',
-  hr = l.STYLE_DEFAULT..',bold',
-  link = 'underlined',
-  link_url = 'underlined',
-  link_label = l.STYLE_LABEL,
-  strong = 'bold',
-  em = 'italics',
-  list = l.STYLE_CONSTANT,
-}
+lex:add_rule('escape', token(lexer.DEFAULT, P('\\') * 1))
+
+local ref_link_label = token('link_label', lexer.range('[', ']', true) * ':')
+local ref_link_url = token('link_url', (lexer.any - lexer.space)^1)
+local ref_link_title = token(lexer.STRING, lexer.range('"', true, false) +
+  lexer.range("'", true, false) + lexer.range('(', ')', true))
+lex:add_rule('link_label', ref_link_label * ws * ref_link_url * (ws * ref_link_title)^-1)
+lex:add_style('link_label', lexer.styles.label)
+lex:add_style('link_url', {underlined = true})
+
+local link_label = P('!')^-1 * lexer.range('[', ']', true)
+local link_target =
+  '(' * (lexer.any - S(') \t'))^0 * (S(' \t')^1 * lexer.range('"', false, false))^-1 * ')'
+local link_ref = S(' \t')^0 * lexer.range('[', ']', true)
+local link_url = 'http' * P('s')^-1 * '://' * (lexer.any - lexer.space)^1 +
+  ('<' * lexer.alpha^2 * ':' * (lexer.any - lexer.space - '>')^1 * '>')
+lex:add_rule('link', token('link', link_label * (link_target + link_ref) + link_url))
+lex:add_style('link', {underlined = true})
+
+local punct_space = lexer.punct + lexer.space
+
+-- Handles flanking delimiters as described in
+-- https://github.github.com/gfm/#emphasis-and-strong-emphasis in the cases where simple
+-- delimited ranges are not sufficient.
+local function flanked_range(s, not_inword)
+  local fl_char = lexer.any - s - lexer.space
+  local left_fl = lpeg.B(punct_space - s) * s * #fl_char + s * #(fl_char - lexer.punct)
+  local right_fl = lpeg.B(lexer.punct) * s * #(punct_space - s) + lpeg.B(fl_char) * s
+  return left_fl * (lexer.any - blank_line - (not_inword and s * #punct_space or s))^0 * right_fl
+end
+
+local asterisk_strong = flanked_range('**')
+local underscore_strong = (lpeg.B(punct_space) + #lexer.starts_line('_')) *
+  flanked_range('__', true) * #(punct_space + -1)
+lex:add_rule('strong', token('strong', asterisk_strong + underscore_strong))
+lex:add_style('strong', {bold = true})
+
+local asterisk_em = flanked_range('*')
+local underscore_em = (lpeg.B(punct_space) + #lexer.starts_line('_')) * flanked_range('_', true) *
+  #(punct_space + -1)
+lex:add_rule('em', token('em', asterisk_em + underscore_em))
+lex:add_style('em', {italics = true})
 
 -- Embedded HTML.
-local html = l.load('html')
-local start_rule = token('tag', l.starts_line(S(' \t')^0 * '<'))
-local end_rule = token(l.DEFAULT, P('\n')) -- TODO: l.WHITESPACE causes errors
-l.embed_lexer(M, html, start_rule, end_rule)
+local html = lexer.load('html')
+local start_rule = lexer.starts_line(P(' ')^-3) * #P('<') * html:get_rule('element') -- P(' ')^4 starts code_line
+local end_rule = token(lexer.DEFAULT, blank_line) -- TODO: lexer.WHITESPACE errors
+lex:embed(html, start_rule, end_rule)
 
-return M
+return lex
author	qiu-x <alex@alexslomka.xyz>	2022-06-29 07:56:51 +0200
committer	Felix Van der Jeugt <felix.vanderjeugt@posteo.net>	2022-11-29 21:57:18 +0100
commit	8a420ecc4c1ed50111464ec66901bd983eaf2dbd (patch)
tree	f31d2186cafaee6e7f18d32fe99144c3e8148c00 /lua/lexers/markdown.lua
parent	981b90a203484182feace48471fe2b53dae7676f (diff)
download	vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.gz vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.xz