path: root/lua/lexers/rest.lua
Diffstat (limited to 'lua/lexers/rest.lua')
-rw-r--r--  lua/lexers/rest.lua  287
1 file changed, 125 insertions, 162 deletions
diff --git a/lua/lexers/rest.lua b/lua/lexers/rest.lua
index e7bf467..9340fc2 100644
--- a/lua/lexers/rest.lua
+++ b/lua/lexers/rest.lua
@@ -1,54 +1,14 @@
--- Copyright 2006-2022 Mitchell. See LICENSE.
+-- Copyright 2006-2024 Mitchell. See LICENSE.
-- reStructuredText LPeg lexer.
-local l = require('lexer')
-local token, word_match, starts_line = l.token, l.word_match, l.starts_line
+local lexer = require('lexer')
+local token, word_match, starts_line = lexer.token, lexer.word_match, lexer.starts_line
local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rest'}
-
--- Whitespace.
-local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
-local any_indent = S(' \t')^0
-
--- Section titles (2 or more characters).
-local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
-local adornment = lpeg.C(adornment_chars^2 * any_indent) * (l.newline + -1)
-local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
- if not adm:find('^%' .. c .. '+%s*$') then return nil end
- local rest = input:sub(index)
- local lines = 1
- for line, e in rest:gmatch('([^\r\n]+)()') do
- if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then
- return index + e - 1
- end
- if lines > 3 or #line > #adm then return nil end
- lines = lines + 1
- end
- return #input + 1
-end)
-local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
- local pos = adm:match('^%' .. c .. '+%s*()$')
- return pos and index - #adm + pos - 1 or nil
-end)
--- Token needs to be a predefined one in order for folder to work.
-local title = token(l.CONSTANT, overline + underline)
-
--- Lists.
-local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
-local enum_list = P('(')^-1 *
- (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)')
-local field_list = ':' * (l.any - ':')^1 * P(':')^-1
-local option_word = l.alnum * (l.alnum + '-')^0
-local option = S('-/') * option_word * (' ' * option_word)^-1 +
- '--' * option_word * ('=' * option_word)^-1
-local option_list = option * (',' * l.space^1 * option)^-1
-local list = #(l.space^0 * (S('*+-:/') + enum_list)) *
- starts_line(token('list', l.space^0 *
- (option_list + bullet_list + enum_list + field_list) * l.space))
+local lex = lexer.new('rest')
-- Literal block.
-local block = P('::') * (l.newline + -1) * function(input, index)
+local block = '::' * (lexer.newline + -1) * function(input, index)
local rest = input:sub(index)
local level, quote = #rest:match('^([ \t]*)')
for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
@@ -59,33 +19,61 @@ local block = P('::') * (l.newline + -1) * function(input, index)
end
return #input + 1
end
-local literal_block = token('literal_block', block)
+lex:add_rule('literal_block', token('literal_block', block))
+lex:add_style('literal_block', lexer.styles.embedded .. {eolfilled = true})
--- Line block.
-local line_block_char = token(l.OPERATOR, starts_line(any_indent * '|'))
+-- Lists.
+local option_word = lexer.alnum * (lexer.alnum + '-')^0
+local option = S('-/') * option_word * (' ' * option_word)^-1 +
+ ('--' * option_word * ('=' * option_word)^-1)
+local option_list = option * (',' * lexer.space^1 * option)^-1
+local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
+local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)')
+local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1
+lex:add_rule('list', #(lexer.space^0 * (S('*+-:/') + enum_list)) *
+ starts_line(token(lexer.LIST,
+ lexer.space^0 * (option_list + bullet_list + enum_list + field_list) * lexer.space)))
-local word = l.alpha * (l.alnum + S('-.+'))^0
+local any_indent = S(' \t')^0
+local word = lexer.alpha * (lexer.alnum + S('-.+'))^0
+local prefix = any_indent * '.. '
-- Explicit markup blocks.
-local prefix = any_indent * '.. '
-local footnote_label = '[' * (l.digit^1 + '#' * word^-1 + '*') * ']'
-local footnote = token('footnote_block', prefix * footnote_label * l.space)
+local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']'
+local footnote = token('footnote_block', prefix * footnote_label * lexer.space)
local citation_label = '[' * word * ']'
-local citation = token('citation_block', prefix * citation_label * l.space)
+local citation = token('citation_block', prefix * citation_label * lexer.space)
local link = token('link_block', prefix * '_' *
- (l.range('`') + (P('\\') * 1 + l.nonnewline - ':')^1) * ':' * l.space)
-local markup_block = #prefix * starts_line(footnote + citation + link)
+ (lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space)
+lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link))
+lex:add_style('footnote_block', lexer.styles.label)
+lex:add_style('citation_block', lexer.styles.label)
+lex:add_style('link_block', lexer.styles.label)
+
+-- Sphinx code block.
+local indented_block = function(input, index)
+ local rest = input:sub(index)
+ local level = #rest:match('^([ \t]*)')
+ for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
+ if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then return index + pos - 1 end
+ end
+ return #input + 1
+end
+local code_block =
+ prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) * indented_block
+lex:add_rule('code_block', #prefix * token('code_block', starts_line(code_block)))
+lex:add_style('code_block', lexer.styles.embedded .. {eolfilled = true})
-- Directives.
-local directive_type = word_match({
+local known_directive = token('directive', prefix * word_match{
-- Admonitions
- 'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip',
- 'warning', 'admonition',
+ 'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning',
+ 'admonition',
-- Images
'image', 'figure',
-- Body elements
- 'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric',
- 'epigraph', 'highlights', 'pull-quote', 'compound', 'container',
+ 'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric', 'epigraph',
+ 'highlights', 'pull-quote', 'compound', 'container',
-- Table
'table', 'csv-table', 'list-table',
-- Document parts
@@ -97,128 +85,101 @@ local directive_type = word_match({
-- Directives for substitution definitions
'replace', 'unicode', 'date',
-- Miscellaneous
- 'include', 'raw', 'class', 'role', 'default-role', 'title',
- 'restructuredtext-test-directive',
-}, '-')
-local known_directive = token('directive', prefix * directive_type * '::' *
- l.space)
-local sphinx_directive_type = word_match({
+ 'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive'
+} * '::' * lexer.space)
+local sphinx_directive = token('sphinx_directive', prefix * word_match{
-- The TOC tree.
'toctree',
-- Paragraph-level markup.
- 'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso',
- 'rubric', 'centered', 'hlist', 'glossary', 'productionlist',
+ 'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso', 'rubric',
+ 'centered', 'hlist', 'glossary', 'productionlist',
-- Showing code examples.
'highlight', 'literalinclude',
-- Miscellaneous
'sectionauthor', 'index', 'only', 'tabularcolumns'
-}, '-')
-local sphinx_directive = token('sphinx_directive', prefix *
- sphinx_directive_type * '::' * l.space)
-local unknown_directive = token('unknown_directive', prefix * word * '::' *
- l.space)
-local directive = #prefix * starts_line(known_directive + sphinx_directive +
- unknown_directive)
+} * '::' * lexer.space)
+local unknown_directive = token('unknown_directive', prefix * word * '::' * lexer.space)
+lex:add_rule('directive',
+ #prefix * starts_line(known_directive + sphinx_directive + unknown_directive))
+lex:add_style('directive', lexer.styles.keyword)
+lex:add_style('sphinx_directive', lexer.styles.keyword .. {bold = true})
+lex:add_style('unknown_directive', lexer.styles.keyword .. {italics = true})
--- Sphinx code block.
-local indented_block = function(input, index)
+-- Substitution definitions.
+lex:add_rule('substitution', #prefix * token('substitution', starts_line(prefix * lexer.range('|') *
+ lexer.space^1 * word * '::' * lexer.space)))
+lex:add_style('substitution', lexer.styles.variable)
+
+-- Comments.
+local line_comment = lexer.to_eol(prefix)
+local bprefix = any_indent * '..'
+local block_comment = bprefix * lexer.newline * indented_block
+lex:add_rule('comment', #bprefix * token(lexer.COMMENT, starts_line(line_comment + block_comment)))
+
+-- Section titles (2 or more characters).
+local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
+local adornment = lpeg.C(adornment_chars^2 * any_indent) * (lexer.newline + -1)
+local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
+ if not adm:find('^%' .. c .. '+%s*$') then return nil end
local rest = input:sub(index)
- local level = #rest:match('^([ \t]*)')
- for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
- if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then
- return index + pos - 1
- end
+ local lines = 1
+ for line, e in rest:gmatch('([^\r\n]+)()') do
+ if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then return index + e - 1 end
+ if lines > 3 or #line > #adm then return nil end
+ lines = lines + 1
end
return #input + 1
-end
-local code_block = prefix * 'code-block::' * S(' \t')^1 * l.nonnewline^0 *
- (l.newline + -1) * indented_block
-local sphinx_block = #prefix * token('code_block', starts_line(code_block))
+end)
+local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
+ local pos = adm:match('^%' .. c .. '+%s*()$')
+ return pos and index - #adm + pos - 1 or nil
+end)
+-- Token needs to be a predefined one in order for folder to work.
+lex:add_rule('title', token(lexer.HEADING, overline + underline))
--- Substitution definitions.
-local substitution = #prefix * token('substitution',
- starts_line(prefix * l.range('|') * l.space^1 * word * '::' * l.space))
+-- Line block.
+lex:add_rule('line_block_char', token(lexer.OPERATOR, starts_line(any_indent * '|')))
--- Comments.
-local line_comment = l.to_eol(prefix)
-local bprefix = any_indent * '..'
-local block_comment = bprefix * l.newline * indented_block
-local comment = #bprefix * token(l.COMMENT, starts_line(line_comment +
- block_comment))
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
-- Inline markup.
-local em = token('em', l.range('*'))
-local strong = token('strong', l.range('**', '**'))
+local strong = token(lexer.BOLD, lexer.range('**'))
+local em = token(lexer.ITALIC, lexer.range('*'))
+local inline_literal = token('inline_literal', lexer.range('``'))
+local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1
+local prefix_link = '_' * lexer.range('`')
+local link_ref = token(lexer.LINK, postfix_link + prefix_link)
local role = token('role', ':' * word * ':' * (word * ':')^-1)
-local interpreted = role^-1 * token('interpreted', l.range('`')) * role^-1
-local inline_literal = token('inline_literal', l.range('``', '``'))
-local postfix_link = (word + l.range('`')) * '_' * P('_')^-1
-local prefix_link = '_' * l.range('`')
-local link_ref = token('link', postfix_link + prefix_link)
-local footnote_ref = token('footnote', footnote_label * '_')
-local citation_ref = token('citation', citation_label * '_')
-local substitution_ref = token('substitution', l.range('|', true) *
- ('_' * P('_')^-1)^-1)
-local link = token('link', l.alpha * (l.alnum + S('-.'))^1 * ':' *
- (l.alnum + S('/.+-%@'))^1)
-local inline_markup = (strong + em + inline_literal + link_ref + interpreted +
- footnote_ref + citation_ref + substitution_ref + link) * -l.alnum
+local interpreted = role^-1 * token('interpreted', lexer.range('`')) * role^-1
+local footnote_ref = token(lexer.REFERENCE, footnote_label * '_')
+local citation_ref = token(lexer.REFERENCE, citation_label * '_')
+local substitution_ref = token('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1)
+local link = token(lexer.LINK,
+ lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1)
+lex:add_rule('inline_markup',
+ (strong + em + inline_literal + link_ref + interpreted + footnote_ref + citation_ref +
+ substitution_ref + link) * -lexer.alnum)
+lex:add_style('inline_literal', lexer.styles.embedded)
+lex:add_style('role', lexer.styles.class)
+lex:add_style('interpreted', lexer.styles.string)
-- Other.
-local non_space = token(l.DEFAULT, l.alnum * (l.any - l.space)^0)
-local escape = token(l.DEFAULT, '\\' * l.any)
-
-M._rules = {
- {'literal_block', literal_block},
- {'list', list},
- {'markup_block', markup_block},
- {'code_block', sphinx_block},
- {'directive', directive},
- {'substitution', substitution},
- {'comment', comment},
- {'title', title},
- {'line_block_char', line_block_char},
- {'whitespace', ws},
- {'inline_markup', inline_markup},
- {'non_space', non_space},
- {'escape', escape}
-}
-
-M._tokenstyles = {
- list = l.STYLE_TYPE,
- literal_block = l.STYLE_EMBEDDED .. ',eolfilled',
- footnote_block = l.STYLE_LABEL,
- citation_block = l.STYLE_LABEL,
- link_block = l.STYLE_LABEL,
- directive = l.STYLE_KEYWORD,
- sphinx_directive = l.STYLE_KEYWORD .. ',bold',
- unknown_directive = l.STYLE_KEYWORD .. ',italics',
- code_block = l.STYLE_EMBEDDED .. ',eolfilled',
- substitution = l.STYLE_VARIABLE,
- strong = 'bold',
- em = 'italics',
- role = l.STYLE_CLASS,
- interpreted = l.STYLE_STRING,
- inline_literal = l.STYLE_EMBEDDED,
- link = 'underlined',
- footnote = 'underlined',
- citation = 'underlined',
-}
+lex:add_rule('non_space', token(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
+lex:add_rule('escape', token(lexer.DEFAULT, '\\' * lexer.any))
+-- Section-based folding.
local sphinx_levels = {
['#'] = 0, ['*'] = 1, ['='] = 2, ['-'] = 3, ['^'] = 4, ['"'] = 5
}
--- Section-based folding.
-M._fold = function(text, start_pos, start_line, start_level)
+function lex:fold(text, start_pos, start_line, start_level)
local folds, line_starts = {}, {}
- for pos in (text .. '\n'):gmatch('().-\r?\n') do
- line_starts[#line_starts + 1] = pos
- end
- local style_at, CONSTANT, level = l.style_at, l.CONSTANT, start_level
- local sphinx = l.property_int['fold.by.sphinx.convention'] > 0
- local FOLD_BASE = l.FOLD_BASE
- local FOLD_HEADER, FOLD_BLANK = l.FOLD_HEADER, l.FOLD_BLANK
+ for pos in (text .. '\n'):gmatch('().-\r?\n') do line_starts[#line_starts + 1] = pos end
+ local style_at, CONSTANT, level = lexer.style_at, lexer.CONSTANT, start_level
+ local sphinx = lexer.property_int['fold.scintillua.rest.by.sphinx.convention'] > 0
+ local FOLD_BASE = lexer.FOLD_BASE
+ local FOLD_HEADER, FOLD_BLANK = lexer.FOLD_HEADER, lexer.FOLD_BLANK
for i = 1, #line_starts do
local pos, next_pos = line_starts[i], line_starts[i + 1]
local c = text:sub(pos, pos)
@@ -237,16 +198,18 @@ M._fold = function(text, start_pos, start_line, start_level)
return folds
end
-l.property['fold.by.sphinx.convention'] = '0'
+-- lexer.property['fold.by.sphinx.convention'] = '0'
--[[ Embedded languages.
-local bash = l.load('bash')
+local bash = lexer.load('bash')
local bash_indent_level
local start_rule =
- #(prefix * 'code-block' * '::' * l.space^1 * 'bash' * (l.newline + -1)) *
+ #(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) *
sphinx_directive * token('bash_begin', P(function(input, index)
bash_indent_level = #input:match('^([ \t]*)', index)
return index
end))]]
-return M
+lexer.property['scintillua.comment'] = '.. '
+
+return lex
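
Below is a minimal usage sketch, not part of the diff above: it assumes the stock Scintillua library API, in which lexer.load() returns the object built by lexer.new() and lex:lex() returns an alternating list of token names and end positions. The sample text and the property toggle are illustrative only.

-- Hypothetical example: tokenize a small reStructuredText snippet with the
-- rewritten lexer. Assumes the Scintillua 'lexer' module and this rest.lua
-- are on the lexer search path.
local lexer = require('lexer')

-- Optional: fold by the Sphinx section-adornment convention, matching the
-- property name read in lex:fold() above.
lexer.property['fold.scintillua.rest.by.sphinx.convention'] = '1'

local rest = lexer.load('rest')

local text = [[
Title
=====

.. note:: A *directive* with **inline markup** and ``literal`` text.
]]

-- lex() is expected to return {token1, end_pos1, token2, end_pos2, ...}.
local tokens = rest:lex(text)
for i = 1, #tokens, 2 do
  print(tokens[i], tokens[i + 1])
end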