1 files changed, 150 insertions, 212 deletions
diff --git a/lua/lexers/scheme.lua b/lua/lexers/scheme.lua
index 681f2fd..a19fa0f 100644
--- a/lua/lexers/scheme.lua
+++ b/lua/lexers/scheme.lua
@@ -1,236 +1,174 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- Scheme LPeg lexer.
+-- Contributions by Murray Calavera.
 
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
 
-local M = {_NAME = 'scheme'}
+local lex = lexer.new('scheme')
 
 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline^0
-local block_comment = l.nested_pair('#|', '|#')
--- TODO: this should handle any datum and take into account "#\)", ";" etc.
-local datum_comment
-  = P'#;' * l.space^0
-  * (l.delimited_range("()", false, true, true) + (l.any - l.space)^1)
-local comment = token(l.COMMENT, datum_comment + line_comment + block_comment)
-
--- Strings.
-local character
-  = P'#\\' * ( P'alarm' + P'backspace' + P'delete' + P'escape'
-             + P'newline' + P'null' + P'return' + P'space' + P'tab')
-  + P'#\\x' * l.xdigit^1
-  + P'#\\' * P(1)
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, character + dq_str)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
 
 -- Keywords.
-local keyword = token(l.KEYWORD, word_match({
-  "and", "or", "not", "else",
-
-  "library", "define-library", "export", "include-library-declarations",
-  "cond-expand", "import", "rename", "only", "except", "prefix", "include",
-  "include-ci",
-
-  "begin", "case", "case-lambda", "cond", "define", "define-record-type",
-  "define-syntax", "define-values", "delay", "delay-force", "do", "if",
-  "guard", "lambda", "let", "let*", "let*-values", "let-syntax", "let-values",
-  "letrec", "letrec*", "letrec-syntax", "parameterize", "quasiquote", "quote",
-  "set!", "unless", "unquote", "unquote-splicing", "when",
-
-  "define-macro", "fluid-let"
-}, '.-+!$%&*/:<=>?@^_~'))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+  'and', 'or', 'not', 'else',
+  --
+  'library', 'define-library', 'export', 'include-library-declarations', 'cond-expand', 'import',
+  'rename', 'only', 'except', 'prefix', 'include', 'include-ci',
+  --
+  'begin', 'case', 'case-lambda', 'cond', 'define', 'define-record-type', 'define-syntax',
+  'define-values', 'delay', 'delay-force', 'do', 'if', 'guard', 'lambda', 'let', 'let*',
+  'let*-values', 'let-syntax', 'let-values', 'letrec', 'letrec*', 'letrec-syntax', 'parameterize',
+  'quasiquote', 'quote', 'set!', 'unless', 'unquote', 'unquote-splicing', 'when',
+  --
+  'define-macro', 'fluid-let'
+} * -S('.-+!$%&*/:<=>?@^_~'))) -- reject a keyword that is only a prefix of an identifier (let/cc)
 
 -- Functions.
-local func = token(l.FUNCTION, word_match({
-  "*", "+", "-", "/", "<", "<=", "=", "=>", ">", ">=", "abs", "append",
-  "apply", "assoc", "assq", "assv", "binary-port?", "boolean=?", "boolean?",
-  "bytevector", "bytevector-append", "bytevector-copy", "bytevector-copy!",
-  "bytevector-length", "bytevector-u8-ref", "bytevector-u8-set!",
-  "bytevector?", "caar", "cadr", "call-with-current-continuation",
-  "call-with-port", "call-with-values", "call/cc", "car", "cdar", "cddr",
-  "cdr", "ceiling", "char->integer", "char-ready?", "char<=?", "char<?",
-  "char=?", "char>=?", "char>?", "char?", "close-input-port",
-  "close-output-port", "close-port", "complex?", "cons", "current-error-port",
-  "current-input-port", "current-output-port", "denominator", "dynamic-wind",
-  "eof-object", "eof-object?", "eq?", "equal?", "eqv?", "error",
-  "error-object-irritants", "error-object-message", "error-object?", "even?",
-  "exact", "exact-integer-sqrt", "exact-integer?", "exact?", "expt",
-  "features", "file-error?", "floor", "floor-quotient", "floor-remainder",
-  "floor/", "flush-output-port", "for-each", "gcd", "get-output-bytevector",
-  "get-output-string", "inexact", "inexact?", "input-port-open?",
-  "input-port?", "integer->char", "integer?", "lcm", "length", "list",
-  "list->string", "list->vector", "list-copy", "list-ref", "list-set!",
-  "list-tail", "list?", "make-bytevector", "make-list", "make-parameter",
-  "make-string", "make-vector", "map", "max", "member", "memq", "memv", "min",
-  "modulo", "negative?", "newline", "null?", "number->string", "number?",
-  "numerator", "odd?", "open-input-bytevector", "open-input-string",
-  "open-output-bytevector", "open-output-string", "output-port-open?",
-  "output-port?", "pair?", "peek-char", "peek-u8", "port?", "positive?",
-  "procedure?", "quotient", "raise", "raise-continuable", "rational?",
-  "rationalize", "read-bytevector", "read-bytevector!", "read-char",
-  "read-error?", "read-line", "read-string", "read-u8", "real?", "remainder",
-  "reverse", "round", "set-car!", "set-cdr!", "square", "string",
-  "string->list", "string->number", "string->symbol", "string->utf8",
-  "string->vector", "string-append", "string-copy", "string-copy!",
-  "string-fill!", "string-for-each", "string-length", "string-map",
-  "string-ref", "string-set!", "string<=?", "string<?", "string=?",
-  "string>=?", "string>?", "string?", "substring", "symbol->string",
-  "symbol=?", "symbol?", "syntax-error", "syntax-rules", "textual-port?",
-  "truncate", "truncate-quotient", "truncate-remainder", "truncate/",
-  "u8-ready?", "utf8->string", "values", "vector", "vector->list",
-  "vector->string", "vector-append", "vector-copy", "vector-copy!",
-  "vector-fill!", "vector-for-each", "vector-length", "vector-map",
-  "vector-ref", "vector-set!", "vector?", "with-exception-handler",
-  "write-bytevector", "write-char", "write-string", "write-u8", "zero?",
-
-  "char-alphabetic?", "char-ci<=?", "char-ci<?", "char-ci=?", "char-ci>=?",
-  "char-ci>?", "char-downcase", "char-foldcase", "char-lower-case?",
-  "char-numeric?", "char-upcase", "char-upper-case?", "char-whitespace?",
-  "digit-value", "string-ci<=?", "string-ci<?", "string-ci=?", "string-ci>=?",
-  "string-ci>?", "string-downcase", "string-foldcase", "string-upcase",
-
-  "angle", "imag-part", "magnitude", "make-polar", "make-rectangular",
-  "real-part",
-
-  "caaaar", "caaadr", "caaar", "caadar", "caaddr", "caadr", "cadaar", "cadadr",
-  "cadar", "caddar", "cadddr", "caddr", "cdaaar", "cdaadr", "cdaar", "cdadar",
-  "cdaddr", "cdadr", "cddaar", "cddadr", "cddar", "cdddar", "cddddr", "cdddr",
-
-  "environment", "eval",
-
-  "call-with-input-file", "call-with-output-file", "delete-file",
-  "file-exists?", "open-binary-input-file", "open-binary-output-file",
-  "open-input-file", "open-output-file", "with-input-from-file",
-  "with-output-to-file",
-
-  "acos", "asin", "atan", "cos", "exp", "finite?", "infinite?", "log", "nan?",
-  "sin", "sqrt", "tan",
-
-  "force", "make-promise", "promise?",
-
-  "load",
-
-  "command-line", "emergency-exit", "exit", "get-environment-variable",
-  "get-environment-variables",
-
-  "read",
-
-  "interaction-environment",
-
-  "current-jiffy", "current-second", "jiffies-per-second",
-
-  "display", "write", "write-shared", "write-simple",
-
-  "syntax-case", "er-macro-transformer", "sc-macro-transformer",
-  "rsc-macro-transformer"
-}, '.-+!$%&*/:<=>?@^_~'))
-
-local directive = token(l.PREPROCESSOR, P'#!fold-case' + P'#!no-fold-case')
-local boolean = token(l.CONSTANT,
-  word_match({'#t', '#f', '#true', '#false'}, '#'))
-
--- Identifiers.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+  '*', '+', '-', '/', '<', '<=', '=', '=>', '>', '>=', 'abs', 'append', 'apply', 'assoc', 'assq',
+  'assv', 'binary-port?', 'boolean=?', 'boolean?', 'bytevector', 'bytevector-append',
+  'bytevector-copy', 'bytevector-copy!', 'bytevector-length', 'bytevector-u8-ref',
+  'bytevector-u8-set!', 'bytevector?', 'caar', 'cadr', 'call-with-current-continuation',
+  'call-with-port', 'call-with-values', 'call/cc', 'car', 'cdar', 'cddr', 'cdr', 'ceiling',
+  'char->integer', 'char-ready?', 'char<=?', 'char<?', 'char=?', 'char>=?', 'char>?', 'char?',
+  'close-input-port', 'close-output-port', 'close-port', 'complex?', 'cons', 'current-error-port',
+  'current-input-port', 'current-output-port', 'denominator', 'dynamic-wind', 'eof-object',
+  'eof-object?', 'eq?', 'equal?', 'eqv?', 'error', 'error-object-irritants', 'error-object-message',
+  'error-object?', 'even?', 'exact', 'exact-integer-sqrt', 'exact-integer?', 'exact?', 'expt',
+  'features', 'file-error?', 'floor', 'floor-quotient', 'floor-remainder', 'floor/',
+  'flush-output-port', 'for-each', 'gcd', 'get-output-bytevector', 'get-output-string', 'inexact',
+  'inexact?', 'input-port-open?', 'input-port?', 'integer->char', 'integer?', 'lcm', 'length',
+  'list', 'list->string', 'list->vector', 'list-copy', 'list-ref', 'list-set!', 'list-tail',
+  'list?', 'make-bytevector', 'make-list', 'make-parameter', 'make-string', 'make-vector', 'map',
+  'max', 'member', 'memq', 'memv', 'min', 'modulo', 'negative?', 'newline', 'null?',
+  'number->string', 'number?', 'numerator', 'odd?', 'open-input-bytevector', 'open-input-string',
+  'open-output-bytevector', 'open-output-string', 'output-port-open?', 'output-port?', 'pair?',
+  'peek-char', 'peek-u8', 'port?', 'positive?', 'procedure?', 'quotient', 'raise',
+  'raise-continuable', 'rational?', 'rationalize', 'read-bytevector', 'read-bytevector!',
+  'read-char', 'read-error?', 'read-line', 'read-string', 'read-u8', 'real?', 'remainder',
+  'reverse', 'round', 'set-car!', 'set-cdr!', 'square', 'string', 'string->list', 'string->number',
+  'string->symbol', 'string->utf8', 'string->vector', 'string-append', 'string-copy',
+  'string-copy!', 'string-fill!', 'string-for-each', 'string-length', 'string-map', 'string-ref',
+  'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?', 'string>?', 'string?',
+  'substring', 'symbol->string', 'symbol=?', 'symbol?', 'syntax-error', 'syntax-rules',
+  'textual-port?', 'truncate', 'truncate-quotient', 'truncate-remainder', 'truncate/', 'u8-ready?',
+  'utf8->string', 'values', 'vector', 'vector->list', 'vector->string', 'vector-append',
+  'vector-copy', 'vector-copy!', 'vector-fill!', 'vector-for-each', 'vector-length', 'vector-map',
+  'vector-ref', 'vector-set!', 'vector?', 'with-exception-handler', 'write-bytevector',
+  'write-char', 'write-string', 'write-u8', 'zero?',
+  --
+  'char-alphabetic?', 'char-ci<=?', 'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?',
+  'char-downcase', 'char-foldcase', 'char-lower-case?', 'char-numeric?', 'char-upcase',
+  'char-upper-case?', 'char-whitespace?', 'digit-value', 'string-ci<=?', 'string-ci<?',
+  'string-ci=?', 'string-ci>=?', 'string-ci>?', 'string-downcase', 'string-foldcase',
+  'string-upcase',
+  --
+  'angle', 'imag-part', 'magnitude', 'make-polar', 'make-rectangular', 'real-part',
+  --
+  'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr', 'cadaar', 'cadadr', 'cadar', 'caddar',
+  'cadddr', 'caddr', 'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cddaar', 'cddadr',
+  'cddar', 'cdddar', 'cddddr', 'cdddr',
+  --
+  'environment', 'eval',
+  --
+  'call-with-input-file', 'call-with-output-file', 'delete-file', 'file-exists?',
+  'open-binary-input-file', 'open-binary-output-file', 'open-input-file', 'open-output-file',
+  'with-input-from-file', 'with-output-to-file',
+  --
+  'acos', 'asin', 'atan', 'cos', 'exp', 'finite?', 'infinite?', 'log', 'nan?', 'sin', 'sqrt', 'tan',
+  --
+  'force', 'make-promise', 'promise?',
+  --
+  'load',
+  --
+  'command-line', 'emergency-exit', 'exit', 'get-environment-variable', 'get-environment-variables',
+  --
+  'read',
+  --
+  'interaction-environment',
+  --
+  'current-jiffy', 'current-second', 'jiffies-per-second',
+  --
+  'display', 'write', 'write-shared', 'write-simple',
+  --
+  'syntax-case', 'er-macro-transformer', 'sc-macro-transformer', 'rsc-macro-transformer'
+}))
+
+-- Identifiers and symbols.
 local explicit_sign = S('+-')
-
-local initial = l.alpha + S('!$%&*/:<=>?@^_~')
-local subsequent = initial + l.digit + explicit_sign + P'.'
-
+local initial = lexer.alpha + S('!$%&*/:<=>?@^_~')
+local subsequent = initial + lexer.digit + explicit_sign + '.'
 local sign_subsequent = initial + explicit_sign
-local dot_subsequent = sign_subsequent + P'.'
+local dot_subsequent = sign_subsequent + '.'
+-- LuaFormatter off
+local peculiar_identifier =
+  explicit_sign * '.' * dot_subsequent * subsequent^0 +
+  explicit_sign * sign_subsequent * subsequent^0 +
+  '.' * dot_subsequent * subsequent^0 +
+  explicit_sign * -(lexer.digit + '.' * lexer.digit) -- a sign before a digit belongs to a number
+-- LuaFormatter on
+local ident = lexer.range('|') + initial * subsequent^0 + peculiar_identifier
+lex:add_rule('identifier', token(lexer.IDENTIFIER, ident))
+lex:add_rule('symbol', token(lexer.CLASS, "'" * ident))
 
-local peculiar_identifier
-  = explicit_sign * P'.' * dot_subsequent * subsequent^0
-  + explicit_sign * sign_subsequent * subsequent^0
-  + P'.' * dot_subsequent * subsequent^0
-  + explicit_sign
+-- Strings.
+local character = '#\\' *
+  (word_match('alarm backspace delete escape newline null return space tab') + 'x' * lexer.xdigit^1 +
+    lexer.any) -- after "#\": a named character, "x" plus hex digits, or any single character
+local dq_str = lexer.range('"') -- text delimited by double quotes
+lex:add_rule('string', token(lexer.STRING, character + dq_str)) -- character literals tried first
+
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('#t #f #true #false')))
 
-local ident
-  = l.delimited_range('|')
-  + initial * subsequent^0
-  + peculiar_identifier
+-- Directives.
+lex:add_rule('directive', token(lexer.PREPROCESSOR, P('#!fold-case') + '#!no-fold-case'))
 
-local identifier = token(l.IDENTIFIER, ident)
-local symbol = token(l.CLASS, P"'" * ident)
+-- Comments.
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#|', '|#', false, false, true)
+local datum_comment = '#;' * lexer.space^0 *
+  (lexer.range('(', ')', false, true, true) + (lexer.any - lexer.space - ')')^1) -- list or atom
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment + datum_comment))
 
 -- Numbers.
+local radixes = {[2] = P('#b'), [8] = P('#o'), [10] = P('#d')^-1, [16] = P('#x')}
+local digits = {[2] = S('01'), [8] = lpeg.R('07'), [10] = lexer.digit, [16] = lexer.xdigit}
 local function num(r)
-  local exactness = (P'#i' + P'#e')^-1
-
-  local radix = ({
-    [2] = P'#b',
-    [8] = P'#o',
-    [10] = P('#d')^-1,
-    [16] = P'#x'
-  })[r]
-
-  local digit = ({
-    [2] = S'01',
-    [8] = R'07',
-    [10] = l.digit,
-    [16] = l.xdigit
-  })[r]
-
+  local exactness = (P('#i') + '#e')^-1
+  local radix, digit = radixes[r], digits[r]
   local prefix = radix * exactness + exactness * radix
-  local suffix = (P'e' * S('+-')^-1 * l.digit^1)^-1
-
-  local infnan = P'+inf.0' + P'-inf.0' + P'+nan.0' + P'-nan.0'
-
-  local decimal
-    = l.digit^1 * suffix
-    + P'.' * l.digit^1 * suffix
-    + l.digit^1 * P'.' * l.digit^0 * suffix
-
-  local ureal
-    = digit^1 * P'/' * digit^1
-    + (r == 10 and decimal or P(false))
-    + digit^1
-  local real
-    = S('+-')^-1 * ureal
-    + infnan
-
-  local i = P'i'
-  local complex
-    = real * P'@' * real
-    + real * S'+-' * ureal^-1 * i
-    + real * infnan * i
-    + infnan * i
-    + real
-    + S'+-' * ureal^-1 * i
-
+  local suffix = ('e' * S('+-')^-1 * lexer.digit^1)^-1
+  local infnan = S('+-') * word_match[[inf nan]] * '.0'
+  -- LuaFormatter off
+  local decimal = lexer.digit^1 * suffix +
+    '.' * lexer.digit^1 * suffix +
+    lexer.digit^1 * '.' * lexer.digit^0 * suffix
+  local ureal = digit^1 * '/' * digit^1 +
+    (r == 10 and decimal or P(false)) +
+    digit^1
+  local real = S('+-')^-1 * ureal + infnan
+  local i = P('i')
+  local complex = real * '@' * real +
+    real * S('+-') * ureal^-1 * i +
+    real * infnan * i +
+    infnan * i +
+    real +
+    S('+-') * ureal^-1 * i
+  -- LuaFormatter on
   return prefix * complex
 end
-
-local number = token(l.NUMBER, num(2) + num(8) + num(10) + num(16))
+lex:add_rule('number', token(lexer.NUMBER, num(2) + num(8) + num(10) + num(16)))
 
 -- Operators.
-local operator = token(l.OPERATOR, P'#u8' + P',@' + S(".`'#(),"))
-
-M._rules = {
-  {'whitespace', ws},
-  {'directive', directive},
-  {'boolean', boolean},
-  {'comment', comment},
-  {'string', string},
-  {'number', number},
-  {'keyword', keyword},
-  {'func', func},
-  {'identifier', identifier},
-  {'symbol', symbol},
-  {'operator', operator},
-}
-
+lex:add_rule('operator', token(lexer.OPERATOR, P('#u8') + ',@' + S(".`'#(),")))
 
-M._foldsymbols = {
-  _patterns = {'[%(%)%[%]{}]', '#|', '|#', ';'},
-  [l.OPERATOR] = {
-    ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
-  },
-  [l.COMMENT] = {['#|'] = 1, ['|#'] = -1, [';'] = l.fold_line_comments(';')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.COMMENT, '#|', '|#')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
 
-return M
+return lex