Resync the lexers with Scintillua

- Resync the lexers with Scintillua - Update the lexer readme - Update `zenburn` theme to fix some highlighting issues - lexers: redirect print function to vis:info() - Fix support for custom style names - As per error message "lexer.delimited_range() is deprecated, use lexer.range()". - Remove remaining `lexer.delimited_range()` call - Set syntax to `nil` if the file type has no matching lexer - Updated Go lexer for Go 1.18. - lexers/dsv: convert to new lexer format (cherry picked from commit 9edbc3cd9ea1d7142b1305840432a3d2739e755a) - lexers/gemini: disable legacy gemini lexer This reverts commit 468f9ee1b027a7ce98b1a249fa1af5888feeb989. It is in legacy format and of questionable quality. Ideally it should be contributed upstream from where it will eventually trickle down to us. - lexers/git-rebase: convert to new lexer format (cherry picked from commit 4000a4cc9ac4a4c2869dfae772b977a82aee8d8c) - lexers/strace: convert to new lexer format (cherry picked from commit e420451320d97eb164f5629c1bcfab0b595be29d) - lexers/typescript: add new upstream lexer revision 28e2b60 (cherry picked from commit 7326e6deecdaa75fa94ae9ebdb653f9f907b33f2) - use `package.searchpath` instead of a local `searchpath` function - Restore `filetype: support filetype detection via hashbang` - Remove redundant comment - Restore gemini lexer
author: qiu-x <alex@alexslomka.xyz> 2022-06-29 07:56:51 +0200
committer: Felix Van der Jeugt <felix.vanderjeugt@posteo.net> 2022-11-29 21:57:18 +0100
commit: 8a420ecc4c1ed50111464ec66901bd983eaf2dbd (patch)
tree: f31d2186cafaee6e7f18d32fe99144c3e8148c00 /lua/lexers/scheme.lua
parent: 981b90a203484182feace48471fe2b53dae7676f (diff)
download: vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.gz
vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.xz
1 files changed, 150 insertions, 212 deletions
diff --git a/lua/lexers/scheme.lua b/lua/lexers/scheme.lua
index 681f2fd..a19fa0f 100644
--- a/lua/lexers/scheme.lua
+++ b/lua/lexers/scheme.lua
@@ -1,236 +1,174 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- Scheme LPeg lexer.
+-- Contributions by Murray Calavera.
 
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
 
-local M = {_NAME = 'scheme'}
+local lex = lexer.new('scheme')
 
 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline^0
-local block_comment = l.nested_pair('#|', '|#')
--- TODO: this should handle any datum and take into account "#\)", ";" etc.
-local datum_comment
-  = P'#;' * l.space^0
-  * (l.delimited_range("()", false, true, true) + (l.any - l.space)^1)
-local comment = token(l.COMMENT, datum_comment + line_comment + block_comment)
-
--- Strings.
-local character
-  = P'#\\' * ( P'alarm' + P'backspace' + P'delete' + P'escape'
-             + P'newline' + P'null' + P'return' + P'space' + P'tab')
-  + P'#\\x' * l.xdigit^1
-  + P'#\\' * P(1)
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, character + dq_str)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
 
 -- Keywords.
-local keyword = token(l.KEYWORD, word_match({
-  "and", "or", "not", "else",
-
-  "library", "define-library", "export", "include-library-declarations",
-  "cond-expand", "import", "rename", "only", "except", "prefix", "include",
-  "include-ci",
-
-  "begin", "case", "case-lambda", "cond", "define", "define-record-type",
-  "define-syntax", "define-values", "delay", "delay-force", "do", "if",
-  "guard", "lambda", "let", "let*", "let*-values", "let-syntax", "let-values",
-  "letrec", "letrec*", "letrec-syntax", "parameterize", "quasiquote", "quote",
-  "set!", "unless", "unquote", "unquote-splicing", "when",
-
-  "define-macro", "fluid-let"
-}, '.-+!$%&*/:<=>?@^_~'))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+  'and', 'or', 'not', 'else',
+  --
+  'library', 'define-library', 'export', 'include-library-declarations', 'cond-expand', 'import',
+  'rename', 'only', 'except', 'prefix', 'include', 'include-ci',
+  --
+  'begin', 'case', 'case-lambda', 'cond', 'define', 'define-record-type', 'define-syntax',
+  'define-values', 'delay', 'delay-force', 'do', 'if', 'guard', 'lambda', 'let', 'let*',
+  'let*-values', 'let-syntax', 'let-values', 'letrec', 'letrec*', 'letrec-syntax', 'parameterize',
+  'quasiquote', 'quote', 'set!', 'unless', 'unquote', 'unquote-splicing', 'when',
+  --
+  'define-macro', 'fluid-let'
+}))
 
 -- Functions.
-local func = token(l.FUNCTION, word_match({
-  "*", "+", "-", "/", "<", "<=", "=", "=>", ">", ">=", "abs", "append",
-  "apply", "assoc", "assq", "assv", "binary-port?", "boolean=?", "boolean?",
-  "bytevector", "bytevector-append", "bytevector-copy", "bytevector-copy!",
-  "bytevector-length", "bytevector-u8-ref", "bytevector-u8-set!",
-  "bytevector?", "caar", "cadr", "call-with-current-continuation",
-  "call-with-port", "call-with-values", "call/cc", "car", "cdar", "cddr",
-  "cdr", "ceiling", "char->integer", "char-ready?", "char<=?", "char<?",
-  "char=?", "char>=?", "char>?", "char?", "close-input-port",
-  "close-output-port", "close-port", "complex?", "cons", "current-error-port",
-  "current-input-port", "current-output-port", "denominator", "dynamic-wind",
-  "eof-object", "eof-object?", "eq?", "equal?", "eqv?", "error",
-  "error-object-irritants", "error-object-message", "error-object?", "even?",
-  "exact", "exact-integer-sqrt", "exact-integer?", "exact?", "expt",
-  "features", "file-error?", "floor", "floor-quotient", "floor-remainder",
-  "floor/", "flush-output-port", "for-each", "gcd", "get-output-bytevector",
-  "get-output-string", "inexact", "inexact?", "input-port-open?",
-  "input-port?", "integer->char", "integer?", "lcm", "length", "list",
-  "list->string", "list->vector", "list-copy", "list-ref", "list-set!",
-  "list-tail", "list?", "make-bytevector", "make-list", "make-parameter",
-  "make-string", "make-vector", "map", "max", "member", "memq", "memv", "min",
-  "modulo", "negative?", "newline", "null?", "number->string", "number?",
-  "numerator", "odd?", "open-input-bytevector", "open-input-string",
-  "open-output-bytevector", "open-output-string", "output-port-open?",
-  "output-port?", "pair?", "peek-char", "peek-u8", "port?", "positive?",
-  "procedure?", "quotient", "raise", "raise-continuable", "rational?",
-  "rationalize", "read-bytevector", "read-bytevector!", "read-char",
-  "read-error?", "read-line", "read-string", "read-u8", "real?", "remainder",
-  "reverse", "round", "set-car!", "set-cdr!", "square", "string",
-  "string->list", "string->number", "string->symbol", "string->utf8",
-  "string->vector", "string-append", "string-copy", "string-copy!",
-  "string-fill!", "string-for-each", "string-length", "string-map",
-  "string-ref", "string-set!", "string<=?", "string<?", "string=?",
-  "string>=?", "string>?", "string?", "substring", "symbol->string",
-  "symbol=?", "symbol?", "syntax-error", "syntax-rules", "textual-port?",
-  "truncate", "truncate-quotient", "truncate-remainder", "truncate/",
-  "u8-ready?", "utf8->string", "values", "vector", "vector->list",
-  "vector->string", "vector-append", "vector-copy", "vector-copy!",
-  "vector-fill!", "vector-for-each", "vector-length", "vector-map",
-  "vector-ref", "vector-set!", "vector?", "with-exception-handler",
-  "write-bytevector", "write-char", "write-string", "write-u8", "zero?",
-
-  "char-alphabetic?", "char-ci<=?", "char-ci<?", "char-ci=?", "char-ci>=?",
-  "char-ci>?", "char-downcase", "char-foldcase", "char-lower-case?",
-  "char-numeric?", "char-upcase", "char-upper-case?", "char-whitespace?",
-  "digit-value", "string-ci<=?", "string-ci<?", "string-ci=?", "string-ci>=?",
-  "string-ci>?", "string-downcase", "string-foldcase", "string-upcase",
-
-  "angle", "imag-part", "magnitude", "make-polar", "make-rectangular",
-  "real-part",
-
-  "caaaar", "caaadr", "caaar", "caadar", "caaddr", "caadr", "cadaar", "cadadr",
-  "cadar", "caddar", "cadddr", "caddr", "cdaaar", "cdaadr", "cdaar", "cdadar",
-  "cdaddr", "cdadr", "cddaar", "cddadr", "cddar", "cdddar", "cddddr", "cdddr",
-
-  "environment", "eval",
-
-  "call-with-input-file", "call-with-output-file", "delete-file",
-  "file-exists?", "open-binary-input-file", "open-binary-output-file",
-  "open-input-file", "open-output-file", "with-input-from-file",
-  "with-output-to-file",
-
-  "acos", "asin", "atan", "cos", "exp", "finite?", "infinite?", "log", "nan?",
-  "sin", "sqrt", "tan",
-
-  "force", "make-promise", "promise?",
-
-  "load",
-
-  "command-line", "emergency-exit", "exit", "get-environment-variable",
-  "get-environment-variables",
-
-  "read",
-
-  "interaction-environment",
-
-  "current-jiffy", "current-second", "jiffies-per-second",
-
-  "display", "write", "write-shared", "write-simple",
-
-  "syntax-case", "er-macro-transformer", "sc-macro-transformer",
-  "rsc-macro-transformer"
-}, '.-+!$%&*/:<=>?@^_~'))
-
-local directive = token(l.PREPROCESSOR, P'#!fold-case' + P'#!no-fold-case')
-local boolean = token(l.CONSTANT,
-  word_match({'#t', '#f', '#true', '#false'}, '#'))
-
--- Identifiers.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+  '*', '+', '-', '/', '<', '<=', '=', '=>', '>', '>=', 'abs', 'append', 'apply', 'assoc', 'assq',
+  'assv', 'binary-port?', 'boolean=?', 'boolean?', 'bytevector', 'bytevector-append',
+  'bytevector-copy', 'bytevector-copy!', 'bytevector-length', 'bytevector-u8-ref',
+  'bytevector-u8-set!', 'bytevector?', 'caar', 'cadr', 'call-with-current-continuation',
+  'call-with-port', 'call-with-values', 'call/cc', 'car', 'cdar', 'cddr', 'cdr', 'ceiling',
+  'char->integer', 'char-ready?', 'char<=?', 'char<?', 'char=?', 'char>=?', 'char>?', 'char?',
+  'close-input-port', 'close-output-port', 'close-port', 'complex?', 'cons', 'current-error-port',
+  'current-input-port', 'current-output-port', 'denominator', 'dynamic-wind', 'eof-object',
+  'eof-object?', 'eq?', 'equal?', 'eqv?', 'error', 'error-object-irritants', 'error-object-message',
+  'error-object?', 'even?', 'exact', 'exact-integer-sqrt', 'exact-integer?', 'exact?', 'expt',
+  'features', 'file-error?', 'floor', 'floor-quotient', 'floor-remainder', 'floor/',
+  'flush-output-port', 'for-each', 'gcd', 'get-output-bytevector', 'get-output-string', 'inexact',
+  'inexact?', 'input-port-open?', 'input-port?', 'integer->char', 'integer?', 'lcm', 'length',
+  'list', 'list->string', 'list->vector', 'list-copy', 'list-ref', 'list-set!', 'list-tail',
+  'list?', 'make-bytevector', 'make-list', 'make-parameter', 'make-string', 'make-vector', 'map',
+  'max', 'member', 'memq', 'memv', 'min', 'modulo', 'negative?', 'newline', 'null?',
+  'number->string', 'number?', 'numerator', 'odd?', 'open-input-bytevector', 'open-input-string',
+  'open-output-bytevector', 'open-output-string', 'output-port-open?', 'output-port?', 'pair?',
+  'peek-char', 'peek-u8', 'port?', 'positive?', 'procedure?', 'quotient', 'raise',
+  'raise-continuable', 'rational?', 'rationalize', 'read-bytevector', 'read-bytevector!',
+  'read-char', 'read-error?', 'read-line', 'read-string', 'read-u8', 'real?', 'remainder',
+  'reverse', 'round', 'set-car!', 'set-cdr!', 'square', 'string', 'string->list', 'string->number',
+  'string->symbol', 'string->utf8', 'string->vector', 'string-append', 'string-copy',
+  'string-copy!', 'string-fill!', 'string-for-each', 'string-length', 'string-map', 'string-ref',
+  'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?', 'string>?', 'string?',
+  'substring', 'symbol->string', 'symbol=?', 'symbol?', 'syntax-error', 'syntax-rules',
+  'textual-port?', 'truncate', 'truncate-quotient', 'truncate-remainder', 'truncate/', 'u8-ready?',
+  'utf8->string', 'values', 'vector', 'vector->list', 'vector->string', 'vector-append',
+  'vector-copy', 'vector-copy!', 'vector-fill!', 'vector-for-each', 'vector-length', 'vector-map',
+  'vector-ref', 'vector-set!', 'vector?', 'with-exception-handler', 'write-bytevector',
+  'write-char', 'write-string', 'write-u8', 'zero?',
+  --
+  'char-alphabetic?', 'char-ci<=?', 'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?',
+  'char-downcase', 'char-foldcase', 'char-lower-case?', 'char-numeric?', 'char-upcase',
+  'char-upper-case?', 'char-whitespace?', 'digit-value', 'string-ci<=?', 'string-ci<?',
+  'string-ci=?', 'string-ci>=?', 'string-ci>?', 'string-downcase', 'string-foldcase',
+  'string-upcase',
+  --
+  'angle', 'imag-part', 'magnitude', 'make-polar', 'make-rectangular', 'real-part',
+  --
+  'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr', 'cadaar', 'cadadr', 'cadar', 'caddar',
+  'cadddr', 'caddr', 'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cddaar', 'cddadr',
+  'cddar', 'cdddar', 'cddddr', 'cdddr',
+  --
+  'environment', 'eval',
+  --
+  'call-with-input-file', 'call-with-output-file', 'delete-file', 'file-exists?',
+  'open-binary-input-file', 'open-binary-output-file', 'open-input-file', 'open-output-file',
+  'with-input-from-file', 'with-output-to-file',
+  --
+  'acos', 'asin', 'atan', 'cos', 'exp', 'finite?', 'infinite?', 'log', 'nan?', 'sin', 'sqrt', 'tan',
+  --
+  'force', 'make-promise', 'promise?',
+  --
+  'load',
+  --
+  'command-line', 'emergency-exit', 'exit', 'get-environment-variable', 'get-environment-variables',
+  --
+  'read',
+  --
+  'interaction-environment',
+  --
+  'current-jiffy', 'current-second', 'jiffies-per-second',
+  --
+  'display', 'write', 'write-shared', 'write-simple',
+  --
+  'syntax-case', 'er-macro-transformer', 'sc-macro-transformer', 'rsc-macro-transformer'
+}))
+
+-- Identifiers and symbols.
 local explicit_sign = S('+-')
-
-local initial = l.alpha + S('!$%&*/:<=>?@^_~')
-local subsequent = initial + l.digit + explicit_sign + P'.'
-
+local initial = lexer.alpha + S('!$%&*/:<=>?@^_~')
+local subsequent = initial + lexer.digit + explicit_sign + '.'
 local sign_subsequent = initial + explicit_sign
-local dot_subsequent = sign_subsequent + P'.'
+local dot_subsequent = sign_subsequent + '.'
+-- LuaFormatter off
+local peculiar_identifier =
+  explicit_sign * '.' * dot_subsequent * subsequent^0 +
+  explicit_sign * sign_subsequent * subsequent^0 +
+  '.' * dot_subsequent * subsequent^0 +
+  explicit_sign
+-- LuaFormatter on
+local ident = lexer.range('|') + initial * subsequent^0 + peculiar_identifier
+lex:add_rule('identifier', token(lexer.IDENTIFIER, ident))
+lex:add_rule('symbol', token(lexer.CLASS, "'" * ident))
 
-local peculiar_identifier
-  = explicit_sign * P'.' * dot_subsequent * subsequent^0
-  + explicit_sign * sign_subsequent * subsequent^0
-  + P'.' * dot_subsequent * subsequent^0
-  + explicit_sign
+-- Strings.
+local character = '#\\' *
+  (word_match('alarm backspace delete escape newline null return space tab') + 'x' * lexer.xdigit^1 +
+    lexer.any)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, character + dq_str))
+
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('#t #f #true #false')))
 
-local ident
-  = l.delimited_range('|')
-  + initial * subsequent^0
-  + peculiar_identifier
+-- Directives.
+lex:add_rule('directive', token(lexer.PREPROCESSOR, P('#!fold-case') + '#!no-fold-case'))
 
-local identifier = token(l.IDENTIFIER, ident)
-local symbol = token(l.CLASS, P"'" * ident)
+-- Comments.
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#|', '|#', false, false, true)
+local datum_comment = '#;' * lexer.space^0 * lexer.range('(', ')', false, true, true) *
+  (lexer.any - lexer.space)^0
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment + datum_comment))
 
 -- Numbers.
+local radixes = {[2] = P('#b'), [8] = P('#o'), [10] = P('#d')^-1, [16] = P('#x')}
+local digits = {[2] = S('01'), [8] = lpeg.R('07'), [10] = lexer.digit, [16] = lexer.xdigit}
 local function num(r)
-  local exactness = (P'#i' + P'#e')^-1
-
-  local radix = ({
-    [2] = P'#b',
-    [8] = P'#o',
-    [10] = P('#d')^-1,
-    [16] = P'#x'
-  })[r]
-
-  local digit = ({
-    [2] = S'01',
-    [8] = R'07',
-    [10] = l.digit,
-    [16] = l.xdigit
-  })[r]
-
+  local exactness = (P('#i') + '#e')^-1
+  local radix, digit = radixes[r], digits[r]
   local prefix = radix * exactness + exactness * radix
-  local suffix = (P'e' * S('+-')^-1 * l.digit^1)^-1
-
-  local infnan = P'+inf.0' + P'-inf.0' + P'+nan.0' + P'-nan.0'
-
-  local decimal
-    = l.digit^1 * suffix
-    + P'.' * l.digit^1 * suffix
-    + l.digit^1 * P'.' * l.digit^0 * suffix
-
-  local ureal
-    = digit^1 * P'/' * digit^1
-    + (r == 10 and decimal or P(false))
-    + digit^1
-  local real
-    = S('+-')^-1 * ureal
-    + infnan
-
-  local i = P'i'
-  local complex
-    = real * P'@' * real
-    + real * S'+-' * ureal^-1 * i
-    + real * infnan * i
-    + infnan * i
-    + real
-    + S'+-' * ureal^-1 * i
-
+  local suffix = ('e' * S('+-')^-1 * lexer.digit^1)^-1
+  local infnan = S('+-') * word_match[[inf nan]] * '.0'
+  -- LuaFormatter off
+  local decimal = lexer.digit^1 * suffix +
+    '.' * lexer.digit^1 * suffix +
+    lexer.digit^1 * '.' * lexer.digit^0 * suffix
+  local ureal = digit^1 * '/' * digit^1 +
+    (r == 10 and decimal or P(false)) +
+    digit^1
+  local real = S('+-')^-1 * ureal + infnan
+  local i = P('i')
+  local complex = real * '@' * real +
+    real * S('+-') * ureal^-1 * i +
+    real * infnan * i +
+    infnan * i +
+    real +
+    S('+-') * ureal^-1 * i
+  -- LuaFormatter on
   return prefix * complex
 end
-
-local number = token(l.NUMBER, num(2) + num(8) + num(10) + num(16))
+lex:add_rule('number', token(lexer.NUMBER, num(2) + num(8) + num(10) + num(16)))
 
 -- Operators.
-local operator = token(l.OPERATOR, P'#u8' + P',@' + S(".`'#(),"))
-
-M._rules = {
-  {'whitespace', ws},
-  {'directive', directive},
-  {'boolean', boolean},
-  {'comment', comment},
-  {'string', string},
-  {'number', number},
-  {'keyword', keyword},
-  {'func', func},
-  {'identifier', identifier},
-  {'symbol', symbol},
-  {'operator', operator},
-}
-
+lex:add_rule('operator', token(lexer.OPERATOR, P('#u8') + ',@' + S(".`'#(),")))
 
-M._foldsymbols = {
-  _patterns = {'[%(%)%[%]{}]', '#|', '|#', ';'},
-  [l.OPERATOR] = {
-    ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
-  },
-  [l.COMMENT] = {['#|'] = 1, ['|#'] = -1, [';'] = l.fold_line_comments(';')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.COMMENT, '#|', '|#')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
 
-return M
+return lex
author	qiu-x <alex@alexslomka.xyz>	2022-06-29 07:56:51 +0200
committer	Felix Van der Jeugt <felix.vanderjeugt@posteo.net>	2022-11-29 21:57:18 +0100
commit	8a420ecc4c1ed50111464ec66901bd983eaf2dbd (patch)
tree	f31d2186cafaee6e7f18d32fe99144c3e8148c00 /lua/lexers/scheme.lua
parent	981b90a203484182feace48471fe2b53dae7676f (diff)
download	vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.gz vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.xz