From 8a420ecc4c1ed50111464ec66901bd983eaf2dbd Mon Sep 17 00:00:00 2001 From: qiu-x Date: Wed, 29 Jun 2022 07:56:51 +0200 Subject: Resync the lexers with Scintillua - Resync the lexers with Scintillua - Update the lexer readme - Update `zenburn` theme to fix some highlighting issues - lexers: redirect print function to vis:info() - Fix support for custom style names - As per error message "lexer.delimited_range() is deprecated, use lexer.range()". - Remove remaining `lexer.delimited_range()` call - Set syntax to `nil` if the file type has no matching lexer - Updated Go lexer for Go 1.18. - lexers/dsv: convert to new lexer format (cherry picked from commit 9edbc3cd9ea1d7142b1305840432a3d2739e755a) - lexers/gemini: disable legacy gemini lexer This reverts commit 468f9ee1b027a7ce98b1a249fa1af5888feeb989. It is in legacy format and of questionable quality. Ideally it should be contributed upstream from where it will eventually trickle down to us. - lexers/git-rebase: convert to new lexer format (cherry picked from commit 4000a4cc9ac4a4c2869dfae772b977a82aee8d8c) - lexers/strace: convert to new lexer format (cherry picked from commit e420451320d97eb164f5629c1bcfab0b595be29d) - lexers/typescript: add new upstream lexer revision 28e2b60 (cherry picked from commit 7326e6deecdaa75fa94ae9ebdb653f9f907b33f2) - use `package.searchpath` instead of a local `searchpath` function - Restore `filetype: support filetype detection via hashbang` - Remove redundant comment - Restore gemini lexer --- lua/lexers/sml.lua | 180 ++++++++++++++++++++++++----------------------------- 1 file changed, 80 insertions(+), 100 deletions(-) (limited to 'lua/lexers/sml.lua') diff --git a/lua/lexers/sml.lua b/lua/lexers/sml.lua index 093e67c..ba2015e 100644 --- a/lua/lexers/sml.lua +++ b/lua/lexers/sml.lua @@ -1,111 +1,91 @@ --- Copyright 2017 Murray Calavera. See LICENSE. +-- Copyright 2017-2022 Murray Calavera. See LICENSE. -- Standard ML LPeg lexer. -local l = require('lexer') -local token = l.token - -local function mlword(words) - return l.word_match(words, "'") -end +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S + +local lex = lexer.new('sml') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Structures. +local id = (lexer.alnum + "'" + '_')^0 +local aid = lexer.alpha * id +local longid = (aid * '.')^0 * aid +local struct_dec = token(lexer.KEYWORD, 'structure') * ws * token(lexer.CLASS, aid) * ws * + token(lexer.OPERATOR, '=') * ws +lex:add_rule('struct_new', struct_dec * token(lexer.KEYWORD, 'struct')) +lex:add_rule('struct_alias', struct_dec * token(lexer.CLASS, longid)) +lex:add_rule('structure', token(lexer.CLASS, aid * '.')) + +-- Open. +lex:add_rule('open', token(lexer.KEYWORD, word_match('open structure functor')) * ws * + token(lexer.CLASS, longid)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end', 'exception', 'fn', + 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'orelse', + 'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while', -- + 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', 'struct', 'structure' +})) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match{ + 'int', 'real', 'word', 'bool', 'char', 'string', 'unit', 'array', 'exn', 'list', 'option', + 'order', 'ref', 'substring', 'vector' +})) + +-- Functions. +-- `real`, `vector` and `substring` are a problem. +lex:add_rule('function', token(lexer.FUNCTION, word_match{ + 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName', 'explode', 'floor', 'foldl', + 'foldr', 'getOpt', 'hd', 'ignore', 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', + 'print', 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc', 'valOf', 'vector', + 'o', 'abs', 'mod', 'div' +})) -local ws = token(l.WHITESPACE, l.space^1) +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false nil') + lexer.upper * id)) --- single line comments are valid in successor ml -local cl = '(*)' * l.nonnewline^0 -local comment = token(l.COMMENT, cl + l.nested_pair('(*', '*)')) +-- Indentifiers (non-symbolic). +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * id)) -local string = token(l.STRING, lpeg.P('#')^-1 * l.delimited_range('"', true)) +-- Strings. +lex:add_rule('string', token(lexer.STRING, P('#')^-1 * lexer.range('"', true))) -local function num(digit) - return digit * (digit^0 * lpeg.P('_'))^0 * digit^1 + digit -end +-- Comments. +local line_comment = lexer.to_eol('(*)') +local block_comment = lexer.range('(*', '*)', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -local int = num(l.digit) -local frac = lpeg.P('.') * int +-- Numbers. +local function num(digit) return digit * (digit^0 * '_')^0 * digit^1 + digit end +local int = num(lexer.digit) +local frac = '.' * int local minus = lpeg.P('~')^-1 local exp = lpeg.S('eE') * minus * int local real = int * frac^-1 * exp + int * frac * exp^-1 -local hex = num(l.xdigit) +local hex = num(lexer.xdigit) local bin = num(lpeg.S('01')) - -local number = token(l.NUMBER, - lpeg.P('0w') * int - + (lpeg.P('0wx') + lpeg.P('0xw')) * hex - + (lpeg.P('0wb') + lpeg.P('0bw')) * bin - + minus * lpeg.P('0x') * hex - + minus * lpeg.P('0b') * bin - + minus * real - + minus * int -) - -local keyword = token(l.KEYWORD, mlword{ - 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end', - 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let', - 'local', 'nonfix', 'of', 'op', 'orelse', 'raise', 'rec', 'then', - 'type', 'val', 'with', 'withtype', 'while', - - 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', - 'struct', 'structure' -}) - --- includes valid symbols for identifiers -local operator = token(l.OPERATOR, lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')) - -local type = token(l.TYPE, mlword{ - 'int', 'real', 'word', 'bool', 'char', 'string', 'unit', - 'array', 'exn', 'list', 'option', 'order', 'ref', 'substring', 'vector' -}) - --- `real`, `vector` and `substring` are a problem -local func = token(l.FUNCTION, mlword{ - 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName', - 'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore', - 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print', - 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc', - 'valOf', 'vector', - 'o', 'abs', 'mod', 'div' -}) - --- non-symbolic identifiers only -local id = (l.alnum + "'" + '_')^0 -local aid = l.alpha * id -local longid = (aid * lpeg.P('.'))^0 * aid -local identifier = token(l.IDENTIFIER, l.lower * id) -local typevar = token(l.VARIABLE, "'" * id) -local c = mlword{'true', 'false', 'nil'} -local const = token(l.CONSTANT, l.upper * id + c) -local structure = token(l.CLASS, aid * lpeg.P('.')) - -local open - = token(l.KEYWORD, mlword{'open', 'structure', 'functor'}) - * ws * token(l.CLASS, longid) - -local struct_dec - = token(l.KEYWORD, lpeg.P('structure')) * ws - * token(l.CLASS, aid) * ws - * token(l.OPERATOR, lpeg.P('=')) * ws - -local struct_new = struct_dec * token(l.KEYWORD, lpeg.P('struct')) -local struct_alias = struct_dec * token(l.CLASS, longid) - -local M = {_NAME = 'sml'} - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'number', number}, - {'struct_new', struct_new}, - {'struct_alias', struct_alias}, - {'structure', structure}, - {'open', open}, - {'type', type}, - {'keyword', keyword}, - {'function', func}, - {'string', string}, - {'operator', operator}, - {'typevar', typevar}, - {'constant', const}, - {'identifier', identifier}, -} - -return M +-- LuaFormatter off +lex:add_rule('number', token(lexer.NUMBER, + '0w' * int + + (P('0wx') + '0xw') * hex + + (P('0wb') + '0bw') * bin + + minus * '0x' * hex + + minus * '0b' * bin + + minus * real + + minus * int)) +-- LuaFormatter on + +-- Type variables. +lex:add_rule('typevar', token(lexer.VARIABLE, "'" * id)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))) + +return lex -- cgit v1.2.3