diff options
Diffstat (limited to 'lua/lexers/pony.lua')
| -rw-r--r-- | lua/lexers/pony.lua | 200 |
1 files changed, 89 insertions, 111 deletions
diff --git a/lua/lexers/pony.lua b/lua/lexers/pony.lua index 5cb07a1..b02f2a7 100644 --- a/lua/lexers/pony.lua +++ b/lua/lexers/pony.lua @@ -1,116 +1,94 @@ --- Copyright 2017 Murray Calavera. See LICENSE. +-- Copyright 2017-2022 Murray Calavera. See LICENSE. -- Pony LPeg lexer. -local l = require('lexer') -local token = l.token +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match local P, S = lpeg.P, lpeg.S -local function pword(words) - return l.word_match(words, "'") -end - -local ws = token(l.WHITESPACE, l.space^1) - -local comment_line = '//' * l.nonnewline^0 -local comment_block = l.nested_pair('/*', '*/') -local comment = token(l.COMMENT, comment_line + comment_block) - -local annotation = token(l.PREPROCESSOR, l.delimited_range('\\', false, true)) - -local lit_bool = token(l.CONSTANT, pword{'true', 'false'}) - -local nq = l.any - P'"' -local lit_str = token(l.STRING, - P'"""' * (nq + (P'"' * #(nq + (P'"' * nq))))^0 * P'"""' - + l.delimited_range('"') - + l.delimited_range("'") -) - -local function num(digit) - return digit * (digit^0 * P'_')^0 * digit^1 + digit -end - -local int = num(l.digit) -local frac = P('.') * int -local exp = S('eE') * (P('-') + P('+'))^-1 * int - -local lit_num = token(l.NUMBER, - P('0x') * num(l.xdigit) - + P('0b') * num(S('01')) - + int * frac^-1 * exp^-1 -) - -local keyword = token(l.KEYWORD, pword{ - 'actor', 'as', 'be', 'break', 'class', 'compile_error', 'compile_intrinsic', - 'continue', 'consume', 'do', 'else', 'elseif', 'embed', 'end', 'error', - 'for', 'fun', 'if', 'ifdef', 'iftype', 'in', 'interface', 'is', 'isnt', - 'lambda', 'let', 'match', 'new', 'object', 'primitive', 'recover', 'repeat', - 'return', 'struct', 'then', 'this', 'trait', 'try', 'type', 'until', 'use', - 'var', 'where', 'while', 'with'}) -local capability = token(l.LABEL, pword{ - 'box', 'iso', 'ref', 'tag', 'trn', 'val'}) -local qualifier = token(l.LABEL, - P'#' * pword{'read', 'send', 'share', 'any', 'alias'}) - -local operator = token(l.OPERATOR, - pword{'and', 'or', 'xor', 'not', 'addressof', 'digestof'} - + lpeg.Cmt(S('+-*/%<>=!~')^1, function(input, index, op) - local ops = { - ['+'] = true, ['-'] = true, ['*'] = true, ['/'] = true, ['%'] = true, - ['+~'] = true, ['-~'] = true, ['*~'] = true, ['/~'] = true, - ['%~'] = true, ['<<'] = true, ['>>'] = true, ['<<~'] = true, - ['>>~'] = true, ['=='] = true, ['!='] = true, ['<'] = true, - ['<='] = true, ['>='] = true, ['>'] = true, ['==~'] = true, - ['!=~'] = true, ['<~'] = true, ['<=~'] = true, ['>=~'] = true, - ['>~'] = true - } - return ops[op] and index or nil - end) -) - --- there is no suitable token name for this, change this if ever one is added -local punctuation = token(l.OPERATOR, - P'=>' + P'.>' + P'<:' + P'->' - + S('=.,:;()[]{}!?~^&|_@')) - --- highlight functions with syntax sugar at declaration -local func - = token(l.KEYWORD, pword{'fun', 'new', 'be'}) * ws^-1 - * annotation^-1 * ws^-1 - * capability^-1 * ws^-1 - * token(l.FUNCTION, pword{ - 'create', 'dispose', '_final', 'apply', 'update', - 'add', 'sub', 'mul', 'div', 'mod', 'add_unsafe', 'sub_unsafe', - 'mul_unsafe', 'div_unsafe', 'mod_unsafe', 'shl', 'shr', 'shl_unsafe', - 'shr_unsafe', 'op_and', 'op_or', 'op_xor', 'eq', 'ne', 'lt', 'le', 'ge', - 'gt', 'eq_unsafe', 'ne_unsafe', 'lt_unsafe', 'le_unsafe', 'ge_unsafe', - 'gt_unsafe', 'neg', 'neg_unsafe', 'op_not', - 'has_next', 'next', - '_serialise_space', '_serialise', '_deserialise'}) - -local id_suffix = (l.alnum + P("'") + P('_'))^0 -local type = token(l.TYPE, P('_')^-1 * l.upper * id_suffix) -local identifier = token(l.IDENTIFIER, P('_')^-1 * l.lower * id_suffix) -local tuple_lookup = token(l.IDENTIFIER, P('_') * l.digit^1) - -local M = {_NAME = 'pony'} - -M._rules = { - {'whitespace', ws}, - {'comment', comment}, - {'annotation', annotation}, - {'boolean', lit_bool}, - {'number', lit_num}, - {'string', lit_str}, - {'function', func}, - {'keyword', keyword}, - {'capability', capability}, - {'qualifier', qualifier}, - {'operator', operator}, - {'type', type}, - {'identifier', identifier}, - {'lookup', tuple_lookup}, - {'punctuation', punctuation} +local lex = lexer.new('pony') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Capabilities. +local capability = token(lexer.LABEL, word_match('box iso ref tag trn val')) +lex:add_rule('capability', capability) + +-- Annotations. +local annotation = token(lexer.PREPROCESSOR, lexer.range('\\', false, false)) +lex:add_rule('annotation', annotation) + +-- Functions. +-- Highlight functions with syntax sugar at declaration. +lex:add_rule('function', + token(lexer.KEYWORD, word_match('fun new be')) * ws^-1 * annotation^-1 * ws^-1 * capability^-1 * + ws^-1 * token(lexer.FUNCTION, word_match{ + 'create', 'dispose', '_final', 'apply', 'update', 'add', 'sub', 'mul', 'div', 'mod', + 'add_unsafe', 'sub_unsafe', 'mul_unsafe', 'div_unsafe', 'mod_unsafe', 'shl', 'shr', + 'shl_unsafe', 'shr_unsafe', 'op_and', 'op_or', 'op_xor', 'eq', 'ne', 'lt', 'le', 'ge', 'gt', + 'eq_unsafe', 'ne_unsafe', 'lt_unsafe', 'le_unsafe', 'ge_unsafe', 'gt_unsafe', 'neg', + 'neg_unsafe', 'op_not', -- + 'has_next', 'next', -- + '_serialise_space', '_serialise', '_deserialise' + })) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ + 'actor', 'as', 'be', 'break', 'class', 'compile_error', 'compile_intrinsic', 'continue', + 'consume', 'do', 'else', 'elseif', 'embed', 'end', 'error', 'for', 'fun', 'if', 'ifdef', 'iftype', + 'in', 'interface', 'is', 'isnt', 'lambda', 'let', 'match', 'new', 'object', 'primitive', + 'recover', 'repeat', 'return', 'struct', 'then', 'this', 'trait', 'try', 'type', 'until', 'use', + 'var', 'where', 'while', 'with' +})) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false'))) + +-- Operators. +local ops = { + ['+'] = true, ['-'] = true, ['*'] = true, ['/'] = true, ['%'] = true, ['+~'] = true, + ['-~'] = true, ['*~'] = true, ['/~'] = true, ['%~'] = true, ['<<'] = true, ['>>'] = true, + ['<<~'] = true, ['>>~'] = true, ['=='] = true, ['!='] = true, ['<'] = true, ['<='] = true, + ['>='] = true, ['>'] = true, ['==~'] = true, ['!=~'] = true, ['<~'] = true, ['<=~'] = true, + ['>=~'] = true, ['>~'] = true } - -return M +lex:add_rule('operator', token(lexer.OPERATOR, word_match('and or xor not addressof digestof') + + lpeg.Cmt(S('+-*/%<>=!~')^1, function(input, index, op) return ops[op] and index or nil end))) + +-- Identifiers. +local id_suffix = (lexer.alnum + "'" + '_')^0 +lex:add_rule('type', token(lexer.TYPE, P('_')^-1 * lexer.upper * id_suffix)) +lex:add_rule('identifier', token(lexer.IDENTIFIER, P('_')^-1 * lexer.lower * id_suffix)) +lex:add_rule('lookup', token(lexer.IDENTIFIER, '_' * lexer.digit^1)) + +-- Strings. +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local tq_str = lexer.range('"""') +lex:add_rule('string', token(lexer.STRING, sq_str + tq_str + dq_str)) + +-- Numbers. +local function num(digit) return digit * (digit^0 * '_')^0 * digit^1 + digit end +local int = num(lexer.digit) +local frac = '.' * int +local exp = S('eE') * (P('-') + '+')^-1 * int +local hex = '0x' * num(lexer.xdigit) +local bin = '0b' * num(S('01')) +local float = int * frac^-1 * exp^-1 +lex:add_rule('number', token(lexer.NUMBER, hex + bin + float)) + +-- Comments. +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Punctuation. +-- There is no suitable token name for this, change this if ever one is added. +lex:add_rule('punctuation', + token(lexer.OPERATOR, P('=>') + '.>' + '<:' + '->' + S('=.,:;()[]{}!?~^&|_@'))) + +-- Qualifiers. +lex:add_rule('qualifier', token(lexer.LABEL, '#' * word_match('read send share any alias'))) + +return lex |
