From 3cab10e3a5d2363352be8256e9872bbc08cbbce7 Mon Sep 17 00:00:00 2001 From: stutonk Date: Sun, 19 Mar 2017 01:13:35 -0400 Subject: Fix errors and add ANS Forth 2012 keywords Added all ANS Forth 2012 keywords as defined at http://lars.nocrew.org/forth2012/core.html and removed keywords that were not part of the standard. This necessitated rewriting most of the Strings rules as well as removing some rules not consistent with the standard. Only the s\" form should allow escaping. The list of characters which may appear as part of a keyword has also been expanded where appropriate. Because '.' is a keyword as well as the first character in a string pattern, strings must now be given parsing precedence over keywords to ensure proper highlighting. A few errors were also fixed such as moving the true (which should make keywords case-insensitive) within the word_match function's closing paren. Parens have been removed from the operator list and moved to their correct place as the delimiters for block comments. --- lua/lexers/forth.lua | 48 +++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/lua/lexers/forth.lua b/lua/lexers/forth.lua index 9471740..17d67b5 100644 --- a/lua/lexers/forth.lua +++ b/lua/lexers/forth.lua @@ -12,42 +12,56 @@ local ws = token(l.WHITESPACE, l.space^1) -- Comments. local line_comment = S('|\\') * l.nonnewline^0 -local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1 +local block_comment = '(' * (l.any - ')')^0 * P(')')^-1 local comment = token(l.COMMENT, line_comment + block_comment) -- Strings. +local c_str = 'c' * l.delimited_range('"', true, true) local s_str = 's' * l.delimited_range('"', true, true) +local s_bs_str = 's\\' * l.delimited_range('"', true, false) local dot_str = '.' 
* l.delimited_range('"', true, true) -local f_str = 'f' * l.delimited_range('"', true, true) -local dq_str = l.delimited_range('"', true, true) -local string = token(l.STRING, s_str + dot_str + f_str + dq_str) +local dot_paren_str = '.' * l.delimited_range('()', true, true, false) +local abort_str = 'abort' * l.delimited_range('"', true, true) +local string = token( + l.STRING, + c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str +) -- Numbers. local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1) -- Keywords. local keyword = token(l.KEYWORD, word_match({ - 'swap', 'drop', 'dup', 'nip', 'over', 'rot', '-rot', '2dup', '2drop', '2over', - '2swap', '>r', 'r>', - 'and', 'or', 'xor', '>>', '<<', 'not', 'negate', 'mod', '/mod', '1+', '1-', - 'base', 'hex', 'decimal', 'binary', 'octal', - '@', '!', 'c@', 'c!', '+!', 'cell+', 'cells', 'char+', 'chars', - 'create', 'does>', 'variable', 'variable,', 'literal', 'last', '1,', '2,', - '3,', ',', 'here', 'allot', 'parse', 'find', 'compile', - -- Operators. 
- 'if', '=if', 'if', '<>if', 'then', 'repeat', 'until', 'forth', 'macro' -}, '2><1-@!+3,=')) + '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>', + '0>', '0=', '1+', '1-', '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup', + '2over', '2r>', '2r@', '2swap', ':noname', '<#', '<>', '>body', '>in', + '>number', '>r', '?do','?dup', '@', 'abort', 'abs', 'accept', 'action-of', + 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl', + 'buffer:', 'c!', 'c,', 'c@', 'case', 'cell+', 'cells', 'char', 'char+', + 'chars', 'compile,', 'constant', 'count', 'cr', 'create', 'decimal', 'defer', + 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else', 'emit', + 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit', + 'false', 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if', + 'immediate', 'invert', 'is', 'j', 'key', 'leave', 'literal', 'loop', + 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move', 'negate', 'nip', 'of', + 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit', 'r>', + 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d', + 'save-input', 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces', + 'state', 'swap', 'to', 'then', 'true', 'tuck', 'type', 'u.', 'u.r', 'u>', + 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value', 'variable', + 'while', 'within', 'word', 'xor', '[\']', '[char]', '[compile]' +}, '><-@!?+,=[].\'', true)) -- Identifiers. -local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$'))^1) +local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$#'))^1) -- Operators. -local operator = token(l.OPERATOR, S(':;<>+*-/()[]')) +local operator = token(l.OPERATOR, S(':;<>+*-/[]#')) M._rules = { {'whitespace', ws}, - {'keyword', keyword}, {'string', string}, + {'keyword', keyword}, {'identifier', identifier}, {'comment', comment}, {'number', number}, -- cgit v1.2.3