Diffstat (limited to 'lua/lexers/python.lua')
-rw-r--r--   lua/lexers/python.lua   177
1 file changed, 74 insertions, 103 deletions
diff --git a/lua/lexers/python.lua b/lua/lexers/python.lua
index 63e2e82..901ccba 100644
--- a/lua/lexers/python.lua
+++ b/lua/lexers/python.lua
@@ -1,135 +1,106 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- Python LPeg lexer.

-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S

-local M = {_NAME = 'python'}
+local lex = lexer.new('python', {fold_by_indentation = true})

 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)

--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
-
--- Strings.
-local sq_str = P('u')^-1 * l.delimited_range("'", true)
-local dq_str = P('U')^-1 * l.delimited_range('"', true)
-local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
--- TODO: raw_strs cannot end in single \.
-local raw_sq_str = P('u')^-1 * 'r' * l.delimited_range("'", false, true)
-local raw_dq_str = P('U')^-1 * 'R' * l.delimited_range('"', false, true)
-local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str + dq_str +
-                     raw_sq_str + raw_dq_str)
-
--- Numbers.
-local dec = l.digit^1 * S('Ll')^-1
-local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
-local oct = '0' * R('07')^1 * S('Ll')^-1
-local integer = S('+-')^-1 * (bin + l.hex_num + oct + dec)
-local number = token(l.NUMBER, l.float + integer)
+-- Classes.
+lex:add_rule('classdef', token(lexer.KEYWORD, 'class') * ws * token(lexer.CLASS, lexer.word))

 -- Keywords.
-local keyword = token(l.KEYWORD, word_match{
-  'async', 'await',
-  'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
-  'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
-  'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'print', 'raise',
-  'return', 'try', 'while', 'with', 'yield',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+  'and', 'as', 'assert', 'async', 'await', 'break', 'continue', 'def', 'del', 'elif', 'else',
+  'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda',
+  'nonlocal', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try', 'while', 'with', 'yield',
   -- Descriptors/attr access.
   '__get__', '__set__', '__delete__', '__slots__',
   -- Class.
-  '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__',
-  '__index__', '__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__',
-  '__hash__', '__nonzero__', '__getattr__', '__getattribute__', '__setattr__',
-  '__delattr__', '__call__',
+  '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__', '__index__', '__lt__',
+  '__le__', '__gt__', '__ge__', '__eq__', '__ne__', '__hash__', '__nonzero__', '__getattr__',
+  '__getattribute__', '__setattr__', '__delattr__', '__call__',
   -- Operator.
-  '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__',
-  '__divmod__', '__pow__', '__and__', '__xor__', '__or__', '__lshift__',
-  '__rshift__', '__nonzero__', '__neg__', '__pos__', '__abs__', '__invert__',
-  '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__', '__imod__',
-  '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
+  '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__', '__divmod__', '__pow__',
+  '__and__', '__xor__', '__or__', '__lshift__', '__rshift__', '__nonzero__', '__neg__', '__pos__',
+  '__abs__', '__invert__', '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__',
+  '__imod__', '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
   -- Conversions.
-  '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__',
-  '__coerce__',
+  '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__', '__coerce__',
   -- Containers.
-  '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__',
-  '__contains__', '__iter__', '__getslice__', '__setslice__', '__delslice__',
+  '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__', '__contains__', '__iter__',
+  '__getslice__', '__setslice__', '__delslice__',
   -- Module and class attribs.
-  '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__',
-  '__bases__', '__class__', '__self__',
+  '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__', '__bases__', '__class__',
+  '__self__',
   -- Stdlib/sys.
-  '__builtin__', '__future__', '__main__', '__import__', '__stdin__',
-  '__stdout__', '__stderr__',
+  '__builtin__', '__future__', '__main__', '__import__', '__stdin__', '__stdout__', '__stderr__',
   -- Other.
   '__debug__', '__doc__', '__import__', '__name__'
-})
+}))

 -- Functions.
-local func = token(l.FUNCTION, word_match{
-  'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable',
-  'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'copyright',
-  'credits', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
-  'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
-  'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern',
-  'isinstance', 'issubclass', 'iter', 'len', 'license', 'list', 'locals',
-  'long', 'map', 'max', 'min', 'object', 'oct', 'open', 'ord', 'pow',
-  'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
-  'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
-  'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange',
-  'zip'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+  'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable', 'chr', 'classmethod',
+  'cmp', 'coerce', 'compile', 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
+  'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
+  'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
+  'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min', 'object',
+  'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
+  'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
+  'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'
+}))

 -- Constants.
-local constant = token(l.CONSTANT, word_match{
-  'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException',
-  'DeprecationWarning', 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception',
-  'False', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError',
-  'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
-  'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
-  'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError',
-  'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
-  'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
-  'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
-  'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError',
-  'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
-  'ValueError', 'Warning', 'ZeroDivisionError'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+  'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'DeprecationWarning',
+  'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False', 'FloatingPointError',
+  'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
+  'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
+  'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning',
+  'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
+  'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
+  'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
+  'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
+  'ZeroDivisionError'
+}))

 -- Self.
-local self = token('self', P('self'))
+lex:add_rule('self', token('self', 'self'))
+lex:add_style('self', lexer.styles.type)

 -- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

--- Decorators.
-local decorator = token('decorator', l.starts_line('@') * l.nonnewline^0)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))

-M._rules = {
-  {'whitespace', ws},
-  {'keyword', keyword},
-  {'function', func},
-  {'constant', constant},
-  {'self', self},
-  {'identifier', identifier},
-  {'comment', comment},
-  {'string', string},
-  {'number', number},
-  {'decorator', decorator},
-  {'operator', operator},
-}
+-- Strings.
+local sq_str = P('u')^-1 * lexer.range("'", true)
+local dq_str = P('U')^-1 * lexer.range('"', true)
+local tq_str = lexer.range("'''") + lexer.range('"""')
+-- TODO: raw_strs cannot end in single \.
+local raw_sq_str = P('u')^-1 * 'r' * lexer.range("'", false, false)
+local raw_dq_str = P('U')^-1 * 'R' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_sq_str + raw_dq_str))

+-- Numbers.
+local dec = lexer.dec_num * S('Ll')^-1
+local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
+local oct = lexer.oct_num * S('Ll')^-1
+local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))

-M._tokenstyles = {
-  self = l.STYLE_TYPE,
-  decorator = l.STYLE_PREPROCESSOR
-}
+-- Decorators.
+lex:add_rule('decorator', token('decorator', lexer.to_eol('@')))
+lex:add_style('decorator', lexer.styles.preprocessor)

-M._FOLDBYINDENTATION = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))

-return M
+return lex
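
For reference, the change moves the lexer from the old module-table API (rules and styles collected in M._rules and M._tokenstyles) to Scintillua's object-based API (lexer.new, lex:add_rule, lex:add_style). Below is a minimal standalone sketch, not part of the commit, of how the rewritten lexer might be driven from plain Lua. It assumes LPeg is installed, that the script runs from the repository root so 'lua/lexers' is a valid relative path, and that lexer.load() and lex:lex() return the flat token-name/position list described in the Scintillua documentation; the exact return format can vary between releases.

-- Sketch: tokenize a small Python snippet with the lexer shown above.
lpeg = require('lpeg')  -- the lexer modules reference a global `lpeg`
package.path = 'lua/lexers/?.lua;' .. package.path

local lexer = require('lexer')
local lex = lexer.load('python')  -- loads lua/lexers/python.lua

local code = 'def greet(name):\n    return "hello, " + name\n'
local tokens = lex:lex(code)  -- flat list: name1, pos1, name2, pos2, ...

-- Each position is assumed to be the offset just past the end of its token.
local start = 1
for i = 1, #tokens, 2 do
  local name, finish = tokens[i], tokens[i + 1]
  print(string.format('%-12s %q', name, code:sub(start, finish - 1)))
  start = finish
end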
