aboutsummaryrefslogtreecommitdiff
path: root/lua/lexers/python.lua
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@cepl.eu> 2023-08-11 01:27:32 +0200
committer Randy Palamar <randy@rnpnr.xyz> 2024-03-27 06:04:21 -0600
commit 4c4392d29df777ff702dfe99b4f3c23142976e05 (patch)
tree 5355324abe18952f7d19d6cfc5dbeb5d6cb72b84 /lua/lexers/python.lua
parent 95bf9f59f8a9a37148bdc0787db378d62c7cd032 (diff)
download vis-4c4392d29df777ff702dfe99b4f3c23142976e05.tar.gz
vis-4c4392d29df777ff702dfe99b4f3c23142976e05.tar.xz
update lexers to orbitalquark/scintillua@b789dde
Rather than cherry pick patches from after 6.2 we will just grab everything as is.
Diffstat (limited to 'lua/lexers/python.lua')
-rw-r--r-- lua/lexers/python.lua 186
1 files changed, 104 insertions, 82 deletions
diff --git a/lua/lexers/python.lua b/lua/lexers/python.lua
index 901ccba..b870502 100644
--- a/lua/lexers/python.lua
+++ b/lua/lexers/python.lua
@@ -1,106 +1,128 @@
--- Copyright 2006-2022 Mitchell. See LICENSE.
+-- Copyright 2006-2024 Mitchell. See LICENSE.
-- Python LPeg lexer.
-local lexer = require('lexer')
+local lexer = lexer
local token, word_match = lexer.token, lexer.word_match
-local P, S = lpeg.P, lpeg.S
+local P, S, B = lpeg.P, lpeg.S, lpeg.B
-local lex = lexer.new('python', {fold_by_indentation = true})
-
--- Whitespace.
-local ws = token(lexer.WHITESPACE, lexer.space^1)
-lex:add_rule('whitespace', ws)
+local lex = lexer.new(..., {fold_by_indentation = true})
-- Classes.
-lex:add_rule('classdef', token(lexer.KEYWORD, 'class') * ws * token(lexer.CLASS, lexer.word))
+lex:add_rule('classdef', lex:tag(lexer.KEYWORD, 'class') * lex:get_rule('whitespace') *
+ lex:tag(lexer.CLASS, lexer.word))
-- Keywords.
-lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
- 'and', 'as', 'assert', 'async', 'await', 'break', 'continue', 'def', 'del', 'elif', 'else',
- 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda',
- 'nonlocal', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try', 'while', 'with', 'yield',
- -- Descriptors/attr access.
- '__get__', '__set__', '__delete__', '__slots__',
- -- Class.
- '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__', '__index__', '__lt__',
- '__le__', '__gt__', '__ge__', '__eq__', '__ne__', '__hash__', '__nonzero__', '__getattr__',
- '__getattribute__', '__setattr__', '__delattr__', '__call__',
- -- Operator.
- '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__', '__divmod__', '__pow__',
- '__and__', '__xor__', '__or__', '__lshift__', '__rshift__', '__nonzero__', '__neg__', '__pos__',
- '__abs__', '__invert__', '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__',
- '__imod__', '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
- -- Conversions.
- '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__', '__coerce__',
- -- Containers.
- '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__', '__contains__', '__iter__',
- '__getslice__', '__setslice__', '__delslice__',
- -- Module and class attribs.
- '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__', '__bases__', '__class__',
- '__self__',
- -- Stdlib/sys.
- '__builtin__', '__future__', '__main__', '__import__', '__stdin__', '__stdout__', '__stderr__',
- -- Other.
- '__debug__', '__doc__', '__import__', '__name__'
-}))
+lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)) +
+ lex:tag(lexer.KEYWORD .. '.soft', lex:word_match(lexer.KEYWORD .. '.soft')))
-- Functions.
-lex:add_rule('function', token(lexer.FUNCTION, word_match{
- 'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable', 'chr', 'classmethod',
- 'cmp', 'coerce', 'compile', 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
- 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
- 'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
- 'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min', 'object',
- 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
- 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
- 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'
-}))
+local builtin_func = -B('.') *
+ lex:tag(lexer.FUNCTION_BUILTIN, lex:word_match(lexer.FUNCTION_BUILTIN))
+local special_func = lex:tag(lexer.FUNCTION_BUILTIN .. '.special',
+ lex:word_match(lexer.FUNCTION_BUILTIN .. '.special'))
+local func = lex:tag(lexer.FUNCTION, lexer.word)
+local method = B('.') * lex:tag(lexer.FUNCTION_METHOD, lexer.word)
+lex:add_rule('function', (builtin_func + special_func + method + func) * #(lexer.space^0 * '('))
-- Constants.
-lex:add_rule('constant', token(lexer.CONSTANT, word_match{
- 'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'DeprecationWarning',
- 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False', 'FloatingPointError',
- 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
- 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
- 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning',
- 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
- 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
- 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
- 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
- 'ZeroDivisionError'
-}))
-
--- Self.
-lex:add_rule('self', token('self', 'self'))
-lex:add_style('self', lexer.styles.type)
+local builtin_const = lex:tag(lexer.CONSTANT_BUILTIN, lex:word_match(lexer.CONSTANT_BUILTIN))
+local attr = lex:tag(lexer.ATTRIBUTE, B('.') * lex:word_match(lexer.ATTRIBUTE) + '__name__')
+lex:add_rule('constant', builtin_const + attr)
+
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range("'''") + lexer.range('"""')
+lex:add_rule('string', lex:tag(lexer.STRING, (S('fFrRbBrR') * S('rRfFrRbB') + S('ruRUfFbB'))^-1 *
+ (tq_str + sq_str + dq_str)))
-- Identifiers.
-lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, lexer.word))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-
--- Strings.
-local sq_str = P('u')^-1 * lexer.range("'", true)
-local dq_str = P('U')^-1 * lexer.range('"', true)
-local tq_str = lexer.range("'''") + lexer.range('"""')
--- TODO: raw_strs cannot end in single \.
-local raw_sq_str = P('u')^-1 * 'r' * lexer.range("'", false, false)
-local raw_dq_str = P('U')^-1 * 'R' * lexer.range('"', false, false)
-lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_sq_str + raw_dq_str))
+lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#', true)))
-- Numbers.
-local dec = lexer.dec_num * S('Ll')^-1
-local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
-local oct = lexer.oct_num * S('Ll')^-1
-local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
+lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.number_('_') * S('jJ')^-1))
-- Decorators.
-lex:add_rule('decorator', token('decorator', lexer.to_eol('@')))
-lex:add_style('decorator', lexer.styles.preprocessor)
+lex:add_rule('decorator', lex:tag(lexer.ANNOTATION, '@' * lexer.word))
-- Operators.
-lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))
+lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('!@%^&*()[]{}-=+/|:;.,<>~')))
+
+-- Word lists.
+lex:set_word_list(lexer.KEYWORD, {
+ 'and', 'as', 'assert', 'async', 'await', 'break', 'class', 'continue', 'def', 'del', 'elif',
+ 'else', 'except', 'False', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is',
+ 'lambda', 'None', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'True', 'try', 'while',
+ 'with', 'yield'
+})
+
+lex:set_word_list(lexer.KEYWORD .. '.soft', '_ case match')
+
+lex:set_word_list(lexer.FUNCTION_BUILTIN, {
+ 'abs', 'aiter', 'all', 'any', 'anext', 'ascii', 'bin', 'bool', 'breakpoint', 'bytearray', 'bytes',
+ 'callable', 'chr', 'classmethod', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
+ 'enumerate', 'eval', 'exec', 'filter', 'float', 'format', 'frozenset', 'getattr', 'globals',
+ 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', 'len',
+ 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord',
+ 'pow', 'print', 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
+ 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'vars', 'zip', '__import__'
+})
+
+lex:set_word_list(lexer.FUNCTION_BUILTIN .. '.special', { -- Dunder names: '__' on both ends.
+ '__new__', '__init__', '__del__', '__repr__', '__str__', '__bytes__', '__format__', '__lt__',
+ '__le__', '__eq__', '__ne__', '__gt__', '__ge__', '__hash__', '__bool__', --
+ '__getattr__', '__getattribute__', '__setattr__', '__delattr__', '__dir__', --
+ '__get__', '__set__', '__delete__', '__slots__', --
+ '__init_subclass__', '__set_name__', --
+ '__instancecheck__', '__subclasscheck__', --
+ '__class_getitem__', --
+ '__call__', --
+ '__len__', '__length_hint__', '__getitem__', '__setitem__', '__delitem__', '__missing__',
+ '__iter__', '__reversed__', '__contains__', --
+ '__add__', '__sub__', '__mul__', '__matmul__', '__truediv__', '__floordiv__', '__mod__',
+ '__divmod__', '__pow__', '__lshift__', '__rshift__', '__and__', '__xor__', '__or__', --
+ '__radd__', '__rsub__', '__rmul__', '__rmatmul__', '__rtruediv__', '__rfloordiv__', '__rmod__',
+ '__rdivmod__', '__rpow__', '__rlshift__', '__rrshift__', '__rand__', '__rxor__', '__ror__', --
+ '__iadd__', '__isub__', '__imul__', '__imatmul__', '__itruediv__', '__ifloordiv__', '__imod__',
+ '__idivmod__', '__ipow__', '__ilshift__', '__irshift__', '__iand__', '__ixor__', '__ior__', --
+ '__neg__', '__pos__', '__abs__', '__invert__', '__complex__', '__int__', '__float__', '__index__',
+ '__round__', '__trunc__', '__floor__', '__ceil__', --
+ '__enter__', '__exit__', --
+ '__match_args__', --
+ '__await__', --
+ '__aiter__', '__anext__', '__aenter__', '__aexit__' --
+})
+
+lex:set_word_list(lexer.CONSTANT_BUILTIN, { -- Built-in exception and warning class names.
+ 'BaseException', 'Exception', 'ArithmeticError', 'BufferError', 'LookupError', --
+ 'AssertionError', 'AttributeError', 'EOFError', 'FloatingPointError', 'GeneratorExit',
+ 'ImportError', 'ModuleNotFoundError', 'IndexError', 'KeyError', 'KeyboardInterrupt',
+ 'MemoryError', 'NameError', 'NotImplementedError', 'OSError', 'OverflowError', 'RecursionError',
+ 'ReferenceError', 'RuntimeError', 'StopIteration', 'StopAsyncIteration', 'SyntaxError',
+ 'IndentationError', 'TabError', 'SystemError', 'SystemExit', 'TypeError', 'UnboundLocalError',
+ 'UnicodeError', 'UnicodeEncodeError', 'UnicodeDecodeError', 'UnicodeTranslateError', 'ValueError',
+ 'ZeroDivisionError', --
+ 'EnvironmentError', 'IOError', 'WindowsError', --
+ 'BlockingIOError', 'ChildProcessError', 'ConnectionError', 'BrokenPipeError',
+ 'ConnectionAbortedError', 'ConnectionRefusedError', 'ConnectionResetError', 'FileExistsError',
+ 'FileNotFoundError', 'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
+ 'PermissionError', 'ProcessLookupError', 'TimeoutError', --
+ 'Warning', 'UserWarning', 'DeprecationWarning', 'PendingDeprecationWarning', 'SyntaxWarning',
+ 'RuntimeWarning', 'FutureWarning', 'ImportWarning', 'UnicodeWarning', 'BytesWarning',
+ 'ResourceWarning'
+})
+
+lex:set_word_list(lexer.ATTRIBUTE, {
+ '__doc__', '__name__', '__qualname__', '__module__', '__defaults__', '__code__', '__globals__',
+ '__dict__', '__closure__', '__annotations__', '__kwdefaults__', --
+ '__file__', '__bases__', --
+ '__class__', --
+ '__self__', '__func__' --
+})
+
+lexer.property['scintillua.comment'] = '#'
return lex