Diffstat (limited to 'lua/lexers/python.lua')
-rw-r--r--   lua/lexers/python.lua   177
1 file changed, 74 insertions, 103 deletions
diff --git a/lua/lexers/python.lua b/lua/lexers/python.lua
index 63e2e82..901ccba 100644
--- a/lua/lexers/python.lua
+++ b/lua/lexers/python.lua
@@ -1,135 +1,106 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- Python LPeg lexer.

-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S

-local M = {_NAME = 'python'}
+local lex = lexer.new('python', {fold_by_indentation = true})

 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)

--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
-
--- Strings.
-local sq_str = P('u')^-1 * l.delimited_range("'", true)
-local dq_str = P('U')^-1 * l.delimited_range('"', true)
-local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
--- TODO: raw_strs cannot end in single \.
-local raw_sq_str = P('u')^-1 * 'r' * l.delimited_range("'", false, true)
-local raw_dq_str = P('U')^-1 * 'R' * l.delimited_range('"', false, true)
-local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str + dq_str +
-                     raw_sq_str + raw_dq_str)
-
--- Numbers.
-local dec = l.digit^1 * S('Ll')^-1
-local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
-local oct = '0' * R('07')^1 * S('Ll')^-1
-local integer = S('+-')^-1 * (bin + l.hex_num + oct + dec)
-local number = token(l.NUMBER, l.float + integer)
+-- Classes.
+lex:add_rule('classdef', token(lexer.KEYWORD, 'class') * ws * token(lexer.CLASS, lexer.word))

 -- Keywords.
-local keyword = token(l.KEYWORD, word_match{
-  'async', 'await',
-  'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
-  'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
-  'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'print', 'raise',
-  'return', 'try', 'while', 'with', 'yield',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+  'and', 'as', 'assert', 'async', 'await', 'break', 'continue', 'def', 'del', 'elif', 'else',
+  'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda',
+  'nonlocal', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try', 'while', 'with', 'yield',
   -- Descriptors/attr access.
   '__get__', '__set__', '__delete__', '__slots__',
   -- Class.
-  '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__',
-  '__index__', '__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__',
-  '__hash__', '__nonzero__', '__getattr__', '__getattribute__', '__setattr__',
-  '__delattr__', '__call__',
+  '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__', '__index__', '__lt__',
+  '__le__', '__gt__', '__ge__', '__eq__', '__ne__', '__hash__', '__nonzero__', '__getattr__',
+  '__getattribute__', '__setattr__', '__delattr__', '__call__',
   -- Operator.
-  '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__',
-  '__divmod__', '__pow__', '__and__', '__xor__', '__or__', '__lshift__',
-  '__rshift__', '__nonzero__', '__neg__', '__pos__', '__abs__', '__invert__',
-  '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__', '__imod__',
-  '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
+  '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__', '__divmod__', '__pow__',
+  '__and__', '__xor__', '__or__', '__lshift__', '__rshift__', '__nonzero__', '__neg__', '__pos__',
+  '__abs__', '__invert__', '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__',
+  '__imod__', '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
   -- Conversions.
-  '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__',
-  '__coerce__',
+  '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__', '__coerce__',
   -- Containers.
-  '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__',
-  '__contains__', '__iter__', '__getslice__', '__setslice__', '__delslice__',
+  '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__', '__contains__', '__iter__',
+  '__getslice__', '__setslice__', '__delslice__',
   -- Module and class attribs.
-  '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__',
-  '__bases__', '__class__', '__self__',
+  '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__', '__bases__', '__class__',
+  '__self__',
   -- Stdlib/sys.
-  '__builtin__', '__future__', '__main__', '__import__', '__stdin__',
-  '__stdout__', '__stderr__',
+  '__builtin__', '__future__', '__main__', '__import__', '__stdin__', '__stdout__', '__stderr__',
   -- Other.
   '__debug__', '__doc__', '__import__', '__name__'
-})
+}))

 -- Functions.
-local func = token(l.FUNCTION, word_match{
-  'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable',
-  'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'copyright',
-  'credits', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
-  'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
-  'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern',
-  'isinstance', 'issubclass', 'iter', 'len', 'license', 'list', 'locals',
-  'long', 'map', 'max', 'min', 'object', 'oct', 'open', 'ord', 'pow',
-  'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
-  'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
-  'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange',
-  'zip'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+  'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable', 'chr', 'classmethod',
+  'cmp', 'coerce', 'compile', 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
+  'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
+  'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
+  'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min', 'object',
+  'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
+  'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
+  'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'
+}))

 -- Constants.
-local constant = token(l.CONSTANT, word_match{
-  'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException',
-  'DeprecationWarning', 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception',
-  'False', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError',
-  'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
-  'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
-  'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError',
-  'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
-  'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
-  'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
-  'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError',
-  'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
-  'ValueError', 'Warning', 'ZeroDivisionError'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+  'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'DeprecationWarning',
+  'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False', 'FloatingPointError',
+  'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
+  'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
+  'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning',
+  'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
+  'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
+  'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
+  'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
+  'ZeroDivisionError'
+}))

 -- Self.
-local self = token('self', P('self'))
+lex:add_rule('self', token('self', 'self'))
+lex:add_style('self', lexer.styles.type)

 -- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

--- Decorators.
-local decorator = token('decorator', l.starts_line('@') * l.nonnewline^0)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))

-M._rules = {
-  {'whitespace', ws},
-  {'keyword', keyword},
-  {'function', func},
-  {'constant', constant},
-  {'self', self},
-  {'identifier', identifier},
-  {'comment', comment},
-  {'string', string},
-  {'number', number},
-  {'decorator', decorator},
-  {'operator', operator},
-}
+-- Strings.
+local sq_str = P('u')^-1 * lexer.range("'", true)
+local dq_str = P('U')^-1 * lexer.range('"', true)
+local tq_str = lexer.range("'''") + lexer.range('"""')
+-- TODO: raw_strs cannot end in single \.
+local raw_sq_str = P('u')^-1 * 'r' * lexer.range("'", false, false)
+local raw_dq_str = P('U')^-1 * 'R' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_sq_str + raw_dq_str))

+-- Numbers.
+local dec = lexer.dec_num * S('Ll')^-1
+local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
+local oct = lexer.oct_num * S('Ll')^-1
+local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))

-M._tokenstyles = {
-  self = l.STYLE_TYPE,
-  decorator = l.STYLE_PREPROCESSOR
-}
+-- Decorators.
+lex:add_rule('decorator', token('decorator', lexer.to_eol('@')))
+lex:add_style('decorator', lexer.styles.preprocessor)

-M._FOLDBYINDENTATION = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))

-return M
+return lex
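
For reference, the change moves the lexer from the old module-table API (rules and styles collected in M._rules and M._tokenstyles) to Scintillua's object-based API (lexer.new, lex:add_rule, lex:add_style). Below is a minimal standalone sketch, not part of the commit, of how the rewritten lexer might be driven from plain Lua. It assumes LPeg is installed, that the script runs from the repository root so 'lua/lexers' is a valid relative path, and that lexer.load() and lex:lex() return the flat token-name/position list described in the Scintillua documentation; the exact return format can vary between releases.

-- Sketch: tokenize a small Python snippet with the lexer shown above.
lpeg = require('lpeg')  -- the lexer modules reference a global `lpeg`
package.path = 'lua/lexers/?.lua;' .. package.path

local lexer = require('lexer')
local lex = lexer.load('python')  -- loads lua/lexers/python.lua

local code = 'def greet(name):\n    return "hello, " + name\n'
local tokens = lex:lex(code)  -- flat list: name1, pos1, name2, pos2, ...

-- Each position is assumed to be the offset just past the end of its token.
local start = 1
for i = 1, #tokens, 2 do
  local name, finish = tokens[i], tokens[i + 1]
  print(string.format('%-12s %q', name, code:sub(start, finish - 1)))
  start = finish
end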
