Diffstat (limited to 'lua/lexers')
-rw-r--r--  lua/lexers/README.md | 63
-rw-r--r--  lua/lexers/actionscript.lua | 98
-rw-r--r--  lua/lexers/ada.lua | 82
-rw-r--r--  lua/lexers/ansi_c.lua | 213
-rw-r--r--  lua/lexers/antlr.lua | 90
-rw-r--r--  lua/lexers/apdl.lua | 146
-rw-r--r--  lua/lexers/apl.lua | 62
-rw-r--r--  lua/lexers/applescript.lua | 97
-rw-r--r--  lua/lexers/asm.lua | 709
-rw-r--r--  lua/lexers/asp.lua | 45
-rw-r--r--  lua/lexers/autoit.lua | 243
-rw-r--r--  lua/lexers/awk.lua | 174
-rw-r--r--  lua/lexers/bash.lua | 104
-rw-r--r--  lua/lexers/batch.lua | 88
-rw-r--r--  lua/lexers/bibtex.lua | 74
-rw-r--r--  lua/lexers/boo.lua | 103
-rw-r--r--  lua/lexers/caml.lua | 114
-rw-r--r--  lua/lexers/chuck.lua | 122
-rw-r--r--  lua/lexers/clojure.lua | 312
-rw-r--r--  lua/lexers/cmake.lua | 261
-rw-r--r--  lua/lexers/coffeescript.lua | 83
-rw-r--r--  lua/lexers/container.lua | 6
-rw-r--r--  lua/lexers/context.lua | 76
-rw-r--r--  lua/lexers/cpp.lua | 133
-rw-r--r--  lua/lexers/crystal.lua | 185
-rw-r--r--  lua/lexers/csharp.lua | 114
-rw-r--r--  lua/lexers/css.lua | 304
-rw-r--r--  lua/lexers/cuda.lua | 125
-rw-r--r--  lua/lexers/dart.lua | 101
-rw-r--r--  lua/lexers/desktop.lua | 81
-rw-r--r--  lua/lexers/diff.lua | 51
-rw-r--r--  lua/lexers/django.lua | 94
-rw-r--r--  lua/lexers/dmd.lua | 264
-rw-r--r--  lua/lexers/dockerfile.lua | 69
-rw-r--r--  lua/lexers/dot.lua | 95
-rw-r--r--  lua/lexers/dsv.lua | 17
-rw-r--r--  lua/lexers/eiffel.lua | 95
-rw-r--r--  lua/lexers/elixir.lua | 186
-rw-r--r--  lua/lexers/elm.lua | 75
-rw-r--r--  lua/lexers/erlang.lua | 143
-rw-r--r--  lua/lexers/fantom.lua | 119
-rw-r--r--  lua/lexers/faust.lua | 68
-rw-r--r--  lua/lexers/fennel.lua | 109
-rw-r--r--  lua/lexers/fish.lua | 101
-rw-r--r--  lua/lexers/forth.lua | 99
-rw-r--r--  lua/lexers/fortran.lua | 132
-rw-r--r--  lua/lexers/fsharp.lua | 101
-rw-r--r--  lua/lexers/fstab.lua | 661
-rw-r--r--  lua/lexers/gap.lua | 74
-rw-r--r--  lua/lexers/gemini.lua | 66
-rw-r--r--  lua/lexers/gettext.lua | 46
-rw-r--r--  lua/lexers/gherkin.lua | 74
-rw-r--r--  lua/lexers/git-rebase.lua | 54
-rw-r--r--  lua/lexers/gleam.lua | 119
-rw-r--r--  lua/lexers/glsl.lua | 190
-rw-r--r--  lua/lexers/gnuplot.lua | 109
-rw-r--r--  lua/lexers/go.lua | 101
-rw-r--r--  lua/lexers/groovy.lua | 122
-rw-r--r--  lua/lexers/gtkrc.lua | 85
-rw-r--r--  lua/lexers/hare.lua | 59
-rw-r--r--  lua/lexers/haskell.lua | 75
-rw-r--r--  lua/lexers/html.lua | 270
-rw-r--r--  lua/lexers/icon.lua | 102
-rw-r--r--  lua/lexers/idl.lua | 88
-rw-r--r--  lua/lexers/inform.lua | 138
-rw-r--r--  lua/lexers/ini.lua | 63
-rw-r--r--  lua/lexers/io_lang.lua | 86
-rw-r--r--  lua/lexers/java.lua | 107
-rw-r--r--  lua/lexers/javascript.lua | 125
-rw-r--r--  lua/lexers/jq.lua | 83
-rw-r--r--  lua/lexers/json.lua | 63
-rw-r--r--  lua/lexers/jsp.lua | 31
-rw-r--r--  lua/lexers/julia.lua | 187
-rw-r--r--  lua/lexers/latex.lua | 85
-rw-r--r--  lua/lexers/ledger.lua | 62
-rw-r--r--  lua/lexers/less.lua | 25
-rw-r--r--  lua/lexers/lexer.lua | 2322
-rw-r--r--  lua/lexers/lilypond.lua | 46
-rw-r--r--  lua/lexers/lisp.lua | 114
-rw-r--r--  lua/lexers/litcoffee.lua | 28
-rw-r--r--  lua/lexers/logtalk.lua | 98
-rw-r--r--  lua/lexers/lua.lua | 232
-rw-r--r--  lua/lexers/makefile.lua | 139
-rw-r--r--  lua/lexers/man.lua | 43
-rw-r--r--  lua/lexers/markdown.lua | 193
-rw-r--r--  lua/lexers/matlab.lua | 142
-rw-r--r--  lua/lexers/mediawiki.lua | 47
-rw-r--r--  lua/lexers/meson.lua | 163
-rw-r--r--  lua/lexers/moonscript.lua | 202
-rw-r--r--  lua/lexers/myrddin.lua | 94
-rw-r--r--  lua/lexers/nemerle.lua | 112
-rw-r--r--  lua/lexers/networkd.lua | 335
-rw-r--r--  lua/lexers/nim.lua | 172
-rw-r--r--  lua/lexers/nsis.lua | 268
-rw-r--r--  lua/lexers/null.lua | 6
-rw-r--r--  lua/lexers/objective_c.lua | 113
-rw-r--r--  lua/lexers/pascal.lua | 106
-rw-r--r--  lua/lexers/perl.lua | 220
-rw-r--r--  lua/lexers/php.lua | 185
-rw-r--r--  lua/lexers/pico8.lua | 60
-rw-r--r--  lua/lexers/pike.lua | 91
-rw-r--r--  lua/lexers/pkgbuild.lua | 145
-rw-r--r--  lua/lexers/pony.lua | 200
-rw-r--r--  lua/lexers/powershell.lua | 86
-rw-r--r--  lua/lexers/prolog.lua | 494
-rw-r--r--  lua/lexers/props.lua | 51
-rw-r--r--  lua/lexers/protobuf.lua | 76
-rw-r--r--  lua/lexers/ps.lua | 80
-rw-r--r--  lua/lexers/pure.lua | 74
-rw-r--r--  lua/lexers/python.lua | 177
-rw-r--r--  lua/lexers/rails.lua | 92
-rw-r--r--  lua/lexers/rc.lua | 73
-rw-r--r--  lua/lexers/reason.lua | 114
-rw-r--r--  lua/lexers/rebol.lua | 199
-rw-r--r--  lua/lexers/rest.lua | 101
-rw-r--r--  lua/lexers/rexx.lua | 140
-rw-r--r--  lua/lexers/rhtml.lua | 31
-rw-r--r--  lua/lexers/routeros.lua | 143
-rw-r--r--  lua/lexers/rstats.lua | 78
-rw-r--r--  lua/lexers/ruby.lua | 188
-rw-r--r--  lua/lexers/rust.lua | 130
-rw-r--r--  lua/lexers/sass.lua | 30
-rw-r--r--  lua/lexers/scala.lua | 101
-rw-r--r--  lua/lexers/scheme.lua | 362
-rw-r--r--  lua/lexers/smalltalk.lua | 76
-rw-r--r--  lua/lexers/sml.lua | 180
-rw-r--r--  lua/lexers/snobol4.lua | 113
-rw-r--r--  lua/lexers/spin.lua | 169
-rw-r--r--  lua/lexers/sql.lua | 115
-rw-r--r--  lua/lexers/strace.lua | 59
-rw-r--r--  lua/lexers/systemd.lua | 540
-rw-r--r--  lua/lexers/taskpaper.lua | 86
-rw-r--r--  lua/lexers/tcl.lua | 73
-rw-r--r--  lua/lexers/tex.lua | 49
-rw-r--r--  lua/lexers/texinfo.lua | 309
-rw-r--r--  lua/lexers/text.lua | 15
-rw-r--r--  lua/lexers/toml.lua | 84
-rw-r--r--  lua/lexers/txt2tags.lua | 146
-rw-r--r--  lua/lexers/typescript.lua | 23
-rw-r--r--  lua/lexers/vala.lua | 98
-rw-r--r--  lua/lexers/vb.lua | 83
-rw-r--r--  lua/lexers/vbscript.lua | 2
-rw-r--r--  lua/lexers/vcard.lua | 128
-rw-r--r--  lua/lexers/verilog.lua | 149
-rw-r--r--  lua/lexers/vhdl.lua | 121
-rw-r--r--  lua/lexers/wsf.lua | 136
-rw-r--r--  lua/lexers/xml.lua | 133
-rw-r--r--  lua/lexers/xs.lua | 84
-rw-r--r--  lua/lexers/xtend.lua | 156
-rw-r--r--  lua/lexers/yaml.lua | 142
-rw-r--r--  lua/lexers/zig.lua | 167
151 files changed, 9492 insertions(+), 12216 deletions(-)
diff --git a/lua/lexers/README.md b/lua/lexers/README.md
index e97ea1f..1d74a8c 100644
--- a/lua/lexers/README.md
+++ b/lua/lexers/README.md
@@ -2,8 +2,7 @@ Lua LPeg lexers for vis
=======================
Vis reuses the [Lua](http://www.lua.org/) [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/)
-based lexers from the [Scintillua](http://foicica.com/scintillua/) project
-which is now part of the [Scintilla 3.x branch](https://foicica.com/hg/scintilla/file/tip/lexlua).
+based lexers from the [Scintillua](https://orbitalquark.github.io/scintillua/index.html) project.
# Vis integration
@@ -26,13 +25,13 @@ where `<name>` corresponds to the filename without the `.lua` extension.
To add a new lexer, start with the template quoted below or a lexer of a
similar language. Read the
-[lexer module documentation](http://foicica.com/scintillua/api.html#lexer).
+[lexer module documentation](https://orbitalquark.github.io/scintillua/api.html#lexer).
The [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/) introduction might also
be useful.
For development purposes, it is recommended to test the lexers from a Lua
script as described in the
-[Scintillua manual](http://foicica.com/scintillua/manual.html#Using.Scintillua.as.a.Lua.Library).
+[Scintillua manual](https://orbitalquark.github.io/scintillua/manual.html#Using.Scintillua.as.a.Lua.Library).
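
A minimal test script might look like the following sketch. It assumes the
`lua/lexers` directory is on `package.path` and uses the library API described
in the Scintillua manual (`lexer.load()` plus the lexer's `lex()` method):

```lua
-- test_lexer.lua: lex a code snippet and print the resulting tokens.
package.path = 'lua/lexers/?.lua;' .. package.path
local lexer = require('lexer')
local lex = lexer.load('ansi_c')
local tokens = lex:lex('int main(void) { return 0; }')
-- `tokens` is a flat list of alternating token names and end positions.
for i = 1, #tokens, 2 do print(tokens[i], tokens[i + 1]) end
```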
To enable auto syntax highlighting when opening a file, you can associate your
new lexer with a set of file extensions by adding a corresponding entry into
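
A sketch of such an entry, assuming vis's `vis.ftdetect.filetypes` table (the
`mylang` lexer name and extension here are hypothetical):

```lua
-- visrc.lua: vis's filetype detection tables are set up by require('vis').
require('vis')
-- Map files matching the Lua pattern '%.mylang$' to lua/lexers/mylang.lua.
vis.ftdetect.filetypes.mylang = {
  ext = { '%.mylang$' },
}
```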
@@ -42,35 +41,57 @@ Changes to existing lexers should also be sent upstream for consideration.
A template for new lexers:
-```
+```lua
+-- Copyright 2006-2021 Mitchell. See LICENSE.
-- ? LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = '?'}
+local lex = lexer.new('?')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+ keyword1 keyword2 keyword3
+]]))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-M._rules = {
- {'whitespace', ws},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-M._tokenstyles = {
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.{}[]()')))
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'start', 'end')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
```
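
Once saved as `lua/lexers/<name>.lua`, the lexer can be tried out from a
running vis instance (a sketch; `mylang` is a placeholder for your lexer's
filename):

```
vis:command("set syntax mylang")
```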
# Color Themes
-The `../themes` directory contains the color schemes. Depending on the
-number of colors supported by your terminal, vis will start with either
-the `default-16` or `default-256` theme. Symlink it to your prefered
-style or add a command like the following one to your `visrc.lua`:
+The [`../themes directory`](../themes) contains the color
+schemes. Depending on the number of colors supported by your terminal,
+vis will start with either the [`default-16`](../themes/default-16.lua)
+or [`default-256`](../themes/default-256.lua) theme. Symlink it to
+your preferred style or add a command like the following one to your
+`visrc.lua`:
```
vis:command("set theme solarized")
@@ -79,4 +100,4 @@ vis:command("set theme solarized")
# Dependencies
* [Lua](http://www.lua.org/) 5.1 or greater
- * [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/) 0.12 or greater
+ * [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/) 1.0.0 or greater
diff --git a/lua/lexers/actionscript.lua b/lua/lexers/actionscript.lua
index e92ab75..24aef11 100644
--- a/lua/lexers/actionscript.lua
+++ b/lua/lexers/actionscript.lua
@@ -1,75 +1,59 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Actionscript LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'actionscript'}
+local lex = lexer.new('actionscript')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local ml_str = '<![CDATA[' * (l.any - ']]>')^0 * ']]>'
-local string = token(l.STRING, sq_str + dq_str + ml_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'continue', 'delete', 'do', 'else', 'for', 'function', 'if', 'in',
- 'new', 'on', 'return', 'this', 'typeof', 'var', 'void', 'while', 'with',
- 'NaN', 'Infinity', 'false', 'null', 'true', 'undefined',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'break', 'continue', 'delete', 'do', 'else', 'for', 'function', 'if', 'in', 'new', 'on', 'return',
+ 'this', 'typeof', 'var', 'void', 'while', 'with', 'NaN', 'Infinity', 'false', 'null', 'true',
+ 'undefined',
-- Reserved for future use.
- 'abstract', 'case', 'catch', 'class', 'const', 'debugger', 'default',
- 'export', 'extends', 'final', 'finally', 'goto', 'implements', 'import',
- 'instanceof', 'interface', 'native', 'package', 'private', 'Void',
- 'protected', 'public', 'dynamic', 'static', 'super', 'switch', 'synchonized',
- 'throw', 'throws', 'transient', 'try', 'volatile'
-})
+ 'abstract', 'case', 'catch', 'class', 'const', 'debugger', 'default', 'export', 'extends',
+ 'final', 'finally', 'goto', 'implements', 'import', 'instanceof', 'interface', 'native',
+ 'package', 'private', 'Void', 'protected', 'public', 'dynamic', 'static', 'super', 'switch',
+ 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'Array', 'Boolean', 'Color', 'Date', 'Function', 'Key', 'MovieClip', 'Math',
- 'Mouse', 'Number', 'Object', 'Selection', 'Sound', 'String', 'XML', 'XMLNode',
- 'XMLSocket',
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'Array', 'Boolean', 'Color', 'Date', 'Function', 'Key', 'MovieClip', 'Math', 'Mouse', 'Number',
+ 'Object', 'Selection', 'Sound', 'String', 'XML', 'XMLNode', 'XMLSocket',
-- Reserved for future use.
'boolean', 'byte', 'char', 'double', 'enum', 'float', 'int', 'long', 'short'
-})
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local ml_str = lexer.range('<![CDATA[', ']]>')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//', '<!%[CDATA%[', '%]%]>'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {
- ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')
- },
- [l.STRING] = {['<![CDATA['] = 1, [']]>'] = -1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point(lexer.STRING, '<![CDATA[', ']]>')
-return M
+return lex
diff --git a/lua/lexers/ada.lua b/lua/lexers/ada.lua
index 24ec8a5..0a33868 100644
--- a/lua/lexers/ada.lua
+++ b/lua/lexers/ada.lua
@@ -1,68 +1,54 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Ada LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'ada'}
+local lex = lexer.new('ada')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '--' * l.nonnewline^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', true, true))
-
--- Numbers.
-local hex_num = 'O' * S('xX') * (l.xdigit + '_')^1
-local integer = l.digit^1 * ('_' * l.digit^1)^0
-local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer
-local number = token(l.NUMBER, hex_num + S('+-')^-1 * (float + integer) *
- S('LlUuFf')^-3)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abort', 'abs', 'accept', 'all', 'and', 'begin', 'body', 'case', 'declare',
- 'delay', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'for',
- 'generic', 'goto', 'if', 'in', 'is', 'loop', 'mod', 'new', 'not', 'null',
- 'or', 'others', 'out', 'protected', 'raise', 'record', 'rem', 'renames',
- 'requeue', 'reverse', 'select', 'separate', 'subtype', 'task', 'terminate',
- 'then', 'type', 'until', 'when', 'while', 'xor',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abort', 'abs', 'accept', 'all', 'and', 'begin', 'body', 'case', 'declare', 'delay', 'do', 'else',
+ 'elsif', 'end', 'entry', 'exception', 'exit', 'for', 'generic', 'goto', 'if', 'in', 'is', 'loop',
+ 'mod', 'new', 'not', 'null', 'or', 'others', 'out', 'protected', 'raise', 'record', 'rem',
+ 'renames', 'requeue', 'reverse', 'select', 'separate', 'subtype', 'task', 'terminate', 'then',
+ 'type', 'until', 'when', 'while', 'xor',
-- Preprocessor.
'package', 'pragma', 'use', 'with',
- -- Function
+ -- Function.
'function', 'procedure', 'return',
-- Storage class.
- 'abstract', 'access', 'aliased', 'array', 'at', 'constant', 'delta', 'digits',
- 'interface', 'limited', 'of', 'private', 'range', 'tagged', 'synchronized',
+ 'abstract', 'access', 'aliased', 'array', 'at', 'constant', 'delta', 'digits', 'interface',
+ 'limited', 'of', 'private', 'range', 'tagged', 'synchronized',
-- Boolean.
'true', 'false'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'boolean', 'character', 'count', 'duration', 'float', 'integer', 'long_float',
- 'long_integer', 'priority', 'short_float', 'short_integer', 'string'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'boolean', 'character', 'count', 'duration', 'float', 'integer', 'long_float', 'long_integer',
+ 'priority', 'short_float', 'short_integer', 'string'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S(':;=<>&+-*/.()'))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false)))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+local integer = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer
+lex:add_rule('number', token(lexer.NUMBER, S('+-')^-1 * (float + integer)))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S(':;=<>&+-*/.()')))
-return M
+return lex
diff --git a/lua/lexers/ansi_c.lua b/lua/lexers/ansi_c.lua
index 0235e46..68aba5d 100644
--- a/lua/lexers/ansi_c.lua
+++ b/lua/lexers/ansi_c.lua
@@ -1,154 +1,97 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- C LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'ansi_c'}
+local lex = lexer.new('ansi_c')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
--- local preproc_ifzero = l.starts_line('#if') * S(' \t')^0 * '0' * l.space *
--- (l.starts_line('#endif'))
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local float_suffix = P('f')^-1
-local integer_suffix = (S('uU')^-1 * word_match{ 'l', 'L', 'll', 'LL' }^-1) +
- (word_match{ 'l', 'L', 'll', 'LL' }^-1 * S('uU')^-1)
-local number = token(l.NUMBER, (l.float * float_suffix) +
- (l.integer * integer_suffix))
-
--- Preprocessor.
-local preproc_word = word_match{
- 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'line',
- 'pragma', 'undef', 'warning'
-}
-
-local preproc = #l.starts_line('#') *
- (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) +
- token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
- (token(l.WHITESPACE, S('\t ')^0) *
- token(l.STRING, l.delimited_range('<>', true, true)))^-1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Keywords.
-local storage_class = word_match{
- -- C11 6.7.1
- 'typedef', 'extern', 'static', '_Thread_local', 'auto', 'register',
-}
-
-local type_qualifier = word_match{
- -- C11 6.7.3
- 'const', 'restrict', 'volatile', '_Atomic',
-}
-
-local function_specifier = word_match{
- -- C11 6.7.4
- 'inline', '_Noreturn',
-}
-
-local extra_keywords = word_match{
- 'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__',
- '__attribute__', '__declspec'
-}
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for',
+ 'goto', 'if', 'inline', 'register', 'restrict', 'return', 'sizeof', 'static', 'switch', 'typedef',
+ 'volatile', 'while',
+ -- C99.
+ 'false', 'true',
+ -- C11.
+ '_Alignas', '_Alignof', '_Atomic', '_Generic', '_Noreturn', '_Static_assert', '_Thread_local',
+ -- Compiler.
+ 'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__', '__attribute__', '__declspec'
+}))
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'case', 'continue', 'default', 'do', 'else', 'enum', 'for', 'goto',
- 'if', 'return', 'switch', 'while',
- '_Alignas', '_Generic', '_Static_assert',
-} + storage_class + type_qualifier + function_specifier + extra_keywords)
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'char', 'double', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union',
+ 'unsigned', 'void', '_Bool', '_Complex', '_Imaginary',
+ -- Stdlib types.
+ 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'
+} + P('u')^-1 * 'int' * (P('_least') + '_fast')^-1 * lexer.digit^1 * '_t'))
-- Constants.
-local errno = word_match{
- -- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
- 'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT',
- 'EAGAIN', 'EALREADY', 'EBADF', 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD',
- 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET', 'EDEADLK', 'EDESTADDRREQ',
- 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM',
- 'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR',
- 'ELOOP', 'EMFILE', 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG',
- 'ENETDOWN', 'ENETRESET', 'ENETUNREACH', 'ENFILE', 'ENOBUFS', 'ENODATA',
- 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM',
- 'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS',
- 'ENOTCONN', 'ENOTDIR', 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK',
- 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP', 'EOVERFLOW', 'EOWNERDEAD',
- 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE',
- 'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY',
- 'EWOULDBLOCK', 'EXDEV',
-}
+lex:add_rule('constants', token(lexer.CONSTANT, word_match{
+ 'NULL',
+ -- Preprocessor.
+ '__DATE__', '__FILE__', '__LINE__', '__TIME__', '__func__',
+ -- errno.h.
+ 'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT', 'EAGAIN', 'EALREADY', 'EBADF',
+ 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD', 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET',
+ 'EDEADLK', 'EDESTADDRREQ', 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM',
+ 'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR', 'ELOOP', 'EMFILE',
+ 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG', 'ENETDOWN', 'ENETRESET', 'ENETUNREACH',
+ 'ENFILE', 'ENOBUFS', 'ENODATA', 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM',
+ 'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS', 'ENOTCONN', 'ENOTDIR',
+ 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK', 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP',
+ 'EOVERFLOW', 'EOWNERDEAD', 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE',
+ 'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY', 'EWOULDBLOCK', 'EXDEV',
+ -- stdint.h.
+ 'PTRDIFF_MIN', 'PTRDIFF_MAX', 'SIZE_MAX', 'SIG_ATOMIC_MIN', 'SIG_ATOMIC_MAX', 'WINT_MIN',
+ 'WINT_MAX', 'WCHAR_MIN', 'WCHAR_MAX'
+} + P('U')^-1 * 'INT' * ((P('_LEAST') + '_FAST')^-1 * lexer.digit^1 + 'PTR' + 'MAX') *
+ (P('_MIN') + '_MAX')))
-local preproc_macros = word_match{
- -- C11 6.10.8.1 Mandatory macros
- '__DATE__', '__FILE__', '__LINE__', '__TIME__',
- -- C11 6.4.2.2 Predefined identifiers
- '__func__',
-}
+-- Labels.
+lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(lexer.word * ':')))
-local constant = token(l.CONSTANT, word_match{
- 'true', 'false',
- 'NULL', 'CHAR_BIT', 'SIZE_MAX', } +
- ((P('WINT') + P('WCHAR') + P('SIG_ATOMIC') + P('PTRDIFF')) * (P('_MIN') + P('_MAX'))) +
- ( P('INT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * (P('_MIN') + P('_MAX'))) +
- (P('UINT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * P('_MAX')) +
- errno + preproc_macros
-)
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'char', 'double', 'float', 'int', 'long', 'short',
- 'signed', 'struct', 'union', 'unsigned', 'void', '_Bool', '_Complex',
- '_Imaginary', 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t',
- 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'} +
- (P('u')^-1 * P('int') * (P('_least') + P('_fast'))^-1 * l.dec_num^1 * P('_t')) +
- (S('usif') * l.dec_num^1 * P('_t')) +
- (P('__')^-1 * S('usif') * l.dec_num^1)
-)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Labels.
--- FIXME: Accept whitespace before label.
-local label = token(l.LABEL, l.starts_line(l.word * ':'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/') +
+ lexer.range('#if' * S(' \t')^0 * '0' * lexer.space, '#endif')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+local integer = lexer.integer * word_match('u l ll ul ull lu llu', true)^-1
+local float = lexer.float * P('f')^-1
+lex:add_rule('number', token(lexer.NUMBER, float + integer))
--- Operators.
-local operator = token(l.OPERATOR,
- S('+-/*%<>~!=^&|?~:;,.()[]{}') +
- word_match{ 'sizeof', '_Alignof' }
-)
+-- Preprocessor.
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
+ (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1
+local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
+ word_match('define elif else endif if ifdef ifndef line pragma undef'))
+lex:add_rule('preprocessor', include + preproc)
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'type', type},
- {'constant', constant},
- {'operator', operator},
- {'label', label},
- {'identifier', identifier},
- {'string', string},
- {'number', number},
- {'preproc', preproc},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}')))
-M._foldsymbols = {
- _patterns = {'#?%l+', '[{}]', '/%*', '%*/', '//'},
- [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {
- ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//'),
- ['#if'] = 1, ['#endif'] = -1
- }
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, '#if', '#endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '#ifdef', '#endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '#ifndef', '#endif')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/antlr.lua b/lua/lexers/antlr.lua
index 28fb2e1..32c9a77 100644
--- a/lua/lexers/antlr.lua
+++ b/lua/lexers/antlr.lua
@@ -1,74 +1,56 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- ANTLR LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'antlr'}
+local lex = lexer.new('antlr')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range("'", true))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else',
- 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof',
- 'native', 'new', 'private', 'protected', 'public', 'return', 'static',
- 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile',
- 'while', 'package', 'import', 'header', 'options', 'tokens', 'strictfp',
- 'false', 'null', 'super', 'this', 'true'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'extends', 'final',
+ 'finally', 'for', 'if', 'implements', 'instanceof', 'native', 'new', 'private', 'protected',
+ 'public', 'return', 'static', 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try',
+ 'volatile', 'while', 'package', 'import', 'header', 'options', 'tokens', 'strictfp', 'false',
+ 'null', 'super', 'this', 'true'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface',
- 'long', 'short', 'void'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'boolean byte char class double float int interface long short void')))
-- Functions.
-local func = token(l.FUNCTION, 'assert')
+lex:add_rule('func', token(lexer.FUNCTION, 'assert'))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Actions.
-local action = #P('{') * operator * token('action', (1 - P('}'))^0) *
- (#P('}') * operator)^-1
+local open_brace = token(lexer.OPERATOR, '{')
+local close_brace = token(lexer.OPERATOR, '}')
+lex:add_rule('action', open_brace * token('action', (1 - P('}'))^0) * close_brace^-1)
+lex:add_style('action', lexer.styles.nothing)
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'action', action},
- {'operator', operator},
-}
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range("'", true)))
-M._tokenstyles = {
- action = l.STYLE_NOTHING
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}')))
-M._foldsymbols = {
- _patterns = {'[:;%(%){}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {
- [':'] = 1, [';'] = -1, ['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, ':', ';')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/apdl.lua b/lua/lexers/apdl.lua
index ae5f005..6f29963 100644
--- a/lua/lexers/apdl.lua
+++ b/lua/lexers/apdl.lua
@@ -1,102 +1,78 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- APDL LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'apdl'}
+local lex = lexer.new('apdl', {case_insensitive_fold_points = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '!' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ '*abbr', '*abb', '*afun', '*afu', '*ask', '*cfclos', '*cfc', '*cfopen', '*cfo', '*cfwrite',
+ '*cfw', '*create', '*cre', '*cycle', '*cyc', '*del', '*dim', '*do', '*elseif', '*else', '*enddo',
+ '*endif', '*end', '*eval', '*eva', '*exit', '*exi', '*get', '*go', '*if', '*list', '*lis',
+ '*mfouri', '*mfo', '*mfun', '*mfu', '*mooney', '*moo', '*moper', '*mop', '*msg', '*repeat',
+ '*rep', '*set', '*status', '*sta', '*tread', '*tre', '*ulib', '*uli', '*use', '*vabs', '*vab',
+ '*vcol', '*vco', '*vcum', '*vcu', '*vedit', '*ved', '*vfact', '*vfa', '*vfill', '*vfi', '*vfun',
+ '*vfu', '*vget', '*vge', '*vitrp', '*vit', '*vlen', '*vle', '*vmask', '*vma', '*voper', '*vop',
+ '*vplot', '*vpl', '*vput', '*vpu', '*vread', '*vre', '*vscfun', '*vsc', '*vstat', '*vst',
+ '*vwrite', '*vwr', --
+ '/anfile', '/anf', '/angle', '/ang', '/annot', '/ann', '/anum', '/anu', '/assign', '/ass',
+ '/auto', '/aut', '/aux15', '/aux2', '/aux', '/axlab', '/axl', '/batch', '/bat', '/clabel', '/cla',
+ '/clear', '/cle', '/clog', '/clo', '/cmap', '/cma', '/color', '/col', '/com', '/config',
+ '/contour', '/con', '/copy', '/cop', '/cplane', '/cpl', '/ctype', '/cty', '/cval', '/cva',
+ '/delete', '/del', '/devdisp', '/device', '/dev', '/dist', '/dis', '/dscale', '/dsc', '/dv3d',
+ '/dv3', '/edge', '/edg', '/efacet', '/efa', '/eof', '/erase', '/era', '/eshape', '/esh', '/exit',
+ '/exi', '/expand', '/exp', '/facet', '/fac', '/fdele', '/fde', '/filname', '/fil', '/focus',
+ '/foc', '/format', '/for', '/ftype', '/fty', '/gcmd', '/gcm', '/gcolumn', '/gco', '/gfile',
+ '/gfi', '/gformat', '/gfo', '/gline', '/gli', '/gmarker', '/gma', '/golist', '/gol', '/gopr',
+ '/gop', '/go', '/graphics', '/gra', '/gresume', '/gre', '/grid', '/gri', '/gropt', '/gro',
+ '/grtyp', '/grt', '/gsave', '/gsa', '/gst', '/gthk', '/gth', '/gtype', '/gty', '/header', '/hea',
+ '/input', '/inp', '/larc', '/lar', '/light', '/lig', '/line', '/lin', '/lspec', '/lsp',
+ '/lsymbol', '/lsy', '/menu', '/men', '/mplib', '/mpl', '/mrep', '/mre', '/mstart', '/mst',
+ '/nerr', '/ner', '/noerase', '/noe', '/nolist', '/nol', '/nopr', '/nop', '/normal', '/nor',
+ '/number', '/num', '/opt', '/output', '/out', '/page', '/pag', '/pbc', '/pbf', '/pcircle', '/pci',
+ '/pcopy', '/pco', '/plopts', '/plo', '/pmacro', '/pma', '/pmeth', '/pme', '/pmore', '/pmo',
+ '/pnum', '/pnu', '/polygon', '/pol', '/post26', '/post1', '/pos', '/prep7', '/pre', '/psearch',
+ '/pse', '/psf', '/pspec', '/psp', '/pstatus', '/pst', '/psymb', '/psy', '/pwedge', '/pwe',
+ '/quit', '/qui', '/ratio', '/rat', '/rename', '/ren', '/replot', '/rep', '/reset', '/res', '/rgb',
+ '/runst', '/run', '/seclib', '/sec', '/seg', '/shade', '/sha', '/showdisp', '/show', '/sho',
+ '/shrink', '/shr', '/solu', '/sol', '/sscale', '/ssc', '/status', '/sta', '/stitle', '/sti',
+ '/syp', '/sys', '/title', '/tit', '/tlabel', '/tla', '/triad', '/tri', '/trlcy', '/trl', '/tspec',
+ '/tsp', '/type', '/typ', '/ucmd', '/ucm', '/uis', '/ui', '/units', '/uni', '/user', '/use',
+ '/vcone', '/vco', '/view', '/vie', '/vscale', '/vsc', '/vup', '/wait', '/wai', '/window', '/win',
+ '/xrange', '/xra', '/yrange', '/yra', '/zoom', '/zoo'
+}, true)))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local string = token(l.STRING, l.delimited_range("'", true, true))
+lex:add_rule('string', token(lexer.STRING, lexer.range("'", true, false)))
-- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- '*abbr', '*abb', '*afun', '*afu', '*ask', '*cfclos', '*cfc', '*cfopen',
- '*cfo', '*cfwrite', '*cfw', '*create', '*cre', '*cycle', '*cyc', '*del',
- '*dim', '*do', '*elseif', '*else', '*enddo', '*endif', '*end', '*eval',
- '*eva', '*exit', '*exi', '*get', '*go', '*if', '*list', '*lis', '*mfouri',
- '*mfo', '*mfun', '*mfu', '*mooney', '*moo', '*moper', '*mop', '*msg',
- '*repeat', '*rep', '*set', '*status', '*sta', '*tread', '*tre', '*ulib',
- '*uli', '*use', '*vabs', '*vab', '*vcol', '*vco', '*vcum', '*vcu', '*vedit',
- '*ved', '*vfact', '*vfa', '*vfill', '*vfi', '*vfun', '*vfu', '*vget', '*vge',
- '*vitrp', '*vit', '*vlen', '*vle', '*vmask', '*vma', '*voper', '*vop',
- '*vplot', '*vpl', '*vput', '*vpu', '*vread', '*vre', '*vscfun', '*vsc',
- '*vstat', '*vst', '*vwrite', '*vwr', '/anfile', '/anf', '/angle', '/ang',
- '/annot', '/ann', '/anum', '/anu', '/assign', '/ass', '/auto', '/aut',
- '/aux15', '/aux2', '/aux', '/axlab', '/axl', '/batch', '/bat', '/clabel',
- '/cla', '/clear', '/cle', '/clog', '/clo', '/cmap', '/cma', '/color', '/col',
- '/com', '/config', '/contour', '/con', '/copy', '/cop', '/cplane', '/cpl',
- '/ctype', '/cty', '/cval', '/cva', '/delete', '/del', '/devdisp', '/device',
- '/dev', '/dist', '/dis', '/dscale', '/dsc', '/dv3d', '/dv3', '/edge', '/edg',
- '/efacet', '/efa', '/eof', '/erase', '/era', '/eshape', '/esh', '/exit',
- '/exi', '/expand', '/exp', '/facet', '/fac', '/fdele', '/fde', '/filname',
- '/fil', '/focus', '/foc', '/format', '/for', '/ftype', '/fty', '/gcmd',
- '/gcm', '/gcolumn', '/gco', '/gfile', '/gfi', '/gformat', '/gfo', '/gline',
- '/gli', '/gmarker', '/gma', '/golist', '/gol', '/gopr', '/gop', '/go',
- '/graphics', '/gra', '/gresume', '/gre', '/grid', '/gri', '/gropt', '/gro',
- '/grtyp', '/grt', '/gsave', '/gsa', '/gst', '/gthk', '/gth', '/gtype', '/gty',
- '/header', '/hea', '/input', '/inp', '/larc', '/lar', '/light', '/lig',
- '/line', '/lin', '/lspec', '/lsp', '/lsymbol', '/lsy', '/menu', '/men',
- '/mplib', '/mpl', '/mrep', '/mre', '/mstart', '/mst', '/nerr', '/ner',
- '/noerase', '/noe', '/nolist', '/nol', '/nopr', '/nop', '/normal', '/nor',
- '/number', '/num', '/opt', '/output', '/out', '/page', '/pag', '/pbc', '/pbf',
- '/pcircle', '/pci', '/pcopy', '/pco', '/plopts', '/plo', '/pmacro', '/pma',
- '/pmeth', '/pme', '/pmore', '/pmo', '/pnum', '/pnu', '/polygon', '/pol',
- '/post26', '/post1', '/pos', '/prep7', '/pre', '/psearch', '/pse', '/psf',
- '/pspec', '/psp', '/pstatus', '/pst', '/psymb', '/psy', '/pwedge', '/pwe',
- '/quit', '/qui', '/ratio', '/rat', '/rename', '/ren', '/replot', '/rep',
- '/reset', '/res', '/rgb', '/runst', '/run', '/seclib', '/sec', '/seg',
- '/shade', '/sha', '/showdisp', '/show', '/sho', '/shrink', '/shr', '/solu',
- '/sol', '/sscale', '/ssc', '/status', '/sta', '/stitle', '/sti', '/syp',
- '/sys', '/title', '/tit', '/tlabel', '/tla', '/triad', '/tri', '/trlcy',
- '/trl', '/tspec', '/tsp', '/type', '/typ', '/ucmd', '/ucm', '/uis', '/ui',
- '/units', '/uni', '/user', '/use', '/vcone', '/vco', '/view', '/vie',
- '/vscale', '/vsc', '/vup', '/wait', '/wai', '/window', '/win', '/xrange',
- '/xra', '/yrange', '/yra', '/zoom', '/zoo'
-}, '*/', true))
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Functions.
-local func = token(l.FUNCTION, l.delimited_range('%', true, true))
-
--- Operators.
-local operator = token(l.OPERATOR, S('+-*/$=,;()'))
+lex:add_rule('function', token(lexer.FUNCTION, lexer.range('%', true, false)))
-- Labels.
-local label = token(l.LABEL, l.starts_line(':') * l.word)
+lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(':') * lexer.word))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'number', number},
- {'function', func},
- {'label', label},
- {'comment', comment},
- {'operator', operator},
-}
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!')))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/$=,;()')))
-M._foldsymbols = {
- _patterns = {'%*[A-Za-z]+', '!'},
- [l.KEYWORD] = {
- ['*if'] = 1, ['*IF'] = 1, ['*do'] = 1, ['*DO'] = 1, ['*dowhile'] = 1,
- ['*DOWHILE'] = 1,
- ['*endif'] = -1, ['*ENDIF'] = -1, ['*enddo'] = -1, ['*ENDDO'] = -1
- },
- [l.COMMENT] = {['!'] = l.fold_line_comments('!')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, '*if', '*endif')
+lex:add_fold_point(lexer.KEYWORD, '*do', '*enddo')
+lex:add_fold_point(lexer.KEYWORD, '*dowhile', '*enddo')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('!'))
-return M
+return lex
diff --git a/lua/lexers/apl.lua b/lua/lexers/apl.lua
index 32e1f3a..9ee22cf 100644
--- a/lua/lexers/apl.lua
+++ b/lua/lexers/apl.lua
@@ -1,69 +1,55 @@
--- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE.
+-- Copyright 2015-2022 David B. Lamkins <david@lamkins.net>. See LICENSE.
-- APL LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'apl'}
+local lex = lexer.new('apl')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, (P('⍝') + P('#')) * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(P('⍝') + '#')))
-- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"')
-
-local string = token(l.STRING, sq_str + dq_str)
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Numbers.
-local dig = R('09')
+local dig = lexer.digit
local rad = P('.')
local exp = S('eE')
local img = S('jJ')
local sgn = P('¯')^-1
-local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) *
- (exp * sgn *dig^1)^-1
-local number = token(l.NUMBER, float * img * float + float)
+local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) * (exp * sgn * dig^1)^-1
+lex:add_rule('number', token(lexer.NUMBER, float * img * float + float))
-- Keywords.
-local keyword = token(l.KEYWORD, P('⍞') + P('χ') + P('⍺') + P('⍶') + P('⍵') +
- P('⍹') + P('⎕') * R('AZ', 'az')^0)
+lex:add_rule('keyword', token(lexer.KEYWORD,
+ P('⍞') + 'χ' + '⍺' + '⍶' + '⍵' + '⍹' + '⎕' * lexer.alpha^0))
-- Names.
-local n1l = R('AZ', 'az')
-local n1b = P('_') + P('∆') + P('⍙')
-local n2l = n1l + R('09')
-local n2b = n1b + P('¯')
+local n1l = lexer.alpha
+local n1b = P('_') + '∆' + '⍙'
+local n2l = n1l + lexer.digit
+local n2b = n1b + '¯'
local n1 = n1l + n1b
local n2 = n2l + n2b
local name = n1 * n2^0
-- Labels.
-local label = token(l.LABEL, name * P(':'))
+lex:add_rule('label', token(lexer.LABEL, name * ':'))
-- Variables.
-local variable = token(l.VARIABLE, name)
+lex:add_rule('variable', token(lexer.VARIABLE, name))
-- Special.
-local special = token(l.TYPE, S('{}[]();') + P('←') + P('→') + P('◊'))
+lex:add_rule('special', token(lexer.TYPE, S('{}[]();') + '←' + '→' + '◊'))
-- Nabla.
-local nabla = token(l.PREPROCESSOR, P('∇') + P('⍫'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'keyword', keyword},
- {'label', label},
- {'variable', variable},
- {'special', special},
- {'nabla', nabla},
-}
+lex:add_rule('nabla', token(lexer.PREPROCESSOR, P('∇') + '⍫'))
-return M
+return lex
diff --git a/lua/lexers/applescript.lua b/lua/lexers/applescript.lua
index ea5082e..e1c25d1 100644
--- a/lua/lexers/applescript.lua
+++ b/lua/lexers/applescript.lua
@@ -1,82 +1,69 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Applescript LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'applescript'}
+local lex = lexer.new('applescript')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '--' * l.nonnewline^0
-local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'script', 'property', 'prop', 'end', 'copy', 'to', 'set', 'global', 'local',
- 'on', 'to', 'of', 'in', 'given', 'with', 'without', 'return', 'continue',
- 'tell', 'if', 'then', 'else', 'repeat', 'times', 'while', 'until', 'from',
- 'exit', 'try', 'error', 'considering', 'ignoring', 'timeout', 'transaction',
- 'my', 'get', 'put', 'into', 'is',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'script', 'property', 'prop', 'end', 'copy', 'to', 'set', 'global', 'local', 'on', 'to', 'of',
+ 'in', 'given', 'with', 'without', 'return', 'continue', 'tell', 'if', 'then', 'else', 'repeat',
+ 'times', 'while', 'until', 'from', 'exit', 'try', 'error', 'considering', 'ignoring', 'timeout',
+ 'transaction', 'my', 'get', 'put', 'into', 'is',
-- References.
- 'each', 'some', 'every', 'whose', 'where', 'id', 'index', 'first', 'second',
- 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth',
- 'last', 'front', 'back', 'st', 'nd', 'rd', 'th', 'middle', 'named', 'through',
- 'thru', 'before', 'after', 'beginning', 'the',
+ 'each', 'some', 'every', 'whose', 'where', 'id', 'index', 'first', 'second', 'third', 'fourth',
+ 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', 'last', 'front', 'back', 'st', 'nd',
+ 'rd', 'th', 'middle', 'named', 'through', 'thru', 'before', 'after', 'beginning', 'the',
-- Commands.
- 'close', 'copy', 'count', 'delete', 'duplicate', 'exists', 'launch', 'make',
- 'move', 'open', 'print', 'quit', 'reopen', 'run', 'save', 'saving',
+ 'close', 'copy', 'count', 'delete', 'duplicate', 'exists', 'launch', 'make', 'move', 'open',
+ 'print', 'quit', 'reopen', 'run', 'save', 'saving',
-- Operators.
- 'div', 'mod', 'and', 'not', 'or', 'as', 'contains', 'equal', 'equals',
- 'isn\'t',
-}, "'", true))
+ 'div', 'mod', 'and', 'not', 'or', 'as', 'contains', 'equal', 'equals', 'isn\'t'
+}, true)))
-- Constants.
-local constant = token(l.CONSTANT, word_match({
+lex:add_rule('constant', token(lexer.CONSTANT, word_match({
'case', 'diacriticals', 'expansion', 'hyphens', 'punctuation',
-- Predefined variables.
'it', 'me', 'version', 'pi', 'result', 'space', 'tab', 'anything',
-- Text styles.
- 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain',
- 'shadow', 'strikethrough', 'subscript', 'superscript', 'underline',
+ 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain', 'shadow',
+ 'strikethrough', 'subscript', 'superscript', 'underline',
-- Save options.
'ask', 'no', 'yes',
-- Booleans.
'false', 'true',
-- Date and time.
- 'weekday', 'monday', 'mon', 'tuesday', 'tue', 'wednesday', 'wed', 'thursday',
- 'thu', 'friday', 'fri', 'saturday', 'sat', 'sunday', 'sun', 'month',
- 'january', 'jan', 'february', 'feb', 'march', 'mar', 'april', 'apr', 'may',
- 'june', 'jun', 'july', 'jul', 'august', 'aug', 'september', 'sep', 'october',
- 'oct', 'november', 'nov', 'december', 'dec', 'minutes', 'hours', 'days',
- 'weeks'
-}, nil, true))
+ 'weekday', 'monday', 'mon', 'tuesday', 'tue', 'wednesday', 'wed', 'thursday', 'thu', 'friday',
+ 'fri', 'saturday', 'sat', 'sunday', 'sun', 'month', 'january', 'jan', 'february', 'feb', 'march',
+ 'mar', 'april', 'apr', 'may', 'june', 'jun', 'july', 'jul', 'august', 'aug', 'september', 'sep',
+ 'october', 'oct', 'november', 'nov', 'december', 'dec', 'minutes', 'hours', 'days', 'weeks'
+}, true)))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, (l.alpha + '_') * l.alnum^0)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + '_')^0))
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
+
+-- Comments.
+local line_comment = lexer.to_eol('--')
+local block_comment = lexer.range('(*', '*)')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('+-^*/&<>=:,(){}'))
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=:,(){}')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'constant', constant},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, '(*', '*)')
-return M
+return lex
diff --git a/lua/lexers/asm.lua b/lua/lexers/asm.lua
index 321fe39..416113c 100644
--- a/lua/lexers/asm.lua
+++ b/lua/lexers/asm.lua
@@ -1,212 +1,152 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- NASM Assembly LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'asm'}
+local lex = lexer.new('asm')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, ';' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer * S('hqb')^-1)
-
--- Preprocessor.
-local preproc_word = word_match{
- 'arg', 'assign', 'clear', 'define', 'defstr', 'deftok', 'depend', 'elif',
- 'elifctx', 'elifdef', 'elifempty', 'elifenv', 'elifid', 'elifidn', 'elifidni',
- 'elifmacro', 'elifn', 'elifnctx', 'elifndef', 'elifnempty', 'elifnenv',
- 'elifnid', 'elifnidn', 'elifnidni', 'elifnmacro', 'elifnnum', 'elifnstr',
- 'elifntoken', 'elifnum', 'elifstr', 'eliftoken', 'else', 'endif', 'endmacro',
- 'endrep', 'endwhile', 'error', 'exitmacro', 'exitrep', 'exitwhile', 'fatal',
- 'final', 'idefine', 'idefstr', 'ideftok', 'if', 'ifctx', 'ifdef', 'ifempty',
- 'ifenv', 'ifid', 'ifidn', 'ifidni', 'ifmacro', 'ifn', 'ifnctx', 'ifndef',
- 'ifnempty', 'ifnenv', 'ifnid', 'ifnidn', 'ifnidni', 'ifnmacro', 'ifnnum',
- 'ifnstr', 'ifntoken', 'ifnum', 'ifstr', 'iftoken', 'imacro', 'include',
- 'ixdefine', 'line', 'local', 'macro', 'pathsearch', 'pop', 'push', 'rep',
- 'repl', 'rmacro', 'rotate', 'stacksize', 'strcat', 'strlen', 'substr',
- 'undef', 'unmacro', 'use', 'warning', 'while', 'xdefine',
-}
-local preproc_symbol = '??' + S('!$+?') + '%' * -l.space + R('09')^1
-local preproc = token(l.PREPROCESSOR, '%' * (preproc_word + preproc_symbol))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-- Preprocessor macros.
- 'struc', 'endstruc', 'istruc', 'at', 'iend', 'align', 'alignb', 'sectalign',
- '.nolist',
+ 'struc', 'endstruc', 'istruc', 'at', 'iend', 'align', 'alignb', 'sectalign', '.nolist',
-- Preprocessor Packages.
- --'altreg', 'smartalign', 'fp', 'ifunc'
+ 'altreg', 'smartalign', 'fp', 'ifunc',
-- Directives.
- 'absolute', 'bits', 'class', 'common', 'common', 'cpu', 'default', 'export',
- 'extern', 'float', 'global', 'group', 'import', 'osabi', 'overlay', 'private',
- 'public', '__SECT__', 'section', 'segment', 'stack', 'use16', 'use32',
- 'use64',
+ 'absolute', 'bits', 'class', 'common', 'common', 'cpu', 'default', 'export', 'extern', 'float',
+ 'global', 'group', 'import', 'osabi', 'overlay', 'private', 'public', '__SECT__', 'section',
+ 'segment', 'stack', 'use16', 'use32', 'use64',
-- Section Names.
- '.bss', '.comment', '.data', '.lbss', '.ldata', '.lrodata', '.rdata',
- '.rodata', '.tbss', '.tdata', '.text',
+ '.bss', '.comment', '.data', '.lbss', '.ldata', '.lrodata', '.rdata', '.rodata', '.tbss',
+ '.tdata', '.text',
-- Section Qualifiers.
- 'alloc', 'bss', 'code', 'exec', 'data', 'noalloc', 'nobits', 'noexec',
- 'nowrite', 'progbits', 'rdata', 'tls', 'write',
+ 'alloc', 'bss', 'code', 'exec', 'data', 'noalloc', 'nobits', 'noexec', 'nowrite', 'progbits',
+ 'rdata', 'tls', 'write',
-- Operators.
- 'abs', 'rel', 'seg', 'wrt', 'strict',
- '__utf16__', '__utf16be__', '__utf16le__', '__utf32__', '__utf32be__',
- '__utf32le__',
-}, '.'))
+ 'abs', 'rel', 'seg', 'wrt', 'strict', '__utf16__', '__utf16be__', '__utf16le__', '__utf32__',
+ '__utf32be__', '__utf32le__'
+}))
-- Instructions.
-- awk '{print $1}'|uniq|tr '[:upper:]' '[:lower:]'|
--- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 78
-local instruction = token('instruction', word_match{
+-- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 98
+lex:add_rule('instruction', token('instruction', word_match{
-- Special Instructions.
- 'db', 'dd', 'do', 'dq', 'dt', 'dw', 'dy', 'resb', 'resd', 'reso', 'resq',
- 'rest', 'resw', 'resy',
+ 'db', 'dd', 'do', 'dq', 'dt', 'dw', 'dy', 'resb', 'resd', 'reso', 'resq', 'rest', 'resw', 'resy',
-- Conventional Instructions.
- 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bb0_reset',
- 'bb1_reset', 'bound', 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts',
- 'call', 'cbw', 'cdq', 'cdqe', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmp',
- 'cmpsb', 'cmpsd', 'cmpsq', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b',
- 'cmpxchg16b', 'cpuid', 'cpu_read', 'cpu_write', 'cqo', 'cwd', 'cwde', 'daa',
- 'das', 'dec', 'div', 'dmint', 'emms', 'enter', 'equ', 'f2xm1', 'fabs',
- 'fadd', 'faddp', 'fbld', 'fbstp', 'fchs', 'fclex', 'fcmovb', 'fcmovbe',
- 'fcmove', 'fcmovnb', 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu', 'fcom',
- 'fcomi', 'fcomip', 'fcomp', 'fcompp', 'fcos', 'fdecstp', 'fdisi', 'fdiv',
- 'fdivp', 'fdivr', 'fdivrp', 'femms', 'feni', 'ffree', 'ffreep', 'fiadd',
- 'ficom', 'ficomp', 'fidiv', 'fidivr', 'fild', 'fimul', 'fincstp', 'finit',
- 'fist', 'fistp', 'fisttp', 'fisub', 'fisubr', 'fld', 'fld1', 'fldcw',
- 'fldenv', 'fldl2e', 'fldl2t', 'fldlg2', 'fldln2', 'fldpi', 'fldz', 'fmul',
- 'fmulp', 'fnclex', 'fndisi', 'fneni', 'fninit', 'fnop', 'fnsave', 'fnstcw',
- 'fnstenv', 'fnstsw', 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint',
- 'frstor', 'fsave', 'fscale', 'fsetpm', 'fsin', 'fsincos', 'fsqrt',
- 'fst', 'fstcw', 'fstenv', 'fstp', 'fstsw', 'fsub', 'fsubp', 'fsubr',
- 'fsubrp', 'ftst', 'fucom', 'fucomi', 'fucomip', 'fucomp', 'fucompp',
- 'fxam', 'fxch', 'fxtract', 'fyl2x', 'fyl2xp1', 'hlt', 'ibts', 'icebp',
- 'idiv', 'imul', 'in', 'inc', 'incbin', 'insb', 'insd', 'insw', 'int',
- 'int01', 'int1', 'int03', 'int3', 'into', 'invd', 'invpcid', 'invlpg',
- 'invlpga', 'iret', 'iretd', 'iretq', 'iretw', 'jcxz', 'jecxz', 'jrcxz',
- 'jmp', 'jmpe', 'lahf', 'lar', 'lds', 'lea', 'leave', 'les', 'lfence',
- 'lfs', 'lgdt', 'lgs', 'lidt', 'lldt', 'lmsw', 'loadall', 'loadall286',
- 'lodsb', 'lodsd', 'lodsq', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz',
- 'loopz', 'lsl', 'lss', 'ltr', 'mfence', 'monitor', 'mov', 'movd', 'movq',
- 'movsb', 'movsd', 'movsq', 'movsw', 'movsx', 'movsxd', 'movsx', 'movzx',
- 'mul', 'mwait', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd', 'outsw',
- 'packssdw', 'packsswb', 'packuswb', 'paddb', 'paddd', 'paddsb', 'paddsiw',
- 'paddsw', 'paddusb', 'paddusw', 'paddw', 'pand', 'pandn', 'pause', 'paveb',
- 'pavgusb', 'pcmpeqb', 'pcmpeqd', 'pcmpeqw', 'pcmpgtb', 'pcmpgtd', 'pcmpgtw',
- 'pdistib', 'pf2id', 'pfacc', 'pfadd', 'pfcmpeq', 'pfcmpge', 'pfcmpgt',
- 'pfmax', 'pfmin', 'pfmul', 'pfrcp', 'pfrcpit1', 'pfrcpit2', 'pfrsqit1',
- 'pfrsqrt', 'pfsub', 'pfsubr', 'pi2fd', 'pmachriw', 'pmaddwd', 'pmagw',
- 'pmulhriw', 'pmulhrwa', 'pmulhrwc', 'pmulhw', 'pmullw', 'pmvgezb', 'pmvlzb',
- 'pmvnzb', 'pmvzb', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd',
- 'popfq', 'popfw', 'por', 'prefetch', 'prefetchw', 'pslld', 'psllq',
- 'psllw', 'psrad', 'psraw', 'psrld', 'psrlq', 'psrlw', 'psubb', 'psubd',
- 'psubsb', 'psubsiw', 'psubsw', 'psubusb', 'psubusw', 'psubw', 'punpckhbw',
- 'punpckhdq', 'punpckhwd', 'punpcklbw', 'punpckldq', 'punpcklwd', 'push',
- 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfq', 'pushfw', 'pxor',
- 'rcl', 'rcr', 'rdshr', 'rdmsr', 'rdpmc', 'rdtsc', 'rdtscp', 'ret', 'retf',
- 'retn', 'rol', 'ror', 'rdm', 'rsdc', 'rsldt', 'rsm', 'rsts', 'sahf', 'sal',
- 'salc', 'sar', 'sbb', 'scasb', 'scasd', 'scasq', 'scasw', 'sfence', 'sgdt',
- 'shl', 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'skinit', 'smi', 'smint',
- 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosq', 'stosw',
- 'str', 'sub', 'svdc', 'svldt', 'svts', 'swapgs', 'syscall', 'sysenter',
- 'sysexit', 'sysret', 'test', 'ud0', 'ud1', 'ud2b', 'ud2', 'ud2a', 'umov',
- 'verr', 'verw', 'fwait', 'wbinvd', 'wrshr', 'wrmsr', 'xadd', 'xbts',
- 'xchg', 'xlatb', 'xlat', 'xor', 'cmova', 'cmovae', 'cmovb', 'cmovbe',
- 'cmovc', 'cmove', 'cmovg', 'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae',
- 'cmovnb', 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
- 'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo', 'cmovp',
- 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmovcc', 'ja', 'jae', 'jb', 'jbe',
- 'jc', 'je', 'jg', 'jge', 'jl', 'jle', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc',
- 'jne', 'jng', 'jnge', 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp',
- 'jpe', 'jpo', 'js', 'jz', 'seta', 'setae', 'setb', 'setbe', 'setc', 'sete',
- 'setg', 'setge', 'setl', 'setle', 'setna', 'setnae', 'setnb', 'setnbe',
- 'setnc', 'setne', 'setng', 'setnge', 'setnl', 'setnle', 'setno', 'setnp',
- 'setns', 'setnz', 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz',
- --" Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI, XMM, MMX2).
- 'addps', 'addss', 'andnps', 'andps', 'cmpeqps', 'cmpeqss', 'cmpleps',
- 'cmpless', 'cmpltps', 'cmpltss', 'cmpneqps', 'cmpneqss', 'cmpnleps',
- 'cmpnless', 'cmpnltps', 'cmpnltss', 'cmpordps', 'cmpordss', 'cmpunordps',
- 'cmpunordss', 'cmpps', 'cmpss', 'comiss', 'cvtpi2ps', 'cvtps2pi', 'cvtsi2ss',
- 'cvtss2si', 'cvttps2pi', 'cvttss2si', 'divps', 'divss', 'ldmxcsr', 'maxps',
- 'maxss', 'minps', 'minss', 'movaps', 'movhps', 'movlhps', 'movlps',
- 'movhlps', 'movmskps', 'movntps', 'movss', 'movups', 'mulps', 'mulss',
- 'orps', 'rcpps', 'rcpss', 'rsqrtps', 'rsqrtss', 'shufps', 'sqrtps', 'sqrtss',
- 'stmxcsr', 'subps', 'subss', 'ucomiss', 'unpckhps', 'unpcklps', 'xorps',
+ 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bb0_reset', 'bb1_reset', 'bound', 'bsf',
+ 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw', 'cdq', 'cdqe', 'clc', 'cld', 'cli',
+ 'clts', 'cmc', 'cmp', 'cmpsb', 'cmpsd', 'cmpsq', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b',
+ 'cmpxchg16b', 'cpuid', 'cpu_read', 'cpu_write', 'cqo', 'cwd', 'cwde', 'daa', 'das', 'dec', 'div',
+ 'dmint', 'emms', 'enter', 'equ', 'f2xm1', 'fabs', 'fadd', 'faddp', 'fbld', 'fbstp', 'fchs',
+ 'fclex', 'fcmovb', 'fcmovbe', 'fcmove', 'fcmovnb', 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu',
+ 'fcom', 'fcomi', 'fcomip', 'fcomp', 'fcompp', 'fcos', 'fdecstp', 'fdisi', 'fdiv', 'fdivp',
+ 'fdivr', 'fdivrp', 'femms', 'feni', 'ffree', 'ffreep', 'fiadd', 'ficom', 'ficomp', 'fidiv',
+ 'fidivr', 'fild', 'fimul', 'fincstp', 'finit', 'fist', 'fistp', 'fisttp', 'fisub', 'fisubr',
+ 'fld', 'fld1', 'fldcw', 'fldenv', 'fldl2e', 'fldl2t', 'fldlg2', 'fldln2', 'fldpi', 'fldz', 'fmul',
+ 'fmulp', 'fnclex', 'fndisi', 'fneni', 'fninit', 'fnop', 'fnsave', 'fnstcw', 'fnstenv', 'fnstsw',
+ 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint', 'frstor', 'fsave', 'fscale', 'fsetpm', 'fsin',
+ 'fsincos', 'fsqrt', 'fst', 'fstcw', 'fstenv', 'fstp', 'fstsw', 'fsub', 'fsubp', 'fsubr', 'fsubrp',
+ 'ftst', 'fucom', 'fucomi', 'fucomip', 'fucomp', 'fucompp', 'fxam', 'fxch', 'fxtract', 'fyl2x',
+ 'fyl2xp1', 'hlt', 'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'incbin', 'insb', 'insd', 'insw',
+ 'int', 'int01', 'int1', 'int03', 'int3', 'into', 'invd', 'invpcid', 'invlpg', 'invlpga', 'iret',
+ 'iretd', 'iretq', 'iretw', 'jcxz', 'jecxz', 'jrcxz', 'jmp', 'jmpe', 'lahf', 'lar', 'lds', 'lea',
+ 'leave', 'les', 'lfence', 'lfs', 'lgdt', 'lgs', 'lidt', 'lldt', 'lmsw', 'loadall', 'loadall286',
+ 'lodsb', 'lodsd', 'lodsq', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss',
+ 'ltr', 'mfence', 'monitor', 'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsq', 'movsw', 'movsx',
+ 'movsxd', 'movsx', 'movzx', 'mul', 'mwait', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
+ 'outsw', 'packssdw', 'packsswb', 'packuswb', 'paddb', 'paddd', 'paddsb', 'paddsiw', 'paddsw',
+ 'paddusb', 'paddusw', 'paddw', 'pand', 'pandn', 'pause', 'paveb', 'pavgusb', 'pcmpeqb', 'pcmpeqd',
+ 'pcmpeqw', 'pcmpgtb', 'pcmpgtd', 'pcmpgtw', 'pdistib', 'pf2id', 'pfacc', 'pfadd', 'pfcmpeq',
+ 'pfcmpge', 'pfcmpgt', 'pfmax', 'pfmin', 'pfmul', 'pfrcp', 'pfrcpit1', 'pfrcpit2', 'pfrsqit1',
+ 'pfrsqrt', 'pfsub', 'pfsubr', 'pi2fd', 'pmachriw', 'pmaddwd', 'pmagw', 'pmulhriw', 'pmulhrwa',
+ 'pmulhrwc', 'pmulhw', 'pmullw', 'pmvgezb', 'pmvlzb', 'pmvnzb', 'pmvzb', 'pop', 'popa', 'popad',
+ 'popaw', 'popf', 'popfd', 'popfq', 'popfw', 'por', 'prefetch', 'prefetchw', 'pslld', 'psllq',
+ 'psllw', 'psrad', 'psraw', 'psrld', 'psrlq', 'psrlw', 'psubb', 'psubd', 'psubsb', 'psubsiw',
+ 'psubsw', 'psubusb', 'psubusw', 'psubw', 'punpckhbw', 'punpckhdq', 'punpckhwd', 'punpcklbw',
+ 'punpckldq', 'punpcklwd', 'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfq',
+ 'pushfw', 'pxor', 'rcl', 'rcr', 'rdshr', 'rdmsr', 'rdpmc', 'rdtsc', 'rdtscp', 'ret', 'retf',
+ 'retn', 'rol', 'ror', 'rdm', 'rsdc', 'rsldt', 'rsm', 'rsts', 'sahf', 'sal', 'salc', 'sar', 'sbb',
+ 'scasb', 'scasd', 'scasq', 'scasw', 'sfence', 'sgdt', 'shl', 'shld', 'shr', 'shrd', 'sidt',
+ 'sldt', 'skinit', 'smi', 'smint', 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd',
+ 'stosq', 'stosw', 'str', 'sub', 'svdc', 'svldt', 'svts', 'swapgs', 'syscall', 'sysenter',
+ 'sysexit', 'sysret', 'test', 'ud0', 'ud1', 'ud2b', 'ud2', 'ud2a', 'umov', 'verr', 'verw', 'fwait',
+ 'wbinvd', 'wrshr', 'wrmsr', 'xadd', 'xbts', 'xchg', 'xlatb', 'xlat', 'xor', 'cmova',
+ 'cmovae', 'cmovb', 'cmovbe', 'cmovc', 'cmove', 'cmovg', 'cmovge', 'cmovl', 'cmovle', 'cmovna',
+ 'cmovnae', 'cmovnb', 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl', 'cmovnle',
+ 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo', 'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz',
+ 'cmovcc', 'ja', 'jae', 'jb', 'jbe', 'jc', 'je', 'jg', 'jge', 'jl', 'jle', 'jna', 'jnae', 'jnb',
+ 'jnbe', 'jnc', 'jne', 'jng', 'jnge', 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
+ 'jpo', 'js', 'jz', 'seta', 'setae', 'setb', 'setbe', 'setc', 'sete', 'setg', 'setge', 'setl',
+ 'setle', 'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng', 'setnge', 'setnl',
+ 'setnle', 'setno', 'setnp', 'setns', 'setnz', 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz',
+ -- Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI, XMM, MMX2).
+ 'addps', 'addss', 'andnps', 'andps', 'cmpeqps', 'cmpeqss', 'cmpleps', 'cmpless', 'cmpltps',
+ 'cmpltss', 'cmpneqps', 'cmpneqss', 'cmpnleps', 'cmpnless', 'cmpnltps', 'cmpnltss', 'cmpordps',
+ 'cmpordss', 'cmpunordps', 'cmpunordss', 'cmpps', 'cmpss', 'comiss', 'cvtpi2ps', 'cvtps2pi',
+ 'cvtsi2ss', 'cvtss2si', 'cvttps2pi', 'cvttss2si', 'divps', 'divss', 'ldmxcsr', 'maxps', 'maxss',
+ 'minps', 'minss', 'movaps', 'movhps', 'movlhps', 'movlps', 'movhlps', 'movmskps', 'movntps',
+ 'movss', 'movups', 'mulps', 'mulss', 'orps', 'rcpps', 'rcpss', 'rsqrtps', 'rsqrtss', 'shufps',
+ 'sqrtps', 'sqrtss', 'stmxcsr', 'subps', 'subss', 'ucomiss', 'unpckhps', 'unpcklps', 'xorps',
-- Introduced in Deschutes but necessary for SSE support.
'fxrstor', 'fxrstor64', 'fxsave', 'fxsave64',
-- XSAVE group (AVX and extended state).
- 'xgetbv', 'xsetbv', 'xsave', 'xsave64', 'xsaveopt', 'xsaveopt64', 'xrstor',
- 'xrstor64',
+ 'xgetbv', 'xsetbv', 'xsave', 'xsave64', 'xsaveopt', 'xsaveopt64', 'xrstor', 'xrstor64',
-- Generic memory operations.
'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2', 'sfence',
-- New MMX instructions introduced in Katmai.
- 'maskmovq', 'movntq', 'pavgb', 'pavgw', 'pextrw', 'pinsrw', 'pmaxsw',
- 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', 'psadbw', 'pshufw',
+ 'maskmovq', 'movntq', 'pavgb', 'pavgw', 'pextrw', 'pinsrw', 'pmaxsw', 'pmaxub', 'pminsw',
+ 'pminub', 'pmovmskb', 'pmulhuw', 'psadbw', 'pshufw',
-- AMD Enhanced 3DNow! (Athlon) instructions.
'pf2iw', 'pfnacc', 'pfpnacc', 'pi2fw', 'pswapd',
-- Willamette SSE2 Cacheability Instructions.
'maskmovdqu', 'clflush', 'movntdq', 'movnti', 'movntpd', 'lfence', 'mfence',
-- Willamette MMX instructions (SSE2 SIMD Integer Instructions).
- 'movd', 'movdqa', 'movdqu', 'movdq2q', 'movq', 'movq2dq', 'packsswb',
- 'packssdw', 'packuswb', 'paddb', 'paddw', 'paddd', 'paddq', 'paddsb',
- 'paddsw', 'paddusb', 'paddusw', 'pand', 'pandn', 'pavgb', 'pavgw', 'pcmpeqb',
- 'pcmpeqw', 'pcmpeqd', 'pcmpgtb', 'pcmpgtw', 'pcmpgtd', 'pextrw', 'pinsrw',
- 'pmaddwd', 'pmaxsw', 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw',
- 'pmulhw', 'pmullw', 'pmuludq', 'por', 'psadbw', 'pshufd', 'pshufhw',
- 'pshuflw', 'pslldq', 'psllw', 'pslld', 'psllq', 'psraw', 'psrad', 'psrldq',
- 'psrlw', 'psrld', 'psrlq', 'psubb', 'psubw', 'psubd', 'psubq', 'psubsb',
- 'psubsw', 'psubusb', 'psubusw', 'punpckhbw', 'punpckhwd', 'punpckhdq',
+ 'movd', 'movdqa', 'movdqu', 'movdq2q', 'movq', 'movq2dq', 'packsswb', 'packssdw', 'packuswb',
+ 'paddb', 'paddw', 'paddd', 'paddq', 'paddsb', 'paddsw', 'paddusb', 'paddusw', 'pand', 'pandn',
+ 'pavgb', 'pavgw', 'pcmpeqb', 'pcmpeqw', 'pcmpeqd', 'pcmpgtb', 'pcmpgtw', 'pcmpgtd', 'pextrw',
+ 'pinsrw', 'pmaddwd', 'pmaxsw', 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', 'pmulhw',
+ 'pmullw', 'pmuludq', 'por', 'psadbw', 'pshufd', 'pshufhw', 'pshuflw', 'pslldq', 'psllw', 'pslld',
+ 'psllq', 'psraw', 'psrad', 'psrldq', 'psrlw', 'psrld', 'psrlq', 'psubb', 'psubw', 'psubd',
+ 'psubq', 'psubsb', 'psubsw', 'psubusb', 'psubusw', 'punpckhbw', 'punpckhwd', 'punpckhdq',
'punpckhqdq', 'punpcklbw', 'punpcklwd', 'punpckldq', 'punpcklqdq', 'pxor',
-- Willamette Streaming SIMD instructions (SSE2).
- 'addpd', 'addsd', 'andnpd', 'andpd', 'cmpeqpd', 'cmpeqsd', 'cmplepd',
- 'cmplesd', 'cmpltpd', 'cmpltsd', 'cmpneqpd', 'cmpneqsd', 'cmpnlepd',
- 'cmpnlesd', 'cmpnltpd', 'cmpnltsd', 'cmpordpd', 'cmpordsd', 'cmpunordpd',
- 'cmpunordsd', 'cmppd', 'cmpsd', 'comisd', 'cvtdq2pd', 'cvtdq2ps',
- 'cvtpd2dq', 'cvtpd2pi', 'cvtpd2ps', 'cvtpi2pd', 'cvtps2dq', 'cvtps2pd',
- 'cvtsd2si', 'cvtsd2ss', 'cvtsi2sd', 'cvtss2sd', 'cvttpd2pi', 'cvttpd2dq',
- 'cvttps2dq', 'cvttsd2si', 'divpd', 'divsd', 'maxpd', 'maxsd', 'minpd',
- 'minsd', 'movapd', 'movhpd', 'movlpd', 'movmskpd', 'movsd', 'movupd',
- 'mulpd', 'mulsd', 'orpd', 'shufpd', 'sqrtpd', 'sqrtsd', 'subpd', 'subsd',
- 'ucomisd', 'unpckhpd', 'unpcklpd', 'xorpd',
+ 'addpd', 'addsd', 'andnpd', 'andpd', 'cmpeqpd', 'cmpeqsd', 'cmplepd', 'cmplesd', 'cmpltpd',
+ 'cmpltsd', 'cmpneqpd', 'cmpneqsd', 'cmpnlepd', 'cmpnlesd', 'cmpnltpd', 'cmpnltsd', 'cmpordpd',
+ 'cmpordsd', 'cmpunordpd', 'cmpunordsd', 'cmppd', 'cmpsd', 'comisd', 'cvtdq2pd', 'cvtdq2ps',
+ 'cvtpd2dq', 'cvtpd2pi', 'cvtpd2ps', 'cvtpi2pd', 'cvtps2dq', 'cvtps2pd', 'cvtsd2si', 'cvtsd2ss',
+ 'cvtsi2sd', 'cvtss2sd', 'cvttpd2pi', 'cvttpd2dq', 'cvttps2dq', 'cvttsd2si', 'divpd', 'divsd',
+ 'maxpd', 'maxsd', 'minpd', 'minsd', 'movapd', 'movhpd', 'movlpd', 'movmskpd', 'movsd', 'movupd',
+ 'mulpd', 'mulsd', 'orpd', 'shufpd', 'sqrtpd', 'sqrtsd', 'subpd', 'subsd', 'ucomisd', 'unpckhpd',
+ 'unpcklpd', 'xorpd',
-- Prescott New Instructions (SSE3).
- 'addsubpd', 'addsubps', 'haddpd', 'haddps', 'hsubpd', 'hsubps', 'lddqu',
- 'movddup', 'movshdup', 'movsldup',
+ 'addsubpd', 'addsubps', 'haddpd', 'haddps', 'hsubpd', 'hsubps', 'lddqu', 'movddup', 'movshdup',
+ 'movsldup',
-- VMX/SVM Instructions.
- 'clgi', 'stgi', 'vmcall', 'vmclear', 'vmfunc', 'vmlaunch', 'vmload',
- 'vmmcall', 'vmptrld', 'vmptrst', 'vmread', 'vmresume', 'vmrun', 'vmsave',
- 'vmwrite', 'vmxoff', 'vmxon',
+ 'clgi', 'stgi', 'vmcall', 'vmclear', 'vmfunc', 'vmlaunch', 'vmload', 'vmmcall', 'vmptrld',
+ 'vmptrst', 'vmread', 'vmresume', 'vmrun', 'vmsave', 'vmwrite', 'vmxoff', 'vmxon',
-- Extended Page Tables VMX instructions.
'invept', 'invvpid',
-- Tejas New Instructions (SSSE3).
- 'pabsb', 'pabsw', 'pabsd', 'palignr', 'phaddw', 'phaddd', 'phaddsw',
- 'phsubw', 'phsubd', 'phsubsw', 'pmaddubsw', 'pmulhrsw', 'pshufb', 'psignb',
- 'psignw', 'psignd',
+ 'pabsb', 'pabsw', 'pabsd', 'palignr', 'phaddw', 'phaddd', 'phaddsw', 'phsubw', 'phsubd',
+ 'phsubsw', 'pmaddubsw', 'pmulhrsw', 'pshufb', 'psignb', 'psignw', 'psignd',
-- AMD SSE4A.
'extrq', 'insertq', 'movntsd', 'movntss',
-- New instructions in Barcelona.
'lzcnt',
-- Penryn New Instructions (SSE4.1).
- 'blendpd', 'blendps', 'blendvpd', 'blendvps', 'dppd', 'dpps', 'extractps',
- 'insertps', 'movntdqa', 'mpsadbw', 'packusdw', 'pblendvb', 'pblendw',
- 'pcmpeqq', 'pextrb', 'pextrd', 'pextrq', 'pextrw', 'phminposuw', 'pinsrb',
- 'pinsrd', 'pinsrq', 'pmaxsb', 'pmaxsd', 'pmaxud', 'pmaxuw', 'pminsb',
- 'pminsd', 'pminud', 'pminuw', 'pmovsxbw', 'pmovsxbd', 'pmovsxbq', 'pmovsxwd',
- 'pmovsxwq', 'pmovsxdq', 'pmovzxbw', 'pmovzxbd', 'pmovzxbq', 'pmovzxwd',
- 'pmovzxwq', 'pmovzxdq', 'pmuldq', 'pmulld', 'ptest', 'roundpd', 'roundps',
- 'roundsd', 'roundss',
+ 'blendpd', 'blendps', 'blendvpd', 'blendvps', 'dppd', 'dpps', 'extractps', 'insertps', 'movntdqa',
+ 'mpsadbw', 'packusdw', 'pblendvb', 'pblendw', 'pcmpeqq', 'pextrb', 'pextrd', 'pextrq', 'pextrw',
+ 'phminposuw', 'pinsrb', 'pinsrd', 'pinsrq', 'pmaxsb', 'pmaxsd', 'pmaxud', 'pmaxuw', 'pminsb',
+ 'pminsd', 'pminud', 'pminuw', 'pmovsxbw', 'pmovsxbd', 'pmovsxbq', 'pmovsxwd', 'pmovsxwq',
+ 'pmovsxdq', 'pmovzxbw', 'pmovzxbd', 'pmovzxbq', 'pmovzxwd', 'pmovzxwq', 'pmovzxdq', 'pmuldq',
+ 'pmulld', 'ptest', 'roundpd', 'roundps', 'roundsd', 'roundss',
-- Nehalem New Instructions (SSE4.2).
- 'crc32', 'pcmpestri', 'pcmpestrm', 'pcmpistri', 'pcmpistrm', 'pcmpgtq',
- 'popcnt',
+ 'crc32', 'pcmpestri', 'pcmpestrm', 'pcmpistri', 'pcmpistrm', 'pcmpgtq', 'popcnt',
-- Intel SMX.
'getsec',
-- Geode (Cyrix) 3DNow! additions.
@@ -216,271 +156,234 @@ local instruction = token('instruction', word_match{
-- Intel AES instructions.
'aesenc', 'aesenclast', 'aesdec', 'aesdeclast', 'aesimc', 'aeskeygenassist',
-- Intel AVX AES instructions.
- 'vaesenc', 'vaesenclast', 'vaesdec', 'vaesdeclast', 'vaesimc',
- 'vaeskeygenassist',
+ 'vaesenc', 'vaesenclast', 'vaesdec', 'vaesdeclast', 'vaesimc', 'vaeskeygenassist',
-- Intel AVX instructions.
- 'vaddpd', 'vaddps', 'vaddsd', 'vaddss', 'vaddsubpd', 'vaddsubps',
- 'vandpd', 'vandps', 'vandnpd', 'vandnps', 'vblendpd', 'vblendps',
- 'vblendvpd', 'vblendvps', 'vbroadcastss', 'vbroadcastsd', 'vbroadcastf128',
- 'vcmpeq_ospd', 'vcmpeqpd', 'vcmplt_ospd', 'vcmpltpd', 'vcmple_ospd',
- 'vcmplepd', 'vcmpunord_qpd', 'vcmpunordpd', 'vcmpneq_uqpd', 'vcmpneqpd',
- 'vcmpnlt_uspd', 'vcmpnltpd', 'vcmpnle_uspd', 'vcmpnlepd', 'vcmpord_qpd',
- 'vcmpordpd', 'vcmpeq_uqpd', 'vcmpnge_uspd', 'vcmpngepd', 'vcmpngt_uspd',
- 'vcmpngtpd', 'vcmpfalse_oqpd', 'vcmpfalsepd', 'vcmpneq_oqpd', 'vcmpge_ospd',
- 'vcmpgepd', 'vcmpgt_ospd', 'vcmpgtpd', 'vcmptrue_uqpd', 'vcmptruepd',
- 'vcmpeq_ospd', 'vcmplt_oqpd', 'vcmple_oqpd', 'vcmpunord_spd', 'vcmpneq_uspd',
- 'vcmpnlt_uqpd', 'vcmpnle_uqpd', 'vcmpord_spd', 'vcmpeq_uspd', 'vcmpnge_uqpd',
- 'vcmpngt_uqpd', 'vcmpfalse_ospd', 'vcmpneq_ospd', 'vcmpge_oqpd',
- 'vcmpgt_oqpd', 'vcmptrue_uspd', 'vcmppd', 'vcmpeq_osps', 'vcmpeqps',
- 'vcmplt_osps', 'vcmpltps', 'vcmple_osps', 'vcmpleps', 'vcmpunord_qps',
- 'vcmpunordps', 'vcmpneq_uqps', 'vcmpneqps', 'vcmpnlt_usps', 'vcmpnltps',
- 'vcmpnle_usps', 'vcmpnleps', 'vcmpord_qps', 'vcmpordps', 'vcmpeq_uqps',
- 'vcmpnge_usps', 'vcmpngeps', 'vcmpngt_usps', 'vcmpngtps', 'vcmpfalse_oqps',
- 'vcmpfalseps', 'vcmpneq_oqps', 'vcmpge_osps', 'vcmpgeps', 'vcmpgt_osps',
- 'vcmpgtps', 'vcmptrue_uqps', 'vcmptrueps', 'vcmpeq_osps', 'vcmplt_oqps',
- 'vcmple_oqps', 'vcmpunord_sps', 'vcmpneq_usps', 'vcmpnlt_uqps',
- 'vcmpnle_uqps', 'vcmpord_sps', 'vcmpeq_usps', 'vcmpnge_uqps',
- 'vcmpngt_uqps', 'vcmpfalse_osps', 'vcmpneq_osps', 'vcmpge_oqps',
- 'vcmpgt_oqps', 'vcmptrue_usps', 'vcmpps', 'vcmpeq_ossd', 'vcmpeqsd',
- 'vcmplt_ossd', 'vcmpltsd', 'vcmple_ossd', 'vcmplesd', 'vcmpunord_qsd',
- 'vcmpunordsd', 'vcmpneq_uqsd', 'vcmpneqsd', 'vcmpnlt_ussd', 'vcmpnltsd',
- 'vcmpnle_ussd', 'vcmpnlesd', 'vcmpord_qsd', 'vcmpordsd', 'vcmpeq_uqsd',
- 'vcmpnge_ussd', 'vcmpngesd', 'vcmpngt_ussd', 'vcmpngtsd', 'vcmpfalse_oqsd',
- 'vcmpfalsesd', 'vcmpneq_oqsd', 'vcmpge_ossd', 'vcmpgesd', 'vcmpgt_ossd',
- 'vcmpgtsd', 'vcmptrue_uqsd', 'vcmptruesd', 'vcmpeq_ossd', 'vcmplt_oqsd',
- 'vcmple_oqsd', 'vcmpunord_ssd', 'vcmpneq_ussd', 'vcmpnlt_uqsd',
- 'vcmpnle_uqsd', 'vcmpord_ssd', 'vcmpeq_ussd', 'vcmpnge_uqsd',
- 'vcmpngt_uqsd', 'vcmpfalse_ossd', 'vcmpneq_ossd', 'vcmpge_oqsd',
- 'vcmpgt_oqsd', 'vcmptrue_ussd', 'vcmpsd', 'vcmpeq_osss', 'vcmpeqss',
- 'vcmplt_osss', 'vcmpltss', 'vcmple_osss', 'vcmpless', 'vcmpunord_qss',
- 'vcmpunordss', 'vcmpneq_uqss', 'vcmpneqss', 'vcmpnlt_usss', 'vcmpnltss',
- 'vcmpnle_usss', 'vcmpnless', 'vcmpord_qss', 'vcmpordss', 'vcmpeq_uqss',
- 'vcmpnge_usss', 'vcmpngess', 'vcmpngt_usss', 'vcmpngtss', 'vcmpfalse_oqss',
- 'vcmpfalsess', 'vcmpneq_oqss', 'vcmpge_osss', 'vcmpgess', 'vcmpgt_osss',
- 'vcmpgtss', 'vcmptrue_uqss', 'vcmptruess', 'vcmpeq_osss', 'vcmplt_oqss',
- 'vcmple_oqss', 'vcmpunord_sss', 'vcmpneq_usss', 'vcmpnlt_uqss',
- 'vcmpnle_uqss', 'vcmpord_sss', 'vcmpeq_usss', 'vcmpnge_uqss',
- 'vcmpngt_uqss', 'vcmpfalse_osss', 'vcmpneq_osss', 'vcmpge_oqss',
- 'vcmpgt_oqss', 'vcmptrue_usss', 'vcmpss', 'vcomisd', 'vcomiss',
- 'vcvtdq2pd', 'vcvtdq2ps', 'vcvtpd2dq', 'vcvtpd2ps', 'vcvtps2dq',
- 'vcvtps2pd', 'vcvtsd2si', 'vcvtsd2ss', 'vcvtsi2sd', 'vcvtsi2ss',
- 'vcvtss2sd', 'vcvtss2si', 'vcvttpd2dq', 'vcvttps2dq', 'vcvttsd2si',
- 'vcvttss2si', 'vdivpd', 'vdivps', 'vdivsd', 'vdivss', 'vdppd', 'vdpps',
- 'vextractf128', 'vextractps', 'vhaddpd', 'vhaddps', 'vhsubpd', 'vhsubps',
- 'vinsertf128', 'vinsertps', 'vlddqu', 'vldqqu', 'vlddqu', 'vldmxcsr',
- 'vmaskmovdqu', 'vmaskmovps', 'vmaskmovpd', 'vmaxpd', 'vmaxps', 'vmaxsd',
- 'vmaxss', 'vminpd', 'vminps', 'vminsd', 'vminss', 'vmovapd', 'vmovaps',
- 'vmovd', 'vmovq', 'vmovddup', 'vmovdqa', 'vmovqqa', 'vmovdqa', 'vmovdqu',
- 'vmovqqu', 'vmovdqu', 'vmovhlps', 'vmovhpd', 'vmovhps', 'vmovlhps',
- 'vmovlpd', 'vmovlps', 'vmovmskpd', 'vmovmskps', 'vmovntdq', 'vmovntqq',
- 'vmovntdq', 'vmovntdqa', 'vmovntpd', 'vmovntps', 'vmovsd', 'vmovshdup',
- 'vmovsldup', 'vmovss', 'vmovupd', 'vmovups', 'vmpsadbw', 'vmulpd',
- 'vmulps', 'vmulsd', 'vmulss', 'vorpd', 'vorps', 'vpabsb', 'vpabsw',
- 'vpabsd', 'vpacksswb', 'vpackssdw', 'vpackuswb', 'vpackusdw', 'vpaddb',
- 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw',
- 'vpalignr', 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw',
- 'vpcmpestri', 'vpcmpestrm', 'vpcmpistri', 'vpcmpistrm', 'vpcmpeqb',
- 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd',
- 'vpcmpgtq', 'vpermilpd', 'vpermilps', 'vperm2f128', 'vpextrb', 'vpextrw',
- 'vpextrd', 'vpextrq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphminposuw',
- 'vphsubw', 'vphsubd', 'vphsubsw', 'vpinsrb', 'vpinsrw', 'vpinsrd',
- 'vpinsrq', 'vpmaddwd', 'vpmaddubsw', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd',
- 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub',
- 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq',
- 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq',
- 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmulhuw', 'vpmulhrsw', 'vpmulhw',
- 'vpmullw', 'vpmulld', 'vpmuludq', 'vpmuldq', 'vpor', 'vpsadbw', 'vpshufb',
- 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd',
- 'vpslldq', 'vpsrldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad',
- 'vpsrlw', 'vpsrld', 'vpsrlq', 'vptest', 'vpsubb', 'vpsubw', 'vpsubd',
- 'vpsubq', 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw',
- 'vpunpckhwd', 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd',
- 'vpunpckldq', 'vpunpcklqdq', 'vpxor', 'vrcpps', 'vrcpss', 'vrsqrtps',
- 'vrsqrtss', 'vroundpd', 'vroundps', 'vroundsd', 'vroundss', 'vshufpd',
- 'vshufps', 'vsqrtpd', 'vsqrtps', 'vsqrtsd', 'vsqrtss', 'vstmxcsr', 'vsubpd',
- 'vsubps', 'vsubsd', 'vsubss', 'vtestps', 'vtestpd', 'vucomisd', 'vucomiss',
- 'vunpckhpd', 'vunpckhps', 'vunpcklpd', 'vunpcklps', 'vxorpd', 'vxorps',
- 'vzeroall', 'vzeroupper',
+ 'vaddpd', 'vaddps', 'vaddsd', 'vaddss', 'vaddsubpd', 'vaddsubps', 'vandpd', 'vandps', 'vandnpd',
+ 'vandnps', 'vblendpd', 'vblendps', 'vblendvpd', 'vblendvps', 'vbroadcastss', 'vbroadcastsd',
+ 'vbroadcastf128', 'vcmpeq_ospd', 'vcmpeqpd', 'vcmplt_ospd', 'vcmpltpd', 'vcmple_ospd', 'vcmplepd',
+ 'vcmpunord_qpd', 'vcmpunordpd', 'vcmpneq_uqpd', 'vcmpneqpd', 'vcmpnlt_uspd', 'vcmpnltpd',
+ 'vcmpnle_uspd', 'vcmpnlepd', 'vcmpord_qpd', 'vcmpordpd', 'vcmpeq_uqpd', 'vcmpnge_uspd',
+ 'vcmpngepd', 'vcmpngt_uspd', 'vcmpngtpd', 'vcmpfalse_oqpd', 'vcmpfalsepd', 'vcmpneq_oqpd',
+ 'vcmpge_ospd', 'vcmpgepd', 'vcmpgt_ospd', 'vcmpgtpd', 'vcmptrue_uqpd', 'vcmptruepd',
+ 'vcmpeq_ospd', 'vcmplt_oqpd', 'vcmple_oqpd', 'vcmpunord_spd', 'vcmpneq_uspd', 'vcmpnlt_uqpd',
+ 'vcmpnle_uqpd', 'vcmpord_spd', 'vcmpeq_uspd', 'vcmpnge_uqpd', 'vcmpngt_uqpd', 'vcmpfalse_ospd',
+ 'vcmpneq_ospd', 'vcmpge_oqpd', 'vcmpgt_oqpd', 'vcmptrue_uspd', 'vcmppd', 'vcmpeq_osps',
+ 'vcmpeqps', 'vcmplt_osps', 'vcmpltps', 'vcmple_osps', 'vcmpleps', 'vcmpunord_qps', 'vcmpunordps',
+ 'vcmpneq_uqps', 'vcmpneqps', 'vcmpnlt_usps', 'vcmpnltps', 'vcmpnle_usps', 'vcmpnleps',
+ 'vcmpord_qps', 'vcmpordps', 'vcmpeq_uqps', 'vcmpnge_usps', 'vcmpngeps', 'vcmpngt_usps',
+ 'vcmpngtps', 'vcmpfalse_oqps', 'vcmpfalseps', 'vcmpneq_oqps', 'vcmpge_osps', 'vcmpgeps',
+ 'vcmpgt_osps', 'vcmpgtps', 'vcmptrue_uqps', 'vcmptrueps', 'vcmpeq_osps', 'vcmplt_oqps',
+ 'vcmple_oqps', 'vcmpunord_sps', 'vcmpneq_usps', 'vcmpnlt_uqps', 'vcmpnle_uqps', 'vcmpord_sps',
+ 'vcmpeq_usps', 'vcmpnge_uqps', 'vcmpngt_uqps', 'vcmpfalse_osps', 'vcmpneq_osps', 'vcmpge_oqps',
+ 'vcmpgt_oqps', 'vcmptrue_usps', 'vcmpps', 'vcmpeq_ossd', 'vcmpeqsd', 'vcmplt_ossd', 'vcmpltsd',
+ 'vcmple_ossd', 'vcmplesd', 'vcmpunord_qsd', 'vcmpunordsd', 'vcmpneq_uqsd', 'vcmpneqsd',
+ 'vcmpnlt_ussd', 'vcmpnltsd', 'vcmpnle_ussd', 'vcmpnlesd', 'vcmpord_qsd', 'vcmpordsd',
+ 'vcmpeq_uqsd', 'vcmpnge_ussd', 'vcmpngesd', 'vcmpngt_ussd', 'vcmpngtsd', 'vcmpfalse_oqsd',
+ 'vcmpfalsesd', 'vcmpneq_oqsd', 'vcmpge_ossd', 'vcmpgesd', 'vcmpgt_ossd', 'vcmpgtsd',
+ 'vcmptrue_uqsd', 'vcmptruesd', 'vcmpeq_ossd', 'vcmplt_oqsd', 'vcmple_oqsd', 'vcmpunord_ssd',
+ 'vcmpneq_ussd', 'vcmpnlt_uqsd', 'vcmpnle_uqsd', 'vcmpord_ssd', 'vcmpeq_ussd', 'vcmpnge_uqsd',
+ 'vcmpngt_uqsd', 'vcmpfalse_ossd', 'vcmpneq_ossd', 'vcmpge_oqsd', 'vcmpgt_oqsd', 'vcmptrue_ussd',
+ 'vcmpsd', 'vcmpeq_osss', 'vcmpeqss', 'vcmplt_osss', 'vcmpltss', 'vcmple_osss', 'vcmpless',
+ 'vcmpunord_qss', 'vcmpunordss', 'vcmpneq_uqss', 'vcmpneqss', 'vcmpnlt_usss', 'vcmpnltss',
+ 'vcmpnle_usss', 'vcmpnless', 'vcmpord_qss', 'vcmpordss', 'vcmpeq_uqss', 'vcmpnge_usss',
+ 'vcmpngess', 'vcmpngt_usss', 'vcmpngtss', 'vcmpfalse_oqss', 'vcmpfalsess', 'vcmpneq_oqss',
+ 'vcmpge_osss', 'vcmpgess', 'vcmpgt_osss', 'vcmpgtss', 'vcmptrue_uqss', 'vcmptruess',
+ 'vcmpeq_osss', 'vcmplt_oqss', 'vcmple_oqss', 'vcmpunord_sss', 'vcmpneq_usss', 'vcmpnlt_uqss',
+ 'vcmpnle_uqss', 'vcmpord_sss', 'vcmpeq_usss', 'vcmpnge_uqss', 'vcmpngt_uqss', 'vcmpfalse_osss',
+ 'vcmpneq_osss', 'vcmpge_oqss', 'vcmpgt_oqss', 'vcmptrue_usss', 'vcmpss', 'vcomisd', 'vcomiss',
+ 'vcvtdq2pd', 'vcvtdq2ps', 'vcvtpd2dq', 'vcvtpd2ps', 'vcvtps2dq', 'vcvtps2pd', 'vcvtsd2si',
+ 'vcvtsd2ss', 'vcvtsi2sd', 'vcvtsi2ss', 'vcvtss2sd', 'vcvtss2si', 'vcvttpd2dq', 'vcvttps2dq',
+ 'vcvttsd2si', 'vcvttss2si', 'vdivpd', 'vdivps', 'vdivsd', 'vdivss', 'vdppd', 'vdpps',
+ 'vextractf128', 'vextractps', 'vhaddpd', 'vhaddps', 'vhsubpd', 'vhsubps', 'vinsertf128',
+ 'vinsertps', 'vlddqu', 'vldqqu', 'vlddqu', 'vldmxcsr', 'vmaskmovdqu', 'vmaskmovps', 'vmaskmovpd',
+ 'vmaxpd', 'vmaxps', 'vmaxsd', 'vmaxss', 'vminpd', 'vminps', 'vminsd', 'vminss', 'vmovapd',
+ 'vmovaps', 'vmovd', 'vmovq', 'vmovddup', 'vmovdqa', 'vmovqqa', 'vmovdqa', 'vmovdqu', 'vmovqqu',
+ 'vmovdqu', 'vmovhlps', 'vmovhpd', 'vmovhps', 'vmovlhps', 'vmovlpd', 'vmovlps', 'vmovmskpd',
+ 'vmovmskps', 'vmovntdq', 'vmovntqq', 'vmovntdq', 'vmovntdqa', 'vmovntpd', 'vmovntps', 'vmovsd',
+ 'vmovshdup', 'vmovsldup', 'vmovss', 'vmovupd', 'vmovups', 'vmpsadbw', 'vmulpd', 'vmulps',
+ 'vmulsd', 'vmulss', 'vorpd', 'vorps', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw',
+ 'vpackuswb', 'vpackusdw', 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw',
+ 'vpaddusb', 'vpaddusw', 'vpalignr', 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb',
+ 'vpblendw', 'vpcmpestri', 'vpcmpestrm', 'vpcmpistri', 'vpcmpistrm', 'vpcmpeqb', 'vpcmpeqw',
+ 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', 'vpcmpgtq', 'vpermilpd', 'vpermilps',
+ 'vperm2f128', 'vpextrb', 'vpextrw', 'vpextrd', 'vpextrq', 'vphaddw', 'vphaddd', 'vphaddsw',
+ 'vphminposuw', 'vphsubw', 'vphsubd', 'vphsubsw', 'vpinsrb', 'vpinsrw', 'vpinsrd', 'vpinsrq',
+ 'vpmaddwd', 'vpmaddubsw', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd', 'vpmaxub', 'vpmaxuw', 'vpmaxud',
+ 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw',
+ 'vpmovsxbd', 'vpmovsxbq', 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd',
+ 'vpmovzxbq', 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmulhuw', 'vpmulhrsw', 'vpmulhw', 'vpmullw',
+ 'vpmulld', 'vpmuludq', 'vpmuldq', 'vpor', 'vpsadbw', 'vpshufb', 'vpshufd', 'vpshufhw', 'vpshuflw',
+ 'vpsignb', 'vpsignw', 'vpsignd', 'vpslldq', 'vpsrldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw',
+ 'vpsrad', 'vpsrlw', 'vpsrld', 'vpsrlq', 'vptest', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq',
+ 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', 'vpunpckhwd', 'vpunpckhdq',
+ 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq', 'vpunpcklqdq', 'vpxor', 'vrcpps',
+ 'vrcpss', 'vrsqrtps', 'vrsqrtss', 'vroundpd', 'vroundps', 'vroundsd', 'vroundss', 'vshufpd',
+ 'vshufps', 'vsqrtpd', 'vsqrtps', 'vsqrtsd', 'vsqrtss', 'vstmxcsr', 'vsubpd', 'vsubps', 'vsubsd',
+ 'vsubss', 'vtestps', 'vtestpd', 'vucomisd', 'vucomiss', 'vunpckhpd', 'vunpckhps', 'vunpcklpd',
+ 'vunpcklps', 'vxorpd', 'vxorps', 'vzeroall', 'vzeroupper',
-- Intel Carry-Less Multiplication instructions (CLMUL).
'pclmullqlqdq', 'pclmulhqlqdq', 'pclmullqhqdq', 'pclmulhqhqdq', 'pclmulqdq',
-- Intel AVX Carry-Less Multiplication instructions (CLMUL).
- 'vpclmullqlqdq', 'vpclmulhqlqdq', 'vpclmullqhqdq', 'vpclmulhqhqdq',
- 'vpclmulqdq',
+ 'vpclmullqlqdq', 'vpclmulhqlqdq', 'vpclmullqhqdq', 'vpclmulhqhqdq', 'vpclmulqdq',
-- Intel Fused Multiply-Add instructions (FMA).
- 'vfmadd132ps', 'vfmadd132pd', 'vfmadd312ps', 'vfmadd312pd', 'vfmadd213ps',
- 'vfmadd213pd', 'vfmadd123ps', 'vfmadd123pd', 'vfmadd231ps', 'vfmadd231pd',
- 'vfmadd321ps', 'vfmadd321pd', 'vfmaddsub132ps', 'vfmaddsub132pd',
- 'vfmaddsub312ps', 'vfmaddsub312pd', 'vfmaddsub213ps', 'vfmaddsub213pd',
- 'vfmaddsub123ps', 'vfmaddsub123pd', 'vfmaddsub231ps', 'vfmaddsub231pd',
- 'vfmaddsub321ps', 'vfmaddsub321pd', 'vfmsub132ps', 'vfmsub132pd',
- 'vfmsub312ps', 'vfmsub312pd', 'vfmsub213ps', 'vfmsub213pd', 'vfmsub123ps',
- 'vfmsub123pd', 'vfmsub231ps', 'vfmsub231pd', 'vfmsub321ps', 'vfmsub321pd',
- 'vfmsubadd132ps', 'vfmsubadd132pd', 'vfmsubadd312ps', 'vfmsubadd312pd',
- 'vfmsubadd213ps', 'vfmsubadd213pd', 'vfmsubadd123ps', 'vfmsubadd123pd',
- 'vfmsubadd231ps', 'vfmsubadd231pd', 'vfmsubadd321ps', 'vfmsubadd321pd',
- 'vfnmadd132ps', 'vfnmadd132pd', 'vfnmadd312ps', 'vfnmadd312pd',
- 'vfnmadd213ps', 'vfnmadd213pd', 'vfnmadd123ps', 'vfnmadd123pd',
- 'vfnmadd231ps', 'vfnmadd231pd', 'vfnmadd321ps', 'vfnmadd321pd',
- 'vfnmsub132ps', 'vfnmsub132pd', 'vfnmsub312ps', 'vfnmsub312pd',
- 'vfnmsub213ps', 'vfnmsub213pd', 'vfnmsub123ps', 'vfnmsub123pd',
- 'vfnmsub231ps', 'vfnmsub231pd', 'vfnmsub321ps', 'vfnmsub321pd',
- 'vfmadd132ss', 'vfmadd132sd', 'vfmadd312ss', 'vfmadd312sd', 'vfmadd213ss',
- 'vfmadd213sd', 'vfmadd123ss', 'vfmadd123sd', 'vfmadd231ss', 'vfmadd231sd',
- 'vfmadd321ss', 'vfmadd321sd', 'vfmsub132ss', 'vfmsub132sd', 'vfmsub312ss',
- 'vfmsub312sd', 'vfmsub213ss', 'vfmsub213sd', 'vfmsub123ss', 'vfmsub123sd',
- 'vfmsub231ss', 'vfmsub231sd', 'vfmsub321ss', 'vfmsub321sd', 'vfnmadd132ss',
- 'vfnmadd132sd', 'vfnmadd312ss', 'vfnmadd312sd', 'vfnmadd213ss',
- 'vfnmadd213sd', 'vfnmadd123ss', 'vfnmadd123sd', 'vfnmadd231ss',
- 'vfnmadd231sd', 'vfnmadd321ss', 'vfnmadd321sd', 'vfnmsub132ss',
- 'vfnmsub132sd', 'vfnmsub312ss', 'vfnmsub312sd', 'vfnmsub213ss',
- 'vfnmsub213sd', 'vfnmsub123ss', 'vfnmsub123sd', 'vfnmsub231ss',
- 'vfnmsub231sd', 'vfnmsub321ss', 'vfnmsub321sd',
+ 'vfmadd132ps', 'vfmadd132pd', 'vfmadd312ps', 'vfmadd312pd', 'vfmadd213ps', 'vfmadd213pd',
+ 'vfmadd123ps', 'vfmadd123pd', 'vfmadd231ps', 'vfmadd231pd', 'vfmadd321ps', 'vfmadd321pd',
+ 'vfmaddsub132ps', 'vfmaddsub132pd', 'vfmaddsub312ps', 'vfmaddsub312pd', 'vfmaddsub213ps',
+ 'vfmaddsub213pd', 'vfmaddsub123ps', 'vfmaddsub123pd', 'vfmaddsub231ps', 'vfmaddsub231pd',
+ 'vfmaddsub321ps', 'vfmaddsub321pd', 'vfmsub132ps', 'vfmsub132pd', 'vfmsub312ps', 'vfmsub312pd',
+ 'vfmsub213ps', 'vfmsub213pd', 'vfmsub123ps', 'vfmsub123pd', 'vfmsub231ps', 'vfmsub231pd',
+ 'vfmsub321ps', 'vfmsub321pd', 'vfmsubadd132ps', 'vfmsubadd132pd', 'vfmsubadd312ps',
+ 'vfmsubadd312pd', 'vfmsubadd213ps', 'vfmsubadd213pd', 'vfmsubadd123ps', 'vfmsubadd123pd',
+ 'vfmsubadd231ps', 'vfmsubadd231pd', 'vfmsubadd321ps', 'vfmsubadd321pd', 'vfnmadd132ps',
+ 'vfnmadd132pd', 'vfnmadd312ps', 'vfnmadd312pd', 'vfnmadd213ps', 'vfnmadd213pd', 'vfnmadd123ps',
+ 'vfnmadd123pd', 'vfnmadd231ps', 'vfnmadd231pd', 'vfnmadd321ps', 'vfnmadd321pd', 'vfnmsub132ps',
+ 'vfnmsub132pd', 'vfnmsub312ps', 'vfnmsub312pd', 'vfnmsub213ps', 'vfnmsub213pd', 'vfnmsub123ps',
+ 'vfnmsub123pd', 'vfnmsub231ps', 'vfnmsub231pd', 'vfnmsub321ps', 'vfnmsub321pd', 'vfmadd132ss',
+ 'vfmadd132sd', 'vfmadd312ss', 'vfmadd312sd', 'vfmadd213ss', 'vfmadd213sd', 'vfmadd123ss',
+ 'vfmadd123sd', 'vfmadd231ss', 'vfmadd231sd', 'vfmadd321ss', 'vfmadd321sd', 'vfmsub132ss',
+ 'vfmsub132sd', 'vfmsub312ss', 'vfmsub312sd', 'vfmsub213ss', 'vfmsub213sd', 'vfmsub123ss',
+ 'vfmsub123sd', 'vfmsub231ss', 'vfmsub231sd', 'vfmsub321ss', 'vfmsub321sd', 'vfnmadd132ss',
+ 'vfnmadd132sd', 'vfnmadd312ss', 'vfnmadd312sd', 'vfnmadd213ss', 'vfnmadd213sd', 'vfnmadd123ss',
+ 'vfnmadd123sd', 'vfnmadd231ss', 'vfnmadd231sd', 'vfnmadd321ss', 'vfnmadd321sd', 'vfnmsub132ss',
+ 'vfnmsub132sd', 'vfnmsub312ss', 'vfnmsub312sd', 'vfnmsub213ss', 'vfnmsub213sd', 'vfnmsub123ss',
+ 'vfnmsub123sd', 'vfnmsub231ss', 'vfnmsub231sd', 'vfnmsub321ss', 'vfnmsub321sd',
-- Intel post-32 nm processor instructions.
- 'rdfsbase', 'rdgsbase', 'rdrand', 'wrfsbase', 'wrgsbase', 'vcvtph2ps',
- 'vcvtps2ph', 'adcx', 'adox', 'rdseed', 'clac', 'stac',
+ 'rdfsbase', 'rdgsbase', 'rdrand', 'wrfsbase', 'wrgsbase', 'vcvtph2ps', 'vcvtps2ph', 'adcx',
+ 'adox', 'rdseed', 'clac', 'stac',
-- VIA (Centaur) security instructions.
- 'xstore', 'xcryptecb', 'xcryptcbc', 'xcryptctr', 'xcryptcfb', 'xcryptofb',
- 'montmul', 'xsha1', 'xsha256',
+ 'xstore', 'xcryptecb', 'xcryptcbc', 'xcryptctr', 'xcryptcfb', 'xcryptofb', 'montmul', 'xsha1',
+ 'xsha256',
-- AMD Lightweight Profiling (LWP) instructions.
'llwpcb', 'slwpcb', 'lwpval', 'lwpins',
-- AMD XOP and FMA4 instructions (SSE5).
- 'vfmaddpd', 'vfmaddps', 'vfmaddsd', 'vfmaddss', 'vfmaddsubpd',
- 'vfmaddsubps', 'vfmsubaddpd', 'vfmsubaddps', 'vfmsubpd', 'vfmsubps',
- 'vfmsubsd', 'vfmsubss', 'vfnmaddpd', 'vfnmaddps', 'vfnmaddsd', 'vfnmaddss',
- 'vfnmsubpd', 'vfnmsubps', 'vfnmsubsd', 'vfnmsubss', 'vfrczpd', 'vfrczps',
- 'vfrczsd', 'vfrczss', 'vpcmov', 'vpcomb', 'vpcomd', 'vpcomq', 'vpcomub',
- 'vpcomud', 'vpcomuq', 'vpcomuw', 'vpcomw', 'vphaddbd', 'vphaddbq',
- 'vphaddbw', 'vphadddq', 'vphaddubd', 'vphaddubq', 'vphaddubw', 'vphaddudq',
- 'vphadduwd', 'vphadduwq', 'vphaddwd', 'vphaddwq', 'vphsubbw', 'vphsubdq',
- 'vphsubwd', 'vpmacsdd', 'vpmacsdqh', 'vpmacsdql', 'vpmacssdd', 'vpmacssdqh',
- 'vpmacssdql', 'vpmacsswd', 'vpmacssww', 'vpmacswd', 'vpmacsww', 'vpmadcsswd',
- 'vpmadcswd', 'vpperm', 'vprotb', 'vprotd', 'vprotq', 'vprotw', 'vpshab',
- 'vpshad', 'vpshaq', 'vpshaw', 'vpshlb', 'vpshld', 'vpshlq', 'vpshlw',
+ 'vfmaddpd', 'vfmaddps', 'vfmaddsd', 'vfmaddss', 'vfmaddsubpd', 'vfmaddsubps', 'vfmsubaddpd',
+ 'vfmsubaddps', 'vfmsubpd', 'vfmsubps', 'vfmsubsd', 'vfmsubss', 'vfnmaddpd', 'vfnmaddps',
+ 'vfnmaddsd', 'vfnmaddss', 'vfnmsubpd', 'vfnmsubps', 'vfnmsubsd', 'vfnmsubss', 'vfrczpd',
+ 'vfrczps', 'vfrczsd', 'vfrczss', 'vpcmov', 'vpcomb', 'vpcomd', 'vpcomq', 'vpcomub', 'vpcomud',
+ 'vpcomuq', 'vpcomuw', 'vpcomw', 'vphaddbd', 'vphaddbq', 'vphaddbw', 'vphadddq', 'vphaddubd',
+ 'vphaddubq', 'vphaddubw', 'vphaddudq', 'vphadduwd', 'vphadduwq', 'vphaddwd', 'vphaddwq',
+ 'vphsubbw', 'vphsubdq', 'vphsubwd', 'vpmacsdd', 'vpmacsdqh', 'vpmacsdql', 'vpmacssdd',
+ 'vpmacssdqh', 'vpmacssdql', 'vpmacsswd', 'vpmacssww', 'vpmacswd', 'vpmacsww', 'vpmadcsswd',
+ 'vpmadcswd', 'vpperm', 'vprotb', 'vprotd', 'vprotq', 'vprotw', 'vpshab', 'vpshad', 'vpshaq',
+ 'vpshaw', 'vpshlb', 'vpshld', 'vpshlq', 'vpshlw',
-- Intel AVX2 instructions.
- 'vmpsadbw', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw',
- 'vpackusdw', 'vpackuswb', 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq',
- 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', 'vpalignr', 'vpand',
- 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', 'vpcmpeqb',
- 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd',
- 'vpcmpgtq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphsubw', 'vphsubd',
- 'vphsubsw', 'vpmaddubsw', 'vpmaddwd', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd',
- 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub',
- 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq',
- 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq',
- 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmuldq', 'vpmulhrsw', 'vpmulhuw',
- 'vpmulhw', 'vpmullw', 'vpmulld', 'vpmuludq', 'vpor', 'vpsadbw', 'vpshufb',
- 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd',
- 'vpslldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', 'vpsrldq',
- 'vpsrlw', 'vpsrld', 'vpsrlq', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq',
- 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', 'vpunpckhwd',
- 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq',
- 'vpunpcklqdq', 'vpxor', 'vmovntdqa', 'vbroadcastss', 'vbroadcastsd',
- 'vbroadcasti128', 'vpblendd', 'vpbroadcastb', 'vpbroadcastw', 'vpbroadcastd',
- 'vpbroadcastq', 'vpermd', 'vpermpd', 'vpermps', 'vpermq', 'vperm2i128',
- 'vextracti128', 'vinserti128', 'vpmaskmovd', 'vpmaskmovq', 'vpmaskmovd',
- 'vpmaskmovq', 'vpsllvd', 'vpsllvq', 'vpsllvd', 'vpsllvq', 'vpsravd',
- 'vpsrlvd', 'vpsrlvq', 'vpsrlvd', 'vpsrlvq', 'vgatherdpd', 'vgatherqpd',
- 'vgatherdpd', 'vgatherqpd', 'vgatherdps', 'vgatherqps', 'vgatherdps',
- 'vgatherqps', 'vpgatherdd', 'vpgatherqd', 'vpgatherdd', 'vpgatherqd',
+ 'vmpsadbw', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw', 'vpackusdw', 'vpackuswb',
+ 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', 'vpalignr',
+ 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', 'vpcmpeqb', 'vpcmpeqw',
+ 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd', 'vpcmpgtq', 'vphaddw', 'vphaddd',
+ 'vphaddsw', 'vphsubw', 'vphsubd', 'vphsubsw', 'vpmaddubsw', 'vpmaddwd', 'vpmaxsb', 'vpmaxsw',
+ 'vpmaxsd', 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub', 'vpminuw',
+ 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq', 'vpmovsxwd', 'vpmovsxwq',
+ 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq', 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq',
+ 'vpmuldq', 'vpmulhrsw', 'vpmulhuw', 'vpmulhw', 'vpmullw', 'vpmulld', 'vpmuludq', 'vpor',
+ 'vpsadbw', 'vpshufb', 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd',
+ 'vpslldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', 'vpsrldq', 'vpsrlw', 'vpsrld',
+ 'vpsrlq', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq', 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw',
+ 'vpunpckhbw', 'vpunpckhwd', 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq',
+ 'vpunpcklqdq', 'vpxor', 'vmovntdqa', 'vbroadcastss', 'vbroadcastsd', 'vbroadcasti128', 'vpblendd',
+ 'vpbroadcastb', 'vpbroadcastw', 'vpbroadcastd', 'vpbroadcastq', 'vpermd', 'vpermpd', 'vpermps',
+ 'vpermq', 'vperm2i128', 'vextracti128', 'vinserti128', 'vpmaskmovd', 'vpmaskmovq', 'vpmaskmovd',
+ 'vpmaskmovq', 'vpsllvd', 'vpsllvq', 'vpsllvd', 'vpsllvq', 'vpsravd', 'vpsrlvd', 'vpsrlvq',
+ 'vpsrlvd', 'vpsrlvq', 'vgatherdpd', 'vgatherqpd', 'vgatherdpd', 'vgatherqpd', 'vgatherdps',
+ 'vgatherqps', 'vgatherdps', 'vgatherqps', 'vpgatherdd', 'vpgatherqd', 'vpgatherdd', 'vpgatherqd',
'vpgatherdq', 'vpgatherqq', 'vpgatherdq', 'vpgatherqq',
-- Transactional Synchronization Extensions (TSX).
'xabort', 'xbegin', 'xend', 'xtest',
- -- Intel BMI1 and BMI2 instructions, AMD TBM instructions.
- 'andn', 'bextr', 'blci', 'blcic', 'blsi', 'blsic', 'blcfill', 'blsfill',
- 'blcmsk', 'blsmsk', 'blsr', 'blcs', 'bzhi', 'mulx', 'pdep', 'pext', 'rorx',
- 'sarx', 'shlx', 'shrx', 'tzcnt', 'tzmsk', 't1mskc',
+ -- Intel BMI1 and BMI2 instructions, AMD TBM instructions.
+ 'andn', 'bextr', 'blci', 'blcic', 'blsi', 'blsic', 'blcfill', 'blsfill', 'blcmsk', 'blsmsk',
+ 'blsr', 'blcs', 'bzhi', 'mulx', 'pdep', 'pext', 'rorx', 'sarx', 'shlx', 'shrx', 'tzcnt', 'tzmsk',
+ 't1mskc',
-- Systematic names for the hinting nop instructions.
- 'hint_nop0', 'hint_nop1', 'hint_nop2', 'hint_nop3', 'hint_nop4',
- 'hint_nop5', 'hint_nop6', 'hint_nop7', 'hint_nop8', 'hint_nop9',
- 'hint_nop10', 'hint_nop11', 'hint_nop12', 'hint_nop13', 'hint_nop14',
- 'hint_nop15', 'hint_nop16', 'hint_nop17', 'hint_nop18', 'hint_nop19',
- 'hint_nop20', 'hint_nop21', 'hint_nop22', 'hint_nop23', 'hint_nop24',
- 'hint_nop25', 'hint_nop26', 'hint_nop27', 'hint_nop28', 'hint_nop29',
- 'hint_nop30', 'hint_nop31', 'hint_nop32', 'hint_nop33', 'hint_nop34',
- 'hint_nop35', 'hint_nop36', 'hint_nop37', 'hint_nop38', 'hint_nop39',
- 'hint_nop40', 'hint_nop41', 'hint_nop42', 'hint_nop43', 'hint_nop44',
- 'hint_nop45', 'hint_nop46', 'hint_nop47', 'hint_nop48', 'hint_nop49',
- 'hint_nop50', 'hint_nop51', 'hint_nop52', 'hint_nop53', 'hint_nop54',
- 'hint_nop55', 'hint_nop56', 'hint_nop57', 'hint_nop58', 'hint_nop59',
- 'hint_nop60', 'hint_nop61', 'hint_nop62', 'hint_nop63',
-})
-
--- Types.
-local sizes = word_match{
- 'byte', 'word', 'dword', 'qword', 'tword', 'oword', 'yword',
- 'a16', 'a32', 'a64', 'o16', 'o32', 'o64' -- instructions
-}
-local wrt_types = '..' * word_match{
- 'start', 'gotpc', 'gotoff', 'gottpoff', 'got', 'plt', 'sym', 'tlsie'
-}
-local type = token(l.TYPE, sizes + wrt_types)
+ 'hint_nop0', 'hint_nop1', 'hint_nop2', 'hint_nop3', 'hint_nop4', 'hint_nop5', 'hint_nop6',
+ 'hint_nop7', 'hint_nop8', 'hint_nop9', 'hint_nop10', 'hint_nop11', 'hint_nop12', 'hint_nop13',
+ 'hint_nop14', 'hint_nop15', 'hint_nop16', 'hint_nop17', 'hint_nop18', 'hint_nop19', 'hint_nop20',
+ 'hint_nop21', 'hint_nop22', 'hint_nop23', 'hint_nop24', 'hint_nop25', 'hint_nop26', 'hint_nop27',
+ 'hint_nop28', 'hint_nop29', 'hint_nop30', 'hint_nop31', 'hint_nop32', 'hint_nop33', 'hint_nop34',
+ 'hint_nop35', 'hint_nop36', 'hint_nop37', 'hint_nop38', 'hint_nop39', 'hint_nop40', 'hint_nop41',
+ 'hint_nop42', 'hint_nop43', 'hint_nop44', 'hint_nop45', 'hint_nop46', 'hint_nop47', 'hint_nop48',
+ 'hint_nop49', 'hint_nop50', 'hint_nop51', 'hint_nop52', 'hint_nop53', 'hint_nop54', 'hint_nop55',
+ 'hint_nop56', 'hint_nop57', 'hint_nop58', 'hint_nop59', 'hint_nop60', 'hint_nop61', 'hint_nop62',
+ 'hint_nop63'
+}))
+lex:add_style('instruction', lexer.styles['function'])
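+-- ('function' is a Lua keyword, so the styles table is indexed with brackets here.)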
-- Registers.
-local register = token('register', word_match{
+lex:add_rule('register', token('register', word_match{
-- 32-bit registers.
- 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cx', 'dh', 'di', 'dl',
- 'dx', 'eax', 'ebx', 'ebx', 'ecx', 'edi', 'edx', 'esi', 'esp', 'fs', 'mm0',
- 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'si', 'st0', 'st1', 'st2',
- 'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4',
- 'xmm5', 'xmm6', 'xmm7', 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5',
- 'ymm6', 'ymm7',
+ 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cx', 'dh', 'di', 'dl', 'dx', 'eax', 'ebx',
+ 'ecx', 'edi', 'edx', 'esi', 'esp', 'fs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
+ 'mm7', 'si', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2',
+ 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6',
+ 'ymm7',
-- 64-bit registers.
- 'bpl', 'dil', 'gs', 'r8', 'r8b', 'r8w', 'r9', 'r9b', 'r9w', 'r10', 'r10b',
- 'r10w', 'r11', 'r11b', 'r11w', 'r12', 'r12b', 'r12w', 'r13', 'r13b', 'r13w',
- 'r14', 'r14b', 'r14w', 'r15', 'r15b', 'r15w', 'rax', 'rbp', 'rbx', 'rcx',
- 'rdi', 'rdx', 'rsi', 'rsp', 'sil', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12',
- 'xmm13', 'xmm14', 'xmm15', 'ymm8', 'ymm9', 'ymm10', 'ymm11', 'ymm12', 'ymm13',
+ 'bpl', 'dil', 'gs', 'r8', 'r8b', 'r8w', 'r9', 'r9b', 'r9w', 'r10', 'r10b', 'r10w', 'r11', 'r11b',
+ 'r11w', 'r12', 'r12b', 'r12w', 'r13', 'r13b', 'r13w', 'r14', 'r14b', 'r14w', 'r15', 'r15b',
+ 'r15w', 'rax', 'rbp', 'rbx', 'rcx', 'rdi', 'rdx', 'rsi', 'rsp', 'sil', 'xmm8', 'xmm9', 'xmm10',
+ 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15', 'ymm8', 'ymm9', 'ymm10', 'ymm11', 'ymm12', 'ymm13',
'ymm14', 'ymm15'
-})
+}))
+lex:add_style('register', lexer.styles.constant)
-local word = (l.alpha + S('$._?')) * (l.alnum + S('$._?#@~'))^0
+-- Types.
+local sizes = word_match{
+ 'byte', 'word', 'dword', 'qword', 'tword', 'oword', 'yword',
+ -- Instructions.
+ 'a16', 'a32', 'a64', 'o16', 'o32', 'o64'
+}
+local wrt_types = '..' * word_match('start gotpc gotoff gottpoff got plt sym tlsie')
+lex:add_rule('type', token(lexer.TYPE, sizes + wrt_types))
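+-- The '..' names are NASM's special WRT symbols, e.g. 'wrt ..got' and 'wrt ..plt'.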
+
+-- Constants.
+local word = (lexer.alpha + S('$._?')) * (lexer.alnum + S('$._?#@~'))^0
+local constants = word_match{
+ '__float128h__', '__float128l__', '__float16__', '__float32__', '__float64__', '__float8__',
+ '__float80e__', '__float80m__', '__Infinity__', '__NaN__', '__QNaN__', '__SNaN__'
+}
+lex:add_rule('constant', token(lexer.CONSTANT, constants + '$' * P('$')^-1 * -word))
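+-- '$' is the current assembly position and '$$' the start of the current section, as in
+-- 'times 510 - ($ - $$) db 0'; the -word guard leaves names like '$foo' to the identifier rule.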
-- Labels.
-local label = token(l.LABEL, word * ':')
+lex:add_rule('label', token(lexer.LABEL, word * ':'))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
--- Constants.
-local constants = word_match{
- '__float8__', '__float16__', '__float32__', '__float64__', '__float80m__',
- '__float80e__', '__float128l__', '__float128h__', '__Infinity__', '__QNaN__',
- '__NaN__', '__SNaN__'
-}
-local constant = token(l.CONSTANT, constants + '$' * P('$')^-1 * -identifier)
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
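+-- The 'true' argument makes both ranges single-line, so an unterminated string stops at end of line.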
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|~:,()[]'))
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(';')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'instruction', instruction},
- {'register', register},
- {'type', type},
- {'constant', constant},
- {'label', label},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('hqb')^-1))
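+-- lexer.number with an optional NASM radix suffix, e.g. 0x2A, 100h, 777q, 1010b.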
-M._tokenstyles = {
- instruction = l.STYLE_FUNCTION,
- register = l.STYLE_CONSTANT,
+-- Preprocessor.
+local pp_word = word_match{
+ 'arg', 'assign', 'clear', 'define', 'defstr', 'deftok', 'depend', 'elif', 'elifctx', 'elifdef',
+ 'elifempty', 'elifenv', 'elifid', 'elifidn', 'elifidni', 'elifmacro', 'elifn', 'elifnctx',
+ 'elifndef', 'elifnempty', 'elifnenv', 'elifnid', 'elifnidn', 'elifnidni', 'elifnmacro',
+ 'elifnnum', 'elifnstr', 'elifntoken', 'elifnum', 'elifstr', 'eliftoken', 'else', 'endif',
+ 'endmacro', 'endrep', 'endwhile', 'error', 'exitmacro', 'exitrep', 'exitwhile', 'fatal', 'final',
+ 'idefine', 'idefstr', 'ideftok', 'if', 'ifctx', 'ifdef', 'ifempty', 'ifenv', 'ifid', 'ifidn',
+ 'ifidni', 'ifmacro', 'ifn', 'ifnctx', 'ifndef', 'ifnempty', 'ifnenv', 'ifnid', 'ifnidn',
+ 'ifnidni', 'ifnmacro', 'ifnnum', 'ifnstr', 'ifntoken', 'ifnum', 'ifstr', 'iftoken', 'imacro',
+ 'include', 'ixdefine', 'line', 'local', 'macro', 'pathsearch', 'pop', 'push', 'rep', 'repl',
+ 'rmacro', 'rotate', 'stacksize', 'strcat', 'strlen', 'substr', 'undef', 'unmacro', 'use',
+ 'warning', 'while', 'xdefine'
}
+local pp_symbol = '??' + S('!$+?') + '%' * -lexer.space + lexer.digit^1
+lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (pp_word + pp_symbol)))
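+-- Covers directives like %define and %if as well as %% (macro-local labels),
+-- %1, %2, ... (macro parameters), and %?/%?? inside multi-line macros.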
-M._foldsymbols = {
- _patterns = {'%l+', '//'},
- [l.PREPROCESSOR] = {
- ['if'] = 1, endif = -1, macro = 1, endmacro = -1, rep = 1, endrep = -1,
- ['while'] = 1, endwhile = -1,
- },
- [l.KEYWORD] = {struc = 1, endstruc = -1},
- [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|~:,()[]')))
+
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, '%if', '%endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '%macro', '%endmacro')
+lex:add_fold_point(lexer.PREPROCESSOR, '%rep', '%endrep')
+lex:add_fold_point(lexer.PREPROCESSOR, '%while', '%endwhile')
+lex:add_fold_point(lexer.KEYWORD, 'struc', 'endstruc')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
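+-- e.g. a '%macro ... %endmacro' body or a run of consecutive ';' comment lines folds as one region.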
-return M
+return lex
diff --git a/lua/lexers/asp.lua b/lua/lexers/asp.lua
index 6fb5800..a16f4b6 100644
--- a/lua/lexers/asp.lua
+++ b/lua/lexers/asp.lua
@@ -1,42 +1,31 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- ASP LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'asp'}
-
--- Embedded in HTML.
-local html = l.load('html')
+local html = lexer.load('html')
+local lex = lexer.new('asp', {inherit = html}) -- proxy for HTML
-- Embedded VB.
-local vb = l.load('vb')
+local vb = lexer.load('vb')
local vb_start_rule = token('asp_tag', '<%' * P('=')^-1)
local vb_end_rule = token('asp_tag', '%>')
-l.embed_lexer(html, vb, vb_start_rule, vb_end_rule)
+lex:embed(vb, vb_start_rule, vb_end_rule)
+lex:add_style('asp_tag', lexer.styles.embedded)
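+-- The optional '=' in vb_start_rule means both <% ... %> and <%= ... %> islands lex as VB.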
-- Embedded VBScript.
-local vbs = l.load('vbscript')
-local script_element = word_match({'script'}, nil, html.case_insensitive_tags)
+local vbs = lexer.load('vb', 'vbscript')
+local script_element = word_match('script', true)
local vbs_start_rule = #(P('<') * script_element * (P(function(input, index)
if input:find('^%s+language%s*=%s*(["\'])vbscript%1', index) or
- input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then
- return index
- end
+ input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then return index end
end) + '>')) * html.embed_start_tag -- <script language="vbscript">
-local vbs_end_rule = #('</' * script_element * l.space^0 * '>') *
- html.embed_end_tag -- </script>
-l.embed_lexer(html, vbs, vbs_start_rule, vbs_end_rule)
-
-M._tokenstyles = {
- asp_tag = l.STYLE_EMBEDDED
-}
+local vbs_end_rule = #('</' * script_element * lexer.space^0 * '>') * html.embed_end_tag -- </script>
+lex:embed(vbs, vbs_start_rule, vbs_end_rule)
-local _foldsymbols = html._foldsymbols
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>'
-_foldsymbols.asp_tag = {['<%'] = 1, ['%>'] = -1}
-M._foldsymbols = _foldsymbols
+-- Fold points.
+lex:add_fold_point('asp_tag', '<%', '%>')
-return M
+return lex
diff --git a/lua/lexers/autoit.lua b/lua/lexers/autoit.lua
index ee72cfb..42b4bdf 100644
--- a/lua/lexers/autoit.lua
+++ b/lua/lexers/autoit.lua
@@ -1,168 +1,129 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- AutoIt LPeg lexer.
-- Contributed by Jeff Stone.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'autoit'}
+local lex = lexer.new('autoit')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline_esc^0
-local block_comment1 = '#comments-start' * (l.any - '#comments-end')^0 *
- P('#comments-end')^-1
-local block_comment2 = '#cs' * (l.any - '#ce')^0 * P('#ce')^-1
-local comment = token(l.COMMENT, line_comment + block_comment1 + block_comment2)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local kw = token(l.KEYWORD, word_match({
- 'False', 'True', 'And', 'Or', 'Not', 'ContinueCase', 'ContinueLoop',
- 'Default', 'Dim', 'Global', 'Local', 'Const', 'Do', 'Until', 'Enum', 'Exit',
- 'ExitLoop', 'For', 'To', 'Step', 'Next', 'In', 'Func', 'Return', 'EndFunc',
- 'If', 'Then', 'ElseIf', 'Else', 'EndIf', 'Null', 'ReDim', 'Select', 'Case',
- 'EndSelect', 'Static', 'Switch', 'EndSwitch', 'Volatile', 'While', 'WEnd',
- 'With', 'EndWith'
-}, nil, true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'False', 'True', 'And', 'Or', 'Not', 'ContinueCase', 'ContinueLoop', 'Default', 'Dim', 'Global',
+ 'Local', 'Const', 'Do', 'Until', 'Enum', 'Exit', 'ExitLoop', 'For', 'To', 'Step', 'Next', 'In',
+ 'Func', 'Return', 'EndFunc', 'If', 'Then', 'ElseIf', 'Else', 'EndIf', 'Null', 'ReDim', 'Select',
+ 'Case', 'EndSelect', 'Static', 'Switch', 'EndSwitch', 'Volatile', 'While', 'WEnd', 'With',
+ 'EndWith'
+}, true)))
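+-- The 'true' flag makes word_match case-insensitive, matching AutoIt's case-insensitive keywords.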
-- Functions.
-local func = token(l.FUNCTION, word_match({
- 'Abs', 'ACos', 'AdlibRegister', 'AdlibUnRegister', 'Asc', 'AscW', 'ASin',
- 'Assign', 'ATan', 'AutoItSetOption', 'AutoItWinGetTitle', 'AutoItWinSetTitle',
- 'Beep', 'Binary', 'BinaryLen', 'BinaryMid', 'BinaryToString', 'BitAND',
- 'BitNOT', 'BitOR', 'BitRotate', 'BitShift', 'BitXOR', 'BlockInput', 'Break',
- 'Call', 'CDTray', 'Ceiling', 'Chr', 'ChrW', 'ClipGet', 'ClipPut',
- 'ConsoleRead', 'ConsoleWrite', 'ConsoleWriteError', 'ControlClick',
- 'ControlCommand', 'ControlDisable', 'ControlEnable', 'ControlFocus',
- 'ControlGetFocus', 'ControlGetHandle', 'ControlGetPos', 'ControlGetText',
- 'ControlHide', 'ControlListView', 'ControlMove', 'ControlSend',
- 'ControlSetText', 'ControlShow', 'ControlTreeView', 'Cos', 'Dec', 'DirCopy',
- 'DirCreate', 'DirGetSize', 'DirMove', 'DirRemove', 'DllCall',
- 'DllCallAddress', 'DllCallbackFree', 'DllCallbackGetPtr',
- 'DllCallbackRegister', 'DllClose', 'DllOpen', 'DllStructCreate',
- 'DllStructGetData', 'DllStructGetPtr', 'DllStructGetSize', 'DllStructSetData',
- 'DriveGetDrive', 'DriveGetFileSystem', 'DriveGetLabel', 'DriveGetSerial',
- 'DriveGetType', 'DriveMapAdd', 'DriveMapDel', 'DriveMapGet', 'DriveSetLabel',
- 'DriveSpaceFree', 'DriveSpaceTotal', 'DriveStatus', 'EnvGet', 'EnvSet',
- 'EnvUpdate', 'Eval', 'Execute', 'Exp', 'FileChangeDir', 'FileClose',
- 'FileCopy', 'FileCreateNTFSLink', 'FileCreateShortcut', 'FileDelete',
- 'FileExists', 'FileFindFirstFile', 'FileFindNextFile', 'FileFlush',
- 'FileGetAttrib', 'FileGetEncoding', 'FileGetLongName', 'FileGetPos',
- 'FileGetShortcut', 'FileGetShortName', 'FileGetSize', 'FileGetTime',
- 'FileGetVersion', 'FileInstall', 'FileMove', 'FileOpen', 'FileOpenDialog',
- 'FileRead', 'FileReadLine', 'FileReadToArray', 'FileRecycle',
- 'FileRecycleEmpty', 'FileSaveDialog', 'FileSelectFolder', 'FileSetAttrib',
- 'FileSetEnd', 'FileSetPos', 'FileSetTime', 'FileWrite', 'FileWriteLine',
- 'Floor', 'FtpSetProxy', 'FuncName', 'GUICreate', 'GUICtrlCreateAvi',
- 'GUICtrlCreateButton', 'GUICtrlCreateCheckbox', 'GUICtrlCreateCombo',
- 'GUICtrlCreateContextMenu', 'GUICtrlCreateDate', 'GUICtrlCreateDummy',
- 'GUICtrlCreateEdit', 'GUICtrlCreateGraphic', 'GUICtrlCreateGroup',
- 'GUICtrlCreateIcon', 'GUICtrlCreateInput', 'GUICtrlCreateLabel',
- 'GUICtrlCreateList', 'GUICtrlCreateListView', 'GUICtrlCreateListViewItem',
- 'GUICtrlCreateMenu', 'GUICtrlCreateMenuItem', 'GUICtrlCreateMonthCal',
- 'GUICtrlCreateObj', 'GUICtrlCreatePic', 'GUICtrlCreateProgress',
- 'GUICtrlCreateRadio', 'GUICtrlCreateSlider', 'GUICtrlCreateTab',
+lex:add_rule('function', token(lexer.FUNCTION, word_match({
+ 'Abs', 'ACos', 'AdlibRegister', 'AdlibUnRegister', 'Asc', 'AscW', 'ASin', 'Assign', 'ATan',
+ 'AutoItSetOption', 'AutoItWinGetTitle', 'AutoItWinSetTitle', 'Beep', 'Binary', 'BinaryLen',
+ 'BinaryMid', 'BinaryToString', 'BitAND', 'BitNOT', 'BitOR', 'BitRotate', 'BitShift', 'BitXOR',
+ 'BlockInput', 'Break', 'Call', 'CDTray', 'Ceiling', 'Chr', 'ChrW', 'ClipGet', 'ClipPut',
+ 'ConsoleRead', 'ConsoleWrite', 'ConsoleWriteError', 'ControlClick', 'ControlCommand',
+ 'ControlDisable', 'ControlEnable', 'ControlFocus', 'ControlGetFocus', 'ControlGetHandle',
+ 'ControlGetPos', 'ControlGetText', 'ControlHide', 'ControlListView', 'ControlMove', 'ControlSend',
+ 'ControlSetText', 'ControlShow', 'ControlTreeView', 'Cos', 'Dec', 'DirCopy', 'DirCreate',
+ 'DirGetSize', 'DirMove', 'DirRemove', 'DllCall', 'DllCallAddress', 'DllCallbackFree',
+ 'DllCallbackGetPtr', 'DllCallbackRegister', 'DllClose', 'DllOpen', 'DllStructCreate',
+ 'DllStructGetData', 'DllStructGetPtr', 'DllStructGetSize', 'DllStructSetData', 'DriveGetDrive',
+ 'DriveGetFileSystem', 'DriveGetLabel', 'DriveGetSerial', 'DriveGetType', 'DriveMapAdd',
+ 'DriveMapDel', 'DriveMapGet', 'DriveSetLabel', 'DriveSpaceFree', 'DriveSpaceTotal', 'DriveStatus',
+ 'EnvGet', 'EnvSet', 'EnvUpdate', 'Eval', 'Execute', 'Exp', 'FileChangeDir', 'FileClose',
+ 'FileCopy', 'FileCreateNTFSLink', 'FileCreateShortcut', 'FileDelete', 'FileExists',
+ 'FileFindFirstFile', 'FileFindNextFile', 'FileFlush', 'FileGetAttrib', 'FileGetEncoding',
+ 'FileGetLongName', 'FileGetPos', 'FileGetShortcut', 'FileGetShortName', 'FileGetSize',
+ 'FileGetTime', 'FileGetVersion', 'FileInstall', 'FileMove', 'FileOpen', 'FileOpenDialog',
+ 'FileRead', 'FileReadLine', 'FileReadToArray', 'FileRecycle', 'FileRecycleEmpty',
+ 'FileSaveDialog', 'FileSelectFolder', 'FileSetAttrib', 'FileSetEnd', 'FileSetPos', 'FileSetTime',
+ 'FileWrite', 'FileWriteLine', 'Floor', 'FtpSetProxy', 'FuncName', 'GUICreate', 'GUICtrlCreateAvi',
+ 'GUICtrlCreateButton', 'GUICtrlCreateCheckbox', 'GUICtrlCreateCombo', 'GUICtrlCreateContextMenu',
+ 'GUICtrlCreateDate', 'GUICtrlCreateDummy', 'GUICtrlCreateEdit', 'GUICtrlCreateGraphic',
+ 'GUICtrlCreateGroup', 'GUICtrlCreateIcon', 'GUICtrlCreateInput', 'GUICtrlCreateLabel',
+ 'GUICtrlCreateList', 'GUICtrlCreateListView', 'GUICtrlCreateListViewItem', 'GUICtrlCreateMenu',
+ 'GUICtrlCreateMenuItem', 'GUICtrlCreateMonthCal', 'GUICtrlCreateObj', 'GUICtrlCreatePic',
+ 'GUICtrlCreateProgress', 'GUICtrlCreateRadio', 'GUICtrlCreateSlider', 'GUICtrlCreateTab',
'GUICtrlCreateTabItem', 'GUICtrlCreateTreeView', 'GUICtrlCreateTreeViewItem',
- 'GUICtrlCreateUpdown', 'GUICtrlDelete', 'GUICtrlGetHandle', 'GUICtrlGetState',
- 'GUICtrlRead', 'GUICtrlRecvMsg', 'GUICtrlRegisterListViewSort',
- 'GUICtrlSendMsg', 'GUICtrlSendToDummy', 'GUICtrlSetBkColor',
- 'GUICtrlSetColor', 'GUICtrlSetCursor', 'GUICtrlSetData',
- 'GUICtrlSetDefBkColor', 'GUICtrlSetDefColor', 'GUICtrlSetFont',
- 'GUICtrlSetGraphic', 'GUICtrlSetImage', 'GUICtrlSetLimit',
- 'GUICtrlSetOnEvent', 'GUICtrlSetPos', 'GUICtrlSetResizing', 'GUICtrlSetState',
- 'GUICtrlSetStyle', 'GUICtrlSetTip', 'GUIDelete', 'GUIGetCursorInfo',
- 'GUIGetMsg', 'GUIGetStyle', 'GUIRegisterMsg', 'GUISetAccelerators',
- 'GUISetBkColor', 'GUISetCoord', 'GUISetCursor', 'GUISetFont', 'GUISetHelp',
- 'GUISetIcon', 'GUISetOnEvent', 'GUISetState', 'GUISetStyle', 'GUIStartGroup',
- 'GUISwitch', 'Hex', 'HotKeySet', 'HttpSetProxy', 'HttpSetUserAgent', 'HWnd',
- 'InetClose', 'InetGet', 'InetGetInfo', 'InetGetSize', 'InetRead', 'IniDelete',
- 'IniRead', 'IniReadSection', 'IniReadSectionNames', 'IniRenameSection',
- 'IniWrite', 'IniWriteSection', 'InputBox', 'Int', 'IsAdmin', 'IsArray',
- 'IsBinary', 'IsBool', 'IsDeclared', 'IsDllStruct', 'IsFloat', 'IsFunc',
- 'IsHWnd', 'IsInt', 'IsKeyword', 'IsNumber', 'IsObj', 'IsPtr', 'IsString',
- 'Log', 'MemGetStats', 'Mod', 'MouseClick', 'MouseClickDrag', 'MouseDown',
- 'MouseGetCursor', 'MouseGetPos', 'MouseMove', 'MouseUp', 'MouseWheel',
- 'MsgBox', 'Number', 'ObjCreate', 'ObjCreateInterface', 'ObjEvent', 'ObjGet',
- 'ObjName', 'OnAutoItExitRegister', 'OnAutoItExitUnRegister', 'Ping',
- 'PixelChecksum', 'PixelGetColor', 'PixelSearch', 'ProcessClose',
- 'ProcessExists', 'ProcessGetStats', 'ProcessList', 'ProcessSetPriority',
- 'ProcessWait', 'ProcessWaitClose', 'ProgressOff', 'ProgressOn', 'ProgressSet',
- 'Ptr', 'Random', 'RegDelete', 'RegEnumKey', 'RegEnumVal', 'RegRead',
- 'RegWrite', 'Round', 'Run', 'RunAs', 'RunAsWait', 'RunWait', 'Send',
- 'SendKeepActive', 'SetError', 'SetExtended', 'ShellExecute',
- 'ShellExecuteWait', 'Shutdown', 'Sin', 'Sleep', 'SoundPlay',
- 'SoundSetWaveVolume', 'SplashImageOn', 'SplashOff', 'SplashTextOn', 'Sqrt',
- 'SRandom', 'StatusbarGetText', 'StderrRead', 'StdinWrite', 'StdioClose',
- 'StdoutRead', 'String', 'StringAddCR', 'StringCompare', 'StringFormat',
- 'StringFromASCIIArray', 'StringInStr', 'StringIsAlNum', 'StringIsAlpha',
- 'StringIsASCII', 'StringIsDigit', 'StringIsFloat', 'StringIsInt',
- 'StringIsLower', 'StringIsSpace', 'StringIsUpper', 'StringIsXDigit',
- 'StringLeft', 'StringLen', 'StringLower', 'StringMid', 'StringRegExp',
- 'StringRegExpReplace', 'StringReplace', 'StringReverse', 'StringRight',
- 'StringSplit', 'StringStripCR', 'StringStripWS', 'StringToASCIIArray',
- 'StringToBinary', 'StringTrimLeft', 'StringTrimRight', 'StringUpper', 'Tan',
- 'TCPAccept', 'TCPCloseSocket', 'TCPConnect', 'TCPListen', 'TCPNameToIP',
- 'TCPRecv', 'TCPSend', 'TCPShutdown, UDPShutdown', 'TCPStartup, UDPStartup',
- 'TimerDiff', 'TimerInit', 'ToolTip', 'TrayCreateItem', 'TrayCreateMenu',
- 'TrayGetMsg', 'TrayItemDelete', 'TrayItemGetHandle', 'TrayItemGetState',
- 'TrayItemGetText', 'TrayItemSetOnEvent', 'TrayItemSetState',
- 'TrayItemSetText', 'TraySetClick', 'TraySetIcon', 'TraySetOnEvent',
- 'TraySetPauseIcon', 'TraySetState', 'TraySetToolTip', 'TrayTip', 'UBound',
- 'UDPBind', 'UDPCloseSocket', 'UDPOpen', 'UDPRecv', 'UDPSend', 'VarGetType',
- 'WinActivate', 'WinActive', 'WinClose', 'WinExists', 'WinFlash',
- 'WinGetCaretPos', 'WinGetClassList', 'WinGetClientSize', 'WinGetHandle',
- 'WinGetPos', 'WinGetProcess', 'WinGetState', 'WinGetText', 'WinGetTitle',
- 'WinKill', 'WinList', 'WinMenuSelectItem', 'WinMinimizeAll',
- 'WinMinimizeAllUndo', 'WinMove', 'WinSetOnTop', 'WinSetState', 'WinSetTitle',
+ 'GUICtrlCreateUpdown', 'GUICtrlDelete', 'GUICtrlGetHandle', 'GUICtrlGetState', 'GUICtrlRead',
+ 'GUICtrlRecvMsg', 'GUICtrlRegisterListViewSort', 'GUICtrlSendMsg', 'GUICtrlSendToDummy',
+ 'GUICtrlSetBkColor', 'GUICtrlSetColor', 'GUICtrlSetCursor', 'GUICtrlSetData',
+ 'GUICtrlSetDefBkColor', 'GUICtrlSetDefColor', 'GUICtrlSetFont', 'GUICtrlSetGraphic',
+ 'GUICtrlSetImage', 'GUICtrlSetLimit', 'GUICtrlSetOnEvent', 'GUICtrlSetPos', 'GUICtrlSetResizing',
+ 'GUICtrlSetState', 'GUICtrlSetStyle', 'GUICtrlSetTip', 'GUIDelete', 'GUIGetCursorInfo',
+ 'GUIGetMsg', 'GUIGetStyle', 'GUIRegisterMsg', 'GUISetAccelerators', 'GUISetBkColor',
+ 'GUISetCoord', 'GUISetCursor', 'GUISetFont', 'GUISetHelp', 'GUISetIcon', 'GUISetOnEvent',
+ 'GUISetState', 'GUISetStyle', 'GUIStartGroup', 'GUISwitch', 'Hex', 'HotKeySet', 'HttpSetProxy',
+ 'HttpSetUserAgent', 'HWnd', 'InetClose', 'InetGet', 'InetGetInfo', 'InetGetSize', 'InetRead',
+ 'IniDelete', 'IniRead', 'IniReadSection', 'IniReadSectionNames', 'IniRenameSection', 'IniWrite',
+ 'IniWriteSection', 'InputBox', 'Int', 'IsAdmin', 'IsArray', 'IsBinary', 'IsBool', 'IsDeclared',
+ 'IsDllStruct', 'IsFloat', 'IsFunc', 'IsHWnd', 'IsInt', 'IsKeyword', 'IsNumber', 'IsObj', 'IsPtr',
+ 'IsString', 'Log', 'MemGetStats', 'Mod', 'MouseClick', 'MouseClickDrag', 'MouseDown',
+ 'MouseGetCursor', 'MouseGetPos', 'MouseMove', 'MouseUp', 'MouseWheel', 'MsgBox', 'Number',
+ 'ObjCreate', 'ObjCreateInterface', 'ObjEvent', 'ObjGet', 'ObjName', 'OnAutoItExitRegister',
+ 'OnAutoItExitUnRegister', 'Ping', 'PixelChecksum', 'PixelGetColor', 'PixelSearch', 'ProcessClose',
+ 'ProcessExists', 'ProcessGetStats', 'ProcessList', 'ProcessSetPriority', 'ProcessWait',
+ 'ProcessWaitClose', 'ProgressOff', 'ProgressOn', 'ProgressSet', 'Ptr', 'Random', 'RegDelete',
+ 'RegEnumKey', 'RegEnumVal', 'RegRead', 'RegWrite', 'Round', 'Run', 'RunAs', 'RunAsWait',
+ 'RunWait', 'Send', 'SendKeepActive', 'SetError', 'SetExtended', 'ShellExecute',
+ 'ShellExecuteWait', 'Shutdown', 'Sin', 'Sleep', 'SoundPlay', 'SoundSetWaveVolume',
+ 'SplashImageOn', 'SplashOff', 'SplashTextOn', 'Sqrt', 'SRandom', 'StatusbarGetText', 'StderrRead',
+ 'StdinWrite', 'StdioClose', 'StdoutRead', 'String', 'StringAddCR', 'StringCompare',
+ 'StringFormat', 'StringFromASCIIArray', 'StringInStr', 'StringIsAlNum', 'StringIsAlpha',
+ 'StringIsASCII', 'StringIsDigit', 'StringIsFloat', 'StringIsInt', 'StringIsLower',
+ 'StringIsSpace', 'StringIsUpper', 'StringIsXDigit', 'StringLeft', 'StringLen', 'StringLower',
+ 'StringMid', 'StringRegExp', 'StringRegExpReplace', 'StringReplace', 'StringReverse',
+ 'StringRight', 'StringSplit', 'StringStripCR', 'StringStripWS', 'StringToASCIIArray',
+ 'StringToBinary', 'StringTrimLeft', 'StringTrimRight', 'StringUpper', 'Tan', 'TCPAccept',
+ 'TCPCloseSocket', 'TCPConnect', 'TCPListen', 'TCPNameToIP', 'TCPRecv', 'TCPSend', 'TCPShutdown',
+ 'TCPStartup', 'TimerDiff', 'TimerInit', 'ToolTip', 'TrayCreateItem', 'TrayCreateMenu',
+ 'TrayGetMsg', 'TrayItemDelete', 'TrayItemGetHandle', 'TrayItemGetState', 'TrayItemGetText',
+ 'TrayItemSetOnEvent', 'TrayItemSetState', 'TrayItemSetText', 'TraySetClick', 'TraySetIcon',
+ 'TraySetOnEvent', 'TraySetPauseIcon', 'TraySetState', 'TraySetToolTip', 'TrayTip', 'UBound',
+ 'UDPBind', 'UDPCloseSocket', 'UDPOpen', 'UDPRecv', 'UDPSend', 'UDPShutdown', 'UDPStartup',
+ 'VarGetType', 'WinActivate', 'WinActive', 'WinClose', 'WinExists', 'WinFlash', 'WinGetCaretPos',
+ 'WinGetClassList', 'WinGetClientSize', 'WinGetHandle', 'WinGetPos', 'WinGetProcess',
+ 'WinGetState', 'WinGetText', 'WinGetTitle', 'WinKill', 'WinList', 'WinMenuSelectItem',
+ 'WinMinimizeAll', 'WinMinimizeAllUndo', 'WinMove', 'WinSetOnTop', 'WinSetState', 'WinSetTitle',
'WinSetTrans', 'WinWait', 'WinWaitActive', 'WinWaitClose', 'WinWaitNotActive'
-}, nil, true))
+}, true)))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Comments.
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#comments-start', '#comments-end') + lexer.range('#cs', '#ce')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Preprocessor.
-local preproc = token(l.PREPROCESSOR, '#' * word_match({
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match({
'include-once', 'include', 'pragma', 'forceref', 'RequireAdmin', 'NoTrayIcon',
'OnAutoItStartRegister'
-}, '-', true))
+}, true)))
-- Strings.
-local dq_str = l.delimited_range('"', true, true)
-local sq_str = l.delimited_range("'", true, true)
-local inc = l.delimited_range('<>', true, true, true)
-local str = token(l.STRING, dq_str + sq_str + inc)
+local dq_str = lexer.range('"', true, false)
+local sq_str = lexer.range("'", true, false)
+local inc = lexer.range('<', '>', true, false, true)
+lex:add_rule('string', token(lexer.STRING, dq_str + sq_str + inc))
-- Macros.
-local macro = token('macro', '@' * (l.alnum + '_')^1)
+lex:add_rule('macro', token('macro', '@' * (lexer.alnum + '_')^1))
+lex:add_style('macro', lexer.styles.preprocessor)
-- Variables.
-local var = token(l.VARIABLE, '$' * (l.alnum + '_')^1)
-
--- Identifiers.
-local ident = token(l.IDENTIFIER, (l.alnum + '_')^1)
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + '_')^1))
-- Numbers.
-local nbr = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local oper = token(l.OPERATOR, S('+-^*/&<>=?:()[]'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', kw},
- {'function', func},
- {'preproc', preproc},
- {'string', str},
- {'macro', macro},
- {'variable', var},
- {'number', nbr},
- {'identifier', ident},
- {'operator', oper}
-}
-
-M._tokenstyles = {
- macro = l.STYLE_PREPROCESSOR
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=?:()[]')))
-return M
+return lex
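
Editor's note: the autoit hunk above shows the pattern this whole commit follows: the old table-driven registration (M._rules, M._tokenstyles) becomes incremental lex:add_rule()/lex:add_style() calls on a lexer object, and word_match() drops its extra-characters argument in favor of a single case-insensitivity flag. A minimal sketch of a new-style lexer file, using a hypothetical 'mylang' name and toy rules (illustrative only, not part of this commit):

local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local S = lpeg.S -- lpeg is provided as a global by the host, as in the files above

local lex = lexer.new('mylang')

-- Rules are tried in the order they are added, so whitespace comes first.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- The trailing 'true' makes the match case-insensitive, replacing the old
-- word_match(words, extra_chars, case_insensitive) signature.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match({'if', 'else', 'endif'}, true)))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Custom token names need an explicit style; predefined ones do not.
lex:add_rule('macro', token('macro', '@' * lexer.word))
lex:add_style('macro', lexer.styles.preprocessor)
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/=()')))

return lex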
diff --git a/lua/lexers/awk.lua b/lua/lexers/awk.lua
index 87e39d9..0b3f9bf 100644
--- a/lua/lexers/awk.lua
+++ b/lua/lexers/awk.lua
@@ -1,12 +1,12 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- AWK LPeg lexer.
-- Modified by Wolfgang Seeberg 2012, 2013.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'awk'}
+local lex = lexer.new('awk')
local LEFTBRACKET = '['
local RIGHTBRACKET = ']'
@@ -20,14 +20,13 @@ local DQUOTE = '"'
local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'}
local COMPANION = {['('] = '[', ['['] = '('}
local CC = {
- alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1,
- print = 1, punct = 1, space = 1, upper = 1, xdigit = 1
+ alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1, print = 1, punct = 1,
+ space = 1, upper = 1, xdigit = 1
}
local LastRegexEnd = 0
local BackslashAtCommentEnd = 0
local KW_BEFORE_RX = {
- case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1,
- ['return'] = 1
+ case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1, ['return'] = 1
}
local function findKeyword(input, e)
@@ -146,9 +145,7 @@ local function scanGawkRegex(input, index)
return false
end
-- Is only called immediately after scanGawkRegex().
-local function scanRegex()
- return ScanRegexResult
-end
+local function scanRegex() return ScanRegexResult end
local function scanString(input, index)
local i = index
@@ -160,7 +157,7 @@ local function scanString(input, index)
return i + 1
elseif input:sub(i, i) == BACKSLASH then
i = i + 1
- -- l.delimited_range() doesn't handle CRLF.
+ -- lexer.range() doesn't handle CRLF.
if input:sub(i, i + 1) == CRLF then i = i + 1 end
end
i = i + 1
@@ -168,8 +165,7 @@ local function scanString(input, index)
return false
end
--- purpose: prevent isRegex() from entering a comment line that ends with a
--- backslash.
+-- Purpose: prevent isRegex() from entering a comment line that ends with a backslash.
local function scanComment(input, index)
local _, i = input:find('[^\r\n]*', index)
if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end
@@ -220,115 +216,69 @@ local function scanFieldDelimiters(input, index)
end
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, '#' * P(scanComment))
+lex:add_rule('comment', token(lexer.COMMENT, '#' * P(scanComment)))
-- Strings.
-local string = token(l.STRING, DQUOTE * P(scanString))
-
--- Regular expressions.
--- Slash delimited regular expressions are preceded by most operators or
--- the keywords 'print' and 'case', possibly on a preceding line. They
--- can contain unescaped slashes and brackets in brackets. Some escape
--- sequences like '\S', '\s' have special meanings with Gawk. Tokens that
--- contain them are displayed differently.
-local regex = token(l.REGEX, SLASH * P(scanRegex))
-local gawkRegex = token('gawkRegex', SLASH * P(scanGawkRegex))
+lex:add_rule('string', token(lexer.STRING, DQUOTE * P(scanString)))
--- no leading sign because it might be binary.
-local float = ((l.digit ^ 1 * ('.' * l.digit ^ 0) ^ -1) +
- ('.' * l.digit ^ 1)) * (S('eE') * S('+-') ^ -1 * l.digit ^ 1) ^ -1
--- Numbers.
-local number = token(l.NUMBER, float)
-local gawkNumber = token('gawkNumber', l.hex_num + l.oct_num)
-
--- Operators.
-local operator = token(l.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))
-local gawkOperator = token('gawkOperator', P("|&") + "@" + "**=" + "**")
+-- No leading sign because it might be binary.
+local float = ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + ('.' * lexer.digit^1)) *
+ (S('eE') * S('+-')^-1 * lexer.digit^1)^-1
-- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc.
-local field = token('field', P('$') * S('$+-') ^ 0 *
- (float + (l.word ^ 0 * '(' * P(scanFieldDelimiters)) +
- (l.word ^ 1 * ('[' * P(scanFieldDelimiters)) ^ -1) +
- ('"' * P(scanString)) + ('/' * P(eatRegex) * '/')))
-
--- Functions.
-local func = token(l.FUNCTION, l.word * #P('('))
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do',
- 'else', 'exit', 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if',
- 'in', 'index', 'int', 'length', 'log', 'match', 'next', 'nextfile', 'print',
- 'printf', 'rand', 'return', 'sin', 'split', 'sprintf', 'sqrt', 'srand', 'sub',
- 'substr', 'system', 'tolower', 'toupper', 'while'
-})
-
-local gawkKeyword = token('gawkKeyword', word_match{
- 'BEGINFILE', 'ENDFILE', 'adump', 'and', 'asort', 'asorti', 'bindtextdomain',
- 'case', 'compl', 'dcgettext', 'dcngettext', 'default', 'extension', 'func',
- 'gensub', 'include', 'isarray', 'load', 'lshift', 'mktime', 'or', 'patsplit',
- 'rshift', 'stopme', 'strftime', 'strtonum', 'switch', 'systime', 'xor'
-})
+lex:add_rule('field', token('field', '$' * S('$+-')^0 *
+ (float + lexer.word^0 * '(' * P(scanFieldDelimiters) + lexer.word^1 *
+ ('[' * P(scanFieldDelimiters))^-1 + '"' * P(scanString) + '/' * P(eatRegex) * '/')))
+lex:add_style('field', lexer.styles.label)
-local builtInVariable = token('builtInVariable', word_match{
- 'ARGC', 'ARGV', 'CONVFMT', 'ENVIRON', 'FILENAME', 'FNR', 'FS', 'NF', 'NR',
- 'OFMT', 'OFS', 'ORS', 'RLENGTH', 'RS', 'RSTART', 'SUBSEP'
-})
-
-local gawkBuiltInVariable = token('gawkBuiltInVariable', word_match {
- 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE',
- 'LINT', 'PREC', 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN'
-})
-
--- Within each group order matters, but the groups themselves (except the
--- last) can be in any order.
-M._rules = {
- {'whitespace', ws},
-
- {'comment', comment},
-
- {'string', string},
+-- Regular expressions.
+-- Slash delimited regular expressions are preceded by most operators or the keywords 'print'
+-- and 'case', possibly on a preceding line. They can contain unescaped slashes and brackets
+-- in brackets. Some escape sequences like '\S', '\s' have special meanings with Gawk. Tokens
+-- that contain them are displayed differently.
+lex:add_rule('gawkRegex', token('gawkRegex', SLASH * P(scanGawkRegex)))
+lex:add_style('gawkRegex', lexer.styles.preprocessor .. {underlined = true})
+lex:add_rule('regex', token(lexer.REGEX, SLASH * P(scanRegex)))
- {'field', field},
+-- Operators.
+lex:add_rule('gawkOperator', token('gawkOperator', P("|&") + "@" + "**=" + "**"))
+lex:add_style('gawkOperator', lexer.styles.operator .. {underlined = true})
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~')))
- {'gawkRegex', gawkRegex},
- {'regex', regex},
- {'gawkOperator', gawkOperator},
- {'operator', operator},
+-- Numbers.
+lex:add_rule('gawkNumber', token('gawkNumber', lexer.hex_num + lexer.oct_num))
+lex:add_style('gawkNumber', lexer.styles.number .. {underlined = true})
+lex:add_rule('number', token(lexer.NUMBER, float))
- {'gawkNumber', gawkNumber},
- {'number', number},
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do', 'else', 'exit',
+ 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if', 'in', 'index', 'int', 'length',
+ 'log', 'match', 'next', 'nextfile', 'print', 'printf', 'rand', 'return', 'sin', 'split',
+ 'sprintf', 'sqrt', 'srand', 'sub', 'substr', 'system', 'tolower', 'toupper', 'while'
+}))
+
+lex:add_rule('builtInVariable', token('builtInVariable', word_match(
+ 'ARGC ARGV CONVFMT ENVIRON FILENAME FNR FS NF NR OFMT OFS ORS RLENGTH RS RSTART SUBSEP')))
+lex:add_style('builtInVariable', lexer.styles.constant)
+
+lex:add_rule('gawkBuiltInVariable', token('gawkBuiltInVariable', word_match{
+ 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE', 'LINT', 'PREC',
+ 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN'
+}))
+lex:add_style('gawkBuiltInVariable', lexer.styles.constant .. {underlined = true})
- {'keyword', keyword},
- {'builtInVariable', builtInVariable},
- {'gawkKeyword', gawkKeyword},
- {'gawkBuiltInVariable', gawkBuiltInVariable},
- {'function', func},
- {'identifier', identifier},
-}
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word * #P('(')))
-M._tokenstyles = {
- builtInVariable = l.STYLE_CONSTANT,
- default = l.STYLE_ERROR,
- field = l.STYLE_LABEL,
- gawkBuiltInVariable = l.STYLE_CONSTANT..',underlined',
- gawkKeyword = l.STYLE_KEYWORD..',underlined',
- gawkNumber = l.STYLE_NUMBER..',underlined',
- gawkOperator = l.STYLE_OPERATOR..',underlined',
- gawkRegex = l.STYLE_PREPROCESSOR..',underlined',
- regex = l.STYLE_PREPROCESSOR
-}
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
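
Editor's note: awk.lua keeps its hand-written scanners (scanRegex, scanString, scanFieldDelimiters) across the rewrite; they rely on the LPeg idiom of wrapping a Lua function in P(). At match time the function receives the whole subject and the current position, and returns the position just past the match, or a falsy value to fail. A self-contained sketch of that technique (not code from this commit):

local lpeg = require('lpeg')
local P = lpeg.P

-- Matches one balanced parenthesized group, however deeply nested.
local balanced_parens = '(' * P(function(input, index)
  local depth = 1 -- the opening '(' was consumed by the pattern before us
  for i = index, #input do
    local c = input:sub(i, i)
    if c == '(' then
      depth = depth + 1
    elseif c == ')' then
      depth = depth - 1
      if depth == 0 then return i + 1 end -- position just past the match
    end
  end
  return nil -- unbalanced: the whole pattern fails
end)

assert(lpeg.match(balanced_parens, '(a(b)c) rest') == 8)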
diff --git a/lua/lexers/bash.lua b/lua/lexers/bash.lua
index 7927b4a..da4472e 100644
--- a/lua/lexers/bash.lua
+++ b/lua/lexers/bash.lua
@@ -1,82 +1,58 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Shell LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'bash'}
+local lex = lexer.new('bash')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for', 'do', 'done',
+ 'continue', 'local', 'return', 'select',
+ -- Operators.
+ '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t', '-u', '-w', '-x',
+ '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o', '-z', '-n', '-eq', '-ne', '-lt', '-le',
+ '-gt', '-ge'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"')
-local ex_str = l.delimited_range('`')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+local ex_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
- local s, e, minus, _, delimiter =
- input:find('(-?)(["\']?)([%a_][%w_]*)%2[\n\r\f;]+', index)
- if s == index and delimiter then
- -- If the starting delimiter of a here-doc begins with "-", then
- -- spaces are allowed to come before the closing delimiter.
- local close_pattern
- if minus == '-' then
- close_pattern = '[\n\r\f%s]+'..delimiter..'\n'
- else
- close_pattern = '[\n\r\f]+'..delimiter..'\n'
- end
- local _, e = input:find(close_pattern, e)
- return e and e + 1 or #input + 1
- end
+ local _, e, _, delimiter = input:find('^%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
+ if not delimiter then return end
+ _, e = input:find('[\n\r\f]+' .. delimiter, e)
+ return e and e + 1 or #input + 1
end)
-local string = token(l.STRING, sq_str + dq_str + ex_str + heredoc)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
- 'do', 'done', 'continue', 'local', 'return', 'select',
- -- Operators.
- '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
- '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
- '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
-}, '-'))
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Variables.
-local variable = token(l.VARIABLE,
- '$' * (S('!#?*@$') + l.digit^1 + l.word +
- l.delimited_range('{}', true, true, true)))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' *
+ (S('!#?*@$') + lexer.digit^1 + lexer.word + lexer.range('{', '}', true, false, true))))
-- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')))
-M._foldsymbols = {
- _patterns = {'[a-z]+', '[{}]', '#'},
- [l.KEYWORD] = {
- ['if'] = 1, fi = -1, case = 1, esac = -1, ['do'] = 1, done = -1
- },
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'if', 'fi')
+lex:add_fold_point(lexer.KEYWORD, 'case', 'esac')
+lex:add_fold_point(lexer.KEYWORD, 'do', 'done')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
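
Editor's note: the rewritten heredoc recognizer is the subtlest change in bash.lua. The first find() is now anchored (^), captures an optional quote plus the delimiter word, and uses the %1 back-reference to require a matching closing quote; the second find() then searches for the delimiter at the start of a later line, consuming to end of input when the heredoc never closes. Note that the special case described in the removed comment, where a leading '-' (<<-) allowed whitespace before the closing delimiter, has no counterpart in the new pattern. A standalone check of the new recognizer (illustrative; in the real rule, '<<' has already been consumed):

local function heredoc_end(input, index)
  local _, e, _, delimiter = input:find('^%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
  if not delimiter then return end
  -- The delimiter is only word characters, so it is safe to splice into a pattern.
  _, e = input:find('[\n\r\f]+' .. delimiter, e)
  return e and e + 1 or #input + 1
end

local doc = 'EOF\nline one\nline two\nEOF\n'
print(heredoc_end(doc, 1)) --> 26, just past the closing EOF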
diff --git a/lua/lexers/batch.lua b/lua/lexers/batch.lua
index bb20a33..5468a1d 100644
--- a/lua/lexers/batch.lua
+++ b/lua/lexers/batch.lua
@@ -1,71 +1,53 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Batch LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'batch'}
+local lex = lexer.new('batch', {case_insensitive_fold_points = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local rem = (P('REM') + 'rem') * l.space
-local comment = token(l.COMMENT, (rem + '::') * l.nonnewline^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'cd', 'chdir', 'md', 'mkdir', 'cls', 'for', 'if', 'echo', 'echo.', 'move',
- 'copy', 'ren', 'del', 'set', 'call', 'exit', 'setlocal', 'shift',
- 'endlocal', 'pause', 'defined', 'exist', 'errorlevel', 'else', 'in', 'do',
- 'NUL', 'AUX', 'PRN', 'not', 'goto', 'pushd', 'popd'
-}, nil, true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'cd', 'chdir', 'md', 'mkdir', 'cls', 'for', 'if', 'echo', 'echo.', 'move', 'copy', 'ren', 'del',
+ 'set', 'call', 'exit', 'setlocal', 'shift', 'endlocal', 'pause', 'defined', 'exist', 'errorlevel',
+ 'else', 'in', 'do', 'NUL', 'AUX', 'PRN', 'not', 'goto', 'pushd', 'popd'
+}, true)))
-- Functions.
-local func = token(l.FUNCTION, word_match({
- 'APPEND', 'ATTRIB', 'CHKDSK', 'CHOICE', 'DEBUG', 'DEFRAG', 'DELTREE',
- 'DISKCOMP', 'DISKCOPY', 'DOSKEY', 'DRVSPACE', 'EMM386', 'EXPAND', 'FASTOPEN',
- 'FC', 'FDISK', 'FIND', 'FORMAT', 'GRAPHICS', 'KEYB', 'LABEL', 'LOADFIX',
- 'MEM', 'MODE', 'MORE', 'MOVE', 'MSCDEX', 'NLSFUNC', 'POWER', 'PRINT', 'RD',
- 'REPLACE', 'RESTORE', 'SETVER', 'SHARE', 'SORT', 'SUBST', 'SYS', 'TREE',
- 'UNDELETE', 'UNFORMAT', 'VSAFE', 'XCOPY'
-}, nil, true))
+lex:add_rule('function', token(lexer.FUNCTION, word_match({
+ 'APPEND', 'ATTRIB', 'CHKDSK', 'CHOICE', 'DEBUG', 'DEFRAG', 'DELTREE', 'DISKCOMP', 'DISKCOPY',
+ 'DOSKEY', 'DRVSPACE', 'EMM386', 'EXPAND', 'FASTOPEN', 'FC', 'FDISK', 'FIND', 'FORMAT', 'GRAPHICS',
+ 'KEYB', 'LABEL', 'LOADFIX', 'MEM', 'MODE', 'MORE', 'MOVE', 'MSCDEX', 'NLSFUNC', 'POWER', 'PRINT',
+ 'RD', 'REPLACE', 'RESTORE', 'SETVER', 'SHARE', 'SORT', 'SUBST', 'SYS', 'TREE', 'UNDELETE',
+ 'UNFORMAT', 'VSAFE', 'XCOPY'
+}, true)))
+
+-- Comments.
+local rem = (P('REM') + 'rem') * #lexer.space
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(rem + '::')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Variables.
-local variable = token(l.VARIABLE,
- '%' * (l.digit + '%' * l.alpha) +
- l.delimited_range('%', true, true))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
--- Operators.
-local operator = token(l.OPERATOR, S('+|&!<>='))
+-- Variables.
+local arg = '%' * lexer.digit + '%~' * lexer.alnum^1
+local variable = lexer.range('%', true, false)
+lex:add_rule('variable', token(lexer.VARIABLE, arg + variable))
-- Labels.
-local label = token(l.LABEL, ':' * l.word)
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'comment', comment},
- {'identifier', identifier},
- {'string', string},
- {'variable', variable},
- {'label', label},
- {'operator', operator},
-}
+lex:add_rule('label', token(lexer.LABEL, ':' * lexer.word))
-M._LEXBYLINE = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+|&!<>=')))
-M._foldsymbols = {
- _patterns = {'[A-Za-z]+'},
- [l.KEYWORD] = {setlocal = 1, endlocal = -1, SETLOCAL = 1, ENDLOCAL = -1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'setlocal', 'endlocal')
-return M
+return lex
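
Editor's note: batch.lua is the clearest example of the fold-point migration: the old M._foldsymbols table had to enumerate every case variant by hand, while the new lexer declares each pair once and lets a constructor option handle case. A side-by-side sketch, drawn from this hunk:

local lexer = require('lexer')

-- Old API: every case variant listed explicitly.
-- M._foldsymbols = {
--   _patterns = {'[A-Za-z]+'},
--   [l.KEYWORD] = {setlocal = 1, endlocal = -1, SETLOCAL = 1, ENDLOCAL = -1}
-- }

-- New API: one declaration per pair; the option also covers 'SETLOCAL' etc.
local lex = lexer.new('batch', {case_insensitive_fold_points = true})
lex:add_fold_point(lexer.KEYWORD, 'setlocal', 'endlocal')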
diff --git a/lua/lexers/bibtex.lua b/lua/lexers/bibtex.lua
index bf72095..06795e6 100644
--- a/lua/lexers/bibtex.lua
+++ b/lua/lexers/bibtex.lua
@@ -1,58 +1,46 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Bibtex LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'bibtex'}
+local lex = lexer.new('bibtex')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"') +
- l.delimited_range('{}', false, true, true))
+local ws = token(lexer.WHITESPACE, lexer.space^1)
-- Fields.
-local field = token('field', word_match{
- 'author', 'title', 'journal', 'year', 'volume', 'number', 'pages', 'month',
- 'note', 'key', 'publisher', 'editor', 'series', 'address', 'edition',
- 'howpublished', 'booktitle', 'organization', 'chapter', 'school',
- 'institution', 'type', 'isbn', 'issn', 'affiliation', 'issue', 'keyword',
- 'url'
-})
+lex:add_rule('field', token('field', word_match{
+ 'author', 'title', 'journal', 'year', 'volume', 'number', 'pages', 'month', 'note', 'key',
+ 'publisher', 'editor', 'series', 'address', 'edition', 'howpublished', 'booktitle',
+ 'organization', 'chapter', 'school', 'institution', 'type', 'isbn', 'issn', 'affiliation',
+ 'issue', 'keyword', 'url'
+}))
+lex:add_style('field', lexer.styles.constant)
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S(',='))
+-- Strings.
+local dq_str = lexer.range('"')
+local br_str = lexer.range('{', '}', false, false, true)
+lex:add_rule('string', token(lexer.STRING, dq_str + br_str))
-M._rules = {
- {'whitespace', ws},
- {'field', field},
- {'identifier', identifier},
- {'string', string},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S(',=')))
-- Embedded in Latex.
-local latex = l.load('latex')
+local latex = lexer.load('latex')
-- Embedded Bibtex.
-local entry = token('entry', P('@') * word_match({
- 'book', 'article', 'booklet', 'conference', 'inbook', 'incollection',
- 'inproceedings', 'manual', 'mastersthesis', 'lambda', 'misc', 'phdthesis',
- 'proceedings', 'techreport', 'unpublished'
-}, nil, true))
-local bibtex_start_rule = entry * ws^0 * token(l.OPERATOR, P('{'))
-local bibtex_end_rule = token(l.OPERATOR, P('}'))
-l.embed_lexer(latex, M, bibtex_start_rule, bibtex_end_rule)
-
-M._tokenstyles = {
- field = l.STYLE_CONSTANT,
- entry = l.STYLE_PREPROCESSOR
-}
-
-return M
+local entry = token('entry', '@' * word_match({
+ 'book', 'article', 'booklet', 'conference', 'inbook', 'incollection', 'inproceedings', 'manual',
+ 'mastersthesis', 'lambda', 'misc', 'phdthesis', 'proceedings', 'techreport', 'unpublished'
+}, true))
+lex:add_style('entry', lexer.styles.preprocessor)
+local bibtex_start_rule = entry * ws^0 * token(lexer.OPERATOR, '{')
+local bibtex_end_rule = token(lexer.OPERATOR, '}')
+latex:embed(lex, bibtex_start_rule, bibtex_end_rule)
+
+return lex
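
Editor's note: bibtex.lua shows how child-lexer embedding changed: l.embed_lexer(parent, child, start, stop) becomes a method on the parent, parent:embed(child, start_rule, end_rule), with lexer.load() fetching the parent. A generic sketch of the arrangement used above, with hypothetical 'guest'/'host' names:

local lexer = require('lexer')
local token = lexer.token

local guest = lexer.new('guest') -- the lexer this file defines
-- ... guest rules would be added here ...

local host = lexer.load('host') -- an existing lexer to hand control to and from
-- The guest lexes everything between a start_rule match and an end_rule match.
local start_rule = token('entry', '@' * lexer.word) * token(lexer.OPERATOR, '{')
local end_rule = token(lexer.OPERATOR, '}')
host:embed(guest, start_rule, end_rule)

return guest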
diff --git a/lua/lexers/boo.lua b/lua/lexers/boo.lua
index 01d989e..5b6a6e4 100644
--- a/lua/lexers/boo.lua
+++ b/lua/lexers/boo.lua
@@ -1,81 +1,64 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Boo LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'boo'}
+local lex = lexer.new('boo')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '#' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local regex_str = #('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
- l.delimited_range('/', true)
-local string = token(l.STRING, triple_dq_str + sq_str + dq_str) +
- token(l.REGEX, regex_str)
-
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) *
- (S('msdhsfFlL') + 'ms')^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'and', 'break', 'cast', 'continue', 'elif', 'else', 'ensure', 'except', 'for',
- 'given', 'goto', 'if', 'in', 'isa', 'is', 'not', 'or', 'otherwise', 'pass',
- 'raise', 'ref', 'try', 'unless', 'when', 'while',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'break', 'cast', 'continue', 'elif', 'else', 'ensure', 'except', 'for', 'given', 'goto',
+ 'if', 'in', 'isa', 'is', 'not', 'or', 'otherwise', 'pass', 'raise', 'ref', 'try', 'unless',
+ 'when', 'while',
-- Definitions.
- 'abstract', 'callable', 'class', 'constructor', 'def', 'destructor', 'do',
- 'enum', 'event', 'final', 'get', 'interface', 'internal', 'of', 'override',
- 'partial', 'private', 'protected', 'public', 'return', 'set', 'static',
- 'struct', 'transient', 'virtual', 'yield',
+ 'abstract', 'callable', 'class', 'constructor', 'def', 'destructor', 'do', 'enum', 'event',
+ 'final', 'get', 'interface', 'internal', 'of', 'override', 'partial', 'private', 'protected',
+ 'public', 'return', 'set', 'static', 'struct', 'transient', 'virtual', 'yield',
-- Namespaces.
'as', 'from', 'import', 'namespace',
-- Other.
'self', 'super', 'null', 'true', 'false'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'byte', 'char', 'date', 'decimal', 'double', 'duck', 'float', 'int',
- 'long', 'object', 'operator', 'regex', 'sbyte', 'short', 'single', 'string',
- 'timespan', 'uint', 'ulong', 'ushort'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'byte', 'char', 'date', 'decimal', 'double', 'duck', 'float', 'int', 'long', 'object',
+ 'operator', 'regex', 'sbyte', 'short', 'single', 'string', 'timespan', 'uint', 'ulong', 'ushort'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'array', 'assert', 'checked', 'enumerate', '__eval__', 'filter', 'getter',
- 'len', 'lock', 'map', 'matrix', 'max', 'min', 'normalArrayIndexing', 'print',
- 'property', 'range', 'rawArrayIndexing', 'required', '__switch__', 'typeof',
- 'unchecked', 'using', 'yieldAll', 'zip'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'array', 'assert', 'checked', 'enumerate', '__eval__', 'filter', 'getter', 'len', 'lock', 'map',
+ 'matrix', 'max', 'min', 'normalArrayIndexing', 'print', 'property', 'range', 'rawArrayIndexing',
+ 'required', '__switch__', 'typeof', 'unchecked', 'using', 'yieldAll', 'zip'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+local string = token(lexer.STRING, tq_str + sq_str + dq_str)
+local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * lexer.range('/', true)
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
+
+-- Comments.
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * (S('msdhsfFlL') + 'ms')^-1))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))
-return M
+return lex
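
Editor's note: the regex rule in boo.lua stacks three guards: a zero-width #P('/') lookahead that fails fast unless the next character is '/', lexer.last_char_includes() to require that the previous significant character could precede an operand, and only then the delimited range itself. This is what keeps division from lexing as a regex. An annotated restatement of the rule (a sketch; behavior inferred from these hunks):

local lexer = require('lexer')
local P = lpeg.P

-- 'x = /abc/' -- '=' is in the set, so '/abc/' lexes as REGEX
-- 'a / b'     -- previous char 'a' is not, so '/' falls through to OPERATOR
local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * lexer.range('/', true)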
diff --git a/lua/lexers/caml.lua b/lua/lexers/caml.lua
index 8d2bfa6..fe70689 100644
--- a/lua/lexers/caml.lua
+++ b/lua/lexers/caml.lua
@@ -1,83 +1,63 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- OCaml LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'caml'}
+local lex = lexer.new('caml')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, l.nested_pair('(*', '*)'))
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done',
- 'downto', 'else', 'end', 'exception', 'external', 'failwith', 'false',
- 'flush', 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
- 'inherit', 'incr', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor',
- 'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open',
- 'option', 'or', 'parser', 'private', 'ref', 'rec', 'raise', 'regexp', 'sig',
- 'struct', 'stdout', 'stdin', 'stderr', 'then', 'to', 'true', 'try', 'type',
- 'val', 'virtual', 'when', 'while', 'with'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done', 'downto', 'else',
+ 'end', 'exception', 'external', 'failwith', 'false', 'flush', 'for', 'fun', 'function', 'functor',
+ 'if', 'in', 'include', 'incr', 'inherit', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor',
+ 'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open', 'option', 'or',
+ 'parser', 'private', 'raise', 'rec', 'ref', 'regexp', 'sig', 'stderr', 'stdin', 'stdout',
+ 'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'virtual', 'when', 'while', 'with'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'int', 'float', 'bool', 'char', 'string', 'unit'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match('bool char float int string unit')))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'raise', 'invalid_arg', 'failwith', 'compare', 'min', 'max', 'succ', 'pred',
- 'mod', 'abs', 'max_int', 'min_int', 'sqrt', 'exp', 'log', 'log10', 'cos',
- 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'cosh', 'sinh', 'tanh', 'ceil',
- 'floor', 'abs_float', 'mod_float', 'frexp', 'ldexp', 'modf', 'float',
- 'float_of_int', 'truncate', 'int_of_float', 'infinity', 'nan', 'max_float',
- 'min_float', 'epsilon_float', 'classify_float', 'int_of_char', 'char_of_int',
- 'ignore', 'string_of_bool', 'bool_of_string', 'string_of_int',
- 'int_of_string', 'string_of_float', 'float_of_string', 'fst', 'snd', 'stdin',
- 'stdout', 'stderr', 'print_char', 'print_string', 'print_int', 'print_float',
- 'print_endline', 'print_newline', 'prerr_char', 'prerr_string', 'prerr_int',
- 'prerr_float', 'prerr_endline', 'prerr_newline', 'read_line', 'read_int',
- 'read_float', 'open_out', 'open_out_bin', 'open_out_gen', 'flush',
- 'flush_all', 'output_char', 'output_string', 'output', 'output_byte',
- 'output_binary_int', 'output_value', 'seek_out', 'pos_out',
- 'out_channel_length', 'close_out', 'close_out_noerr', 'set_binary_mode_out',
- 'open_in', 'open_in_bin', 'open_in_gen', 'input_char', 'input_line', 'input',
- 'really_input', 'input_byte', 'input_binary_int', 'input_value', 'seek_in',
- 'pos_in', 'in_channel_length', 'close_in', 'close_in_noerr',
- 'set_binary_mode_in', 'incr', 'decr', 'string_of_format', 'format_of_string',
- 'exit', 'at_exit'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'abs_float', 'acos', 'asin', 'atan', 'atan2', 'at_exit', 'bool_of_string', 'ceil',
+ 'char_of_int', 'classify_float', 'close_in', 'close_in_noerr', 'close_out', 'close_out_noerr',
+ 'compare', 'cos', 'cosh', 'decr', 'epsilon_float', 'exit', 'exp', 'failwith', 'float',
+ 'float_of_int', 'float_of_string', 'floor', 'flush', 'flush_all', 'format_of_string', 'frexp',
+ 'fst', 'ignore', 'in_channel_length', 'incr', 'infinity', 'input', 'input_binary_int',
+ 'input_byte', 'input_char', 'input_line', 'input_value', 'int_of_char', 'int_of_float',
+ 'int_of_string', 'invalid_arg', 'ldexp', 'log', 'log10', 'max', 'max_float', 'max_int', 'min',
+ 'min_float', 'min_int', 'mod', 'modf', 'mod_float', 'nan', 'open_in', 'open_in_bin',
+ 'open_in_gen', 'open_out', 'open_out_bin', 'open_out_gen', 'out_channel_length', 'output',
+ 'output_binary_int', 'output_byte', 'output_char', 'output_string', 'output_value', 'pos_in',
+ 'pos_out', 'pred', 'prerr_char', 'prerr_endline', 'prerr_float', 'prerr_int', 'prerr_newline',
+ 'prerr_string', 'print_char', 'print_endline', 'print_float', 'print_int', 'print_newline',
+ 'print_string', 'raise', 'read_float', 'read_int', 'read_line', 'really_input', 'seek_in',
+ 'seek_out', 'set_binary_mode_in', 'set_binary_mode_out', 'sin', 'sinh', 'snd', 'sqrt', 'stderr',
+ 'stdin', 'stdout', 'string_of_bool', 'string_of_float', 'string_of_format', 'string_of_int',
+ 'succ', 'tan', 'tanh', 'truncate'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('(*', '*)', false, false, true)))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')))
-return M
+return lex
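
Editor's note: caml.lua swaps l.nested_pair('(*', '*)') for lexer.range('(*', '*)', false, false, true); the three booleans are single_line, escapes, and balanced. Collecting the lexer.range() signatures used across this commit for reference (argument meaning inferred from these hunks):

local lexer = require('lexer')

local dq = lexer.range('"')                       -- "..." with \ escapes; may span lines
local sq = lexer.range("'", true)                 -- '...' confined to a single line
local pct = lexer.range('%', true, false)         -- %...% on one line, no escape handling
local cmt = lexer.range('(*', '*)', false, false, true) -- balanced: (* a (* b *) c *) matches whole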
diff --git a/lua/lexers/chuck.lua b/lua/lexers/chuck.lua
index 0a2aa30..efd7a73 100644
--- a/lua/lexers/chuck.lua
+++ b/lua/lexers/chuck.lua
@@ -1,92 +1,68 @@
--- Copyright 2010-2017 Martin Morawetz. See LICENSE.
+-- Copyright 2010-2022 Martin Morawetz. See LICENSE.
-- ChucK LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'chuck'}
+local lex = lexer.new('chuck')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Constants.
-local constant = token(l.CONSTANT, word_match{
- -- special values
- 'false', 'maybe', 'me', 'null', 'NULL', 'pi', 'true'
-})
-
--- Special special value.
-local now = token('now', P('now'))
-
--- Times.
-local time = token('time', word_match{
- 'samp', 'ms', 'second', 'minute', 'hour', 'day', 'week'
-})
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-- Control structures.
- 'break', 'continue', 'else', 'for', 'if', 'repeat', 'return', 'switch',
- 'until', 'while',
+ 'break', 'continue', 'else', 'for', 'if', 'repeat', 'return', 'switch', 'until', 'while',
-- Other chuck keywords.
'function', 'fun', 'spork', 'const', 'new'
-})
+}))
--- Classes.
-local class = token(l.CLASS, word_match{
- -- Class keywords.
- 'class', 'extends', 'implements', 'interface', 'private', 'protected',
- 'public', 'pure', 'super', 'static', 'this'
-})
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ -- Special values.
+ 'false', 'maybe', 'me', 'null', 'NULL', 'pi', 'true'
+}))
-- Types.
-local types = token(l.TYPE, word_match{
- 'float', 'int', 'time', 'dur', 'void', 'same'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match('float int time dur void same')))
+
+-- Classes.
+lex:add_rule('class', token(lexer.CLASS, word_match{
+ -- Class keywords.
+ 'class', 'extends', 'implements', 'interface', 'private', 'protected', 'public', 'pure', 'static',
+ 'super', 'this'
+}))
-- Global ugens.
-local ugen = token('ugen', word_match{'dac', 'adc', 'blackhole'})
+lex:add_rule('ugen', token('ugen', word_match('dac adc blackhole')))
+lex:add_style('ugen', lexer.styles.constant)
+
+-- Times.
+lex:add_rule('time', token('time', word_match('samp ms second minute hour day week')))
+lex:add_style('time', lexer.styles.number)
+
+-- Special special value.
+lex:add_rule('now', token('now', 'now'))
+lex:add_style('now', lexer.styles.constant .. {bold = true})
+
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@'))
-
-M._rules = {
- {'whitespace', ws},
- {'string', string},
- {'keyword', keyword},
- {'constant', constant},
- {'type', types},
- {'class', class},
- {'ugen', ugen},
- {'time', time},
- {'now', now},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- ugen = l.STYLE_CONSTANT,
- time = l.STYLE_NUMBER,
- now = l.STYLE_CONSTANT..',bold'
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@')))
+
+return lex
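
Editor's note: chuck.lua is where custom token styling under the new API is easiest to see: styles are objects, and '..' with a property table returns a copy with those properties applied, replacing the old comma-joined strings like l.STYLE_CONSTANT..',bold'. A fragment restating the 'now' token (hypothetical lexer name, for illustration):

local lexer = require('lexer')
local token = lexer.token
local lex = lexer.new('chuck_demo')

lex:add_rule('now', token('now', 'now'))
lex:add_style('now', lexer.styles.constant .. {bold = true})

return lex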
diff --git a/lua/lexers/clojure.lua b/lua/lexers/clojure.lua
index 26bddec..270a058 100644
--- a/lua/lexers/clojure.lua
+++ b/lua/lexers/clojure.lua
@@ -1,193 +1,147 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2018-2022 Mitchell. See LICENSE.
-- Clojure LPeg lexer.
+-- Contributed by Christos Chatzifountas.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'clojure'}
+local lex = lexer.new('clojure')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline^0
-local block_comment = '#_(' * (l.any - ')')^0 * P(')')
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
-
--- Numbers.
-local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'fn','try','catch','finaly','defonce',
- 'and', 'case', 'cond', 'def', 'defn', 'defmacro',
- 'do', 'else', 'when', 'when-let', 'if-let', 'if', 'let', 'loop',
- 'or', 'recur', 'quote',
-}, '-*!'))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'fn', 'try', 'catch', 'finally', 'defonce', 'and', 'case', 'cond', 'def', 'defn', 'defmacro', 'do',
+ 'else', 'when', 'when-let', 'if-let', 'if', 'let', 'loop', 'or', 'recur', 'quote'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match({
- '*', '+', '-', '->ArrayChunk', '->Eduction', '->Vec',
- '->VecNode', '->VecSeq', '/', '<', '<=', '=', '==', '>', '>=',
- 'StackTraceElement->vec', 'Throwable->map', 'accessor', 'aclone',
- 'add-classpath', 'add-watch', 'agent', 'agent-error', 'agent-errors', 'aget',
- 'alength', 'alias', 'all-ns', 'alter', 'alter-meta!', 'alter-var-root',
- 'ancestors', 'any?', 'apply', 'array-map', 'aset', 'aset-boolean',
- 'aset-byte', 'aset-char', 'aset-double', 'aset-float', 'aset-int',
- 'aset-long', 'aset-short', 'assoc', 'assoc!', 'assoc-in', 'associative?',
- 'atom', 'await', 'await-for', 'bases', 'bean', 'bigdec', 'bigint',
- 'biginteger', 'bit-and', 'bit-and-not', 'bit-clear', 'bit-flip', 'bit-not',
- 'bit-or', 'bit-set', 'bit-shift-left', 'bit-shift-right', 'bit-test',
- 'bit-xor', 'boolean', 'boolean-array', 'boolean?', 'booleans', 'bound-fn*',
- 'bound?', 'bounded-count', 'butlast', 'byte', 'byte-array', 'bytes', 'bytes?',
- 'cast', 'cat', 'char', 'char-array', 'char?', 'chars', 'class', 'class?',
- 'clear-agent-errors', 'clojure-version', 'coll?', 'commute', 'comp',
- 'comparator', 'compare', 'compare-and-set!', 'compile', 'complement',
- 'completing', 'concat', 'conj', 'conj!', 'cons', 'constantly',
- 'construct-proxy', 'contains?', 'count', 'counted?', 'create-ns',
- 'create-struct', 'cycle', 'dec', 'decimal?', 'dedupe', 'delay?',
- 'deliver', 'denominator', 'deref', 'derive', 'descendants', 'disj', 'disj!',
- 'dissoc', 'dissoc!', 'distinct', 'distinct?', 'doall', 'dorun', 'double',
- 'double-array', 'double?', 'doubles', 'drop', 'drop-last', 'drop-while',
- 'eduction', 'empty', 'empty?', 'ensure', 'ensure-reduced', 'enumeration-seq',
- 'error-handler', 'error-mode', 'eval', 'even?', 'every-pred', 'every?',
- 'ex-data', 'ex-info', 'extend', 'extenders', 'extends?', 'false?', 'ffirst',
- 'file-seq', 'filter', 'filterv', 'find', 'find-keyword', 'find-ns',
- 'find-var', 'first', 'flatten', 'float', 'float-array', 'float?', 'floats',
- 'flush', 'fn?', 'fnext', 'fnil', 'force', 'format', 'frequencies',
- 'future-call', 'future-cancel', 'future-cancelled?', 'future-done?',
- 'future?', 'gensym', 'get', 'get-in', 'get-method', 'get-proxy-class',
- 'get-thread-bindings', 'get-validator', 'group-by', 'halt-when', 'hash',
- 'hash-map', 'hash-ordered-coll', 'hash-set', 'hash-unordered-coll', 'ident?',
- 'identical?', 'identity', 'ifn?', 'in-ns', 'inc', 'inc', 'indexed?',
- 'init-proxy', 'inst-ms', 'inst?', 'instance?', 'int', 'int-array', 'int?',
- 'integer?', 'interleave', 'intern', 'interpose', 'into', 'into-array', 'ints',
- 'isa?', 'iterate', 'iterator-seq', 'juxt', 'keep', 'keep-indexed', 'key',
- 'keys', 'keyword', 'keyword?', 'last', 'line-seq', 'list', 'list*', 'list?',
- 'load', 'load-file', 'load-reader', 'load-string', 'loaded-libs', 'long',
- 'long-array', 'longs', 'macroexpand', 'macroexpand-1', 'make-array',
- 'make-hierarchy', 'map', 'map-entry?', 'map-indexed', 'map?', 'mapcat',
- 'mapv', 'max', 'max-key', 'memoize', 'merge', 'merge-with', 'meta', 'methods',
- 'min', 'min-key', 'mix-collection-hash', 'mod', 'name', 'namespace',
- 'namespace-munge', 'nat-int?', 'neg-int?', 'neg?', 'newline', 'next',
- 'nfirst', 'nil?', 'nnext', 'not', 'not-any?', 'not-empty', 'not-every?',
- 'not=', 'ns-aliases', 'ns-imports', 'ns-interns', 'ns-map', 'ns-name',
- 'ns-publics', 'ns-refers', 'ns-resolve', 'ns-unalias', 'ns-unmap', 'nth',
- 'nthnext', 'nthrest', 'num', 'number?', 'numerator', 'object-array', 'odd?',
- 'parents', 'partial', 'partition', 'partition-all', 'partition-by', 'pcalls',
- 'peek', 'persistent!', 'pmap', 'pop', 'pop!', 'pop-thread-bindings',
- 'pos-int?', 'pos?', 'pr-str', 'prefer-method', 'prefers', 'print',
- 'print-str', 'printf', 'println', 'println-str', 'prn', 'prn-str', 'promise',
- 'proxy-mappings', 'push-thread-bindings', 'qualified-ident?',
- 'qualified-keyword?', 'qualified-symbol?', 'quot', 'rand', 'rand-int',
- 'rand-nth', 'random-sample', 'range', 'ratio?', 'rational?', 'rationalize',
- 're-find', 're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq',
- 'read', 'read-line', 'read-string', 'reader-conditional',
- 'reader-conditional?', 'realized?', 'record?', 'reduce', 'reduce-kv',
- 'reduced', 'reduced?', 'reductions', 'ref', 'ref-history-count',
- 'ref-max-history', 'ref-min-history', 'ref-set', 'refer',
- 'release-pending-sends', 'rem', 'remove', 'remove-all-methods',
- 'remove-method', 'remove-ns', 'remove-watch', 'repeat', 'repeatedly',
- 'replace', 'replicate', 'require', 'reset!', 'reset-meta!', 'reset-vals!',
- 'resolve', 'rest', 'restart-agent', 'resultset-seq', 'reverse', 'reversible?',
- 'rseq', 'rsubseq', 'run!', 'satisfies?', 'second', 'select-keys', 'send',
- 'send-off', 'send-via', 'seq', 'seq?', 'seqable?', 'seque', 'sequence',
- 'sequential?', 'set', 'set-agent-send-executor!',
- 'set-agent-send-off-executor!', 'set-error-handler!', 'set-error-mode!',
- 'set-validator!', 'set?', 'short', 'short-array', 'shorts', 'shuffle',
- 'shutdown-agents', 'simple-ident?', 'simple-keyword?', 'simple-symbol?',
- 'slurp', 'some', 'some-fn', 'some?', 'sort', 'sort-by', 'sorted-map',
- 'sorted-map-by', 'sorted-set', 'sorted-set-by', 'sorted?', 'special-symbol?',
- 'spit', 'split-at', 'split-with', 'str', 'string?', 'struct', 'struct-map',
- 'subs', 'subseq', 'subvec', 'supers', 'swap!', 'swap-vals!', 'symbol',
- 'symbol?', 'tagged-literal', 'tagged-literal?', 'take', 'take-last',
- 'take-nth', 'take-while', 'test', 'the-ns', 'thread-bound?', 'to-array',
- 'to-array-2d', 'trampoline', 'transduce', 'transient', 'tree-seq', 'true?',
- 'type', 'unchecked-add', 'unchecked-add-int', 'unchecked-byte',
- 'unchecked-char', 'unchecked-dec', 'unchecked-dec-int',
- 'unchecked-divide-int', 'unchecked-double', 'unchecked-float',
- 'unchecked-inc', 'unchecked-inc-int', 'unchecked-int', 'unchecked-long',
- 'unchecked-multiply', 'unchecked-multiply-int', 'unchecked-negate',
- 'unchecked-negate-int', 'unchecked-remainder-int', 'unchecked-short',
- 'unchecked-subtract', 'unchecked-subtract-int', 'underive', 'unreduced',
- 'unsigned-bit-shift-right', 'update', 'update-in', 'update-proxy', 'uri?',
- 'use', 'uuid?', 'val', 'vals', 'var-get', 'var-set', 'var?', 'vary-meta',
- 'vec', 'vector', 'vector-of', 'vector?', 'volatile!', 'volatile?', 'vreset!',
- 'with-bindings*', 'with-meta', 'with-redefs-fn', 'xml-seq', 'zero?', 'zipmap',
- 'diff-similar', 'equality-partition', 'diff', 'inspect', 'inspect-table',
- 'inspect-tree', '', 'validated', 'browse-url', 'as-file', 'as-url',
- 'make-input-stream', 'make-output-stream', 'make-reader', 'make-writer',
- 'as-relative-path', 'copy', 'delete-file', 'file', 'input-stream',
- 'make-parents', 'output-stream', 'reader', 'resource', 'writer',
- 'add-local-javadoc', 'add-remote-javadoc', 'javadoc', 'sh', 'demunge',
- 'load-script', 'main', 'repl', 'repl-caught', 'repl-exception', 'repl-prompt',
- 'repl-read', 'root-cause', 'skip-if-eol', 'skip-whitespace',
- 'stack-element-str', 'cl-format', 'fresh-line', 'get-pretty-writer', 'pprint',
- 'pprint-indent', 'pprint-newline', 'pprint-tab', 'print-table',
- 'set-pprint-dispatch', 'write', 'write-out', 'resolve-class', 'do-reflect',
- 'typename', '->AsmReflector', '->Constructor', '->Field', '->JavaReflector',
- '->Method', 'map->Constructor', 'map->Field', 'map->Method', 'reflect',
- 'type-reflect', 'apropos', 'dir-fn', 'find-doc', 'pst', 'set-break-handler!',
- 'source-fn', 'thread-stopper', 'difference', 'index', 'intersection', 'join',
- 'map-invert', 'project', 'rename', 'rename-keys', 'select', 'subset?',
- 'superset?', 'union', 'e', 'print-cause-trace', 'print-stack-trace',
- 'print-throwable', 'print-trace-element', 'blank?', 'capitalize',
- 'ends-with?', 'escape', 'includes?', 'index-of', 'last-index-of',
- 'lower-case', 're-quote-replacement', 'replace-first', 'split', 'split-lines',
- 'starts-with?', 'trim', 'trim-newline', 'triml', 'trimr', 'upper-case',
- 'apply-template', 'assert-any', 'assert-predicate', 'compose-fixtures',
- 'do-report', 'file-position', 'function?', 'get-possibly-unbound-var',
- 'inc-report-counter', 'join-fixtures', 'run-all-tests', 'run-tests',
- 'successful?', 'test-all-vars', 'test-ns', 'test-vars',
- 'testing-contexts-str', 'testing-vars-str', 'keywordize-keys',
- 'macroexpand-all', 'postwalk', 'postwalk-demo', 'postwalk-replace', 'prewalk',
- 'prewalk-demo', 'prewalk-replace', 'stringify-keys', 'walk', 'append-child',
- 'branch?', 'children', 'down', 'edit', 'end?', 'insert-child', 'insert-left',
- 'insert-right', 'left', 'leftmost', 'lefts', 'make-node', 'node', 'path',
- 'prev', 'right', 'rightmost', 'rights', 'root', 'seq-zip', 'up', 'vector-zip',
- 'xml-zip', 'zipper'
-}, '-/<>!?=#\''))
-
--- Identifiers.
-local word = (l.alpha + S('-!?*$=-')) * (l.alnum + S('.-!?*$+-'))^0
-local identifier = token(l.IDENTIFIER, word)
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ '*', '+', '-', '->ArrayChunk', '->Eduction', '->Vec', '->VecNode', '->VecSeq', '/', '<', '<=',
+ '=', '==', '>', '>=', 'StackTraceElement->vec', 'Throwable->map', 'accessor', 'aclone',
+ 'add-classpath', 'add-watch', 'agent', 'agent-error', 'agent-errors', 'aget', 'alength', 'alias',
+ 'all-ns', 'alter', 'alter-meta!', 'alter-var-root', 'ancestors', 'any?', 'apply', 'array-map',
+ 'aset', 'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float', 'aset-int',
+ 'aset-long', 'aset-short', 'assoc', 'assoc!', 'assoc-in', 'associative?', 'atom', 'await',
+ 'await-for', 'bases', 'bean', 'bigdec', 'bigint', 'biginteger', 'bit-and', 'bit-and-not',
+ 'bit-clear', 'bit-flip', 'bit-not', 'bit-or', 'bit-set', 'bit-shift-left', 'bit-shift-right',
+ 'bit-test', 'bit-xor', 'boolean', 'boolean-array', 'boolean?', 'booleans', 'bound-fn*', 'bound?',
+ 'bounded-count', 'butlast', 'byte', 'byte-array', 'bytes', 'bytes?', 'cast', 'cat', 'char',
+ 'char-array', 'char?', 'chars', 'class', 'class?', 'clear-agent-errors', 'clojure-version',
+ 'coll?', 'commute', 'comp', 'comparator', 'compare', 'compare-and-set!', 'compile', 'complement',
+ 'completing', 'concat', 'conj', 'conj!', 'cons', 'constantly', 'construct-proxy', 'contains?',
+ 'count', 'counted?', 'create-ns', 'create-struct', 'cycle', 'dec', 'decimal?', 'dedupe', 'delay?',
+ 'deliver', 'denominator', 'deref', 'derive', 'descendants', 'disj', 'disj!', 'dissoc', 'dissoc!',
+ 'distinct', 'distinct?', 'doall', 'dorun', 'double', 'double-array', 'double?', 'doubles', 'drop',
+ 'drop-last', 'drop-while', 'eduction', 'empty', 'empty?', 'ensure', 'ensure-reduced',
+ 'enumeration-seq', 'error-handler', 'error-mode', 'eval', 'even?', 'every-pred', 'every?',
+ 'ex-data', 'ex-info', 'extend', 'extenders', 'extends?', 'false?', 'ffirst', 'file-seq', 'filter',
+ 'filterv', 'find', 'find-keyword', 'find-ns', 'find-var', 'first', 'flatten', 'float',
+ 'float-array', 'float?', 'floats', 'flush', 'fn?', 'fnext', 'fnil', 'force', 'format',
+ 'frequencies', 'future-call', 'future-cancel', 'future-cancelled?', 'future-done?', 'future?',
+ 'gensym', 'get', 'get-in', 'get-method', 'get-proxy-class', 'get-thread-bindings',
+ 'get-validator', 'group-by', 'halt-when', 'hash', 'hash-map', 'hash-ordered-coll', 'hash-set',
+ 'hash-unordered-coll', 'ident?', 'identical?', 'identity', 'ifn?', 'in-ns', 'inc',
+ 'indexed?', 'init-proxy', 'inst-ms', 'inst?', 'instance?', 'int', 'int-array', 'int?', 'integer?',
+ 'interleave', 'intern', 'interpose', 'into', 'into-array', 'ints', 'isa?', 'iterate',
+ 'iterator-seq', 'juxt', 'keep', 'keep-indexed', 'key', 'keys', 'keyword', 'keyword?', 'last',
+ 'line-seq', 'list', 'list*', 'list?', 'load', 'load-file', 'load-reader', 'load-string',
+ 'loaded-libs', 'long', 'long-array', 'longs', 'macroexpand', 'macroexpand-1', 'make-array',
+ 'make-hierarchy', 'map', 'map-entry?', 'map-indexed', 'map?', 'mapcat', 'mapv', 'max', 'max-key',
+ 'memoize', 'merge', 'merge-with', 'meta', 'methods', 'min', 'min-key', 'mix-collection-hash',
+ 'mod', 'name', 'namespace', 'namespace-munge', 'nat-int?', 'neg-int?', 'neg?', 'newline', 'next',
+ 'nfirst', 'nil?', 'nnext', 'not', 'not-any?', 'not-empty', 'not-every?', 'not=', 'ns-aliases',
+ 'ns-imports', 'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers', 'ns-resolve',
+ 'ns-unalias', 'ns-unmap', 'nth', 'nthnext', 'nthrest', 'num', 'number?', 'numerator',
+ 'object-array', 'odd?', 'parents', 'partial', 'partition', 'partition-all', 'partition-by',
+ 'pcalls', 'peek', 'persistent!', 'pmap', 'pop', 'pop!', 'pop-thread-bindings', 'pos-int?', 'pos?',
+ 'pr-str', 'prefer-method', 'prefers', 'print', 'print-str', 'printf', 'println', 'println-str',
+ 'prn', 'prn-str', 'promise', 'proxy-mappings', 'push-thread-bindings', 'qualified-ident?',
+ 'qualified-keyword?', 'qualified-symbol?', 'quot', 'rand', 'rand-int', 'rand-nth',
+ 'random-sample', 'range', 'ratio?', 'rational?', 'rationalize', 're-find', 're-groups',
+ 're-matcher', 're-matches', 're-pattern', 're-seq', 'read', 'read-line', 'read-string',
+ 'reader-conditional', 'reader-conditional?', 'realized?', 'record?', 'reduce', 'reduce-kv',
+ 'reduced', 'reduced?', 'reductions', 'ref', 'ref-history-count', 'ref-max-history',
+ 'ref-min-history', 'ref-set', 'refer', 'release-pending-sends', 'rem', 'remove',
+ 'remove-all-methods', 'remove-method', 'remove-ns', 'remove-watch', 'repeat', 'repeatedly',
+ 'replace', 'replicate', 'require', 'reset!', 'reset-meta!', 'reset-vals!', 'resolve', 'rest',
+ 'restart-agent', 'resultset-seq', 'reverse', 'reversible?', 'rseq', 'rsubseq', 'run!',
+ 'satisfies?', 'second', 'select-keys', 'send', 'send-off', 'send-via', 'seq', 'seq?', 'seqable?',
+ 'seque', 'sequence', 'sequential?', 'set', 'set-agent-send-executor!',
+ 'set-agent-send-off-executor!', 'set-error-handler!', 'set-error-mode!', 'set-validator!', 'set?',
+ 'short', 'short-array', 'shorts', 'shuffle', 'shutdown-agents', 'simple-ident?',
+ 'simple-keyword?', 'simple-symbol?', 'slurp', 'some', 'some-fn', 'some?', 'sort', 'sort-by',
+ 'sorted-map', 'sorted-map-by', 'sorted-set', 'sorted-set-by', 'sorted?', 'special-symbol?',
+ 'spit', 'split-at', 'split-with', 'str', 'string?', 'struct', 'struct-map', 'subs', 'subseq',
+ 'subvec', 'supers', 'swap!', 'swap-vals!', 'symbol', 'symbol?', 'tagged-literal',
+ 'tagged-literal?', 'take', 'take-last', 'take-nth', 'take-while', 'test', 'the-ns',
+ 'thread-bound?', 'to-array', 'to-array-2d', 'trampoline', 'transduce', 'transient', 'tree-seq',
+ 'true?', 'type', 'unchecked-add', 'unchecked-add-int', 'unchecked-byte', 'unchecked-char',
+ 'unchecked-dec', 'unchecked-dec-int', 'unchecked-divide-int', 'unchecked-double',
+ 'unchecked-float', 'unchecked-inc', 'unchecked-inc-int', 'unchecked-int', 'unchecked-long',
+ 'unchecked-multiply', 'unchecked-multiply-int', 'unchecked-negate', 'unchecked-negate-int',
+ 'unchecked-remainder-int', 'unchecked-short', 'unchecked-subtract', 'unchecked-subtract-int',
+ 'underive', 'unreduced', 'unsigned-bit-shift-right', 'update', 'update-in', 'update-proxy',
+ 'uri?', 'use', 'uuid?', 'val', 'vals', 'var-get', 'var-set', 'var?', 'vary-meta', 'vec', 'vector',
+ 'vector-of', 'vector?', 'volatile!', 'volatile?', 'vreset!', 'with-bindings*', 'with-meta',
+ 'with-redefs-fn', 'xml-seq', 'zero?', 'zipmap', 'diff-similar', 'equality-partition', 'diff',
+ 'inspect', 'inspect-table', 'inspect-tree', 'validated', 'browse-url', 'as-file', 'as-url',
+ 'make-input-stream', 'make-output-stream', 'make-reader', 'make-writer', 'as-relative-path',
+ 'copy', 'delete-file', 'file', 'input-stream', 'make-parents', 'output-stream', 'reader',
+ 'resource', 'writer', 'add-local-javadoc', 'add-remote-javadoc', 'javadoc', 'sh', 'demunge',
+ 'load-script', 'main', 'repl', 'repl-caught', 'repl-exception', 'repl-prompt', 'repl-read',
+ 'root-cause', 'skip-if-eol', 'skip-whitespace', 'stack-element-str', 'cl-format', 'fresh-line',
+ 'get-pretty-writer', 'pprint', 'pprint-indent', 'pprint-newline', 'pprint-tab', 'print-table',
+ 'set-pprint-dispatch', 'write', 'write-out', 'resolve-class', 'do-reflect', 'typename',
+ '->AsmReflector', '->Constructor', '->Field', '->JavaReflector', '->Method', 'map->Constructor',
+ 'map->Field', 'map->Method', 'reflect', 'type-reflect', 'apropos', 'dir-fn', 'find-doc', 'pst',
+ 'set-break-handler!', 'source-fn', 'thread-stopper', 'difference', 'index', 'intersection',
+ 'join', 'map-invert', 'project', 'rename', 'rename-keys', 'select', 'subset?', 'superset?',
+ 'union', 'e', 'print-cause-trace', 'print-stack-trace', 'print-throwable', 'print-trace-element',
+ 'blank?', 'capitalize', 'ends-with?', 'escape', 'includes?', 'index-of', 'last-index-of',
+ 'lower-case', 're-quote-replacement', 'replace-first', 'split', 'split-lines', 'starts-with?',
+ 'trim', 'trim-newline', 'triml', 'trimr', 'upper-case', 'apply-template', 'assert-any',
+ 'assert-predicate', 'compose-fixtures', 'do-report', 'file-position', 'function?',
+ 'get-possibly-unbound-var', 'inc-report-counter', 'join-fixtures', 'run-all-tests', 'run-tests',
+ 'successful?', 'test-all-vars', 'test-ns', 'test-vars', 'testing-contexts-str',
+ 'testing-vars-str', 'keywordize-keys', 'macroexpand-all', 'postwalk', 'postwalk-demo',
+ 'postwalk-replace', 'prewalk', 'prewalk-demo', 'prewalk-replace', 'stringify-keys', 'walk',
+ 'append-child', 'branch?', 'children', 'down', 'edit', 'end?', 'insert-child', 'insert-left',
+ 'insert-right', 'left', 'leftmost', 'lefts', 'make-node', 'node', 'path', 'prev', 'right',
+ 'rightmost', 'rights', 'root', 'seq-zip', 'up', 'vector-zip', 'xml-zip', 'zipper'
+}))
--- Operators.
-local operator = token(l.OPERATOR, S('`@()'))
-
--- Clojure keywords
-local clojure_keyword = token('clojure_keyword', ':' * S(':')^-1 * word * ('/' * word )^-1)
-local clojure_symbol = token('clojure_symbol', "\'" * word * ('/' * word )^-1 )
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'func', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
- {"clojure_keyword", clojure_keyword},
- {"clojure_symbol", clojure_symbol}
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1))
+-- Identifiers.
+local word = (lexer.alpha + S('-!?*$=-')) * (lexer.alnum + S('.-!?*$+-'))^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-M._tokenstyles = {
- clojure_keyword = l.STYLE_TYPE,
- clojure_symbol = l.STYLE_TYPE..',bold',
-}
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-M._foldsymbols = {
- _patterns = {'[%(%)%[%]{}]', ';'},
- [l.OPERATOR] = {
- ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {['#_('] = 1, [';'] = l.fold_line_comments(';')}
-}
+-- Comments.
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#_(', ')')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-return M
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('`@()')))
+
+-- Clojure keywords.
+lex:add_rule('clojure_keyword', token('clojure_keyword', ':' * S(':')^-1 * word * ('/' * word)^-1))
+lex:add_style('clojure_keyword', lexer.styles.type)
+lex:add_rule('clojure_symbol', token('clojure_symbol', "\'" * word * ('/' * word)^-1))
+lex:add_style('clojure_symbol', lexer.styles.type .. {bold = true})
+
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, '#_(', ')')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+
+return lex
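
Editor's note: the new number rule above is denser than a straight port; it covers signed integers, decimals, and Clojure ratios (22/7) in a single pattern. A standalone sanity check in plain lpeg, re-stating the same shape outside the lexer module (the local names are illustrative, not part of this diff):

local lpeg = require('lpeg')
local P, S, R = lpeg.P, lpeg.S, lpeg.R

local digit = R('09') -- stands in for lexer.digit
local number = P('-')^-1 * digit^1 * (S('./') * digit^1)^-1

assert(lpeg.match(number, '42') == 3)   -- integer
assert(lpeg.match(number, '-1.5') == 5) -- negative decimal
assert(lpeg.match(number, '22/7') == 5) -- ratio
assert(not lpeg.match(number, 'x1'))    -- rejects non-numeric prefixes
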
diff --git a/lua/lexers/cmake.lua b/lua/lexers/cmake.lua
index 53c5112..8fcafeb 100644
--- a/lua/lexers/cmake.lua
+++ b/lua/lexers/cmake.lua
@@ -1,173 +1,132 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- CMake LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'cmake'}
+local lex = lexer.new('cmake')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'IF', 'ENDIF', 'FOREACH', 'ENDFOREACH', 'WHILE', 'ENDWHILE', 'ELSE', 'ELSEIF'
-}, nil, true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'IF ENDIF FOREACH ENDFOREACH WHILE ENDWHILE ELSE ELSEIF', true)))
-- Commands.
-local command = token(l.FUNCTION, word_match({
- 'ADD_CUSTOM_COMMAND', 'ADD_CUSTOM_TARGET', 'ADD_DEFINITIONS',
- 'ADD_DEPENDENCIES', 'ADD_EXECUTABLE', 'ADD_LIBRARY', 'ADD_SUBDIRECTORY',
- 'ADD_TEST', 'AUX_SOURCE_DIRECTORY', 'BUILD_COMMAND', 'BUILD_NAME',
- 'CMAKE_MINIMUM_REQUIRED', 'CONFIGURE_FILE', 'CREATE_TEST_SOURCELIST',
- 'ENABLE_LANGUAGE', 'ENABLE_TESTING', 'ENDMACRO', 'EXECUTE_PROCESS',
- 'EXEC_PROGRAM', 'EXPORT_LIBRARY_DEPENDENCIES', 'FILE', 'FIND_FILE',
- 'FIND_LIBRARY', 'FIND_PACKAGE', 'FIND_PATH', 'FIND_PROGRAM', 'FLTK_WRAP_UI',
- 'GET_CMAKE_PROPERTY', 'GET_DIRECTORY_PROPERTY', 'GET_FILENAME_COMPONENT',
- 'GET_SOURCE_FILE_PROPERTY', 'GET_TARGET_PROPERTY', 'GET_TEST_PROPERTY',
- 'INCLUDE', 'INCLUDE_DIRECTORIES', 'INCLUDE_EXTERNAL_MSPROJECT',
- 'INCLUDE_REGULAR_EXPRESSION', 'INSTALL', 'INSTALL_FILES', 'INSTALL_PROGRAMS',
- 'INSTALL_TARGETS', 'LINK_DIRECTORIES', 'LINK_LIBRARIES', 'LIST', 'LOAD_CACHE',
- 'LOAD_COMMAND', 'MACRO', 'MAKE_DIRECTORY', 'MARK_AS_ADVANCED', 'MATH',
- 'MESSAGE', 'OPTION', 'OUTPUT_REQUIRED_FILES', 'PROJECT', 'QT_WRAP_CPP',
- 'QT_WRAP_UI', 'REMOVE', 'REMOVE_DEFINITIONS', 'SEPARATE_ARGUMENTS', 'SET',
- 'SET_DIRECTORY_PROPERTIES', 'SET_SOURCE_FILES_PROPERTIES',
- 'SET_TARGET_PROPERTIES', 'SET_TESTS_PROPERTIES', 'SITE_NAME', 'SOURCE_GROUP',
- 'STRING', 'SUBDIRS', 'SUBDIR_DEPENDS', 'TARGET_LINK_LIBRARIES', 'TRY_COMPILE',
- 'TRY_RUN', 'USE_MANGLED_MESA', 'UTILITY_SOURCE', 'VARIABLE_REQUIRES',
- 'VTK_MAKE_INSTANTIATOR', 'VTK_WRAP_JAVA', 'VTK_WRAP_PYTHON', 'VTK_WRAP_TCL',
- 'WRITE_FILE',
-}, nil, true))
+lex:add_rule('command', token(lexer.FUNCTION, word_match({
+ 'ADD_CUSTOM_COMMAND', 'ADD_CUSTOM_TARGET', 'ADD_DEFINITIONS', 'ADD_DEPENDENCIES',
+ 'ADD_EXECUTABLE', 'ADD_LIBRARY', 'ADD_SUBDIRECTORY', 'ADD_TEST', 'AUX_SOURCE_DIRECTORY',
+ 'BUILD_COMMAND', 'BUILD_NAME', 'CMAKE_MINIMUM_REQUIRED', 'CONFIGURE_FILE',
+ 'CREATE_TEST_SOURCELIST', 'ENABLE_LANGUAGE', 'ENABLE_TESTING', 'ENDMACRO', 'EXEC_PROGRAM',
+ 'EXECUTE_PROCESS', 'EXPORT_LIBRARY_DEPENDENCIES', 'FILE', 'FIND_FILE', 'FIND_LIBRARY',
+ 'FIND_PACKAGE', 'FIND_PATH', 'FIND_PROGRAM', 'FLTK_WRAP_UI', 'GET_CMAKE_PROPERTY',
+ 'GET_DIRECTORY_PROPERTY', 'GET_FILENAME_COMPONENT', 'GET_SOURCE_FILE_PROPERTY',
+ 'GET_TARGET_PROPERTY', 'GET_TEST_PROPERTY', 'INCLUDE', 'INCLUDE_DIRECTORIES',
+ 'INCLUDE_EXTERNAL_MSPROJECT', 'INCLUDE_REGULAR_EXPRESSION', 'INSTALL', 'INSTALL_FILES',
+ 'INSTALL_PROGRAMS', 'INSTALL_TARGETS', 'LINK_DIRECTORIES', 'LINK_LIBRARIES', 'LIST', 'LOAD_CACHE',
+ 'LOAD_COMMAND', 'MACRO', 'MAKE_DIRECTORY', 'MARK_AS_ADVANCED', 'MATH', 'MESSAGE', 'OPTION',
+ 'OUTPUT_REQUIRED_FILES', 'PROJECT', 'QT_WRAP_CPP', 'QT_WRAP_UI', 'REMOVE', 'REMOVE_DEFINITIONS',
+ 'SEPARATE_ARGUMENTS', 'SET', 'SET_DIRECTORY_PROPERTIES', 'SET_SOURCE_FILES_PROPERTIES',
+ 'SET_TARGET_PROPERTIES', 'SET_TESTS_PROPERTIES', 'SITE_NAME', 'SOURCE_GROUP', 'STRING',
+ 'SUBDIR_DEPENDS', 'SUBDIRS', 'TARGET_LINK_LIBRARIES', 'TRY_COMPILE', 'TRY_RUN',
+ 'USE_MANGLED_MESA', 'UTILITY_SOURCE', 'VARIABLE_REQUIRES', 'VTK_MAKE_INSTANTIATOR',
+ 'VTK_WRAP_JAVA', 'VTK_WRAP_PYTHON', 'VTK_WRAP_TCL', 'WRITE_FILE'
+}, true)))
-- Constants.
-local constant = token(l.CONSTANT, word_match({
- 'BOOL', 'CACHE', 'FALSE', 'N', 'NO', 'ON', 'OFF', 'NOTFOUND', 'TRUE'
-}, nil, true))
+lex:add_rule('constant',
+ token(lexer.CONSTANT, word_match('BOOL CACHE FALSE N NO ON OFF NOTFOUND TRUE', true)))
-- Variables.
-local variable = token(l.VARIABLE, word_match{
- 'APPLE', 'BORLAND', 'CMAKE_AR', 'CMAKE_BACKWARDS_COMPATIBILITY',
- 'CMAKE_BASE_NAME', 'CMAKE_BINARY_DIR', 'CMAKE_BUILD_TOOL', 'CMAKE_BUILD_TYPE',
- 'CMAKE_CACHEFILE_DIR', 'CMAKE_CACHE_MAJOR_VERSION',
- 'CMAKE_CACHE_MINOR_VERSION', 'CMAKE_CACHE_RELEASE_VERSION',
- 'CMAKE_CFG_INTDIR', 'CMAKE_COLOR_MAKEFILE', 'CMAKE_COMMAND',
- 'CMAKE_COMPILER_IS_GNUCC', 'CMAKE_COMPILER_IS_GNUCC_RUN',
- 'CMAKE_COMPILER_IS_GNUCXX', 'CMAKE_COMPILER_IS_GNUCXX_RUN',
+lex:add_rule('variable', token(lexer.VARIABLE, word_match{
+ 'APPLE', 'ARGS', 'BORLAND', 'CMAKE_AR', 'CMAKE_BACKWARDS_COMPATIBILITY', 'CMAKE_BASE_NAME',
+ 'CMAKE_BINARY_DIR', 'CMAKE_BUILD_TOOL', 'CMAKE_BUILD_TYPE', 'CMAKE_CACHEFILE_DIR',
+ 'CMAKE_CACHE_MAJOR_VERSION', 'CMAKE_CACHE_MINOR_VERSION', 'CMAKE_CACHE_RELEASE_VERSION',
+ 'CMAKE_C_COMPILE_OBJECT', 'CMAKE_C_COMPILER', 'CMAKE_C_COMPILER_ARG1', 'CMAKE_C_COMPILER_ENV_VAR',
+ 'CMAKE_C_COMPILER_FULLPATH', 'CMAKE_C_COMPILER_LOADED', 'CMAKE_C_COMPILER_WORKS',
+ 'CMAKE_C_CREATE_SHARED_LIBRARY', 'CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS',
+ 'CMAKE_C_CREATE_SHARED_MODULE', 'CMAKE_C_CREATE_STATIC_LIBRARY', 'CMAKE_CFG_INTDIR',
+ 'CMAKE_C_FLAGS', 'CMAKE_C_FLAGS_DEBUG', 'CMAKE_C_FLAGS_DEBUG_INIT', 'CMAKE_C_FLAGS_INIT',
+ 'CMAKE_C_FLAGS_MINSIZEREL', 'CMAKE_C_FLAGS_MINSIZEREL_INIT', 'CMAKE_C_FLAGS_RELEASE',
+ 'CMAKE_C_FLAGS_RELEASE_INIT', 'CMAKE_C_FLAGS_RELWITHDEBINFO', 'CMAKE_C_FLAGS_RELWITHDEBINFO_INIT',
+ 'CMAKE_C_IGNORE_EXTENSIONS', 'CMAKE_C_INFORMATION_LOADED', 'CMAKE_C_LINKER_PREFERENCE',
+ 'CMAKE_C_LINK_EXECUTABLE', 'CMAKE_C_LINK_FLAGS', 'CMAKE_COLOR_MAKEFILE', 'CMAKE_COMMAND',
+ 'CMAKE_COMPILER_IS_GNUCC', 'CMAKE_COMPILER_IS_GNUCC_RUN', 'CMAKE_COMPILER_IS_GNUCXX',
+ 'CMAKE_COMPILER_IS_GNUCXX_RUN', 'CMAKE_C_OUTPUT_EXTENSION', 'CMAKE_C_SOURCE_FILE_EXTENSIONS',
'CMAKE_CTEST_COMMAND', 'CMAKE_CURRENT_BINARY_DIR', 'CMAKE_CURRENT_SOURCE_DIR',
- 'CMAKE_CXX_COMPILER', 'CMAKE_CXX_COMPILER_ARG1', 'CMAKE_CXX_COMPILER_ENV_VAR',
- 'CMAKE_CXX_COMPILER_FULLPATH', 'CMAKE_CXX_COMPILER_LOADED',
- 'CMAKE_CXX_COMPILER_WORKS', 'CMAKE_CXX_COMPILE_OBJECT',
- 'CMAKE_CXX_CREATE_SHARED_LIBRARY',
- 'CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS',
- 'CMAKE_CXX_CREATE_SHARED_MODULE', 'CMAKE_CXX_CREATE_STATIC_LIBRARY',
- 'CMAKE_CXX_FLAGS', 'CMAKE_CXX_FLAGS_DEBUG', 'CMAKE_CXX_FLAGS_DEBUG_INIT',
- 'CMAKE_CXX_FLAGS_INIT', 'CMAKE_CXX_FLAGS_MINSIZEREL',
- 'CMAKE_CXX_FLAGS_MINSIZEREL_INIT', 'CMAKE_CXX_FLAGS_RELEASE',
- 'CMAKE_CXX_FLAGS_RELEASE_INIT', 'CMAKE_CXX_FLAGS_RELWITHDEBINFO',
- 'CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_CXX_IGNORE_EXTENSIONS',
- 'CMAKE_CXX_INFORMATION_LOADED', 'CMAKE_CXX_LINKER_PREFERENCE',
- 'CMAKE_CXX_LINK_EXECUTABLE', 'CMAKE_CXX_LINK_FLAGS',
- 'CMAKE_CXX_OUTPUT_EXTENSION', 'CMAKE_CXX_SOURCE_FILE_EXTENSIONS',
- 'CMAKE_C_COMPILER', 'CMAKE_C_COMPILER_ARG1', 'CMAKE_C_COMPILER_ENV_VAR',
- 'CMAKE_C_COMPILER_FULLPATH', 'CMAKE_C_COMPILER_LOADED',
- 'CMAKE_C_COMPILER_WORKS', 'CMAKE_C_COMPILE_OBJECT',
- 'CMAKE_C_CREATE_SHARED_LIBRARY',
- 'CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS',
- 'CMAKE_C_CREATE_SHARED_MODULE', 'CMAKE_C_CREATE_STATIC_LIBRARY',
- 'CMAKE_C_FLAGS', 'CMAKE_C_FLAGS_DEBUG', 'CMAKE_C_FLAGS_DEBUG_INIT',
- 'CMAKE_C_FLAGS_INIT', 'CMAKE_C_FLAGS_MINSIZEREL',
- 'CMAKE_C_FLAGS_MINSIZEREL_INIT', 'CMAKE_C_FLAGS_RELEASE',
- 'CMAKE_C_FLAGS_RELEASE_INIT', 'CMAKE_C_FLAGS_RELWITHDEBINFO',
- 'CMAKE_C_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_C_IGNORE_EXTENSIONS',
- 'CMAKE_C_INFORMATION_LOADED', 'CMAKE_C_LINKER_PREFERENCE',
- 'CMAKE_C_LINK_EXECUTABLE', 'CMAKE_C_LINK_FLAGS', 'CMAKE_C_OUTPUT_EXTENSION',
- 'CMAKE_C_SOURCE_FILE_EXTENSIONS', 'CMAKE_DL_LIBS', 'CMAKE_EDIT_COMMAND',
- 'CMAKE_EXECUTABLE_SUFFIX', 'CMAKE_EXE_LINKER_FLAGS',
- 'CMAKE_EXE_LINKER_FLAGS_DEBUG', 'CMAKE_EXE_LINKER_FLAGS_MINSIZEREL',
- 'CMAKE_EXE_LINKER_FLAGS_RELEASE', 'CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO',
- 'CMAKE_FILES_DIRECTORY', 'CMAKE_FIND_APPBUNDLE', 'CMAKE_FIND_FRAMEWORK',
- 'CMAKE_FIND_LIBRARY_PREFIXES', 'CMAKE_FIND_LIBRARY_SUFFIXES',
- 'CMAKE_GENERATOR', 'CMAKE_HOME_DIRECTORY', 'CMAKE_INCLUDE_FLAG_C',
- 'CMAKE_INCLUDE_FLAG_CXX', 'CMAKE_INCLUDE_FLAG_C_SEP', 'CMAKE_INIT_VALUE',
- 'CMAKE_INSTALL_PREFIX', 'CMAKE_LIBRARY_PATH_FLAG', 'CMAKE_LINK_LIBRARY_FLAG',
- 'CMAKE_LINK_LIBRARY_SUFFIX', 'CMAKE_MAJOR_VERSION', 'CMAKE_MAKE_PROGRAM',
- 'CMAKE_MINOR_VERSION', 'CMAKE_MODULE_EXISTS', 'CMAKE_MODULE_LINKER_FLAGS',
- 'CMAKE_MODULE_LINKER_FLAGS_DEBUG', 'CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL',
- 'CMAKE_MODULE_LINKER_FLAGS_RELEASE',
- 'CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO',
- 'CMAKE_MacOSX_Content_COMPILE_OBJECT', 'CMAKE_NUMBER_OF_LOCAL_GENERATORS',
- 'CMAKE_OSX_ARCHITECTURES', 'CMAKE_OSX_SYSROOT', 'CMAKE_PARENT_LIST_FILE',
+ 'CMAKE_CXX_COMPILE_OBJECT', 'CMAKE_CXX_COMPILER', 'CMAKE_CXX_COMPILER_ARG1',
+ 'CMAKE_CXX_COMPILER_ENV_VAR', 'CMAKE_CXX_COMPILER_FULLPATH', 'CMAKE_CXX_COMPILER_LOADED',
+ 'CMAKE_CXX_COMPILER_WORKS', 'CMAKE_CXX_CREATE_SHARED_LIBRARY',
+ 'CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS', 'CMAKE_CXX_CREATE_SHARED_MODULE',
+ 'CMAKE_CXX_CREATE_STATIC_LIBRARY', 'CMAKE_CXX_FLAGS', 'CMAKE_CXX_FLAGS_DEBUG',
+ 'CMAKE_CXX_FLAGS_DEBUG_INIT', 'CMAKE_CXX_FLAGS_INIT', 'CMAKE_CXX_FLAGS_MINSIZEREL',
+ 'CMAKE_CXX_FLAGS_MINSIZEREL_INIT', 'CMAKE_CXX_FLAGS_RELEASE', 'CMAKE_CXX_FLAGS_RELEASE_INIT',
+ 'CMAKE_CXX_FLAGS_RELWITHDEBINFO', 'CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT',
+ 'CMAKE_CXX_IGNORE_EXTENSIONS', 'CMAKE_CXX_INFORMATION_LOADED', 'CMAKE_CXX_LINKER_PREFERENCE',
+ 'CMAKE_CXX_LINK_EXECUTABLE', 'CMAKE_CXX_LINK_FLAGS', 'CMAKE_CXX_OUTPUT_EXTENSION',
+ 'CMAKE_CXX_SOURCE_FILE_EXTENSIONS', 'CMAKE_DL_LIBS', 'CMAKE_EDIT_COMMAND',
+ 'CMAKE_EXECUTABLE_SUFFIX', 'CMAKE_EXE_LINKER_FLAGS', 'CMAKE_EXE_LINKER_FLAGS_DEBUG',
+ 'CMAKE_EXE_LINKER_FLAGS_MINSIZEREL', 'CMAKE_EXE_LINKER_FLAGS_RELEASE',
+ 'CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO', 'CMAKE_FILES_DIRECTORY', 'CMAKE_FIND_APPBUNDLE',
+ 'CMAKE_FIND_FRAMEWORK', 'CMAKE_FIND_LIBRARY_PREFIXES', 'CMAKE_FIND_LIBRARY_SUFFIXES',
+ 'CMAKE_GENERATOR', 'CMAKE_HOME_DIRECTORY', 'CMAKE_INCLUDE_FLAG_C', 'CMAKE_INCLUDE_FLAG_C_SEP',
+ 'CMAKE_INCLUDE_FLAG_CXX', 'CMAKE_INIT_VALUE', 'CMAKE_INSTALL_PREFIX', 'CMAKE_LIBRARY_PATH_FLAG',
+ 'CMAKE_LINK_LIBRARY_FLAG', 'CMAKE_LINK_LIBRARY_SUFFIX', 'CMAKE_MacOSX_Content_COMPILE_OBJECT',
+ 'CMAKE_MAJOR_VERSION', 'CMAKE_MAKE_PROGRAM', 'CMAKE_MINOR_VERSION', 'CMAKE_MODULE_EXISTS',
+ 'CMAKE_MODULE_LINKER_FLAGS', 'CMAKE_MODULE_LINKER_FLAGS_DEBUG',
+ 'CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL', 'CMAKE_MODULE_LINKER_FLAGS_RELEASE',
+ 'CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO', 'CMAKE_NUMBER_OF_LOCAL_GENERATORS',
+ 'CMAKE_OSX_ARCHITECTURES', '_CMAKE_OSX_MACHINE', 'CMAKE_OSX_SYSROOT', 'CMAKE_PARENT_LIST_FILE',
'CMAKE_PATCH_VERSION', 'CMAKE_PLATFORM_HAS_INSTALLNAME',
- 'CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES', 'CMAKE_PLATFORM_ROOT_BIN',
- 'CMAKE_PROJECT_NAME', 'CMAKE_RANLIB', 'CMAKE_ROOT',
- 'CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS',
- 'CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS',
- 'CMAKE_SHARED_LIBRARY_CXX_FLAGS', 'CMAKE_SHARED_LIBRARY_C_FLAGS',
- 'CMAKE_SHARED_LIBRARY_LINK_C_FLAGS', 'CMAKE_SHARED_LIBRARY_PREFIX',
- 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG',
- 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP',
- 'CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG', 'CMAKE_SHARED_LIBRARY_SONAME_C_FLAG',
- 'CMAKE_SHARED_LIBRARY_SUFFIX', 'CMAKE_SHARED_LINKER_FLAGS',
- 'CMAKE_SHARED_LINKER_FLAGS_DEBUG', 'CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL',
- 'CMAKE_SHARED_LINKER_FLAGS_RELEASE',
- 'CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO',
- 'CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS', 'CMAKE_SHARED_MODULE_CREATE_C_FLAGS',
- 'CMAKE_SHARED_MODULE_PREFIX', 'CMAKE_SHARED_MODULE_SUFFIX',
- 'CMAKE_SIZEOF_VOID_P', 'CMAKE_SKIP_RPATH', 'CMAKE_SOURCE_DIR',
+ 'CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES', 'CMAKE_PLATFORM_ROOT_BIN', 'CMAKE_PROJECT_NAME',
+ 'CMAKE_RANLIB', 'CMAKE_ROOT', 'CMAKE_SHARED_LIBRARY_C_FLAGS',
+ 'CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS', 'CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS',
+ 'CMAKE_SHARED_LIBRARY_CXX_FLAGS', 'CMAKE_SHARED_LIBRARY_LINK_C_FLAGS',
+ 'CMAKE_SHARED_LIBRARY_PREFIX', 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG',
+ 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP', 'CMAKE_SHARED_LIBRARY_SONAME_C_FLAG',
+ 'CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG', 'CMAKE_SHARED_LIBRARY_SUFFIX',
+ 'CMAKE_SHARED_LINKER_FLAGS', 'CMAKE_SHARED_LINKER_FLAGS_DEBUG',
+ 'CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL', 'CMAKE_SHARED_LINKER_FLAGS_RELEASE',
+ 'CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO', 'CMAKE_SHARED_MODULE_CREATE_C_FLAGS',
+ 'CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS', 'CMAKE_SHARED_MODULE_PREFIX',
+ 'CMAKE_SHARED_MODULE_SUFFIX', 'CMAKE_SIZEOF_VOID_P', 'CMAKE_SKIP_RPATH', 'CMAKE_SOURCE_DIR',
'CMAKE_STATIC_LIBRARY_PREFIX', 'CMAKE_STATIC_LIBRARY_SUFFIX', 'CMAKE_SYSTEM',
- 'CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE',
- 'CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE', 'CMAKE_SYSTEM_APPBUNDLE_PATH',
- 'CMAKE_SYSTEM_FRAMEWORK_PATH', 'CMAKE_SYSTEM_INCLUDE_PATH',
- 'CMAKE_SYSTEM_INFO_FILE', 'CMAKE_SYSTEM_LIBRARY_PATH', 'CMAKE_SYSTEM_LOADED',
- 'CMAKE_SYSTEM_NAME', 'CMAKE_SYSTEM_PROCESSOR', 'CMAKE_SYSTEM_PROGRAM_PATH',
- 'CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED', 'CMAKE_SYSTEM_VERSION',
- 'CMAKE_UNAME', 'CMAKE_USE_RELATIVE_PATHS', 'CMAKE_VERBOSE_MAKEFILE', 'CYGWIN',
- 'EXECUTABLE_OUTPUT_PATH', 'FORCE', 'HAVE_CMAKE_SIZEOF_VOID_P',
- 'LIBRARY_OUTPUT_PATH', 'MACOSX_BUNDLE', 'MINGW', 'MSVC60', 'MSVC70', 'MSVC71',
- 'MSVC80', 'MSVC', 'MSVC_IDE', 'PROJECT_BINARY_DIR', 'PROJECT_NAME',
- 'PROJECT_SOURCE_DIR', 'PROJECT_BINARY_DIR', 'PROJECT_SOURCE_DIR',
- 'RUN_CONFIGURE', 'UNIX', 'WIN32', '_CMAKE_OSX_MACHINE',
- -- More variables.
- 'LOCATION', 'TARGET', 'POST_BUILD', 'PRE_BUILD', 'ARGS'
-} + P('$') * l.delimited_range('{}', false, true))
+ 'CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE', 'CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE',
+ 'CMAKE_SYSTEM_APPBUNDLE_PATH', 'CMAKE_SYSTEM_FRAMEWORK_PATH', 'CMAKE_SYSTEM_INCLUDE_PATH',
+ 'CMAKE_SYSTEM_INFO_FILE', 'CMAKE_SYSTEM_LIBRARY_PATH', 'CMAKE_SYSTEM_LOADED', 'CMAKE_SYSTEM_NAME',
+ 'CMAKE_SYSTEM_PROCESSOR', 'CMAKE_SYSTEM_PROGRAM_PATH', 'CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED',
+ 'CMAKE_SYSTEM_VERSION', 'CMAKE_UNAME', 'CMAKE_USE_RELATIVE_PATHS', 'CMAKE_VERBOSE_MAKEFILE',
+ 'CYGWIN', 'EXECUTABLE_OUTPUT_PATH', 'FORCE', 'HAVE_CMAKE_SIZEOF_VOID_P', 'LIBRARY_OUTPUT_PATH',
+ 'LOCATION', 'MACOSX_BUNDLE', 'MINGW', 'MSVC', 'MSVC60', 'MSVC70', 'MSVC71', 'MSVC80', 'MSVC_IDE',
+ 'POST_BUILD', 'PRE_BUILD', 'PROJECT_BINARY_DIR', 'PROJECT_NAME', 'PROJECT_SOURCE_DIR',
+ 'RUN_CONFIGURE', 'TARGET', 'UNIX', 'WIN32'
+} + P('$') * lexer.range('{', '}')))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, word_match{
+ 'AND', 'COMMAND', 'DEFINED', 'DOC', 'EQUAL', 'EXISTS', 'GREATER', 'INTERNAL', 'LESS', 'MATCHES',
+ 'NAME', 'NAMES', 'NAME_WE', 'NOT', 'OR', 'PATH', 'PATHS', 'PROGRAM', 'STREQUAL', 'STRGREATER',
+ 'STRINGS', 'STRLESS'
+} + S('=(){}')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, word_match({
- 'AND', 'COMMAND', 'DEFINED', 'DOC', 'EQUAL', 'EXISTS', 'GREATER', 'INTERNAL',
- 'LESS', 'MATCHES', 'NAME', 'NAMES', 'NAME_WE', 'NOT', 'OR', 'PATH', 'PATHS',
- 'PROGRAM', 'STREQUAL', 'STRGREATER', 'STRINGS', 'STRLESS'
-}) + S('=(){}'))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'command', command},
- {'constant', constant},
- {'variable', variable},
- {'operator', operator},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
-}
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-M._foldsymbols = {
- _patterns = {'[A-Z]+', '[%(%){}]', '#'},
- [l.KEYWORD] = {
- IF = 1, ENDIF = -1, FOREACH = 1, ENDFOREACH = -1, WHILE = 1, ENDWHILE = -1
- },
- [l.FUNCTION] = {MACRO = 1, ENDMACRO = -1},
- [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'IF', 'ENDIF')
+lex:add_fold_point(lexer.KEYWORD, 'FOREACH', 'ENDFOREACH')
+lex:add_fold_point(lexer.KEYWORD, 'WHILE', 'ENDWHILE')
+lex:add_fold_point(lexer.FUNCTION, 'MACRO', 'ENDMACRO')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
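
Editor's note: two conventions in this rewrite are easy to miss. word_match's second argument makes the whole list case-insensitive (CMake accepts keywords in either case), and the P('$') * lexer.range('{', '}') appended to the variable list is what picks up ${VAR}-style references. A sketch of both, assuming this repo's lexer.lua is loadable and lpeg is available as in the lexer host:

local lpeg = require('lpeg')
local lexer = require('lexer') -- this repo's lexer module

local kw = lexer.word_match('IF ENDIF FOREACH ENDFOREACH', true)
assert(lpeg.match(kw, 'endif'))  -- the flag enables case-insensitive matching

local ref = lpeg.P('$') * lexer.range('{', '}')
assert(lpeg.match(ref, '${CMAKE_SOURCE_DIR}') == 20) -- consumes the whole reference
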
diff --git a/lua/lexers/coffeescript.lua b/lua/lexers/coffeescript.lua
index 737c10b..be4be45 100644
--- a/lua/lexers/coffeescript.lua
+++ b/lua/lexers/coffeescript.lua
@@ -1,62 +1,49 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- CoffeeScript LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'coffeescript'}
+local lex = lexer.new('coffeescript', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local block_comment = '###' * (l.any - '###')^0 * P('###')^-1
-local line_comment = '#' * l.nonnewline_esc^0
-local comment = token(l.COMMENT, block_comment + line_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local regex_str = #P('/') * l.last_char_includes('+-*%<>!=^&|?~:;,([{') *
- l.delimited_range('/', true) * S('igm')^0
-local string = token(l.STRING, sq_str + dq_str) + token(l.REGEX, regex_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'all', 'and', 'bind', 'break', 'by', 'case', 'catch', 'class', 'const',
- 'continue', 'default', 'delete', 'do', 'each', 'else', 'enum', 'export',
- 'extends', 'false', 'for', 'finally', 'function', 'if', 'import', 'in',
- 'instanceof', 'is', 'isnt', 'let', 'loop', 'native', 'new', 'no', 'not', 'of',
- 'off', 'on', 'or', 'return', 'super', 'switch', 'then', 'this', 'throw',
- 'true', 'try', 'typeof', 'unless', 'until', 'var', 'void', 'with', 'when',
- 'while', 'yes'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'all', 'and', 'bind', 'break', 'by', 'case', 'catch', 'class', 'const', 'continue', 'default',
+ 'delete', 'do', 'each', 'else', 'enum', 'export', 'extends', 'false', 'finally', 'for',
+ 'function', 'if', 'import', 'in', 'instanceof', 'is', 'isnt', 'let', 'loop', 'native', 'new',
+ 'no', 'not', 'of', 'off', 'on', 'or', 'return', 'super', 'switch', 'then', 'this', 'throw',
+ 'true', 'try', 'typeof', 'unless', 'until', 'var', 'void', 'when', 'while', 'with', 'yes'
+}))
-- Fields: object properties and methods.
-local field = token(l.FUNCTION, '.' * (S('_$') + l.alpha) *
- (S('_$') + l.alnum)^0)
+lex:add_rule('field',
+ token(lexer.FUNCTION, '.' * (S('_$') + lexer.alpha) * (S('_$') + lexer.alnum)^0))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local string = token(lexer.STRING, sq_str + dq_str)
+local regex_str =
+ #P('/') * lexer.last_char_includes('+-*%<>!=^&|?~:;,([{') * lexer.range('/', true) * S('igm')^0
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
+
+-- Comments.
+local block_comment = lexer.range('###')
+local line_comment = lexer.to_eol('#', true)
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'field', field},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'string', string},
- {'operator', operator},
-}
-
-M._FOLDBYINDENTATION = true
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')))
+
+return lex
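
Editor's note: fold-by-indentation moved from a post-hoc M._FOLDBYINDENTATION field to a constructor option, so indentation-folded lexers need no add_fold_point() calls at all. A minimal skeleton of the idiom, with an illustrative language name:

local lexer = require('lexer')
local token = lexer.token

-- Fold on indentation rather than on explicit fold points.
local lex = lexer.new('mylang', {fold_by_indentation = true})
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))

return lex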
diff --git a/lua/lexers/container.lua b/lua/lexers/container.lua
index 90990f0..52edc0d 100644
--- a/lua/lexers/container.lua
+++ b/lua/lexers/container.lua
@@ -1,7 +1,5 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Container LPeg lexer.
-- This is SciTE's plain text lexer.
-local M = {_NAME = 'container'}
-
-return M
+return require('lexer').new('container')
diff --git a/lua/lexers/context.lua b/lua/lexers/context.lua
index 8c3a2bb..2124287 100644
--- a/lua/lexers/context.lua
+++ b/lua/lexers/context.lua
@@ -1,59 +1,53 @@
--- Copyright 2006-2017 Robert Gieseke. See LICENSE.
+-- Copyright 2006-2022 Robert Gieseke, Lars Otter. See LICENSE.
-- ConTeXt LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'context'}
+local lex = lexer.new('context')
+
+-- TeX and ConTeXt mkiv environment definitions.
+local beginend = (P('begin') + 'end')
+local startstop = (P('start') + 'stop')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, '%' * l.nonnewline^0)
-
--- Commands.
-local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}')))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- Sections.
-local section = token('section', '\\' * word_match{
- 'part', 'chapter', 'section', 'subsection', 'subsubsection', 'title',
- 'subject', 'subsubject', 'subsubsubject'
-})
+local wm_section = word_match{
+ 'chapter', 'part', 'section', 'subject', 'subsection', 'subsubject', 'subsubsection',
+ 'subsubsubject', 'subsubsubsection', 'subsubsubsubject', 'title'
+}
+local section = token(lexer.CLASS, '\\' * startstop^-1 * wm_section)
+lex:add_rule('section', section)
--- ConTeXt environments.
-local environment = token('environment', '\\' * (P('start') + 'stop') * l.word)
+-- TeX and ConTeXt mkiv environments.
+local environment = token(lexer.STRING, '\\' * (beginend + startstop) * lexer.alpha^1)
+lex:add_rule('environment', environment)
--- Operators.
-local operator = token(l.OPERATOR, S('$&#{}[]'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'environment', environment},
- {'section', section},
- {'keyword', command},
- {'operator', operator},
-}
+-- Commands.
+local command = token(lexer.KEYWORD, '\\' *
+ (lexer.alpha^1 * P('\\') * lexer.space^1 + lexer.alpha^1 + S('!"#$%&\',./;=[\\]_{|}~`^-')))
+lex:add_rule('command', command)
-M._tokenstyles = {
- environment = l.STYLE_KEYWORD,
- section = l.STYLE_CLASS
-}
+-- Operators.
+local operator = token(lexer.OPERATOR, S('#$_[]{}~^'))
+lex:add_rule('operator', operator)
-M._foldsymbols = {
- _patterns = {'\\start', '\\stop', '[{}]', '%%'},
- ['environment'] = {['\\start'] = 1, ['\\stop'] = -1},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['%'] = l.fold_line_comments('%')}
-}
+-- Fold points.
+lex:add_fold_point('environment', '\\start', '\\stop')
+lex:add_fold_point('environment', '\\begin', '\\end')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%'))
-- Embedded Lua.
-local luatex = l.load('lua')
+local luatex = lexer.load('lua')
local luatex_start_rule = #P('\\startluacode') * environment
local luatex_end_rule = #P('\\stopluacode') * environment
-l.embed_lexer(M, luatex, luatex_start_rule, luatex_end_rule)
-
+lex:embed(luatex, luatex_start_rule, luatex_end_rule)
-return M
+return lex
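
Editor's note: embed() is the new-API replacement for l.embed_lexer(). A minimal sketch of the same parent/child wiring, runnable in the lexer host (where lpeg is a global, as in the files above); the host name and delimiters are illustrative, while context.lua instead uses a lookahead (#) plus its own environment token so the delimiters keep their ConTeXt styling:

local lexer = require('lexer')
local token = lexer.token
local P = lpeg.P

local host = lexer.new('host')
host:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))

-- Load the child lexer and mark the embedded region with start/end rules;
-- everything between the two delimiters is lexed as Lua.
local lua = lexer.load('lua')
local start_rule = token(lexer.PREPROCESSOR, P('<?lua'))
local end_rule = token(lexer.PREPROCESSOR, P('?>'))
host:embed(lua, start_rule, end_rule)

return host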
diff --git a/lua/lexers/cpp.lua b/lua/lexers/cpp.lua
index ec3daad..73ee0b1 100644
--- a/lua/lexers/cpp.lua
+++ b/lua/lexers/cpp.lua
@@ -1,90 +1,75 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- C++ LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'cpp'}
+local lex = lexer.new('cpp')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Preprocessor.
-local preproc_word = word_match{
- 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'import',
- 'line', 'pragma', 'undef', 'using', 'warning'
-}
-local preproc = #l.starts_line('#') *
- (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) +
- token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
- (token(l.WHITESPACE, S('\t ')^1) *
- token(l.STRING, l.delimited_range('<>', true, true)))^-1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast',
- 'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else', 'explicit',
- 'export', 'extern', 'false', 'for', 'friend', 'goto', 'if', 'inline',
- 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public',
- 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast',
- 'switch', 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid',
- 'typename', 'using', 'virtual', 'volatile', 'while',
- -- Operators
- 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq',
- 'xor', 'xor_eq',
- -- C++11
- 'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept',
- 'override', 'static_assert', 'thread_local'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast', 'continue', 'default',
+ 'delete', 'do', 'dynamic_cast', 'else', 'explicit', 'export', 'extern', 'false', 'for', 'friend',
+ 'goto', 'if', 'inline', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected',
+ 'public', 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast', 'switch',
+ 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typename', 'using', 'virtual',
+ 'volatile', 'while',
+ -- Operators.
+ 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq',
+ -- C++11.
+ 'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept', 'override', 'static_assert',
+ 'thread_local'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed',
- 'struct', 'union', 'unsigned', 'void', 'wchar_t',
- -- C++11
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union',
+ 'unsigned', 'void', 'wchar_t',
+ -- C++11.
'char16_t', 'char32_t', 'nullptr'
-})
+}))
+
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+local dec = lexer.digit^1 * ("'" * lexer.digit^1)^0
+local hex = '0' * S('xX') * lexer.xdigit^1 * ("'" * lexer.xdigit^1)^0
+local bin = '0' * S('bB') * S('01')^1 * ("'" * S('01')^1)^0 * -lexer.xdigit
+local integer = S('+-')^-1 * (hex + bin + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
+
+-- Preprocessor.
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
+ (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1
+local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
+ word_match('define elif else endif error if ifdef ifndef import line pragma undef using warning'))
+lex:add_rule('preprocessor', include + preproc)
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.PREPROCESSOR] = {
- region = 1, endregion = -1,
- ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
- },
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
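
Editor's note: the rewritten number rule is more than a mechanical port; it adds C++14 digit separators (1'000'000) to the decimal, hex, and binary forms. A standalone re-statement in plain lpeg (local names are stand-ins for the lexer module's patterns):

local lpeg = require('lpeg')
local S, R = lpeg.S, lpeg.R

local digit, xdigit = R('09'), R('09', 'af', 'AF')
local dec = digit^1 * ("'" * digit^1)^0
local hex = '0' * S('xX') * xdigit^1 * ("'" * xdigit^1)^0
local bin = '0' * S('bB') * S('01')^1 * ("'" * S('01')^1)^0 * -xdigit
local integer = S('+-')^-1 * (hex + bin + dec)

assert(lpeg.match(integer, "1'000'000") == 10) -- digit separators accepted
assert(lpeg.match(integer, '0xDEAD') == 7)
assert(lpeg.match(integer, '0b1010') == 7)
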
diff --git a/lua/lexers/crystal.lua b/lua/lexers/crystal.lua
index 5195387..f8b2211 100644
--- a/lua/lexers/crystal.lua
+++ b/lua/lexers/crystal.lua
@@ -1,141 +1,102 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Copyright 2017 Michel Martens.
-- Crystal LPeg lexer (based on Ruby).
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'crystal'}
+local lex = lexer.new('crystal')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '#' * l.nonnewline_esc^0
-local comment = token(l.COMMENT, line_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'alias', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', 'else', 'elsif', 'end',
+ 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil', 'not', 'redo', 'rescue', 'retry',
+ 'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until', 'when', 'while', 'yield',
+ '__FILE__', '__LINE__'
+}))
-local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'}
-local literal_delimitted = P(function(input, index)
- local delimiter = input:sub(index, index)
- if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics
- local match_pos, patt
- if delimiter_matches[delimiter] then
- -- Handle nested delimiter/matches in strings.
- local s, e = delimiter, delimiter_matches[delimiter]
- patt = l.delimited_range(s..e, false, false, true)
- else
- patt = l.delimited_range(delimiter)
- end
- match_pos = lpeg.match(patt, input, index)
- return match_pos or #input + 1
- end
-end)
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abort', 'at_exit', 'caller', 'delay', 'exit', 'fork', 'future', 'get_stack_top', 'gets', 'lazy',
+ 'loop', 'main', 'p', 'print', 'printf', 'puts', 'raise', 'rand', 'read_line', 'require', 'sleep',
+ 'spawn', 'sprintf', 'system', 'with_color',
+ -- Macros.
+ 'assert_responds_to', 'debugger', 'parallel', 'pp', 'record', 'redefine_main'
+}) * -S('.:|'))
+
+-- Identifiers.
+local word_char = lexer.alnum + S('_!?')
+local word = (lexer.alpha + '_') * word_char^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Strings.
-local cmd_str = l.delimited_range('`')
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
+local cmd_str = lexer.range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
local heredoc = '<<' * P(function(input, index)
- local s, e, indented, _, delimiter =
- input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
- if s == index and delimiter then
- local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
- local _, e = input:find(end_heredoc..delimiter, e)
- return e and e + 1 or #input + 1
- end
+ local _, e, indented, _, delimiter = input:find('^(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
+ if not delimiter then return end
+ local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
+ _, e = input:find(end_heredoc .. delimiter, e)
+ return e and e + 1 or #input + 1
end)
+local string = token(lexer.STRING, (sq_str + dq_str + heredoc + cmd_str) * S('f')^-1)
-- TODO: regex_str fails with `obj.method /patt/` syntax.
-local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
- l.delimited_range('/', true, false) * S('iomx')^0
-local string = token(l.STRING, (sq_str + dq_str + heredoc + cmd_str) *
- S('f')^-1) +
- token(l.REGEX, regex_str)
-
-local word_char = l.alnum + S('_!?')
+local regex_str =
+ #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * lexer.range('/', true) * S('iomx')^0
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0 * S('ri')^-1
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1
local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
-local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec)
+local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
-local numeric_literal = '?' * (l.any - l.space) * -word_char
-local number = token(l.NUMBER, l.float * S('ri')^-1 + integer + numeric_literal)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'alias', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', 'else',
- 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil',
- 'not', 'redo', 'rescue', 'retry', 'return', 'self', 'super', 'then', 'true',
- 'undef', 'unless', 'until', 'when', 'while', 'yield', '__FILE__', '__LINE__'
-}, '?!'))
-
--- Functions.
-local func = token(l.FUNCTION, word_match({
- 'abort', 'at_exit', 'caller', 'delay', 'exit', 'fork', 'future',
- 'get_stack_top', 'gets', 'lazy', 'loop', 'main', 'p', 'print', 'printf',
- 'puts', 'raise', 'rand', 'read_line', 'require', 'sleep', 'spawn', 'sprintf',
- 'system', 'with_color',
- -- Macros
- 'assert_responds_to', 'debugger', 'parallel', 'pp', 'record', 'redefine_main'
-}, '?!')) * -S('.:|')
-
--- Identifiers.
-local word = (l.alpha + '_') * word_char^0
-local identifier = token(l.IDENTIFIER, word)
+local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char
+lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + numeric_literal))
-- Variables.
-local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + l.digit + '-' *
- S('0FadiIKlpvw'))
+local global_var = '$' *
+ (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + '-' * S('0FadiIKlpvw'))
local class_var = '@@' * word
local inst_var = '@' * word
-local variable = token(l.VARIABLE, global_var + class_var + inst_var)
+lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + inst_var))
-- Symbols.
-local symbol = token('symbol', ':' * P(function(input, index)
+lex:add_rule('symbol', token('symbol', ':' * P(function(input, index)
if input:sub(index - 2, index - 2) ~= ':' then return index end
-end) * (word_char^1 + sq_str + dq_str))
+end) * (word_char^1 + sq_str + dq_str)))
+lex:add_style('symbol', lexer.styles.constant)
-- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'variable', variable},
- {'symbol', symbol},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- symbol = l.STYLE_CONSTANT
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~')))
+-- Fold points.
local function disambiguate(text, pos, line, s)
- return line:sub(1, s - 1):match('^%s*$') and
- not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
+ return line:sub(1, s - 1):match('^%s*$') and not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and
+ 1 or 0
end
-
-M._foldsymbols = {
- _patterns = {'%l+', '[%(%)%[%]{}]', '#'},
- [l.KEYWORD] = {
- begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
- case = 1,
- ['if'] = disambiguate, ['while'] = disambiguate,
- ['unless'] = disambiguate, ['until'] = disambiguate,
- ['end'] = -1
- },
- [l.OPERATOR] = {
- ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {
- ['#'] = l.fold_line_comments('#')
- }
-}
-
-return M
+lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'case', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'class', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'def', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'do', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'for', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'module', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'if', disambiguate)
+lex:add_fold_point(lexer.KEYWORD, 'while', disambiguate)
+lex:add_fold_point(lexer.KEYWORD, 'unless', disambiguate)
+lex:add_fold_point(lexer.KEYWORD, 'until', disambiguate)
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+
+return lex
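
Editor's note: the heredoc rule above relies on an LPeg match-time function: P(f) calls f(input, index) at the current position, and the match succeeds consuming up to the returned position, or fails when f returns nil. A self-contained illustration of the technique with a fixed terminator (illustrative only; just lpeg is required):

local lpeg = require('lpeg')
local P = lpeg.P

local upto_END = P(function(input, index)
  local _, e = input:find('END', index, true) -- plain-text find, no patterns
  return e and e + 1 or #input + 1 -- consume to the terminator, else to EOF
end)

assert(lpeg.match(P('X') * upto_END, 'X...END!') == 8) -- stops after 'END'
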
diff --git a/lua/lexers/csharp.lua b/lua/lexers/csharp.lua
index cdae6ab..1d209ed 100644
--- a/lua/lexers/csharp.lua
+++ b/lua/lexers/csharp.lua
@@ -1,84 +1,64 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- C# LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'csharp'}
+local lex = lexer.new('csharp')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'class', 'delegate', 'enum', 'event', 'interface', 'namespace', 'struct', 'using', 'abstract',
+ 'const', 'explicit', 'extern', 'fixed', 'implicit', 'internal', 'lock', 'out', 'override',
+ 'params', 'partial', 'private', 'protected', 'public', 'ref', 'sealed', 'static', 'readonly',
+ 'unsafe', 'virtual', 'volatile', 'add', 'as', 'assembly', 'base', 'break', 'case', 'catch',
+ 'checked', 'continue', 'default', 'do', 'else', 'finally', 'for', 'foreach', 'get', 'goto', 'if',
+ 'in', 'is', 'new', 'remove', 'return', 'set', 'sizeof', 'stackalloc', 'super', 'switch', 'this',
+ 'throw', 'try', 'typeof', 'unchecked', 'value', 'var', 'void', 'while', 'yield', 'null', 'true',
+ 'false'
+}))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long', 'object', 'operator',
+ 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local ml_str = P('@')^-1 * l.delimited_range('"', false, true)
-local string = token(l.STRING, sq_str + dq_str + ml_str)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local ml_str = P('@')^-1 * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str))
-- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('lLdDfFMm')^-1)
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfFmM')^-1))
-- Preprocessor.
-local preproc_word = word_match{
- 'define', 'elif', 'else', 'endif', 'error', 'if', 'line', 'undef', 'warning',
- 'region', 'endregion'
-}
-local preproc = token(l.PREPROCESSOR,
- l.starts_line('#') * S('\t ')^0 * preproc_word *
- (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0))
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'class', 'delegate', 'enum', 'event', 'interface', 'namespace', 'struct',
- 'using', 'abstract', 'const', 'explicit', 'extern', 'fixed', 'implicit',
- 'internal', 'lock', 'out', 'override', 'params', 'partial', 'private',
- 'protected', 'public', 'ref', 'sealed', 'static', 'readonly', 'unsafe',
- 'virtual', 'volatile', 'add', 'as', 'assembly', 'base', 'break', 'case',
- 'catch', 'checked', 'continue', 'default', 'do', 'else', 'finally', 'for',
- 'foreach', 'get', 'goto', 'if', 'in', 'is', 'new', 'remove', 'return', 'set',
- 'sizeof', 'stackalloc', 'super', 'switch', 'this', 'throw', 'try', 'typeof',
- 'unchecked', 'value', 'void', 'while', 'yield',
- 'null', 'true', 'false'
-})
-
--- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long', 'object',
- 'operator', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort'
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
+ word_match('define elif else endif error if line undef warning region endregion')))
-- Operators.
-local operator = token(l.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.PREPROCESSOR] = {
- region = 1, endregion = -1,
- ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
- },
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
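
Editor's note: in the rewritten string rule, the trailing false, false arguments of lexer.range() disable the single-line restriction and backslash escapes, which is what C# @"verbatim" strings need (backslashes are literal and the string may span lines; doubled "" escapes are still not handled). A sketch of the difference, assuming this repo's lexer module and the escape-by-default behavior range() applies to identical single-character delimiters:

local lpeg = require('lpeg')
local lexer = require('lexer')

local escaped = lexer.range('"', true)          -- single line, \ escapes (the default here)
local verbatim = lexer.range('"', false, false) -- multi-line, \ is a literal character

local s = '"a\\"b"' -- the six characters "a\"b"
assert(lpeg.match(escaped, s) == 7)  -- \" is an escape; the match reaches the final quote
assert(lpeg.match(verbatim, s) == 5) -- \ is literal; the quote at position 4 closes the string
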
diff --git a/lua/lexers/css.lua b/lua/lexers/css.lua
index 0fb88ec..de9fd96 100644
--- a/lua/lexers/css.lua
+++ b/lua/lexers/css.lua
@@ -1,166 +1,168 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- CSS LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'css'}
+local lex = lexer.new('css')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Properties.
+lex:add_rule('property', token('property', word_match{
+ -- CSS 1.
+ 'color', 'background-color', 'background-image', 'background-repeat', 'background-attachment',
+ 'background-position', 'background', 'font-family', 'font-style', 'font-variant', 'font-weight',
+ 'font-size', 'font', 'word-spacing', 'letter-spacing', 'text-decoration', 'vertical-align',
+ 'text-transform', 'text-align', 'text-indent', 'line-height', 'margin-top', 'margin-right',
+ 'margin-bottom', 'margin-left', 'margin', 'padding-top', 'padding-right', 'padding-bottom',
+ 'padding-left', 'padding', 'border-top-width', 'border-right-width', 'border-bottom-width',
+ 'border-left-width', 'border-width', 'border-top', 'border-right', 'border-bottom', 'border-left',
+ 'border', 'border-color', 'border-style', 'width', 'height', 'float', 'clear', 'display',
+ 'white-space', 'list-style-type', 'list-style-image', 'list-style-position', 'list-style',
+ -- CSS 2.
+ 'border-top-color', 'border-right-color', 'border-bottom-color', 'border-left-color',
+ 'border-color', 'border-top-style', 'border-right-style', 'border-bottom-style',
+ 'border-left-style', 'border-style', 'top', 'right', 'bottom', 'left', 'position', 'z-index',
+ 'direction', 'unicode-bidi', 'min-width', 'max-width', 'min-height', 'max-height', 'overflow',
+ 'clip', 'visibility', 'content', 'quotes', 'counter-reset', 'counter-increment', 'marker-offset',
+ 'size', 'marks', 'page-break-before', 'page-break-after', 'page-break-inside', 'page', 'orphans',
+ 'widows', 'font-stretch', 'font-size-adjust', 'unicode-range', 'units-per-em', 'src', 'panose-1',
+ 'stemv', 'stemh', 'slope', 'cap-height', 'x-height', 'ascent', 'descent', 'widths', 'bbox',
+ 'definition-src', 'baseline', 'centerline', 'mathline', 'topline', 'text-shadow', 'caption-side',
+ 'table-layout', 'border-collapse', 'border-spacing', 'empty-cells', 'speak-header', 'cursor',
+ 'outline', 'outline-width', 'outline-style', 'outline-color', 'volume', 'speak', 'pause-before',
+ 'pause-after', 'pause', 'cue-before', 'cue-after', 'cue', 'play-during', 'azimuth', 'elevation',
+ 'speech-rate', 'voice-family', 'pitch', 'pitch-range', 'stress', 'richness', 'speak-punctuation',
+ 'speak-numeral',
+ -- CSS 3.
+ 'flex', 'flex-basis', 'flex-direction', 'flex-flow', 'flex-grow', 'flex-shrink', 'flex-wrap',
+ 'align-content', 'align-items', 'align-self', 'justify-content', 'order', 'border-radius',
+ 'transition', 'transform', 'box-shadow', 'filter', 'opacity', 'resize', 'word-break', 'word-wrap',
+ 'box-sizing', 'animation', 'text-overflow'
+}))
+lex:add_style('property', lexer.styles.keyword)
+
+-- Values.
+lex:add_rule('value', token('value', word_match{
+ -- CSS 1.
+ 'auto', 'none', 'normal', 'italic', 'oblique', 'small-caps', 'bold', 'bolder', 'lighter',
+ 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large', 'larger', 'smaller',
+ 'transparent', 'repeat', 'repeat-x', 'repeat-y', 'no-repeat', 'scroll', 'fixed', 'top', 'bottom',
+ 'left', 'center', 'right', 'justify', 'both', 'underline', 'overline', 'line-through', 'blink',
+ 'baseline', 'sub', 'super', 'text-top', 'middle', 'text-bottom', 'capitalize', 'uppercase',
+ 'lowercase', 'thin', 'medium', 'thick', 'dotted', 'dashed', 'solid', 'double', 'groove', 'ridge',
+ 'inset', 'outset', 'block', 'inline', 'list-item', 'pre', 'no-wrap', 'inside', 'outside', 'disc',
+ 'circle', 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', 'upper-alpha', 'aqua',
+ 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy', 'olive', 'purple', 'red',
+ 'silver', 'teal', 'white', 'yellow',
+ -- CSS 2.
+ 'inherit', 'run-in', 'compact', 'marker', 'table', 'inline-table', 'table-row-group',
+ 'table-header-group', 'table-footer-group', 'table-row', 'table-column-group', 'table-column',
+ 'table-cell', 'table-caption', 'static', 'relative', 'absolute', 'fixed', 'ltr', 'rtl', 'embed',
+ 'bidi-override', 'visible', 'hidden', 'scroll', 'collapse', 'open-quote', 'close-quote',
+ 'no-open-quote', 'no-close-quote', 'decimal-leading-zero', 'lower-greek', 'lower-latin',
+ 'upper-latin', 'hebrew', 'armenian', 'georgian', 'cjk-ideographic', 'hiragana', 'katakana',
+ 'hiragana-iroha', 'katakana-iroha', 'landscape', 'portrait', 'crop', 'cross', 'always', 'avoid',
+ 'wider', 'narrower', 'ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed',
+ 'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded', 'caption', 'icon', 'menu',
+ 'message-box', 'small-caption', 'status-bar', 'separate', 'show', 'hide', 'once', 'crosshair',
+ 'default', 'pointer', 'move', 'text', 'wait', 'help', 'e-resize', 'ne-resize', 'nw-resize',
+ 'n-resize', 'se-resize', 'sw-resize', 's-resize', 'w-resize', 'ActiveBorder', 'ActiveCaption',
+ 'AppWorkspace', 'Background', 'ButtonFace', 'ButtonHighlight', 'ButtonShadow',
+ 'InactiveCaptionText', 'ButtonText', 'CaptionText', 'GrayText', 'Highlight', 'HighlightText',
+ 'InactiveBorder', 'InactiveCaption', 'InfoBackground', 'InfoText', 'Menu', 'MenuText',
+ 'Scrollbar', 'ThreeDDarkShadow', 'ThreeDFace', 'ThreeDHighlight', 'ThreeDLightShadow',
+ 'ThreeDShadow', 'Window', 'WindowFrame', 'WindowText', 'silent', 'x-soft', 'soft', 'medium',
+ 'loud', 'x-loud', 'spell-out', 'mix', 'left-side', 'far-left', 'center-left', 'center-right',
+ 'far-right', 'right-side', 'behind', 'leftwards', 'rightwards', 'below', 'level', 'above',
+ 'higher', 'lower', 'x-slow', 'slow', 'medium', 'fast', 'x-fast', 'faster', 'slower', 'male',
+ 'female', 'child', 'x-low', 'low', 'high', 'x-high', 'code', 'digits', 'continuous',
+ -- CSS 3.
+ 'flex', 'row', 'column', 'ellipsis', 'inline-block'
+}))
+lex:add_style('value', lexer.styles.constant)
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'attr', 'blackness', 'blend', 'blenda', 'blur', 'brightness', 'calc', 'circle', 'color-mod',
+ 'contrast', 'counter', 'cubic-bezier', 'device-cmyk', 'drop-shadow', 'ellipse', 'gray',
+ 'grayscale', 'hsl', 'hsla', 'hue', 'hue-rotate', 'hwb', 'image', 'inset', 'invert', 'lightness',
+ 'linear-gradient', 'matrix', 'matrix3d', 'opacity', 'perspective', 'polygon', 'radial-gradient',
+ 'rect', 'repeating-linear-gradient', 'repeating-radial-gradient', 'rgb', 'rgba', 'rotate',
+ 'rotate3d', 'rotateX', 'rotateY', 'rotateZ', 'saturate', 'saturation', 'scale', 'scale3d',
+ 'scaleX', 'scaleY', 'scaleZ', 'sepia', 'shade', 'skewX', 'skewY', 'steps', 'tint', 'toggle',
+ 'translate', 'translate3d', 'translateX', 'translateY', 'translateZ', 'url', 'whiteness', 'var'
+}))
--- Comments.
-local comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
+-- Colors.
+local xdigit = lexer.xdigit
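+-- A color is either a named color or '#' followed by three or six hex digits, e.g. #fff or
+-- #ffcc00.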
+lex:add_rule('color', token('color', word_match{
+ 'aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black',
+ 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse',
+ 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan',
+ 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta',
+ 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen',
+ 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink',
+ 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen',
+ 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey',
+ 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush',
+ 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow',
+ 'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen',
+ 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime',
+ 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid',
+ 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise',
+ 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy',
+ 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen',
+ 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue',
+ 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown',
+ 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey',
+ 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'transparent',
+ 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen'
+} + '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1))
+lex:add_style('color', lexer.styles.number)
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('_-'))^0))
+
+-- Pseudo classes and pseudo elements.
+lex:add_rule('pseudoclass', ':' * token('pseudoclass', word_match{
+ 'active', 'checked', 'disabled', 'empty', 'enabled', 'first-child', 'first-of-type', 'focus',
+ 'hover', 'in-range', 'invalid', 'lang', 'last-child', 'last-of-type', 'link', 'not', 'nth-child',
+ 'nth-last-child', 'nth-last-of-type', 'nth-of-type', 'only-of-type', 'only-child', 'optional',
+ 'out-of-range', 'read-only', 'read-write', 'required', 'root', 'target', 'valid', 'visited'
+}))
+lex:add_style('pseudoclass', lexer.styles.constant)
+lex:add_rule('pseudoelement', '::' *
+ token('pseudoelement', word_match('after before first-letter first-line selection')))
+lex:add_style('pseudoelement', lexer.styles.constant)
-- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str)
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Numbers.
-local number = token(l.NUMBER, l.digit^1)
-
--- Keywords.
-local css1_property = word_match({
- 'color', 'background-color', 'background-image', 'background-repeat',
- 'background-attachment', 'background-position', 'background', 'font-family',
- 'font-style', 'font-variant', 'font-weight', 'font-size', 'font',
- 'word-spacing', 'letter-spacing', 'text-decoration', 'vertical-align',
- 'text-transform', 'text-align', 'text-indent', 'line-height', 'margin-top',
- 'margin-right', 'margin-bottom', 'margin-left', 'margin', 'padding-top',
- 'padding-right', 'padding-bottom', 'padding-left', 'padding',
- 'border-top-width', 'border-right-width', 'border-bottom-width',
- 'border-left-width', 'border-width', 'border-top', 'border-right',
- 'border-bottom', 'border-left', 'border', 'border-color', 'border-style',
- 'width', 'height', 'float', 'clear', 'display', 'white-space',
- 'list-style-type', 'list-style-image', 'list-style-position', 'list-style'
-}, '-')
-local css1_value = word_match({
- 'auto', 'none', 'normal', 'italic', 'oblique', 'small-caps', 'bold', 'bolder',
- 'lighter', 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large',
- 'xx-large', 'larger', 'smaller', 'transparent', 'repeat', 'repeat-x',
- 'repeat-y', 'no-repeat', 'scroll', 'fixed', 'top', 'bottom', 'left', 'center',
- 'right', 'justify', 'both', 'underline', 'overline', 'line-through', 'blink',
- 'baseline', 'sub', 'super', 'text-top', 'middle', 'text-bottom', 'capitalize',
- 'uppercase', 'lowercase', 'thin', 'medium', 'thick', 'dotted', 'dashed',
- 'solid', 'double', 'groove', 'ridge', 'inset', 'outset', 'block', 'inline',
- 'list-item', 'pre', 'no-wrap', 'inside', 'outside', 'disc', 'circle',
- 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha',
- 'upper-alpha', 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime',
- 'maroon', 'navy', 'olive', 'purple', 'red', 'silver', 'teal', 'white',
- 'yellow'
-}, '-')
-local css2_property = word_match({
- 'border-top-color', 'border-right-color', 'border-bottom-color',
- 'border-left-color', 'border-color', 'border-top-style', 'border-right-style',
- 'border-bottom-style', 'border-left-style', 'border-style', 'top', 'right',
- 'bottom', 'left', 'position', 'z-index', 'direction', 'unicode-bidi',
- 'min-width', 'max-width', 'min-height', 'max-height', 'overflow', 'clip',
- 'visibility', 'content', 'quotes', 'counter-reset', 'counter-increment',
- 'marker-offset', 'size', 'marks', 'page-break-before', 'page-break-after',
- 'page-break-inside', 'page', 'orphans', 'widows', 'font-stretch',
- 'font-size-adjust', 'unicode-range', 'units-per-em', 'src', 'panose-1',
- 'stemv', 'stemh', 'slope', 'cap-height', 'x-height', 'ascent', 'descent',
- 'widths', 'bbox', 'definition-src', 'baseline', 'centerline', 'mathline',
- 'topline', 'text-shadow', 'caption-side', 'table-layout', 'border-collapse',
- 'border-spacing', 'empty-cells', 'speak-header', 'cursor', 'outline',
- 'outline-width', 'outline-style', 'outline-color', 'volume', 'speak',
- 'pause-before', 'pause-after', 'pause', 'cue-before', 'cue-after', 'cue',
- 'play-during', 'azimuth', 'elevation', 'speech-rate', 'voice-family', 'pitch',
- 'pitch-range', 'stress', 'richness', 'speak-punctuation', 'speak-numeral'
-}, '-')
-local css2_value = word_match({
- 'inherit', 'run-in', 'compact', 'marker', 'table', 'inline-table',
- 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row',
- 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'static',
- 'relative', 'absolute', 'fixed', 'ltr', 'rtl', 'embed', 'bidi-override',
- 'visible', 'hidden', 'scroll', 'collapse', 'open-quote', 'close-quote',
- 'no-open-quote', 'no-close-quote', 'decimal-leading-zero', 'lower-greek',
- 'lower-latin', 'upper-latin', 'hebrew', 'armenian', 'georgian',
- 'cjk-ideographic', 'hiragana', 'katakana', 'hiragana-iroha', 'katakana-iroha',
- 'landscape', 'portrait', 'crop', 'cross', 'always', 'avoid', 'wider',
- 'narrower', 'ultra-condensed', 'extra-condensed', 'condensed',
- 'semi-condensed', 'semi-expanded', 'expanded', 'extra-expanded',
- 'ultra-expanded', 'caption', 'icon', 'menu', 'message-box', 'small-caption',
- 'status-bar', 'separate', 'show', 'hide', 'once', 'crosshair', 'default',
- 'pointer', 'move', 'text', 'wait', 'help', 'e-resize', 'ne-resize',
- 'nw-resize', 'n-resize', 'se-resize', 'sw-resize', 's-resize', 'w-resize',
- 'ActiveBorder', 'ActiveCaption', 'AppWorkspace', 'Background', 'ButtonFace',
- 'ButtonHighlight', 'ButtonShadow', 'InactiveCaptionText', 'ButtonText',
- 'CaptionText', 'GrayText', 'Highlight', 'HighlightText', 'InactiveBorder',
- 'InactiveCaption', 'InfoBackground', 'InfoText', 'Menu', 'MenuText',
- 'Scrollbar', 'ThreeDDarkShadow', 'ThreeDFace', 'ThreeDHighlight',
- 'ThreeDLightShadow', 'ThreeDShadow', 'Window', 'WindowFrame', 'WindowText',
- 'silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud', 'spell-out', 'mix',
- 'left-side', 'far-left', 'center-left', 'center-right', 'far-right',
- 'right-side', 'behind', 'leftwards', 'rightwards', 'below', 'level', 'above',
- 'higher', 'lower', 'x-slow', 'slow', 'medium', 'fast', 'x-fast', 'faster',
- 'slower', 'male', 'female', 'child', 'x-low', 'low', 'high', 'x-high', 'code',
- 'digits', 'continous'
-}, '-')
-local property = token(l.KEYWORD, css1_property + css2_property)
-local value = token('value', css1_value + css2_value)
-local keyword = property + value
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('/*', '*/')))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0)
+-- Numbers.
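+-- A decimal number may be followed by an optional unit token, e.g. 12px.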
+local unit = token('unit', word_match(
+ 'ch cm deg dpcm dpi dppx em ex grad Hz in kHz mm ms pc pt px q rad rem s turn vh vmax vmin vw'))
+lex:add_style('unit', lexer.styles.number)
+lex:add_rule('number', token(lexer.NUMBER, lexer.dec_num) * unit^-1)
-- Operators.
-local operator = token(l.OPERATOR, S('~!#*>+=|.,:;()[]{}'))
+lex:add_rule('operator', token(lexer.OPERATOR, S('~!#*>+=|.,:;()[]{}')))
-- At rule.
-local at_rule = token('at_rule', P('@') * word_match{
- 'charset', 'font-face', 'media', 'page', 'import'
-})
+lex:add_rule('at_rule', token('at_rule', '@' *
+ word_match('charset font-face media page import namespace keyframes')))
+lex:add_style('at_rule', lexer.styles.preprocessor)
--- Colors.
-local xdigit = l.xdigit
-local hex_color = '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1
-local color_name = word_match{
- 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy',
- 'olive', 'orange', 'purple', 'red', 'silver', 'teal', 'white', 'yellow'
-}
-local color = token('color', hex_color + color_name)
-
--- Pseudo.
-local pseudo = token(l.CONSTANT, word_match({
- -- Pseudo elements.
- 'first-line', 'first-letter', 'before', 'after',
- -- Pseudo classes.
- 'first-child', 'link', 'visited', 'hover', 'active', 'focus', 'lang',
-}, '-'))
-
--- Units.
-local unit = token('unit', word_match{
- 'em', 'ex', 'px', 'pt', 'pc', 'in', 'ft', 'mm', 'cm', 'kHz', 'Hz', 'deg',
- 'rad', 'grad', 'ms', 's'
-} + '%')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'pseudo', pseudo},
- {'color', color},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number * unit^-1},
- {'operator', operator},
- {'at_rule', at_rule},
-}
-
-M._tokenstyles = {
- unit = l.STYLE_LABEL,
- value = l.STYLE_CONSTANT,
- color = l.STYLE_NUMBER,
- at_rule = l.STYLE_PREPROCESSOR
-}
-
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1}
-}
-
-return M
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+
+return lex
diff --git a/lua/lexers/cuda.lua b/lua/lexers/cuda.lua
index 6a2c5cf..91818ae 100644
--- a/lua/lexers/cuda.lua
+++ b/lua/lexers/cuda.lua
@@ -1,92 +1,69 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- CUDA LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local table = _G.table
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'cuda'}
+local lex = lexer.new('cuda', {inherit = lexer.load('cpp')})
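+-- This lexer inherits all of the C++ lexer's rules; the calls below extend or replace them via
+-- modify_rule().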
-- Whitespace
-local ws = token(l.WHITESPACE, l.space^1)
+lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- '__global__', '__host__', '__device__', '__constant__', '__shared__'
-})
+local keyword = token(lexer.KEYWORD,
+ word_match('__global__ __host__ __device__ __constant__ __shared__'))
+lex:modify_rule('keyword', keyword + lex:get_rule('keyword'))
+
+-- Types.
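+-- The CUDA types below, along with the functions and variables further down, are appended to the
+-- inherited 'type' rule so they keep its position ahead of the C++ identifier rule.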
+lex:modify_rule('type', token(lexer.TYPE, word_match{
+ 'uint', 'int1', 'uint1', 'int2', 'uint2', 'int3', 'uint3', 'int4', 'uint4', 'float1', 'float2',
+ 'float3', 'float4', 'char1', 'char2', 'char3', 'char4', 'uchar1', 'uchar2', 'uchar3', 'uchar4',
+ 'short1', 'short2', 'short3', 'short4', 'dim1', 'dim2', 'dim3', 'dim4'
+}) + lex:get_rule('type') +
-- Functions.
-local func = token(l.FUNCTION, word_match{
+token(lexer.FUNCTION, word_match{
-- Atom.
- 'atomicAdd', 'atomicAnd', 'atomicCAS', 'atomicDec', 'atomicExch', 'atomicInc',
- 'atomicMax', 'atomicMin', 'atomicOr', 'atomicSub', 'atomicXor',
+ 'atomicAdd', 'atomicAnd', 'atomicCAS', 'atomicDec', 'atomicExch', 'atomicInc', 'atomicMax',
+ 'atomicMin', 'atomicOr', 'atomicSub', 'atomicXor', --
-- Dev.
- 'tex1D', 'tex1Dfetch', 'tex2D', '__float_as_int', '__int_as_float',
- '__float2int_rn', '__float2int_rz', '__float2int_ru', '__float2int_rd',
- '__float2uint_rn', '__float2uint_rz', '__float2uint_ru', '__float2uint_rd',
- '__int2float_rn', '__int2float_rz', '__int2float_ru', '__int2float_rd',
- '__uint2float_rn', '__uint2float_rz', '__uint2float_ru', '__uint2float_rd',
- '__fadd_rz', '__fmul_rz', '__fdividef', '__mul24', '__umul24', '__mulhi',
- '__umulhi', '__mul64hi', '__umul64hi', 'min', 'umin', 'fminf', 'fmin', 'max',
- 'umax', 'fmaxf', 'fmax', 'abs', 'fabsf', 'fabs', 'sqrtf', 'sqrt', 'sinf',
- '__sinf', 'sin', 'cosf', '__cosf', 'cos', 'sincosf', '__sincosf', 'expf',
- '__expf', 'exp', 'logf', '__logf', 'log',
+ 'tex1D', 'tex1Dfetch', 'tex2D', '__float_as_int', '__int_as_float', '__float2int_rn',
+ '__float2int_rz', '__float2int_ru', '__float2int_rd', '__float2uint_rn', '__float2uint_rz',
+ '__float2uint_ru', '__float2uint_rd', '__int2float_rn', '__int2float_rz', '__int2float_ru',
+ '__int2float_rd', '__uint2float_rn', '__uint2float_rz', '__uint2float_ru', '__uint2float_rd',
+ '__fadd_rz', '__fmul_rz', '__fdividef', '__mul24', '__umul24', '__mulhi', '__umulhi', '__mul64hi',
+ '__umul64hi', 'min', 'umin', 'fminf', 'fmin', 'max', 'umax', 'fmaxf', 'fmax', 'abs', 'fabsf',
+ 'fabs', 'sqrtf', 'sqrt', 'sinf', '__sinf', 'sin', 'cosf', '__cosf', 'cos', 'sincosf', '__sincosf',
+ 'expf', '__expf', 'exp', 'logf', '__logf', 'log', --
-- Runtime.
- 'cudaBindTexture', 'cudaBindTextureToArray', 'cudaChooseDevice',
- 'cudaConfigureCall', 'cudaCreateChannelDesc', 'cudaD3D10GetDevice',
- 'cudaD3D10MapResources', 'cudaD3D10RegisterResource',
- 'cudaD3D10ResourceGetMappedArray', 'cudaD3D10ResourceGetMappedPitch',
+ 'cudaBindTexture', 'cudaBindTextureToArray', 'cudaChooseDevice', 'cudaConfigureCall',
+ 'cudaCreateChannelDesc', 'cudaD3D10GetDevice', 'cudaD3D10MapResources',
+ 'cudaD3D10RegisterResource', 'cudaD3D10ResourceGetMappedArray', 'cudaD3D10ResourceGetMappedPitch',
'cudaD3D10ResourceGetMappedPointer', 'cudaD3D10ResourceGetMappedSize',
'cudaD3D10ResourceGetSurfaceDimensions', 'cudaD3D10ResourceSetMapFlags',
- 'cudaD3D10SetDirect3DDevice', 'cudaD3D10UnmapResources',
- 'cudaD3D10UnregisterResource', 'cudaD3D9GetDevice',
- 'cudaD3D9GetDirect3DDevice', 'cudaD3D9MapResources',
- 'cudaD3D9RegisterResource', 'cudaD3D9ResourceGetMappedArray',
- 'cudaD3D9ResourceGetMappedPitch', 'cudaD3D9ResourceGetMappedPointer',
- 'cudaD3D9ResourceGetMappedSize', 'cudaD3D9ResourceGetSurfaceDimensions',
- 'cudaD3D9ResourceSetMapFlags', 'cudaD3D9SetDirect3DDevice',
- 'cudaD3D9UnmapResources', 'cudaD3D9UnregisterResource', 'cudaEventCreate',
- 'cudaEventDestroy', 'cudaEventElapsedTime', 'cudaEventQuery',
- 'cudaEventRecord', 'cudaEventSynchronize', 'cudaFree', 'cudaFreeArray',
- 'cudaFreeHost', 'cudaGetChannelDesc', 'cudaGetDevice', 'cudaGetDeviceCount',
- 'cudaGetDeviceProperties', 'cudaGetErrorString', 'cudaGetLastError',
- 'cudaGetSymbolAddress', 'cudaGetSymbolSize', 'cudaGetTextureAlignmentOffset',
- 'cudaGetTextureReference', 'cudaGLMapBufferObject',
+ 'cudaD3D10SetDirect3DDevice', 'cudaD3D10UnmapResources', 'cudaD3D10UnregisterResource',
+ 'cudaD3D9GetDevice', 'cudaD3D9GetDirect3DDevice', 'cudaD3D9MapResources',
+ 'cudaD3D9RegisterResource', 'cudaD3D9ResourceGetMappedArray', 'cudaD3D9ResourceGetMappedPitch',
+ 'cudaD3D9ResourceGetMappedPointer', 'cudaD3D9ResourceGetMappedSize',
+ 'cudaD3D9ResourceGetSurfaceDimensions', 'cudaD3D9ResourceSetMapFlags',
+ 'cudaD3D9SetDirect3DDevice', 'cudaD3D9UnmapResources', 'cudaD3D9UnregisterResource',
+ 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventElapsedTime', 'cudaEventQuery',
+ 'cudaEventRecord', 'cudaEventSynchronize', 'cudaFree', 'cudaFreeArray', 'cudaFreeHost',
+ 'cudaGetChannelDesc', 'cudaGetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+ 'cudaGetErrorString', 'cudaGetLastError', 'cudaGetSymbolAddress', 'cudaGetSymbolSize',
+ 'cudaGetTextureAlignmentOffset', 'cudaGetTextureReference', 'cudaGLMapBufferObject',
'cudaGLRegisterBufferObject', 'cudaGLSetGLDevice', 'cudaGLUnmapBufferObject',
- 'cudaGLUnregisterBufferObject', 'cudaLaunch', 'cudaMalloc', 'cudaMalloc3D',
- 'cudaMalloc3DArray', 'cudaMallocArray', 'cudaMallocHost', 'cudaMallocPitch',
- 'cudaMemcpy', 'cudaMemcpy2D', 'cudaMemcpy2DArrayToArray',
- 'cudaMemcpy2DFromArray', 'cudaMemcpy2DToArray', 'cudaMemcpy3D',
- 'cudaMemcpyArrayToArray', 'cudaMemcpyFromArray', 'cudaMemcpyFromSymbol',
- 'cudaMemcpyToArray', 'cudaMemcpyToSymbol', 'cudaMemset', 'cudaMemset2D',
- 'cudaMemset3D', 'cudaSetDevice', 'cudaSetupArgument', 'cudaStreamCreate',
- 'cudaStreamDestroy', 'cudaStreamQuery', 'cudaStreamSynchronize',
- 'cudaThreadExit', 'cudaThreadSynchronize', 'cudaUnbindTexture'
-})
-
--- Types.
-local type = token(l.TYPE, word_match{
- 'uint', 'int1', 'uint1', 'int2', 'uint2', 'int3', 'uint3', 'int4', 'uint4',
- 'float1', 'float2', 'float3', 'float4', 'char1', 'char2', 'char3', 'char4',
- 'uchar1', 'uchar2', 'uchar3', 'uchar4', 'short1', 'short2', 'short3',
- 'short4', 'dim1', 'dim2', 'dim3', 'dim4'
-})
+ 'cudaGLUnregisterBufferObject', 'cudaLaunch', 'cudaMalloc', 'cudaMalloc3D', 'cudaMalloc3DArray',
+ 'cudaMallocArray', 'cudaMallocHost', 'cudaMallocPitch', 'cudaMemcpy', 'cudaMemcpy2D',
+ 'cudaMemcpy2DArrayToArray', 'cudaMemcpy2DFromArray', 'cudaMemcpy2DToArray', 'cudaMemcpy3D',
+ 'cudaMemcpyArrayToArray', 'cudaMemcpyFromArray', 'cudaMemcpyFromSymbol', 'cudaMemcpyToArray',
+ 'cudaMemcpyToSymbol', 'cudaMemset', 'cudaMemset2D', 'cudaMemset3D', 'cudaSetDevice',
+ 'cudaSetupArgument', 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamQuery',
+ 'cudaStreamSynchronize', 'cudaThreadExit', 'cudaThreadSynchronize', 'cudaUnbindTexture'
+}) +
-- Variables.
-local variable = token(l.VARIABLE, word_match{
- 'gridDim', 'blockIdx', 'blockDim', 'threadIdx'
-})
-
--- Extend cpp lexer to include CUDA elements.
-local cpp = l.load('cpp')
-local _rules = cpp._rules
-_rules[1] = {'whitespace', ws}
-table.insert(_rules, 2, {'cuda_keyword', keyword})
-table.insert(_rules, 3, {'cuda_function', func})
-table.insert(_rules, 4, {'cuda_type', type})
-table.insert(_rules, 5, {'cuda_variable', variable})
-M._rules = _rules
-M._foldsymbols = cpp._foldsymbols
+token(lexer.VARIABLE, word_match('gridDim blockIdx blockDim threadIdx')))
-return M
+return lex
diff --git a/lua/lexers/dart.lua b/lua/lexers/dart.lua
index b5964e2..ca253ff 100644
--- a/lua/lexers/dart.lua
+++ b/lua/lexers/dart.lua
@@ -1,77 +1,56 @@
--- Copyright 2013-2017 Brian Schott (@Hackerpilot on Github). See LICENSE.
+-- Copyright 2013-2022 Mitchell. See LICENSE.
-- Dart LPeg lexer.
+-- Written by Brian Schott (@Hackerpilot on Github).
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'dart'}
+local lex = lexer.new('dart')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local nested_comment = l.nested_pair('/*', '*/')
-local comment = token(l.COMMENT, line_comment + nested_comment)
-
--- Strings.
-local sq_str = S('r')^-1 * l.delimited_range("'", true)
-local dq_str = S('r')^-1 * l.delimited_range('"', true)
-local sq_str_multiline = S('r')^-1 * l.delimited_range('"""')
-local dq_str_multiline = S('r')^-1 * l.delimited_range("''' ")
-local string = token(l.STRING,
- sq_str + dq_str + sq_str_multiline + dq_str_multiline)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.hex_num))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default',
- 'do', 'else', 'enum', 'extends', 'false', 'final' , 'finally', 'for', 'if',
- 'in', 'is', 'new', 'null', 'rethrow', 'return', 'super', 'switch', 'this',
- 'throw', 'true', 'try', 'var', 'void', 'while', 'with',
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', 'do', 'else', 'enum',
+ 'extends', 'false', 'final', 'finally', 'for', 'if', 'in', 'is', 'new', 'null', 'rethrow',
+ 'return', 'super', 'switch', 'this', 'throw', 'true', 'try', 'var', 'void', 'while', 'with'
+}))
+
+-- Built-ins.
+lex:add_rule('builtin', token(lexer.CONSTANT, word_match{
+ 'abstract', 'as', 'dynamic', 'export', 'external', 'factory', 'get', 'implements', 'import',
+ 'library', 'operator', 'part', 'set', 'static', 'typedef'
+}))
-local builtin_identifiers = token(l.CONSTANT, word_match{
- 'abstract', 'as', 'dynamic', 'export', 'external', 'factory', 'get',
- 'implements', 'import', 'library', 'operator', 'part', 'set', 'static',
- 'typedef'
-})
+-- Strings.
+local sq_str = S('r')^-1 * lexer.range("'", true)
+local dq_str = S('r')^-1 * lexer.range('"', true)
+local tq_str = S('r')^-1 * (lexer.range("'''") + lexer.range('"""'))
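+-- Match triple-quoted strings before single- and double-quoted ones so ''' is not lexed as an
+-- empty string followed by a quote.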
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Preprocs.
-local annotation = token('annotation', '@' * l.word^1)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'constant', builtin_identifiers},
- {'string', string},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
- {'annotation', annotation},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}')))
-M._tokenstyles = {
- annotation = l.STYLE_PREPROCESSOR,
-}
+-- Annotations.
+lex:add_rule('annotation', token('annotation', '@' * lexer.word^1))
+lex:add_style('annotation', lexer.styles.preprocessor)
-M._foldsymbols = {
- _patterns = {'[{}]', '/[*+]', '[*+]/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {
- ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1,
- ['//'] = l.fold_line_comments('//')
- }
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/desktop.lua b/lua/lexers/desktop.lua
index d6d40f5..2c824e0 100644
--- a/lua/lexers/desktop.lua
+++ b/lua/lexers/desktop.lua
@@ -1,62 +1,53 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Desktop Entry LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'desktop'}
+local lex = lexer.new('desktop')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keys.
+lex:add_rule('key', token('key', word_match{
+ 'Type', 'Version', 'Name', 'GenericName', 'NoDisplay', 'Comment', 'Icon', 'Hidden', 'OnlyShowIn',
+ 'NotShowIn', 'TryExec', 'Exec', 'Path', 'Terminal', 'MimeType', 'Categories',
+ 'StartupNotify', 'StartupWMClass', 'URL'
+}))
+lex:add_style('key', lexer.styles.keyword)
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
+-- Values.
+lex:add_rule('value', token('value', word_match('true false')))
+lex:add_style('value', lexer.styles.constant)
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('_-'))^0))
-- Group headers.
-local group_header = l.starts_line(token(l.STRING,
- l.delimited_range('[]', false, true)))
+local bracketed = lexer.range('[', ']')
+lex:add_rule('header', lexer.starts_line(token('header', bracketed)))
+lex:add_style('header', lexer.styles.label)
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer))
+-- Locales.
+lex:add_rule('locale', token('locale', bracketed))
+lex:add_style('locale', lexer.styles.class)
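+-- Since headers (above) only match at the start of a line, a bracketed range elsewhere is a
+-- locale suffix on a key, e.g. Name[en_US].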
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{'true', 'false'})
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
--- Locales.
-local locale = token(l.CLASS, l.delimited_range('[]', false, true))
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
--- Keys.
-local key = token(l.VARIABLE, word_match{
- 'Type', 'Version', 'Name', 'GenericName', 'NoDisplay', 'Comment', 'Icon',
- 'Hidden', 'OnlyShowIn', 'NotShowIn', 'TryExec', 'Exec', 'Exec', 'Path',
- 'Terminal', 'MimeType', 'Categories', 'StartupNotify', 'StartupWMClass', 'URL'
-})
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Field codes.
-local code = l.token(l.CONSTANT, P('%') * S('fFuUdDnNickvm'))
-
--- Identifiers.
-local identifier = l.token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0)
+lex:add_rule('code', token('code', '%' * S('fFuUdDnNickvm')))
+lex:add_style('code', lexer.styles.variable)
-- Operators.
-local operator = token(l.OPERATOR, S('='))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'key', key},
- {'identifier', identifier},
- {'group_header', group_header},
- {'locale', locale},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'code', code},
- {'operator', operator},
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('=')))
+
+return lex
diff --git a/lua/lexers/diff.lua b/lua/lexers/diff.lua
index cc7ab78..4c87dc2 100644
--- a/lua/lexers/diff.lua
+++ b/lua/lexers/diff.lua
@@ -1,44 +1,29 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Diff LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'diff'}
+local lex = lexer.new('diff', {lex_by_line = true})
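+-- lex_by_line lexes the buffer one line at a time; each rule below matches up to the end of a
+-- line.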
-- Text, separators, and file headers.
-local index = token(l.COMMENT, 'Index: ' * l.any^0 * P(-1))
-local separator = token(l.COMMENT, ('---' + P('*')^4 + P('=')^1) * l.space^0 *
- -1)
-local header = token('header', (P('*** ') + '--- ' + '+++ ') * l.any^1)
+lex:add_rule('index', token(lexer.COMMENT, 'Index: ' * lexer.any^0 * -1))
+lex:add_rule('separator', token(lexer.COMMENT, ('---' + P('*')^4 + P('=')^1) * lexer.space^0 * -1))
+lex:add_rule('header', token('header', (P('*** ') + '--- ' + '+++ ') * lexer.any^1))
+lex:add_style('header', lexer.styles.comment)
-- Location.
-local location = token(l.NUMBER, ('@@' + l.digit^1 + '****') * l.any^1)
+lex:add_rule('location', token(lexer.NUMBER, ('@@' + lexer.dec_num + '****') * lexer.any^1))
-- Additions, deletions, and changes.
-local addition = token('addition', S('>+') * l.any^0)
-local deletion = token('deletion', S('<-') * l.any^0)
-local change = token('change', '! ' * l.any^0)
+lex:add_rule('addition', token('addition', S('>+') * lexer.any^0))
+lex:add_style('addition', {fore = lexer.colors.green})
+lex:add_rule('deletion', token('deletion', S('<-') * lexer.any^0))
+lex:add_style('deletion', {fore = lexer.colors.red})
+lex:add_rule('change', token('change', '!' * lexer.any^0))
+lex:add_style('change', {fore = lexer.colors.yellow})
-M._rules = {
- {'index', index},
- {'separator', separator},
- {'header', header},
- {'location', location},
- {'addition', addition},
- {'deletion', deletion},
- {'change', change},
- {'any_line', token('default', l.any^1)},
-}
+lex:add_rule('any_line', token(lexer.DEFAULT, lexer.any^1))
-M._tokenstyles = {
- header = l.STYLE_COMMENT,
- addition = 'fore:green',
- deletion = 'fore:red',
- change = 'fore:yellow',
-}
-
-M._LEXBYLINE = true
-
-return M
+return lex
diff --git a/lua/lexers/django.lua b/lua/lexers/django.lua
index a671006..2147853 100644
--- a/lua/lexers/django.lua
+++ b/lua/lexers/django.lua
@@ -1,77 +1,55 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Django LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'django'}
+local lex = lexer.new('django')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '{#' * (l.any - l.newline - '#}')^0 *
- P('#}')^-1)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', false, true))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'as', 'block', 'blocktrans', 'by', 'endblock', 'endblocktrans', 'comment',
- 'endcomment', 'cycle', 'date', 'debug', 'else', 'extends', 'filter',
- 'endfilter', 'firstof', 'for', 'endfor', 'if', 'endif', 'ifchanged',
- 'endifchanged', 'ifnotequal', 'endifnotequal', 'in', 'load', 'not', 'now',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'as', 'block', 'blocktrans', 'by', 'endblock', 'endblocktrans', 'comment', 'endcomment', 'cycle',
+ 'date', 'debug', 'else', 'extends', 'filter', 'endfilter', 'firstof', 'for', 'endfor', 'if',
+ 'endif', 'ifchanged', 'endifchanged', 'ifnotequal', 'endifnotequal', 'in', 'load', 'not', 'now',
'or', 'parsed', 'regroup', 'ssi', 'trans', 'with', 'widthratio'
-})
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'add', 'addslashes', 'capfirst', 'center', 'cut', 'date', 'default',
- 'dictsort', 'dictsortreversed', 'divisibleby', 'escape', 'filesizeformat',
- 'first', 'fix_ampersands', 'floatformat', 'get_digit', 'join', 'length',
- 'length_is', 'linebreaks', 'linebreaksbr', 'linenumbers', 'ljust', 'lower',
- 'make_list', 'phone2numeric', 'pluralize', 'pprint', 'random', 'removetags',
- 'rjust', 'slice', 'slugify', 'stringformat', 'striptags', 'time', 'timesince',
- 'title', 'truncatewords', 'unordered_list', 'upper', 'urlencode', 'urlize',
- 'urlizetrunc', 'wordcount', 'wordwrap', 'yesno',
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'add', 'addslashes', 'capfirst', 'center', 'cut', 'date', 'default', 'dictsort',
+ 'dictsortreversed', 'divisibleby', 'escape', 'filesizeformat', 'first', 'fix_ampersands',
+ 'floatformat', 'get_digit', 'join', 'length', 'length_is', 'linebreaks', 'linebreaksbr',
+ 'linenumbers', 'ljust', 'lower', 'make_list', 'phone2numeric', 'pluralize', 'pprint', 'random',
+ 'removetags', 'rjust', 'slice', 'slugify', 'stringformat', 'striptags', 'time', 'timesince',
+ 'title', 'truncatewords', 'unordered_list', 'upper', 'urlencode', 'urlize', 'urlizetrunc',
+ 'wordcount', 'wordwrap', 'yesno'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Operators.
-local operator = token(l.OPERATOR, S(':,.|'))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'operator', operator},
-}
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false)))
--- Embedded in HTML.
-local html = l.load('html')
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S(':,.|')))
--- Embedded Django.
+-- Embed Django in HTML.
+local html = lexer.load('html')
+local html_comment = lexer.range('<!--', '-->')
+local django_comment = lexer.range('{#', '#}', true)
+html:modify_rule('comment', token(lexer.COMMENT, html_comment + django_comment))
local django_start_rule = token('django_tag', '{' * S('{%'))
local django_end_rule = token('django_tag', S('%}') * '}')
-l.embed_lexer(html, M, django_start_rule, django_end_rule)
--- Modify HTML patterns to embed Django.
-html._RULES['comment'] = html._RULES['comment'] + comment
-
-M._tokenstyles = {
- django_tag = l.STYLE_EMBEDDED
-}
+html:embed(lex, django_start_rule, django_end_rule)
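+-- The HTML lexer is now the parent: it hands control to Django between '{{' or '{%' and the
+-- matching '}}' or '%}'.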
+lex:add_style('django_tag', lexer.styles.embedded)
-local _foldsymbols = html._foldsymbols
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '{[%%{]'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '[%%}]}'
-_foldsymbols.django_tag = {['{{'] = 1, ['}}'] = -1, ['{%'] = 1, ['%}'] = -1}
-M._foldsymbols = _foldsymbols
+-- Fold points.
+lex:add_fold_point('django_tag', '{{', '}}')
+lex:add_fold_point('django_tag', '{%', '%}')
-return M
+return lex
diff --git a/lua/lexers/dmd.lua b/lua/lexers/dmd.lua
index ab751e2..9fcacb5 100644
--- a/lua/lexers/dmd.lua
+++ b/lua/lexers/dmd.lua
@@ -1,176 +1,142 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- D LPeg lexer.
-- Heavily modified by Brian Schott (@Hackerpilot on Github).
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'dmd'}
+local lex = lexer.new('dmd')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
+
+-- Class names.
+lex:add_rule('class',
+ token(lexer.TYPE, P('class') + 'struct') * ws^-1 * token(lexer.CLASS, lexer.word))
+
+-- Versions.
+local version = word_match{
+ 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat', 'Android', 'ARM',
+ 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb', 'assert', 'BigEndian', 'BSD',
+ 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat', 'DigitalMars', 'D_InlineAsm_X86',
+ 'D_InlineAsm_X86_64', 'D_LP64', 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD',
+ 'D_SoftFloat', 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd', 'IA64',
+ 'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI', 'MIPS_HardFloat', 'MIPS_N32',
+ 'MIPS_N64', 'MIPS_O32', 'MIPS_O64', 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix',
+ 'PPC', 'PPC64', 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64', 'SkyOS',
+ 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat', 'SPARC_V8Plus', 'SysV3',
+ 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows', 'X86', 'X86_64'
+}
+local open_paren = token(lexer.OPERATOR, '(')
+lex:add_rule('version', token(lexer.KEYWORD, 'version') * ws^-1 * open_paren * ws^-1 *
+ token('versions', version))
+lex:add_style('versions', lexer.styles.constant)
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local nested_comment = l.nested_pair('/+', '+/')
-local comment = token(l.COMMENT, line_comment + block_comment + nested_comment)
+-- Scopes.
+local scope = word_match('exit success failure')
+lex:add_rule('scope',
+ token(lexer.KEYWORD, 'scope') * ws^-1 * open_paren * ws^-1 * token('scopes', scope))
+lex:add_style('scopes', lexer.styles.constant)
--- Strings.
-local sq_str = l.delimited_range("'", true) * S('cwd')^-1
-local dq_str = l.delimited_range('"') * S('cwd')^-1
-local lit_str = 'r' * l.delimited_range('"', false, true) * S('cwd')^-1
-local bt_str = l.delimited_range('`', false, true) * S('cwd')^-1
-local hex_str = 'x' * l.delimited_range('"') * S('cwd')^-1
-local other_hex_str = '\\x' * (l.xdigit * l.xdigit)^1
-local del_str = l.nested_pair('q"[', ']"') * S('cwd')^-1 +
- l.nested_pair('q"(', ')"') * S('cwd')^-1 +
- l.nested_pair('q"{', '}"') * S('cwd')^-1 +
- l.nested_pair('q"<', '>"') * S('cwd')^-1 +
- P('q') * l.nested_pair('{', '}') * S('cwd')^-1
-local string = token(l.STRING, del_str + sq_str + dq_str + lit_str + bt_str +
- hex_str + other_hex_str)
+-- Traits.
+local trait = word_match{
+ 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers', 'getAttributes', 'getMember',
+ 'getOverloads', 'getProtection', 'getUnitTests', 'getVirtualFunctions', 'getVirtualIndex',
+ 'getVirtualMethods', 'hasMember', 'identifier', 'isAbstractClass', 'isAbstractFunction',
+ 'isArithmetic', 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating',
+ 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD', 'isRef', 'isSame',
+ 'isScalar', 'isStaticArray', 'isStaticFunction', 'isUnsigned', 'isVirtualFunction',
+ 'isVirtualMethod', 'parent'
+}
+lex:add_rule('trait',
+ token(lexer.KEYWORD, '__traits') * ws^-1 * open_paren * ws^-1 * token('traits', trait))
+lex:add_style('traits', {fore = lexer.colors.yellow})
--- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0
-local hex_num = l.hex_num * ('_' * l.xdigit^1)^0
-local bin_num = '0' * S('bB') * S('01_')^1
-local oct_num = '0' * S('01234567_')^1
-local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec)
-local number = token(l.NUMBER, (l.float + integer) * S('uUlLdDfFi')^-1)
+-- Function names.
+lex:add_rule('function',
+ token(lexer.FUNCTION, lexer.word) * #(ws^-1 * ('!' * lexer.word^-1 * ws^-1)^-1 * '('))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast',
- 'catch', 'const', 'continue', 'debug', 'default', 'delete',
- 'deprecated', 'do', 'else', 'extern', 'export', 'false', 'final', 'finally',
- 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable',
- 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow',
- 'null', 'out', 'override', 'pragma', 'private', 'protected', 'public', 'pure',
- 'ref', 'return', 'scope', 'shared', 'static', 'super', 'switch',
- 'synchronized', 'this', 'throw','true', 'try', 'typeid', 'typeof', 'unittest',
- 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast', 'catch', 'const',
+ 'continue', 'debug', 'default', 'delete', 'deprecated', 'do', 'else', 'extern', 'export', 'false',
+ 'final', 'finally', 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable',
+ 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow', 'null', 'out',
+ 'override', 'pragma', 'private', 'protected', 'public', 'pure', 'ref', 'return', 'scope',
+ 'shared', 'static', 'super', 'switch', 'synchronized', 'this', 'throw', 'true', 'try', 'typeid',
+ 'typeof', 'unittest', 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread',
'__traits', '__vector', '__parameters'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class',
- 'creal', 'dchar', 'delegate', 'double', 'enum', 'float', 'function',
- 'idouble', 'ifloat', 'int', 'interface', 'ireal', 'long', 'module', 'package',
- 'ptrdiff_t', 'real', 'short', 'size_t', 'struct', 'template', 'typedef',
- 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar',
+local type = token(lexer.TYPE, word_match{
+ 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class', 'creal', 'dchar',
+ 'delegate', 'double', 'enum', 'float', 'function', 'idouble', 'ifloat', 'int', 'interface',
+ 'ireal', 'long', 'module', 'package', 'ptrdiff_t', 'real', 'short', 'size_t', 'struct',
+ 'template', 'typedef', 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar',
'string', 'wstring', 'dstring', 'hash_t', 'equals_t'
})
+lex:add_rule('type', type)
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__',
- '__VENDOR__', '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__',
- '__MODULE__',
-})
-
-local class_sequence = token(l.TYPE, P('class') + P('struct')) * ws^1 *
- token(l.CLASS, l.word)
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__', '__VENDOR__',
+ '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__', '__MODULE__'
+}))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Properties.
+local dot = token(lexer.OPERATOR, '.')
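+-- lpeg.B only matches if the preceding character is alphanumeric or ')', so a leading '.' is not
+-- lexed as a property access.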
+lex:add_rule('property', lpeg.B(lexer.alnum + ')') * dot * token(lexer.VARIABLE, word_match{
+ 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity', 'keys', 'length',
+ 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp', 'min', 'min_normal', 'min_10_exp',
+ 'min_exp', 'nan', 'offsetof', 'ptr', 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof',
+ 'tupleof', 'values'
+}))
--- Operators.
-local operator = token(l.OPERATOR, S('?=!<>+-*$/%&|^~.,;()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true) * S('cwd')^-1
+local dq_str = lexer.range('"') * S('cwd')^-1
+local lit_str = 'r' * lexer.range('"', false, false) * S('cwd')^-1
+local bt_str = lexer.range('`', false, false) * S('cwd')^-1
+local hex_str = 'x' * lexer.range('"') * S('cwd')^-1
+local other_hex_str = '\\x' * (lexer.xdigit * lexer.xdigit)^1
+local str = sq_str + dq_str + lit_str + bt_str + hex_str + other_hex_str
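+-- Delimited strings: q"[...]", q"(...)", q"{...}", and q"<...>", whose delimiters may nest.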
+for left, right in pairs{['['] = ']', ['('] = ')', ['{'] = '}', ['<'] = '>'} do
+ str = str + lexer.range('q"' .. left, right .. '"', false, false, true) * S('cwd')^-1
+end
+lex:add_rule('string', token(lexer.STRING, str))
--- Properties.
-local properties = (type + identifier + operator) * token(l.OPERATOR, '.') *
- token(l.VARIABLE, word_match{
- 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity',
- 'keys', 'length', 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp',
- 'min', 'min_normal', 'min_10_exp', 'min_exp', 'nan', 'offsetof', 'ptr',
- 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof', 'tupleof',
- 'values'
- })
-
--- Preprocs.
-local annotation = token('annotation', '@' * l.word^1)
-local preproc = token(l.PREPROCESSOR, '#' * l.nonnewline^0)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Traits.
-local traits_list = token('traits', word_match{
- 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers',
- 'getAttributes', 'getMember', 'getOverloads', 'getProtection', 'getUnitTests',
- 'getVirtualFunctions', 'getVirtualIndex', 'getVirtualMethods', 'hasMember',
- 'identifier', 'isAbstractClass', 'isAbstractFunction', 'isArithmetic',
- 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating',
- 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD',
- 'isRef', 'isSame', 'isScalar', 'isStaticArray', 'isStaticFunction',
- 'isUnsigned', 'isVirtualFunction', 'isVirtualMethod', 'parent'
-})
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+local nested_comment = lexer.range('/+', '+/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment + nested_comment))
-local scopes_list = token('scopes', word_match{'exit', 'success', 'failure'})
-
--- versions
-local versions_list = token('versions', word_match{
- 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat',
- 'Android', 'ARM', 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb',
- 'assert', 'BigEndian', 'BSD', 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat',
- 'DigitalMars', 'D_InlineAsm_X86', 'D_InlineAsm_X86_64', 'D_LP64',
- 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD', 'D_SoftFloat',
- 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd',
- 'IA64', 'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI',
- 'MIPS_HardFloat', 'MIPS_N32', 'MIPS_N64', 'MIPS_O32', 'MIPS_O64',
- 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix', 'PPC', 'PPC64',
- 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64',
- 'SkyOS', 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat',
- 'SPARC_V8Plus', 'SysV3', 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows',
- 'X86', 'X86_64'
-})
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local hex_num = lexer.hex_num * ('_' * lexer.xdigit^1)^0
+local bin_num = '0' * S('bB') * S('01_')^1 * -lexer.xdigit
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec)
+lex:add_rule('number', token(lexer.NUMBER, (lexer.float + integer) * S('uULdDfFi')^-1))
-local versions = token(l.KEYWORD, 'version') * l.space^0 *
- token(l.OPERATOR, '(') * l.space^0 * versions_list
-
-local scopes = token(l.KEYWORD, 'scope') * l.space^0 *
- token(l.OPERATOR, '(') * l.space^0 * scopes_list
-
-local traits = token(l.KEYWORD, '__traits') * l.space^0 *
- token(l.OPERATOR, '(') * l.space^0 * traits_list
-
-local func = token(l.FUNCTION, l.word) *
- #(l.space^0 * (P('!') * l.word^-1 * l.space^-1)^-1 * P('('))
-
-M._rules = {
- {'whitespace', ws},
- {'class', class_sequence},
- {'traits', traits},
- {'versions', versions},
- {'scopes', scopes},
- {'keyword', keyword},
- {'variable', properties},
- {'type', type},
- {'function', func},
- {'constant', constant},
- {'string', string},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
- {'annotation', annotation},
-}
+-- Preprocessor.
+lex:add_rule('annotation', token('annotation', '@' * lexer.word^1))
+lex:add_style('annotation', lexer.styles.preprocessor)
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.to_eol('#')))
-M._tokenstyles = {
- annotation = l.STYLE_PREPROCESSOR,
- traits = l.STYLE_CLASS,
- versions = l.STYLE_CONSTANT,
- scopes = l.STYLE_CONSTANT
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('?=!<>+-*$/%&|^~.,;:()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/[*+]', '[*+]/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {
- ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1,
- ['//'] = l.fold_line_comments('//')
- }
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, '/+', '+/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/dockerfile.lua b/lua/lexers/dockerfile.lua
index 3880021..4b131df 100644
--- a/lua/lexers/dockerfile.lua
+++ b/lua/lexers/dockerfile.lua
@@ -1,55 +1,40 @@
--- Copyright 2016-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE.
+-- Copyright 2016-2022 Alejandro Baez (https://keybase.io/baez). See LICENSE.
-- Dockerfile LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'dockerfile'}
+local lex = lexer.new('dockerfile', {fold_by_indentation = true})
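+-- fold_by_indentation computes fold points from indent levels instead of explicit symbols.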
-- Whitespace
-local indent = #l.starts_line(S(' \t')) *
- (token(l.WHITESPACE, ' ') + token('indent_error', '\t'))^1
-local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'ADD', 'ARG', 'CMD', 'COPY', 'ENTRYPOINT', 'ENV', 'EXPOSE', 'FROM', 'LABEL',
- 'MAINTAINER', 'ONBUILD', 'RUN', 'STOPSIGNAL', 'USER', 'VOLUME', 'WORKDIR'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'ADD', 'ARG', 'CMD', 'COPY', 'ENTRYPOINT', 'ENV', 'EXPOSE', 'FROM', 'LABEL', 'MAINTAINER',
+ 'ONBUILD', 'RUN', 'STOPSIGNAL', 'USER', 'VOLUME', 'WORKDIR'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variable.
-local variable = token(l.VARIABLE,
- S('$')^1 * (S('{')^1 * l.word * S('}')^1 + l.word))
+lex:add_rule('variable',
+ token(lexer.VARIABLE, S('$')^1 * (P('{')^1 * lexer.word * P('}')^1 + lexer.word)))
+
+-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('\\[],=:{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'variable', variable},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._FOLDBYINDENTATION = true
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('\\[],=:{}')))
+
+return lex
diff --git a/lua/lexers/dot.lua b/lua/lexers/dot.lua
index aa09fa7..19c4b60 100644
--- a/lua/lexers/dot.lua
+++ b/lua/lexers/dot.lua
@@ -1,71 +1,56 @@
--- Copyright 2006-2017 Brian "Sir Alaran" Schott. See LICENSE.
+-- Copyright 2006-2022 Brian "Sir Alaran" Schott. See LICENSE.
-- Dot LPeg lexer.
-- Based off of lexer code by Mitchell.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'dot'}
+local lex = lexer.new('dot')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.digit^1 + l.float)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'graph', 'node', 'edge', 'digraph', 'fontsize', 'rankdir',
- 'fontname', 'shape', 'label', 'arrowhead', 'arrowtail', 'arrowsize',
- 'color', 'comment', 'constraint', 'decorate', 'dir', 'headlabel', 'headport',
- 'headURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor',
- 'labelfontname', 'labelfontsize', 'layer', 'lhead', 'ltail', 'minlen',
- 'samehead', 'sametail', 'style', 'taillabel', 'tailport', 'tailURL', 'weight',
- 'subgraph'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'graph', 'node', 'edge', 'digraph', 'fontsize', 'rankdir', 'fontname', 'shape', 'label',
+ 'arrowhead', 'arrowtail', 'arrowsize', 'color', 'comment', 'constraint', 'decorate', 'dir',
+ 'headlabel', 'headport', 'headURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor',
+ 'labelfontname', 'labelfontsize', 'layer', 'lhead', 'ltail', 'minlen', 'samehead', 'sametail',
+ 'style', 'taillabel', 'tailport', 'tailURL', 'weight', 'subgraph'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'box', 'polygon', 'ellipse', 'circle', 'point', 'egg', 'triangle',
- 'plaintext', 'diamond', 'trapezium', 'parallelogram', 'house', 'pentagon',
- 'hexagon', 'septagon', 'octagon', 'doublecircle', 'doubleoctagon',
- 'tripleoctagon', 'invtriangle', 'invtrapezium', 'invhouse', 'Mdiamond',
- 'Msquare', 'Mcircle', 'rect', 'rectangle', 'none', 'note', 'tab', 'folder',
- 'box3d', 'record'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'box', 'polygon', 'ellipse', 'circle', 'point', 'egg', 'triangle', 'plaintext', 'diamond',
+ 'trapezium', 'parallelogram', 'house', 'pentagon', 'hexagon', 'septagon', 'octagon',
+ 'doublecircle', 'doubleoctagon', 'tripleoctagon', 'invtriangle', 'invtrapezium', 'invhouse',
+ 'Mdiamond', 'Msquare', 'Mcircle', 'rect', 'rectangle', 'none', 'note', 'tab', 'folder', 'box3d',
+ 'record'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('->()[]{};'))
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'number', number},
- {'string', string},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.dec_num + lexer.float))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('->()[]{};')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
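
Aside: a quick way to sanity-check a converted lexer is to drive it directly, since objects returned by lexer.load() expose the module's functions as methods. A minimal sketch, assuming lexer.lua from this tree is on package.path and that lpeg is provided as a global (as vis does); the flat name/end-position layout of the result is my recollection of lexer.lex()'s contract, so verify it against lexer.lua:

  -- Tokenize a small DOT snippet with the rewritten lexer.
  lpeg = require('lpeg') -- lexer.lua expects a global 'lpeg'
  local lexer = require('lexer')
  local dot = lexer.load('dot')
  local tokens = dot:lex('digraph g { a -> b; }')
  -- 'tokens' alternates token names with end positions, e.g.
  -- 'keyword', 8, 'whitespace', 9, 'identifier', 10, ...
  for i = 1, #tokens, 2 do print(tokens[i], tokens[i + 1]) end
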
diff --git a/lua/lexers/dsv.lua b/lua/lexers/dsv.lua
index ad2b286..6578566 100644
--- a/lua/lexers/dsv.lua
+++ b/lua/lexers/dsv.lua
@@ -1,17 +1,12 @@
-- Copyright 2016 Christian Hesse
-- delimiter separated values LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token = lexer.token
+local S = lpeg.S
-local M = {_NAME = 'dsv'}
+local lex = lexer.new('dsv')
--- Operators.
-local operator = token(l.OPERATOR, S(',;:|'))
+lex:add_rule('operator', token(lexer.OPERATOR, S(',;:|')))
-M._rules = {
- {'operator', operator}
-}
-
-return M
+return lex
diff --git a/lua/lexers/eiffel.lua b/lua/lexers/eiffel.lua
index 4de8b3f..8f92250 100644
--- a/lua/lexers/eiffel.lua
+++ b/lua/lexers/eiffel.lua
@@ -1,69 +1,58 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Eiffel LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'eiffel'}
+local lex = lexer.new('eiffel')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '--' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'alias', 'all', 'and', 'as', 'check', 'class', 'creation', 'debug',
- 'deferred', 'do', 'else', 'elseif', 'end', 'ensure', 'expanded', 'export',
- 'external', 'feature', 'from', 'frozen', 'if', 'implies', 'indexing', 'infix',
- 'inherit', 'inspect', 'invariant', 'is', 'like', 'local', 'loop', 'not',
- 'obsolete', 'old', 'once', 'or', 'prefix', 'redefine', 'rename', 'require',
- 'rescue', 'retry', 'select', 'separate', 'then', 'undefine', 'until',
- 'variant', 'when', 'xor',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'alias', 'all', 'and', 'as', 'check', 'class', 'creation', 'debug', 'deferred', 'do', 'else',
+ 'elseif', 'end', 'ensure', 'expanded', 'export', 'external', 'feature', 'from', 'frozen', 'if',
+ 'implies', 'indexing', 'infix', 'inherit', 'inspect', 'invariant', 'is', 'like', 'local', 'loop',
+ 'not', 'obsolete', 'old', 'once', 'or', 'prefix', 'redefine', 'rename', 'require', 'rescue',
+ 'retry', 'select', 'separate', 'then', 'undefine', 'until', 'variant', 'when', 'xor', --
'current', 'false', 'precursor', 'result', 'strip', 'true', 'unique', 'void'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'character', 'string', 'bit', 'boolean', 'integer', 'real', 'none', 'any'
-})
+lex:add_rule('type',
+ token(lexer.TYPE, word_match('character string bit boolean integer real none any')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--')))
-M._foldsymbols = {
- _patterns = {'[a-z]+', '%-%-'},
- [l.KEYWORD] = {
- check = 1, debug = 1, deferred = 1, ['do'] = 1, from = 1, ['if'] = 1,
- inspect = 1, once = 1, class = function(text, pos, line, s)
- return line:find('deferred%s+class') and 0 or 1
- end, ['end'] = -1
- },
- [l.COMMENT] = {['--'] = l.fold_line_comments('--')}
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-return M
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}')))
+
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'check', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'debug', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'deferred',
+ function(text, pos, line, s) return line:find('deferred%s+class') and 0 or 1 end)
+lex:add_fold_point(lexer.KEYWORD, 'do', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'from', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'inspect', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'once', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'class',
+ function(text, pos, line, s) return line:find('deferred%s+class') and 0 or 1 end)
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('--'))
+
+return lex
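
The eiffel conversion shows both forms that lex:add_fold_point() accepts: a plain start/end token pair, and a function that computes a fold delta from its line (1 opens a fold, -1 closes one, 0 is neutral, matching the usage above). A minimal sketch of the function form for a hypothetical 'demo' lexer:

  local lexer = require('lexer')
  local token, word_match = lexer.token, lexer.word_match
  local lex = lexer.new('demo')
  lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
  lex:add_rule('keyword', token(lexer.KEYWORD, word_match('class deferred end')))
  -- Plain pair: every 'class' opens a fold, every 'end' closes one.
  lex:add_fold_point(lexer.KEYWORD, 'class', 'end')
  -- Function form: 'deferred' is fold-neutral when it merely prefixes
  -- 'class' (that line's fold is handled by the 'class' point), and
  -- opens a fold of its own otherwise.
  lex:add_fold_point(lexer.KEYWORD, 'deferred', function(text, pos, line, s)
    return line:find('deferred%s+class') and 0 or 1
  end)
  return lex
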
diff --git a/lua/lexers/elixir.lua b/lua/lexers/elixir.lua
index 4863e4a..b06e6c0 100644
--- a/lua/lexers/elixir.lua
+++ b/lua/lexers/elixir.lua
@@ -1,123 +1,97 @@
--- Copyright 2015-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2015-2022 Mitchell. See LICENSE.
-- Contributed by Richard Philips.
--- Elixer LPeg lexer.
+-- Elixir LPeg lexer.
-local l = require('lexer')
-local token, style, color, word_match = l.token, l.style, l.color, l.word_match
-local B, P, R, S = lpeg.B, lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local B, P, S = lpeg.B, lpeg.P, lpeg.S
-local M = {_NAME = 'elixir'}
+local lex = lexer.new('elixir', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Sigils.
+local sigil11 = '~' * S('CRSW') * lexer.range('<', '>')
+local sigil12 = '~' * S('CRSW') * lexer.range('{', '}')
+local sigil13 = '~' * S('CRSW') * lexer.range('[', ']')
+local sigil14 = '~' * S('CRSW') * lexer.range('(', ')')
+local sigil15 = '~' * S('CRSW') * lexer.range('|', false, false)
+local sigil16 = '~' * S('CRSW') * lexer.range('/', false, false)
+local sigil17 = '~' * S('CRSW') * lexer.range('"', false, false)
+local sigil18 = '~' * S('CRSW') * lexer.range("'", false, false)
+local sigil19 = '~' * S('CRSW') * lexer.range('"""')
+local sigil10 = '~' * S('CRSW') * lexer.range("'''")
+local sigil21 = '~' * S('crsw') * lexer.range('<', '>')
+local sigil22 = '~' * S('crsw') * lexer.range('{', '}')
+local sigil23 = '~' * S('crsw') * lexer.range('[', ']')
+local sigil24 = '~' * S('crsw') * lexer.range('(', ')')
+local sigil25 = '~' * S('crsw') * lexer.range('|')
+local sigil26 = '~' * S('crsw') * lexer.range('/')
+local sigil27 = '~' * S('crsw') * lexer.range('"')
+local sigil28 = '~' * S('crsw') * lexer.range("'")
+local sigil29 = '~' * S('crsw') * lexer.range('"""')
+local sigil20 = '~' * S('crsw') * lexer.range("'''")
+local sigil_token = token(lexer.REGEX,
+ sigil10 + sigil19 + sigil11 + sigil12 + sigil13 + sigil14 + sigil15 + sigil16 + sigil17 + sigil18 +
+ sigil20 + sigil29 + sigil21 + sigil22 + sigil23 + sigil24 + sigil25 + sigil26 + sigil27 +
+ sigil28)
+local sigiladdon_token = token(lexer.EMBEDDED, lexer.alpha^0)
+lex:add_rule('sigil', sigil_token * sigiladdon_token)
+
+-- Atoms.
+local atom1 = B(1 - P(':')) * ':' * lexer.range('"')
+local atom2 = B(1 - P(':')) * ':' * lexer.alpha * (lexer.alnum + S('_@'))^0 * S('?!')^-1
+local atom3 = B(1 - (lexer.alnum + S('_:'))) * lexer.upper * (lexer.alnum + S('_@'))^0 * S('?!')^-1
+lex:add_rule('atom', token(lexer.CONSTANT, atom1 + atom2 + atom3))
-- Strings.
-local dq_str = l.delimited_range('"', false)
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local string = token(l.STRING, triple_dq_str + dq_str)
-
--- Numbers
-local dec = l.digit * (l.digit + P("_"))^0
-local bin = '0b' * S('01')^1
-local oct = '0o' * R('07')^1
-local integer = bin + l.hex_num + oct + dec
-local float = l.digit^1 * P(".") * l.digit^1 * S("eE") *
- (S('+-')^-1 * l.digit^1)^-1
-local number_token = B(1 - R('az', 'AZ', '__')) *
- (S('+-')^-1) * token(l.NUMBER, (float + integer))
-
--- Keywords.
-local keyword_token = token(l.KEYWORD, word_match{
- "is_atom", "is_binary", "is_bitstring", "is_boolean", "is_float",
- "is_function", "is_integer", "is_list", "is_map", "is_number", "is_pid",
- "is_port", "is_record", "is_reference", "is_tuple", "is_exception", "case",
- "when", "cond", "for", "if", "unless", "try", "receive", "send", "exit",
- "raise", "throw", "after", "rescue", "catch", "else", "do", "end", "quote",
- "unquote", "super", "import", "require", "alias", "use", "self", "with", "fn"
-})
+local dq_str = lexer.range('"')
+local triple_dq_str = lexer.range('"""')
+lex:add_rule('string', token(lexer.STRING, triple_dq_str + dq_str))
--- Functions
-local function_token = token(l.FUNCTION, word_match{
- "defstruct", "defrecordp", "defrecord", "defprotocol", "defp",
- "defoverridable", "defmodule", "defmacrop", "defmacro", "defimpl",
- "defexception", "defdelegate", "defcallback", "def"
-})
-
--- Sigils
-local sigil11 = P("~") * S("CRSW") * l.delimited_range('<>', false, true)
-local sigil12 = P("~") * S("CRSW") * l.delimited_range('{}', false, true)
-local sigil13 = P("~") * S("CRSW") * l.delimited_range('[]', false, true)
-local sigil14 = P("~") * S("CRSW") * l.delimited_range('()', false, true)
-local sigil15 = P("~") * S("CRSW") * l.delimited_range('|', false, true)
-local sigil16 = P("~") * S("CRSW") * l.delimited_range('/', false, true)
-local sigil17 = P("~") * S("CRSW") * l.delimited_range('"', false, true)
-local sigil18 = P("~") * S("CRSW") * l.delimited_range("'", false, true)
-local sigil19 = P("~") * S("CRSW") * '"""' * (l.any - '"""')^0 * P('"""')^-1
-local sigil10 = P("~") * S("CRSW") * "'''" * (l.any - "'''")^0 * P("'''")^-1
-local sigil21 = P("~") * S("crsw") * l.delimited_range('<>', false, false)
-local sigil22 = P("~") * S("crsw") * l.delimited_range('{}', false, false)
-local sigil23 = P("~") * S("crsw") * l.delimited_range('[]', false, false)
-local sigil24 = P("~") * S("crsw") * l.delimited_range('()', false, false)
-local sigil25 = P("~") * S("crsw") * l.delimited_range('|', false, false)
-local sigil26 = P("~") * S("crsw") * l.delimited_range('/', false, false)
-local sigil27 = P("~") * S("crsw") * l.delimited_range('"', false, false)
-local sigil28 = P("~") * S("crsw") * l.delimited_range("'", false, false)
-local sigil29 = P("~") * S("csrw") * '"""' * (l.any - '"""')^0 * P('"""')^-1
-local sigil20 = P("~") * S("csrw") * "'''" * (l.any - "'''")^0 * P("'''")^-1
-local sigil_token = token(l.REGEX, sigil10 + sigil19 + sigil11 + sigil12 +
- sigil13 + sigil14 + sigil15 + sigil16 +
- sigil17 + sigil18 + sigil20 + sigil29 +
- sigil21 + sigil22 + sigil23 + sigil24 +
- sigil25 + sigil26 + sigil27 + sigil28)
-local sigiladdon_token = token(l.EMBEDDED, R('az', 'AZ')^0)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
--- Attributes
-local attribute_token = token(l.LABEL, B(1 - R('az', 'AZ', '__')) * P('@') *
- R('az','AZ') * R('az','AZ','09','__')^0)
+-- Attributes.
+lex:add_rule('attribute', token(lexer.LABEL, B(1 - (lexer.alnum + '_')) * '@' * lexer.alpha *
+ (lexer.alnum + '_')^0))
--- Booleans
-local boolean_token = token(l.NUMBER,
- P(':')^-1 * word_match{"true", "false", "nil"})
+-- Booleans.
+lex:add_rule('boolean', token(lexer.NUMBER, P(':')^-1 * word_match('true false nil')))
--- Identifiers
-local identifier = token(l.IDENTIFIER, R('az', '__') *
- R('az', 'AZ', '__', '09')^0 * S('?!')^-1)
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'defstruct', 'defrecordp', 'defrecord', 'defprotocol', 'defp', 'defoverridable', 'defmodule',
+ 'defmacrop', 'defmacro', 'defimpl', 'defexception', 'defdelegate', 'defcallback', 'def'
+}))
--- Atoms
-local atom1 = B(1 - P(':')) * P(':') * dq_str
-local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') *
- R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
-local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) *
- R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
-local atom_token = token(l.CONSTANT, atom1 + atom2 + atom3)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', 'is_float', 'is_function', 'is_integer',
+ 'is_list', 'is_map', 'is_number', 'is_pid', 'is_port', 'is_record', 'is_reference', 'is_tuple',
+ 'is_exception', 'case', 'when', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'send', 'exit',
+ 'raise', 'throw', 'after', 'rescue', 'catch', 'else', 'do', 'end', 'quote', 'unquote', 'super',
+ 'import', 'require', 'alias', 'use', 'self', 'with', 'fn'
+}))
-- Operators
-local operator1 = word_match{"and", "or", "not", "when", "xor", "in"}
-local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' +
- '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' +
- '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' +
- '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' +
- '/' + '~~~' + '@'
-local operator_token = token(l.OPERATOR, operator1 + operator2)
+local operator1 = word_match('and or not when xor in')
+local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' + '<<' + '<=' + '<-' +
+ '<' + '>>>' + '>>' + '>=' + '>' + '->' + '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' +
+ '||' + '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' + '/' + '~~~' + '@'
+lex:add_rule('operator', token(lexer.OPERATOR, operator1 + operator2))
-M._rules = {
- {'whitespace', ws},
- {'sigil', sigil_token * sigiladdon_token},
- {'atom', atom_token},
- {'string', string},
- {'comment', comment},
- {'attribute', attribute_token},
- {'boolean', boolean_token},
- {'function', function_token},
- {'keyword', keyword_token},
- {'operator', operator_token},
- {'identifier', identifier},
- {'number', number_token},
-}
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word * S('?!')^-1))
-M._FOLDBYINDENTATION = true
+-- Numbers.
+local dec = lexer.digit * (lexer.digit + '_')^0
+local bin = '0b' * S('01')^1
+local oct = '0o' * lpeg.R('07')^1
+local integer = bin + lexer.hex_num + oct + dec
+local float = lexer.digit^1 * '.' * lexer.digit^1 * S('eE') * (S('+-')^-1 * lexer.digit^1)^-1
+lex:add_rule('number',
+ B(1 - (lexer.alpha + '_')) * S('+-')^-1 * token(lexer.NUMBER, float + integer))
-return M
+return lex
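
The sigil rewrite leans entirely on lexer.range(), which replaces the old delimited_range(). A small sketch of how its arguments behave, with match positions worked out by hand; the argument shifting when the second parameter is a boolean, and the escapes-on-by-default rule for identical single-character delimiters, are my reading of lexer.lua, so double-check there:

  lpeg = require('lpeg') -- lexer.lua expects a global 'lpeg'
  local lexer = require('lexer')
  -- Identical delimiters: escapes are honored by default, so the \' does
  -- not terminate the range. [['it\'s']] is the 7-byte string 'it\'s'.
  print(lexer.range("'"):match([['it\'s']]))       --> 8 (one past the match)
  -- Distinct delimiters: spans one <...> pair.
  print(lexer.range('<', '>'):match('<abc> tail')) --> 6
  -- Escapes disabled, as the uppercase ~C/~R/~S/~W sigils require: the
  -- range now ends at the first quote after the backslash.
  print(lexer.range("'", false, false):match([['a\' b]])) --> 5
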
diff --git a/lua/lexers/elm.lua b/lua/lexers/elm.lua
index 796fc93..d48e1e3 100644
--- a/lua/lexers/elm.lua
+++ b/lua/lexers/elm.lua
@@ -1,64 +1,43 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2020-2022 Mitchell. See LICENSE.
-- Elm LPeg lexer
--- Modified by Alex Suraci.
-- Adapted from Haskell LPeg lexer by Karl Schultheisz.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'elm'}
+local lex = lexer.new('elm', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '--' * l.nonnewline_esc^0
-local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'if then else case of let in module import as exposing type alias port')))
+
+-- Types & type constructors.
+local word = (lexer.alnum + S("._'#"))^0
+local op = lexer.punct - S('()[]{}')
+lex:add_rule('type', token(lexer.TYPE, lexer.upper * word + ':' * (op^1 - ':')))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word))
-- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-- Chars.
-local char = token(l.STRING, l.delimited_range("'", true))
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('character', token(lexer.STRING, lexer.range("'", true)))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'if', 'then', 'else',
- 'case', 'of',
- 'let', 'in',
- 'module', 'import', 'as', 'exposing',
- 'type', 'alias',
- 'port',
-})
+-- Comments.
+local line_comment = lexer.to_eol('--', true)
+local block_comment = lexer.range('{-', '-}', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Identifiers.
-local word = (l.alnum + S("._'#"))^0
-local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local op = l.punct - S('()[]{}')
-local operator = token(l.OPERATOR, op)
-
--- Types & type constructors.
-local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":"))))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', constructor},
- {'identifier', identifier},
- {'string', string},
- {'char', char},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._FOLDBYINDENTATION = true
+lex:add_rule('operator', token(lexer.OPERATOR, op))
-return M
+return lex
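
elm (like elixir above and fsharp below) opts into whitespace-driven folding through the constructor option, which replaces the old M._FOLDBYINDENTATION flag. A one-line sketch for a hypothetical lexer:

  local lexer = require('lexer')
  -- Old API: set M._FOLDBYINDENTATION = true on the module table.
  -- New API: a constructor option; no add_fold_point() calls are needed,
  -- since fold levels derive from each line's indentation.
  local lex = lexer.new('demo', {fold_by_indentation = true})
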
diff --git a/lua/lexers/erlang.lua b/lua/lexers/erlang.lua
index d4d48b7..a0a3a7d 100644
--- a/lua/lexers/erlang.lua
+++ b/lua/lexers/erlang.lua
@@ -1,81 +1,90 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Erlang LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'erlang'}
+local lex = lexer.new('erlang')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive',
+ 'try', 'when',
+ -- Operators.
+ 'div', 'rem', 'or', 'xor', 'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not', 'bnot', 'badarg',
+ 'nocookie', 'orelse', 'andalso', 'false', 'true'
+}))
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'alive', 'apply', 'atom_to_list', 'binary_to_list', 'binary_to_term', 'concat_binary',
+ 'date', 'disconnect_node', 'element', 'erase', 'exit', 'float', 'float_to_list', 'get',
+ 'get_keys', 'group_leader', 'halt', 'hd', 'integer_to_list', 'is_alive', 'is_record', 'length',
+ 'link', 'list_to_atom', 'list_to_binary', 'list_to_float', 'list_to_integer', 'list_to_pid',
+ 'list_to_tuple', 'load_module', 'make_ref', 'monitor_node', 'node', 'nodes', 'now', 'open_port',
+ 'pid_to_list', 'process_flag', 'process_info', 'process', 'put', 'register', 'registered',
+ 'round', 'self', 'setelement', 'size', 'spawn', 'spawn_link', 'split_binary', 'statistics',
+ 'term_to_binary', 'throw', 'time', 'tl', 'trunc', 'tuple_to_list', 'unlink', 'unregister',
+ 'whereis',
+ -- Others.
+ 'any', 'atom', 'binary', 'bitstring', 'byte', 'constant', 'function', 'integer', 'list', 'map',
+ 'mfa', 'non_neg_integer', 'number', 'pid', 'ports', 'port_close', 'port_info', 'pos_integer',
+ 'reference', 'record',
+ -- Erlang.
+ 'check_process_code', 'delete_module', 'get_cookie', 'hash', 'math', 'module_loaded', 'preloaded',
+ 'processes', 'purge_module', 'set_cookie', 'set_node',
+ -- Math.
+ 'acos', 'asin', 'atan', 'atan2', 'cos', 'cosh', 'exp', 'log', 'log10', 'min', 'max', 'pi', 'pow',
+ 'power', 'sin', 'sinh', 'sqrt', 'tan', 'tanh'
+}))
--- Numbers.
-local const_char = '$' * (('\\' * l.ascii) + l.any)
-local number = token(l.NUMBER, const_char + l.float + l.integer)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * ('_' + lexer.alnum)^0))
--- Atoms.
-local atom_pat = (l.lower * (l.alnum + '_')^0) + l.delimited_range("'")
-local atom = token(l.LABEL, atom_pat)
+-- Variables.
+lex:add_rule('variable', token(lexer.VARIABLE, P('_')^0 * lexer.upper * ('_' + lexer.alnum)^0))
--- Functions.
-local func = token(l.FUNCTION, atom_pat * #l.delimited_range("()", false, false, true))
+-- Directives.
+lex:add_rule('directive', token('directive', '-' * word_match{
+ 'author', 'behaviour', 'behavior', 'compile', 'copyright', 'define', 'doc', 'else', 'endif',
+ 'export', 'file', 'ifdef', 'ifndef', 'import', 'include', 'include_lib', 'module', 'record',
+ 'spec', 'type', 'undef'
+}))
+lex:add_style('directive', lexer.styles.preprocessor)
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of',
- 'query', 'receive', 'when'
-})
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + '$' * lexer.any * lexer.alnum^0))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, ((l.upper + '_') * (l.alnum + '_')^0))
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
--- Operators.
-local named_operator = word_match{
- 'div', 'rem', 'or', 'xor', 'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not',
- 'bnot'
-}
-local operator = token(l.OPERATOR, S('-<>.;=/|#+*:,?!()[]{}') + named_operator)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
--- Directives.
-local directive = token('directive', '-' * word_match{
- 'author', 'compile', 'copyright', 'define', 'doc', 'else', 'endif', 'export',
- 'file', 'ifdef', 'ifndef', 'import', 'include_lib', 'include', 'module',
- 'record', 'undef'
-})
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'operator', operator},
- {'atom', atom},
- {'identifier', identifier},
- {'directive', directive},
- {'string', string},
- {'comment', comment},
- {'number', number}
-}
-
-M._tokenstyles = {
- directive = l.STYLE_PREPROCESSOR
-}
-
-M._foldsymbols = {
- _patterns = {'[a-z]+', '[%(%)%[%]{}]', '%%'},
- [l.KEYWORD] = {
- case = 1, fun = 1, ['if'] = 1, query = 1, receive = 1, ['end'] = -1
- },
- [l.OPERATOR] = {
- ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {['%'] = l.fold_line_comments('%')}
-}
-
-return M
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('-<>.;=/|+*:,!()[]{}')))
+
+-- Preprocessor.
+lex:add_rule('preprocessor', token(lexer.TYPE, '?' * lexer.word))
+
+-- Records.
+lex:add_rule('type', token(lexer.TYPE, '#' * lexer.word))
+
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'case', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'fun', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'query', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'receive', 'end')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%'))
+
+return lex
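
erlang keeps its non-standard 'directive' token through the conversion: token() accepts any string as a token name, and lex:add_style() attaches a predefined style to it, replacing the old M._tokenstyles table. A minimal sketch with a hypothetical rule:

  local lexer = require('lexer')
  local token = lexer.token
  local lex = lexer.new('demo')
  lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
  -- A custom token: any name works; it just needs a style of its own.
  lex:add_rule('directive', token('directive', '-' * lexer.word))
  lex:add_style('directive', lexer.styles.preprocessor)
  return lex
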
diff --git a/lua/lexers/fantom.lua b/lua/lexers/fantom.lua
index 7056620..776c597 100644
--- a/lua/lexers/fantom.lua
+++ b/lua/lexers/fantom.lua
@@ -1,32 +1,27 @@
+-- Copyright 2018-2022 Simeon Maryasin (MarSoft). See LICENSE.
-- Fantom LPeg lexer.
--- Based on Java LPeg lexer by Mitchell mitchell.att.foicica.com and Vim's Fantom syntax.
--- By MarSoft.
+-- Based on Java LPeg lexer by Mitchell and Vim's Fantom syntax.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'fantom'}
+local lex = lexer.new('fantom')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^2)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local doc_comment = '**' * l.nonnewline_esc^0
-local comment = token(l.COMMENT, line_comment + block_comment + doc_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1)
+-- Classes.
+local type = token(lexer.TYPE, lexer.word)
+lex:add_rule('class_sequence',
+ token(lexer.KEYWORD, 'class') * ws * type * ( -- at most one inheritance spec
+ ws * token(lexer.OPERATOR, ':') * ws * type *
+    ( -- zero or more additional parent types
+ ws^-1 * token(lexer.OPERATOR, ',') * ws^-1 * type)^0)^-1)
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
'using', 'native', -- external
'goto', 'void', 'serializable', 'volatile', -- error
'if', 'else', 'switch', -- conditional
@@ -35,7 +30,9 @@ local keyword = token(l.KEYWORD, word_match{
'null', -- constant
'this', 'super', -- typedef
'new', 'is', 'isnot', 'as', -- operator
- 'plus', 'minus', 'mult', 'div', 'mod', 'get', 'set', 'slice', 'lshift', 'rshift', 'and', 'or', 'xor', 'inverse', 'negate', 'increment', 'decrement', 'equals', 'compare', -- long operator
+ 'plus', 'minus', 'mult', 'div', 'mod', 'get', 'set', 'slice', 'lshift', 'rshift', 'and', 'or',
+ 'xor', 'inverse', 'negate', --
+ 'increment', 'decrement', 'equals', 'compare', -- long operator
'return', -- stmt
'static', 'const', 'final', -- storage class
'virtual', 'override', 'once', -- slot
@@ -44,62 +41,44 @@ local keyword = token(l.KEYWORD, word_match{
'assert', -- assert
'class', 'enum', 'mixin', -- typedef
'break', 'continue', -- branch
- 'default', 'case', -- labels
- 'public', 'internal', 'protected', 'private', 'abstract', -- scope decl
-})
+ 'default', 'case', -- labels
+ 'public', 'internal', 'protected', 'private', 'abstract' -- scope decl
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'Void', 'Bool', 'Int', 'Float', 'Decimal',
- 'Str', 'Duration', 'Uri', 'Type', 'Range',
- 'List', 'Map', 'Obj',
- 'Err', 'Env',
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'Void Bool Int Float Decimal Str Duration Uri Type Range List Map Obj Err Env')))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Functions.
+-- lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#'))
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Annotations.
-local facet = token('facet', '@' * l.word)
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local bq_str = lexer.range('`', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
--- Functions.
-local func = token(l.FUNCTION, l.word) * #P('(')
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Classes.
-local class_sequence = token(l.KEYWORD, P('class')) * ws^1 *
- token(l.TYPE, l.word) * ( -- at most one inheritance spec
- ws^1 * token(l.OPERATOR, P(':')) * ws^1 *
- token(l.TYPE, l.word) *
- ( -- at least 0 (i.e. any number) of additional classes
- ws^0 * token(l.OPERATOR, P(',')) * ws^0 * token(l.TYPE, l.word)
- )^0
- )^-1
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1))
-M._rules = {
- {'whitespace', ws},
- {'class', class_sequence},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'facet', facet},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#')))
-M._tokenstyles = {
- facet = l.STYLE_PREPROCESSOR
-}
+-- Annotations.
+lex:add_rule('facet', token('facet', '@' * lexer.word))
+lex:add_style('facet', lexer.styles.preprocessor)
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/faust.lua b/lua/lexers/faust.lua
index d685c49..ff47149 100644
--- a/lua/lexers/faust.lua
+++ b/lua/lexers/faust.lua
@@ -1,58 +1,44 @@
--- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE.
+-- Copyright 2015-2022 David B. Lamkins <david@lamkins.net>. See LICENSE.
-- Faust LPeg lexer, see http://faust.grame.fr/
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'faust'}
+local lex = lexer.new('faust')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '//' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'declare', 'import', 'mdoctags', 'dependencies', 'distributed', 'inputs', 'outputs', 'par', 'seq',
+ 'sum', 'prod', 'xor', 'with', 'environment', 'library', 'component', 'ffunction', 'fvariable',
+ 'fconstant', 'int', 'float', 'case', 'waveform', 'h:', 'v:', 't:'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-local int = R('09')^1
+local int = lexer.digit^1
local rad = P('.')
local exp = (P('e') * S('+-')^-1 * int)^-1
local flt = int * (rad * int)^-1 * exp + int^-1 * rad * int * exp
-local number = token(l.NUMBER, flt + int)
+lex:add_rule('number', token(lexer.NUMBER, flt + int))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'declare', 'import', 'mdoctags', 'dependencies', 'distributed', 'inputs',
- 'outputs', 'par', 'seq', 'sum', 'prod', 'xor', 'with', 'environment',
- 'library', 'component', 'ffunction', 'fvariable', 'fconstant', 'int', 'float',
- 'case', 'waveform', 'h:', 'v:', 't:'
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Pragmas.
+lex:add_rule('pragma', token(lexer.PREPROCESSOR, lexer.range('<mdoc>', '</mdoc>')))
-- Operators.
-local punct = S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')
-local operator = token(l.OPERATOR, punct)
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')))
--- Pragmas.
-local mdoc = P('<mdoc>') * (l.any - P('</mdoc>'))^0 * P('</mdoc>')
-local pragma = token(l.PREPROCESSOR, mdoc)
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'pragma', pragma},
- {'keyword', keyword},
- {'number', number},
- {'operator', operator},
- {'identifier', identifier},
- {'string', string},
-}
-
-return M
+return lex
diff --git a/lua/lexers/fennel.lua b/lua/lexers/fennel.lua
index ee8127c..3e5abbf 100644
--- a/lua/lexers/fennel.lua
+++ b/lua/lexers/fennel.lua
@@ -1,88 +1,43 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
--- Lua LPeg lexer.
--- Original written by Peter Odding, 2007/04/04.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
+-- Fennel LPeg lexer.
+-- Contributed by Momohime Honda.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'fennel'}
+local lex = lexer.new('fennel', {inherit = lexer.load('lua')})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline^0
-local comment = token(l.COMMENT, line_comment)
-
--- Strings.
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, dq_str)
-
--- Numbers.
-local lua_integer = P('-')^-1 * (l.hex_num + l.dec_num)
-local number = token(l.NUMBER, l.float + lua_integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- '%', '*', '+', '-', '->', '->>', '-?>', '-?>>', '.', '..', '/', '//', ':', '<', '<=', '=', '>', '>=', '^', '~=', 'λ',
- 'and', 'comment', 'do', 'doc', 'doto', 'each', 'eval-compiler', 'fn', 'for', 'global', 'hashfn', 'if', 'include', 'lambda',
- 'length', 'let', 'local', 'lua', 'macro', 'macros', 'match', 'not', 'not=', 'or', 'partial', 'quote', 'require-macros',
- 'set', 'set-forcibly!', 'tset', 'values', 'var', 'when', 'while'
-}, "%*+-./:<=>?~^λ!"))
-
--- Libraries.
-local library = token('library', word_match({
- -- Coroutine.
- 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running',
- 'coroutine.status', 'coroutine.wrap', 'coroutine.yield',
- -- Module.
- 'package', 'package.cpath', 'package.loaded', 'package.loadlib',
- 'package.path', 'package.preload',
- -- String.
- 'string', 'string.byte', 'string.char', 'string.dump', 'string.find',
- 'string.format', 'string.gmatch', 'string.gsub', 'string.len', 'string.lower',
- 'string.match', 'string.rep', 'string.reverse', 'string.sub', 'string.upper',
- -- Table.
- 'table', 'table.concat', 'table.insert', 'table.remove', 'table.sort',
- -- Math.
- 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil',
- 'math.cos', 'math.deg', 'math.exp', 'math.floor', 'math.fmod', 'math.huge',
- 'math.log', 'math.max', 'math.min', 'math.modf', 'math.pi', 'math.rad',
- 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt', 'math.tan',
- -- IO.
- 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output',
- 'io.popen', 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile',
- 'io.type', 'io.write',
- -- OS.
- 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit',
- 'os.getenv', 'os.remove', 'os.rename', 'os.setlocale', 'os.time',
- 'os.tmpname',
- -- Debug.
- 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal',
- 'debug.getmetatable', 'debug.getregistry', 'debug.getupvalue',
- 'debug.sethook', 'debug.setlocal', 'debug.setmetatable', 'debug.setupvalue',
- 'debug.traceback',
-}, '.'))
-
-local initial = l.alpha + S"|$%&#*+-./:<=>?~^_λ!"
-local subsequent = initial + l.digit
+lex:modify_rule('keyword', token(lexer.KEYWORD, word_match{
+ '#', '%', '*', '+', '-', '->>', '->', '-?>>', '-?>', '..', '.', '//', '/', ':', '<=', '<', '=',
+ '>=', '>', '?.', '^', '~=', 'λ', 'accumulate', 'and', 'band', 'bnot', 'bor', 'bxor', 'collect',
+ 'comment', 'do', 'doto', 'each', 'eval-compiler', 'fn', 'for', 'global', 'hashfn', 'icollect',
+ 'if', 'import-macros', 'include', 'lambda', 'length', 'let', 'local', 'lshift', 'lua', 'macro',
+ 'macrodebug', 'macros', 'match', 'not', 'not=', 'or', 'partial', 'pick-args', 'pick-values',
+ 'quote', 'require-macros', 'rshift', 'set', 'set-forcibly!', 'tset', 'values', 'var', 'when',
+ 'while', 'with-open'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, initial * subsequent^0)
+local initial = lexer.alpha + S('|$%&#*+-/<=>?~^_λ!')
+local subsequent = initial + lexer.digit
+lex:modify_rule('identifier', token(lexer.IDENTIFIER, initial * subsequent^0 * P('#')^-1))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'library', library},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number}
-}
+-- Strings.
+local dq_str = lexer.range('"')
+local kw_str = lpeg.B(1 - subsequent) * ':' * subsequent^1
+lex:modify_rule('string', token(lexer.STRING, dq_str + kw_str))
+
+-- Comments.
+lex:modify_rule('comment', token(lexer.COMMENT, lexer.to_eol(';')))
-M._tokenstyles = {
- library = l.STYLE_TYPE,
-}
+-- Ignore these rules.
+lex:modify_rule('longstring', P(false))
+lex:modify_rule('label', P(false))
+lex:modify_rule('operator', P(false))
-return M
+return lex
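
fennel is the first lexer in this batch to use the new inheritance mechanism: the 'inherit' option seeds the child with the parent's rules, lex:modify_rule() swaps individual rules out in place, and substituting lpeg's never-matching P(false) effectively deletes an inherited rule, as done above for longstring, label and operator. A condensed sketch:

  lpeg = require('lpeg') -- lexer.lua expects a global 'lpeg'
  local lexer = require('lexer')
  local token = lexer.token
  -- Start from every rule of the Lua lexer...
  local lex = lexer.new('demo', {inherit = lexer.load('lua')})
  -- ...replace one inherited rule in place...
  lex:modify_rule('comment', token(lexer.COMMENT, lexer.to_eol(';')))
  -- ...and disable another with a pattern that never matches.
  lex:modify_rule('label', lpeg.P(false))
  return lex
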
diff --git a/lua/lexers/fish.lua b/lua/lexers/fish.lua
index 0fd583a..e493e8f 100644
--- a/lua/lexers/fish.lua
+++ b/lua/lexers/fish.lua
@@ -1,76 +1,57 @@
--- Copyright 2015-2017 Jason Schindler. See LICENSE.
+-- Copyright 2015-2022 Jason Schindler. See LICENSE.
-- Fish (http://fishshell.com/) script LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'fish'}
+local lex = lexer.new('fish')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- shebang
-local shebang = token('shebang', '#!/' * l.nonnewline^0)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"')
-
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'alias', 'and', 'begin', 'bg', 'bind', 'block', 'break', 'breakpoint',
- 'builtin', 'case', 'cd', 'command', 'commandline', 'complete', 'contains',
- 'continue', 'count', 'dirh', 'dirs', 'echo', 'else', 'emit', 'end', 'eval',
- 'exec', 'exit', 'fg', 'fish', 'fish_config', 'fish_indent', 'fish_pager',
- 'fish_prompt', 'fish_right_prompt', 'fish_update_completions', 'fishd', 'for',
- 'funced', 'funcsave', 'function', 'functions', 'help', 'history', 'if', 'in',
- 'isatty', 'jobs', 'math', 'mimedb', 'nextd', 'not', 'open', 'or', 'popd',
- 'prevd', 'psub', 'pushd', 'pwd', 'random', 'read', 'return', 'set',
- 'set_color', 'source', 'status', 'switch', 'test', 'trap', 'type', 'ulimit',
- 'umask', 'vared', 'while'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'alias', 'and', 'begin', 'bg', 'bind', 'block', 'break', 'breakpoint', 'builtin', 'case', 'cd',
+ 'command', 'commandline', 'complete', 'contains', 'continue', 'count', 'dirh', 'dirs', 'echo',
+ 'else', 'emit', 'end', 'eval', 'exec', 'exit', 'fg', 'fish', 'fish_config', 'fishd',
+ 'fish_indent', 'fish_pager', 'fish_prompt', 'fish_right_prompt', 'fish_update_completions', 'for',
+ 'funced', 'funcsave', 'function', 'functions', 'help', 'history', 'if', 'in', 'isatty', 'jobs',
+ 'math', 'mimedb', 'nextd', 'not', 'open', 'or', 'popd', 'prevd', 'psub', 'pushd', 'pwd', 'random',
+ 'read', 'return', 'set', 'set_color', 'source', 'status', 'switch', 'test', 'trap', 'type',
+ 'ulimit', 'umask', 'vared', 'while'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variables.
-local variable = token(l.VARIABLE,
- '$' * l.word + '$' * l.delimited_range('{}', true, true))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.word + lexer.range('{', '}', true))))
--- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-M._rules = {
- {'whitespace', ws},
- {'shebang', shebang},
- {'keyword', keyword},
- {'identifier', identifier},
- {'variable', variable},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Shebang.
+lex:add_rule('shebang', token('shebang', lexer.to_eol('#!/')))
+lex:add_style('shebang', lexer.styles.label)
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-M._tokenstyles = {
- shebang = l.STYLE_LABEL
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+'},
- [l.KEYWORD] = {
- begin = 1, ['for'] = 1, ['function'] = 1, ['if'] = 1, switch = 1,
- ['while'] = 1, ['end'] = -1
- }
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'for', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'function', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'switch', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'while', 'end')
-return M
+return lex
diff --git a/lua/lexers/forth.lua b/lua/lexers/forth.lua
index 17d67b5..9a1e6df 100644
--- a/lua/lexers/forth.lua
+++ b/lua/lexers/forth.lua
@@ -1,71 +1,56 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Forth LPeg lexer.
+-- Contributions from Joseph Eib.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'forth'}
+local lex = lexer.new('forth')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = S('|\\') * l.nonnewline^0
-local block_comment = '(' * (l.any - ')')^0 * P(')')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Strings.
-local c_str = 'c' * l.delimited_range('"', true, true)
-local s_str = 's' * l.delimited_range('"', true, true)
-local s_bs_str = 's\\' * l.delimited_range('"', true, false)
-local dot_str = '.' * l.delimited_range('"', true, true)
-local dot_paren_str = '.' * l.delimited_range('()', true, true, false)
-local abort_str = 'abort' * l.delimited_range('"', true, true)
-local string = token(
- l.STRING,
- c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str
-)
-
--- Numbers.
-local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+local c_str = 'c' * lexer.range('"', true, false)
+local s_str = 's' * lexer.range('"', true, false)
+local s_bs_str = 's\\' * lexer.range('"', true)
+local dot_str = '.' * lexer.range('"', true, false)
+local dot_paren_str = '.' * lexer.range('(', ')', true)
+local abort_str = 'abort' * lexer.range('"', true, false)
+lex:add_rule('string',
+ token(lexer.STRING, c_str + s_str + s_bs_str + dot_str + dot_paren_str + abort_str))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>',
- '0>', '0=', '1+', '1-', '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup',
- '2over', '2r>', '2r@', '2swap', ':noname', '<#', '<>', '>body', '>in',
- '>number', '>r', '?do','?dup', '@', 'abort', 'abs', 'accept', 'action-of',
- 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl',
- 'buffer:', 'c!', 'c,', 'c@', 'case', 'cell+', 'cells', 'char', 'char+',
- 'chars', 'compile,', 'constant', 'count', 'cr', 'create', 'decimal', 'defer',
- 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else', 'emit',
- 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit',
- 'false', 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if',
- 'immediate', 'invert', 'is', 'j', 'key', 'leave', 'literal', 'loop',
- 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move', 'negate', 'nip', 'of',
- 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit', 'r>',
- 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d',
- 'save-input', 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces',
- 'state', 'swap', 'to', 'then', 'true', 'tuck', 'type', 'u.', 'u.r', 'u>',
- 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value', 'variable',
- 'while', 'within', 'word', 'xor', '[\']', '[char]', '[compile]'
-}, '><-@!?+,=[].\'', true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ '#>', '#s', '*/', '*/mod', '+loop', ',', '.', '.r', '/mod', '0<', '0<>', '0>', '0=', '1+', '1-',
+ '2!', '2*', '2/', '2>r', '2@', '2drop', '2dup', '2over', '2r>', '2r@', '2swap', ':noname', '<#',
+ '<>', '>body', '>in', '>number', '>r', '?do', '?dup', '@', 'abort', 'abs', 'accept', 'action-of',
+ 'again', 'align', 'aligned', 'allot', 'and', 'base', 'begin', 'bl', 'buffer:', 'c!', 'c,', 'c@',
+  'case', 'cell+', 'cells', 'char', 'char+', 'chars', 'compile,', 'constant', 'count', 'cr',
+ 'create', 'decimal', 'defer', 'defer!', 'defer@', 'depth', 'do', 'does>', 'drop', 'dup', 'else',
+ 'emit', 'endcase', 'endof', 'environment?', 'erase', 'evaluate', 'execute', 'exit', 'false',
+ 'fill', 'find', 'fm/mod', 'here', 'hex', 'hold', 'holds', 'i', 'if', 'immediate', 'invert', 'is',
+ 'j', 'key', 'leave', 'literal', 'loop', 'lshift', 'm*', 'marker', 'max', 'min', 'mod', 'move',
+ 'negate', 'nip', 'of', 'or', 'over', 'pad', 'parse', 'parse-name', 'pick', 'postpone', 'quit',
+ 'r>', 'r@', 'recurse', 'refill', 'restore-input', 'roll', 'rot', 'rshift', 's>d', 'save-input',
+ 'sign', 'sm/rem', 'source', 'source-id', 'space', 'spaces', 'state', 'swap', 'to', 'then', 'true',
+ 'tuck', 'type', 'u.', 'u.r', 'u>', 'u<', 'um*', 'um/mod', 'unloop', 'until', 'unused', 'value',
+ 'variable', 'while', 'within', 'word', 'xor', "[']", '[char]', '[compile]'
+}, true)))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$#'))^1)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alnum + S('+-*=<>.?/\'%,_$#'))^1))
--- Operators.
-local operator = token(l.OPERATOR, S(':;<>+*-/[]#'))
+-- Comments.
+local line_comment = lexer.to_eol(S('|\\'))
+local block_comment = lexer.range('(', ')')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1))
-M._rules = {
- {'whitespace', ws},
- {'string', string},
- {'keyword', keyword},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S(':;<>+*-/[]#')))
-return M
+return lex
diff --git a/lua/lexers/fortran.lua b/lua/lexers/fortran.lua
index 87026a7..7d0480f 100644
--- a/lua/lexers/fortran.lua
+++ b/lua/lexers/fortran.lua
@@ -1,91 +1,85 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Fortran LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'fortran'}
+local lex = lexer.new('fortran')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local c_comment = l.starts_line(S('Cc')) * l.nonnewline^0
-local d_comment = l.starts_line(S('Dd')) * l.nonnewline^0
-local ex_comment = l.starts_line('!') * l.nonnewline^0
-local ast_comment = l.starts_line('*') * l.nonnewline^0
-local line_comment = '!' * l.nonnewline^0
-local comment = token(l.COMMENT, c_comment + d_comment + ex_comment +
- ast_comment + line_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true, true)
-local dq_str = l.delimited_range('"', true, true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * -l.alpha)
+local line_comment = lexer.to_eol(lexer.starts_line(S('CcDd!*')) + '!')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'include', 'program', 'module', 'subroutine', 'function', 'contains', 'use',
- 'call', 'return',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'include', 'interface', 'program', 'module', 'subroutine', 'function', 'contains', 'use', 'call',
+ 'return',
-- Statements.
- 'case', 'select', 'default', 'continue', 'cycle', 'do', 'while', 'else', 'if',
- 'elseif', 'then', 'elsewhere', 'end', 'endif', 'enddo', 'forall', 'where',
- 'exit', 'goto', 'pause', 'stop',
+ 'case', 'select', 'default', 'continue', 'cycle', 'do', 'while', 'else', 'if', 'elseif', 'then',
+ 'elsewhere', 'end', 'endif', 'enddo', 'equivalence', 'external', 'forall', 'where', 'exit',
+ 'goto', 'pause', 'save', 'stop',
-- Operators.
- '.not.', '.and.', '.or.', '.xor.', '.eqv.', '.neqv.', '.eq.', '.ne.', '.gt.',
- '.ge.', '.lt.', '.le.',
+ '.not.', '.and.', '.or.', '.xor.', '.eqv.', '.neqv.', '.eq.', '.ne.', '.gt.', '.ge.', '.lt.',
+ '.le.',
-- Logical.
- '.false.', '.true.'
-}, '.', true))
+ '.false.', '.true.',
+ -- Attributes and other keywords.
+ 'access', 'action', 'advance', 'assignment', 'block', 'entry', 'in', 'inout', 'intent', 'only',
+ 'out', 'optional', 'pointer', 'precision', 'procedure', 'recursive', 'result', 'sequence', 'size',
+ 'stat', 'target', 'type'
+}, true)))
-- Functions.
-local func = token(l.FUNCTION, word_match({
+lex:add_rule('function', token(lexer.FUNCTION, word_match({
-- I/O.
- 'backspace', 'close', 'endfile', 'inquire', 'open', 'print', 'read', 'rewind',
- 'write', 'format',
- -- Type conversion, utility, and math.
- 'aimag', 'aint', 'amax0', 'amin0', 'anint', 'ceiling', 'cmplx', 'conjg',
- 'dble', 'dcmplx', 'dfloat', 'dim', 'dprod', 'float', 'floor', 'ifix', 'imag',
- 'int', 'logical', 'modulo', 'nint', 'real', 'sign', 'sngl', 'transfer',
- 'zext', 'abs', 'acos', 'aimag', 'aint', 'alog', 'alog10', 'amax0', 'amax1',
- 'amin0', 'amin1', 'amod', 'anint', 'asin', 'atan', 'atan2', 'cabs', 'ccos',
- 'char', 'clog', 'cmplx', 'conjg', 'cos', 'cosh', 'csin', 'csqrt', 'dabs',
- 'dacos', 'dasin', 'datan', 'datan2', 'dble', 'dcos', 'dcosh', 'ddim', 'dexp',
- 'dim', 'dint', 'dlog', 'dlog10', 'dmax1', 'dmin1', 'dmod', 'dnint', 'dprod',
- 'dreal', 'dsign', 'dsin', 'dsinh', 'dsqrt', 'dtan', 'dtanh', 'exp', 'float',
- 'iabs', 'ichar', 'idim', 'idint', 'idnint', 'ifix', 'index', 'int', 'isign',
- 'len', 'lge', 'lgt', 'lle', 'llt', 'log', 'log10', 'max', 'max0', 'max1',
- 'min', 'min0', 'min1', 'mod', 'nint', 'real', 'sign', 'sin', 'sinh', 'sngl',
- 'sqrt', 'tan', 'tanh'
-}, nil, true))
+ 'backspace', 'close', 'endfile', 'inquire', 'open', 'print', 'read', 'rewind', 'write', 'format',
+ -- Type conversion utility and math.
+ 'aimag', 'aint', 'amax0', 'amin0', 'anint', 'ceiling', 'cmplx', 'conjg', 'dble', 'dcmplx',
+ 'dfloat', 'dim', 'dprod', 'float', 'floor', 'ifix', 'imag', 'int', 'logical', 'modulo', 'nint',
+ 'real', 'sign', 'sngl', 'transfer', 'zext', 'abs', 'acos', 'aimag', 'aint', 'alog', 'alog10',
+ 'amax0', 'amax1', 'amin0', 'amin1', 'amod', 'anint', 'asin', 'atan', 'atan2', 'cabs', 'ccos',
+ 'char', 'clog', 'cmplx', 'conjg', 'cos', 'cosh', 'csin', 'csqrt', 'dabs', 'dacos', 'dasin',
+ 'datan', 'datan2', 'dble', 'dcos', 'dcosh', 'ddim', 'dexp', 'dim', 'dint', 'dlog', 'dlog10',
+ 'dmax1', 'dmin1', 'dmod', 'dnint', 'dprod', 'dreal', 'dsign', 'dsin', 'dsinh', 'dsqrt', 'dtan',
+ 'dtanh', 'exp', 'float', 'iabs', 'ichar', 'idim', 'idint', 'idnint', 'ifix', 'index', 'int',
+ 'isign', 'len', 'lge', 'lgt', 'lle', 'llt', 'log', 'log10', 'max', 'max0', 'max1', 'min', 'min0',
+ 'min1', 'mod', 'nint', 'real', 'sign', 'sin', 'sinh', 'sngl', 'sqrt', 'tan', 'tanh',
+ -- Matrix math.
+ 'matmul', 'transpose', 'reshape',
+ -- Other frequently used built-in statements.
+ 'assign', 'nullify',
+ -- ISO C binding from Fortran 2003.
+ 'c_sizeof', 'c_f_pointer', 'c_associated'
+}, true)))
-- Types.
-local type = token(l.TYPE, word_match({
- 'implicit', 'explicit', 'none', 'data', 'parameter', 'allocate',
- 'allocatable', 'allocated', 'deallocate', 'integer', 'real', 'double',
- 'precision', 'complex', 'logical', 'character', 'dimension', 'kind',
-}, nil, true))
+lex:add_rule('type', token(lexer.TYPE, word_match({
+ 'implicit', 'explicit', 'none', 'data', 'parameter', 'allocate', 'allocatable', 'allocated',
+ 'deallocate', 'integer', 'real', 'double', 'precision', 'complex', 'logical', 'character',
+ 'dimension', 'kind',
+ -- ISO C binding from Fortran 2003
+ 'bind', 'c_int', 'c_short', 'c_long', 'c_long_long', 'c_signed_char', 'c_size_t', 'c_int8_t',
+ 'c_int16_t', 'c_int32_t', 'c_int64_t', 'c_int128_t', 'c_intptr_t', 'c_float', 'c_double',
+ 'c_long_double', 'c_float128', 'c_float_complex', 'c_double_complex', 'c_long_double_complex',
+ 'c_float128_complex', 'c_bool', 'c_char', 'c_null_char', 'c_new_line', 'c_null_ptr', 'c_funptr'
+}, true)))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * -lexer.alpha))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.alnum^1)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alnum^1))
--- Operators.
-local operator = token(l.OPERATOR, S('<>=&+-/*,()'))
+-- Strings.
+local sq_str = lexer.range("'", true, false)
+local dq_str = lexer.range('"', true, false)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'function', func},
- {'type', type},
- {'number', number},
- {'identifier', identifier},
- {'string', string},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('<>=&+-/*,()')))
-return M
+return lex
diff --git a/lua/lexers/fsharp.lua b/lua/lexers/fsharp.lua
index 39416a5..18df9bb 100644
--- a/lua/lexers/fsharp.lua
+++ b/lua/lexers/fsharp.lua
@@ -1,76 +1,57 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- F# LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'fsharp'}
+local lex = lexer.new('fsharp', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = P('//') * l.nonnewline^0
-local block_comment = l.nested_pair('(*', '*)')
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer * S('uUlL')^-1))
-
--- Preprocessor.
-local preproc_word = word_match{
- 'ifndef', 'ifdef', 'if', 'else', 'endif', 'light', 'region', 'endregion'
-}
-local preproc = token(l.PREPROCESSOR,
- l.starts_line('#') * S('\t ')^0 * preproc_word *
- (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'and', 'as', 'assert', 'asr', 'begin', 'class', 'default',
- 'delegate', 'do', 'done', 'downcast', 'downto', 'else', 'end', 'enum',
- 'exception', 'false', 'finaly', 'for', 'fun', 'function', 'if', 'in',
- 'iherit', 'interface', 'land', 'lazy', 'let', 'lor', 'lsl', 'lsr', 'lxor',
- 'match', 'member', 'mod', 'module', 'mutable', 'namespace', 'new', 'null',
- 'of', 'open', 'or', 'override', 'sig', 'static', 'struct', 'then', 'to',
- 'true', 'try', 'type', 'val', 'when', 'inline', 'upcast', 'while', 'with',
- 'async', 'atomic', 'break', 'checked', 'component', 'const', 'constructor',
- 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include',
- 'method', 'mixin', 'process', 'property', 'protected', 'public', 'pure',
- 'readonly', 'return', 'sealed', 'switch', 'virtual', 'void', 'volatile',
- 'where',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'and', 'as', 'assert', 'asr', 'begin', 'class', 'default', 'delegate', 'do', 'done',
+ 'downcast', 'downto', 'else', 'end', 'enum', 'exception', 'false', 'finally', 'for', 'fun',
+ 'function', 'if', 'in', 'inherit', 'interface', 'land', 'lazy', 'let', 'lor', 'lsl', 'lsr', 'lxor',
+ 'match', 'member', 'mod', 'module', 'mutable', 'namespace', 'new', 'null', 'of', 'open', 'or',
+ 'override', 'sig', 'static', 'struct', 'then', 'to', 'true', 'try', 'type', 'val', 'when',
+ 'inline', 'upcast', 'while', 'with', 'async', 'atomic', 'break', 'checked', 'component', 'const',
+ 'constructor', 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include', 'method',
+ 'mixin', 'process', 'property', 'protected', 'public', 'pure', 'readonly', 'return', 'sealed',
+ 'switch', 'virtual', 'void', 'volatile', 'where',
-- Booleans.
'true', 'false'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'byte', 'sbyte', 'int16', 'uint16', 'int', 'uint32', 'int64',
- 'uint64', 'nativeint', 'unativeint', 'char', 'string', 'decimal', 'unit',
- 'void', 'float32', 'single', 'float', 'double'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'byte', 'sbyte', 'int16', 'uint16', 'int', 'uint32', 'int64', 'uint64', 'nativeint',
+ 'unativeint', 'char', 'string', 'decimal', 'unit', 'void', 'float32', 'single', 'float', 'double'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=<>+-*/^.,:;~!@#%^&|?[](){}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('(*', '*)', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer * S('uUlL')^-1))
+
+-- Preprocessor.
+lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * S('\t ')^0 *
+ word_match('else endif endregion if ifdef ifndef light region')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/^.,:;~!@#%^&|?[](){}')))
-return M
+return lex
diff --git a/lua/lexers/fstab.lua b/lua/lexers/fstab.lua
index fd0df74..7c9d00e 100644
--- a/lua/lexers/fstab.lua
+++ b/lua/lexers/fstab.lua
@@ -1,569 +1,126 @@
--- Copyright 2016 Christian Hesse
+-- Copyright 2016-2022 Christian Hesse. See LICENSE.
-- fstab LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'fstab'}
+local lex = lexer.new('fstab', {lex_by_line = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, l.starts_line('#') * l.nonnewline^0)
-
--- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0
-local oct_num = '0' * S('01234567_')^1
-local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
-local number = token(l.NUMBER, (l.float + integer))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- -- basic filesystem-independent mount options
- 'async',
- 'atime',
- 'auto',
- 'comment',
- 'context',
- 'defaults',
- 'defcontext',
- 'dev',
- 'dirsync',
- 'exec',
- 'fscontext',
- 'group',
- 'iversion',
- 'lazytime',
- 'loud',
- 'mand',
- '_netdev',
- 'noatime',
- 'noauto',
- 'nodev',
- 'nodiratime',
- 'noexec',
- 'nofail',
- 'noiversion',
- 'nolazytime',
- 'nomand',
- 'norelatime',
- 'nostrictatime',
- 'nosuid',
- 'nouser',
- 'owner',
- 'relatime',
- 'remount',
- 'ro',
- 'rootcontext',
- 'rw',
- 'silent',
- 'strictatime',
- 'suid',
- 'sync',
- 'user',
- 'users',
-
- -- mount options for systemd, see systemd.mount(5)
- 'x-systemd.automount',
- 'x-systemd.device-timeout',
- 'x-systemd.idle-timeout',
- 'x-systemd.mount-timeout',
- 'x-systemd.requires',
- 'x-systemd.requires-mounts-for',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- Basic filesystem-independent mount options.
+ 'async', 'atime', 'auto', 'comment', 'context', 'defaults', 'defcontext', 'dev', 'dirsync',
+ 'exec', 'fscontext', 'group', 'iversion', 'lazytime', 'loud', 'mand', '_netdev', 'noatime',
+ 'noauto', 'nodev', 'nodiratime', 'noexec', 'nofail', 'noiversion', 'nolazytime', 'nomand',
+ 'norelatime', 'nostrictatime', 'nosuid', 'nouser', 'owner', 'relatime', 'remount', 'ro',
+ 'rootcontext', 'rw', 'silent', 'strictatime', 'suid', 'sync', 'user', 'users',
+ -- Mount options for systemd, see systemd.mount(5).
+ 'x-systemd.automount', 'x-systemd.device-timeout', 'x-systemd.idle-timeout',
+ 'x-systemd.mount-timeout', 'x-systemd.requires', 'x-systemd.requires-mounts-for',
'x-initrd.mount',
-
- -- mount options for adfs
- 'uid',
- 'gid',
- 'ownmask',
- 'othmask',
-
- -- mount options for affs
- 'uid',
- 'gid',
- 'setuid',
- 'setgid',
- 'mode',
- 'protect',
- 'usemp',
- 'verbose',
- 'prefix',
- 'volume',
- 'reserved',
- 'root',
- 'bs',
- 'grpquota',
- 'noquota',
- 'quota',
- 'usrquota',
-
- -- mount options for btrfs
- 'alloc_start',
- 'autodefrag',
- 'check_int',
- 'check_int_data',
- 'check_int_print_mask',
- 'commit',
- 'compress',
- 'zlib',
- 'lzo',
- 'no',
- 'compress-force',
- 'degraded',
- 'device',
- 'discard',
- 'enospc_debug',
- 'fatal_errors',
- 'bug',
- 'panic',
- 'flushoncommit',
- 'inode_cache',
- 'max_inline',
- 'metadata_ratio',
- 'noacl',
- 'nobarrier',
- 'nodatacow',
- 'nodatasum',
- 'notreelog',
- 'recovery',
- 'rescan_uuid_tree',
- 'skip_balance',
- 'nospace_cache',
- 'clear_cache',
- 'ssd',
- 'nossd',
- 'ssd_spread',
- 'subvol',
- 'subvolid',
- 'subvolrootid',
- 'thread_pool',
- 'user_subvol_rm_allowed',
-
- -- mount options for devpts
- 'uid',
- 'gid',
- 'mode',
- 'newinstance',
- 'ptmxmode',
-
- -- mount options for ext2
- 'acl',
- 'noacl',
- 'bsddf',
- 'minixdf',
- 'check',
- 'nocheck',
- 'debug',
- 'errors',
- 'continue',
- 'remount-ro',
- 'panic',
- 'grpid',
- 'bsdgroups',
- 'nogrpid',
- 'sysvgroups',
- 'grpquota',
- 'noquota',
- 'quota',
- 'usrquota',
- 'nouid32',
- 'oldalloc',
- 'orlov',
- 'resgid',
- 'resuid',
- 'sb',
- 'user_xattr',
+ -- Mount options for adfs.
+ 'uid', 'gid', 'ownmask', 'othmask',
+ -- Mount options for affs.
+ 'uid', 'gid', 'setuid', 'setgid', 'mode', 'protect', 'usemp', 'verbose', 'prefix', 'volume',
+ 'reserved', 'root', 'bs', 'grpquota', 'noquota', 'quota', 'usrquota',
+ -- Mount options for btrfs.
+ 'alloc_start', 'autodefrag', 'check_int', 'check_int_data', 'check_int_print_mask', 'commit',
+ 'compress', 'zlib', 'lzo', 'no', 'compress-force', 'degraded', 'device', 'discard',
+ 'enospc_debug', 'fatal_errors', 'bug', 'panic', 'flushoncommit', 'inode_cache', 'max_inline',
+ 'metadata_ratio', 'noacl', 'nobarrier', 'nodatacow', 'nodatasum', 'notreelog', 'recovery',
+ 'rescan_uuid_tree', 'skip_balance', 'nospace_cache', 'clear_cache', 'ssd', 'nossd', 'ssd_spread',
+ 'subvol', 'subvolid', 'subvolrootid', 'thread_pool', 'user_subvol_rm_allowed',
+ -- Mount options for devpts.
+ 'uid', 'gid', 'mode', 'newinstance', 'ptmxmode',
+ -- Mount options for ext2.
+ 'acl', 'noacl', 'bsddf', 'minixdf', 'check', 'nocheck', 'debug', 'errors', 'continue',
+ 'remount-ro', 'panic', 'grpid', 'bsdgroups', 'nogrpid', 'sysvgroups', 'grpquota', 'noquota',
+ 'quota', 'usrquota', 'nouid32', 'oldalloc', 'orlov', 'resgid', 'resuid', 'sb', 'user_xattr',
'nouser_xattr',
-
- -- mount options for ext3
- 'journal',
- 'update',
- 'journal_dev',
- 'journal_path',
- 'norecoverynoload',
- 'data',
- 'journal',
- 'ordered',
- 'writeback',
- 'data_err',
- 'ignore',
- 'abort',
- 'barrier',
- 'commit',
- 'user_xattr',
- 'acl',
- 'usrjquota',
- 'grpjquota',
- 'jqfmt',
-
- -- mount options for ext4
- 'journal_checksum',
- 'journal_async_commit',
- 'barrier',
- 'nobarrier',
- 'inode_readahead_blks',
- 'stripe',
- 'delalloc',
- 'nodelalloc',
- 'max_batch_time',
- 'min_batch_time',
- 'journal_ioprio',
- 'abort',
- 'auto_da_alloc',
- 'noauto_da_alloc',
- 'noinit_itable',
- 'init_itable',
- 'discard',
- 'nodiscard',
- 'nouid32',
- 'block_validity',
- 'noblock_validity',
- 'dioread_lock',
- 'dioread_nolock',
- 'max_dir_size_kb',
- 'i_version',
-
- -- mount options for fat (common part of msdos, umsdos and vfat)
- 'blocksize',
- 'uid',
- 'gid',
- 'umask',
- 'dmask',
- 'fmask',
- 'allow_utime',
- 'check',
- 'relaxed',
- 'normal',
- 'strict',
- 'codepage',
- 'conv',
- 'binary',
- 'text',
- 'auto',
- 'cvf_format',
- 'cvf_option',
- 'debug',
- 'discard',
- 'dos1xfloppy',
- 'errors',
- 'panic',
- 'continue',
- 'remount-ro',
- 'fat',
- 'iocharset',
- 'nfs',
- 'stale_rw',
- 'nostale_ro',
- 'tz',
- 'time_offset',
- 'quiet',
- 'rodir',
- 'showexec',
- 'sys_immutable',
+ -- Mount options for ext3.
+ 'journal', 'update', 'journal_dev', 'journal_path', 'norecovery', 'noload', 'data', 'journal',
+ 'ordered', 'writeback', 'data_err', 'ignore', 'abort', 'barrier', 'commit', 'user_xattr', 'acl',
+ 'usrjquota', 'grpjquota', 'jqfmt',
+ -- Mount options for ext4.
+ 'journal_checksum', 'journal_async_commit', 'barrier', 'nobarrier', 'inode_readahead_blks',
+ 'stripe', 'delalloc', 'nodelalloc', 'max_batch_time', 'min_batch_time', 'journal_ioprio', 'abort',
+ 'auto_da_alloc', 'noauto_da_alloc', 'noinit_itable', 'init_itable', 'discard', 'nodiscard',
+ 'nouid32', 'block_validity', 'noblock_validity', 'dioread_lock', 'dioread_nolock',
+ 'max_dir_size_kb', 'i_version',
+ -- Mount options for fat (common part of msdos, umsdos and vfat).
+ 'blocksize', 'uid', 'gid', 'umask', 'dmask', 'fmask', 'allow_utime', 'check', 'relaxed', 'normal',
+ 'strict', 'codepage', 'conv', 'binary', 'text', 'auto', 'cvf_format', 'cvf_option', 'debug',
+ 'discard', 'dos1xfloppy', 'errors', 'panic', 'continue', 'remount-ro', 'fat', 'iocharset', 'nfs',
+ 'stale_rw', 'nostale_ro', 'tz', 'time_offset', 'quiet', 'rodir', 'showexec', 'sys_immutable',
+ 'flush', 'usefree', 'dots', 'nodots', 'dotsOK',
+ -- Mount options for hfs.
+ 'creator', 'type', 'uid', 'gid', 'dir_umask', 'file_umask', 'umask', 'session', 'part', 'quiet',
+ -- Mount options for hpfs.
+ 'uid', 'gid', 'umask', 'case', 'lower', 'asis', 'conv', 'binary', 'text', 'auto', 'nocheck',
+ -- Mount options for iso9660.
+ 'norock', 'nojoliet', 'check', 'relaxed', 'strict', 'uid', 'gid', 'map', 'normal', 'offacorn',
+ 'mode', 'unhide', 'block', 'conv', 'auto', 'binary', 'mtext', 'text', 'cruft', 'session',
+ 'sbsector', 'iocharset', 'utf8',
+ -- Mount options for jfs.
+ 'iocharset', 'resize', 'nointegrity', 'integrity', 'errors', 'continue', 'remount-ro', 'panic',
+ 'noquota', 'quota', 'usrquota', 'grpquota',
+ -- Mount options for ntfs.
+ 'iocharset', 'nls', 'utf8', 'uni_xlate', 'posix', 'uid', 'gid', 'umask',
+ -- Mount options for overlay.
+ 'lowerdir', 'upperdir', 'workdir',
+ -- Mount options for reiserfs.
+ 'conv', 'hash', 'rupasov', 'tea', 'r5', 'detect', 'hashed_relocation', 'no_unhashed_relocation',
+ 'noborder', 'nolog', 'notail', 'replayonly', 'resize', 'user_xattr', 'acl', 'barrier', 'none',
'flush',
- 'usefree',
- 'dots',
- 'nodots',
- 'dotsOK',
-
- -- mount options for hfs
- 'creator',
- 'type',
- 'uid',
- 'gid',
- 'dir_umask',
- 'file_umask',
- 'umask',
- 'session',
- 'part',
- 'quiet',
-
- -- mount options for hpfs
- 'uid',
- 'gid',
- 'umask',
- 'case',
- 'lower',
- 'asis',
- 'conv',
- 'binary',
- 'text',
- 'auto',
- 'nocheck',
+ -- Mount options for tmpfs.
+ 'size', 'nr_blocks', 'nr_inodes', 'mode', 'uid', 'gid', 'mpol', 'default', 'prefer', 'bind',
+ 'interleave',
+ -- Mount options for ubifs.
+ 'bulk_read', 'no_bulk_read', 'chk_data_crc', 'no_chk_data_crc', 'compr', 'none', 'lzo', 'zlib',
+ -- Mount options for udf.
+ 'gid', 'umask', 'uid', 'unhide', 'undelete', 'nostrict', 'iocharset', 'bs', 'novrs', 'session',
+ 'anchor', 'volume', 'partition', 'lastblock', 'fileset', 'rootdir',
+ -- Mount options for ufs.
+ 'ufstype', 'old', '44bsd', 'ufs2', '5xbsd', 'sun', 'sunx86', 'hp', 'nextstep', 'nextstep-cd',
+ 'openstep', 'onerror', 'lock', 'umount', 'repair',
+ -- Mount options for vfat.
+ 'uni_xlate', 'posix', 'nonumtail', 'utf8', 'shortname', 'lower', 'win95', 'winnt', 'mixed',
+ -- Mount options for usbfs.
+ 'devuid', 'devgid', 'devmode', 'busuid', 'busgid', 'busmode', 'listuid', 'listgid', 'listmode',
+ -- Filesystems.
+ 'adfs', 'ados', 'affs', 'anon_inodefs', 'atfs', 'audiofs', 'auto', 'autofs', 'bdev', 'befs',
+ 'bfs', 'btrfs', 'binfmt_misc', 'cd9660', 'cfs', 'cgroup', 'cifs', 'coda', 'configfs', 'cpuset',
+ 'cramfs', 'devfs', 'devpts', 'devtmpfs', 'e2compr', 'efs', 'ext2', 'ext2fs', 'ext3', 'ext4',
+ 'fdesc', 'ffs', 'filecore', 'fuse', 'fuseblk', 'fusectl', 'hfs', 'hpfs', 'hugetlbfs', 'iso9660',
+ 'jffs', 'jffs2', 'jfs', 'kernfs', 'lfs', 'linprocfs', 'mfs', 'minix', 'mqueue', 'msdos', 'ncpfs',
+ 'nfs', 'nfsd', 'nilfs2', 'none', 'ntfs', 'null', 'nwfs', 'overlay', 'ovlfs', 'pipefs', 'portal',
+ 'proc', 'procfs', 'pstore', 'ptyfs', 'qnx4', 'reiserfs', 'ramfs', 'romfs', 'securityfs', 'shm',
+ 'smbfs', 'squashfs', 'sockfs', 'sshfs', 'std', 'subfs', 'swap', 'sysfs', 'sysv', 'tcfs', 'tmpfs',
+ 'udf', 'ufs', 'umap', 'umsdos', 'union', 'usbfs', 'userfs', 'vfat', 'vs3fs', 'vxfs', 'wrapfs',
+ 'wvfs', 'xenfs', 'xfs', 'zisofs'
+}))
- -- mount options for iso9660
- 'norock',
- 'nojoliet',
- 'check',
- 'relaxed',
- 'strict',
- 'uid',
- 'gid',
- 'map',
- 'normal',
- 'offacorn',
- 'mode',
- 'unhide',
- 'block',
- 'conv',
- 'auto',
- 'binary',
- 'mtext',
- 'text',
- 'cruft',
- 'session',
- 'sbsector',
- 'iocharset',
- 'utf8',
-
- -- mount options for jfs
- 'iocharset',
- 'resize',
- 'nointegrity',
- 'integrity',
- 'errors',
- 'continue',
- 'remount-ro',
- 'panic',
- 'noquota',
- 'quota',
- 'usrquota',
- 'grpquota',
-
- -- mount options for ntfs
- 'iocharset',
- 'nls',
- 'utf8',
- 'uni_xlate',
- 'posix',
- 'uid',
- 'gid',
- 'umask',
-
- -- mount options for overlay
- 'lowerdir',
- 'upperdir',
- 'workdir',
-
- -- mount options for reiserfs
- 'conv',
- 'hash',
- 'rupasov',
- 'tea',
- 'r5',
- 'detect',
- 'hashed_relocation',
- 'no_unhashed_relocation',
- 'noborder',
- 'nolog',
- 'notail',
- 'replayonly',
- 'resize',
- 'user_xattr',
- 'acl',
- 'barrier',
- 'none',
- 'flush',
-
- -- mount options for tmpfs
- 'size',
- 'nr_blocks',
- 'nr_inodes',
- 'mode',
- 'uid',
- 'gid',
- 'mpol',
- 'default',
- 'prefer',
- 'bind',
- 'interleave',
-
- -- mount options for ubifs
- 'bulk_read',
- 'no_bulk_read',
- 'chk_data_crc',
- 'no_chk_data_crc.',
- 'compr',
- 'none',
- 'lzo',
- 'zlib',
-
- -- mount options for udf
- 'gid',
- 'umask',
- 'uid',
- 'unhide',
- 'undelete',
- 'nostrict',
- 'iocharset',
- 'bs',
- 'novrs',
- 'session',
- 'anchor',
- 'volume',
- 'partition',
- 'lastblock',
- 'fileset',
- 'rootdir',
-
- -- mount options for ufs
- 'ufstype',
- 'old',
- '44bsd',
- 'ufs2',
- '5xbsd',
- 'sun',
- 'sunx86',
- 'hp',
- 'nextstep',
- 'nextstep-cd',
- 'openstep',
- 'onerror',
- 'lock',
- 'umount',
- 'repair',
-
- -- mount options for vfat
- 'uni_xlate',
- 'posix',
- 'nonumtail',
- 'utf8',
- 'shortname',
- 'lower',
- 'win95',
- 'winnt',
- 'mixed',
-
- -- mount options for usbfs
- 'devuid',
- 'devgid',
- 'devmode',
- 'busuid',
- 'busgid',
- 'busmode',
- 'listuid',
- 'listgid',
- 'listmode',
-
- -- filesystems
- 'adfs',
- 'ados',
- 'affs',
- 'anon_inodefs',
- 'atfs',
- 'audiofs',
- 'auto',
- 'autofs',
- 'bdev',
- 'befs',
- 'bfs',
- 'btrfs',
- 'binfmt_misc',
- 'cd9660',
- 'cfs',
- 'cgroup',
- 'cifs',
- 'coda',
- 'configfs',
- 'cpuset',
- 'cramfs',
- 'devfs',
- 'devpts',
- 'devtmpfs',
- 'e2compr',
- 'efs',
- 'ext2',
- 'ext2fs',
- 'ext3',
- 'ext4',
- 'fdesc',
- 'ffs',
- 'filecore',
- 'fuse',
- 'fuseblk',
- 'fusectl',
- 'hfs',
- 'hpfs',
- 'hugetlbfs',
- 'iso9660',
- 'jffs',
- 'jffs2',
- 'jfs',
- 'kernfs',
- 'lfs',
- 'linprocfs',
- 'mfs',
- 'minix',
- 'mqueue',
- 'msdos',
- 'ncpfs',
- 'nfs',
- 'nfsd',
- 'nilfs2',
- 'none',
- 'ntfs',
- 'null',
- 'nwfs',
- 'overlay',
- 'ovlfs',
- 'pipefs',
- 'portal',
- 'proc',
- 'procfs',
- 'pstore',
- 'ptyfs',
- 'qnx4',
- 'reiserfs',
- 'ramfs',
- 'romfs',
- 'securityfs',
- 'shm',
- 'smbfs',
- 'squashfs',
- 'sockfs',
- 'sshfs',
- 'std',
- 'subfs',
- 'swap',
- 'sysfs',
- 'sysv',
- 'tcfs',
- 'tmpfs',
- 'udf',
- 'ufs',
- 'umap',
- 'umsdos',
- 'union',
- 'usbfs',
- 'userfs',
- 'vfat',
- 'vs3fs',
- 'vxfs',
- 'wrapfs',
- 'wvfs',
- 'xenfs',
- 'xfs',
- 'zisofs',
-}, '.-'))
+-- Numbers.
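+-- Device UUIDs: eight leading hex digits, up to three '-xxxx' groups, then a final twelve-digit group.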
+local uuid = lexer.xdigit^8 * ('-' * lexer.xdigit^4)^-3 * '-' * lexer.xdigit^12
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec)
+lex:add_rule('number', token(lexer.NUMBER, uuid + lexer.float + integer))
-- Identifiers.
-local word = (l.alpha + '_') * (l.alnum + S('_.'))^0
-local identifier = token(l.IDENTIFIER, word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0))
--- Operators.
-local operator = token(l.OPERATOR, S('=,'))
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(lexer.to_eol('#'))))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Directories.
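+-- Mount points and device nodes: '/' followed by everything up to the next whitespace.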
+lex:add_rule('directory', token(lexer.VARIABLE, '/' * (1 - lexer.space)^0))
-M._LEXBYLINE = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=,')))
-return M
+return lex
diff --git a/lua/lexers/gap.lua b/lua/lexers/gap.lua
index a4117d3..29cfdf9 100644
--- a/lua/lexers/gap.lua
+++ b/lua/lexers/gap.lua
@@ -1,56 +1,44 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Gap LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'gap'}
+local lex = lexer.new('gap')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'break', 'continue', 'do', 'elif', 'else', 'end', 'fail', 'false', 'fi', 'for', 'function',
+ 'if', 'in', 'infinity', 'local', 'not', 'od', 'or', 'rec', 'repeat', 'return', 'then', 'true',
+ 'until', 'while'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-local number = token(l.NUMBER, l.digit^1 * -l.alpha)
+lex:add_rule('number', token(lexer.NUMBER, lexer.dec_num * -lexer.alpha))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'and', 'break', 'continue', 'do', 'elif', 'else', 'end', 'fail', 'false',
- 'fi', 'for', 'function', 'if', 'in', 'infinity', 'local', 'not', 'od', 'or',
- 'rec', 'repeat', 'return', 'then', 'true', 'until', 'while'
-})
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('*+-,./:;<=>~^#()[]{}')))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'function', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'do', 'od')
+lex:add_fold_point(lexer.KEYWORD, 'if', 'fi')
+lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
--- Operators.
-local operator = token(l.OPERATOR, S('*+-,./:;<=>~^#()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._foldsymbols = {
- _patterns = {'[a-z]+', '#'},
- [l.KEYWORD] = {
- ['function'] = 1, ['end'] = -1, ['do'] = 1, od = -1, ['if'] = 1, fi = -1,
- ['repeat'] = 1, ['until'] = -1
- },
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
-
-return M
+return lex
diff --git a/lua/lexers/gemini.lua b/lua/lexers/gemini.lua
index 6755de9..dc529d4 100644
--- a/lua/lexers/gemini.lua
+++ b/lua/lexers/gemini.lua
@@ -1,48 +1,42 @@
-- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
-- Markdown LPeg lexer.
-- Copyright 2020 Haelwenn (lanodan) Monnier <contact+gemini.lua@hacktivis.me>
-- Gemini / Gemtext LPeg lexer.
-- See https://gemini.circumlunar.space/docs/specification.html
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local M = {_NAME = 'gemini'}
+local lex = lexer.new('gemini')
+
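+-- Try '###' before '##' before '#' so the longer heading markers win the ordered choice.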
+local header = token('h3', lexer.starts_line('###') * lexer.nonnewline^0) +
+ token('h2', lexer.starts_line('##') * lexer.nonnewline^0) +
+ token('h1', lexer.starts_line('#') * lexer.nonnewline^0)
+lex:add_rule('header', header)
+lex:add_style('h1', {fore = lexer.colors.red, size = 15})
+lex:add_style('h2', {fore = lexer.colors.red, size = 14})
+lex:add_style('h3', {fore = lexer.colors.red, size = 13})
-local ws = token(l.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1)
+local list = token('list', lexer.starts_line('*') * lexer.nonnewline^0)
+lex:add_rule('list', list)
+lex:add_style('list', lexer.styles.constant)
-local link = token('link', l.starts_line('=>') * l.nonnewline^0)
+local blockquote = token(lexer.STRING, lexer.starts_line('>') * lexer.nonnewline^0)
+lex:add_rule('blockquote', blockquote)
-- Should only match ``` at start of line
-local pre = token('pre', l.delimited_range('```', false, true))
-
-local header = token('h3', l.starts_line('###') * l.nonnewline^0) +
- token('h2', l.starts_line('##') * l.nonnewline^0) +
- token('h1', l.starts_line('#') * l.nonnewline^0)
-
-local list = token('list', l.starts_line('*') * l.nonnewline^0)
-
-local blockquote = token(l.STRING, l.starts_line('>') * l.nonnewline^0)
-
-M._rules = {
- {'header', header},
- {'list', list},
- {'blockquote', blockquote},
- {'pre', pre},
- {'whitespace', ws},
- {'link', link}
-}
-
-local font_size = 10
-local hstyle = 'fore:red'
-M._tokenstyles = {
- h3 = hstyle..',size:'..(font_size + 3),
- h2 = hstyle..',size:'..(font_size + 4),
- h1 = hstyle..',size:'..(font_size + 5),
- pre = l.STYLE_EMBEDDED..',eolfilled',
- link = 'underlined',
- list = l.STYLE_CONSTANT
-}
-
-return M
+local pre = token('pre', lexer.range('```', false, true))
+lex:add_rule('pre', pre)
+lex:add_style('pre', lexer.styles.embedded .. {eolfilled = true})
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1)
+lex:add_rule('whitespace', ws)
+
+local link = token('link', lexer.starts_line('=>') * lexer.nonnewline^0)
+lex:add_rule('link', link)
+lex:add_style('link', {underlined = true})
+
+return lex
diff --git a/lua/lexers/gettext.lua b/lua/lexers/gettext.lua
index ce8671d..c4c1150 100644
--- a/lua/lexers/gettext.lua
+++ b/lua/lexers/gettext.lua
@@ -1,39 +1,29 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Gettext LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'gettext'}
+local lex = lexer.new('gettext')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * S(': .~') * l.nonnewline^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'msgid', 'msgid_plural', 'msgstr', 'fuzzy', 'c-format', 'no-c-format'
-}, '-', true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'msgid msgid_plural msgstr fuzzy c-format no-c-format', true)))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variables.
-local variable = token(l.VARIABLE, S('%$@') * l.word)
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'keyword', keyword},
- {'identifier', identifier},
- {'variable', variable},
-}
-
-return M
+lex:add_rule('variable', token(lexer.VARIABLE, S('%$@') * lexer.word))
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' * S(': .~'))))
+
+return lex
diff --git a/lua/lexers/gherkin.lua b/lua/lexers/gherkin.lua
index fa7d898..c876fe3 100644
--- a/lua/lexers/gherkin.lua
+++ b/lua/lexers/gherkin.lua
@@ -1,64 +1,40 @@
--- Copyright 2015-2017 Jason Schindler. See LICENSE.
+-- Copyright 2015-2022 Jason Schindler. See LICENSE.
-- Gherkin (https://github.com/cucumber/cucumber/wiki/Gherkin) LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'gherkin'}
+local lex = lexer.new('gherkin', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Tags.
-local tag = token('tag', '@' * l.word^0)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local doc_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local dq_str = l.delimited_range('"')
-
-local string = token(l.STRING, doc_str + dq_str)
-
--- Placeholders.
-local placeholder = token('placeholder', l.nested_pair('<', '>'))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'Given', 'When', 'Then', 'And', 'But'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'And Background But Examples Feature Given Outline Scenario Scenarios Then When')))
--- Identifiers.
-local identifier = token(l.KEYWORD, P('Scenario Outline') + word_match{
- 'Feature', 'Background', 'Scenario', 'Scenarios', 'Examples'
-})
+-- Strings.
+local doc_str = lexer.range('"""')
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, doc_str + dq_str))
--- Examples.
-local example = token('example', '|' * l.nonnewline^0)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'tag', tag},
- {'placeholder', placeholder},
- {'keyword', keyword},
- {'identifier', identifier},
- {'example', example},
- {'string', string},
- {'number', number}
-}
+-- Tags.
+lex:add_rule('tag', token('tag', '@' * lexer.word^0))
+lex:add_style('tag', lexer.styles.label)
-M._tokenstyles = {
- tag = l.STYLE_LABEL,
- placeholder = l.STYLE_NUMBER,
- example = l.STYLE_NUMBER
-}
+-- Placeholders.
+lex:add_rule('placeholder', token('placeholder', lexer.range('<', '>', false, false, true)))
+lex:add_style('placeholder', lexer.styles.variable)
-M._FOLDBYINDENTATION = true
+-- Examples.
+lex:add_rule('example', token('example', lexer.to_eol('|')))
+lex:add_style('example', lexer.styles.number)
-return M
+return lex
diff --git a/lua/lexers/git-rebase.lua b/lua/lexers/git-rebase.lua
index 42c0e6c..ee68040 100644
--- a/lua/lexers/git-rebase.lua
+++ b/lua/lexers/git-rebase.lua
@@ -1,51 +1,41 @@
-- Copyright 2017-2021 Marc André Tanner
-- git-rebase(1) LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local P, R = lpeg.P, lpeg.R
-local M = {_NAME = 'git-rebase'}
+local lex = lexer.new('git-rebase', {lex_by_line = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, l.starts_line('#') * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line('#') * lexer.nonnewline^0))
-- Keywords.
-local keywords = l.starts_line(word_match{
- 'p', 'pick',
- 'r', 'reword',
- 'e', 'edit',
- 's', 'squash',
- 'f', 'fixup',
- 'x', 'exec',
- 'd', 'drop',
- 'b', 'break',
- 'l', 'label',
- 't', 'reset',
- 'm', 'merge',
-})
-local keyword = token(l.KEYWORD, keywords)
+local keywords = lexer.starts_line(word_match[[
+ p pick
+ r reword
+ e edit
+ s squash
+ f fixup
+ x exec
+ d drop
+ b break
+ l label
+ t reset
+ m merge
+]])
+lex:add_rule('keyword', token(lexer.KEYWORD, keywords))
-- Commit SHA1.
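+-- patn(pat, min, max) matches min to max repetitions of pat: the negative lookahead -pat^(max + 1) rejects longer runs before pat^min consumes at least min of them.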
local function patn(pat, min, max)
return -pat^(max + 1) * pat^min
end
-local commit = token(l.NUMBER, patn(R('09', 'af'), 7, 40))
+lex:add_rule('commit', token(lexer.NUMBER, patn(R('09', 'af'), 7, 40)))
-local message = token(l.STRING, l.nonnewline^1)
+lex:add_rule('message', token(lexer.STRING, lexer.nonnewline^1))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'commit', commit},
- {'message', message},
-}
-
-M._LEXBYLINE = true
-
-return M
+return lex
diff --git a/lua/lexers/gleam.lua b/lua/lexers/gleam.lua
new file mode 100644
index 0000000..6aee725
--- /dev/null
+++ b/lua/lexers/gleam.lua
@@ -0,0 +1,119 @@
+-- Copyright 2021-2022 Mitchell. See LICENSE.
+-- Gleam LPeg lexer.
+-- https://gleam.run/
+-- Contributed by Tynan Beatty
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+
+local KEY, OP = lexer.KEYWORD, lexer.OPERATOR
+
+local lex = lexer.new('gleam')
+
+-- Whitespace.
+local gleam_ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', gleam_ws)
+
+-- Types.
+local typ_tok = token(lexer.TYPE, lexer.upper * lexer.alnum^0)
+lex:add_rule('type', typ_tok)
+
+-- Modules.
+local name = (lexer.lower + '_') * (lexer.lower + lexer.digit + '_')^0
+local fn_name = token(lexer.FUNCTION, name)
+local mod_name = token('module', name)
+local typ_or_fn = typ_tok + fn_name
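+-- Matches 'import one/two as three.{Four, five}' style imports: slash-separated path segments, an optional 'as' alias, and an optional '.{...}' unqualified import list.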
+local function mod_tok(ws)
+ return token(KEY, 'import') * ws^1 * mod_name * (ws^0 * token(OP, '/') * ws^0 * mod_name)^0 *
+ (ws^1 * token(KEY, 'as') * ws^1 * mod_name)^-1 *
+ (ws^0 * token(OP, '.') * ws^0 * token(OP, '{') * ws^0 * typ_or_fn *
+ (ws^0 * token(OP, ',') * ws^0 * typ_or_fn)^0 * ws^0 * token(OP, '}'))^-1
+end
+lex:add_rule('module', mod_tok(gleam_ws))
+lex:add_style('module', lexer.styles.constant)
+
+-- Keywords.
+local key_tok = token(KEY, word_match(
+ 'as assert case const external fn if import let opaque pub todo try tuple type'))
+lex:add_rule('keyword', key_tok)
+
+-- Functions.
+local function fn_tok(ws)
+ local mod_name_op = mod_name * ws^0 * token(OP, '.')
+ local fn_def_call = mod_name_op^-1 * ws^0 * fn_name * ws^0 * #P('(')
+ local fn_pipe = token(OP, '|>') * ws^0 * (token(KEY, 'fn') + mod_name_op^-1 * fn_name)
+ return fn_def_call + fn_pipe
+end
+lex:add_rule('function', fn_tok(gleam_ws))
+
+-- Labels.
+local id = token(lexer.IDENTIFIER, name)
+local function lab_tok(ws)
+ return token(OP, S('(,')) * ws^0 * token(lexer.LABEL, name) * #(ws^1 * id)
+end
+lex:add_rule('label', lab_tok(gleam_ws))
+
+-- Identifiers.
+local discard_id = token('discard', '_' * name)
+local id_tok = discard_id + id
+lex:add_rule('identifier', id_tok)
+lex:add_style('discard', lexer.styles.comment)
+
+-- Strings.
+local str_tok = token(lexer.STRING, lexer.range('"'))
+lex:add_rule('string', str_tok)
+
+-- Comments.
+local com_tok = token(lexer.COMMENT, lexer.to_eol('//'))
+lex:add_rule('comment', com_tok)
+
+-- Numbers.
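+-- can_neg() admits a leading '-' only after whitespace or an operator (checked with the lpeg.B back-assertion), so 'a-1' still lexes as a subtraction; can_sep() permits single '_' digit separators.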
+local function can_neg(patt) return (lpeg.B(lexer.space + S('+-/*%<>=&|:,.')) * '-')^-1 * patt end
+local function can_sep(patt) return (P('_')^-1 * patt^1)^1 end
+local dec = lexer.digit * can_sep(lexer.digit)^0
+local float = dec * '.' * dec^0
+local bin = '0' * S('bB') * can_sep(S('01')) * -lexer.xdigit
+local oct = '0' * S('oO') * can_sep(lpeg.R('07'))
+local hex = '0' * S('xX') * can_sep(lexer.xdigit)
+local num_tok = token(lexer.NUMBER, can_neg(float) + bin + oct + hex + can_neg(dec))
+lex:add_rule('number', num_tok)
+
+-- Operators.
+local op_tok = token(OP, S('+-*/%#!=<>&|.,:;{}[]()'))
+lex:add_rule('operator', op_tok)
+
+-- Errors.
+local err_tok = token(lexer.ERROR, lexer.any)
+lex:add_rule('error', err_tok)
+
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+
+-- Embedded Bit Strings.
+-- Mimic lexer.load() by creating a bitstring-specific whitespace style.
+local bitstring = lexer.new(lex._NAME .. '_bitstring')
+local bitstring_ws = token(bitstring._NAME .. '_whitespace', lexer.space^1)
+bitstring:add_rule('whitespace', bitstring_ws)
+bitstring:add_style(bitstring._NAME .. '_whitespace', lexer.styles.whitespace)
+bitstring:add_rule('type', typ_tok)
+bitstring:add_rule('module', mod_tok(bitstring_ws))
+bitstring:add_rule('keyword', key_tok + token(KEY, word_match{
+ 'binary', 'bytes', 'int', 'float', 'bit_string', 'bits', 'utf8', 'utf16', 'utf32',
+ 'utf8_codepoint', 'utf16_codepoint', 'utf32_codepoint', 'signed', 'unsigned', 'big', 'little',
+ 'native', 'unit', 'size'
+}))
+bitstring:add_rule('function', fn_tok(bitstring_ws))
+bitstring:add_rule('label', lab_tok(bitstring_ws))
+bitstring:add_rule('identifier', id_tok)
+bitstring:add_rule('string', str_tok)
+bitstring:add_rule('comment', com_tok)
+bitstring:add_rule('number', num_tok)
+bitstring:add_rule('operator', op_tok)
+bitstring:add_rule('error', err_tok)
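+-- Hand off to the bitstring sub-lexer between '<<' and '>>' delimiters.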
+lex:embed(bitstring, token(OP, '<<'), token(OP, '>>'))
+
+return lex
diff --git a/lua/lexers/glsl.lua b/lua/lexers/glsl.lua
index 6fdaf20..9997a13 100644
--- a/lua/lexers/glsl.lua
+++ b/lua/lexers/glsl.lua
@@ -1,132 +1,104 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- GLSL LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local table = _G.table
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S, R = lpeg.P, lpeg.S, lpeg.R
-local M = {_NAME = 'glsl'}
+local lex = lexer.new('glsl', {inherit = lexer.load('cpp')})
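+-- GLSL builds on the C++ lexer: modify_rule() replaces an inherited rule, and get_rule() splices the cpp patterns back in after the GLSL-specific ones.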
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'attribute', 'const', 'in', 'inout', 'out', 'uniform', 'varying', 'invariant',
- 'centroid', 'flat', 'smooth', 'noperspective', 'layout', 'patch', 'sample',
- 'subroutine', 'lowp', 'mediump', 'highp', 'precision',
+lex:modify_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'attribute', 'const', 'in', 'inout', 'out', 'uniform', 'varying', 'invariant', 'centroid', 'flat',
+ 'smooth', 'noperspective', 'layout', 'patch', 'sample', 'subroutine', 'lowp', 'mediump', 'highp',
+ 'precision',
-- Macros.
- '__VERSION__', '__LINE__', '__FILE__',
-})
-
--- Functions.
-local func = token(l.FUNCTION, word_match{
- 'radians', 'degrees', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh',
- 'cosh', 'tanh', 'asinh', 'acosh', 'atanh', 'pow', 'exp', 'log', 'exp2',
- 'log2', 'sqrt', 'inversesqrt', 'abs', 'sign', 'floor', 'trunc', 'round',
- 'roundEven', 'ceil', 'fract', 'mod', 'modf', 'min', 'max', 'clamp', 'mix',
- 'step', 'smoothstep', 'isnan', 'isinf', 'floatBitsToInt', 'floatBitsToUint',
- 'intBitsToFloat', 'uintBitsToFloat', 'fma', 'frexp', 'ldexp', 'packUnorm2x16',
- 'packUnorm4x8', 'packSnorm4x8', 'unpackUnorm2x16', 'unpackUnorm4x8',
- 'unpackSnorm4x8', 'packDouble2x32', 'unpackDouble2x32', 'length', 'distance',
- 'dot', 'cross', 'normalize', 'ftransform', 'faceforward', 'reflect',
- 'refract', 'matrixCompMult', 'outerProduct', 'transpose', 'determinant',
- 'inverse', 'lessThan', 'lessThanEqual', 'greaterThan', 'greaterThanEqual',
- 'equal', 'notEqual', 'any', 'all', 'not', 'uaddCarry', 'usubBorrow',
- 'umulExtended', 'imulExtended', 'bitfieldExtract', 'bitfildInsert',
- 'bitfieldReverse', 'bitCount', 'findLSB', 'findMSB', 'textureSize',
- 'textureQueryLOD', 'texture', 'textureProj', 'textureLod', 'textureOffset',
- 'texelFetch', 'texelFetchOffset', 'textureProjOffset', 'textureLodOffset',
- 'textureProjLod', 'textureProjLodOffset', 'textureGrad', 'textureGradOffset',
- 'textureProjGrad', 'textureProjGradOffset', 'textureGather',
- 'textureGatherOffset', 'texture1D', 'texture2D', 'texture3D', 'texture1DProj',
- 'texture2DProj', 'texture3DProj', 'texture1DLod', 'texture2DLod',
- 'texture3DLod', 'texture1DProjLod', 'texture2DProjLod', 'texture3DProjLod',
- 'textureCube', 'textureCubeLod', 'shadow1D', 'shadow2D', 'shadow1DProj',
- 'shadow2DProj', 'shadow1DLod', 'shadow2DLod', 'shadow1DProjLod',
- 'shadow2DProjLod', 'dFdx', 'dFdy', 'fwidth', 'interpolateAtCentroid',
- 'interpolateAtSample', 'interpolateAtOffset', 'noise1', 'noise2', 'noise3',
- 'noise4', 'EmitStreamVertex', 'EndStreamPrimitive', 'EmitVertex',
- 'EndPrimitive', 'barrier'
-})
+ '__VERSION__', '__LINE__', '__FILE__'
+}) + lex:get_rule('keyword'))
-- Types.
-local type = token(l.TYPE,
- S('bdiu')^-1 * 'vec' * R('24') +
- P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) +
- S('iu')^-1 * 'sampler' * R('13') * 'D' +
- 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
- S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + word_match{
- 'Cube', '2DRect', 'Buffer', '2DMS', '2DMSArray',
- '2DMSCubeArray'
- }) +
- word_match{
- 'samplerCubeShadow', 'sampler2DRectShadow',
- 'samplerCubeArrayShadow'
- })
+-- LuaFormatter off
+lex:modify_rule('type', token(lexer.TYPE,
+ S('bdiu')^-1 * 'vec' * R('24') +
+ P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) +
+ S('iu')^-1 * 'sampler' * R('13') * 'D' +
+ 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
+ (S('iu')^-1 * 'sampler' * (R('12') * 'DArray' +
+ word_match('Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray'))) +
+ word_match('samplerCubeShadow sampler2DRectShadow samplerCubeArrayShadow')) +
+-- LuaFormatter on
+ lex:get_rule('type') +
+
+-- Functions.
+token(lexer.FUNCTION, word_match{
+ 'radians', 'degrees', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh', 'cosh', 'tanh',
+ 'asinh', 'acosh', 'atanh', 'pow', 'exp', 'log', 'exp2', 'log2', 'sqrt', 'inversesqrt', 'abs',
+ 'sign', 'floor', 'trunc', 'round', 'roundEven', 'ceil', 'fract', 'mod', 'modf', 'min', 'max',
+ 'clamp', 'mix', 'step', 'smoothstep', 'isnan', 'isinf', 'floatBitsToInt', 'floatBitsToUint',
+ 'intBitsToFloat', 'uintBitsToFloat', 'fma', 'frexp', 'ldexp', 'packUnorm2x16', 'packUnorm4x8',
+ 'packSnorm4x8', 'unpackUnorm2x16', 'unpackUnorm4x8', 'unpackSnorm4x8', 'packDouble2x32',
+ 'unpackDouble2x32', 'length', 'distance', 'dot', 'cross', 'normalize', 'ftransform',
+ 'faceforward', 'reflect', 'refract', 'matrixCompMult', 'outerProduct', 'transpose', 'determinant',
+ 'inverse', 'lessThan', 'lessThanEqual', 'greaterThan', 'greaterThanEqual', 'equal', 'notEqual',
+ 'any', 'all', 'not', 'uaddCarry', 'usubBorrow', 'umulExtended', 'imulExtended', 'bitfieldExtract',
+ 'bitfieldInsert', 'bitfieldReverse', 'bitCount', 'findLSB', 'findMSB', 'textureSize',
+ 'textureQueryLOD', 'texture', 'textureProj', 'textureLod', 'textureOffset', 'texelFetch',
+ 'texelFetchOffset', 'textureProjOffset', 'textureLodOffset', 'textureProjLod',
+ 'textureProjLodOffset', 'textureGrad', 'textureGradOffset', 'textureProjGrad',
+ 'textureProjGradOffset', 'textureGather', 'textureGatherOffset', 'texture1D', 'texture2D',
+ 'texture3D', 'texture1DProj', 'texture2DProj', 'texture3DProj', 'texture1DLod', 'texture2DLod',
+ 'texture3DLod', 'texture1DProjLod', 'texture2DProjLod', 'texture3DProjLod', 'textureCube',
+ 'textureCubeLod', 'shadow1D', 'shadow2D', 'shadow1DProj', 'shadow2DProj', 'shadow1DLod',
+ 'shadow2DLod', 'shadow1DProjLod', 'shadow2DProjLod', 'dFdx', 'dFdy', 'fwidth',
+ 'interpolateAtCentroid', 'interpolateAtSample', 'interpolateAtOffset', 'noise1', 'noise2',
+ 'noise3', 'noise4', 'EmitStreamVertex', 'EndStreamPrimitive', 'EmitVertex', 'EndPrimitive',
+ 'barrier'
+}) +
-- Variables.
-local variable = token(l.VARIABLE, word_match{
- 'gl_VertexID', 'gl_InstanceID', 'gl_Position', 'gl_PointSize',
- 'gl_ClipDistance', 'gl_PrimitiveIDIn', 'gl_InvocationID', 'gl_PrimitiveID',
- 'gl_Layer', 'gl_PatchVerticesIn', 'gl_TessLevelOuter', 'gl_TessLevelInner',
- 'gl_TessCoord', 'gl_FragCoord', 'gl_FrontFacing', 'gl_PointCoord',
- 'gl_SampleID', 'gl_SamplePosition', 'gl_FragColor', 'gl_FragData',
- 'gl_FragDepth', 'gl_SampleMask', 'gl_ClipVertex', 'gl_FrontColor',
- 'gl_BackColor', 'gl_FrontSecondaryColor', 'gl_BackSecondaryColor',
- 'gl_TexCoord', 'gl_FogFragCoord', 'gl_Color', 'gl_SecondaryColor',
- 'gl_Normal', 'gl_Vertex', 'gl_MultiTexCoord0', 'gl_MultiTexCoord1',
- 'gl_MultiTexCoord2', 'gl_MultiTexCoord3', 'gl_MultiTexCoord4',
- 'gl_MultiTexCoord5', 'gl_MultiTexCoord6', 'gl_MultiTexCoord7', 'gl_FogCoord'
-})
+token(lexer.VARIABLE, word_match{
+ 'gl_VertexID', 'gl_InstanceID', 'gl_Position', 'gl_PointSize', 'gl_ClipDistance',
+ 'gl_PrimitiveIDIn', 'gl_InvocationID', 'gl_PrimitiveID', 'gl_Layer', 'gl_PatchVerticesIn',
+ 'gl_TessLevelOuter', 'gl_TessLevelInner', 'gl_TessCoord', 'gl_FragCoord', 'gl_FrontFacing',
+ 'gl_PointCoord', 'gl_SampleID', 'gl_SamplePosition', 'gl_FragColor', 'gl_FragData',
+ 'gl_FragDepth', 'gl_SampleMask', 'gl_ClipVertex', 'gl_FrontColor', 'gl_BackColor',
+ 'gl_FrontSecondaryColor', 'gl_BackSecondaryColor', 'gl_TexCoord', 'gl_FogFragCoord', 'gl_Color',
+ 'gl_SecondaryColor', 'gl_Normal', 'gl_Vertex', 'gl_MultiTexCoord0', 'gl_MultiTexCoord1',
+ 'gl_MultiTexCoord2', 'gl_MultiTexCoord3', 'gl_MultiTexCoord4', 'gl_MultiTexCoord5',
+ 'gl_MultiTexCoord6', 'gl_MultiTexCoord7', 'gl_FogCoord'
+}) +
-- Constants.
-local constant = token(l.CONSTANT, word_match{
+token(lexer.CONSTANT, word_match{
'gl_MaxVertexAttribs', 'gl_MaxVertexUniformComponents', 'gl_MaxVaryingFloats',
- 'gl_MaxVaryingComponents', 'gl_MaxVertexOutputComponents',
- 'gl_MaxGeometryInputComponents', 'gl_MaxGeometryOutputComponents',
- 'gl_MaxFragmentInputComponents', 'gl_MaxVertexTextureImageUnits',
- 'gl_MaxCombinedTextureImageUnits', 'gl_MaxTextureImageUnits',
+ 'gl_MaxVaryingComponents', 'gl_MaxVertexOutputComponents', 'gl_MaxGeometryInputComponents',
+ 'gl_MaxGeometryOutputComponents', 'gl_MaxFragmentInputComponents',
+ 'gl_MaxVertexTextureImageUnits', 'gl_MaxCombinedTextureImageUnits', 'gl_MaxTextureImageUnits',
'gl_MaxFragmentUniformComponents', 'gl_MaxDrawBuffers', 'gl_MaxClipDistances',
'gl_MaxGeometryTextureImageUnits', 'gl_MaxGeometryOutputVertices',
'gl_MaxGeometryTotalOutputComponents', 'gl_MaxGeometryUniformComponents',
'gl_MaxGeometryVaryingComponents', 'gl_MaxTessControlInputComponents',
'gl_MaxTessControlOutputComponents', 'gl_MaxTessControlTextureImageUnits',
- 'gl_MaxTessControlUniformComponents',
- 'gl_MaxTessControlTotalOutputComponents',
+ 'gl_MaxTessControlUniformComponents', 'gl_MaxTessControlTotalOutputComponents',
'gl_MaxTessEvaluationInputComponents', 'gl_MaxTessEvaluationOutputComponents',
- 'gl_MaxTessEvaluationTextureImageUnits',
- 'gl_MaxTessEvaluationUniformComponents', 'gl_MaxTessPatchComponents',
- 'gl_MaxPatchVertices', 'gl_MaxTessGenLevel', 'gl_MaxTextureUnits',
- 'gl_MaxTextureCoords', 'gl_MaxClipPlanes',
-
- 'gl_DepthRange', 'gl_ModelViewMatrix', 'gl_ProjectionMatrix',
- 'gl_ModelViewProjectionMatrix', 'gl_TextureMatrix', 'gl_NormalMatrix',
- 'gl_ModelViewMatrixInverse', 'gl_ProjectionMatrixInverse',
- 'gl_ModelViewProjectionMatrixInverse', 'gl_TextureMatrixInverse',
- 'gl_ModelViewMatrixTranspose', 'gl_ProjectionMatrixTranspose',
- 'gl_ModelViewProjectionMatrixTranspose', 'gl_TextureMatrixTranspose',
- 'gl_ModelViewMatrixInverseTranspose', 'gl_ProjectionMatrixInverseTranspose',
- 'gl_ModelViewProjectionMatrixInverseTranspose',
- 'gl_TextureMatrixInverseTranspose', 'gl_NormalScale', 'gl_ClipPlane',
- 'gl_Point', 'gl_FrontMaterial', 'gl_BackMaterial', 'gl_LightSource',
- 'gl_LightModel', 'gl_FrontLightModelProduct', 'gl_BackLightModelProduct',
- 'gl_FrontLightProduct', 'gl_BackLightProduct', 'gl_TextureEnvColor',
- 'gl_EyePlaneS', 'gl_EyePlaneT', 'gl_EyePlaneR', 'gl_EyePlaneQ',
- 'gl_ObjectPlaneS', 'gl_ObjectPlaneT', 'gl_ObjectPlaneR', 'gl_ObjectPlaneQ',
+ 'gl_MaxTessEvaluationTextureImageUnits', 'gl_MaxTessEvaluationUniformComponents',
+ 'gl_MaxTessPatchComponents', 'gl_MaxPatchVertices', 'gl_MaxTessGenLevel', 'gl_MaxTextureUnits',
+ 'gl_MaxTextureCoords', 'gl_MaxClipPlanes', --
+ 'gl_DepthRange', 'gl_ModelViewMatrix', 'gl_ProjectionMatrix', 'gl_ModelViewProjectionMatrix',
+ 'gl_TextureMatrix', 'gl_NormalMatrix', 'gl_ModelViewMatrixInverse', 'gl_ProjectionMatrixInverse',
+ 'gl_ModelViewProjectionMatrixInverse', 'gl_TextureMatrixInverse', 'gl_ModelViewMatrixTranspose',
+ 'gl_ProjectionMatrixTranspose', 'gl_ModelViewProjectionMatrixTranspose',
+ 'gl_TextureMatrixTranspose', 'gl_ModelViewMatrixInverseTranspose',
+ 'gl_ProjectionMatrixInverseTranspose', 'gl_ModelViewProjectionMatrixInverseTranspose',
+ 'gl_TextureMatrixInverseTranspose', 'gl_NormalScale', 'gl_ClipPlane', 'gl_Point',
+ 'gl_FrontMaterial', 'gl_BackMaterial', 'gl_LightSource', 'gl_LightModel',
+ 'gl_FrontLightModelProduct', 'gl_BackLightModelProduct', 'gl_FrontLightProduct',
+ 'gl_BackLightProduct', 'gl_TextureEnvColor', 'gl_EyePlaneS', 'gl_EyePlaneT', 'gl_EyePlaneR',
+ 'gl_EyePlaneQ', 'gl_ObjectPlaneS', 'gl_ObjectPlaneT', 'gl_ObjectPlaneR', 'gl_ObjectPlaneQ',
'gl_Fog'
-})
-
--- Extend cpp lexer to include GLSL elements.
-local cpp = l.load('cpp')
-local _rules = cpp._rules
-_rules[1] = {'whitespace', ws}
-table.insert(_rules, 2, {'glsl_keyword', keyword})
-table.insert(_rules, 3, {'glsl_function', func})
-table.insert(_rules, 4, {'glsl_type', type})
-table.insert(_rules, 5, {'glsl_variable', variable})
-M._rules = _rules
-M._foldsymbols = cpp._foldsymbols
+}))
-return M
+return lex
diff --git a/lua/lexers/gnuplot.lua b/lua/lexers/gnuplot.lua
index 4ee72de..a3d93dd 100644
--- a/lua/lexers/gnuplot.lua
+++ b/lua/lexers/gnuplot.lua
@@ -1,80 +1,61 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Gnuplot LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'gnuplot'}
+local lex = lexer.new('gnuplot')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local bk_str = l.delimited_range('[]', true)
-local bc_str = l.delimited_range('{}', true)
-local string = token(l.STRING, sq_str + dq_str + bk_str + bc_str)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'cd', 'call', 'clear', 'exit', 'fit', 'help', 'history', 'if', 'load',
- 'pause', 'plot', 'using', 'with', 'index', 'every', 'smooth', 'thru', 'print',
- 'pwd', 'quit', 'replot', 'reread', 'reset', 'save', 'set', 'show', 'unset',
- 'shell', 'splot', 'system', 'test', 'unset', 'update'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'cd', 'call', 'clear', 'exit', 'fit', 'help', 'history', 'if', 'load', 'pause', 'plot', 'using',
+ 'with', 'index', 'every', 'smooth', 'thru', 'print', 'pwd', 'quit', 'replot', 'reread', 'reset',
+ 'save', 'set', 'show', 'unset', 'shell', 'splot', 'system', 'test', 'unset', 'update'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'abs', 'acos', 'acosh', 'arg', 'asin', 'asinh', 'atan', 'atan2', 'atanh',
- 'besj0', 'besj1', 'besy0', 'besy1', 'ceil', 'cos', 'cosh', 'erf', 'erfc',
- 'exp', 'floor', 'gamma', 'ibeta', 'inverf', 'igamma', 'imag', 'invnorm',
- 'int', 'lambertw', 'lgamma', 'log', 'log10', 'norm', 'rand', 'real', 'sgn',
- 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'column', 'defined', 'tm_hour',
- 'tm_mday', 'tm_min', 'tm_mon', 'tm_sec', 'tm_wday', 'tm_yday', 'tm_year',
- 'valid'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'acos', 'acosh', 'arg', 'asin', 'asinh', 'atan', 'atan2', 'atanh', 'besj0', 'besj1',
+ 'besy0', 'besy1', 'ceil', 'cos', 'cosh', 'erf', 'erfc', 'exp', 'floor', 'gamma', 'ibeta',
+ 'inverf', 'igamma', 'imag', 'invnorm', 'int', 'lambertw', 'lgamma', 'log', 'log10', 'norm',
+ 'rand', 'real', 'sgn', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'column', 'defined', 'tm_hour',
+ 'tm_mday', 'tm_min', 'tm_mon', 'tm_sec', 'tm_wday', 'tm_yday', 'tm_year', 'valid'
+}))
-- Variables.
-local variable = token(l.VARIABLE, word_match{
- 'angles', 'arrow', 'autoscale', 'bars', 'bmargin', 'border', 'boxwidth',
- 'clabel', 'clip', 'cntrparam', 'colorbox', 'contour', 'datafile ',
- 'decimalsign', 'dgrid3d', 'dummy', 'encoding', 'fit', 'fontpath', 'format',
- 'functions', 'function', 'grid', 'hidden3d', 'historysize', 'isosamples',
- 'key', 'label', 'lmargin', 'loadpath', 'locale', 'logscale', 'mapping',
- 'margin', 'mouse', 'multiplot', 'mx2tics', 'mxtics', 'my2tics', 'mytics',
- 'mztics', 'offsets', 'origin', 'output', 'parametric', 'plot', 'pm3d',
- 'palette', 'pointsize', 'polar', 'print', 'rmargin', 'rrange', 'samples',
- 'size', 'style', 'surface', 'terminal', 'tics', 'ticslevel', 'ticscale',
- 'timestamp', 'timefmt', 'title', 'tmargin', 'trange', 'urange', 'variables',
- 'version', 'view', 'vrange', 'x2data', 'x2dtics', 'x2label', 'x2mtics',
- 'x2range', 'x2tics', 'x2zeroaxis', 'xdata', 'xdtics', 'xlabel', 'xmtics',
- 'xrange', 'xtics', 'xzeroaxis', 'y2data', 'y2dtics', 'y2label', 'y2mtics',
- 'y2range', 'y2tics', 'y2zeroaxis', 'ydata', 'ydtics', 'ylabel', 'ymtics',
- 'yrange', 'ytics', 'yzeroaxis', 'zdata', 'zdtics', 'cbdata', 'cbdtics',
- 'zero', 'zeroaxis', 'zlabel', 'zmtics', 'zrange', 'ztics', 'cblabel',
- 'cbmtics', 'cbrange', 'cbtics'
-})
+lex:add_rule('variable', token(lexer.VARIABLE, word_match{
+ 'angles', 'arrow', 'autoscale', 'bars', 'bmargin', 'border', 'boxwidth', 'clabel', 'clip',
+ 'cntrparam', 'colorbox', 'contour', 'datafile', 'decimalsign', 'dgrid3d', 'dummy', 'encoding',
+ 'fit', 'fontpath', 'format', 'functions', 'function', 'grid', 'hidden3d', 'historysize',
+ 'isosamples', 'key', 'label', 'lmargin', 'loadpath', 'locale', 'logscale', 'mapping', 'margin',
+ 'mouse', 'multiplot', 'mx2tics', 'mxtics', 'my2tics', 'mytics', 'mztics', 'offsets', 'origin',
+ 'output', 'parametric', 'plot', 'pm3d', 'palette', 'pointsize', 'polar', 'print', 'rmargin',
+ 'rrange', 'samples', 'size', 'style', 'surface', 'terminal', 'tics', 'ticslevel', 'ticscale',
+ 'timestamp', 'timefmt', 'title', 'tmargin', 'trange', 'urange', 'variables', 'version', 'view',
+ 'vrange', 'x2data', 'x2dtics', 'x2label', 'x2mtics', 'x2range', 'x2tics', 'x2zeroaxis', 'xdata',
+ 'xdtics', 'xlabel', 'xmtics', 'xrange', 'xtics', 'xzeroaxis', 'y2data', 'y2dtics', 'y2label',
+ 'y2mtics', 'y2range', 'y2tics', 'y2zeroaxis', 'ydata', 'ydtics', 'ylabel', 'ymtics', 'yrange',
+ 'ytics', 'yzeroaxis', 'zdata', 'zdtics', 'cbdata', 'cbdtics', 'zero', 'zeroaxis', 'zlabel',
+ 'zmtics', 'zrange', 'ztics', 'cblabel', 'cbmtics', 'cbrange', 'cbtics'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('-+~!$*%=<>&|^?:()'))
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local br_str = lexer.range('[', ']', true) + lexer.range('{', '}', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + br_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'variable', variable},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('-+~!$*%=<>&|^?:()')))
-return M
+return lex
diff --git a/lua/lexers/go.lua b/lua/lexers/go.lua
index 5eaeedb..e440422 100644
--- a/lua/lexers/go.lua
+++ b/lua/lexers/go.lua
@@ -1,78 +1,61 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Go LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'go'}
+local lex = lexer.new('go')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * '*/'
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local raw_str = l.delimited_range('`', false, true)
-local string = token(l.STRING, sq_str + dq_str + raw_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * P('i')^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else',
- 'fallthrough', 'for', 'func', 'go', 'goto', 'if', 'import', 'interface',
- 'map', 'package', 'range', 'return', 'select', 'struct', 'switch', 'type',
- 'var'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else', 'fallthrough', 'for',
+ 'func', 'go', 'goto', 'if', 'import', 'interface', 'map', 'package', 'range', 'return', 'select',
+ 'struct', 'switch', 'type', 'var'
+}))
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- 'true', 'false', 'iota', 'nil'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false iota nil')))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'byte', 'complex64', 'complex128', 'error', 'float32', 'float64',
- 'int', 'int8', 'int16', 'int32', 'int64', 'rune', 'string', 'uint', 'uint8',
- 'uint16', 'uint32', 'uint64', 'uintptr'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'any', 'bool', 'byte', 'comparable', 'complex64', 'complex128', 'error', 'float32', 'float64',
+ 'int', 'int8', 'int16', 'int32', 'int64', 'rune', 'string', 'uint', 'uint8', 'uint16', 'uint32',
+ 'uint64', 'uintptr'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'append', 'cap', 'close', 'complex', 'copy', 'delete', 'imag', 'len', 'make',
- 'new', 'panic', 'print', 'println', 'real', 'recover'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'append', 'cap', 'close', 'complex', 'copy', 'delete', 'imag', 'len', 'make', 'new', 'panic',
+ 'print', 'println', 'real', 'recover'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
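+-- Back-quoted raw strings may span lines and process no escape sequences.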
+local raw_str = lexer.range('`', false, false)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'constant', constant},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * P('i')^-1))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%&|^<>=!~:;.,()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
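
-- Aside: the go.lua rewrite above shows the shape every migration in this
-- commit follows: construct with lexer.new(), register rules in match order
-- with add_rule(), and declare folding with add_fold_point(). A minimal
-- sketch using only calls that appear in this diff ('demo' is a hypothetical
-- language name, not part of the commit):
local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match

local lex = lexer.new('demo')
-- Rules are attempted in the order they are added, so whitespace comes first.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
lex:add_rule('keyword', token(lexer.KEYWORD, word_match('if else end')))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
return lex
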
diff --git a/lua/lexers/groovy.lua b/lua/lexers/groovy.lua
index c30c8b7..42ab49d 100644
--- a/lua/lexers/groovy.lua
+++ b/lua/lexers/groovy.lua
@@ -1,89 +1,67 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Groovy LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'groovy'}
+local lex = lexer.new('groovy')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local regex_str = #P('/') * l.last_char_includes('=~|!<>+-*?&,:;([{') *
- l.delimited_range('/', true)
-local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str +
- dq_str) +
- token(l.REGEX, regex_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else',
- 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof',
- 'native', 'new', 'private', 'protected', 'public', 'return', 'static',
- 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile',
- 'while', 'strictfp', 'package', 'import', 'as', 'assert', 'def', 'mixin',
- 'property', 'test', 'using', 'in',
- 'false', 'null', 'super', 'this', 'true', 'it'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'extends', 'final',
+ 'finally', 'for', 'if', 'implements', 'instanceof', 'native', 'new', 'private', 'protected',
+ 'public', 'return', 'static', 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try',
+ 'volatile', 'while', 'strictfp', 'package', 'import', 'as', 'assert', 'def', 'mixin', 'property',
+ 'test', 'using', 'in', 'false', 'null', 'super', 'this', 'true', 'it'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'abs', 'any', 'append', 'asList', 'asWritable', 'call', 'collect',
- 'compareTo', 'count', 'div', 'dump', 'each', 'eachByte', 'eachFile',
- 'eachLine', 'every', 'find', 'findAll', 'flatten', 'getAt', 'getErr', 'getIn',
- 'getOut', 'getText', 'grep', 'immutable', 'inject', 'inspect', 'intersect',
- 'invokeMethods', 'isCase', 'join', 'leftShift', 'minus', 'multiply',
- 'newInputStream', 'newOutputStream', 'newPrintWriter', 'newReader',
- 'newWriter', 'next', 'plus', 'pop', 'power', 'previous', 'print', 'println',
- 'push', 'putAt', 'read', 'readBytes', 'readLines', 'reverse', 'reverseEach',
- 'round', 'size', 'sort', 'splitEachLine', 'step', 'subMap', 'times',
- 'toInteger', 'toList', 'tokenize', 'upto', 'waitForOrKill', 'withPrintWriter',
- 'withReader', 'withStream', 'withWriter', 'withWriterAppend', 'write',
- 'writeLine'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'any', 'append', 'asList', 'asWritable', 'call', 'collect', 'compareTo', 'count', 'div',
+ 'dump', 'each', 'eachByte', 'eachFile', 'eachLine', 'every', 'find', 'findAll', 'flatten',
+ 'getAt', 'getErr', 'getIn', 'getOut', 'getText', 'grep', 'immutable', 'inject', 'inspect',
+ 'intersect', 'invokeMethods', 'isCase', 'join', 'leftShift', 'minus', 'multiply',
+ 'newInputStream', 'newOutputStream', 'newPrintWriter', 'newReader', 'newWriter', 'next', 'plus',
+ 'pop', 'power', 'previous', 'print', 'println', 'push', 'putAt', 'read', 'readBytes', 'readLines',
+ 'reverse', 'reverseEach', 'round', 'size', 'sort', 'splitEachLine', 'step', 'subMap', 'times',
+ 'toInteger', 'toList', 'tokenize', 'upto', 'waitForOrKill', 'withPrintWriter', 'withReader',
+ 'withStream', 'withWriter', 'withWriterAppend', 'write', 'writeLine'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface',
- 'long', 'short', 'void'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'boolean byte char class double float int interface long short void')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local tq_str = lexer.range("'''") + lexer.range('"""')
+local string = token(lexer.STRING, tq_str + sq_str + dq_str)
+local regex_str = #P('/') * lexer.last_char_includes('=~|!<>+-*?&,:;([{') * lexer.range('/', true)
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'type', type},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
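
-- Aside: the hand-rolled delimited_range/nonnewline patterns removed above
-- are replaced by two helpers throughout this commit. Their flag semantics,
-- as inferred from the call sites in this diff (an assumption, not a quote
-- of the lexer module's documentation):
local sq_str = lexer.range("'", true) -- delimited by ', confined to one line
local raw_str = lexer.range('`', false, false) -- multi-line, '\' is not an escape
local block_comment = lexer.range('/*', '*/') -- multi-line delimiter pair
local line_comment = lexer.to_eol('//', true) -- to end of line; '\' continues it
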
diff --git a/lua/lexers/gtkrc.lua b/lua/lexers/gtkrc.lua
index bedc764..0dcbbd0 100644
--- a/lua/lexers/gtkrc.lua
+++ b/lua/lexers/gtkrc.lua
@@ -1,71 +1,52 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Gtkrc LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'gtkrc'}
+local lex = lexer.new('gtkrc')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.digit^1 * ('.' * l.digit^1)^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'binding', 'class', 'include', 'module_path', 'pixmap_path', 'im_module_file',
- 'style', 'widget', 'widget_class'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'binding class include module_path pixmap_path im_module_file style widget widget_class')))
-- Variables.
-local variable = token(l.VARIABLE, word_match{
- 'bg', 'fg', 'base', 'text', 'xthickness', 'ythickness', 'bg_pixmap', 'font',
- 'fontset', 'font_name', 'stock', 'color', 'engine'
-})
+lex:add_rule('variable', token(lexer.VARIABLE, word_match{
+ 'bg', 'fg', 'base', 'text', 'xthickness', 'ythickness', 'bg_pixmap', 'font', 'fontset',
+ 'font_name', 'stock', 'color', 'engine'
+}))
-- States.
-local state = token(l.CONSTANT, word_match{
- 'ACTIVE', 'SELECTED', 'NORMAL', 'PRELIGHT', 'INSENSITIVE', 'TRUE', 'FALSE'
-})
+lex:add_rule('state',
+ token('state', word_match('ACTIVE SELECTED NORMAL PRELIGHT INSENSITIVE TRUE FALSE')))
+lex:add_style('state', lexer.styles.constant)
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'mix', 'shade', 'lighter', 'darker'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match('mix shade lighter darker')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('_-'))^0))
--- Operators.
-local operator = token(l.OPERATOR, S(':=,*()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'variable', variable},
- {'state', state},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * ('.' * lexer.digit^1)^-1))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S(':=,*()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
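
-- Aside: the gtkrc 'state' rule above introduces the custom-token idiom used
-- by several lexers in this commit: add_rule() under a bespoke token name,
-- then add_style() to map that name onto a stock style. Sketch ('ACTIVE
-- NORMAL' abbreviates the full word list; 'my_token' is hypothetical):
lex:add_rule('state', token('state', word_match('ACTIVE NORMAL')))
lex:add_style('state', lexer.styles.constant)
-- Styles compose with property tables, as the html lexer further down does:
lex:add_style('my_token', lexer.styles.keyword .. {italics = true})
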
diff --git a/lua/lexers/hare.lua b/lua/lexers/hare.lua
new file mode 100644
index 0000000..73068bf
--- /dev/null
+++ b/lua/lexers/hare.lua
@@ -0,0 +1,59 @@
+-- Copyright 2021-2022 Mitchell. See LICENSE.
+-- Hare LPeg lexer.
+-- https://harelang.org
+-- Contributed by Qiu
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+
+local lex = lexer.new('hare')
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'as', 'break', 'case', 'const', 'continue', 'def', 'defer', 'else', 'export', 'false', 'fn',
+ 'for', 'if', 'is', 'let', 'match', 'null', 'nullable', 'return', 'static', 'struct', 'switch',
+ 'true', 'type', 'use', 'yield'
+}))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'len', 'alloc', 'free', 'assert', 'abort', 'size', 'append', 'insert', 'delete', 'vastart',
+ 'vaarg', 'vaend'
+}))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'enum', 'f32', 'f64', 'i16', 'i32', 'i64', 'i8', 'int', 'u16', 'u32', 'u64', 'u8', 'uint',
+ 'uintptr', 'union', 'void', 'rune', 'str', 'char'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+local dq_str = lexer.range('"')
+local raw_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, dq_str + raw_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//')))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>')))
+
+-- At rule.
+lex:add_rule('at_rule', token('at_rule', '@' * word_match('noreturn offset init fini test symbol')))
+lex:add_style('at_rule', lexer.styles.preprocessor)
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+
+return lex
diff --git a/lua/lexers/haskell.lua b/lua/lexers/haskell.lua
index 62ccd4a..9fd6631 100644
--- a/lua/lexers/haskell.lua
+++ b/lua/lexers/haskell.lua
@@ -1,60 +1,45 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Haskell LPeg lexer.
-- Modified by Alex Suraci.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'haskell'}
+local lex = lexer.new('haskell', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '--' * l.nonnewline_esc^0
-local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'import', 'in', 'infix',
+ 'infixl', 'infixr', 'instance', 'let', 'module', 'newtype', 'of', 'then', 'type', 'where', '_',
+ 'as', 'qualified', 'hiding'
+}))
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
+-- Types & type constructors.
+local word = (lexer.alnum + S("._'#"))^0
+local op = lexer.punct - S('()[]{}')
+lex:add_rule('type', token(lexer.TYPE, (lexer.upper * word) + (':' * (op^1 - ':'))))
--- Chars.
-local char = token(l.STRING, l.delimited_range("'", true))
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word))
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'import',
- 'in', 'infix', 'infixl', 'infixr', 'instance', 'let', 'module', 'newtype',
- 'of', 'then', 'type', 'where', '_', 'as', 'qualified', 'hiding'
-})
+-- Comments.
+local line_comment = lexer.to_eol('--', true)
+local block_comment = lexer.range('{-', '-}')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Identifiers.
-local word = (l.alnum + S("._'#"))^0
-local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local op = l.punct - S('()[]{}')
-local operator = token(l.OPERATOR, op)
+lex:add_rule('operator', token(lexer.OPERATOR, op))
--- Types & type constructors.
-local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":"))))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', constructor},
- {'identifier', identifier},
- {'string', string},
- {'char', char},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._FOLDBYINDENTATION = true
-
-return M
+return lex
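
-- Aside: whole-lexer behavior that the old API flagged on the module table
-- (M._FOLDBYINDENTATION above) now travels through lexer.new()'s options
-- argument, restated from the haskell declaration in this diff:
local lex = lexer.new('haskell', {fold_by_indentation = true})
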
diff --git a/lua/lexers/html.lua b/lua/lexers/html.lua
index ba6e3e2..0cb3c2f 100644
--- a/lua/lexers/html.lua
+++ b/lua/lexers/html.lua
@@ -1,162 +1,148 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- HTML LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'html'}
-
-case_insensitive_tags = true
+local lex = lexer.new('html')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Comments.
-local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = #S('\'"') * l.last_char_includes('=') *
- token(l.STRING, sq_str + dq_str)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
-local in_tag = #P((1 - S'><')^0 * '>')
-
--- Numbers.
-local number = #l.digit * l.last_char_includes('=') *
- token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
+-- Doctype.
+lex:add_rule('doctype', token('doctype', lexer.range('<!' * word_match('doctype', true), '>')))
+lex:add_style('doctype', lexer.styles.comment)
-- Elements.
-local known_element = token('element', '<' * P('/')^-1 * word_match({
- 'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'base',
- 'bdi', 'bdo', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption',
- 'cite', 'code', 'col', 'colgroup', 'content', 'data', 'datalist', 'dd',
- 'decorator', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'element', 'em',
- 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
- 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe', 'img',
- 'input', 'ins', 'kbd', 'keygen', 'label', 'legend', 'li', 'link', 'main',
- 'map', 'mark', 'menu', 'menuitem', 'meta', 'meter', 'nav', 'noscript',
- 'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', 'pre',
- 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'section',
- 'select', 'shadow', 'small', 'source', 'spacer', 'spacer', 'span', 'strong',
- 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template',
- 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'u', 'ul',
- 'var', 'video', 'wbr'
-}, nil, case_insensitive_tags))
-local unknown_element = token('unknown_element', '<' * P('/')^-1 * l.word)
-local element = known_element + unknown_element
-
--- Attributes.
-local known_attribute = token('attribute', word_match({
- 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'async',
- 'autocomplete', 'autofocus', 'autoplay', 'bgcolor', 'border', 'buffered',
- 'challenge', 'charset', 'checked', 'cite', 'class', 'code', 'codebase',
- 'color', 'cols', 'colspan', 'content', 'contenteditable', 'contextmenu',
- 'controls', 'coords', 'data', 'data-', 'datetime', 'default', 'defer', 'dir',
- 'dirname', 'disabled', 'download', 'draggable', 'dropzone', 'enctype', 'for',
- 'form', 'headers', 'height', 'hidden', 'high', 'href', 'hreflang',
- 'http-equiv', 'icon', 'id', 'ismap', 'itemprop', 'keytype', 'kind', 'label',
- 'lang', 'language', 'list', 'loop', 'low', 'manifest', 'max', 'maxlength',
- 'media', 'method', 'min', 'multiple', 'name', 'novalidate', 'open', 'optimum',
- 'pattern', 'ping', 'placeholder', 'poster', 'preload', 'pubdate',
- 'radiogroup', 'readonly', 'rel', 'required', 'reversed', 'role', 'rows',
- 'rowspan', 'sandbox', 'spellcheck', 'scope', 'scoped', 'seamless', 'selected',
- 'shape', 'size', 'sizes', 'span', 'src', 'srcdoc', 'srclang', 'start',
- 'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap',
- 'value', 'width', 'wrap'
-}, '-', case_insensitive_tags) + ((P('data-') + 'aria-') * (l.alnum + '-')^1))
-local unknown_attribute = token('unknown_attribute', l.word)
-local attribute = (known_attribute + unknown_attribute) * #(l.space^0 * '=')
+local single_element = token('single_element', '<' * P('/')^-1 * word_match(
+ {
+ 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta',
+ 'param', 'source', 'track', 'wbr'
+ }, true))
+local paired_element = token('element', '<' * P('/')^-1 * word_match({
+ 'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b', 'bdi', 'bdo', 'blockquote', 'body',
+ 'button', 'canvas', 'caption', 'cite', 'code', 'colgroup', 'content', 'data', 'datalist', 'dd',
+ 'decorator', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'element', 'em', 'fieldset',
+ 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header',
+ 'html', 'i', 'iframe', 'ins', 'kbd', 'label', 'legend', 'li', 'main', 'map', 'mark', 'menu',
+ 'menuitem', 'meter', 'nav', 'noscript', 'object', 'ol', 'optgroup', 'option', 'output', 'p',
+ 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'section', 'select', 'shadow',
+ 'small', 'spacer', 'span', 'strong', 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td',
+ 'template', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'u', 'ul', 'var', 'video'
+}, true))
+local known_element = single_element + paired_element
+local unknown_element = token('unknown_element', '<' * P('/')^-1 * (lexer.alnum + '-')^1)
+local element = (known_element + unknown_element) * -P(':')
+lex:add_rule('element', element)
+lex:add_style('single_element', lexer.styles.keyword)
+lex:add_style('element', lexer.styles.keyword)
+lex:add_style('unknown_element', lexer.styles.keyword .. {italics = true})
-- Closing tags.
local tag_close = token('element', P('/')^-1 * '>')
+lex:add_rule('tag_close', tag_close)
+
+-- Attributes.
+local known_attribute = token('attribute', word_match({
+ 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'async', 'autocomplete',
+ 'autofocus', 'autoplay', 'bgcolor', 'border', 'buffered', 'challenge', 'charset', 'checked',
+ 'cite', 'class', 'code', 'codebase', 'color', 'cols', 'colspan', 'content', 'contenteditable',
+ 'contextmenu', 'controls', 'coords', 'data', 'data-', 'datetime', 'default', 'defer', 'dir',
+ 'dirname', 'disabled', 'download', 'draggable', 'dropzone', 'enctype', 'for', 'form', 'headers',
+ 'height', 'hidden', 'high', 'href', 'hreflang', 'http-equiv', 'icon', 'id', 'ismap', 'itemprop',
+ 'keytype', 'kind', 'label', 'lang', 'language', 'list', 'loop', 'low', 'manifest', 'max',
+ 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'novalidate', 'open', 'optimum',
+ 'pattern', 'ping', 'placeholder', 'poster', 'preload', 'pubdate', 'radiogroup', 'readonly', 'rel',
+ 'required', 'reversed', 'role', 'rows', 'rowspan', 'sandbox', 'scope', 'scoped', 'seamless',
+ 'selected', 'shape', 'size', 'sizes', 'span', 'spellcheck', 'src', 'srcdoc', 'srclang', 'start',
+  'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap', 'value', 'width',
+ 'wrap'
+}, true) + ((P('data-') + 'aria-') * (lexer.alnum + '-')^1))
+local unknown_attribute = token('unknown_attribute', (lexer.alnum + '-')^1)
+local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=')
+lex:add_rule('attribute', attribute)
+lex:add_style('attribute', lexer.styles.type)
+lex:add_style('unknown_attribute', lexer.styles.type .. {italics = true})
-- Equals.
-local equals = token(l.OPERATOR, '=') * in_tag
+-- TODO: performance is terrible on large files.
+local in_tag = P(function(input, index)
+ local before = input:sub(1, index - 1)
+ local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+ if s and e then return s > e and index or nil end
+ if s then return index end
+ return input:find('^[^<]->', index) and index or nil
+end)
+
+local equals = token(lexer.OPERATOR, '=') -- * in_tag
+-- lex:add_rule('equals', equals)
--- Entities.
-local entity = token('entity', '&' * (l.any - l.space - ';')^1 * ';')
+-- Strings.
+local string = #S('\'"') * lexer.last_char_includes('=') *
+ token(lexer.STRING, lexer.range("'") + lexer.range('"'))
+lex:add_rule('string', string)
--- Doctype.
-local doctype = token('doctype', '<!' *
- word_match({'doctype'}, nil, case_insensitive_tags) *
- (l.any - '>')^1 * '>')
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'doctype', doctype},
- {'element', element},
- {'tag_close', tag_close},
- {'attribute', attribute},
--- {'equals', equals},
- {'string', string},
- {'number', number},
- {'entity', entity},
-}
-
-M._tokenstyles = {
- element = l.STYLE_KEYWORD,
- unknown_element = l.STYLE_KEYWORD..',italics',
- attribute = l.STYLE_TYPE,
- unknown_attribute = l.STYLE_TYPE..',italics',
- entity = l.STYLE_OPERATOR,
- doctype = l.STYLE_COMMENT
-}
-
-M._foldsymbols = {
- _patterns = {'</?', '/>', '<!%-%-', '%-%->'},
- element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
- unknown_element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
- [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1}
-}
+-- Numbers.
+local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1)
+lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number) -- * in_tag
--- Tags that start embedded languages.
-M.embed_start_tag = element *
- (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 *
- ws^0 * tag_close
-M.embed_end_tag = element * tag_close
-
--- Embedded CSS.
-local css = l.load('css')
-local style_element = word_match({'style'}, nil, case_insensitive_tags)
-local css_start_rule = #(P('<') * style_element *
- ('>' + P(function(input, index)
- if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then
- return index
- end
-end))) * M.embed_start_tag -- <style type="text/css">
-local css_end_rule = #('</' * style_element * ws^0 * '>') *
- M.embed_end_tag -- </style>
-l.embed_lexer(M, css, css_start_rule, css_end_rule)
-
--- Embedded JavaScript.
-local js = l.load('javascript')
-local script_element = word_match({'script'}, nil, case_insensitive_tags)
-local js_start_rule = #(P('<') * script_element *
- ('>' + P(function(input, index)
- if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then
- return index
- end
-end))) * M.embed_start_tag -- <script type="text/javascript">
-local js_end_rule = #('</' * script_element * ws^0 * '>') *
- M.embed_end_tag -- </script>
-local js_line_comment = '//' * (l.nonnewline_esc - js_end_rule)^0
-local js_block_comment = '/*' * (l.any - '*/' - js_end_rule)^0 * P('*/')^-1
-js._RULES['comment'] = token(l.COMMENT, js_line_comment + js_block_comment)
-l.embed_lexer(M, js, js_start_rule, js_end_rule)
-
--- Embedded CoffeeScript.
-local cs = l.load('coffeescript')
-local script_element = word_match({'script'}, nil, case_insensitive_tags)
-local cs_start_rule = #(P('<') * script_element * P(function(input, index)
- if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then
- return index
+-- Entities.
+lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 * ';'))
+lex:add_style('entity', lexer.styles.comment)
+
+-- Fold points.
+local function disambiguate_lt(text, pos, line, s)
+ if line:find('/>', s) then
+ return 0
+ elseif line:find('^</', s) then
+ return -1
+ else
+ return 1
end
-end)) * M.embed_start_tag -- <script type="text/coffeescript">
-local cs_end_rule = #('</' * script_element * ws^0 * '>') *
- M.embed_end_tag -- </script>
-l.embed_lexer(M, cs, cs_start_rule, cs_end_rule)
+end
+lex:add_fold_point('element', '<', disambiguate_lt)
+lex:add_fold_point('unknown_element', '<', disambiguate_lt)
+lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
-return M
+-- Tags that start embedded languages.
+-- Export these patterns for proxy lexers (e.g. ASP) that need them.
+lex.embed_start_tag = element * (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * ws^-1 *
+ tag_close
+lex.embed_end_tag = element * tag_close
+
+-- Embedded CSS (<style type="text/css"> ... </style>).
+local css = lexer.load('css')
+local style_element = word_match('style', true)
+local css_start_rule = #('<' * style_element * ('>' + P(function(input, index)
+ if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then return index end
+end))) * lex.embed_start_tag
+local css_end_rule = #('</' * style_element * ws^-1 * '>') * lex.embed_end_tag
+lex:embed(css, css_start_rule, css_end_rule)
+
+-- Embedded JavaScript (<script type="text/javascript"> ... </script>).
+local js = lexer.load('javascript')
+local script_element = word_match('script', true)
+local js_start_rule = #('<' * script_element * ('>' + P(function(input, index)
+ if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then return index end
+end))) * lex.embed_start_tag
+local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag
+local js_line_comment = '//' * (lexer.nonnewline - js_end_rule)^0
+local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1
+js:modify_rule('comment', token(lexer.COMMENT, js_line_comment + js_block_comment))
+lex:embed(js, js_start_rule, js_end_rule)
+
+-- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>).
+local cs = lexer.load('coffeescript')
+script_element = word_match('script', true)
+local cs_start_rule = #('<' * script_element * P(function(input, index)
+ if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then return index end
+end)) * lex.embed_start_tag
+local cs_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag
+lex:embed(cs, cs_start_rule, cs_end_rule)
+
+return lex
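
-- Aside: child-lexer embedding is now a method on the parent. A condensed
-- sketch of the JavaScript embedding above (patterns abbreviated; the real
-- rules also allow a type attribute and guard comments against the end tag,
-- see the diff):
local js = lexer.load('javascript')
local script = word_match('script', true)
local js_start_rule = #('<' * script) * lex.embed_start_tag
local js_end_rule = #('</' * script * '>') * lex.embed_end_tag
js:modify_rule('comment', token(lexer.COMMENT, lexer.to_eol('//')))
lex:embed(js, js_start_rule, js_end_rule)
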
diff --git a/lua/lexers/icon.lua b/lua/lexers/icon.lua
index 98b554d..b66312c 100644
--- a/lua/lexers/icon.lua
+++ b/lua/lexers/icon.lua
@@ -1,78 +1,60 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- LPeg lexer for the Icon programming language.
-- http://www.cs.arizona.edu/icon
-- Contributed by Carl Sturtivant.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'icon'}
+local lex = lexer.new('icon')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
---Comments
-local line_comment = '#' * l.nonnewline_esc^0
-local comment = token(l.COMMENT, line_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'break', 'by', 'case', 'create', 'default', 'do', 'else', 'end', 'every', 'fail', 'global', 'if',
+ 'initial', 'invocable', 'link', 'local', 'next', 'not', 'of', 'procedure', 'record', 'repeat',
+ 'return', 'static', 'suspend', 'then', 'to', 'until', 'while'
+}))
+
+-- Icon Keywords: unique to Icon.
+lex:add_rule('special_keyword', token('special_keyword', '&' * word_match{
+ 'allocated', 'ascii', 'clock', 'collections', 'cset', 'current', 'date', 'dateline', 'digits',
+ 'dump', 'e', 'error', 'errornumber', 'errortext', 'errorvalue', 'errout', 'fail', 'features',
+ 'file', 'host', 'input', 'lcase', 'letters', 'level', 'line', 'main', 'null', 'output', 'phi',
+ 'pi', 'pos', 'progname', 'random', 'regions', 'source', 'storage', 'subject', 'time', 'trace',
+ 'ucase', 'version'
+}))
+lex:add_style('special_keyword', lexer.styles.type)
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local cset = l.delimited_range("'")
-local str = l.delimited_range('"')
-local string = token(l.STRING, cset + str)
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Numbers.
-local radix_literal = P('-')^-1 * l.dec_num * S('rR') * l.alnum^1
-local number = token(l.NUMBER, radix_literal + l.float + l.integer)
+local radix_literal = P('-')^-1 * lexer.dec_num * S('rR') * lexer.alnum^1
+lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.number))
-- Preprocessor.
-local preproc_word = word_match{
- 'include', 'line', 'define', 'undef', 'ifdef', 'ifndef', 'else', 'endif',
- 'error'
-}
-local preproc = token(l.PREPROCESSOR, S(' \t')^0 * P('$') * preproc_word)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'by', 'case', 'create', 'default', 'do', 'else', 'end', 'every',
- 'fail', 'global', 'if', 'initial', 'invocable', 'link', 'local', 'next',
- 'not', 'of', 'procedure', 'record', 'repeat', 'return', 'static', 'suspend',
- 'then', 'to', 'until', 'while'
-})
-
--- Icon Keywords: unique to Icon; use l.TYPE, as Icon is dynamically typed
-local type = token(l.TYPE, P('&') * word_match{
- 'allocated', 'ascii', 'clock', 'collections', 'cset', 'current', 'date',
- 'dateline', 'digits', 'dump', 'e', 'error', 'errornumber', 'errortext',
- 'errorvalue', 'errout', 'fail', 'features', 'file', 'host', 'input', 'lcase',
- 'letters', 'level', 'line', 'main', 'null', 'output', 'phi', 'pi', 'pos',
- 'progname', 'random', 'regions', 'source', 'storage', 'subject', 'time',
- 'trace', 'ucase', 'version'
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('preproc', token(lexer.PREPROCESSOR, '$' *
+ word_match('define else endif error ifdef ifndef include line undef')))
-- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>~!=^&|?~@:;,.()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~@:;,.()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '#'},
- [l.PREPROCESSOR] = {ifdef = 1, ifndef = 1, endif = -1},
- [l.KEYWORD] = { procedure = 1, ['end'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif')
+lex:add_fold_point(lexer.KEYWORD, 'procedure', 'end')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
diff --git a/lua/lexers/idl.lua b/lua/lexers/idl.lua
index a3ce325..9a2250c 100644
--- a/lua/lexers/idl.lua
+++ b/lua/lexers/idl.lua
@@ -1,68 +1,50 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- IDL LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'idl'}
+local lex = lexer.new('idl')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'attribute', 'case', 'const', 'context', 'custom', 'default', 'enum', 'exception',
+ 'factory', 'FALSE', 'in', 'inout', 'interface', 'local', 'module', 'native', 'oneway', 'out',
+ 'private', 'public', 'raises', 'readonly', 'struct', 'support', 'switch', 'TRUE', 'truncatable',
+ 'typedef', 'union', 'valuetype'
+}))
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'any', 'boolean', 'char', 'double', 'fixed', 'float', 'long', 'Object', 'octet', 'sequence',
+ 'short', 'string', 'unsigned', 'ValueBase', 'void', 'wchar', 'wstring'
+}))
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Preprocessor.
-local preproc_word = word_match{
- 'define', 'undef', 'ifdef', 'ifndef', 'if', 'elif', 'else', 'endif',
- 'include', 'warning', 'pragma'
-}
-local preproc = token(l.PREPROCESSOR,
- l.starts_line('#') * preproc_word * l.nonnewline^0)
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'attribute', 'case', 'const', 'context', 'custom', 'default',
- 'exception', 'enum', 'factory', 'FALSE', 'in', 'inout', 'interface', 'local',
- 'module', 'native', 'oneway', 'out', 'private', 'public', 'raises',
- 'readonly', 'struct', 'support', 'switch', 'TRUE', 'truncatable', 'typedef',
- 'union', 'valuetype'
-})
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Types.
-local type = token(l.TYPE, word_match{
- 'any', 'boolean', 'char', 'double', 'fixed', 'float', 'long', 'Object',
- 'octet', 'sequence', 'short', 'string', 'unsigned', 'ValueBase', 'void',
- 'wchar', 'wstring'
-})
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Preprocessor.
+lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
+ word_match('define undef ifdef ifndef if elif else endif include warning pragma')))
-- Operators.
-local operator = token(l.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preprocessor', preproc},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}')))
-return M
+return lex
diff --git a/lua/lexers/inform.lua b/lua/lexers/inform.lua
index c2e63e9..728cbfc 100644
--- a/lua/lexers/inform.lua
+++ b/lua/lexers/inform.lua
@@ -1,97 +1,75 @@
--- Copyright 2010-2017 Jeff Stone. See LICENSE.
+-- Copyright 2010-2022 Jeff Stone. See LICENSE.
-- Inform LPeg lexer for Scintillua.
-- JMS 2010-04-25.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'inform'}
+local lex = lexer.new('inform')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '!' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local inform_hex = '$' * l.xdigit^1
-local inform_bin = '$$' * S('01')^1
-local number = token(l.NUMBER, l.integer + inform_hex + inform_bin)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'Abbreviate', 'Array', 'Attribute', 'Class', 'Constant', 'Default', 'End',
- 'Endif', 'Extend', 'Global', 'Ifdef', 'Iffalse', 'Ifndef', 'Ifnot', 'Iftrue',
- 'Import', 'Include', 'Link', 'Lowstring', 'Message', 'Object', 'Property',
- 'Release', 'Replace', 'Serial', 'StartDaemon', 'Statusline', 'StopDaemon',
- 'Switches', 'Verb', 'absent', 'action', 'actor', 'add_to_scope', 'address',
- 'additive', 'after', 'and', 'animate', 'article', 'articles', 'before',
- 'bold', 'box', 'break', 'cant_go', 'capacity', 'char', 'class', 'child',
- 'children', 'clothing', 'concealed', 'container', 'continue', 'creature',
- 'daemon', 'deadflag', 'default', 'describe', 'description', 'do', 'door',
- 'door_dir', 'door_to', 'd_to', 'd_obj', 'e_to', 'e_obj', 'each_turn',
- 'edible', 'else', 'enterable', 'false', 'female', 'first', 'font', 'for',
- 'found_in', 'general', 'give', 'grammar', 'has', 'hasnt', 'held', 'if', 'in',
- 'in_to', 'in_obj', 'initial', 'inside_description', 'invent', 'jump', 'last',
- 'life', 'light', 'list_together', 'location', 'lockable', 'locked', 'male',
- 'move', 'moved', 'multi', 'multiexcept', 'multiheld', 'multiinside', 'n_to',
- 'n_obj', 'ne_to', 'ne_obj', 'nw_to', 'nw_obj', 'name', 'neuter', 'new_line',
- 'nothing', 'notin', 'noun', 'number', 'objectloop', 'ofclass', 'off', 'on',
- 'only', 'open', 'openable', 'or', 'orders', 'out_to', 'out_obj', 'parent',
- 'parse_name', 'player', 'plural', 'pluralname', 'print', 'print_ret',
- 'private', 'proper', 'provides', 'random', 'react_after', 'react_before',
- 'remove', 'replace', 'return', 'reverse', 'rfalse','roman', 'rtrue', 's_to',
- 's_obj', 'se_to', 'se_obj', 'sw_to', 'sw_obj', 'scenery', 'scope', 'score',
- 'scored', 'second', 'self', 'short_name', 'short_name_indef', 'sibling',
- 'spaces', 'static', 'string', 'style', 'supporter', 'switch', 'switchable',
- 'talkable', 'thedark', 'time_left', 'time_out', 'to', 'topic', 'transparent',
- 'true', 'underline', 'u_to', 'u_obj', 'visited', 'w_to', 'w_obj',
- 'when_closed', 'when_off', 'when_on', 'when_open', 'while', 'with',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'Abbreviate', 'Array', 'Attribute', 'Class', 'Constant', 'Default', 'End', 'Endif', 'Extend',
+ 'Global', 'Ifdef', 'Iffalse', 'Ifndef', 'Ifnot', 'Iftrue', 'Import', 'Include', 'Link',
+ 'Lowstring', 'Message', 'Object', 'Property', 'Release', 'Replace', 'Serial', 'StartDaemon',
+ 'Statusline', 'StopDaemon', 'Switches', 'Verb', --
+ 'absent', 'action', 'actor', 'add_to_scope', 'address', 'additive', 'after', 'and', 'animate',
+ 'article', 'articles', 'before', 'bold', 'box', 'break', 'cant_go', 'capacity', 'char', 'class',
+ 'child', 'children', 'clothing', 'concealed', 'container', 'continue', 'creature', 'daemon',
+ 'deadflag', 'default', 'describe', 'description', 'do', 'door', 'door_dir', 'door_to', 'd_to',
+ 'd_obj', 'e_to', 'e_obj', 'each_turn', 'edible', 'else', 'enterable', 'false', 'female', 'first',
+ 'font', 'for', 'found_in', 'general', 'give', 'grammar', 'has', 'hasnt', 'held', 'if', 'in',
+ 'in_to', 'in_obj', 'initial', 'inside_description', 'invent', 'jump', 'last', 'life', 'light',
+ 'list_together', 'location', 'lockable', 'locked', 'male', 'move', 'moved', 'multi',
+ 'multiexcept', 'multiheld', 'multiinside', 'n_to', 'n_obj', 'ne_to', 'ne_obj', 'nw_to', 'nw_obj',
+ 'name', 'neuter', 'new_line', 'nothing', 'notin', 'noun', 'number', 'objectloop', 'ofclass',
+ 'off', 'on', 'only', 'open', 'openable', 'or', 'orders', 'out_to', 'out_obj', 'parent',
+ 'parse_name', 'player', 'plural', 'pluralname', 'print', 'print_ret', 'private', 'proper',
+ 'provides', 'random', 'react_after', 'react_before', 'remove', 'replace', 'return', 'reverse',
+  'rfalse', 'roman', 'rtrue', 's_to', 's_obj', 'se_to', 'se_obj', 'sw_to', 'sw_obj', 'scenery', 'scope',
+ 'score', 'scored', 'second', 'self', 'short_name', 'short_name_indef', 'sibling', 'spaces',
+ 'static', 'string', 'style', 'supporter', 'switch', 'switchable', 'talkable', 'thedark',
+ 'time_left', 'time_out', 'to', 'topic', 'transparent', 'true', 'underline', 'u_to', 'u_obj',
+ 'visited', 'w_to', 'w_obj', 'when_closed', 'when_off', 'when_on', 'when_open', 'while', 'with',
'with_key', 'workflag', 'worn'
-})
+}))
-- Library actions.
-local action = token('action', word_match{
- 'Answer', 'Ask', 'AskFor', 'Attack', 'Blow', 'Burn', 'Buy', 'Climb', 'Close',
- 'Consult', 'Cut', 'Dig', 'Disrobe', 'Drink', 'Drop', 'Eat', 'Empty', 'EmptyT',
- 'Enter', 'Examine', 'Exit', 'Fill', 'FullScore', 'GetOff', 'Give', 'Go',
- 'GoIn', 'Insert', 'Inv', 'InvTall', 'InvWide', 'Jump', 'JumpOver', 'Kiss',
- 'LetGo', 'Listen', 'LMode1', 'LMode2', 'LMode3', 'Lock', 'Look', 'LookUnder',
- 'Mild', 'No', 'NotifyOff', 'NotifyOn', 'Objects', 'Open', 'Order', 'Places',
- 'Pray', 'Pronouns', 'Pull', 'Push', 'PushDir', 'PutOn', 'Quit', 'Receive',
- 'Remove', 'Restart', 'Restore', 'Rub', 'Save', 'Score', 'ScriptOff',
- 'ScriptOn', 'Search', 'Set', 'SetTo', 'Show', 'Sing', 'Sleep', 'Smell',
- 'Sorry', 'Squeeze', 'Strong', 'Swim', 'Swing', 'SwitchOff', 'SwitchOn',
- 'Take', 'Taste', 'Tell', 'Think', 'ThrowAt', 'ThrownAt', 'Tie', 'Touch',
- 'Transfer', 'Turn', 'Unlock', 'VagueGo', 'Verify', 'Version', 'Wake',
- 'WakeOther', 'Wait', 'Wave', 'WaveHands', 'Wear', 'Yes'
-})
+lex:add_rule('action', token('action', word_match{
+ 'Answer', 'Ask', 'AskFor', 'Attack', 'Blow', 'Burn', 'Buy', 'Climb', 'Close', 'Consult', 'Cut',
+ 'Dig', 'Disrobe', 'Drink', 'Drop', 'Eat', 'Empty', 'EmptyT', 'Enter', 'Examine', 'Exit', 'Fill',
+ 'FullScore', 'GetOff', 'Give', 'Go', 'GoIn', 'Insert', 'Inv', 'InvTall', 'InvWide', 'Jump',
+ 'JumpOver', 'Kiss', 'LetGo', 'Listen', 'LMode1', 'LMode2', 'LMode3', 'Lock', 'Look', 'LookUnder',
+ 'Mild', 'No', 'NotifyOff', 'NotifyOn', 'Objects', 'Open', 'Order', 'Places', 'Pray', 'Pronouns',
+ 'Pull', 'Push', 'PushDir', 'PutOn', 'Quit', 'Receive', 'Remove', 'Restart', 'Restore', 'Rub',
+ 'Save', 'Score', 'ScriptOff', 'ScriptOn', 'Search', 'Set', 'SetTo', 'Show', 'Sing', 'Sleep',
+ 'Smell', 'Sorry', 'Squeeze', 'Strong', 'Swim', 'Swing', 'SwitchOff', 'SwitchOn', 'Take', 'Taste',
+ 'Tell', 'Think', 'ThrowAt', 'ThrownAt', 'Tie', 'Touch', 'Transfer', 'Turn', 'Unlock', 'VagueGo',
+ 'Verify', 'Version', 'Wait', 'Wake', 'WakeOther', 'Wave', 'WaveHands', 'Wear', 'Yes'
+}))
+lex:add_style('action', lexer.styles.variable)
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('@~=+-*/%^#=<>;:,.{}[]()&|?'))
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'keyword', keyword},
- {'action', action},
- {'identifier', identifier},
- {'operator', operator},
-}
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!')))
-_styles = {
- {'action', l.STYLE_VARIABLE}
-}
+-- Numbers.
+local inform_hex = '$' * lexer.xdigit^1
+local inform_bin = '$$' * S('01')^1
+lex:add_rule('number', token(lexer.NUMBER, lexer.integer + inform_hex + inform_bin))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('@~=+-*/%^#=<>;:,.{}[]()&|?')))
-return M
+return lex
diff --git a/lua/lexers/ini.lua b/lua/lexers/ini.lua
index 291f0b2..a2b107e 100644
--- a/lua/lexers/ini.lua
+++ b/lua/lexers/ini.lua
@@ -1,52 +1,39 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Ini LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'ini'}
+local lex = lexer.new('ini')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, l.starts_line(S(';#')) * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match('true false on off yes no')))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0))
-- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local label = l.delimited_range('[]', true, true)
-local string = token(l.STRING, sq_str + dq_str + label)
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0
-local oct_num = '0' * S('01234567_')^1
-local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
-local number = token(l.NUMBER, (l.float + integer))
+-- Labels.
+lex:add_rule('label', token(lexer.LABEL, lexer.range('[', ']', true)))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'true', 'false', 'on', 'off', 'yes', 'no'
-})
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(lexer.starts_line(S(';#')))))
--- Identifiers.
-local word = (l.alpha + '_') * (l.alnum + S('_.'))^0
-local identifier = token(l.IDENTIFIER, word)
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Operators.
-local operator = token(l.OPERATOR, '=')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._LEXBYLINE = true
+lex:add_rule('operator', token(lexer.OPERATOR, '='))
-return M
+return lex
diff --git a/lua/lexers/io_lang.lua b/lua/lexers/io_lang.lua
index 2a0f7f8..18648fc 100644
--- a/lua/lexers/io_lang.lua
+++ b/lua/lexers/io_lang.lua
@@ -1,66 +1,50 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Io LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'io_lang'}
+local lex = lexer.new('io_lang')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = (P('#') + '//') * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local string = token(l.STRING, tq_str + sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'block', 'method', 'while', 'foreach', 'if', 'else', 'do', 'super', 'self',
- 'clone', 'proto', 'setSlot', 'hasSlot', 'type', 'write', 'print', 'forward'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'block', 'method', 'while', 'foreach', 'if', 'else', 'do', 'super', 'self', 'clone', 'proto',
+ 'setSlot', 'hasSlot', 'type', 'write', 'print', 'forward'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'Block', 'Buffer', 'CFunction', 'Date', 'Duration', 'File', 'Future', 'List',
- 'LinkedList', 'Map', 'Nop', 'Message', 'Nil', 'Number', 'Object', 'String',
- 'WeakLink'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'Block', 'Buffer', 'CFunction', 'Date', 'Duration', 'File', 'Future', 'LinkedList', 'List', 'Map',
+ 'Message', 'Nil', 'Nop', 'Number', 'Object', 'String', 'WeakLink'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('`~@$%^&*-+/=\\<>?.,:;()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local tq_str = lexer.range('"""')
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol(P('#') + '//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('`~@$%^&*-+/=\\<>?.,:;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[%(%)]', '/%*', '%*/', '#', '//'},
- [l.OPERATOR] = {['('] = 1, [')'] = -1},
- [l.COMMENT] = {
- ['/*'] = 1, ['*/'] = -1, ['#'] = l.fold_line_comments('#'),
- ['//'] = l.fold_line_comments('//')
- }
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/java.lua b/lua/lexers/java.lua
index dab6ddd..df83b61 100644
--- a/lua/lexers/java.lua
+++ b/lua/lexers/java.lua
@@ -1,86 +1,67 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Java LPeg lexer.
-- Modified by Brian Schott.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'java'}
+local lex = lexer.new('java')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1)
+-- Classes.
+lex:add_rule('classdef', token(lexer.KEYWORD, 'class') * ws * token(lexer.CLASS, lexer.word))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue',
- 'default', 'do', 'else', 'enum', 'extends', 'final', 'finally', 'for', 'goto',
- 'if', 'implements', 'import', 'instanceof', 'interface', 'native', 'new',
- 'package', 'private', 'protected', 'public', 'return', 'static', 'strictfp',
- 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default', 'do',
+ 'else', 'enum', 'extends', 'final', 'finally', 'for', 'goto', 'if', 'implements', 'import',
+ 'instanceof', 'interface', 'native', 'new', 'package', 'private', 'protected', 'public', 'return',
+ 'static', 'strictfp', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient',
'try', 'while', 'volatile',
-- Literals.
'true', 'false', 'null'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void',
- 'Boolean', 'Byte', 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short',
- 'String'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void', 'Boolean', 'Byte',
+ 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short', 'String'
+}))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Annotations.
-local annotation = token('annotation', '@' * l.word)
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Functions.
-local func = token(l.FUNCTION, l.word) * #P('(')
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Classes.
-local class_sequence = token(l.KEYWORD, P('class')) * ws^1 *
- token(l.CLASS, l.word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1))
-M._rules = {
- {'whitespace', ws},
- {'class', class_sequence},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'annotation', annotation},
- {'operator', operator},
-}
+-- Annotations.
+lex:add_rule('annotation', token('annotation', '@' * lexer.word))
+lex:add_style('annotation', lexer.styles.preprocessor)
-M._tokenstyles = {
- annotation = l.STYLE_PREPROCESSOR
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point(lexer.KEYWORD, lexer.fold_consecutive_lines('import'))
-return M
+return lex
diff --git a/lua/lexers/javascript.lua b/lua/lexers/javascript.lua
index 5366298..d94baed 100644
--- a/lua/lexers/javascript.lua
+++ b/lua/lexers/javascript.lua
@@ -1,65 +1,90 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- JavaScript LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'javascript'}
+local lex = lexer.new('javascript')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'async', 'await', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class',
+ 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'double', 'else', 'enum', 'export',
+ 'extends', 'false', 'final', 'finally', 'float', 'for', 'function', 'get', 'goto', 'if',
+ 'implements', 'import', 'in', 'instanceof', 'int', 'interface', 'let', 'long', 'native', 'new',
+ 'null', 'of', 'package', 'private', 'protected', 'public', 'return', 'set', 'short', 'static',
+ 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'true', 'try',
+ 'typeof', 'var', 'void', 'volatile', 'while', 'with', 'yield'
+}))
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local template_str = l.delimited_range('`')
-local regex_str = #P('/') * l.last_char_includes('+-*%^!=&|?:;,([{<>') *
- l.delimited_range('/', true) * S('igm')^0
-local string = token(l.STRING, sq_str + dq_str + template_str) +
- token(l.REGEX, regex_str)
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ -- Fundamental objects.
+ 'Object', 'Function', 'Boolean', 'Symbol',
+ -- Error Objects.
+ 'Error', 'AggregateError', 'EvalError', 'InternalError', 'RangeError', 'ReferenceError',
+ 'SyntaxError', 'TypeError', 'URIError',
+ -- Numbers and dates.
+ 'Number', 'BigInt', 'Math', 'Date',
+ -- Text Processing.
+ 'String', 'RegExp',
+ -- Indexed collections.
+ 'Array', 'Int8Array', 'Uint8Array', 'Uint8ClampedArray', 'Int16Array', 'Uint16Array',
+ 'Int32Array', 'Uint32Array', 'Float32Array', 'Float64Array', 'BigInt64Array', 'BigUint64Array',
+ -- Keyed collections.
+ 'Map', 'Set', 'WeakMap', 'WeakSet',
+ -- Structured data.
+ 'ArrayBuffer', 'SharedArrayBuffer', 'Atomics', 'DataView', 'JSON',
+ -- Control abstraction objects.
+ 'GeneratorFunction', 'AsyncGeneratorFunction', 'Generator', 'AsyncGenerator', 'AsyncFunction',
+ 'Promise',
+ -- Reflection.
+ 'Reflect', 'Proxy',
+ -- Other.
+ 'Intl', 'WebAssembly'
+}))
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'eval', 'isFinite', 'isNaN', 'parseFloat', 'parseInt', 'decodeURI', 'decodeURIComponent',
+ 'encodeURI', 'encodeURIComponent'
+}))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'async', 'await', 'boolean', 'break', 'byte', 'case', 'catch',
- 'char', 'class', 'const', 'continue', 'debugger', 'default', 'delete',
- 'do', 'double', 'else', 'enum', 'export', 'extends', 'false', 'final',
- 'finally', 'float', 'for', 'function', 'get', 'goto', 'if', 'implements',
- 'import', 'in', 'instanceof', 'int', 'interface', 'let', 'long', 'native',
- 'new', 'null', 'of', 'package', 'private', 'protected', 'public', 'return',
- 'set', 'short', 'static', 'super', 'switch', 'synchronized', 'this',
- 'throw', 'throws', 'transient', 'true', 'try', 'typeof', 'var', 'void',
- 'volatile', 'while', 'with', 'yield'
-})
+-- Constants.
+lex:add_rule('constant',
+ token(lexer.CONSTANT, word_match('Infinity NaN undefined globalThis arguments')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+local string = token(lexer.STRING, sq_str + dq_str + bq_str)
+local regex_str =
+ #P('/') * lexer.last_char_includes('+-*%^!=&|?:;,([{<>') * lexer.range('/', true) * S('igm')^0
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'string', string},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/jq.lua b/lua/lexers/jq.lua
new file mode 100644
index 0000000..3797e8d
--- /dev/null
+++ b/lua/lexers/jq.lua
@@ -0,0 +1,83 @@
+-- Copyright 2006-2022 Mitchell. See LICENSE.
+-- jq 1.6 Lua lexer -- https://stedolan.github.io/jq/wiki
+-- Anonymously contributed.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+
+local lex = lexer.new('jq')
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- keywords not listed by jq's "builtins", minus operators 'and' and 'or', plus the '?' shorthand
+ 'as', 'break', 'catch', 'def', 'elif', 'else', 'end', 'foreach', 'if', 'import', 'include',
+ 'label', 'module', 'reduce', 'then', 'try'
+} + '?'))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+  -- jq 1.6 built-in functions (SQL ones in upper case)
+ 'acos', 'acosh', 'add', 'all', 'any', 'arrays', 'ascii_downcase', 'ascii_upcase', 'asin', 'asinh',
+ 'atan', 'atan2', 'atanh', 'booleans', 'bsearch', 'builtins', 'capture', 'cbrt', 'ceil',
+ 'combinations', 'contains', 'copysign', 'cos', 'cosh', 'debug', 'del', 'delpaths', 'drem',
+ 'empty', 'endswith', 'env', 'erf', 'erfc', 'error', 'exp', 'exp10', 'exp2', 'explode', 'expm1',
+ 'fabs', 'fdim', 'finites', 'first', 'flatten', 'floor', 'fma', 'fmax', 'fmin', 'fmod', 'format',
+ 'frexp', 'from_entries', 'fromdate', 'fromdateiso8601', 'fromjson', 'fromstream', 'gamma',
+ 'get_jq_origin', 'get_prog_origin', 'get_search_list', 'getpath', 'gmtime', 'group_by', 'gsub',
+ 'halt', 'halt_error', 'has', 'hypot', 'implode', 'IN', 'in', 'INDEX', 'index', 'indices',
+ 'infinite', 'input', 'input_filename', 'input_line_number', 'inputs', 'inside', 'isempty',
+ 'isfinite', 'isinfinite', 'isnan', 'isnormal', 'iterables', 'j0', 'j1', 'jn', 'JOIN', 'join',
+ 'keys', 'keys_unsorted', 'last', 'ldexp', 'leaf_paths', 'length', 'lgamma', 'lgamma_r', 'limit',
+ 'localtime', 'log', 'log10', 'log1p', 'log2', 'logb', 'ltrimstr', 'map', 'map_values', 'match',
+ 'max', 'max_by', 'min', 'min_by', 'mktime', 'modf', 'modulemeta', 'nan', 'nearbyint', 'nextafter',
+ 'nexttoward', 'normals', 'not', 'now', 'nth', 'nulls', 'numbers', 'objects', 'path', 'paths',
+ 'pow', 'pow10', 'range', 'recurse', 'recurse_down', 'remainder', 'repeat', 'reverse', 'rindex',
+ 'rint', 'round', 'rtrimstr', 'scalars', 'scalars_or_empty', 'scalb', 'scalbln', 'scan', 'select',
+ 'setpath', 'significand', 'sin', 'sinh', 'sort', 'sort_by', 'split', 'splits', 'sqrt',
+ 'startswith', 'stderr', 'strflocaltime', 'strftime', 'strings', 'strptime', 'sub', 'tan', 'tanh',
+ 'test', 'tgamma', 'to_entries', 'todate', 'todateiso8601', 'tojson', 'tonumber', 'tostream',
+ 'tostring', 'transpose', 'trunc', 'truncate_stream', 'type', 'unique', 'unique_by', 'until',
+ 'utf8bytelength', 'values', 'walk', 'while', 'with_entries', 'y0', 'y1', 'yn'
+}))
+
+-- Strings.
+local string = token(lexer.STRING, lexer.range('"', true))
+local literal = token(lexer.STRING, word_match('null false true'))
+lex:add_rule('string', string + literal)
+
+-- Operators.
+-- 'not' isn't an operator but a function (filter)
+lex:add_rule('operator', token(lexer.OPERATOR,
+ P('.[]') + '?//' + '//=' + 'and' + '[]' + '//' + '==' + '!=' + '>=' + '<=' + '|=' + '+=' + '-=' +
+ '*=' + '/=' + '%=' + 'or' + S('=+-*/%<>()[]{}.,') + '|' + ';'))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Formats.
+lex:add_rule('format',
+ token('format', '@' * word_match('text json html uri csv tsv sh base64 base64d')))
+lex:add_style('format', lexer.styles.constant)
+
+-- Variables.
+lex:add_rule('sysvar', token('sysvar', '$' * word_match('ENV ORIGIN __loc__')))
+lex:add_style('sysvar', lexer.styles.constant .. {bold = true})
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * lexer.word))
+
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+
+return lex
diff --git a/lua/lexers/json.lua b/lua/lexers/json.lua
index 6e7025d..10b6406 100644
--- a/lua/lexers/json.lua
+++ b/lua/lexers/json.lua
@@ -1,47 +1,40 @@
--- Copyright 2006-2017 Brian "Sir Alaran" Schott. See LICENSE.
+-- Copyright 2006-2022 Brian "Sir Alaran" Schott. See LICENSE.
-- JSON LPeg lexer.
-- Based off of lexer code by Mitchell.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'json'}
+local lex = lexer.new('json')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Strings.
-local sq_str = P('u')^-1 * l.delimited_range("'", true)
-local dq_str = P('U')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local integer = S('+-')^-1 * l.digit^1 * S('Ll')^-1
-local number = token(l.NUMBER, l.float + integer)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{"true", "false", "null"})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match('true false null')))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+local integer = S('+-')^-1 * lexer.dec_num * S('Ll')^-1
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Operators.
-local operator = token(l.OPERATOR, S('[]{}:,'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'keyword', keyword},
- {'operator', operator},
-}
-
-M._foldsymbols = {
- _patterns = {'[%[%]{}]', '/%*', '%*/'},
- [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1}
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('[]{}:,')))
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+
+return lex
diff --git a/lua/lexers/jsp.lua b/lua/lexers/jsp.lua
index e6390c2..b92b886 100644
--- a/lua/lexers/jsp.lua
+++ b/lua/lexers/jsp.lua
@@ -1,29 +1,20 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- JSP LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'jsp'}
-
--- Embedded in HTML.
-local html = l.load('html')
+local lex = lexer.new('jsp', {inherit = lexer.load('html')})
-- Embedded Java.
-local java = l.load('java')
+local java = lexer.load('java')
local java_start_rule = token('jsp_tag', '<%' * P('=')^-1)
local java_end_rule = token('jsp_tag', '%>')
-l.embed_lexer(html, java, java_start_rule, java_end_rule, true)
-
-M._tokenstyles = {
- jsp_tag = l.STYLE_EMBEDDED
-}
+lex:embed(java, java_start_rule, java_end_rule, true)
+lex:add_style('jsp_tag', lexer.styles.embedded)
-local _foldsymbols = html._foldsymbols
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>'
-_foldsymbols.jsp_tag = {['<%'] = 1, ['%>'] = -1}
-M._foldsymbols = _foldsymbols
+-- Fold points.
+lex:add_fold_point('jsp_tag', '<%', '%>')
-return M
+return lex
diff --git a/lua/lexers/julia.lua b/lua/lexers/julia.lua
index d1aebc6..0e751a0 100644
--- a/lua/lexers/julia.lua
+++ b/lua/lexers/julia.lua
@@ -1,147 +1,110 @@
+-- Copyright 2020-2022 Tobias Frilling. See LICENSE.
-- Julia lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local B, P, R, S = lpeg.B, lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local B, P, S = lpeg.B, lpeg.P, lpeg.S
-local M = {_NAME = 'julia'}
+local lex = lexer.new('julia')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local id = lexer.word * P('!')^0
--- Identifier
-local id = l.word * P('!')^0
-local identifier = token(l.IDENTIFIER, id)
+-- Keyword
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'baremodule', 'begin', 'break', 'catch', 'const', 'continue', 'do', 'else', 'elseif', 'end',
+ 'export', 'finally', 'for', 'function', 'global', 'if', 'in', 'isa', 'import', 'let', 'local',
+ 'macro', 'module', 'quote', 'return', 'struct', 'try', 'using', 'where', 'while'
+} + 'abstract type' + 'mutable struct' + 'primitive type'))
+-- Constant
+local const_bool = word_match('true false')
+local const_numerical = (P('Inf') + 'NaN') * (P('16') + '32' + '64')^-1 * -lexer.alnum
+local const_special = word_match('nothing undef missing')
+local const_env = word_match('ARGS ENV ENDIAN_BOM LOAD_PATH VERSION PROGRAM_FILE DEPOT_PATH')
+local const_io = word_match('stdout stdin stderr devnull')
+lex:add_rule('constant', token(lexer.CONSTANT,
+ const_bool + const_numerical + const_special + const_env + const_io))
--- Operator
-local operator = token(l.OPERATOR, S('+-*/÷<>=!≠≈≤≥%^&|⊻~\\\':?.√'))
+-- Type
+local type_annotated = (B('::') + B(':: ')) * id
+local type_para = id * #P('{')
+local type_subtyping = id * #(lexer.space^0 * '<:') + (B('<:') + B('<: ')) * id
+local type_struct = B('struct ') * id
+-- LuaFormatter off
+local type_builtin_numerical = ((P('Abstract') + 'Big') * 'Float' +
+ 'Float' * (P('16') + '32' + '64') +
+ P('U')^-1 * 'Int' * (P('8') + '16' + '32' + '64' + '128')^-1 +
+ P('Abstract')^-1 * 'Irrational'
+) * -lexer.alnum + word_match('Number Complex Real Integer Bool Signed Unsigned Rational')
+-- LuaFormatter on
+local type_builtin_range = ((P('Lin') + 'Ordinal' + (P('Abstract')^-1 * P('Unit')^-1)) * 'Range' +
+ 'StepRange' * P('Len')^-1 - 'Range'
+) * -lexer.alnum
+local type_builtin_array = ((P('Abstract') + 'Bit' + 'Dense' + 'PermutedDims' + 'Sub')^-1 *
+ word_match('Array Vector Matrix VecOrMat') +
+ (P('Abstract') + 'Sym' + (P('Unit')^-1 * (P('Lower') + 'Upper')))^-1 * 'Triangular'
+) * -lexer.alnum +
+ word_match('Adjoint Bidiagonal Diagonal Hermitian LQPackedQ Symmetric Transpose UpperHessenberg')
+lex:add_rule('type', token(lexer.TYPE,
+ type_para + type_annotated + type_subtyping + type_struct + type_builtin_numerical +
+ type_builtin_range + type_builtin_array))
+-- Macro
+lex:add_rule('macro', token('macro', '@' * (id + '.')))
+lex:add_style('macro', lexer.styles.preprocessor)
--- Comment
-local line_comment = '#' * l.nonnewline^0
-local block_comment = '#=' * (l.any - '=#')^0 * P('=#')^-1
-local comment = token(l.COMMENT, block_comment + line_comment)
+-- Symbol
+lex:add_rule('symbol', token('symbol', -B(P(':') + '<') * ':' * id))
+lex:add_style('symbol', lexer.styles.constant)
+-- Function
+lex:add_rule('function', token(lexer.FUNCTION, id * #(P('.')^-1 * '(')))
--- Constant
-local const_bool = word_match{'true', 'false'}
-local const_numerical = (P('Inf') + P('NaN')) * (P('16') + P('32') + P('64'))^-1 * #(-l.alnum)
-local const_special = word_match{'nothing', 'undef', 'missing'}
-local const_env = word_match{'ARGS', 'ENV', 'ENDIAN_BOM', 'LOAD_PATH', 'VERSION', 'PROGRAM_FILE', 'DEPOT_PATH'}
-local const_io = word_match{'stdout', 'stdin', 'stderr', 'devnull'}
-local constant = token(l.CONSTANT, const_bool + const_numerical + const_special + const_env + const_io)
+-- Identifier
+lex:add_rule('identifier', token(lexer.IDENTIFIER, id))
+-- Comment
+local line_comment = lexer.to_eol('#')
+local block_comment = lexer.range('#=', '=#')
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Number
-local decimal = l.digit^1 * ('_' * l.digit^1)^0
-local hex_digits = l.xdigit^1 * ('_' * l.xdigit^1)^0
+local decimal = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local hex_digits = lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0
local hexadecimal = '0x' * hex_digits
local binary = '0b' * S('01')^1 * ('_' * S('01')^1)^0
local integer = binary + hexadecimal + decimal
local float_dec_coeff = decimal^0 * '.' * decimal + decimal * '.' * decimal^0
-local float_dec_expon = S('eEf') * S('+-')^-1 * l.digit^1
+local float_dec_expon = S('eEf') * S('+-')^-1 * lexer.digit^1
local float_dec = float_dec_coeff * float_dec_expon^-1 + decimal * float_dec_expon
local float_hex_coeff = '0x' * (hex_digits^0 * '.' * hex_digits + hex_digits * '.' * hex_digits^0)
-local float_hex_expon = 'p' * S('+-')^-1 * l.digit^1
+local float_hex_expon = 'p' * S('+-')^-1 * lexer.digit^1
local float_hex = float_hex_coeff * float_hex_expon^-1 + hexadecimal * float_hex_expon
local float = float_dec + float_hex
local imaginary = (float_dec + decimal) * 'im'
-local number = token(l.NUMBER, S('+-')^-1 * (imaginary + float + integer) * #(-l.alpha))
+lex:add_rule('number',
+ token(lexer.NUMBER, S('+-')^-1 * (imaginary + float + integer) * -lexer.alpha))
+-- String & Character
+local doc_str = lexer.range('"""')
+local str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, doc_str + str))
--- Character & String
local c_esc = '\\' * S('\\"\'nrbtfav')
-local unicode = '\\' * S('uU') * l.xdigit^1
-local char = "'" * (l.alnum + c_esc + unicode) * "'"
-local character = token('character', char)
-
-local doc_str = l.delimited_range('"""')
-local str = l.delimited_range('"')
-local string = token(l.STRING, doc_str + str)
-
-
--- Keyword
-local keyword_single = word_match{
- 'baremodule', 'begin', 'break', 'catch', 'const', 'continue',
- 'do', 'else', 'elseif', 'end', 'export', 'finally', 'for',
- 'function', 'global', 'if', 'in', 'isa', 'import', 'let', 'local',
- 'macro', 'module', 'quote', 'return', 'struct', 'try', 'using',
- 'where', 'while'
-}
-local keyword_mult = P('abstract type') + P('mutable struct') + P('primitive type')
-local keyword = token(l.KEYWORD, keyword_single + keyword_mult)
-
-
--- Function
-local func = token(l.FUNCTION, id * #(P('.')^-1 * #P('(')))
-
-
--- Macro
-local macro = token('macro', '@' * (id + '.'))
-
-
--- Type
-local type_annotated = (B('::') + B(':: ')) * id
-local type_para = id * #P('{')
-local type_subtyping = id * #(l.space^0 * P('<:')) + (B('<:') + B('<: ')) * id
-local type_struct = B('struct ') * id
-local type_builtin_numerical = (
- (P('Abstract') + P('Big')) * P('Float') +
- P('Float') * (P('16') + P('32') + P('64')) +
- P('U')^-1 * P('Int') * (P('8') + P('16') + P('32') + P('64') + P('128'))^-1 +
- P('Abstract')^-1 * P('Irrational')
- ) * #(-l.alnum) +
- word_match{'Number', 'Complex', 'Real', 'Integer', 'Bool', 'Signed', 'Unsigned', 'Rational'}
-local type_builtin_range = (
- (P('Lin') + P('Ordinal') + (P('Abstract')^-1 * P('Unit')^-1)) * P('Range') +
- P('StepRange') * P('Len')^-1 - P('Range')
- ) * #(-l.alnum)
-local type_builtin_array = (
- (P('Abstract') + P('Bit') + P('Dense') + P('PermutedDims') + P('Sub'))^-1 *
- (P('Array') + P('Vector') + P('Matrix') + P('VecOrMat')) +
- (P('Abstract') + P('Sym') + (P('Unit')^-1 * (P('Lower') + P('Upper'))))^-1 * P('Triangular')
- ) * #(-l.alnum) +
- word_match{
- 'Adjoint', 'Bidiagonal', 'Diagonal', 'Hermitian', 'LQPackedQ',
- 'Symmetric', 'Transpose', 'UpperHessenberg'
- }
-local type = token(l.TYPE,
- type_para + type_annotated + type_subtyping + type_struct + type_builtin_numerical + type_builtin_range +
- type_builtin_array
- )
+local unicode = '\\' * S('uU') * lexer.xdigit^1
+local char = "'" * (lexer.alnum + c_esc + unicode) * "'"
+lex:add_rule('character', token('character', char))
+lex:add_style('character', lexer.styles.constant)
+-- Operator
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/÷<>=!≠≈≤≥%^&|⊻~\\\':?.√')))
--- Symbol
-local symbol = token('symbol', -B(P(':') + P('<')) * P(':') * id)
-
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'character', character},
- {'keyword', keyword},
- {'constant', constant},
- {'type', type},
- {'symbol', symbol},
- {'macro', macro},
- {'function', func},
- {'identifier', identifier},
- {'operator', operator},
- {'number', number},
-}
-
-M._tokenstyles = {
- character = l.STYLE_CONSTANT,
- symbol = l.STYLE_CONSTANT,
- macro = l.STYLE_PREPROCESSOR,
-}
-
-return M
+return lex
diff --git a/lua/lexers/latex.lua b/lua/lexers/latex.lua
index 3e276e1..a0b8995 100644
--- a/lua/lexers/latex.lua
+++ b/lua/lexers/latex.lua
@@ -1,73 +1,50 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Latex LPeg lexer.
-- Modified by Brian Schott.
-- Modified by Robert Gieseke.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'latex'}
+local lex = lexer.new('latex')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local line_comment = '%' * l.nonnewline^0
-local block_comment = '\\begin' * P(' ')^0 * '{comment}' *
- (l.any - '\\end' * P(' ')^0 * '{comment}')^0 *
- P('\\end' * P(' ')^0 * '{comment}')^-1
--- Note: need block_comment before line_comment or LPeg cannot compile rule.
-local comment = token(l.COMMENT, block_comment + line_comment)
-
--- Sections.
-local section = token('section', '\\' * word_match{
- 'part', 'chapter', 'section', 'subsection', 'subsubsection', 'paragraph',
- 'subparagraph'
-} * P('*')^-1)
+local line_comment = lexer.to_eol('%')
+local block_comment = lexer.range('\\begin' * P(' ')^0 * '{comment}',
+ '\\end' * P(' ')^0 * '{comment}')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Math environments.
-local math_word = word_match{
- 'align', 'displaymath', 'eqnarray', 'equation', 'gather', 'math', 'multline'
-}
-local math_begin_end = (P('begin') + P('end')) * P(' ')^0 *
- '{' * math_word * P('*')^-1 * '}'
-local math = token('math', '$' + '\\' * (S('[]()') + math_begin_end))
+local math_word = word_match('align displaymath eqnarray equation gather math multline')
+local math_begin_end = (P('begin') + P('end')) * P(' ')^0 * '{' * math_word * P('*')^-1 * '}'
+lex:add_rule('math', token('math', '$' + '\\' * (S('[]()') + math_begin_end)))
+lex:add_style('math', lexer.styles['function'])
-- LaTeX environments.
-local environment = token('environment', '\\' * (P('begin') + P('end')) *
- P(' ')^0 *
- '{' * l.word * P('*')^-1 * '}')
+lex:add_rule('environment', token('environment', '\\' * (P('begin') + 'end') * P(' ')^0 * '{' *
+ lexer.word * P('*')^-1 * '}'))
+lex:add_style('environment', lexer.styles.keyword)
+
+-- Sections.
+lex:add_rule('section', token('section', '\\' *
+ word_match('part chapter section subsection subsubsection paragraph subparagraph') * P('*')^-1))
+lex:add_style('section', lexer.styles.class)
-- Commands.
-local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}')))
+lex:add_rule('command', token('command', '\\' * (lexer.alpha^1 + S('#$&~_^%{}\\'))))
+lex:add_style('command', lexer.styles.keyword)
-- Operators.
-local operator = token(l.OPERATOR, S('&#{}[]'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'math', math},
- {'environment', environment},
- {'section', section},
- {'keyword', command},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- environment = l.STYLE_KEYWORD,
- math = l.STYLE_FUNCTION,
- section = l.STYLE_CLASS
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('&#{}[]')))
-M._foldsymbols = {
- _patterns = {'\\[a-z]+', '[{}]', '%%'},
- [l.COMMENT] = {
- ['\\begin'] = 1, ['\\end'] = -1, ['%'] = l.fold_line_comments('%')
- },
- ['environment'] = {['\\begin'] = 1, ['\\end'] = -1},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, '\\begin', '\\end')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%'))
+lex:add_fold_point('environment', '\\begin', '\\end')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
-return M
+return lex
diff --git a/lua/lexers/ledger.lua b/lua/lexers/ledger.lua
index 7b594d8..27ca588 100644
--- a/lua/lexers/ledger.lua
+++ b/lua/lexers/ledger.lua
@@ -1,57 +1,43 @@
--- Copyright 2015-2017 Charles Lehner. See LICENSE.
+-- Copyright 2015-2022 Charles Lehner. See LICENSE.
-- ledger journal LPeg lexer, see http://www.ledger-cli.org/
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'ledger'}
+local lex = lexer.new('ledger', {lex_by_line = true})
local delim = P('\t') + P(' ')
--- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+-- Account.
+lex:add_rule('account', token(lexer.VARIABLE, lexer.starts_line(S(' \t')^1 * lexer.graph^1)))
+
+-- Amount.
+lex:add_rule('amount', token(lexer.NUMBER, delim * (1 - S(';\r\n'))^1))
-- Comments.
-local comment = token(l.COMMENT, S(';#') * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(S(';#'))))
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local label = l.delimited_range('[]', true, true)
-local string = token(l.STRING, sq_str + dq_str + label)
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local label = lexer.range('[', ']', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + label))
-- Date.
-local date = token(l.CONSTANT, l.starts_line((l.digit + S('/-')) ^1))
-
--- Account.
-local account = token(l.VARIABLE,
- l.starts_line(S(' \t')^1 * (l.print - delim)^1))
-
--- Amount.
-local amount = token(l.NUMBER, delim * (1 - S(';\r\n'))^1)
+lex:add_rule('date', token(lexer.CONSTANT, lexer.starts_line((lexer.digit + S('/-'))^1)))
-- Automated transactions.
-local auto_tx = token(l.PREPROCESSOR, l.starts_line(S('=~') * l.nonnewline^0))
+lex:add_rule('auto_tx', token(lexer.PREPROCESSOR, lexer.to_eol(lexer.starts_line(S('=~')))))
-- Directives.
local directive_word = word_match{
- 'account', 'alias', 'assert', 'bucket', 'capture', 'check', 'comment',
- 'commodity', 'define', 'end', 'fixed', 'endfixed', 'include', 'payee',
- 'apply', 'tag', 'test', 'year'
+  'account', 'alias', 'assert', 'bucket', 'capture', 'check', 'comment', 'commodity', 'define',
+ 'end', 'fixed', 'endfixed', 'include', 'payee', 'apply', 'tag', 'test', 'year'
} + S('AYNDCIiOobh')
-local directive = token(l.KEYWORD, l.starts_line(S('!@')^-1 * directive_word))
-
-M._rules = {
- {'account', account},
- {'amount', amount},
- {'comment', comment},
- {'whitespace', ws},
- {'date', date},
- {'auto_tx', auto_tx},
- {'directive', directive},
-}
-
-M._LEXBYLINE = true
+lex:add_rule('directive', token(lexer.KEYWORD, lexer.starts_line(S('!@')^-1 * directive_word)))
-return M
+return lex
diff --git a/lua/lexers/less.lua b/lua/lexers/less.lua
index 2f56cdd..317d252 100644
--- a/lua/lexers/less.lua
+++ b/lua/lexers/less.lua
@@ -1,27 +1,20 @@
--- Copyright 2006-2017 Robert Gieseke. See LICENSE.
+-- Copyright 2006-2022 Robert Gieseke. See LICENSE.
-- Less CSS LPeg lexer.
-- http://lesscss.org
-local l = require('lexer')
-local token = l.token
+local lexer = require('lexer')
+local token = lexer.token
local S = lpeg.S
-local M = {_NAME = 'less'}
+local lex = lexer.new('less', {inherit = lexer.load('css')})
-- Line comments.
-local line_comment = token(l.COMMENT, '//' * l.nonnewline^0)
+lex:add_rule('line_comment', token(lexer.COMMENT, lexer.to_eol('//')))
-- Variables.
-local variable = token(l.VARIABLE, '@' * (l.alnum + S('_-{}'))^1)
+lex:add_rule('variable', token(lexer.VARIABLE, '@' * (lexer.alnum + S('_-{}'))^1))
-local css = l.load('css')
-local _rules = css._rules
-table.insert(_rules, #_rules - 1, {'line_comment', line_comment})
-table.insert(_rules, #_rules - 1, {'variable', variable})
-M._rules = _rules
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-M._tokenstyles = css._tokenstyles
-
-M._foldsymbols = css._foldsymbols
-
-return M
+return lex
diff --git a/lua/lexers/lexer.lua b/lua/lexers/lexer.lua
index 2973ea6..15ff432 100644
--- a/lua/lexers/lexer.lua
+++ b/lua/lexers/lexer.lua
@@ -1,602 +1,464 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
local M = {}
--[=[ This comment is for LuaDoc.
---
--- Lexes Scintilla documents with Lua and LPeg.
---
--- ## Overview
---
--- Lexers highlight the syntax of source code. Scintilla (the editing component
--- behind [Textadept][] and [SciTE][]) traditionally uses static, compiled C++
--- lexers which are notoriously difficult to create and/or extend. On the other
--- hand, Lua makes it easy to to rapidly create new lexers, extend existing
--- ones, and embed lexers within one another. Lua lexers tend to be more
--- readable than C++ lexers too.
---
--- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua
--- [LPeg library][]. The following table comes from the LPeg documentation and
--- summarizes all you need to know about constructing basic LPeg patterns. This
--- module provides convenience functions for creating and working with other
--- more advanced patterns and concepts.
---
--- Operator | Description
--- ---------------------|------------
--- `lpeg.P(string)` | Matches `string` literally.
--- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters.
--- `lpeg.S(string)` | Matches any character in set `string`.
--- `lpeg.R("`_`xy`_`")` | Matches any character between range `x` and `y`.
--- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`.
--- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`.
--- `patt1 * patt2` | Matches `patt1` followed by `patt2`.
--- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice).
--- `patt1 - patt2` | Matches `patt1` if `patt2` does not match.
--- `-patt` | Equivalent to `("" - patt)`.
--- `#patt` | Matches `patt` but consumes no input.
---
--- The first part of this document deals with rapidly constructing a simple
--- lexer. The next part deals with more advanced techniques, such as custom
--- coloring and embedding lexers within one another. Following that is a
--- discussion about code folding, or being able to tell Scintilla which code
--- blocks are "foldable" (temporarily hideable from view). After that are
--- instructions on how to use LPeg lexers with the aforementioned Textadept and
--- SciTE editors. Finally there are comments on lexer performance and
--- limitations.
+-- Lexes Scintilla documents and source code with Lua and LPeg.
+--
+-- ### Writing Lua Lexers
+--
+-- Lexers highlight the syntax of source code. Scintilla (the editing component behind
+-- [Textadept][] and [SciTE][]) traditionally uses static, compiled C++ lexers which are
+-- notoriously difficult to create and/or extend. On the other hand, Lua makes it easy to to
+-- rapidly create new lexers, extend existing ones, and embed lexers within one another. Lua
+-- lexers tend to be more readable than C++ lexers too.
+--
+-- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua [LPeg library][]. The
+-- following table comes from the LPeg documentation and summarizes all you need to know about
+-- constructing basic LPeg patterns. This module provides convenience functions for creating
+-- and working with other more advanced patterns and concepts.
+--
+-- Operator | Description
+-- -|-
+-- `lpeg.P(string)` | Matches `string` literally.
+-- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters.
+-- `lpeg.S(string)` | Matches any character in set `string`.
+-- `lpeg.R("`_`xy`_`")`| Matches any character between range `x` and `y`.
+-- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`.
+-- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`.
+-- `patt1 * patt2` | Matches `patt1` followed by `patt2`.
+-- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice).
+-- `patt1 - patt2` | Matches `patt1` if `patt2` does not also match.
+-- `-patt` | Equivalent to `("" - patt)`.
+-- `#patt` | Matches `patt` but consumes no input.
+--
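+-- As a quick illustration (a sketch only; `id` below is not part of this module), these
+-- operators compose like ordinary Lua expressions. For example, a pattern matching an
+-- identifier (a letter or underscore followed by alphanumerics or underscores):
+--
+-- local id = (lpeg.R('az', 'AZ') + '_') * (lpeg.R('az', 'AZ', '09') + '_')^0
+--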
+-- The first part of this document deals with rapidly constructing a simple lexer. The next part
+-- deals with more advanced techniques, such as custom coloring and embedding lexers within one
+-- another. Following that is a discussion about code folding, or being able to tell Scintilla
+-- which code blocks are "foldable" (temporarily hideable from view). After that are instructions
+-- on how to use Lua lexers with the aforementioned Textadept and SciTE editors. Finally there
+-- are comments on lexer performance and limitations.
--
-- [LPeg library]: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html
--- [Textadept]: http://foicica.com/textadept
--- [SciTE]: http://scintilla.org/SciTE.html
+-- [Textadept]: https://orbitalquark.github.io/textadept
+-- [SciTE]: https://scintilla.org/SciTE.html
--
--- ## Lexer Basics
+-- ### Lexer Basics
--
--- The *lexers/* directory contains all lexers, including your new one. Before
--- attempting to write one from scratch though, first determine if your
--- programming language is similar to any of the 80+ languages supported. If so,
--- you may be able to copy and modify that lexer, saving some time and effort.
--- The filename of your lexer should be the name of your programming language in
--- lower case followed by a *.lua* extension. For example, a new Lua lexer has
--- the name *lua.lua*.
+-- The *lexers/* directory contains all lexers, including your new one. Before attempting to
+-- write one from scratch though, first determine if your programming language is similar to
+-- any of the 100+ languages supported. If so, you may be able to copy and modify that lexer,
+-- saving some time and effort. The filename of your lexer should be the name of your programming
+-- language in lower case followed by a *.lua* extension. For example, a new Lua lexer has the
+-- name *lua.lua*.
--
--- Note: Try to refrain from using one-character language names like "c", "d",
--- or "r". For example, Scintillua uses "ansi_c", "dmd", and "rstats",
--- respectively.
+-- Note: Try to refrain from using one-character language names like "c", "d", or "r". For
+-- example, Scintillua uses "ansi_c", "dmd", and "rstats", respectively.
--
--- ### New Lexer Template
+-- #### New Lexer Template
--
--- There is a *lexers/template.txt* file that contains a simple template for a
--- new lexer. Feel free to use it, replacing the '?'s with the name of your
--- lexer:
+-- There is a *lexers/template.txt* file that contains a simple template for a new lexer. Feel
+-- free to use it, replacing the '?'s with the name of your lexer. Consider this snippet from
+-- the template:
--
-- -- ? LPeg lexer.
--
--- local l = require('lexer')
--- local token, word_match = l.token, l.word_match
--- local P, R, S = lpeg.P, lpeg.R, lpeg.S
+-- local lexer = require('lexer')
+-- local token, word_match = lexer.token, lexer.word_match
+-- local P, S = lpeg.P, lpeg.S
--
--- local M = {_NAME = '?'}
+-- local lex = lexer.new('?')
--
-- -- Whitespace.
--- local ws = token(l.WHITESPACE, l.space^1)
+-- local ws = token(lexer.WHITESPACE, lexer.space^1)
+-- lex:add_rule('whitespace', ws)
+--
+-- [...]
+--
+-- return lex
+--
+-- The first 3 lines of code simply define often used convenience variables. The fourth and
+-- last lines [define](#lexer.new) and return the lexer object Scintilla uses; they are very
+-- important and must be part of every lexer. The fifth line defines something called a "token",
+-- an essential building block of lexers. You will learn about tokens shortly. The sixth line
+-- defines a lexer grammar rule, which you will learn about later, as well as token styles. (Be
+-- aware that it is common practice to combine these two lines for short rules.) Note, however,
+-- the `local` prefix in front of variables, which is needed so as not to affect Lua's global
+-- environment. All in all, this is a minimal, working lexer that you can build on.
+--
+-- #### Tokens
+--
+-- Take a moment to think about your programming language's structure. What kind of key
+-- elements does it have? In the template shown earlier, one predefined element all languages
+-- have is whitespace. Your language probably also has elements like comments, strings, and
+-- keywords. Lexers refer to these elements as "tokens". Tokens are the fundamental "building
+-- blocks" of lexers. Lexers break down source code into tokens for coloring, which results
+-- in the syntax highlighting familiar to you. It is up to you how specific your lexer is
+-- when it comes to tokens. Perhaps only distinguishing between keywords and identifiers is
+-- necessary, or maybe recognizing constants and built-in functions, methods, or libraries is
+-- desirable. The Lua lexer, for example, defines 11 tokens: whitespace, keywords, built-in
+-- functions, constants, built-in libraries, identifiers, strings, comments, numbers, labels,
+-- and operators. Even though constants, built-in functions, and built-in libraries are subsets
+-- of identifiers, Lua programmers find it helpful for the lexer to distinguish between them
+-- all. It is perfectly acceptable to just recognize keywords and identifiers.
+--
+-- In a lexer, tokens consist of a token name and an LPeg pattern that matches a sequence of
+-- characters recognized as an instance of that token. Create tokens using the [`lexer.token()`]()
+-- function. Let us examine the "whitespace" token defined in the template shown earlier:
+--
+-- local ws = token(lexer.WHITESPACE, lexer.space^1)
+--
+-- At first glance, the first argument does not appear to be a string name and the second
+-- argument does not appear to be an LPeg pattern. Perhaps you expected something like:
--
--- M._rules = {
--- {'whitespace', ws},
--- }
+-- local ws = token('whitespace', S('\t\v\f\n\r ')^1)
--
--- M._tokenstyles = {
+-- The `lexer` module actually provides a convenient list of common token names and common LPeg
+-- patterns for you to use. Token names include [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](),
+-- [`lexer.COMMENT`](), [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](),
+-- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](), [`lexer.PREPROCESSOR`](),
+-- [`lexer.CONSTANT`](), [`lexer.VARIABLE`](), [`lexer.FUNCTION`](), [`lexer.CLASS`](),
+-- [`lexer.TYPE`](), [`lexer.LABEL`](), [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns
+-- include [`lexer.any`](), [`lexer.alpha`](), [`lexer.digit`](), [`lexer.alnum`](),
+-- [`lexer.lower`](), [`lexer.upper`](), [`lexer.xdigit`](), [`lexer.graph`](), [`lexer.print`](),
+-- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](), [`lexer.nonnewline`](),
+-- [`lexer.dec_num`](), [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](),
+-- [`lexer.float`](), [`lexer.number`](), and [`lexer.word`](). You may use your own token names
+-- if none of the above fit your language, but an advantage to using predefined token names is
+-- that your lexer's tokens will inherit the universal syntax highlighting color theme used by
+-- your text editor.
+--
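+-- If you do use a custom token name, pair it with an [`lexer.add_style()`]() call so the
+-- token still gets highlighted. A minimal sketch, mirroring the Java lexer's "annotation"
+-- token:
+--
+-- local annotation = token('annotation', '@' * lexer.word)
+-- lex:add_rule('annotation', annotation)
+-- lex:add_style('annotation', lexer.styles.preprocessor)
+--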
+-- ##### Example Tokens
+--
+-- So, how might you define other tokens like keywords, comments, and strings? Here are some
+-- examples.
--
--- }
+-- **Keywords**
--
--- return M
+-- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered choices, use another
+-- convenience function: [`lexer.word_match()`](). It is much easier and more efficient to
+-- write word matches like:
--
--- The first 3 lines of code simply define often used convenience variables. The
--- 5th and last lines define and return the lexer object Scintilla uses; they
--- are very important and must be part of every lexer. The sixth line defines
--- something called a "token", an essential building block of lexers. You will
--- learn about tokens shortly. The rest of the code defines a set of grammar
--- rules and token styles. You will learn about those later. Note, however, the
--- `M.` prefix in front of `_rules` and `_tokenstyles`: not only do these tables
--- belong to their respective lexers, but any non-local variables need the `M.`
--- prefix too so-as not to affect Lua's global environment. All in all, this is
--- a minimal, working lexer that you can build on.
---
--- ### Tokens
---
--- Take a moment to think about your programming language's structure. What kind
--- of key elements does it have? In the template shown earlier, one predefined
--- element all languages have is whitespace. Your language probably also has
--- elements like comments, strings, and keywords. Lexers refer to these elements
--- as "tokens". Tokens are the fundamental "building blocks" of lexers. Lexers
--- break down source code into tokens for coloring, which results in the syntax
--- highlighting familiar to you. It is up to you how specific your lexer is when
--- it comes to tokens. Perhaps only distinguishing between keywords and
--- identifiers is necessary, or maybe recognizing constants and built-in
--- functions, methods, or libraries is desirable. The Lua lexer, for example,
--- defines 11 tokens: whitespace, comments, strings, numbers, keywords, built-in
--- functions, constants, built-in libraries, identifiers, labels, and operators.
--- Even though constants, built-in functions, and built-in libraries are subsets
--- of identifiers, Lua programmers find it helpful for the lexer to distinguish
--- between them all. It is perfectly acceptable to just recognize keywords and
--- identifiers.
---
--- In a lexer, tokens consist of a token name and an LPeg pattern that matches a
--- sequence of characters recognized as an instance of that token. Create tokens
--- using the [`lexer.token()`]() function. Let us examine the "whitespace" token
--- defined in the template shown earlier:
+-- local keyword = token(lexer.KEYWORD, lexer.word_match{
+-- 'keyword_1', 'keyword_2', ..., 'keyword_n'
+-- })
--
--- local ws = token(l.WHITESPACE, l.space^1)
+-- local case_insensitive_keyword = token(lexer.KEYWORD, lexer.word_match({
+-- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n'
+-- }, true))
--
--- At first glance, the first argument does not appear to be a string name and
--- the second argument does not appear to be an LPeg pattern. Perhaps you
--- expected something like:
+-- local hyphened_keyword = token(lexer.KEYWORD, lexer.word_match{
+-- 'keyword-1', 'keyword-2', ..., 'keyword-n'
+-- })
--
--- local ws = token('whitespace', S('\t\v\f\n\r ')^1)
+-- For short keyword lists, you can use a single string of words. For example:
--
--- The `lexer` (`l`) module actually provides a convenient list of common token
--- names and common LPeg patterns for you to use. Token names include
--- [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](), [`lexer.COMMENT`](),
--- [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](),
--- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](),
--- [`lexer.PREPROCESSOR`](), [`lexer.CONSTANT`](), [`lexer.VARIABLE`](),
--- [`lexer.FUNCTION`](), [`lexer.CLASS`](), [`lexer.TYPE`](), [`lexer.LABEL`](),
--- [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns include
--- [`lexer.any`](), [`lexer.ascii`](), [`lexer.extend`](), [`lexer.alpha`](),
--- [`lexer.digit`](), [`lexer.alnum`](), [`lexer.lower`](), [`lexer.upper`](),
--- [`lexer.xdigit`](), [`lexer.cntrl`](), [`lexer.graph`](), [`lexer.print`](),
--- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](),
--- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](),
--- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](),
--- [`lexer.float`](), and [`lexer.word`](). You may use your own token names if
--- none of the above fit your language, but an advantage to using predefined
--- token names is that your lexer's tokens will inherit the universal syntax
--- highlighting color theme used by your text editor.
---
--- #### Example Tokens
---
--- So, how might you define other tokens like comments, strings, and keywords?
--- Here are some examples.
+-- local keyword = token(lexer.KEYWORD, lexer.word_match('key_1 key_2 ... key_n'))
--
-- **Comments**
--
-- Line-style comments with a prefix character(s) are easy to express with LPeg:
--
--- local shell_comment = token(l.COMMENT, '#' * l.nonnewline^0)
--- local c_line_comment = token(l.COMMENT, '//' * l.nonnewline_esc^0)
+-- local shell_comment = token(lexer.COMMENT, lexer.to_eol('#'))
+-- local c_line_comment = token(lexer.COMMENT, lexer.to_eol('//', true))
--
--- The comments above start with a '#' or "//" and go to the end of the line.
--- The second comment recognizes the next line also as a comment if the current
--- line ends with a '\' escape character.
+-- The comments above start with a '#' or "//" and go to the end of the line. The second comment
+-- recognizes the next line also as a comment if the current line ends with a '\' escape character.
--
--- C-style "block" comments with a start and end delimiter are also easy to
--- express:
+-- C-style "block" comments with a start and end delimiter are also easy to express:
--
--- local c_comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
+-- local c_comment = token(lexer.COMMENT, lexer.range('/*', '*/'))
--
--- This comment starts with a "/\*" sequence and contains anything up to and
--- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer
--- can recognize unfinished comments as comments and highlight them properly.
+-- This comment starts with a "/\*" sequence and contains anything up to and including an ending
+-- "\*/" sequence. The ending "\*/" is optional so the lexer can recognize unfinished comments
+-- as comments and highlight them properly.
--
-- **Strings**
--
--- It is tempting to think that a string is not much different from the block
--- comment shown above in that both have start and end delimiters:
+-- Most programming languages allow escape sequences in strings such that a sequence like
+-- "\\&quot;" in a double-quoted string indicates that the '&quot;' is not the end of the
+-- string. [`lexer.range()`]() handles escapes inherently.
--
--- local dq_str = '"' * (l.any - '"')^0 * P('"')^-1
--- local sq_str = "'" * (l.any - "'")^0 * P("'")^-1
--- local simple_string = token(l.STRING, dq_str + sq_str)
+-- local dq_str = lexer.range('"')
+-- local sq_str = lexer.range("'")
+-- local string = token(lexer.STRING, dq_str + sq_str)
--
--- However, most programming languages allow escape sequences in strings such
--- that a sequence like "\\&quot;" in a double-quoted string indicates that the
--- '&quot;' is not the end of the string. The above token incorrectly matches
--- such a string. Instead, use the [`lexer.delimited_range()`]() convenience
--- function.
+-- In this case, the lexer treats '\' as an escape character in a string sequence.
--
--- local dq_str = l.delimited_range('"')
--- local sq_str = l.delimited_range("'")
--- local string = token(l.STRING, dq_str + sq_str)
+-- **Numbers**
--
--- In this case, the lexer treats '\' as an escape character in a string
--- sequence.
+-- Most programming languages have the same format for integer and float tokens, so it might
+-- be as simple as using a predefined LPeg pattern:
--
--- **Keywords**
+-- local number = token(lexer.NUMBER, lexer.number)
--
--- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered
--- choices, use another convenience function: [`lexer.word_match()`](). It is
--- much easier and more efficient to write word matches like:
+-- However, some languages allow postfix characters on integers.
--
--- local keyword = token(l.KEYWORD, l.word_match{
--- 'keyword_1', 'keyword_2', ..., 'keyword_n'
--- })
+-- local integer = P('-')^-1 * (lexer.dec_num * S('lL')^-1)
+-- local number = token(lexer.NUMBER, lexer.float + lexer.hex_num + integer)
--
--- local case_insensitive_keyword = token(l.KEYWORD, l.word_match({
--- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n'
--- }, nil, true))
+-- Your language may need other tweaks, but it is up to you how fine-grained you want your
+-- highlighting to be. After all, you are not writing a compiler or interpreter!
--
--- local hyphened_keyword = token(l.KEYWORD, l.word_match({
--- 'keyword-1', 'keyword-2', ..., 'keyword-n'
--- }, '-'))
+-- #### Rules
--
--- By default, characters considered to be in keywords are in the set of
--- alphanumeric characters and underscores. The last token demonstrates how to
--- allow '-' (hyphen) characters to be in keywords as well.
+-- Programming languages have grammars, which specify valid token structure. For example,
+-- comments usually cannot appear within a string. Grammars consist of rules, which are simply
+-- combinations of tokens. Recall from the lexer template the [`lexer.add_rule()`]() call,
+-- which adds a rule to the lexer's grammar:
--
--- **Numbers**
+-- lex:add_rule('whitespace', ws)
--
--- Most programming languages have the same format for integer and float tokens,
--- so it might be as simple as using a couple of predefined LPeg patterns:
+-- Each rule has an associated name, but rule names are completely arbitrary and serve only to
+-- identify and distinguish between different rules. Rule order is important: if text does not
+-- match the first rule added to the grammar, the lexer tries to match the second rule added, and
+-- so on. Right now this lexer simply matches whitespace tokens under a rule named "whitespace".
--
--- local number = token(l.NUMBER, l.float + l.integer)
+-- To illustrate the importance of rule order, here is an example of a simplified Lua lexer:
--
--- However, some languages allow postfix characters on integers.
+-- lex:add_rule('whitespace', token(lexer.WHITESPACE, ...))
+-- lex:add_rule('keyword', token(lexer.KEYWORD, ...))
+-- lex:add_rule('identifier', token(lexer.IDENTIFIER, ...))
+-- lex:add_rule('string', token(lexer.STRING, ...))
+-- lex:add_rule('comment', token(lexer.COMMENT, ...))
+-- lex:add_rule('number', token(lexer.NUMBER, ...))
+-- lex:add_rule('label', token(lexer.LABEL, ...))
+-- lex:add_rule('operator', token(lexer.OPERATOR, ...))
--
--- local integer = P('-')^-1 * (l.dec_num * S('lL')^-1)
--- local number = token(l.NUMBER, l.float + l.hex_num + integer)
+-- Note how identifiers come after keywords. In Lua, as with most programming languages,
+-- the characters allowed in keywords and identifiers are in the same set (alphanumerics
+-- plus underscores). If the lexer added the "identifier" rule before the "keyword" rule,
+-- all keywords would match identifiers and thus incorrectly highlight as identifiers instead
+-- of keywords. The same idea applies to function, constant, etc. tokens that you may want to
+-- distinguish between: their rules should come before identifiers.
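+--
+-- For example, a lexer can match function calls before plain identifiers by adding the
+-- more specific rule first (this mirrors the Java lexer's rules):
+--
+-- lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
+-- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))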
--
--- Your language may need other tweaks, but it is up to you how fine-grained you
--- want your highlighting to be. After all, you are not writing a compiler or
--- interpreter!
+-- So what about text that does not match any rules? For example in Lua, the '!' character is
+-- meaningless outside a string or comment. Normally the lexer skips over such text. If instead
+-- you want to highlight these "syntax errors", add an additional end rule:
--
--- ### Rules
+-- lex:add_rule('whitespace', ws)
+-- ...
+-- lex:add_rule('error', token(lexer.ERROR, lexer.any))
--
--- Programming languages have grammars, which specify valid token structure. For
--- example, comments usually cannot appear within a string. Grammars consist of
--- rules, which are simply combinations of tokens. Recall from the lexer
--- template the `_rules` table, which defines all the rules used by the lexer
--- grammar:
+-- This identifies and highlights any character not matched by an existing rule as a `lexer.ERROR`
+-- token.
--
--- M._rules = {
--- {'whitespace', ws},
--- }
+-- Even though the rules defined in the examples above contain a single token, rules may
+-- consist of multiple tokens. For example, a rule for an HTML tag could consist of a tag token
+-- followed by an arbitrary number of attribute tokens, allowing the lexer to highlight all
+-- tokens separately. That rule might look something like this:
--
--- Each entry in a lexer's `_rules` table consists of a rule name and its
--- associated pattern. Rule names are completely arbitrary and serve only to
--- identify and distinguish between different rules. Rule order is important: if
--- text does not match the first rule, the lexer tries the second rule, and so
--- on. This simple grammar says to match whitespace tokens under a rule named
--- "whitespace".
+-- lex:add_rule('tag', tag_start * (ws * attributes)^0 * tag_end^-1)
--
--- To illustrate the importance of rule order, here is an example of a
--- simplified Lua grammar:
+-- Note however that lexers with complex rules like these are more prone to lose track of their
+-- state, especially if they span multiple lines.
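+--
+-- As a rough sketch (the `tag_start`, `attributes`, and `tag_end` names above are
+-- illustrative, not defined by this module), those tokens might look like:
+--
+-- local tag_start = token('tag', '<' * lexer.word)
+-- local tag_end = token('tag', P('/')^-1 * '>')
+-- local attributes = token('attribute', lexer.word * '=' * lexer.range('"'))
+--
+-- Custom token names like "tag" and "attribute" would also need [`lexer.add_style()`]()
+-- calls, as shown earlier.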
--
--- M._rules = {
--- {'whitespace', ws},
--- {'keyword', keyword},
--- {'identifier', identifier},
--- {'string', string},
--- {'comment', comment},
--- {'number', number},
--- {'label', label},
--- {'operator', operator},
--- }
+-- #### Summary
--
--- Note how identifiers come after keywords. In Lua, as with most programming
--- languages, the characters allowed in keywords and identifiers are in the same
--- set (alphanumerics plus underscores). If the lexer specified the "identifier"
--- rule before the "keyword" rule, all keywords would match identifiers and thus
--- incorrectly highlight as identifiers instead of keywords. The same idea
--- applies to function, constant, etc. tokens that you may want to distinguish
--- between: their rules should come before identifiers.
+-- Lexers primarily consist of tokens and grammar rules. At your disposal are a number of
+-- convenience patterns and functions for rapidly creating a lexer. If you choose to use
+-- predefined token names for your tokens, you do not have to define how the lexer highlights
+-- them. The tokens will inherit the default syntax highlighting color theme your editor uses.
--
--- So what about text that does not match any rules? For example in Lua, the '!'
--- character is meaningless outside a string or comment. Normally the lexer
--- skips over such text. If instead you want to highlight these "syntax errors",
--- add an additional end rule:
+-- ### Advanced Techniques
--
--- M._rules = {
--- {'whitespace', ws},
--- {'error', token(l.ERROR, l.any)},
--- }
---
--- This identifies and highlights any character not matched by an existing
--- rule as an `lexer.ERROR` token.
---
--- Even though the rules defined in the examples above contain a single token,
--- rules may consist of multiple tokens. For example, a rule for an HTML tag
--- could consist of a tag token followed by an arbitrary number of attribute
--- tokens, allowing the lexer to highlight all tokens separately. The rule might
--- look something like this:
---
--- {'tag', tag_start * (ws * attributes)^0 * tag_end^-1}
---
--- Note however that lexers with complex rules like these are more prone to lose
--- track of their state.
---
--- ### Summary
---
--- Lexers primarily consist of tokens and grammar rules. At your disposal are a
--- number of convenience patterns and functions for rapidly creating a lexer. If
--- you choose to use predefined token names for your tokens, you do not have to
--- define how the lexer highlights them. The tokens will inherit the default
--- syntax highlighting color theme your editor uses.
---
--- ## Advanced Techniques
---
--- ### Styles and Styling
---
--- The most basic form of syntax highlighting is assigning different colors to
--- different tokens. Instead of highlighting with just colors, Scintilla allows
--- for more rich highlighting, or "styling", with different fonts, font sizes,
--- font attributes, and foreground and background colors, just to name a few.
--- The unit of this rich highlighting is called a "style". Styles are simply
--- strings of comma-separated property settings. By default, lexers associate
--- predefined token names like `lexer.WHITESPACE`, `lexer.COMMENT`,
--- `lexer.STRING`, etc. with particular styles as part of a universal color
--- theme. These predefined styles include [`lexer.STYLE_CLASS`](),
--- [`lexer.STYLE_COMMENT`](), [`lexer.STYLE_CONSTANT`](),
--- [`lexer.STYLE_ERROR`](), [`lexer.STYLE_EMBEDDED`](),
--- [`lexer.STYLE_FUNCTION`](), [`lexer.STYLE_IDENTIFIER`](),
--- [`lexer.STYLE_KEYWORD`](), [`lexer.STYLE_LABEL`](), [`lexer.STYLE_NUMBER`](),
--- [`lexer.STYLE_OPERATOR`](), [`lexer.STYLE_PREPROCESSOR`](),
--- [`lexer.STYLE_REGEX`](), [`lexer.STYLE_STRING`](), [`lexer.STYLE_TYPE`](),
--- [`lexer.STYLE_VARIABLE`](), and [`lexer.STYLE_WHITESPACE`](). Like with
--- predefined token names and LPeg patterns, you may define your own styles. At
--- their core, styles are just strings, so you may create new ones and/or modify
--- existing ones. Each style consists of the following comma-separated settings:
---
--- Setting | Description
--- ---------------|------------
--- font:_name_ | The name of the font the style uses.
--- size:_int_ | The size of the font the style uses.
--- [not]bold | Whether or not the font face is bold.
--- weight:_int_ | The weight or boldness of a font, between 1 and 999.
--- [not]italics | Whether or not the font face is italic.
--- [not]underlined| Whether or not the font face is underlined.
--- fore:_color_ | The foreground color of the font face.
--- back:_color_ | The background color of the font face.
--- [not]eolfilled | Does the background color extend to the end of the line?
--- case:_char_ | The case of the font ('u': upper, 'l': lower, 'm': normal).
--- [not]visible | Whether or not the text is visible.
--- [not]changeable| Whether the text is changeable or read-only.
---
--- Specify font colors in either "#RRGGBB" format, "0xBBGGRR" format, or the
--- decimal equivalent of the latter. As with token names, LPeg patterns, and
--- styles, there is a set of predefined color names, but they vary depending on
--- the current color theme in use. Therefore, it is generally not a good idea to
--- manually define colors within styles in your lexer since they might not fit
--- into a user's chosen color theme. Try to refrain from even using predefined
--- colors in a style because that color may be theme-specific. Instead, the best
--- practice is to either use predefined styles or derive new color-agnostic
--- styles from predefined ones. For example, Lua "longstring" tokens use the
--- existing `lexer.STYLE_STRING` style instead of defining a new one.
---
--- #### Example Styles
---
--- Defining styles is pretty straightforward. An empty style that inherits the
--- default theme settings is simply an empty string:
---
--- local style_nothing = ''
---
--- A similar style but with a bold font face looks like this:
+-- #### Styles and Styling
--
--- local style_bold = 'bold'
+-- The most basic form of syntax highlighting is assigning different colors to different
+-- tokens. Instead of highlighting with just colors, Scintilla allows for more rich highlighting,
+-- or "styling", with different fonts, font sizes, font attributes, and foreground and background
+-- colors, just to name a few. The unit of this rich highlighting is called a "style". Styles
+-- are simply Lua tables of properties. By default, lexers associate predefined token names like
+-- `lexer.WHITESPACE`, `lexer.COMMENT`, `lexer.STRING`, etc. with particular styles as part
+-- of a universal color theme. These predefined styles are contained in [`lexer.styles`](),
+-- and you may define your own styles. See that table's documentation for more information. As
+-- with token names, LPeg patterns, and styles, there is a set of predefined color names,
+-- but they vary depending on the current color theme in use. Therefore, it is generally not
+-- a good idea to manually define colors within styles in your lexer since they might not fit
+-- into a user's chosen color theme. Try to refrain from even using predefined colors in a
+-- style because that color may be theme-specific. Instead, the best practice is to either use
+-- predefined styles or derive new color-agnostic styles from predefined ones. For example, Lua
+-- "longstring" tokens use the existing `lexer.styles.string` style instead of defining a new one.
--
--- If you want the same style, but also with an italic font face, define the new
--- style in terms of the old one:
+-- ##### Example Styles
--
--- local style_bold_italic = style_bold..',italics'
+-- Defining styles is pretty straightforward. An empty style that inherits the default theme
+-- settings is simply an empty table:
--
--- This allows you to derive new styles from predefined ones without having to
--- rewrite them. This operation leaves the old style unchanged. Thus if you
--- had a "static variable" token whose style you wanted to base off of
--- `lexer.STYLE_VARIABLE`, it would probably look like:
+-- local style_nothing = {}
--
--- local style_static_var = l.STYLE_VARIABLE..',italics'
+-- A similar style but with a bold font face looks like this:
--
--- The color theme files in the *lexers/themes/* folder give more examples of
--- style definitions.
+-- local style_bold = {bold = true}
--
--- ### Token Styles
+-- You can derive new styles from predefined ones without having to rewrite them. This operation
+-- leaves the old style unchanged. For example, if you had a "static variable" token whose
+-- style you wanted to base off of `lexer.styles.variable`, it would probably look like:
--
--- Lexers use the `_tokenstyles` table to assign tokens to particular styles.
--- Recall the token definition and `_tokenstyles` table from the lexer template:
+-- local style_static_var = lexer.styles.variable .. {italics = true}
--
--- local ws = token(l.WHITESPACE, l.space^1)
+-- The color theme files in the *lexers/themes/* folder give more examples of style definitions.
--
--- ...
+-- #### Token Styles
--
--- M._tokenstyles = {
+-- Lexers use the [`lexer.add_style()`]() function to assign styles to particular tokens. Recall
+-- the token definition from the lexer template:
--
--- }
+-- local ws = token(lexer.WHITESPACE, lexer.space^1)
+-- lex:add_rule('whitespace', ws)
--
--- Why is a style not assigned to the `lexer.WHITESPACE` token? As mentioned
--- earlier, lexers automatically associate tokens that use predefined token
--- names with a particular style. Only tokens with custom token names need
--- manual style associations. As an example, consider a custom whitespace token:
+-- Why is a style not assigned to the `lexer.WHITESPACE` token? As mentioned earlier, lexers
+-- automatically associate tokens that use predefined token names with a particular style. Only
+-- tokens with custom token names need manual style associations. As an example, consider a
+-- custom whitespace token:
--
--- local ws = token('custom_whitespace', l.space^1)
+-- local ws = token('custom_whitespace', lexer.space^1)
--
-- Assigning a style to this token looks like:
--
--- M._tokenstyles = {
--- custom_whitespace = l.STYLE_WHITESPACE
--- }
+-- lex:add_style('custom_whitespace', lexer.styles.whitespace)
--
--- Do not confuse token names with rule names. They are completely different
--- entities. In the example above, the lexer assigns the "custom_whitespace"
--- token the existing style for `WHITESPACE` tokens. If instead you want to
--- color the background of whitespace a shade of grey, it might look like:
+-- Do not confuse token names with rule names. They are completely different entities. In the
+-- example above, the lexer associates the "custom_whitespace" token with the existing style
+-- for `lexer.WHITESPACE` tokens. If instead you prefer to color the background of whitespace
+-- a shade of grey, it might look like:
--
--- local custom_style = l.STYLE_WHITESPACE..',back:$(color.grey)'
--- M._tokenstyles = {
--- custom_whitespace = custom_style
--- }
+-- lex:add_style('custom_whitespace', lexer.styles.whitespace .. {back = lexer.colors.grey})
--
--- Notice that the lexer peforms Scintilla/SciTE-style "$()" property expansion.
--- You may also use "%()". Remember to refrain from assigning specific colors in
--- styles, but in this case, all user color themes probably define the
--- "color.grey" property.
+-- Remember to refrain from assigning specific colors in styles, but in this case, all user
+-- color themes probably define `colors.grey`.
--
--- ### Line Lexers
+-- #### Line Lexers
--
--- By default, lexers match the arbitrary chunks of text passed to them by
--- Scintilla. These chunks may be a full document, only the visible part of a
--- document, or even just portions of lines. Some lexers need to match whole
--- lines. For example, a lexer for the output of a file "diff" needs to know if
--- the line started with a '+' or '-' and then style the entire line
--- accordingly. To indicate that your lexer matches by line, use the
--- `_LEXBYLINE` field:
+-- By default, lexers match the arbitrary chunks of text passed to them by Scintilla. These
+-- chunks may be a full document, only the visible part of a document, or even just portions
+-- of lines. Some lexers need to match whole lines. For example, a lexer for the output of a
+-- file "diff" needs to know if the line started with a '+' or '-' and then style the entire
+-- line accordingly. To indicate that your lexer matches by line, create the lexer with an
+-- extra parameter:
--
--- M._LEXBYLINE = true
+-- local lex = lexer.new('?', {lex_by_line = true})
--
--- Now the input text for the lexer is a single line at a time. Keep in mind
--- that line lexers do not have the ability to look ahead at subsequent lines.
+-- Now the input text for the lexer is a single line at a time. Keep in mind that line lexers
+-- do not have the ability to look ahead at subsequent lines.
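+--
+-- For example, a hypothetical line lexer for diff-like output might style whole deletion lines
+-- with a rule like the following (the 'deletion' token name is illustrative and would also
+-- need a style via `lexer.add_style()`):
+--
+--     lex:add_rule('deletion', token('deletion', lexer.to_eol('-')))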
--
--- ### Embedded Lexers
+-- #### Embedded Lexers
--
--- Lexers embed within one another very easily, requiring minimal effort. In the
--- following sections, the lexer being embedded is called the "child" lexer and
--- the lexer a child is being embedded in is called the "parent". For example,
--- consider an HTML lexer and a CSS lexer. Either lexer stands alone for styling
--- their respective HTML and CSS files. However, CSS can be embedded inside
--- HTML. In this specific case, the CSS lexer is the "child" lexer with the HTML
--- lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This
--- sounds a lot like the case with CSS, but there is a subtle difference: PHP
--- _embeds itself_ into HTML while CSS is _embedded in_ HTML. This fundamental
--- difference results in two types of embedded lexers: a parent lexer that
--- embeds other child lexers in it (like HTML embedding CSS), and a child lexer
--- that embeds itself within a parent lexer (like PHP embedding itself in HTML).
+-- Lexers embed within one another very easily, requiring minimal effort. In the following
+-- sections, the lexer being embedded is called the "child" lexer and the lexer a child is
+-- being embedded in is called the "parent". For example, consider an HTML lexer and a CSS
+-- lexer. Either lexer stands alone for styling their respective HTML and CSS files. However, CSS
+-- can be embedded inside HTML. In this specific case, the CSS lexer is the "child" lexer with
+-- the HTML lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This sounds
+-- a lot like the case with CSS, but there is a subtle difference: PHP _embeds itself into_
+-- HTML while CSS is _embedded in_ HTML. This fundamental difference results in two types of
+-- embedded lexers: a parent lexer that embeds other child lexers in it (like HTML embedding CSS),
+-- and a child lexer that embeds itself into a parent lexer (like PHP embedding itself in HTML).
--
--- #### Parent Lexer
+-- ##### Parent Lexer
--
--- Before embedding a child lexer into a parent lexer, the parent lexer needs to
--- load the child lexer. This is done with the [`lexer.load()`]() function. For
--- example, loading the CSS lexer within the HTML lexer looks like:
+-- Before embedding a child lexer into a parent lexer, the parent lexer needs to load the child
+-- lexer. This is done with the [`lexer.load()`]() function. For example, loading the CSS lexer
+-- within the HTML lexer looks like:
--
--- local css = l.load('css')
+-- local css = lexer.load('css')
--
--- The next part of the embedding process is telling the parent lexer when to
--- switch over to the child lexer and when to switch back. The lexer refers to
--- these indications as the "start rule" and "end rule", respectively, and are
--- just LPeg patterns. Continuing with the HTML/CSS example, the transition from
--- HTML to CSS is when the lexer encounters a "style" tag with a "type"
--- attribute whose value is "text/css":
+-- The next part of the embedding process is telling the parent lexer when to switch over
+-- to the child lexer and when to switch back. The lexer refers to these indications as the
+-- "start rule" and "end rule", respectively, which are just LPeg patterns. Continuing with the
+-- HTML/CSS example, the transition from HTML to CSS is when the lexer encounters a "style"
+-- tag with a "type" attribute whose value is "text/css":
--
-- local css_tag = P('<style') * P(function(input, index)
--- if input:find('^[^>]+type="text/css"', index) then
--- return index
--- end
+-- if input:find('^[^>]+type="text/css"', index) then return index end
-- end)
--
--- This pattern looks for the beginning of a "style" tag and searches its
--- attribute list for the text "`type="text/css"`". (In this simplified example,
--- the Lua pattern does not consider whitespace between the '=' nor does it
--- consider that using single quotes is valid.) If there is a match, the
--- functional pattern returns a value instead of `nil`. In this case, the value
--- returned does not matter because we ultimately want to style the "style" tag
--- as an HTML tag, so the actual start rule looks like this:
+-- This pattern looks for the beginning of a "style" tag and searches its attribute list for
+-- the text "`type="text/css"`". (In this simplified example, the Lua pattern does not consider
+-- whitespace around the '=' nor does it consider that using single quotes is valid.) If there
+-- is a match, the functional pattern returns a value instead of `nil`. In this case, the value
+-- returned does not matter because we ultimately want to style the "style" tag as an HTML tag,
+-- so the actual start rule looks like this:
--
-- local css_start_rule = #css_tag * tag
--
--- Now that the parent knows when to switch to the child, it needs to know when
--- to switch back. In the case of HTML/CSS, the switch back occurs when the
--- lexer encounters an ending "style" tag, though the lexer should still style
--- the tag as an HTML tag:
+-- Now that the parent knows when to switch to the child, it needs to know when to switch
+-- back. In the case of HTML/CSS, the switch back occurs when the lexer encounters an ending
+-- "style" tag, though the lexer should still style the tag as an HTML tag:
--
-- local css_end_rule = #P('</style>') * tag
--
--- Once the parent loads the child lexer and defines the child's start and end
--- rules, it embeds the child with the [`lexer.embed_lexer()`]() function:
+-- Once the parent loads the child lexer and defines the child's start and end rules, it embeds
+-- the child with the [`lexer.embed()`]() function:
--
--- l.embed_lexer(M, css, css_start_rule, css_end_rule)
+-- lex:embed(css, css_start_rule, css_end_rule)
--
--- The first parameter is the parent lexer object to embed the child in, which
--- in this case is `M`. The other three parameters are the child lexer object
--- loaded earlier followed by its start and end rules.
+-- ##### Child Lexer
--
--- #### Child Lexer
+-- The process for instructing a child lexer to embed itself into a parent is very similar to
+-- embedding a child into a parent: first, load the parent lexer into the child lexer with the
+-- [`lexer.load()`]() function and then create start and end rules for the child lexer. However,
+-- in this case, call [`lexer.embed()`]() with switched arguments. For example, in the PHP lexer:
--
--- The process for instructing a child lexer to embed itself into a parent is
--- very similar to embedding a child into a parent: first, load the parent lexer
--- into the child lexer with the [`lexer.load()`]() function and then create
--- start and end rules for the child lexer. However, in this case, swap the
--- lexer object arguments to [`lexer.embed_lexer()`](). For example, in the PHP
--- lexer:
---
--- local html = l.load('html')
+-- local html = lexer.load('html')
-- local php_start_rule = token('php_tag', '<?php ')
-- local php_end_rule = token('php_tag', '?>')
--- l.embed_lexer(html, M, php_start_rule, php_end_rule)
+-- lex:add_style('php_tag', lexer.styles.embedded)
+-- html:embed(lex, php_start_rule, php_end_rule)
--
--- ### Lexers with Complex State
+-- #### Lexers with Complex State
--
--- A vast majority of lexers are not stateful and can operate on any chunk of
--- text in a document. However, there may be rare cases where a lexer does need
--- to keep track of some sort of persistent state. Rather than using `lpeg.P`
--- function patterns that set state variables, it is recommended to make use of
--- Scintilla's built-in, per-line state integers via [`lexer.line_state`](). It
--- was designed to accommodate up to 32 bit flags for tracking state.
--- [`lexer.line_from_position()`]() will return the line for any position given
--- to an `lpeg.P` function pattern. (Any positions derived from that position
--- argument will also work.)
+-- A vast majority of lexers are not stateful and can operate on any chunk of text in a
+-- document. However, there may be rare cases where a lexer does need to keep track of some
+-- sort of persistent state. Rather than using `lpeg.P` function patterns that set state
+-- variables, it is recommended to make use of Scintilla's built-in, per-line state integers via
+-- [`lexer.line_state`](). It was designed to accommodate up to 32 bit flags for tracking state.
+-- [`lexer.line_from_position()`]() will return the line for any position given to an `lpeg.P`
+-- function pattern. (Any positions derived from that position argument will also work.)
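+--
+-- As a minimal, hypothetical sketch, a function pattern that records a flag for the current
+-- line might look like:
+--
+--     local mark_line = lpeg.P(function(input, index)
+--       local line = lexer.line_from_position(index)
+--       lexer.line_state[line] = 1 -- remember that this line matched
+--       return index -- succeed without consuming input
+--     end)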
--
-- Writing stateful lexers is beyond the scope of this document.
--
--- ## Code Folding
+-- ### Code Folding
--
--- When reading source code, it is occasionally helpful to temporarily hide
--- blocks of code like functions, classes, comments, etc. This is the concept of
--- "folding". In the Textadept and SciTE editors for example, little indicators
--- in the editor margins appear next to code that can be folded at places called
--- "fold points". When the user clicks an indicator, the editor hides the code
--- associated with the indicator until the user clicks the indicator again. The
+-- When reading source code, it is occasionally helpful to temporarily hide blocks of code like
+-- functions, classes, comments, etc. This is the concept of "folding". In the Textadept and
+-- SciTE editors for example, little indicators in the editor margins appear next to code that
+-- can be folded at places called "fold points". When the user clicks an indicator, the editor
+-- hides the code associated with the indicator until the user clicks the indicator again. The
-- lexer specifies these fold points and what code exactly to fold.
--
--- The fold points for most languages occur on keywords or character sequences.
--- Examples of fold keywords are "if" and "end" in Lua and examples of fold
--- character sequences are '{', '}', "/\*", and "\*/" in C for code block and
--- comment delimiters, respectively. However, these fold points cannot occur
--- just anywhere. For example, lexers should not recognize fold keywords that
--- appear within strings or comments. The lexer's `_foldsymbols` table allows
--- you to conveniently define fold points with such granularity. For example,
--- consider C:
+-- The fold points for most languages occur on keywords or character sequences. Examples of
+-- fold keywords are "if" and "end" in Lua and examples of fold character sequences are '{',
+-- '}', "/\*", and "\*/" in C for code block and comment delimiters, respectively. However,
+-- these fold points cannot occur just anywhere. For example, lexers should not recognize fold
+-- keywords that appear within strings or comments. The [`lexer.add_fold_point()`]() function
+-- allows you to conveniently define fold points with such granularity. For example, consider C:
--
--- M._foldsymbols = {
--- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
--- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1},
--- _patterns = {'[{}]', '/%*', '%*/'}
--- }
+-- lex:add_fold_point(lexer.OPERATOR, '{', '}')
+-- lex:add_fold_point(lexer.COMMENT, '/*', '*/')
--
--- The first assignment states that any '{' or '}' that the lexer recognized as
--- an `lexer.OPERATOR` token is a fold point. The integer `1` indicates the
--- match is a beginning fold point and `-1` indicates the match is an ending
--- fold point. Likewise, the second assignment states that any "/\*" or "\*/"
--- that the lexer recognizes as part of a `lexer.COMMENT` token is a fold point.
--- The lexer does not consider any occurences of these characters outside their
--- defined tokens (such as in a string) as fold points. Finally, every
--- `_foldsymbols` table must have a `_patterns` field that contains a list of
--- [Lua patterns][] that match fold points. If the lexer encounters text that
--- matches one of those patterns, the lexer looks up the matched text in its
--- token's table in order to determine whether or not the text is a fold point.
--- In the example above, the first Lua pattern matches any '{' or '}'
--- characters. When the lexer comes across one of those characters, it checks if
--- the match is an `lexer.OPERATOR` token. If so, the lexer identifies the match
--- as a fold point. The same idea applies for the other patterns. (The '%' is in
--- the other patterns because '\*' is a special character in Lua patterns that
--- needs escaping.) How do you specify fold keywords? Here is an example for
--- Lua:
+-- The first call states that any '{' or '}' that the lexer recognizes as an `lexer.OPERATOR`
+-- token is a fold point. Likewise, the second call states that any "/\*" or "\*/" that
+-- the lexer recognizes as part of a `lexer.COMMENT` token is a fold point. The lexer does
+-- not consider any occurrences of these characters outside their defined tokens (such as in
+-- a string) as fold points. How do you specify fold keywords? Here is an example for Lua:
--
--- M._foldsymbols = {
--- [l.KEYWORD] = {
--- ['if'] = 1, ['do'] = 1, ['function'] = 1,
--- ['end'] = -1, ['repeat'] = 1, ['until'] = -1
--- },
--- _patterns = {'%l+'}
--- }
---
--- Any time the lexer encounters a lower case word, if that word is a
--- `lexer.KEYWORD` token and in the associated list of fold points, the lexer
--- identifies the word as a fold point.
+-- lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+-- lex:add_fold_point(lexer.KEYWORD, 'do', 'end')
+-- lex:add_fold_point(lexer.KEYWORD, 'function', 'end')
+-- lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until')
--
-- If your lexer has case-insensitive keywords as fold points, simply add a
--- `_case_insensitive = true` option to the `_foldsymbols` table and specify
--- keywords in lower case.
+-- `case_insensitive_fold_points = true` option to [`lexer.new()`](), and specify keywords in
+-- lower case.
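+--
+-- For example, a lexer for a hypothetical case-insensitive language might use:
+--
+--     local lex = lexer.new('?', {case_insensitive_fold_points = true})
+--     lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')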
--
--- If your lexer needs to do some additional processing to determine if a match
--- is a fold point, assign a function that returns an integer. Returning `1` or
--- `-1` indicates the match is a fold point. Returning `0` indicates it is not.
--- For example:
+-- If your lexer needs to do some additional processing in order to determine if a token is
+-- a fold point, pass a function that returns an integer to `lex:add_fold_point()`. Returning
+-- `1` indicates the token is a beginning fold point and returning `-1` indicates the token is
+-- an ending fold point. Returning `0` indicates the token is not a fold point. For example:
--
--- local function fold_strange_token(text, pos, line, s, match)
+-- local function fold_strange_token(text, pos, line, s, symbol)
-- if ... then
-- return 1 -- beginning fold point
-- elseif ... then
@@ -605,107 +467,205 @@ local M = {}
-- return 0
-- end
--
+-- lex:add_fold_point('strange_token', '|', fold_strange_token)
+--
+-- Any time the lexer encounters a '|' that is a "strange_token", it calls the `fold_strange_token`
+-- function to determine if '|' is a fold point. The lexer calls these functions with the
+-- following arguments: the text to identify fold points in, the beginning position of the
+-- current line in the text to fold, the current line's text, the position in the current line
+-- the fold point text starts at, and the fold point text itself.
+--
+-- #### Fold by Indentation
+--
+-- Some languages have significant whitespace and/or no delimiters that indicate fold points. If
+-- your lexer falls into this category and you would like to mark fold points based on changes
+-- in indentation, create the lexer with a `fold_by_indentation = true` option:
+--
+-- local lex = lexer.new('?', {fold_by_indentation = true})
+--
+-- ### Using Lexers
+--
+-- **Textadept**
+--
+-- Put your lexer in your *~/.textadept/lexers/* directory so you do not overwrite it when
+-- upgrading Textadept. Also, lexers in this directory override default lexers. Thus, Textadept
+-- loads a user *lua* lexer instead of the default *lua* lexer. This is convenient for tweaking
+-- a default lexer to your liking. Then add a [file type](#textadept.file_types) for your lexer
+-- if necessary.
+--
+-- **SciTE**
+--
+-- Create a *.properties* file for your lexer and `import` it in either your *SciTEUser.properties*
+-- or *SciTEGlobal.properties*. The contents of the *.properties* file should contain:
+--
+-- file.patterns.[lexer_name]=[file_patterns]
+-- lexer.$(file.patterns.[lexer_name])=[lexer_name]
+--
+-- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension) and
+-- `[file_patterns]` is a set of file extensions to use your lexer for.
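+--
+-- For example, assuming a hypothetical lexer named *foo* for files with a *.foo* extension,
+-- the *.properties* file might contain:
+--
+--     file.patterns.foo=*.foo
+--     lexer.$(file.patterns.foo)=foo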
+--
+-- Please note that Lua lexers ignore any styling information in *.properties* files. Your
+-- theme file in the *lexers/themes/* directory contains styling information.
+--
+-- ### Migrating Legacy Lexers
+--
+-- Legacy lexers are of the form:
+--
+-- local l = require('lexer')
+-- local token, word_match = l.token, l.word_match
+-- local P, R, S = lpeg.P, lpeg.R, lpeg.S
+--
+-- local M = {_NAME = '?'}
+--
+-- [... token and pattern definitions ...]
+--
+-- M._rules = {
+-- {'rule', pattern},
+-- [...]
+-- }
+--
+-- M._tokenstyles = {
+--       ['token'] = 'style',
+-- [...]
+-- }
+--
-- M._foldsymbols = {
--- ['strange_token'] = {['|'] = fold_strange_token},
--- _patterns = {'|'}
+-- _patterns = {...},
+-- ['token'] = {['start'] = 1, ['end'] = -1},
+-- [...]
-- }
--
--- Any time the lexer encounters a '|' that is a "strange_token", it calls the
--- `fold_strange_token` function to determine if '|' is a fold point. The lexer
--- calls these functions with the following arguments: the text to identify fold
--- points in, the beginning position of the current line in the text to fold,
--- the current line's text, the position in the current line the matched text
--- starts at, and the matched text itself.
+-- return M
--
--- [Lua patterns]: http://www.lua.org/manual/5.2/manual.html#6.4.1
+-- While Scintillua will handle such legacy lexers just fine without any changes, it is
+-- recommended that you migrate yours. The migration process is fairly straightforward:
+--
+-- 1. Replace all instances of `l` with `lexer`, as it's better practice and results in less
+-- confusion.
+-- 2. Replace `local M = {_NAME = '?'}` with `local lex = lexer.new('?')`, where `?` is the
+-- name of your legacy lexer. At the end of the lexer, change `return M` to `return lex`.
+-- 3. Instead of defining rules towards the end of your lexer, define your rules as you define
+-- your tokens and patterns using [`lex:add_rule()`](#lexer.add_rule).
+-- 4. Similarly, any custom token names should have their styles immediately defined using
+-- [`lex:add_style()`](#lexer.add_style).
+-- 5. Optionally convert any table arguments passed to [`lexer.word_match()`]() to a
+-- space-separated string of words.
+-- 6. Replace any calls to `lexer.embed_lexer(M, child, ...)` and `lexer.embed_lexer(parent,
+--    M, ...)` with [`lex:embed`](#lexer.embed)`(child, ...)` and `parent:embed(lex, ...)`,
+--    respectively.
+-- 7. Define fold points with simple calls to [`lex:add_fold_point()`](#lexer.add_fold_point). No
+-- need to mess with Lua patterns anymore.
+-- 8. Any legacy lexer options such as `M._FOLDBYINDENTATION`, `M._LEXBYLINE`, `M._lexer`,
+-- etc. should be added as table options to [`lexer.new()`]().
+-- 9. Any external lexer rule fetching and/or modifications via `lexer._RULES` should be changed
+-- to use [`lexer.get_rule()`]() and [`lexer.modify_rule()`]().
+--
+-- As an example, consider the following sample legacy lexer:
--
--- ### Fold by Indentation
+-- local l = require('lexer')
+-- local token, word_match = l.token, l.word_match
+-- local P, R, S = lpeg.P, lpeg.R, lpeg.S
--
--- Some languages have significant whitespace and/or no delimiters that indicate
--- fold points. If your lexer falls into this category and you would like to
--- mark fold points based on changes in indentation, use the
--- `_FOLDBYINDENTATION` field:
+-- local M = {_NAME = 'legacy'}
--
--- M._FOLDBYINDENTATION = true
+-- local ws = token(l.WHITESPACE, l.space^1)
+-- local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- local string = token(l.STRING, l.delimited_range('"'))
+-- local number = token(l.NUMBER, l.float + l.integer)
+-- local keyword = token(l.KEYWORD, word_match{'foo', 'bar', 'baz'})
+-- local custom = token('custom', P('quux'))
+-- local identifier = token(l.IDENTIFIER, l.word)
+-- local operator = token(l.OPERATOR, S('+-*/%^=<>,.()[]{}'))
--
--- ## Using Lexers
+-- M._rules = {
+-- {'whitespace', ws},
+-- {'keyword', keyword},
+-- {'custom', custom},
+-- {'identifier', identifier},
+-- {'string', string},
+-- {'comment', comment},
+-- {'number', number},
+-- {'operator', operator}
+-- }
--
--- ### Textadept
+-- M._tokenstyles = {
+--       ['custom'] = l.STYLE_KEYWORD .. ',bold'
+-- }
--
--- Put your lexer in your *~/.textadept/lexers/* directory so you do not
--- overwrite it when upgrading Textadept. Also, lexers in this directory
--- override default lexers. Thus, Textadept loads a user *lua* lexer instead of
--- the default *lua* lexer. This is convenient for tweaking a default lexer to
--- your liking. Then add a [file type][] for your lexer if necessary.
+-- M._foldsymbols = {
+-- _patterns = {'[{}]'},
+-- [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
+-- }
--
--- [file type]: _M.textadept.file_types.html
+-- return M
--
--- ### SciTE
+-- Following the migration steps would yield:
--
--- Create a *.properties* file for your lexer and `import` it in either your
--- *SciTEUser.properties* or *SciTEGlobal.properties*. The contents of the
--- *.properties* file should contain:
+-- local lexer = require('lexer')
+-- local token, word_match = lexer.token, lexer.word_match
+-- local P, S = lpeg.P, lpeg.S
--
--- file.patterns.[lexer_name]=[file_patterns]
--- lexer.$(file.patterns.[lexer_name])=[lexer_name]
+-- local lex = lexer.new('legacy')
--
--- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension)
--- and `[file_patterns]` is a set of file extensions to use your lexer for.
+-- lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+-- lex:add_rule('keyword', token(lexer.KEYWORD, word_match('foo bar baz')))
+-- lex:add_rule('custom', token('custom', 'quux'))
+-- lex:add_style('custom', lexer.styles.keyword .. {bold = true})
+-- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+-- lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
+-- lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+-- lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+-- lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}')))
--
--- Please note that Lua lexers ignore any styling information in *.properties*
--- files. Your theme file in the *lexers/themes/* directory contains styling
--- information.
+-- lex:add_fold_point(lexer.OPERATOR, '{', '}')
--
--- ## Considerations
+-- return lex
--
--- ### Performance
+-- ### Considerations
--
--- There might be some slight overhead when initializing a lexer, but loading a
--- file from disk into Scintilla is usually more expensive. On modern computer
--- systems, I see no difference in speed between LPeg lexers and Scintilla's C++
--- ones. Optimize lexers for speed by re-arranging rules in the `_rules` table
--- so that the most common rules match first. Do keep in mind that order matters
--- for similar rules.
+-- #### Performance
--
--- ### Limitations
+-- There might be some slight overhead when initializing a lexer, but loading a file from disk
+-- into Scintilla is usually more expensive. On modern computer systems, I see no difference in
+-- speed between Lua lexers and Scintilla's C++ ones. Optimize lexers for speed by re-arranging
+-- `lexer.add_rule()` calls so that the most common rules match first. Do keep in mind that
+-- order matters for similar rules.
--
--- Embedded preprocessor languages like PHP cannot completely embed in their
--- parent languages in that the parent's tokens do not support start and end
--- rules. This mostly goes unnoticed, but code like
+-- In some cases, folding may be far more expensive than lexing, particularly in lexers with a
+-- lot of potential fold points. If your lexer is exhibiting signs of slowness, try disabling
+-- folding in your text editor first. If that speeds things up, you can try reducing the number
+-- of fold points you added, overriding `lexer.fold()` with your own implementation, or simply
+-- eliminating folding support from your lexer.
--
--- <div id="<?php echo $id; ?>">
+-- #### Limitations
--
--- or
+-- Embedded preprocessor languages like PHP cannot completely embed in their parent languages
+-- in that the parent's tokens do not support start and end rules. This mostly goes unnoticed,
+-- but code like
--
--- <div <?php if ($odd) { echo 'class="odd"'; } ?>>
+-- <div id="<?php echo $id; ?>">
--
-- will not style correctly.
--
--- ### Troubleshooting
+-- #### Troubleshooting
--
--- Errors in lexers can be tricky to debug. Lexers print Lua errors to
--- `io.stderr` and `_G.print()` statements to `io.stdout`. Running your editor
--- from a terminal is the easiest way to see errors as they occur.
+-- Errors in lexers can be tricky to debug. Lexers print Lua errors to `io.stderr` and `_G.print()`
+-- statements to `io.stdout`. Running your editor from a terminal is the easiest way to see
+-- errors as they occur.
--
--- ### Risks
+-- #### Risks
--
--- Poorly written lexers have the ability to crash Scintilla (and thus its
--- containing application), so unsaved data might be lost. However, I have only
--- observed these crashes in early lexer development, when syntax errors or
--- pattern errors are present. Once the lexer actually starts styling text
--- (either correctly or incorrectly, it does not matter), I have not observed
+-- Poorly written lexers have the ability to crash Scintilla (and thus its containing application),
+-- so unsaved data might be lost. However, I have only observed these crashes in early lexer
+-- development, when syntax errors or pattern errors are present. Once the lexer actually starts
+-- styling text (either correctly or incorrectly, it does not matter), I have not observed
-- any crashes.
--
--- ### Acknowledgements
+-- #### Acknowledgements
--
--- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list
--- that inspired me, and thanks to Roberto Ierusalimschy for LPeg.
+-- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list that provided inspiration,
+-- and thanks to Roberto Ierusalimschy for LPeg.
--
-- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html
--- @field LEXERPATH (string)
--- The path used to search for a lexer to load.
--- Identical in format to Lua's `package.path` string.
--- The default value is `package.path`.
-- @field DEFAULT (string)
-- The token name for default tokens.
-- @field WHITESPACE (string)
@@ -740,58 +700,6 @@ local M = {}
-- The token name for label tokens.
-- @field REGEX (string)
-- The token name for regex tokens.
--- @field STYLE_CLASS (string)
--- The style typically used for class definitions.
--- @field STYLE_COMMENT (string)
--- The style typically used for code comments.
--- @field STYLE_CONSTANT (string)
--- The style typically used for constants.
--- @field STYLE_ERROR (string)
--- The style typically used for erroneous syntax.
--- @field STYLE_FUNCTION (string)
--- The style typically used for function definitions.
--- @field STYLE_KEYWORD (string)
--- The style typically used for language keywords.
--- @field STYLE_LABEL (string)
--- The style typically used for labels.
--- @field STYLE_NUMBER (string)
--- The style typically used for numbers.
--- @field STYLE_OPERATOR (string)
--- The style typically used for operators.
--- @field STYLE_REGEX (string)
--- The style typically used for regular expression strings.
--- @field STYLE_STRING (string)
--- The style typically used for strings.
--- @field STYLE_PREPROCESSOR (string)
--- The style typically used for preprocessor statements.
--- @field STYLE_TYPE (string)
--- The style typically used for static types.
--- @field STYLE_VARIABLE (string)
--- The style typically used for variables.
--- @field STYLE_WHITESPACE (string)
--- The style typically used for whitespace.
--- @field STYLE_EMBEDDED (string)
--- The style typically used for embedded code.
--- @field STYLE_IDENTIFIER (string)
--- The style typically used for identifier words.
--- @field STYLE_DEFAULT (string)
--- The style all styles are based off of.
--- @field STYLE_LINENUMBER (string)
--- The style used for all margins except fold margins.
--- @field STYLE_BRACELIGHT (string)
--- The style used for highlighted brace characters.
--- @field STYLE_BRACEBAD (string)
--- The style used for unmatched brace characters.
--- @field STYLE_CONTROLCHAR (string)
--- The style used for control characters.
--- Color attributes are ignored.
--- @field STYLE_INDENTGUIDE (string)
--- The style used for indentation guides.
--- @field STYLE_CALLTIP (string)
--- The style used by call tips if [`buffer.call_tip_use_style`]() is set.
--- Only the font name, size, and color attributes are used.
--- @field STYLE_FOLDDISPLAYTEXT (string)
--- The style used for fold display text.
-- @field any (pattern)
-- A pattern that matches any single character.
-- @field ascii (pattern)
@@ -803,8 +711,7 @@ local M = {}
-- @field digit (pattern)
-- A pattern that matches any digit ('0'-'9').
-- @field alnum (pattern)
--- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z',
--- '0'-'9').
+-- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z', '0'-'9').
-- @field lower (pattern)
-- A pattern that matches any lower case character ('a'-'z').
-- @field upper (pattern)
@@ -818,18 +725,14 @@ local M = {}
-- @field print (pattern)
-- A pattern that matches any printable character (' ' to '~').
-- @field punct (pattern)
--- A pattern that matches any punctuation character ('!' to '/', ':' to '@',
--- '[' to ''', '{' to '~').
+-- A pattern that matches any punctuation character ('!' to '/', ':' to '@', '[' to ''',
+-- '{' to '~').
-- @field space (pattern)
--- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n',
--- '\r', space).
+-- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n', '\r', space).
-- @field newline (pattern)
--- A pattern that matches any set of end of line characters.
+-- A pattern that matches a sequence of end of line characters.
-- @field nonnewline (pattern)
-- A pattern that matches any single, non-newline character.
--- @field nonnewline_esc (pattern)
--- A pattern that matches any single, non-newline character or any set of end
--- of line characters escaped with '\'.
-- @field dec_num (pattern)
-- A pattern that matches a decimal number.
-- @field hex_num (pattern)
@@ -840,9 +743,12 @@ local M = {}
-- A pattern that matches either a decimal, hexadecimal, or octal number.
-- @field float (pattern)
-- A pattern that matches a floating point number.
+-- @field number (pattern)
+-- A pattern that matches a typical number, either a floating point, decimal, hexadecimal,
+-- or octal number.
-- @field word (pattern)
--- A pattern that matches a typical word. Words begin with a letter or
--- underscore and consist of alphanumeric and underscore characters.
+-- A pattern that matches a typical word. Words begin with a letter or underscore and consist
+-- of alphanumeric and underscore characters.
-- @field FOLD_BASE (number)
-- The initial (root) fold level.
-- @field FOLD_BLANK (number)
@@ -850,9 +756,8 @@ local M = {}
-- @field FOLD_HEADER (number)
-- Flag indicating the line is fold point.
-- @field fold_level (table, Read-only)
--- Table of fold level bit-masks for line numbers starting from zero.
--- Fold level masks are composed of an integer level combined with any of the
--- following bits:
+-- Table of fold level bit-masks for line numbers starting from 1.
+-- Fold level masks are composed of an integer level combined with any of the following bits:
--
-- * `lexer.FOLD_BASE`
-- The initial fold level.
@@ -861,87 +766,328 @@ local M = {}
-- * `lexer.FOLD_HEADER`
-- The line is a header, or fold point.
-- @field indent_amount (table, Read-only)
--- Table of indentation amounts in character columns, for line numbers
--- starting from zero.
+-- Table of indentation amounts in character columns, for line numbers starting from 1.
-- @field line_state (table)
--- Table of integer line states for line numbers starting from zero.
+-- Table of integer line states for line numbers starting from 1.
-- Line states can be used by lexers for keeping track of persistent states.
-- @field property (table)
-- Map of key-value string pairs.
-- @field property_expanded (table, Read-only)
--- Map of key-value string pairs with `$()` and `%()` variable replacement
--- performed in values.
+-- Map of key-value string pairs with `$()` and `%()` variable replacement performed in values.
-- @field property_int (table, Read-only)
--- Map of key-value pairs with values interpreted as numbers, or `0` if not
--- found.
+-- Map of key-value pairs with values interpreted as numbers, or `0` if not found.
-- @field style_at (table, Read-only)
-- Table of style names at positions in the buffer starting from 1.
+-- @field folding (boolean)
+-- Whether or not folding is enabled for the lexers that support it.
+-- This option is disabled by default.
+-- This is an alias for `lexer.property['fold'] = '1|0'`.
+-- @field fold_on_zero_sum_lines (boolean)
+-- Whether or not to mark as a fold point lines that contain both an ending and starting fold
+-- point. For example, `} else {` would be marked as a fold point.
+-- This option is disabled by default. This is an alias for
+-- `lexer.property['fold.on.zero.sum.lines'] = '1|0'`.
+-- @field fold_compact (boolean)
+--   Whether or not blank lines after an ending fold point are included in that fold.
+--   This option is disabled by default.
+--   This is an alias for `lexer.property['fold.compact'] = '1|0'`.
+-- @field fold_by_indentation (boolean)
+--   Whether or not to fold based on indentation level if a lexer does not have a folder.
+--   Some lexers automatically enable this option. It is disabled by default.
+--   This is an alias for `lexer.property['fold.by.indentation'] = '1|0'`.
+-- @field fold_line_groups (boolean)
+-- Whether or not to fold multiple, consecutive line groups (such as line comments and import
+-- statements) and only show the top line.
+-- This option is disabled by default.
+-- This is an alias for `lexer.property['fold.line.groups'] = '1|0'`.
module('lexer')]=]
+if not require then
+ -- Substitute for Lua's require() function, which does not require the package module to
+ -- be loaded.
+ -- Note: all modules must be in the global namespace, which is the case in LexerLPeg's default
+ -- Lua State.
+ function require(name) return name == 'lexer' and M or _G[name] end
+end
+
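+-- Substitute for Lua's print() function: it concatenates its arguments with spaces and
+-- displays the result via vis:info() so that messages appear in the editor instead of
+-- on stdout.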
+local print = function(...)
+ local args = table.pack(...)
+ local msg = {}
+ for i = 1, args.n do
+ msg[#msg + 1] = tostring(args[i])
+ end
+ vis:info(table.concat(msg, ' '))
+end
+
lpeg = require('lpeg')
local lpeg_P, lpeg_R, lpeg_S, lpeg_V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local lpeg_Ct, lpeg_Cc, lpeg_Cp = lpeg.Ct, lpeg.Cc, lpeg.Cp
-local lpeg_Cmt, lpeg_C, lpeg_Carg = lpeg.Cmt, lpeg.C, lpeg.Carg
+local lpeg_Cmt, lpeg_C = lpeg.Cmt, lpeg.C
local lpeg_match = lpeg.match
-M.LEXERPATH = package.path
+-- Searches for the given *name* in the given *path*.
+-- This is a safe implementation of Lua 5.2's `package.searchpath()` function that does not
+-- require the package module to be loaded.
+local function searchpath(name, path)
+ local tried = {}
+ for part in path:gmatch('[^;]+') do
+ local filename = part:gsub('%?', name)
+ local ok, errmsg = loadfile(filename)
+ if ok or not errmsg:find('cannot open') then return filename end
+ tried[#tried + 1] = string.format("no file '%s'", filename)
+ end
+ return nil, table.concat(tried, '\n')
+end
--- Table of loaded lexers.
-M.lexers = {}
+---
+-- Map of color name strings to color values in `0xBBGGRR` or `"#RRGGBB"` format.
+-- Note: for applications running within a terminal emulator, only 16 color values are recognized,
+-- regardless of how many colors a user's terminal actually supports. (A terminal emulator's
+-- settings determines how to actually display these recognized color values, which may end up
+-- being mapped to a completely different color set.) In order to use the light variant of a
+-- color, some terminals require a style's `bold` attribute must be set along with that normal
+-- color. Recognized color values are black (0x000000), red (0x000080), green (0x008000), yellow
+-- (0x008080), blue (0x800000), magenta (0x800080), cyan (0x808000), white (0xC0C0C0), light black
+-- (0x404040), light red (0x0000FF), light green (0x00FF00), light yellow (0x00FFFF), light blue
+-- (0xFF0000), light magenta (0xFF00FF), light cyan (0xFFFF00), and light white (0xFFFFFF).
+-- @name colors
+-- @class table
+M.colors = setmetatable({}, {
+ __index = function(_, name)
+ local color = M.property['color.' .. name]
+ return tonumber(color) or color
+ end, __newindex = function(_, name, color) M.property['color.' .. name] = color end
+})
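+
+-- For example (a sketch): `M.colors.grey` reads the 'color.grey' property, returning a number
+-- when the value parses as one, and `M.colors.grey = 0x808080` writes that property back.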
--- Keep track of the last parent lexer loaded. This lexer's rules are used for
--- proxy lexers (those that load parent and child lexers to embed) that do not
--- declare a parent lexer.
-local parent_lexer
+-- A style object that distills into a property string that can be read by the LPeg lexer.
+local style_obj = {}
+style_obj.__index = style_obj
-if not package.searchpath then
- -- Searches for the given *name* in the given *path*.
- -- This is an implementation of Lua 5.2's `package.searchpath()` function for
- -- Lua 5.1.
- function package.searchpath(name, path)
- local tried = {}
- for part in path:gmatch('[^;]+') do
- local filename = part:gsub('%?', name)
- local f = io.open(filename, 'r')
- if f then f:close() return filename end
- tried[#tried + 1] = ("no file '%s'"):format(filename)
+-- Create a style object from a style name, property table, or legacy style string.
+function style_obj.new(name_or_props)
+ local prop_string = tostring(name_or_props)
+ if type(name_or_props) == 'string' and name_or_props:find('^[%w_]+$') then
+ prop_string = string.format('$(style.%s)', name_or_props)
+ elseif type(name_or_props) == 'table' then
+ local settings = {}
+ for k, v in pairs(name_or_props) do
+ settings[#settings + 1] = type(v) ~= 'boolean' and string.format('%s:%s', k, v) or
+ string.format('%s%s', v and '' or 'not', k)
end
- return nil, table.concat(tried, '\n')
+ prop_string = table.concat(settings, ',')
end
+ return setmetatable({prop_string = prop_string}, style_obj)
end
--- Adds a rule to a lexer's current ordered list of rules.
+-- Returns a new style based on this one with the properties defined in the given table or
+-- legacy style string.
+function style_obj.__concat(self, props)
+ if type(props) == 'table' then props = tostring(style_obj.new(props)) end
+ return setmetatable({prop_string = string.format('%s,%s', self.prop_string, props)}, style_obj)
+end
+
+-- Returns this style object as property string for use with the LPeg lexer.
+function style_obj.__tostring(self) return self.prop_string end
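+
+-- For example (a sketch): `tostring(style_obj.new({bold = true, fore = 255}))` produces a
+-- property string like 'bold,fore:255' (setting order depends on pairs() iteration).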
+
+---
+-- Map of style names to style definition tables.
+--
+-- Style names consist of the following default names as well as the token names defined by lexers.
+--
+-- * `default`: The default style all others are based on.
+-- * `line_number`: The line number margin style.
+-- * `control_char`: The style of control character blocks.
+-- * `indent_guide`: The style of indentation guides.
+-- * `call_tip`: The style of call tip text. Only the `font`, `size`, `fore`, and `back` style
+-- definition fields are supported.
+-- * `fold_display_text`: The style of text displayed next to folded lines.
+-- * `class`, `comment`, `constant`, `embedded`, `error`, `function`, `identifier`, `keyword`,
+-- `label`, `number`, `operator`, `preprocessor`, `regex`, `string`, `type`, `variable`,
+-- `whitespace`: Some token names used by lexers. Some lexers may define more token names,
+-- so this list is not exhaustive.
+-- * *`lang`*`_whitespace`: A special style for whitespace tokens in lexer name *lang*. It
+-- inherits from `whitespace`, and is used in place of it for all lexers.
+--
+-- Style definition tables may contain the following fields:
+--
+-- * `font`: String font name.
+-- * `size`: Integer font size.
+-- * `bold`: Whether or not the font face is bold. The default value is `false`.
+-- * `weight`: Integer weight or boldness of a font, between 1 and 999.
+-- * `italics`: Whether or not the font face is italic. The default value is `false`.
+-- * `underlined`: Whether or not the font face is underlined. The default value is `false`.
+-- * `fore`: Font face foreground color in `0xBBGGRR` or `"#RRGGBB"` format.
+-- * `back`: Font face background color in `0xBBGGRR` or `"#RRGGBB"` format.
+-- * `eolfilled`: Whether or not the background color extends to the end of the line. The
+-- default value is `false`.
+-- * `case`: Font case: `'u'` for upper, `'l'` for lower, and `'m'` for normal, mixed case. The
+-- default value is `'m'`.
+-- * `visible`: Whether or not the text is visible. The default value is `true`.
+-- * `changeable`: Whether the text is changeable instead of read-only. The default value is
+-- `true`.
+-- @class table
+-- @name styles
+M.styles = setmetatable({}, {
+ __index = function(_, name) return style_obj.new(name) end, __newindex = function(_, name, style)
+ if getmetatable(style) ~= style_obj then style = style_obj.new(style) end
+ M.property['style.' .. name] = tostring(style)
+ end
+})
+
+-- Default styles.
+local default = {
+ 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword', 'identifier', 'operator',
+ 'error', 'preprocessor', 'constant', 'variable', 'function', 'class', 'type', 'label', 'regex',
+ 'embedded'
+}
+for _, name in ipairs(default) do
+ M[name:upper()] = name
+ M['STYLE_' .. name:upper()] = style_obj.new(name) -- backward compatibility
+end
+-- Predefined styles.
+local predefined = {
+ 'default', 'line_number', 'brace_light', 'brace_bad', 'control_char', 'indent_guide', 'call_tip',
+ 'fold_display_text'
+}
+for _, name in ipairs(predefined) do
+ M[name:upper()] = name
+ M['STYLE_' .. name:upper()] = style_obj.new(name) -- backward compatibility
+end
+
+---
+-- Adds pattern *rule* identified by string *id* to the ordered list of rules for lexer *lexer*.
-- @param lexer The lexer to add the given rule to.
--- @param name The name associated with this rule. It is used for other lexers
--- to access this particular rule from the lexer's `_RULES` table. It does not
--- have to be the same as the name passed to `token`.
+-- @param id The id associated with this rule. It does not have to be the same as the name
+-- passed to `token()`.
-- @param rule The LPeg pattern of the rule.
-local function add_rule(lexer, id, rule)
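+-- @usage lex:add_rule('keyword', token(lexer.KEYWORD, word_match('foo bar baz')))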
+-- @see modify_rule
+-- @name add_rule
+function M.add_rule(lexer, id, rule)
+ if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
if not lexer._RULES then
lexer._RULES = {}
- -- Contains an ordered list (by numerical index) of rule names. This is used
- -- in conjunction with lexer._RULES for building _TOKENRULE.
+ -- Contains an ordered list (by numerical index) of rule names. This is used in conjunction
+ -- with lexer._RULES for building _TOKENRULE.
lexer._RULEORDER = {}
end
lexer._RULES[id] = rule
lexer._RULEORDER[#lexer._RULEORDER + 1] = id
+ lexer:build_grammar()
end
--- Adds a new Scintilla style to Scintilla.
--- @param lexer The lexer to add the given style to.
--- @param token_name The name of the token associated with this style.
--- @param style A Scintilla style created from `style()`.
--- @see style
-local function add_style(lexer, token_name, style)
+---
+-- Replaces in lexer *lexer* the existing rule identified by string *id* with pattern *rule*.
+-- @param lexer The lexer to modify.
+-- @param id The id associated with this rule.
+-- @param rule The LPeg pattern of the rule.
+-- @name modify_rule
+function M.modify_rule(lexer, id, rule)
+ if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
+ lexer._RULES[id] = rule
+ lexer:build_grammar()
+end
+
+---
+-- Returns the rule identified by string *id*.
+-- @param lexer The lexer to fetch a rule from.
+-- @param id The id of the rule to fetch.
+-- @return pattern
+-- @name get_rule
+function M.get_rule(lexer, id)
+ if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
+ return lexer._RULES[id]
+end
+
+---
+-- Associates string *token_name* in lexer *lexer* with style table *style*.
+-- *style* may have the following fields:
+--
+-- * `font`: String font name.
+-- * `size`: Integer font size.
+-- * `bold`: Whether or not the font face is bold. The default value is `false`.
+-- * `weight`: Integer weight or boldness of a font, between 1 and 999.
+-- * `italics`: Whether or not the font face is italic. The default value is `false`.
+-- * `underlined`: Whether or not the font face is underlined. The default value is `false`.
+-- * `fore`: Font face foreground color in `0xBBGGRR` or `"#RRGGBB"` format.
+-- * `back`: Font face background color in `0xBBGGRR` or `"#RRGGBB"` format.
+-- * `eolfilled`: Whether or not the background color extends to the end of the line. The
+-- default value is `false`.
+-- * `case`: Font case, `'u'` for upper, `'l'` for lower, and `'m'` for normal, mixed case. The
+-- default value is `'m'`.
+-- * `visible`: Whether or not the text is visible. The default value is `true`.
+-- * `changeable`: Whether the text is changeable instead of read-only. The default value is
+-- `true`.
+--
+-- Field values may also contain "$(property.name)" expansions for properties defined in Scintilla,
+-- theme files, etc.
+-- @param lexer The lexer to add a style to.
+-- @param token_name The name of the token to associate with the style.
+-- @param style A style table (see above), style object, or style string for Scintilla.
+-- @usage lex:add_style('longstring', lexer.styles.string)
+-- @usage lex:add_style('deprecated_func', lexer.styles['function'] .. {italics = true})
+-- @usage lex:add_style('visible_ws', lexer.styles.whitespace .. {back = lexer.colors.grey})
+-- @name add_style
+function M.add_style(lexer, token_name, style)
local num_styles = lexer._numstyles
- if num_styles == 32 then num_styles = num_styles + 8 end -- skip predefined
- if num_styles >= 255 then print('Too many styles defined (255 MAX)') end
+ if num_styles == 33 then num_styles = num_styles + 8 end -- skip predefined
+ if num_styles >= 256 then print('Too many styles defined (256 MAX)') end
lexer._TOKENSTYLES[token_name], lexer._numstyles = num_styles, num_styles + 1
- lexer._EXTRASTYLES[token_name] = style
+ if type(style) == 'table' and not getmetatable(style) then style = style_obj.new(style) end
+ lexer._EXTRASTYLES[token_name] = tostring(style)
+ -- If the lexer is a proxy or a child that embedded itself, copy this style to the parent lexer.
+ if lexer._lexer then lexer._lexer:add_style(token_name, style) end
+end
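+
+-- A sketch of the table form in practice (the token names and colors here are
+-- illustrative):
+--
+--   lex:add_style('annotation', {fore = '#808000', italics = true})
+--   lex:add_style('heredoc', {back = '#333333', eolfilled = true})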
+
+---
+-- Adds to lexer *lexer* a fold point whose beginning and end tokens are string *token_name*
+-- tokens with string content *start_symbol* and *end_symbol*, respectively.
+-- In the event that *start_symbol* may or may not be a fold point depending on context and
+-- additional processing is required, *end_symbol* may be a function that ultimately returns
+-- `1` (indicating a beginning fold point), `-1` (indicating an ending fold point), or `0`
+-- (indicating no fold point). That function is passed the following arguments:
+--
+-- * `text`: The text being processed for fold points.
+-- * `pos`: The position in *text* of the beginning of the line currently being processed.
+-- * `line`: The text of the line currently being processed.
+-- * `s`: The position of *start_symbol* in *line*.
+-- * `symbol`: *start_symbol* itself.
+-- @param lexer The lexer to add a fold point to.
+-- @param token_name The token name of text that indicates a fold point.
+-- @param start_symbol The text that indicates the beginning of a fold point.
+-- @param end_symbol Either the text that indicates the end of a fold point, or a function that
+-- returns whether or not *start_symbol* is a beginning fold point (1), an ending fold point
+-- (-1), or not a fold point at all (0).
+-- @usage lex:add_fold_point(lexer.OPERATOR, '{', '}')
+-- @usage lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+-- @usage lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+-- @usage lex:add_fold_point('custom', function(text, pos, line, s, symbol) ... end)
+-- @name add_fold_point
+function M.add_fold_point(lexer, token_name, start_symbol, end_symbol)
+ if not lexer._FOLDPOINTS then lexer._FOLDPOINTS = {_SYMBOLS = {}} end
+ local symbols = lexer._FOLDPOINTS._SYMBOLS
+ if not lexer._FOLDPOINTS[token_name] then lexer._FOLDPOINTS[token_name] = {} end
+ if lexer._CASEINSENSITIVEFOLDPOINTS then
+ start_symbol = start_symbol:lower()
+ if type(end_symbol) == 'string' then end_symbol = end_symbol:lower() end
+ end
+ if type(end_symbol) == 'string' then
+ if not symbols[end_symbol] then symbols[#symbols + 1], symbols[end_symbol] = end_symbol, true end
+ lexer._FOLDPOINTS[token_name][start_symbol] = 1
+ lexer._FOLDPOINTS[token_name][end_symbol] = -1
+ else
+ lexer._FOLDPOINTS[token_name][start_symbol] = end_symbol -- function or int
+ end
+ if not symbols[start_symbol] then
+ symbols[#symbols + 1], symbols[start_symbol] = start_symbol, true
+ end
+ -- If the lexer is a proxy or a child that embedded itself, copy this fold point to the
+ -- parent lexer.
+ if lexer._lexer then lexer._lexer:add_fold_point(token_name, start_symbol, end_symbol) end
end
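+
+-- A sketch of the function form (the predicate is illustrative): treat 'if' as a
+-- fold point only when it begins a statement, pairing it with a separate '-1'
+-- entry for 'end'.
+--
+--   lex:add_fold_point(lexer.KEYWORD, 'if', function(text, pos, line, s, symbol)
+--     return line:find('^%s*if') and 1 or 0
+--   end)
+--   lex:add_fold_point(lexer.KEYWORD, 'end', -1)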
-- (Re)constructs `lexer._TOKENRULE`.
--- @param parent The parent lexer.
local function join_tokens(lexer)
local patterns, order = lexer._RULES, lexer._RULEORDER
local token_rule = patterns[order[1]]
@@ -950,218 +1096,118 @@ local function join_tokens(lexer)
return lexer._TOKENRULE
end
--- Adds a given lexer and any of its embedded lexers to a given grammar.
--- @param grammar The grammar to add the lexer to.
--- @param lexer The lexer to add.
-local function add_lexer(grammar, lexer, token_rule)
- local token_rule = join_tokens(lexer)
- local lexer_name = lexer._NAME
- for i = 1, #lexer._CHILDREN do
- local child = lexer._CHILDREN[i]
- if child._CHILDREN then add_lexer(grammar, child) end
- local child_name = child._NAME
- local rules = child._EMBEDDEDRULES[lexer_name]
- local rules_token_rule = grammar['__'..child_name] or rules.token_rule
- grammar[child_name] = (-rules.end_rule * rules_token_rule)^0 *
- rules.end_rule^-1 * lpeg_V(lexer_name)
- local embedded_child = '_'..child_name
- grammar[embedded_child] = rules.start_rule * (-rules.end_rule *
- rules_token_rule)^0 * rules.end_rule^-1
- token_rule = lpeg_V(embedded_child) + token_rule
- end
- grammar['__'..lexer_name] = token_rule -- can contain embedded lexer rules
- grammar[lexer_name] = token_rule^0
-end
+-- Metatable for Scintillua grammars.
+-- These grammars are just tables ultimately passed to `lpeg.P()`.
+local grammar_mt = {
+ __index = {
+ -- Adds lexer *lexer* and any of its embedded lexers to this grammar.
+ -- @param lexer The lexer to add.
+ add_lexer = function(self, lexer)
+ local lexer_name = lexer._PARENTNAME or lexer._NAME
+ local token_rule = lexer:join_tokens()
+ for _, child in ipairs(lexer._CHILDREN) do
+ if child._CHILDREN then self:add_lexer(child) end
+ local rules = child._EMBEDDEDRULES[lexer_name]
+ local rules_token_rule = self['__' .. child._NAME] or rules.token_rule
+ self[child._NAME] = (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1 *
+ lpeg_V(lexer_name)
+ local embedded_child = '_' .. child._NAME
+ self[embedded_child] = rules.start_rule * (-rules.end_rule * rules_token_rule)^0 *
+ rules.end_rule^-1
+ token_rule = lpeg_V(embedded_child) + token_rule
+ end
+ self['__' .. lexer_name] = token_rule -- can contain embedded lexer rules
+ self[lexer_name] = token_rule^0
+ end
+ }
+}
-- (Re)constructs `lexer._GRAMMAR`.
--- @param lexer The parent lexer.
--- @param initial_rule The name of the rule to start lexing with. The default
--- value is `lexer._NAME`. Multilang lexers use this to start with a child
--- rule if necessary.
+-- @param initial_rule The name of the rule to start lexing with. The default value is
+-- `lexer._NAME`. Multilang lexers use this to start with a child rule if necessary.
local function build_grammar(lexer, initial_rule)
- local children = lexer._CHILDREN
- if children then
- local lexer_name = lexer._NAME
- if not initial_rule then initial_rule = lexer_name end
- local grammar = {initial_rule}
- add_lexer(grammar, lexer)
+ if not lexer._RULES then return end
+ if lexer._CHILDREN then
+ if not initial_rule then initial_rule = lexer._NAME end
+ local grammar = setmetatable({initial_rule}, grammar_mt)
+ grammar:add_lexer(lexer)
lexer._INITIALRULE = initial_rule
lexer._GRAMMAR = lpeg_Ct(lpeg_P(grammar))
else
- local function tmout(_, _, t1, redrawtime_max, flag)
- if not redrawtime_max or os.clock() - t1 < redrawtime_max then return true end
- if flag then flag.timedout = true end
- end
- local tokens = join_tokens(lexer)
- -- every 500 tokens (approx. a screenful), check whether we have exceeded the timeout
- lexer._GRAMMAR = lpeg_Ct((tokens * tokens^-500 * lpeg_Cmt(lpeg_Carg(1) * lpeg_Carg(2) * lpeg_Carg(3), tmout))^0)
- end
-end
-
-local string_upper = string.upper
--- Default styles.
-local default = {
- 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword',
- 'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable',
- 'function', 'class', 'type', 'label', 'regex', 'embedded'
-}
-for i = 1, #default do
- local name, upper_name = default[i], string_upper(default[i])
- M[upper_name] = name
- if not M['STYLE_'..upper_name] then
- M['STYLE_'..upper_name] = ''
- end
-end
--- Predefined styles.
-local predefined = {
- 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
- 'indentguide', 'calltip', 'folddisplaytext'
-}
-for i = 1, #predefined do
- local name, upper_name = predefined[i], string_upper(predefined[i])
- M[upper_name] = name
- if not M['STYLE_'..upper_name] then
- M['STYLE_'..upper_name] = ''
+ lexer._GRAMMAR = lpeg_Ct(lexer:join_tokens()^0)
end
end
---
--- Initializes or loads and returns the lexer of string name *name*.
--- Scintilla calls this function in order to load a lexer. Parent lexers also
--- call this function in order to load child lexers and vice-versa. The user
--- calls this function in order to load a lexer when using Scintillua as a Lua
--- library.
--- @param name The name of the lexing language.
--- @param alt_name The alternate name of the lexing language. This is useful for
--- embedding the same child lexer with multiple sets of start and end tokens.
--- @param cache Flag indicating whether or not to load lexers from the cache.
--- This should only be `true` when initially loading a lexer (e.g. not from
--- within another lexer for embedding purposes).
--- The default value is `false`.
--- @return lexer object
--- @name load
-function M.load(name, alt_name, cache)
- if cache and M.lexers[alt_name or name] then return M.lexers[alt_name or name] end
- parent_lexer = nil -- reset
-
- -- When using Scintillua as a stand-alone module, the `property` and
- -- `property_int` tables do not exist (they are not useful). Create them to
- -- prevent errors from occurring.
- if not M.property then
- M.property, M.property_int = {}, setmetatable({}, {
- __index = function(t, k) return tonumber(M.property[k]) or 0 end,
- __newindex = function() error('read-only property') end
- })
- end
-
- -- Load the language lexer with its rules, styles, etc.
- M.WHITESPACE = (alt_name or name)..'_whitespace'
- local lexer_file, error = package.searchpath('lexers/'..name, M.LEXERPATH)
- local ok, lexer = pcall(dofile, lexer_file or '')
- if not ok then
- return nil
- end
- if alt_name then lexer._NAME = alt_name end
-
- -- Create the initial maps for token names to style numbers and styles.
- local token_styles = {}
- for i = 1, #default do token_styles[default[i]] = i - 1 end
- for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end
- lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default
- lexer._EXTRASTYLES = {}
-
- -- If the lexer is a proxy (loads parent and child lexers to embed) and does
- -- not declare a parent, try and find one and use its rules.
- if not lexer._rules and not lexer._lexer then lexer._lexer = parent_lexer end
-
- -- If the lexer is a proxy or a child that embedded itself, add its rules and
- -- styles to the parent lexer. Then set the parent to be the main lexer.
- if lexer._lexer then
- local l, _r, _s = lexer._lexer, lexer._rules, lexer._tokenstyles
- if not l._tokenstyles then l._tokenstyles = {} end
- if _r then
- for i = 1, #_r do
- -- Prevent rule id clashes.
- l._rules[#l._rules + 1] = {lexer._NAME..'_'.._r[i][1], _r[i][2]}
- end
- end
- if _s then
- for token, style in pairs(_s) do l._tokenstyles[token] = style end
- end
- lexer = l
- end
-
- -- Add the lexer's styles and build its grammar.
- if lexer._rules then
- if lexer._tokenstyles then
- for token, style in pairs(lexer._tokenstyles) do
- add_style(lexer, token, style)
+-- Embeds child lexer *child* in parent lexer *lexer* using patterns *start_rule* and *end_rule*,
+-- which signal the beginning and end of the embedded lexer, respectively.
+-- @param lexer The parent lexer.
+-- @param child The child lexer.
+-- @param start_rule The pattern that signals the beginning of the embedded lexer.
+-- @param end_rule The pattern that signals the end of the embedded lexer.
+-- @usage html:embed(css, css_start_rule, css_end_rule)
+-- @usage html:embed(lex, php_start_rule, php_end_rule) -- from php lexer
+-- @name embed
+function M.embed(lexer, child, start_rule, end_rule)
+ if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
+ -- Add child rules.
+ if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end
+ if not child._RULES then error('Cannot embed lexer with no rules') end
+ child._EMBEDDEDRULES[lexer._NAME] = {
+ start_rule = start_rule, token_rule = child:join_tokens(), end_rule = end_rule
+ }
+ if not lexer._CHILDREN then lexer._CHILDREN = {} end
+ local children = lexer._CHILDREN
+ children[#children + 1] = child
+ -- Add child styles.
+ for token, style in pairs(child._EXTRASTYLES) do lexer:add_style(token, style) end
+ -- Add child fold symbols.
+ if child._FOLDPOINTS then
+ for token_name, symbols in pairs(child._FOLDPOINTS) do
+ if token_name ~= '_SYMBOLS' then
+ for symbol, v in pairs(symbols) do lexer:add_fold_point(token_name, symbol, v) end
end
end
- for i = 1, #lexer._rules do
- add_rule(lexer, lexer._rules[i][1], lexer._rules[i][2])
- end
- build_grammar(lexer)
- end
- -- Add the lexer's unique whitespace style.
- add_style(lexer, lexer._NAME..'_whitespace', M.STYLE_WHITESPACE)
-
- -- Process the lexer's fold symbols.
- if lexer._foldsymbols and lexer._foldsymbols._patterns then
- local patterns = lexer._foldsymbols._patterns
- for i = 1, #patterns do patterns[i] = '()('..patterns[i]..')' end
end
-
- lexer.lex, lexer.fold = M.lex, M.fold
- M.lexers[alt_name or name] = lexer
- return lexer
+ lexer:build_grammar()
+ child._lexer = lexer -- use parent's tokens if child is embedding itself
end
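+
+-- A parent-side sketch (the patterns are simplified; the real html and css lexers
+-- define richer start and end rules):
+--
+--   local lexer = require('lexer')
+--   local token = lexer.token
+--   local html = lexer.load('html')
+--   local css = lexer.load('css')
+--   local css_start_rule = token(lexer.EMBEDDED, '<style' * (lexer.any - '>')^0 * '>')
+--   local css_end_rule = token(lexer.EMBEDDED, '</style>')
+--   html:embed(css, css_start_rule, css_end_rule)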
---
--- Lexes a chunk of text *text* (that has an initial style number of
--- *init_style*) with lexer *lexer*.
--- If *lexer* has a `_LEXBYLINE` flag set, the text is lexed one line at a time.
--- Otherwise the text is lexed as a whole.
--- @param lexer The lexer object to lex with.
+-- Lexes a chunk of text *text* (that has an initial style number of *init_style*) using lexer
+-- *lexer*, returning a table of token names and positions.
+-- @param lexer The lexer to lex text with.
-- @param text The text in the buffer to lex.
--- @param init_style The current style. Multiple-language lexers use this to
--- determine which language to start lexing in.
--- @param redrawtime_max Stop lexing after that many seconds and set the second return value (timedout) to true.
--- @param init Start lexing from this offset in *text* (default is 1).
+-- @param init_style The current style. Multiple-language lexers use this to determine which
+-- language to start lexing in.
-- @return table of token names and positions.
--- @return whether the lexing timed out.
-- @name lex
-function M.lex(lexer, text, init_style, redrawtime_max, init)
+function M.lex(lexer, text, init_style)
if not lexer._GRAMMAR then return {M.DEFAULT, #text + 1} end
if not lexer._LEXBYLINE then
- -- For multilang lexers, build a new grammar whose initial_rule is the
- -- current language.
+ -- For multilang lexers, build a new grammar whose initial_rule is the current language.
if lexer._CHILDREN then
for style, style_num in pairs(lexer._TOKENSTYLES) do
if style_num == init_style then
- local lexer_name = style:match('^(.+)_whitespace') or lexer._NAME
- if lexer._INITIALRULE ~= lexer_name then
- build_grammar(lexer, lexer_name)
- end
+ local lexer_name = style:match('^(.+)_whitespace') or lexer._PARENTNAME or lexer._NAME
+ if lexer._INITIALRULE ~= lexer_name then lexer:build_grammar(lexer_name) end
break
end
end
end
- local flag = {}
- return lpeg_match(lexer._GRAMMAR, text, init, os.clock(), redrawtime_max, flag), flag.timedout
+ return lpeg_match(lexer._GRAMMAR, text)
else
- local tokens = {}
local function append(tokens, line_tokens, offset)
for i = 1, #line_tokens, 2 do
tokens[#tokens + 1] = line_tokens[i]
tokens[#tokens + 1] = line_tokens[i + 1] + offset
end
end
+ local tokens = {}
local offset = 0
local grammar = lexer._GRAMMAR
- local flag = {}
for line in text:gmatch('[^\r\n]*\r?\n?') do
- local line_tokens = lpeg_match(grammar, line, init, os.clock(), redrawtime_max, flag)
+ local line_tokens = lpeg_match(grammar, line)
if line_tokens then append(tokens, line_tokens, offset) end
offset = offset + #line
-- Use the default style to the end of the line if none was specified.
@@ -1169,75 +1215,90 @@ function M.lex(lexer, text, init_style, redrawtime_max, init)
tokens[#tokens + 1], tokens[#tokens + 2] = 'default', offset + 1
end
end
- return tokens, flag.timedout
+ return tokens
end
end
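+
+-- The result is a flat array of alternating token names and end positions. A
+-- standalone sketch:
+--
+--   local lexer = require('lexer')
+--   local lua = lexer.load('lua')
+--   local tokens = lua:lex('local x = 1')
+--   for i = 1, #tokens, 2 do
+--     -- tokens[i + 1] is the position immediately after the token's text.
+--     print(tokens[i], tokens[i + 1])
+--   end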
---
--- Determines fold points in a chunk of text *text* with lexer *lexer*.
--- *text* starts at position *start_pos* on line number *start_line* with a
--- beginning fold level of *start_level* in the buffer. If *lexer* has a `_fold`
--- function or a `_foldsymbols` table, that field is used to perform folding.
--- Otherwise, if *lexer* has a `_FOLDBYINDENTATION` field set, or if a
--- `fold.by.indentation` property is set, folding by indentation is done.
--- @param lexer The lexer object to fold with.
+-- Determines fold points in a chunk of text *text* using lexer *lexer*, returning a table of
+-- fold levels associated with line numbers.
+-- *text* starts at position *start_pos* on line number *start_line* with a beginning fold
+-- level of *start_level* in the buffer.
+-- @param lexer The lexer to fold text with.
-- @param text The text in the buffer to fold.
--- @param start_pos The position in the buffer *text* starts at, starting at
--- zero.
--- @param start_line The line number *text* starts on.
+-- @param start_pos The position in the buffer *text* starts at, counting from 1.
+-- @param start_line The line number *text* starts on, counting from 1.
-- @param start_level The fold level *text* starts on.
--- @return table of fold levels.
+-- @return table of fold levels associated with line numbers.
-- @name fold
function M.fold(lexer, text, start_pos, start_line, start_level)
local folds = {}
if text == '' then return folds end
local fold = M.property_int['fold'] > 0
local FOLD_BASE = M.FOLD_BASE
- local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK
- if fold and lexer._fold then
- return lexer._fold(text, start_pos, start_line, start_level)
- elseif fold and lexer._foldsymbols then
+ local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK
+ if fold and lexer._FOLDPOINTS then
local lines = {}
- for p, l in (text..'\n'):gmatch('()(.-)\r?\n') do
- lines[#lines + 1] = {p, l}
- end
+ for p, l in (text .. '\n'):gmatch('()(.-)\r?\n') do lines[#lines + 1] = {p, l} end
local fold_zero_sum_lines = M.property_int['fold.on.zero.sum.lines'] > 0
- local fold_symbols = lexer._foldsymbols
- local fold_symbols_patterns = fold_symbols._patterns
- local fold_symbols_case_insensitive = fold_symbols._case_insensitive
+ local fold_compact = M.property_int['fold.compact'] > 0
+ local fold_points = lexer._FOLDPOINTS
+ local fold_point_symbols = fold_points._SYMBOLS
local style_at, fold_level = M.style_at, M.fold_level
local line_num, prev_level = start_line, start_level
local current_level = prev_level
- for i = 1, #lines do
- local pos, line = lines[i][1], lines[i][2]
+ for _, captures in ipairs(lines) do
+ local pos, line = captures[1], captures[2]
if line ~= '' then
- if fold_symbols_case_insensitive then line = line:lower() end
+ if lexer._CASEINSENSITIVEFOLDPOINTS then line = line:lower() end
+ local ranges = {}
+ local function is_valid_range(s, e)
+ if not s or not e then return false end
+ for i = 1, #ranges - 1, 2 do
+ local range_s, range_e = ranges[i], ranges[i + 1]
+ if s >= range_s and s <= range_e or e >= range_s and e <= range_e then
+ return false
+ end
+ end
+ ranges[#ranges + 1] = s
+ ranges[#ranges + 1] = e
+ return true
+ end
local level_decreased = false
- for j = 1, #fold_symbols_patterns do
- for s, match in line:gmatch(fold_symbols_patterns[j]) do
- local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
- local l = symbols and symbols[match]
- if type(l) == 'function' then l = l(text, pos, line, s, match) end
- if type(l) == 'number' then
- current_level = current_level + l
- if l < 0 and current_level < prev_level then
- -- Potential zero-sum line. If the level were to go back up on
- -- the same line, the line may be marked as a fold header.
- level_decreased = true
+ for _, symbol in ipairs(fold_point_symbols) do
+ local word = not symbol:find('[^%w_]')
+ local s, e = line:find(symbol, 1, true)
+ while is_valid_range(s, e) do
+ -- if not word or line:find('^%f[%w_]' .. symbol .. '%f[^%w_]', s) then
+ local word_before = s > 1 and line:find('^[%w_]', s - 1)
+ local word_after = line:find('^[%w_]', e + 1)
+ if not word or not (word_before or word_after) then
+ local symbols = fold_points[style_at[start_pos + pos - 1 + s - 1]]
+ local level = symbols and symbols[symbol]
+ if type(level) == 'function' then
+ level = level(text, pos, line, s, symbol)
+ end
+ if type(level) == 'number' then
+ current_level = current_level + level
+ if level < 0 and current_level < prev_level then
+ -- Potential zero-sum line. If the level were to go back up on the same line,
+ -- the line may be marked as a fold header.
+ level_decreased = true
+ end
end
end
+ s, e = line:find(symbol, s + 1, true)
end
end
folds[line_num] = prev_level
if current_level > prev_level then
folds[line_num] = prev_level + FOLD_HEADER
- elseif level_decreased and current_level == prev_level and
- fold_zero_sum_lines then
+ elseif level_decreased and current_level == prev_level and fold_zero_sum_lines then
if line_num > start_line then
folds[line_num] = prev_level - 1 + FOLD_HEADER
else
-- Typing within a zero-sum line.
- local level = fold_level[line_num - 1] - 1
+ local level = fold_level[line_num] - 1
if level > FOLD_HEADER then level = level - FOLD_HEADER end
if level > FOLD_BLANK then level = level - FOLD_BLANK end
folds[line_num] = level + FOLD_HEADER
@@ -1247,33 +1308,29 @@ function M.fold(lexer, text, start_pos, start_line, start_level)
if current_level < FOLD_BASE then current_level = FOLD_BASE end
prev_level = current_level
else
- folds[line_num] = prev_level + FOLD_BLANK
+ folds[line_num] = prev_level + (fold_compact and FOLD_BLANK or 0)
end
line_num = line_num + 1
end
- elseif fold and (lexer._FOLDBYINDENTATION or
- M.property_int['fold.by.indentation'] > 0) then
+ elseif fold and (lexer._FOLDBYINDENTATION or M.property_int['fold.by.indentation'] > 0) then
-- Indentation based folding.
-- Calculate indentation per line.
local indentation = {}
- for indent, line in (text..'\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do
+ for indent, line in (text .. '\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do
indentation[#indentation + 1] = line ~= '' and #indent
end
- -- Find the first non-blank line before start_line. If the current line is
- -- indented, make that previous line a header and update the levels of any
- -- blank lines inbetween. If the current line is blank, match the level of
- -- the previous non-blank line.
+ -- Find the first non-blank line before start_line. If the current line is indented, make
+ -- that previous line a header and update the levels of any blank lines in between. If the
+ -- current line is blank, match the level of the previous non-blank line.
local current_level = start_level
- for i = start_line - 1, 0, -1 do
+ for i = start_line, 1, -1 do
local level = M.fold_level[i]
if level >= FOLD_HEADER then level = level - FOLD_HEADER end
if level < FOLD_BLANK then
local indent = M.indent_amount[i]
if indentation[1] and indentation[1] > indent then
folds[i] = FOLD_BASE + indent + FOLD_HEADER
- for j = i + 1, start_line - 1 do
- folds[j] = start_level + FOLD_BLANK
- end
+ for j = i + 1, start_line - 1 do folds[j] = start_level + FOLD_BLANK end
elseif not indentation[1] then
current_level = FOLD_BASE + indent
end
@@ -1309,91 +1366,295 @@ function M.fold(lexer, text, start_pos, start_line, start_level)
return folds
end
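+
+-- A hedged sketch of interpreting the result (assumes `lex` is a loaded lexer and
+-- `text` its buffer contents; folding normally runs inside a host like Scintilla,
+-- which provides `style_at`, `fold_level`, and the fold properties):
+--
+--   lexer.property['fold'] = '1'
+--   local levels = lex:fold(text, 1, 1, lexer.FOLD_BASE)
+--   for line, level in pairs(levels) do
+--     if level >= lexer.FOLD_HEADER then print(line .. ' starts a fold') end
+--   end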
+---
+-- Creates and returns a new lexer with the given name.
+-- @param name The lexer's name.
+-- @param opts Table of lexer options. Options currently supported:
+-- * `lex_by_line`: Whether or not the lexer only processes whole lines of text (instead of
+-- arbitrary chunks of text) at a time. Line lexers cannot look ahead to subsequent lines.
+-- The default value is `false`.
+-- * `fold_by_indentation`: Whether or not the lexer defines no fold points of its own, so that
+-- fold points should be calculated based on changes in line indentation. The default value
+-- is `false`.
+-- * `case_insensitive_fold_points`: Whether or not fold points added via
+-- `lexer.add_fold_point()` ignore case. The default value is `false`.
+-- * `inherit`: Lexer to inherit from. The default value is `nil`.
+-- @usage lexer.new('rhtml', {inherit = lexer.load('html')})
+-- @name new
+function M.new(name, opts)
+ local lexer = {
+ _NAME = assert(name, 'lexer name expected'), _LEXBYLINE = opts and opts['lex_by_line'],
+ _FOLDBYINDENTATION = opts and opts['fold_by_indentation'],
+ _CASEINSENSITIVEFOLDPOINTS = opts and opts['case_insensitive_fold_points'],
+ _lexer = opts and opts['inherit']
+ }
+
+ -- Create the initial maps for token names to style numbers and styles.
+ local token_styles = {}
+ for i = 1, #default do token_styles[default[i]] = i end
+ for i = 1, #predefined do token_styles[predefined[i]] = i + 32 end
+ lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default + 1
+ lexer._EXTRASTYLES = {}
+
+ return setmetatable(lexer, {
+ __index = {
+ add_rule = M.add_rule, modify_rule = M.modify_rule, get_rule = M.get_rule,
+ add_style = M.add_style, add_fold_point = M.add_fold_point, join_tokens = join_tokens,
+ build_grammar = build_grammar, embed = M.embed, lex = M.lex, fold = M.fold
+ }
+ })
+end
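+
+-- A complete minimal lexer built on this API, sketched for a made-up language:
+--
+--   local lexer = require('lexer')
+--   local token = lexer.token
+--   local lex = lexer.new('mylang', {fold_by_indentation = true})
+--   lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+--   lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+--   lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
+--   lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+--   lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+--   return lex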
+
+-- Legacy support for older lexers.
+-- Processes the `lex._rules`, `lex._tokenstyles`, and `lex._foldsymbols` tables. Since legacy
+-- lexers may be processed up to twice, ensure their default styles and rules are not processed
+-- more than once.
+local function process_legacy_lexer(lexer)
+ local function warn(msg) --[[io.stderr:write(msg, "\n")]]end
+ if not lexer._LEGACY then
+ lexer._LEGACY = true
+ warn("lexers as tables are deprecated; use 'lexer.new()'")
+ local token_styles = {}
+ for i = 1, #default do token_styles[default[i]] = i end
+ for i = 1, #predefined do token_styles[predefined[i]] = i + 32 end
+ lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default + 1
+ lexer._EXTRASTYLES = {}
+ setmetatable(lexer, getmetatable(M.new('')))
+ if lexer._rules then
+ warn("lexer '_rules' table is deprecated; use 'add_rule()'")
+ for _, rule in ipairs(lexer._rules) do lexer:add_rule(rule[1], rule[2]) end
+ end
+ end
+ if lexer._tokenstyles then
+ warn("lexer '_tokenstyles' table is deprecated; use 'add_style()'")
+ for token, style in pairs(lexer._tokenstyles) do
+ -- If this legacy lexer is being processed a second time, only add styles added since
+ -- the first processing.
+ if not lexer._TOKENSTYLES[token] then lexer:add_style(token, style) end
+ end
+ end
+ if lexer._foldsymbols then
+ warn("lexer '_foldsymbols' table is deprecated; use 'add_fold_point()'")
+ for token_name, symbols in pairs(lexer._foldsymbols) do
+ if type(symbols) == 'table' and token_name ~= '_patterns' then
+ for symbol, v in pairs(symbols) do lexer:add_fold_point(token_name, symbol, v) end
+ end
+ end
+ if lexer._foldsymbols._case_insensitive then lexer._CASEINSENSITIVEFOLDPOINTS = true end
+ elseif lexer._fold then
+ lexer.fold = function(self, ...) return lexer._fold(...) end
+ end
+end
+
+local lexers = {} -- cache of loaded lexers
+---
+-- Initializes or loads and returns the lexer of string name *name*.
+-- Scintilla calls this function in order to load a lexer. Parent lexers also call this function
+-- in order to load child lexers and vice-versa. The user calls this function in order to load
+-- a lexer when using Scintillua as a Lua library.
+-- @param name The name of the lexing language.
+-- @param alt_name The alternate name of the lexing language. This is useful for embedding the
+-- same child lexer with multiple sets of start and end tokens.
+-- @param cache Flag indicating whether or not to load lexers from the cache. This should only
+-- be `true` when initially loading a lexer (e.g. not from within another lexer for embedding
+-- purposes). The default value is `false`.
+-- @return lexer object
+-- @name load
+function M.load(name, alt_name, cache)
+ if cache and lexers[alt_name or name] then return lexers[alt_name or name] end
+
+ -- When using Scintillua as a stand-alone module, the `property`, `property_int`, and
+ -- `property_expanded` tables do not exist (they are not useful). Create them in order to prevent
+ -- errors from occurring.
+ if not M.property then
+ M.property = setmetatable({['lexer.lpeg.home'] = package.path:gsub('/%?%.lua', '')}, {
+ __index = function() return '' end,
+ __newindex = function(t, k, v) rawset(t, k, tostring(v)) end
+ })
+ M.property_int = setmetatable({}, {
+ __index = function(t, k) return tonumber(M.property[k]) or 0 end,
+ __newindex = function() error('read-only property') end
+ })
+ M.property_expanded = setmetatable({}, {
+ __index = function(t, key)
+ return M.property[key]:gsub('[$%%](%b())', function(key) return t[key:sub(2, -2)] end)
+ end, __newindex = function() error('read-only property') end
+ })
+ end
+
+ -- Load the language lexer with its rules, styles, etc.
+ -- However, replace the default `WHITESPACE` style name with a unique whitespace style name
+ -- (and then automatically add it afterwards), since embedded lexing relies on these unique
+ -- whitespace style names. Note that loading embedded lexers changes `WHITESPACE` again,
+ -- so when adding it later, do not reference the potentially incorrect value.
+ M.WHITESPACE = (alt_name or name) .. '_whitespace'
+ local path = M.property['lexer.lpeg.home']:gsub(';', '/?.lua;') .. '/?.lua'
+ local lexer = dofile(assert(searchpath('lexers/' .. name, path)))
+ assert(lexer, string.format("'%s.lua' did not return a lexer", name))
+ if alt_name then lexer._NAME = alt_name end
+ if not getmetatable(lexer) or lexer._LEGACY then
+ -- A legacy lexer may need to be processed a second time in order to pick up any `_tokenstyles`
+ -- or `_foldsymbols` added after `lexer.embed_lexer()`.
+ process_legacy_lexer(lexer)
+ if lexer._lexer and lexer._lexer._LEGACY then
+ process_legacy_lexer(lexer._lexer) -- mainly for `_foldsymbols` edits
+ end
+ end
+ lexer:add_style((alt_name or name) .. '_whitespace', M.styles.whitespace)
+
+ -- If the lexer is a proxy or a child that embedded itself, set the parent to be the main
+ -- lexer. Keep a reference to the old parent name since embedded child rules reference and
+ -- use that name.
+ if lexer._lexer then
+ lexer = lexer._lexer
+ lexer._PARENTNAME, lexer._NAME = lexer._NAME, alt_name or name
+ end
+
+ if cache then lexers[alt_name or name] = lexer end
+ return lexer
+end
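+
+-- A standalone usage sketch (the path setup is an assumption about where the
+-- lexers are installed):
+--
+--   package.path = '/path/to/lexers/?.lua;' .. package.path
+--   local lexer = require('lexer')
+--   local lua = lexer.load('lua')
+--   local tokens = lua:lex('print("hi")')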
+
-- The following are utility functions lexers will have access to.
-- Common patterns.
M.any = lpeg_P(1)
-M.ascii = lpeg_R('\000\127')
-M.extend = lpeg_R('\000\255')
M.alpha = lpeg_R('AZ', 'az')
M.digit = lpeg_R('09')
M.alnum = lpeg_R('AZ', 'az', '09')
M.lower = lpeg_R('az')
M.upper = lpeg_R('AZ')
M.xdigit = lpeg_R('09', 'AF', 'af')
-M.cntrl = lpeg_R('\000\031')
M.graph = lpeg_R('!~')
-M.print = lpeg_R(' ~')
M.punct = lpeg_R('!/', ':@', '[\'', '{~')
M.space = lpeg_S('\t\v\f\n\r ')
-M.newline = lpeg_S('\r\n\f')^1
+M.newline = lpeg_P('\r')^-1 * '\n'
M.nonnewline = 1 - M.newline
-M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any
M.dec_num = M.digit^1
M.hex_num = '0' * lpeg_S('xX') * M.xdigit^1
M.oct_num = '0' * lpeg_R('07')^1
M.integer = lpeg_S('+-')^-1 * (M.hex_num + M.oct_num + M.dec_num)
M.float = lpeg_S('+-')^-1 *
- ((M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0) *
- (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
- (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))
+ ((M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0 * -lpeg_P('.')) *
+ (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
+ (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))
+M.number = M.float + M.integer
M.word = (M.alpha + '_') * (M.alnum + '_')^0
+-- Deprecated.
+M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any
+M.ascii = lpeg_R('\000\127')
+M.extend = lpeg_R('\000\255')
+M.cntrl = lpeg_R('\000\031')
+M.print = lpeg_R(' ~')
+
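+-- For instance (a sketch), these compose directly into token patterns:
+--
+--   local token = lexer.token
+--   local number = token(lexer.NUMBER, lexer.number)
+--   local identifier = token(lexer.IDENTIFIER, lexer.word)
+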
---
--- Creates and returns a token pattern with token name *name* and pattern
--- *patt*.
--- If *name* is not a predefined token name, its style must be defined in the
--- lexer's `_tokenstyles` table.
--- @param name The name of token. If this name is not a predefined token name,
--- then a style needs to be assiciated with it in the lexer's `_tokenstyles`
--- table.
+-- Creates and returns a token pattern with token name *name* and pattern *patt*.
+-- If *name* is not a predefined token name, its style must be defined via `lexer.add_style()`.
+-- @param name The name of the token. If this name is not a predefined token name, then a style
+-- needs to be associated with it via `lexer.add_style()`.
-- @param patt The LPeg pattern associated with the token.
-- @return pattern
--- @usage local ws = token(l.WHITESPACE, l.space^1)
--- @usage local annotation = token('annotation', '@' * l.word)
+-- @usage local ws = token(lexer.WHITESPACE, lexer.space^1)
+-- @usage local annotation = token('annotation', '@' * lexer.word)
-- @name token
function M.token(name, patt)
return lpeg_Cc(name) * patt * lpeg_Cp()
end
---
--- Creates and returns a pattern that matches a range of text bounded by
--- *chars* characters.
--- This is a convenience function for matching more complicated delimited ranges
--- like strings with escape characters and balanced parentheses. *single_line*
--- indicates whether or not the range must be on a single line, *no_escape*
--- indicates whether or not to ignore '\' as an escape character, and *balanced*
--- indicates whether or not to handle balanced ranges like parentheses and
--- requires *chars* to be composed of two characters.
+-- Creates and returns a pattern that matches from string or pattern *prefix* until the end of
+-- the line.
+-- *escape* indicates whether the end of the line can be escaped with a '\' character.
+-- @param prefix String or pattern prefix to start matching at.
+-- @param escape Optional flag indicating whether or not newlines can be escaped by a '\'
+-- character. The default value is `false`.
+-- @return pattern
+-- @usage local line_comment = lexer.to_eol('//')
+-- @usage local line_comment = lexer.to_eol(S('#;'))
+-- @name to_eol
+function M.to_eol(prefix, escape)
+ return prefix * (not escape and M.nonnewline or M.nonnewline_esc)^0
+end
+
+---
+-- Creates and returns a pattern that matches a range of text bounded by strings or patterns *s*
+-- and *e*.
+-- This is a convenience function for matching more complicated ranges like strings with escape
+-- characters, balanced parentheses, and block comments (nested or not). *e* is optional and
+-- defaults to *s*. *single_line* indicates whether or not the range must be on a single line;
+-- *escapes* indicates whether or not to allow '\' as an escape character; and *balanced*
+-- indicates whether or not to handle balanced ranges like parentheses, and requires *s* and *e*
+-- to be different.
+-- @param s String or pattern start of a range.
+-- @param e Optional string or pattern end of a range. The default value is *s*.
+-- @param single_line Optional flag indicating whether or not the range must be on a single
+-- line. The default value is `false`.
+-- @param escapes Optional flag indicating whether or not the range end may be escaped by a '\'
+-- character. The default value is `false` unless *s* and *e* are identical, single-character
+-- strings. In that case, the default value is `true`.
+-- @param balanced Optional flag indicating whether or not to match a balanced range, like the
+-- "%b" Lua pattern. This flag only applies if *s* and *e* are different.
+-- @return pattern
+-- @usage local dq_str_escapes = lexer.range('"')
+-- @usage local dq_str_noescapes = lexer.range('"', false, false)
+-- @usage local unbalanced_parens = lexer.range('(', ')')
+-- @usage local balanced_parens = lexer.range('(', ')', false, false, true)
+-- @name range
+function M.range(s, e, single_line, escapes, balanced)
+ if type(e) ~= 'string' and type(e) ~= 'userdata' then
+ e, single_line, escapes, balanced = s, e, single_line, escapes
+ end
+ local any = M.any - e
+ if single_line then any = any - '\n' end
+ if balanced then any = any - s end
+ if escapes == nil then
+ -- Only allow escapes by default for ranges with identical, single-character string delimiters.
+ escapes = type(s) == 'string' and #s == 1 and s == e
+ end
+ if escapes then any = any - '\\' + '\\' * M.any end
+ if balanced and s ~= e then
+ return lpeg_P{s * (any + lpeg_V(1))^0 * lpeg_P(e)^-1}
+ else
+ return s * any^0 * lpeg_P(e)^-1
+ end
+end
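+
+-- A sketch of a nesting block comment via the balanced flag (for languages whose
+-- block comments nest; the delimiters are illustrative):
+--
+--   local nested_comment = lexer.range('/*', '*/', false, false, true)
+--   lex:add_rule('comment', lexer.token(lexer.COMMENT, nested_comment))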
+
+-- Deprecated function. Use `lexer.range()` instead.
+-- Creates and returns a pattern that matches a range of text bounded by *chars* characters.
+-- This is a convenience function for matching more complicated delimited ranges like strings
+-- with escape characters and balanced parentheses. *single_line* indicates whether or not the
+-- range must be on a single line, *no_escape* indicates whether or not to ignore '\' as an
+-- escape character, and *balanced* indicates whether or not to handle balanced ranges like
+-- parentheses and requires *chars* to be composed of two characters.
-- @param chars The character(s) that bound the matched range.
--- @param single_line Optional flag indicating whether or not the range must be
--- on a single line.
--- @param no_escape Optional flag indicating whether or not the range end
--- character may be escaped by a '\\' character.
--- @param balanced Optional flag indicating whether or not to match a balanced
--- range, like the "%b" Lua pattern. This flag only applies if *chars*
--- consists of two different characters (e.g. "()").
+-- @param single_line Optional flag indicating whether or not the range must be on a single line.
+-- @param no_escape Optional flag indicating whether or not the range end character may be
+-- escaped by a '\\' character.
+-- @param balanced Optional flag indicating whether or not to match a balanced range, like the
+-- "%b" Lua pattern. This flag only applies if *chars* consists of two different characters
+-- (e.g. "()").
-- @return pattern
--- @usage local dq_str_escapes = l.delimited_range('"')
--- @usage local dq_str_noescapes = l.delimited_range('"', false, true)
--- @usage local unbalanced_parens = l.delimited_range('()')
--- @usage local balanced_parens = l.delimited_range('()', false, false, true)
--- @see nested_pair
+-- @usage local dq_str_escapes = lexer.delimited_range('"')
+-- @usage local dq_str_noescapes = lexer.delimited_range('"', false, true)
+-- @usage local unbalanced_parens = lexer.delimited_range('()')
+-- @usage local balanced_parens = lexer.delimited_range('()', false, false, true)
+-- @see range
-- @name delimited_range
function M.delimited_range(chars, single_line, no_escape, balanced)
+ print("lexer.delimited_range() is deprecated, use lexer.range()")
local s = chars:sub(1, 1)
local e = #chars == 2 and chars:sub(2, 2) or s
local range
local b = balanced and s or ''
local n = single_line and '\n' or ''
if no_escape then
- local invalid = lpeg_S(e..n..b)
+ local invalid = lpeg_S(e .. n .. b)
range = M.any - invalid
else
- local invalid = lpeg_S(e..n..b) + '\\'
+ local invalid = lpeg_S(e .. n .. b) + '\\'
range = M.any - invalid + '\\' * M.any
end
if balanced and s ~= e then
@@ -1404,12 +1665,10 @@ function M.delimited_range(chars, single_line, no_escape, balanced)
end
---
--- Creates and returns a pattern that matches pattern *patt* only at the
--- beginning of a line.
+-- Creates and returns a pattern that matches pattern *patt* only at the beginning of a line.
-- @param patt The LPeg pattern to match on the beginning of a line.
-- @return pattern
--- @usage local preproc = token(l.PREPROCESSOR, l.starts_line('#') *
--- l.nonnewline^0)
+-- @usage local preproc = token(lexer.PREPROCESSOR, lexer.starts_line(lexer.to_eol('#')))
-- @name starts_line
function M.starts_line(patt)
return lpeg_Cmt(lpeg_C(patt), function(input, index, match, ...)
@@ -1421,15 +1680,14 @@ function M.starts_line(patt)
end
---
--- Creates and returns a pattern that verifies that string set *s* contains the
--- first non-whitespace character behind the current match position.
+-- Creates and returns a pattern that verifies the first non-whitespace character behind the
+-- current match position is in string set *s*.
-- @param s String character set like one passed to `lpeg.S()`.
-- @return pattern
--- @usage local regex = l.last_char_includes('+-*!%^&|=,([{') *
--- l.delimited_range('/')
+-- @usage local regex = lexer.last_char_includes('+-*!%^&|=,([{') * lexer.range('/')
-- @name last_char_includes
function M.last_char_includes(s)
- s = '['..s:gsub('[-%%%[]', '%%%1')..']'
+ s = string.format('[%s]', s:gsub('[-%%%[]', '%%%1'))
return lpeg_P(function(input, index)
if index == 1 then return index end
local i = index
@@ -1438,109 +1696,77 @@ function M.last_char_includes(s)
end)
end
----
--- Returns a pattern that matches a balanced range of text that starts with
--- string *start_chars* and ends with string *end_chars*.
--- With single-character delimiters, this function is identical to
--- `delimited_range(start_chars..end_chars, false, true, true)`.
+-- Deprecated function. Use `lexer.range()` instead.
+-- Returns a pattern that matches a balanced range of text that starts with string *start_chars*
+-- and ends with string *end_chars*.
+-- With single-character delimiters, this function is identical to `delimited_range(start_chars ..
+-- end_chars, false, true, true)`.
-- @param start_chars The string starting a nested sequence.
-- @param end_chars The string ending a nested sequence.
-- @return pattern
--- @usage local nested_comment = l.nested_pair('/*', '*/')
--- @see delimited_range
+-- @usage local nested_comment = lexer.nested_pair('/*', '*/')
+-- @see range
-- @name nested_pair
function M.nested_pair(start_chars, end_chars)
+ print("lexer.nested_pair() is deprecated, use lexer.range()")
local s, e = start_chars, lpeg_P(end_chars)^-1
return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e}
end
---
--- Creates and returns a pattern that matches any single word in list *words*.
--- Words consist of alphanumeric and underscore characters, as well as the
--- characters in string set *word_chars*. *case_insensitive* indicates whether
--- or not to ignore case when matching words.
--- This is a convenience function for simplifying a set of ordered choice word
--- patterns.
--- @param words A table of words.
--- @param word_chars Optional string of additional characters considered to be
--- part of a word. By default, word characters are alphanumerics and
--- underscores ("%w_" in Lua). This parameter may be `nil` or the empty string
--- in order to indicate no additional word characters.
--- @param case_insensitive Optional boolean flag indicating whether or not the
--- word match is case-insensitive. The default is `false`.
+-- Creates and returns a pattern that matches any single word in list or string *word_list*.
+-- *case_insensitive* indicates whether or not to ignore case when matching words.
+-- This is a convenience function for simplifying a set of ordered choice word patterns.
+-- @param word_list A table of words, or a string of words separated by whitespace.
+-- @param case_insensitive Optional boolean flag indicating whether or not the word match is
+-- case-insensitive. The default value is `false`.
+-- @param word_chars Legacy parameter: a string of additional word characters. It is no longer
+-- needed, since such characters are now inferred from the words themselves.
-- @return pattern
--- @usage local keyword = token(l.KEYWORD, word_match{'foo', 'bar', 'baz'})
--- @usage local keyword = token(l.KEYWORD, word_match({'foo-bar', 'foo-baz',
--- 'bar-foo', 'bar-baz', 'baz-foo', 'baz-bar'}, '-', true))
+-- @usage local keyword = token(lexer.KEYWORD, word_match{'foo', 'bar', 'baz'})
+-- @usage local keyword = token(lexer.KEYWORD, word_match({'foo-bar', 'foo-baz', 'bar-foo',
+-- 'bar-baz', 'baz-foo', 'baz-bar'}, true))
+-- @usage local keyword = token(lexer.KEYWORD, word_match('foo bar baz'))
-- @name word_match
-function M.word_match(words, word_chars, case_insensitive)
- local word_list = {}
- for i = 1, #words do
- word_list[case_insensitive and words[i]:lower() or words[i]] = true
+function M.word_match(word_list, case_insensitive, word_chars)
+ if type(case_insensitive) == 'string' or type(word_chars) == 'boolean' then
+ -- Legacy `word_match(word_list, word_chars, case_insensitive)` form.
+ word_chars, case_insensitive = case_insensitive, word_chars
+ elseif type(word_list) == 'string' then
+ local words = word_list -- space-separated list of words
+ word_list = {}
+ for word in words:gsub('%-%-[^\n]+', ''):gmatch('%S+') do word_list[#word_list + 1] = word end
+ end
+ if not word_chars then word_chars = '' end
+ for _, word in ipairs(word_list) do
+ word_list[case_insensitive and word:lower() or word] = true
+ for char in word:gmatch('[^%w_%s]') do
+ if not word_chars:find(char, 1, true) then word_chars = word_chars .. char end
+ end
end
local chars = M.alnum + '_'
- if word_chars then chars = chars + lpeg_S(word_chars) end
+ if word_chars ~= '' then chars = chars + lpeg_S(word_chars) end
return lpeg_Cmt(chars^1, function(input, index, word)
if case_insensitive then word = word:lower() end
return word_list[word] and index or nil
end)
end
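+
+-- The string form may span lines and include '--' comments, which are stripped
+-- (a sketch; the words are illustrative):
+--
+--   local keyword = lexer.token(lexer.KEYWORD, lexer.word_match[[
+--     if then else elseif end -- control flow
+--     and or not -- boolean operators
+--   ]])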
----
--- Embeds child lexer *child* in parent lexer *parent* using patterns
--- *start_rule* and *end_rule*, which signal the beginning and end of the
--- embedded lexer, respectively.
+-- Deprecated legacy function. Use `parent:embed()` instead.
+-- Embeds child lexer *child* in parent lexer *parent* using patterns *start_rule* and *end_rule*,
+-- which signal the beginning and end of the embedded lexer, respectively.
-- @param parent The parent lexer.
-- @param child The child lexer.
--- @param start_rule The pattern that signals the beginning of the embedded
--- lexer.
+-- @param start_rule The pattern that signals the beginning of the embedded lexer.
-- @param end_rule The pattern that signals the end of the embedded lexer.
--- @usage l.embed_lexer(M, css, css_start_rule, css_end_rule)
--- @usage l.embed_lexer(html, M, php_start_rule, php_end_rule)
--- @usage l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
+-- @usage lexer.embed_lexer(M, css, css_start_rule, css_end_rule)
+-- @usage lexer.embed_lexer(html, M, php_start_rule, php_end_rule)
+-- @usage lexer.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
+-- @see embed
-- @name embed_lexer
function M.embed_lexer(parent, child, start_rule, end_rule)
- -- Add child rules.
- if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end
- if not child._RULES then -- creating a child lexer to be embedded
- if not child._rules then error('Cannot embed language with no rules') end
- for i = 1, #child._rules do
- add_rule(child, child._rules[i][1], child._rules[i][2])
- end
- end
- child._EMBEDDEDRULES[parent._NAME] = {
- ['start_rule'] = start_rule,
- token_rule = join_tokens(child),
- ['end_rule'] = end_rule
- }
- if not parent._CHILDREN then parent._CHILDREN = {} end
- local children = parent._CHILDREN
- children[#children + 1] = child
- -- Add child styles.
- if not parent._tokenstyles then parent._tokenstyles = {} end
- local tokenstyles = parent._tokenstyles
- tokenstyles[child._NAME..'_whitespace'] = M.STYLE_WHITESPACE
- if child._tokenstyles then
- for token, style in pairs(child._tokenstyles) do
- tokenstyles[token] = style
- end
- end
- -- Add child fold symbols.
- if not parent._foldsymbols then parent._foldsymbols = {} end
- if child._foldsymbols then
- for token, symbols in pairs(child._foldsymbols) do
- if not parent._foldsymbols[token] then parent._foldsymbols[token] = {} end
- for k, v in pairs(symbols) do
- if type(k) == 'number' then
- parent._foldsymbols[token][#parent._foldsymbols[token] + 1] = v
- elseif not parent._foldsymbols[token][k] then
- parent._foldsymbols[token][k] = v
- end
- end
- end
- end
- child._lexer = parent -- use parent's tokens if child is embedding itself
- parent_lexer = parent -- use parent's tokens if the calling lexer is a proxy
+ if not getmetatable(parent) then process_legacy_lexer(parent) end
+ if not getmetatable(child) then process_legacy_lexer(child) end
+ parent:embed(child, start_rule, end_rule)
end
-- Determines if the previous line is a comment.
@@ -1584,16 +1810,17 @@ local function next_line_is_comment(prefix, text, pos, line, s)
end
---
--- Returns a fold function (to be used within the lexer's `_foldsymbols` table)
--- that folds consecutive line comments that start with string *prefix*.
--- @param prefix The prefix string defining a line comment.
--- @usage [l.COMMENT] = {['--'] = l.fold_line_comments('--')}
--- @usage [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
--- @name fold_line_comments
-function M.fold_line_comments(prefix)
+-- Returns for `lexer.add_fold_point()` the parameters needed to fold consecutive lines that
+-- start with string *prefix*.
+-- @param prefix The prefix string (e.g. a line comment).
+-- @usage lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('--'))
+-- @usage lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+-- @usage lex:add_fold_point(lexer.KEYWORD, lexer.fold_consecutive_lines('import'))
+-- @name fold_consecutive_lines
+function M.fold_consecutive_lines(prefix)
local property_int = M.property_int
- return function(text, pos, line, s)
- if property_int['fold.line.comments'] == 0 then return 0 end
+ return prefix, function(text, pos, line, s)
+ if property_int['fold.line.groups'] == 0 then return 0 end
if s > 1 and line:match('^%s*()') < s then return 0 end
local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
@@ -1603,73 +1830,26 @@ function M.fold_line_comments(prefix)
end
end
-M.property_expanded = setmetatable({}, {
- -- Returns the string property value associated with string property *key*,
- -- replacing any "$()" and "%()" expressions with the values of their keys.
- __index = function(t, key)
- return M.property[key]:gsub('[$%%]%b()', function(key)
- return t[key:sub(3, -2)]
- end)
- end,
- __newindex = function() error('read-only property') end
-})
+-- Deprecated legacy function. Use `lexer.fold_consecutive_lines()` instead.
+-- Returns a fold function (to be passed to `lexer.add_fold_point()`) that folds consecutive
+-- line comments that start with string *prefix*.
+-- @param prefix The prefix string defining a line comment.
+-- @usage lex:add_fold_point(lexer.COMMENT, '--', lexer.fold_line_comments('--'))
+-- @usage lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//'))
+-- @name fold_line_comments
+function M.fold_line_comments(prefix)
+ print('lexer.fold_line_comments() is deprecated, use lexer.fold_consecutive_lines()')
+ return select(2, M.fold_consecutive_lines(prefix))
+end
--[[ The functions and fields below were defined in C.
---
--- Returns the line number of the line that contains position *pos*, which
+-- Returns the line number (starting from 1) of the line that contains position *pos*, which
-- starts from 1.
-- @param pos The position to get the line number of.
-- @return number
local function line_from_position(pos) end
-
----
--- Individual fields for a lexer instance.
--- @field _NAME The string name of the lexer.
--- @field _rules An ordered list of rules for a lexer grammar.
--- Each rule is a table containing an arbitrary rule name and the LPeg pattern
--- associated with the rule. The order of rules is important, as rules are
--- matched sequentially.
--- Child lexers should not use this table to access and/or modify their
--- parent's rules and vice-versa. Use the `_RULES` table instead.
--- @field _tokenstyles A map of non-predefined token names to styles.
--- Remember to use token names, not rule names. It is recommended to use
--- predefined styles or color-agnostic styles derived from predefined styles
--- to ensure compatibility with user color themes.
--- @field _foldsymbols A table of recognized fold points for the lexer.
--- Keys are token names with table values defining fold points. Those table
--- values have string keys of keywords or characters that indicate a fold
--- point whose values are integers. A value of `1` indicates a beginning fold
--- point and a value of `-1` indicates an ending fold point. Values can also
--- be functions that return `1`, `-1`, or `0` (indicating no fold point) for
--- keys which need additional processing.
--- There is also a required `_patterns` key whose value is a table containing
--- Lua pattern strings that match all fold points (the string keys contained
--- in token name table values). When the lexer encounters text that matches
--- one of those patterns, the matched text is looked up in its token's table
--- to determine whether or not it is a fold point.
--- There is also an optional `_case_insensitive` option that indicates whether
--- or not fold point keys are case-insensitive. If `true`, fold point keys
--- should be in lower case.
--- @field _fold If this function exists in the lexer, it is called for folding
--- the document instead of using `_foldsymbols` or indentation.
--- @field _lexer The parent lexer object whose rules should be used. This field
--- is only necessary to disambiguate a proxy lexer that loaded parent and
--- child lexers for embedding and ended up having multiple parents loaded.
--- @field _RULES A map of rule name keys with their associated LPeg pattern
--- values for the lexer.
--- This is constructed from the lexer's `_rules` table and accessible to other
--- lexers for embedded lexer applications like modifying parent or child
--- rules.
--- @field _LEXBYLINE Indicates the lexer can only process one whole line of text
--- (instead of an arbitrary chunk of text) at a time.
--- The default value is `false`. Line lexers cannot look ahead to subsequent
--- lines.
--- @field _FOLDBYINDENTATION Declares the lexer does not define fold points and
--- that fold points should be calculated based on changes in indentation.
--- @class table
--- @name lexer
-local lexer
]]
return M
diff --git a/lua/lexers/lilypond.lua b/lua/lexers/lilypond.lua
index f7bdf79..9fca571 100644
--- a/lua/lexers/lilypond.lua
+++ b/lua/lexers/lilypond.lua
@@ -1,40 +1,30 @@
--- Copyright 2006-2017 Robert Gieseke. See LICENSE.
+-- Copyright 2006-2022 Robert Gieseke. See LICENSE.
-- Lilypond LPeg lexer.
-- TODO Embed Scheme; Notes?, Numbers?
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'lilypond'}
+local lex = lexer.new('lilypond')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '%' * l.nonnewline^0
--- TODO: block comment.
-local comment = token(l.COMMENT, line_comment)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', false, true))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords, commands.
-local keyword = token(l.KEYWORD, '\\' * l.word)
+lex:add_rule('keyword', token(lexer.KEYWORD, '\\' * lexer.word))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false)))
+
+-- Comments.
+-- TODO: block comment.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- Operators.
-local operator = token(l.OPERATOR, S("{}'~<>|"))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'keyword', keyword},
- {'operator', operator},
- {'identifier', identifier},
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S("{}'~<>|")))
+
+return lex
diff --git a/lua/lexers/lisp.lua b/lua/lexers/lisp.lua
index 824e561..6a0e680 100644
--- a/lua/lexers/lisp.lua
+++ b/lua/lexers/lisp.lua
@@ -1,84 +1,62 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Lisp LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'lisp'}
+local lex = lexer.new('lisp')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline^0
-local block_comment = '#|' * (l.any - '|#')^0 * P('|#')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
-local word = l.alpha * (l.alnum + '_' + '-')^0
-
--- Strings.
-local literal = "'" * word
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, literal + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'defclass', 'defconstant', 'defgeneric', 'define-compiler-macro',
- 'define-condition', 'define-method-combination', 'define-modify-macro',
- 'define-setf-expander', 'define-symbol-macro', 'defmacro', 'defmethod',
- 'defpackage', 'defparameter', 'defsetf', 'defstruct', 'deftype', 'defun',
- 'defvar',
- 'abort', 'assert', 'block', 'break', 'case', 'catch', 'ccase', 'cerror',
- 'cond', 'ctypecase', 'declaim', 'declare', 'do', 'do*', 'do-all-symbols',
- 'do-external-symbols', 'do-symbols', 'dolist', 'dotimes', 'ecase', 'error',
- 'etypecase', 'eval-when', 'flet', 'handler-bind', 'handler-case', 'if',
- 'ignore-errors', 'in-package', 'labels', 'lambda', 'let', 'let*', 'locally',
- 'loop', 'macrolet', 'multiple-value-bind', 'proclaim', 'prog', 'prog*',
- 'prog1', 'prog2', 'progn', 'progv', 'provide', 'require', 'restart-bind',
- 'restart-case', 'restart-name', 'return', 'return-from', 'signal',
- 'symbol-macrolet', 'tagbody', 'the', 'throw', 'typecase', 'unless',
- 'unwind-protect', 'when', 'with-accessors', 'with-compilation-unit',
- 'with-condition-restarts', 'with-hash-table-iterator',
- 'with-input-from-string', 'with-open-file', 'with-open-stream',
- 'with-output-to-string', 'with-package-iterator', 'with-simple-restart',
- 'with-slots', 'with-standard-io-syntax',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'defclass', 'defconstant', 'defgeneric', 'define-compiler-macro', 'define-condition',
+ 'define-method-combination', 'define-modify-macro', 'define-setf-expander', 'define-symbol-macro',
+ 'defmacro', 'defmethod', 'defpackage', 'defparameter', 'defsetf', 'defstruct', 'deftype', 'defun',
+ 'defvar', --
+ 'abort', 'assert', 'block', 'break', 'case', 'catch', 'ccase', 'cerror', 'cond', 'ctypecase',
+ 'declaim', 'declare', 'do', 'do*', 'do-all-symbols', 'do-external-symbols', 'do-symbols',
+ 'dolist', 'dotimes', 'ecase', 'error', 'etypecase', 'eval-when', 'flet', 'handler-bind',
+ 'handler-case', 'if', 'ignore-errors', 'in-package', 'labels', 'lambda', 'let', 'let*', 'locally',
+ 'loop', 'macrolet', 'multiple-value-bind', 'proclaim', 'prog', 'prog*', 'prog1', 'prog2', 'progn',
+ 'progv', 'provide', 'require', 'restart-bind', 'restart-case', 'restart-name', 'return',
+ 'return-from', 'signal', 'symbol-macrolet', 'tagbody', 'the', 'throw', 'typecase', 'unless',
+ 'unwind-protect', 'when', 'with-accessors', 'with-compilation-unit', 'with-condition-restarts',
+ 'with-hash-table-iterator', 'with-input-from-string', 'with-open-file', 'with-open-stream',
+ 'with-output-to-string', 'with-package-iterator', 'with-simple-restart', 'with-slots',
+ 'with-standard-io-syntax', --
't', 'nil'
-}, '-'))
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, word)
+local word = lexer.alpha * (lexer.alnum + '_' + '-')^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
--- Operators.
-local operator = token(l.OPERATOR, S('<>=*/+-`@%()'))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, "'" * word + lexer.range('"') + '#\\' * lexer.any))
--- Entities.
-local entity = token('entity', '&' * word)
+-- Comments.
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#|', '|#')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
- {'entity', entity},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1))
-M._tokenstyles = {
- entity = l.STYLE_VARIABLE
-}
+-- Entities.
+lex:add_rule('entity', token('entity', '&' * word))
+lex:add_style('entity', lexer.styles.variable)
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('<>=*/+-`@%()')))
-M._foldsymbols = {
- _patterns = {'[%(%)%[%]{}]', '#|', '|#', ';'},
- [l.OPERATOR] = {
- ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {['#|'] = 1, ['|#'] = -1, [';'] = l.fold_line_comments(';')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '#|', '|#')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
-return M
+return lex
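
A brief usage sketch of the migrated lexer (assuming the object API exposes a lex method returning a flat token/position list): the new string rule also covers character literals such as #\a.

local lexer = require('lexer')
local lisp = lexer.load('lisp')
-- Expect: operator '(', identifier 'print', string '#\a', operator ')', comment '; done'.
local tokens = lisp:lex("(print #\\a) ; done")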
diff --git a/lua/lexers/litcoffee.lua b/lua/lexers/litcoffee.lua
index 48d1a2e..f097e5f 100644
--- a/lua/lexers/litcoffee.lua
+++ b/lua/lexers/litcoffee.lua
@@ -1,21 +1,21 @@
--- Copyright 2006-2017 Robert Gieseke. See LICENSE.
+-- Copyright 2006-2022 Robert Gieseke. See LICENSE.
-- Literate CoffeeScript LPeg lexer.
-- http://coffeescript.org/#literate
-local l = require('lexer')
-local token = l.token
-local P = lpeg.P
+local lexer = require('lexer')
+local token = lexer.token
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'litcoffee'}
-
--- Embedded in Markdown.
-local markdown = l.load('markdown')
-M._lexer = markdown -- ensure markdown's rules are loaded, not HTML's
+local lex = lexer.new('litcoffee', {inherit = lexer.load('markdown')})
-- Embedded CoffeeScript.
-local coffeescript = l.load('coffeescript')
-local coffee_start_rule = token(l.STYLE_EMBEDDED, (P(' ')^4 + P('\t')))
-local coffee_end_rule = token(l.STYLE_EMBEDDED, l.newline)
-l.embed_lexer(markdown, coffeescript, coffee_start_rule, coffee_end_rule)
+local coffeescript = lexer.load('coffeescript')
+local coffee_start_rule = token(lexer.STYLE_EMBEDDED, (P(' ')^4 + P('\t')))
+local coffee_end_rule = token(lexer.STYLE_EMBEDDED, lexer.newline)
+lex:embed(coffeescript, coffee_start_rule, coffee_end_rule)
+
+-- Use 'markdown_whitespace' instead of lexer.WHITESPACE since the latter would expand to
+-- 'litcoffee_whitespace'.
+lex:modify_rule('whitespace', token('markdown_whitespace', S(' \t')^1 + S('\r\n')^1))
-return M
+return lex
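
A usage sketch with hypothetical input: unindented lines lex as Markdown, while lines indented by four spaces or a tab switch to the embedded CoffeeScript lexer until the next newline.

local lexer = require('lexer')
local litcoffee = lexer.load('litcoffee')
local tokens = litcoffee:lex('# A heading\n\n    square = (x) -> x * x\n')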
diff --git a/lua/lexers/logtalk.lua b/lua/lexers/logtalk.lua
index 3986859..e33e80a 100644
--- a/lua/lexers/logtalk.lua
+++ b/lua/lexers/logtalk.lua
@@ -1,52 +1,62 @@
--- Copyright © 2017 Michael T. Richter <ttmrichter@gmail.com>. See LICENSE.
+-- Copyright © 2017-2022 Michael T. Richter <ttmrichter@gmail.com>. See LICENSE.
-- Logtalk LPeg lexer.
-local l = require 'lexer'
-local token, word_match = l.token, l.word_match
-local B, P, R, S, V = lpeg.B, lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = { _NAME = 'logtalk' }
+local lex = lexer.new('logtalk', {inherit = lexer.load('prolog')})
-local keyword = token(l.KEYWORD, word_match{
- -- Logtalk "keywords" generated from Vim syntax highlighting file with Prolog
- -- keywords stripped since we're building up on the Prolog lexer.
- 'abolish_category', 'abolish_events', 'abolish_object', 'abolish_protocol',
- 'after', 'alias', 'as', 'before', 'built_in', 'calls', 'category',
- 'category_property', 'coinductive', 'complements', 'complements_object',
- 'conforms_to_protocol', 'create', 'create_category', 'create_object',
- 'create_protocol', 'create_logtalk_flag', 'current', 'current_category',
- 'current_event', 'current_logtalk_flag', 'current_object', 'current_protocol',
- 'define_events', 'encoding', 'end_category', 'end_class', 'end_object',
- 'end_protocol', 'extends', 'extends_category', 'extends_object',
- 'extends_protocol', 'forward', 'implements', 'implements_protocol', 'imports',
- 'imports_category', 'include', 'info', 'instantiates', 'instantiates_class',
- 'is', 'logtalk_compile', 'logtalk_library_path', 'logtalk_load',
- 'logtalk_load_context', 'logtalk_make', 'meta_non_terminal', 'mode', 'object',
- 'object_property', 'parameter', 'private', 'protected', 'protocol_property',
- 'self', 'sender', 'set_logtalk_flag', 'specializes', 'specializes_class',
- 'synchronized', 'this', 'threaded', 'threaded_call', 'threaded_engine',
- 'threaded_engine_create', 'threaded_engine_destroy', 'threaded_engine_fetch',
- 'threaded_engine_next', 'threaded_engine_next_reified',
- 'threaded_engine_post', 'threaded_engine_self', 'threaded_engine_yield',
- 'threaded_exit', 'threaded_ignore', 'threaded_notify', 'threaded_once',
- 'threaded_peek', 'threaded_wait', 'uses',
+-- Add Logtalk keywords to the Prolog ones.
+local directives = {
+ 'set_logtalk_flag', 'object', 'info', 'built_in', 'threaded', 'uses', 'alias', 'use_module',
+ 'coinductive', 'export', 'reexport', 'public', 'metapredicate', 'mode', 'meta_non_terminal',
+ 'protected', 'synchronized', 'private', 'module', 'if', 'elif', 'else', 'endif', 'category',
+ 'protocol', 'end_object', 'end_category', 'end_protocol', 'meta_predicate'
+}
+local indent = token(lexer.WHITESPACE, lexer.starts_line(S(' \t')^1))^-1
+lex:modify_rule('directive',
+ (indent * token(lexer.OPERATOR, ':-') * token(lexer.WHITESPACE, S(' \t')^0) *
+ token(lexer.PREPROCESSOR, word_match(directives))
+) + lex:get_rule('directive'))
- -- info/1 and info/2 predicates have their own keywords, manually extracted
- -- from documentation
- 'comment', 'argnames', 'arguments', 'author', 'version', 'date', 'parameters',
- 'parnames', 'copyright', 'license', 'remarks', 'see_also',
-})
+-- Whitespace.
+lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Extend prolog lexer to include logtalk extensions.
-local prolog = l.load('prolog')
-local _rules = prolog._rules
-for i = 1, #_rules do
- if _rules[i][1] == 'keyword' then
- table.insert(_rules, i, {'logtalk_keyword', keyword})
- end
-end
+local zero_arity_keywords = {
+ -- Extracted from the test document in the Logtalk distribution.
+ 'comment', 'argnames', 'arguments', 'author', 'version', 'date', 'parameters', 'parnames',
+ 'copyright', 'license', 'remarks', 'see_also', 'as', 'logtalk_make', 'instantiation_error',
+ 'system_error'
+}
+local one_plus_arity_keywords = {
+ -- Extracted from the test document in the Logtalk distribution.
+ 'implements', 'imports', 'extends', 'instantiates', 'specializes', 'number_chars', 'number_code',
+ 'current_category', 'current_object', 'current_protocol', 'create_category', 'create_object',
+ 'create_protocol', 'abolish_category', 'abolish_object', 'abolish_protocol', 'category_property',
+ 'object_property', 'protocol_property', 'extends_category', 'extends_object', 'extends_protocol',
+ 'implements_protocol', 'imports_category', 'instantiates_class', 'specializes_class',
+ 'complements_object', 'conforms_to_protocol', 'abolish_events', 'current_event', 'define_events',
+ 'threaded', 'threaded_call', 'threaded_once', 'threaded_ignore', 'threaded_exit',
+ 'threaded_peek', 'threaded_cancel', 'threaded_wait', 'threaded_notify', 'threaded_engine',
+ 'threaded_engine_create', 'threaded_engine_destroy', 'threaded_engine_self',
+ 'threaded_engine_next', 'threaded_engine_next_reified', 'threaded_engine_yield',
+ 'threaded_engine_post', 'threaded_engine_fetch', 'logtalk_compile', 'logtalk_load',
+ 'logtalk_library_path', 'logtalk_load_context', 'logtalk_make_target_action',
+ 'current_logtalk_flag', 'set_logtalk_flag', 'create_logtalk_flag', 'context', 'parameter', 'self',
+ 'sender', 'this', 'type_error', 'domain_error', 'existence_error', 'permission_error',
+ 'representation_error', 'evaluation_error', 'resource_error', 'syntax_error', 'bagof', 'findall',
+ 'forall', 'setof', 'before', 'after', 'forward', 'phrase', 'expand_term', 'expand_goal',
+ 'term_expansion', 'goal_expansion', 'numbervars', 'put_code', 'put_byte', 'current_op', 'op',
+ 'ignore', 'repeat', 'number_codes', 'current_prolog_flag', 'set_prolog_flag', 'keysort', 'sort'
+}
+local keyword = word_match(zero_arity_keywords) + (word_match(one_plus_arity_keywords) * #P('('))
+lex:modify_rule('keyword', token(lexer.KEYWORD, keyword) + lex:get_rule('keyword'))
-M._rules = _rules
-M._foldsymbols = prolog._foldsymbols
+local operators = {
+ -- Extracted from the test document in the Logtalk distribution.
+ 'as'
+}
+lex:modify_rule('operator', token(lexer.OPERATOR, word_match(operators)) + lex:get_rule('operator'))
-return M
+return lex
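
The modify_rule calls above place the Logtalk alternatives in front of the inherited Prolog rules. A small standalone sketch of why that order matters: LPeg's '+' is ordered choice, so the left operand is tried first.

local lpeg = require('lpeg')
local patt = lpeg.C(lpeg.P('logtalk_make')) + lpeg.C(lpeg.P('logtalk'))
-- The longer keyword wins only because its alternative is tried first.
print(lpeg.match(patt, 'logtalk_make')) --> logtalk_make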
diff --git a/lua/lexers/lua.lua b/lua/lexers/lua.lua
index c2edf69..03c37e0 100644
--- a/lua/lexers/lua.lua
+++ b/lua/lexers/lua.lua
@@ -1,86 +1,69 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Lua LPeg lexer.
-- Original written by Peter Odding, 2007/04/04.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local B, P, S = lpeg.B, lpeg.P, lpeg.S
-local M = {_NAME = 'lua'}
+local lex = lexer.new('lua')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
-local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[',
- function(input, index, eq)
- local _, e = input:find(']'..eq..']', index, true)
- return (e or #input) + 1
- end)
-
--- Comments.
-local line_comment = '--' * l.nonnewline^0
-local block_comment = '--' * longstring
-local comment = token(l.COMMENT, block_comment + line_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str) +
- token('longstring', longstring)
-
--- Numbers.
-local lua_integer = P('-')^-1 * (l.hex_num + l.dec_num)
-local number = token(l.NUMBER, l.float + lua_integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function',
- 'goto', 'if', 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then',
- 'true', 'until', 'while'
-})
-
--- Functions.
-local func = token(l.FUNCTION, word_match{
- 'assert', 'collectgarbage', 'dofile', 'error', 'getmetatable', 'ipairs',
- 'load', 'loadfile', 'next', 'pairs', 'pcall', 'print', 'rawequal', 'rawget',
- 'rawset', 'require', 'select', 'setmetatable', 'tonumber', 'tostring', 'type',
- 'xpcall',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', 'in', 'local',
+ 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while',
-- Added in 5.2.
- 'rawlen'
+ 'goto'
+}))
+
+-- Functions and deprecated functions.
+local func = token(lexer.FUNCTION, word_match{
+ 'assert', 'collectgarbage', 'dofile', 'error', 'getmetatable', 'ipairs', 'load', 'loadfile',
+ 'next', 'pairs', 'pcall', 'print', 'rawequal', 'rawget', 'rawset', 'require', 'select',
+ 'setmetatable', 'tonumber', 'tostring', 'type', 'xpcall',
+ -- Added in 5.2.
+ 'rawlen',
+ -- Added in 5.4.
+ 'warn'
})
-
--- Deprecated functions.
local deprecated_func = token('deprecated_function', word_match{
-- Deprecated in 5.2.
'getfenv', 'loadstring', 'module', 'setfenv', 'unpack'
})
+lex:add_rule('function', -B('.') * (func + deprecated_func))
+lex:add_style('deprecated_function', lexer.styles['function'] .. {italics = true})
-- Constants.
-local constant = token(l.CONSTANT, word_match{
+lex:add_rule('constant', token(lexer.CONSTANT, -B('.') * word_match{
'_G', '_VERSION',
-- Added in 5.2.
'_ENV'
-})
+}))
--- Libraries.
-local library = token('library', word_match({
+-- Libraries and deprecated libraries.
+local library = token('library', word_match{
-- Coroutine.
- 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running',
- 'coroutine.status', 'coroutine.wrap', 'coroutine.yield',
+ 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running', 'coroutine.status',
+ 'coroutine.wrap', 'coroutine.yield',
-- Coroutine added in 5.3.
'coroutine.isyieldable',
+ -- Coroutine added in 5.4.
+ 'coroutine.close',
-- Module.
- 'package', 'package.cpath', 'package.loaded', 'package.loadlib',
- 'package.path', 'package.preload',
+ 'package', 'package.cpath', 'package.loaded', 'package.loadlib', 'package.path',
+ 'package.preload',
-- Module added in 5.2.
'package.config', 'package.searchers', 'package.searchpath',
-- UTF-8 added in 5.3.
- 'utf8', 'utf8.char', 'utf8.charpattern', 'utf8.codepoint', 'utf8.codes',
- 'utf8.len', 'utf8.offset',
+ 'utf8', 'utf8.char', 'utf8.charpattern', 'utf8.codepoint', 'utf8.codes', 'utf8.len',
+ 'utf8.offset',
-- String.
- 'string', 'string.byte', 'string.char', 'string.dump', 'string.find',
- 'string.format', 'string.gmatch', 'string.gsub', 'string.len', 'string.lower',
- 'string.match', 'string.rep', 'string.reverse', 'string.sub', 'string.upper',
+ 'string', 'string.byte', 'string.char', 'string.dump', 'string.find', 'string.format',
+ 'string.gmatch', 'string.gsub', 'string.len', 'string.lower', 'string.match', 'string.rep',
+ 'string.reverse', 'string.sub', 'string.upper',
-- String added in 5.3.
'string.pack', 'string.packsize', 'string.unpack',
-- Table.
@@ -90,33 +73,26 @@ local library = token('library', word_match({
-- Table added in 5.3.
'table.move',
-- Math.
- 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil',
- 'math.cos', 'math.deg', 'math.exp', 'math.floor', 'math.fmod', 'math.huge',
- 'math.log', 'math.max', 'math.min', 'math.modf', 'math.pi', 'math.rad',
- 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt', 'math.tan',
+ 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil', 'math.cos', 'math.deg',
+ 'math.exp', 'math.floor', 'math.fmod', 'math.huge', 'math.log', 'math.max', 'math.min',
+ 'math.modf', 'math.pi', 'math.rad', 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt',
+ 'math.tan',
-- Math added in 5.3.
- 'math.maxinteger', 'math.mininteger', 'math.tointeger', 'math.type',
- 'math.ult',
+ 'math.maxinteger', 'math.mininteger', 'math.tointeger', 'math.type', 'math.ult',
-- IO.
- 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output',
- 'io.popen', 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile',
- 'io.type', 'io.write',
+ 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output', 'io.popen',
+ 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile', 'io.type', 'io.write',
-- OS.
- 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit',
- 'os.getenv', 'os.remove', 'os.rename', 'os.setlocale', 'os.time',
- 'os.tmpname',
+ 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit', 'os.getenv', 'os.remove',
+ 'os.rename', 'os.setlocale', 'os.time', 'os.tmpname',
-- Debug.
- 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal',
- 'debug.getmetatable', 'debug.getregistry', 'debug.getupvalue',
- 'debug.sethook', 'debug.setlocal', 'debug.setmetatable', 'debug.setupvalue',
- 'debug.traceback',
+ 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal', 'debug.getmetatable',
+ 'debug.getregistry', 'debug.getupvalue', 'debug.sethook', 'debug.setlocal', 'debug.setmetatable',
+ 'debug.setupvalue', 'debug.traceback',
-- Debug added in 5.2.
- 'debug.getuservalue', 'debug.setuservalue', 'debug.upvalueid',
- 'debug.upvaluejoin',
-}, '.'))
-
--- Deprecated libraries.
-local deprecated_library = token('deprecated_library', word_match({
+ 'debug.getuservalue', 'debug.setuservalue', 'debug.upvalueid', 'debug.upvaluejoin'
+})
+local deprecated_library = token('deprecated_library', word_match{
-- Module deprecated in 5.2.
'package.loaders', 'package.seeall',
-- Table deprecated in 5.2.
@@ -124,67 +100,69 @@ local deprecated_library = token('deprecated_library', word_match({
-- Math deprecated in 5.2.
'math.log10',
-- Math deprecated in 5.3.
- 'math.atan2', 'math.cosh', 'math.frexp', 'math.ldexp', 'math.pow',
- 'math.sinh', 'math.tanh',
+ 'math.atan2', 'math.cosh', 'math.frexp', 'math.ldexp', 'math.pow', 'math.sinh', 'math.tanh',
-- Bit32 deprecated in 5.3.
- 'bit32', 'bit32.arshift', 'bit32.band', 'bit32.bnot', 'bit32.bor',
- 'bit32.btest', 'bit32.extract', 'bit32.lrotate', 'bit32.lshift',
- 'bit32.replace', 'bit32.rrotate', 'bit32.rshift', 'bit32.xor',
+ 'bit32', 'bit32.arshift', 'bit32.band', 'bit32.bnot', 'bit32.bor', 'bit32.btest', 'bit32.extract',
+ 'bit32.lrotate', 'bit32.lshift', 'bit32.replace', 'bit32.rrotate', 'bit32.rshift', 'bit32.xor',
-- Debug deprecated in 5.2.
'debug.getfenv', 'debug.setfenv'
-}, '.'))
+})
+lex:add_rule('library', -B('.') * (library + deprecated_library))
+lex:add_style('library', lexer.styles.type)
+lex:add_style('deprecated_library', lexer.styles.type .. {italics = true})
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[', function(input, index, eq)
+ local _, e = input:find(']' .. eq .. ']', index, true)
+ return (e or #input) + 1
+end)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) + token('longstring', longstring))
+lex:add_style('longstring', lexer.styles.string)
+
+-- Comments.
+local line_comment = lexer.to_eol('--')
+local block_comment = '--' * longstring
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
+
+-- Numbers.
+local lua_integer = P('-')^-1 * (lexer.hex_num + lexer.dec_num)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + lua_integer))
-- Labels.
-local label = token(l.LABEL, '::' * l.word * '::')
+lex:add_rule('label', token(lexer.LABEL, '::' * lexer.word * '::'))
+
+-- Attributes.
+lex:add_rule('attribute', token('attribute', '<' * lexer.space^0 * word_match('const close') *
+ lexer.space^0 * '>'))
+lex:add_style('attribute', lexer.styles.class)
-- Operators.
-local operator = token(l.OPERATOR, S('+-*/%^#=<>&|~;:,.{}[]()'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func + deprecated_func},
- {'constant', constant},
- {'library', library + deprecated_library},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'label', label},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- longstring = l.STYLE_STRING,
- deprecated_function = l.STYLE_FUNCTION..',italics',
- library = l.STYLE_TYPE,
- deprecated_library = l.STYLE_TYPE..',italics'
-}
-
-local function fold_longcomment(text, pos, line, s, match)
- if match == '[' then
+lex:add_rule('operator', token(lexer.OPERATOR, '..' + S('+-*/%^#=<>&|~;:,.{}[]()')))
+
+-- Fold points.
+local function fold_longcomment(text, pos, line, s, symbol)
+ if symbol == '[' then
if line:find('^%[=*%[', s) then return 1 end
- elseif match == ']' then
+ elseif symbol == ']' then
if line:find('^%]=*%]', s) then return -1 end
end
return 0
end
-
-M._foldsymbols = {
- _patterns = {'%l+', '[%({%)}]', '[%[%]]', '%-%-'},
- [l.KEYWORD] = {
- ['if'] = 1, ['do'] = 1, ['function'] = 1, ['end'] = -1, ['repeat'] = 1,
- ['until'] = -1
- },
- [l.COMMENT] = {
- ['['] = fold_longcomment, [']'] = fold_longcomment,
- ['--'] = l.fold_line_comments('--')
- },
- longstring = {['['] = 1, [']'] = -1},
- [l.OPERATOR] = {['('] = 1, ['{'] = 1, [')'] = -1, ['}'] = -1}
-}
-
-return M
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'do', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'function', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until')
+lex:add_fold_point(lexer.COMMENT, '[', fold_longcomment)
+lex:add_fold_point(lexer.COMMENT, ']', fold_longcomment)
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('--'))
+lex:add_fold_point('longstring', '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+
+return lex
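
The Cmt-based longstring matcher above searches for a closing bracket with the same number of '=' signs as the opener. A standalone check (lpeg is required explicitly here; inside lexers it is available as a global):

local lpeg = require('lpeg')
local longstring = lpeg.Cmt('[' * lpeg.C(lpeg.P('=')^0) * '[', function(input, index, eq)
  local _, e = input:find(']' .. eq .. ']', index, true)
  return (e or #input) + 1
end)
print(lpeg.match(longstring, '[==[ text ]==] tail')) --> 15 (one past ']==]')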
diff --git a/lua/lexers/makefile.lua b/lua/lexers/makefile.lua
index aaf2dc0..9c5f332 100644
--- a/lua/lexers/makefile.lua
+++ b/lua/lexers/makefile.lua
@@ -1,109 +1,88 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Makefile LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'makefile'}
+local lex = lexer.new('makefile', {lex_by_line = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Keywords.
-local keyword = token(l.KEYWORD, P('!')^-1 * l.word_match({
+lex:add_rule('keyword', token(lexer.KEYWORD, P('!')^-1 * word_match({
-- GNU Make conditionals.
'ifeq', 'ifneq', 'ifdef', 'ifndef', 'else', 'endif',
-- Other conditionals.
'if', 'elseif', 'elseifdef', 'elseifndef',
-- Directives and other keywords.
- 'define', 'endef', 'export', 'include', 'override', 'private', 'undefine',
- 'unexport', 'vpath'
-}, nil, true))
+ 'define', 'endef', 'export', 'include', 'override', 'private', 'undefine', 'unexport', 'vpath'
+}, true)))
--- Functions.
-local func = token(l.FUNCTION, l.word_match({
- -- Functions for String Substitution and Analysis.
- 'subst', 'patsubst', 'strip', 'findstring', 'filter', 'filter-out', 'sort',
- 'word', 'wordlist', 'words', 'firstword', 'lastword',
- -- Functions for File Names.
- 'dir', 'notdir', 'suffix', 'basename', 'addsuffix', 'addprefix', 'join',
- 'wildcard', 'realpath', 'abspath',
- -- Functions for Conditionals.
- 'if', 'or', 'and',
- -- Miscellaneous Functions.
- 'foreach', 'call', 'value', 'eval', 'origin', 'flavor', 'shell',
- -- Functions That Control Make.
- 'error', 'warning', 'info'
-}), '-')
+-- Targets.
+local special_target = token(lexer.CONSTANT, word_match{
+ '.PHONY', '.SUFFIXES', '.DEFAULT', '.PRECIOUS', '.INTERMEDIATE', '.SECONDARY', '.SECONDEXPANSION',
+ '.DELETE_ON_ERROR', '.IGNORE', '.LOW_RESOLUTION_TIME', '.SILENT', '.EXPORT_ALL_VARIABLES',
+ '.NOTPARALLEL', '.ONESHELL', '.POSIX'
+})
+local normal_target = token('target', (lexer.any - lexer.space - S(':#='))^1)
+local target_list = normal_target * (ws * normal_target)^0
+lex:add_rule('target', lexer.starts_line((special_target + target_list) * ws^0 * #(':' * -P('='))))
+lex:add_style('target', lexer.styles.label)
-- Variables.
-local word_char, assign = l.any - l.space - S(':#=(){}'), S(':+?')^-1 * '='
+local word_char = lexer.any - lexer.space - S(':#=(){}')
+local assign = S(':+?')^-1 * '='
local expanded_var = '$' * ('(' * word_char^1 * ')' + '{' * word_char^1 * '}')
local auto_var = '$' * S('@%<?^+|*')
-local special_var = l.word_match({
- 'MAKEFILE_LIST', '.DEFAULT_GOAL', 'MAKE_RESTARTS', '.RECIPEPREFIX',
- '.VARIABLES', '.FEATURES', '.INCLUDE_DIRS',
- 'GPATH', 'MAKECMDGOALS', 'MAKESHELL', 'SHELL', 'VPATH'
-}, '.') * #(ws^0 * assign)
-local implicit_var = l.word_match{
+local special_var = word_match{
+ 'MAKEFILE_LIST', '.DEFAULT_GOAL', 'MAKE_RESTARTS', '.RECIPEPREFIX', '.VARIABLES', '.FEATURES',
+ '.INCLUDE_DIRS', 'GPATH', 'MAKECMDGOALS', 'MAKESHELL', 'SHELL', 'VPATH'
+} * #(ws^0 * assign)
+local implicit_var = word_match{
-- Some common variables.
- 'AR', 'AS', 'CC', 'CXX', 'CPP', 'FC', 'M2C', 'PC', 'CO', 'GET', 'LEX', 'YACC',
- 'LINT', 'MAKEINFO', 'TEX', 'TEXI2DVI', 'WEAVE', 'CWEAVE', 'TANGLE', 'CTANGLE',
- 'RM',
+ 'AR', 'AS', 'CC', 'CXX', 'CPP', 'FC', 'M2C', 'PC', 'CO', 'GET', 'LEX', 'YACC', 'LINT', 'MAKEINFO',
+ 'TEX', 'TEXI2DVI', 'WEAVE', 'CWEAVE', 'TANGLE', 'CTANGLE', 'RM',
-- Some common flag variables.
- 'ARFLAGS', 'ASFLAGS', 'CFLAGS', 'CXXFLAGS', 'COFLAGS', 'CPPFLAGS', 'FFLAGS',
- 'GFLAGS', 'LDFLAGS', 'LFLAGS', 'YFLAGS', 'PFLAGS', 'RFLAGS', 'LINTFLAGS',
+ 'ARFLAGS', 'ASFLAGS', 'CFLAGS', 'CXXFLAGS', 'COFLAGS', 'CPPFLAGS', 'FFLAGS', 'GFLAGS', 'LDFLAGS',
+ 'LDLIBS', 'LFLAGS', 'YFLAGS', 'PFLAGS', 'RFLAGS', 'LINTFLAGS',
-- Other.
'DESTDIR', 'MAKE', 'MAKEFLAGS', 'MAKEOVERRIDES', 'MFLAGS'
} * #(ws^0 * assign)
-local computed_var = token(l.OPERATOR, '$' * S('({')) * func
-local variable = token(l.VARIABLE,
- expanded_var + auto_var + special_var + implicit_var) +
- computed_var
-
--- Targets.
-local special_target = token(l.CONSTANT, l.word_match({
- '.PHONY', '.SUFFIXES', '.DEFAULT', '.PRECIOUS', '.INTERMEDIATE', '.SECONDARY',
- '.SECONDEXPANSION', '.DELETE_ON_ERROR', '.IGNORE', '.LOW_RESOLUTION_TIME',
- '.SILENT', '.EXPORT_ALL_VARIABLES', '.NOTPARALLEL', '.ONESHELL', '.POSIX'
-}, '.'))
-local normal_target = token('target', (l.any - l.space - S(':#='))^1)
-local target_list = normal_target * (ws * normal_target)^0
-local target = l.starts_line((special_target + target_list) * ws^0 *
- #(':' * -P('=')))
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, word_char^1)
+local variable = token(lexer.VARIABLE, expanded_var + auto_var + special_var + implicit_var)
+local computed_var = token(lexer.OPERATOR, '$' * S('({')) * token(lexer.FUNCTION, word_match{
+ -- Functions for String Substitution and Analysis.
+ 'subst', 'patsubst', 'strip', 'findstring', 'filter', 'filter-out', 'sort', 'word', 'wordlist',
+ 'words', 'firstword', 'lastword',
+ -- Functions for File Names.
+ 'dir', 'notdir', 'suffix', 'basename', 'addsuffix', 'addprefix', 'join', 'wildcard', 'realpath',
+ 'abspath',
+ -- Functions for Conditionals.
+ 'if', 'or', 'and',
+ -- Miscellaneous Functions.
+ 'foreach', 'call', 'value', 'eval', 'origin', 'flavor', 'shell',
+ -- Functions That Control Make.
+ 'error', 'warning', 'info'
+})
+lex:add_rule('variable', variable + computed_var)
-- Operators.
-local operator = token(l.OPERATOR, assign + S(':$(){}'))
+lex:add_rule('operator', token(lexer.OPERATOR, assign + S(':$(){}')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'target', target},
- {'variable', variable},
- {'operator', operator},
- {'identifier', identifier},
- {'comment', comment},
-}
-
-M._tokenstyles = {
- target = l.STYLE_LABEL
-}
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word_char^1))
-M._LEXBYLINE = true
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Embedded Bash.
-local bash = l.load('bash')
-bash._RULES['variable'] = token(l.VARIABLE, '$$' * word_char^1) +
- bash._RULES['variable'] + variable
-local bash_start_rule = token(l.WHITESPACE, P('\t')) + token(l.OPERATOR, P(';'))
-local bash_end_rule = token(l.WHITESPACE, P('\n'))
-l.embed_lexer(M, bash, bash_start_rule, bash_end_rule)
+local bash = lexer.load('bash')
+bash:modify_rule('variable',
+ token(lexer.VARIABLE, '$$' * word_char^1) + bash:get_rule('variable') + variable)
+local bash_start_rule = token(lexer.WHITESPACE, '\t') + token(lexer.OPERATOR, ';')
+local bash_end_rule = token(lexer.WHITESPACE, '\n')
+lex:embed(bash, bash_start_rule, bash_end_rule)
-return M
+return lex
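
A usage sketch of the embedding above with hypothetical input: the tab starting a recipe line hands off to the bash lexer, whose modified variable rule accepts both Make's '$$' shell-variable escape and '$(...)' expansions.

local lexer = require('lexer')
local makefile = lexer.load('makefile')
local tokens = makefile:lex('all:\n\techo $$HOME $(CC)\n')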
diff --git a/lua/lexers/man.lua b/lua/lexers/man.lua
index afeb2f2..3ca9910 100644
--- a/lua/lexers/man.lua
+++ b/lua/lexers/man.lua
@@ -1,37 +1,22 @@
--- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE.
+-- Copyright 2015-2022 David B. Lamkins <david@lamkins.net>. See LICENSE.
-- man/roff LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'man'}
+local lex = lexer.new('man')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Markup.
-local rule1 = token(l.STRING,
- P('.') * (P('B') * P('R')^-1 + P('I') * P('PR')^-1) *
- l.nonnewline^0)
-local rule2 = token(l.NUMBER, P('.') * S('ST') * P('H') * l.nonnewline^0)
-local rule3 = token(l.KEYWORD,
- P('.br') + P('.DS') + P('.RS') + P('.RE') + P('.PD'))
-local rule4 = token(l.LABEL, P('.') * (S('ST') * P('H') + P('.TP')))
-local rule5 = token(l.VARIABLE,
- P('.B') * P('R')^-1 + P('.I') * S('PR')^-1 + P('.PP'))
-local rule6 = token(l.TYPE, P('\\f') * S('BIPR'))
-local rule7 = token(l.PREPROCESSOR, l.starts_line('.') * l.alpha^1)
+lex:add_rule('rule1', token(lexer.STRING, '.' * lexer.to_eol('B' * P('R')^-1 + 'I' * P('PR')^-1)))
+lex:add_rule('rule2', token(lexer.NUMBER, lexer.to_eol('.' * S('ST') * 'H')))
+lex:add_rule('rule3', token(lexer.KEYWORD, P('.br') + '.DS' + '.RS' + '.RE' + '.PD'))
+lex:add_rule('rule4', token(lexer.LABEL, '.' * (S('ST') * 'H' + '.TP')))
+lex:add_rule('rule5', token(lexer.VARIABLE, '.B' * P('R')^-1 + '.I' * S('PR')^-1 + '.PP'))
+lex:add_rule('rule6', token(lexer.TYPE, '\\f' * S('BIPR')))
+lex:add_rule('rule7', token(lexer.PREPROCESSOR, lexer.starts_line('.') * lexer.alpha^1))
-M._rules = {
- {'whitespace', ws},
- {'rule1', rule1},
- {'rule2', rule2},
- {'rule3', rule3},
- {'rule4', rule4},
- {'rule5', rule5},
- {'rule6', rule6},
- {'rule7', rule7},
-}
-
-return M
+return lex
diff --git a/lua/lexers/markdown.lua b/lua/lexers/markdown.lua
index fe57a1b..cbd4ba2 100644
--- a/lua/lexers/markdown.lua
+++ b/lua/lexers/markdown.lua
@@ -1,109 +1,104 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Markdown LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'markdown'}
-
--- Whitespace.
-local ws = token(l.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1)
+local lex = lexer.new('markdown')
-- Block elements.
-local header = token('h6', l.starts_line('######') * l.nonnewline^0) +
- token('h5', l.starts_line('#####') * l.nonnewline^0) +
- token('h4', l.starts_line('####') * l.nonnewline^0) +
- token('h3', l.starts_line('###') * l.nonnewline^0) +
- token('h2', l.starts_line('##') * l.nonnewline^0) +
- token('h1', l.starts_line('#') * l.nonnewline^0)
-
-local blockquote = token(l.STRING,
- lpeg.Cmt(l.starts_line(S(' \t')^0 * '>'),
- function(input, index)
- local _, e = input:find('\n[ \t]*\r?\n',
- index)
- return (e or #input) + 1
- end))
-
-local blockcode = token('code', l.starts_line(P(' ')^4 + P('\t')) * -P('<') *
- l.nonnewline^0)
-
-local hr = token('hr', lpeg.Cmt(l.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))),
- function(input, index, c)
- local line = input:match('[^\n]*', index)
- line = line:gsub('[ \t]', '')
- if line:find('[^'..c..']') or #line < 2 then
- return nil
- end
- return (input:find('\n', index) or #input) + 1
- end))
+local function h(n) return token('h' .. n, lexer.to_eol(lexer.starts_line(string.rep('#', n)))) end
+lex:add_rule('header', h(6) + h(5) + h(4) + h(3) + h(2) + h(1))
+local font_size = tonumber(lexer.property_expanded['style.default']:match('size:(%d+)')) or 10
+local function add_header_style(n)
+ lex:add_style('h' .. n, {fore = lexer.colors.red, size = (font_size + (6 - n))})
+end
+for i = 1, 6 do add_header_style(i) end
+
+lex:add_rule('blockquote',
+ token(lexer.STRING, lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'), function(input, index)
+ local _, e = input:find('\n[ \t]*\r?\n', index)
+ return (e or #input) + 1
+ end)))
+
+lex:add_rule('list', token('list',
+ lexer.starts_line(S(' \t')^0 * (S('*+-') + lexer.digit^1 * '.')) * S(' \t')))
+lex:add_style('list', lexer.styles.constant)
+
+local hspace = S('\t\v\f\r ')
+local blank_line = '\n' * hspace^0 * ('\n' + P(-1))
+
+local code_line = lexer.to_eol(lexer.starts_line(P(' ')^4 + '\t') * -P('<')) * lexer.newline^-1
+local code_block = lexer.range(lexer.starts_line('```'), '\n```' * hspace^0 * ('\n' + P(-1)))
+local code_inline = lpeg.Cmt(lpeg.C(P('`')^1), function(input, index, bt)
+ -- `foo`, ``foo``, ``foo`bar``, `foo``bar` are all allowed.
+ local _, e = input:find('[^`]' .. bt .. '%f[^`]', index)
+ return (e or #input) + 1
+end)
+lex:add_rule('block_code', token('code', code_line + code_block + code_inline))
+lex:add_style('code', lexer.styles.embedded .. {eolfilled = true})
+
+lex:add_rule('hr',
+ token('hr', lpeg.Cmt(lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))), function(input, index, c)
+ local line = input:match('[^\r\n]*', index):gsub('[ \t]', '')
+ if line:find('[^' .. c .. ']') or #line < 2 then return nil end
+ return (select(2, input:find('\r?\n', index)) or #input) + 1
+ end)))
+lex:add_style('hr', {back = lexer.colors.black, eolfilled = true})
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1)
+lex:add_rule('whitespace', ws)
-- Span elements.
-local dq_str = token(l.STRING, l.delimited_range('"', false, true))
-local sq_str = token(l.STRING, l.delimited_range("'", false, true))
-local paren_str = token(l.STRING, l.delimited_range('()'))
-local link = token('link', P('!')^-1 * l.delimited_range('[]') *
- (P('(') * (l.any - S(') \t'))^0 *
- (S(' \t')^1 *
- l.delimited_range('"', false, true))^-1 * ')' +
- S(' \t')^0 * l.delimited_range('[]')) +
- P('http://') * (l.any - l.space)^1)
-local link_label = token('link_label', l.delimited_range('[]') * ':') * ws *
- token('link_url', (l.any - l.space)^1) *
- (ws * (dq_str + sq_str + paren_str))^-1
-
-local strong = token('strong', (P('**') * (l.any - '**')^0 * P('**')^-1) +
- (P('__') * (l.any - '__')^0 * P('__')^-1))
-local em = token('em',
- l.delimited_range('*', true) + l.delimited_range('_', true))
-local code = token('code', (P('``') * (l.any - '``')^0 * P('``')^-1) +
- l.delimited_range('`', true, true))
-
-local escape = token(l.DEFAULT, P('\\') * 1)
-
-local list = token('list',
- l.starts_line(S(' \t')^0 * (S('*+-') + R('09')^1 * '.')) *
- S(' \t'))
-
-M._rules = {
- {'header', header},
- {'list', list},
- {'blockquote', blockquote},
- {'blockcode', blockcode},
- {'hr', hr},
- {'whitespace', ws},
- {'link_label', link_label},
- {'escape', escape},
- {'link', link},
- {'strong', strong},
- {'em', em},
- {'code', code},
-}
-
-local font_size = 10
-local hstyle = 'fore:red'
-M._tokenstyles = {
- h6 = hstyle,
- h5 = hstyle..',size:'..(font_size + 1),
- h4 = hstyle..',size:'..(font_size + 2),
- h3 = hstyle..',size:'..(font_size + 3),
- h2 = hstyle..',size:'..(font_size + 4),
- h1 = hstyle..',size:'..(font_size + 5),
- code = l.STYLE_EMBEDDED..',eolfilled',
- hr = l.STYLE_DEFAULT..',bold',
- link = 'underlined',
- link_url = 'underlined',
- link_label = l.STYLE_LABEL,
- strong = 'bold',
- em = 'italics',
- list = l.STYLE_CONSTANT,
-}
+lex:add_rule('escape', token(lexer.DEFAULT, P('\\') * 1))
+
+local ref_link_label = token('link_label', lexer.range('[', ']', true) * ':')
+local ref_link_url = token('link_url', (lexer.any - lexer.space)^1)
+local ref_link_title = token(lexer.STRING, lexer.range('"', true, false) +
+ lexer.range("'", true, false) + lexer.range('(', ')', true))
+lex:add_rule('link_label', ref_link_label * ws * ref_link_url * (ws * ref_link_title)^-1)
+lex:add_style('link_label', lexer.styles.label)
+lex:add_style('link_url', {underlined = true})
+
+local link_label = P('!')^-1 * lexer.range('[', ']', true)
+local link_target =
+ '(' * (lexer.any - S(') \t'))^0 * (S(' \t')^1 * lexer.range('"', false, false))^-1 * ')'
+local link_ref = S(' \t')^0 * lexer.range('[', ']', true)
+local link_url = 'http' * P('s')^-1 * '://' * (lexer.any - lexer.space)^1 +
+ ('<' * lexer.alpha^2 * ':' * (lexer.any - lexer.space - '>')^1 * '>')
+lex:add_rule('link', token('link', link_label * (link_target + link_ref) + link_url))
+lex:add_style('link', {underlined = true})
+
+local punct_space = lexer.punct + lexer.space
+
+-- Handles flanking delimiters as described in
+-- https://github.github.com/gfm/#emphasis-and-strong-emphasis in the cases where simple
+-- delimited ranges are not sufficient.
+local function flanked_range(s, not_inword)
+ local fl_char = lexer.any - s - lexer.space
+ local left_fl = lpeg.B(punct_space - s) * s * #fl_char + s * #(fl_char - lexer.punct)
+ local right_fl = lpeg.B(lexer.punct) * s * #(punct_space - s) + lpeg.B(fl_char) * s
+ return left_fl * (lexer.any - blank_line - (not_inword and s * #punct_space or s))^0 * right_fl
+end
+
+local asterisk_strong = flanked_range('**')
+local underscore_strong = (lpeg.B(punct_space) + #lexer.starts_line('_')) *
+ flanked_range('__', true) * #(punct_space + -1)
+lex:add_rule('strong', token('strong', asterisk_strong + underscore_strong))
+lex:add_style('strong', {bold = true})
+
+local asterisk_em = flanked_range('*')
+local underscore_em = (lpeg.B(punct_space) + #lexer.starts_line('_')) * flanked_range('_', true) *
+ #(punct_space + -1)
+lex:add_rule('em', token('em', asterisk_em + underscore_em))
+lex:add_style('em', {italics = true})
-- Embedded HTML.
-local html = l.load('html')
-local start_rule = token('tag', l.starts_line(S(' \t')^0 * '<'))
-local end_rule = token(l.DEFAULT, P('\n')) -- TODO: l.WHITESPACE causes errors
-l.embed_lexer(M, html, start_rule, end_rule)
+local html = lexer.load('html')
+local start_rule = lexer.starts_line(P(' ')^-3) * #P('<') * html:get_rule('element') -- P(' ')^4 starts code_line
+local end_rule = token(lexer.DEFAULT, blank_line) -- TODO: lexer.WHITESPACE errors
+lex:embed(html, start_rule, end_rule)
-return M
+return lex
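
A standalone check of the inline-code matcher above: a run of backticks is closed by an equally long run that is not followed by yet another backtick (the %f frontier pattern), so ``foo`bar`` is a single span.

local lpeg = require('lpeg')
local code_inline = lpeg.Cmt(lpeg.C(lpeg.P('`')^1), function(input, index, bt)
  local _, e = input:find('[^`]' .. bt .. '%f[^`]', index)
  return (e or #input) + 1
end)
print(lpeg.match(code_inline, '``foo`bar`` tail')) --> 12 (one past the closing ``)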
diff --git a/lua/lexers/matlab.lua b/lua/lexers/matlab.lua
index caef80d..b4871b3 100644
--- a/lua/lexers/matlab.lua
+++ b/lua/lexers/matlab.lua
@@ -1,105 +1,83 @@
--- Copyright 2006-2017 Martin Morawetz. See LICENSE.
+-- Copyright 2006-2022 Martin Morawetz. See LICENSE.
-- Matlab LPeg lexer.
-- Based off of lexer code by Mitchell.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'matlab'}
+local lex = lexer.new('matlab')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = (P('%') + '#') * l.nonnewline^0
-local block_comment = '%{' * (l.any - '%}')^0 * P('%}')^-1
-local comment = token(l.COMMENT, block_comment + line_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"')
-local bt_str = l.delimited_range('`')
-local string = token(l.STRING, sq_str + dq_str + bt_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer + l.dec_num + l.hex_num +
- l.oct_num)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'break', 'case', 'catch', 'continue', 'do', 'else', 'elseif', 'end',
- 'end_try_catch', 'end_unwind_protect', 'endfor', 'endif', 'endswitch',
- 'endwhile', 'for', 'function', 'endfunction', 'global', 'if', 'otherwise',
- 'persistent', 'replot', 'return', 'static', 'switch', 'try', 'until',
- 'unwind_protect', 'unwind_protect_cleanup', 'varargin', 'varargout', 'while'
-}, nil, true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'break', 'case', 'catch', 'continue', 'do', 'else', 'elseif', 'end', 'end_try_catch',
+ 'end_unwind_protect', 'endfor', 'endif', 'endswitch', 'endwhile', 'for', 'function',
+ 'endfunction', 'global', 'if', 'otherwise', 'persistent', 'replot', 'return', 'static', 'switch',
+ 'try', 'until', 'unwind_protect', 'unwind_protect_cleanup', 'varargin', 'varargout', 'while'
+}, true)))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'abs', 'any', 'argv','atan2', 'axes', 'axis', 'ceil', 'cla', 'clear', 'clf',
- 'columns', 'cos', 'delete', 'diff', 'disp', 'doc', 'double', 'drawnow', 'exp',
- 'figure', 'find', 'fix', 'floor', 'fprintf', 'gca', 'gcf', 'get', 'grid',
- 'help', 'hist', 'hold', 'isempty', 'isnull', 'length', 'load', 'log', 'log10',
- 'loglog', 'max', 'mean', 'median', 'min', 'mod', 'ndims', 'numel', 'num2str',
- 'ones', 'pause', 'plot', 'printf', 'quit', 'rand', 'randn', 'rectangle',
- 'rem', 'repmat', 'reshape', 'round', 'rows', 'save', 'semilogx', 'semilogy',
- 'set', 'sign', 'sin', 'size', 'sizeof', 'size_equal', 'sort', 'sprintf',
- 'squeeze', 'sqrt', 'std', 'strcmp', 'subplot', 'sum', 'tan', 'tic', 'title',
- 'toc', 'uicontrol', 'who', 'xlabel', 'ylabel', 'zeros'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'any', 'argv', 'atan2', 'axes', 'axis', 'ceil', 'cla', 'clear', 'clf', 'columns', 'cos',
+ 'delete', 'diff', 'disp', 'doc', 'double', 'drawnow', 'exp', 'figure', 'find', 'fix', 'floor',
+ 'fprintf', 'gca', 'gcf', 'get', 'grid', 'help', 'hist', 'hold', 'isempty', 'isnull', 'length',
+ 'load', 'log', 'log10', 'loglog', 'max', 'mean', 'median', 'min', 'mod', 'ndims', 'numel',
+ 'num2str', 'ones', 'pause', 'plot', 'printf', 'quit', 'rand', 'randn', 'rectangle', 'rem',
+ 'repmat', 'reshape', 'round', 'rows', 'save', 'semilogx', 'semilogy', 'set', 'sign', 'sin',
+ 'size', 'sizeof', 'size_equal', 'sort', 'sprintf', 'squeeze', 'sqrt', 'std', 'strcmp', 'subplot',
+ 'sum', 'tan', 'tic', 'title', 'toc', 'uicontrol', 'who', 'xlabel', 'ylabel', 'zeros'
+}))
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- 'EDITOR', 'I', 'IMAGEPATH', 'INFO_FILE', 'J', 'LOADPATH', 'OCTAVE_VERSION',
- 'PAGER', 'PS1', 'PS2', 'PS4', 'PWD'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match(
+ 'EDITOR I IMAGEPATH INFO_FILE J LOADPATH OCTAVE_VERSION PAGER PS1 PS2 PS4 PWD')))
-- Variable.
-local variable = token(l.VARIABLE, word_match{
+lex:add_rule('variable', token(lexer.VARIABLE, word_match{
'ans', 'automatic_replot', 'default_return_value', 'do_fortran_indexing',
- 'define_all_return_values', 'empty_list_elements_ok', 'eps', 'false',
- 'gnuplot_binary', 'ignore_function_time_stamp', 'implicit_str_to_num_ok',
- 'Inf', 'inf', 'NaN', 'nan', 'ok_to_lose_imaginary_part',
- 'output_max_field_width', 'output_precision', 'page_screen_output', 'pi',
- 'prefer_column_vectors', 'prefer_zero_one_indexing', 'print_answer_id_name',
+ 'define_all_return_values', 'empty_list_elements_ok', 'eps', 'false', 'gnuplot_binary',
+ 'ignore_function_time_stamp', 'implicit_str_to_num_ok', 'Inf', 'inf', 'NaN', 'nan',
+ 'ok_to_lose_imaginary_part', 'output_max_field_width', 'output_precision', 'page_screen_output',
+ 'pi', 'prefer_column_vectors', 'prefer_zero_one_indexing', 'print_answer_id_name',
'print_empty_dimensions', 'realmax', 'realmin', 'resize_on_range_error',
- 'return_last_computed_value', 'save_precision', 'silent_functions',
- 'split_long_rows', 'suppress_verbose_help_message', 'treat_neg_dim_as_zero',
- 'true', 'warn_assign_as_truth_value', 'warn_comma_in_global_decl',
- 'warn_divide_by_zero', 'warn_function_name_clash',
+ 'return_last_computed_value', 'save_precision', 'silent_functions', 'split_long_rows',
+ 'suppress_verbose_help_message', 'treat_neg_dim_as_zero', 'true', 'warn_assign_as_truth_value',
+ 'warn_comma_in_global_decl', 'warn_divide_by_zero', 'warn_function_name_clash',
'whitespace_in_literal_matrix'
-})
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/\\|:;.,?<>~`´'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol(S('%#'))
+local block_comment = lexer.range('%{', '%}')
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'constant', constant},
- {'variable', variable},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/\\|:;.,?<>~`´')))
-M._foldsymbols = {
- _patterns = {'[a-z]+', '[%(%)%[%]]', '%%[{}]?', '#'},
- [l.KEYWORD] = {
- ['if'] = 1, ['for'] = 1, ['while'] = 1, switch = 1, ['end'] = -1
- },
- [l.OPERATOR] = {['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1},
- [l.COMMENT] = {
- ['%{'] = 1, ['%}'] = -1, ['%'] = l.fold_line_comments('%'),
- ['#'] = l.fold_line_comments('#')
- }
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'for', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'while', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'switch', 'end')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.COMMENT, '%{', '%}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%'))
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
diff --git a/lua/lexers/mediawiki.lua b/lua/lexers/mediawiki.lua
new file mode 100644
index 0000000..f304a7a
--- /dev/null
+++ b/lua/lexers/mediawiki.lua
@@ -0,0 +1,47 @@
+-- Copyright 2006-2022 Mitchell. See LICENSE.
+-- MediaWiki LPeg lexer.
+-- Contributed by Alexander Misel.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S, B = lpeg.P, lpeg.S, lpeg.B
+
+local lex = lexer.new('mediawiki')
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
+
+-- HTML-like tags.
+local tag_start = token('tag_start', '<' * P('/')^-1 * lexer.alnum^1 * lexer.space^0)
+local dq_str = '"' * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * '"'
+local tag_attr = token('tag_attr', lexer.alpha^1 * lexer.space^0 *
+ ('=' * lexer.space^0 * (dq_str + (lexer.any - lexer.space - '>')^0)^-1)^0 * lexer.space^0)
+local tag_end = token('tag_end', P('/')^-1 * '>')
+lex:add_rule('tag', tag_start * tag_attr^0 * tag_end)
+lex:add_style('tag_start', lexer.styles.keyword)
+lex:add_style('tag_attr', lexer.styles.type)
+lex:add_style('tag_end', lexer.styles.keyword)
+
+-- Links.
+lex:add_rule('link', token(lexer.STRING, S('[]')))
+lex:add_rule('internal_link', B('[[') * token('link_article', (lexer.any - '|' - ']]')^1))
+lex:add_style('link_article', lexer.styles.string .. {underlined = true})
+
+-- Templates and parser functions.
+lex:add_rule('template', token(lexer.OPERATOR, S('{}')))
+lex:add_rule('parser_func',
+ B('{{') * token('parser_func', '#' * lexer.alpha^1 + lexer.upper^1 * ':'))
+lex:add_rule('template_name', B('{{') * token('template_name', (lexer.any - S('{}|'))^1))
+lex:add_style('parser_func', lexer.styles['function'])
+lex:add_style('template_name', lexer.styles.operator .. {underlined = true})
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!')))
+
+-- Behavior switches.
+local start_pat = P(function(_, pos) return pos == 1 end)
+lex:add_rule('behavior_switch', (B(lexer.space) + start_pat) * token('behavior_switch', word_match(
+ '__TOC__ __FORCETOC__ __NOTOC__ __NOEDITSECTION__ __NOCC__ __NOINDEX__')) * #lexer.space)
+lex:add_style('behavior_switch', lexer.styles.keyword)
+
+return lex
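
A sketch of the lookbehind trick used above for internal links, templates, and parser functions: lpeg.B matches only when a fixed-length pattern immediately precedes the current position, which lets the name be styled separately from the brackets that other rules consume.

local lpeg = require('lpeg')
local link = lpeg.P('[[') * lpeg.B('[[') * lpeg.C((1 - lpeg.P(']]'))^1) * ']]'
print(lpeg.match(link, '[[Main Page]]')) --> Main Page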
diff --git a/lua/lexers/meson.lua b/lua/lexers/meson.lua
index b6cbe15..c224f82 100644
--- a/lua/lexers/meson.lua
+++ b/lua/lexers/meson.lua
@@ -1,36 +1,18 @@
--- Copyright 2020 Florian Fischer. See LICENSE.
+-- Copyright 2020-2022 Florian Fischer. See LICENSE.
-- Meson file LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local M = {_NAME = 'meson'}
+local lex = lexer.new('meson', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
-
--- Strings.
-local str = l.delimited_range("'", true)
-local multiline_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
-local string = token(l.STRING, multiline_str + str)
-
--- Numbers.
-local dec = R('19')^1 * R('09')^0
-local bin = '0b' * S('01')^1
-local oct = '0o' * R('07')^1
-local integer = S('+-')^-1 * (bin + l.hex_num + oct + dec)
-local number = token(l.NUMBER, integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'and', 'or', 'not',
- 'if', 'elif', 'else', 'endif',
- 'foreach', 'break', 'continue', 'endforeach',
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'and or not if elif else endif foreach break continue endforeach')))
-- Methods.
-- https://mesonbuild.com/Reference-manual.html#builtin-objects
@@ -47,40 +29,34 @@ local method_names = word_match{
-- integer --
'is_even', 'is_odd',
-- string --
- 'contains', 'endswith', 'format', 'join', 'split', 'startswith', 'substring',
- 'strip', 'to_int', 'to_lower', 'to_upper', 'underscorify', 'version_compare',
+ 'contains', 'endswith', 'format', 'join', 'split', 'startswith', 'substring', 'strip', 'to_int',
+ 'to_lower', 'to_upper', 'underscorify', 'version_compare',
-- meson object --
- 'add_dist_script', 'add_install_script', 'add_postconf_script', 'backend',
- 'build_root', 'source_root', 'project_build_root', 'project_source_root',
- 'current_build_dir', 'current_source_dir', 'get_compiler',
- 'get_cross_property', 'get_external_property', 'can_run_host_binaries',
- 'has_exe_wrapper', 'install_dependency_manifest', 'is_cross_build',
- 'is_subproject', 'is_unity', 'override_find_program', 'override_dependency',
- 'project_version', 'project_license', 'project_name', 'version',
+ 'add_dist_script', 'add_install_script', 'add_postconf_script', 'backend', 'build_root',
+ 'source_root', 'project_build_root', 'project_source_root', 'current_build_dir',
+ 'current_source_dir', 'get_compiler', 'get_cross_property', 'get_external_property',
+ 'can_run_host_binaries', 'has_exe_wrapper', 'install_dependency_manifest', 'is_cross_build',
+ 'is_subproject', 'is_unity', 'override_find_program', 'override_dependency', 'project_version',
+ 'project_license', 'project_name', 'version',
-- *_machine object --
'cpu_family', 'cpu', 'system', 'endian',
-- compiler object --
- 'alignment', 'cmd_array', 'compiles', 'compute_int', 'find_library',
- 'first_supported_argument', 'first_supported_link_argument', 'get_define',
- 'get_id', 'get_argument_syntax', 'get_linker_id', 'get_supported_arguments',
- 'get_supported_link_arguments', 'has_argument', 'has_link_argument',
- 'has_function', 'check_header', 'has_header', 'has_header_symbol',
- 'has_member', 'has_members', 'has_multi_arguments',
- 'has_multi_link_arguments', 'has_type', 'links', 'run',
- 'symbols_have_underscore_prefix', 'sizeof', 'version',
- 'has_function_attribute', 'get_supported_function_attributes',
+ 'alignment', 'cmd_array', 'compiles', 'compute_int', 'find_library', 'first_supported_argument',
+ 'first_supported_link_argument', 'get_define', 'get_id', 'get_argument_syntax', 'get_linker_id',
+ 'get_supported_arguments', 'get_supported_link_arguments', 'has_argument', 'has_link_argument',
+ 'has_function', 'check_header', 'has_header', 'has_header_symbol', 'has_member', 'has_members',
+ 'has_multi_arguments', 'has_multi_link_arguments', 'has_type', 'links', 'run',
+ 'symbols_have_underscore_prefix', 'sizeof', 'version', 'has_function_attribute',
+ 'get_supported_function_attributes',
-- build target object --
- 'extract_all_objects', 'extract_objects', 'full_path', 'private_dir_include',
- 'name',
+ 'extract_all_objects', 'extract_objects', 'full_path', 'private_dir_include', 'name',
-- configuration data object --
- 'get', 'get_unquoted', 'has', 'keys', 'merge_from', 'set', 'set10',
- 'set_quoted',
+ 'get', 'get_unquoted', 'has', 'keys', 'merge_from', 'set', 'set10', 'set_quoted',
-- custom target object --
'full_path', 'to_list',
-- dependency object --
- 'found', 'name', 'get_pkgconfig_variable', 'get_configtool_variable',
- 'type_name', 'version', 'include_type', 'as_system', 'as_link_whole',
- 'partial_dependency', 'found',
+ 'found', 'name', 'get_pkgconfig_variable', 'get_configtool_variable', 'type_name', 'version',
+ 'include_type', 'as_system', 'as_link_whole', 'partial_dependency', 'found',
-- external program object --
'found', 'path', 'full_path',
-- environment object --
@@ -92,70 +68,59 @@ local method_names = word_match{
-- subproject object --
'found', 'get_variable',
-- run result object --
- 'compiled', 'returncode', 'stderr', 'stdout',
+ 'compiled', 'returncode', 'stderr', 'stdout'
}
-- A method call must be followed by an opening parenthesis.
-local method = token('method', method_names * #P(l.space^0 * S('(')))
+lex:add_rule('method', token('method', method_names * #(lexer.space^0 * '(')))
+lex:add_style('method', lexer.styles['function'])
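+-- e.g. 'meson.version()' highlights 'version' as a method; without the '(' it is a plain identifier.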
-- Function.
-- https://mesonbuild.com/Reference-manual.html#functions
-local func_names = word_match{
- 'add_global_arguments', 'add_global_link_arguments', 'add_languages',
- 'add_project_arguments', 'add_project_link_arguments', 'add_test_setup',
- 'alias_targ', 'assert', 'benchmark', 'both_libraries', 'build_target',
- 'configuration_data', 'configure_file', 'custom_target', 'declare_dependency',
- 'dependency', 'disabler', 'error', 'environment', 'executable',
- 'find_library', 'find_program', 'files', 'generator', 'get_option',
- 'get_variable', 'import', 'include_directories', 'install_data',
- 'install_headers', 'install_man', 'install_subdir', 'is_disabler',
- 'is_variable', 'jar', 'join_paths', 'library', 'message', 'warning',
- 'summary', 'project', 'run_command', 'run_targ', 'set_variable',
- 'shared_library', 'shared_module', 'static_library', 'subdir', 'subdir_done',
- 'subproject', 'test', 'vcs_tag',
+local func_names = word_match{
+ 'add_global_arguments', 'add_global_link_arguments', 'add_languages', 'add_project_arguments',
+ 'add_project_link_arguments', 'add_test_setup', 'alias_targ', 'assert', 'benchmark',
+ 'both_libraries', 'build_target', 'configuration_data', 'configure_file', 'custom_target',
+ 'declare_dependency', 'dependency', 'disabler', 'error', 'environment', 'executable',
+ 'find_library', 'find_program', 'files', 'generator', 'get_option', 'get_variable', 'import',
+ 'include_directories', 'install_data', 'install_headers', 'install_man', 'install_subdir',
+ 'is_disabler', 'is_variable', 'jar', 'join_paths', 'library', 'message', 'warning', 'summary',
+ 'project', 'run_command', 'run_targ', 'set_variable', 'shared_library', 'shared_module',
+ 'static_library', 'subdir', 'subdir_done', 'subproject', 'test', 'vcs_tag'
}
--- A function call must be followed by an opening parenthesis.
--- The matching of function calls instead of just their names is needed to not
--- falsely highlight function names which can also be keyword arguments.
--- For example 'include_directories' can be a function call itself or a keyword
--- argument of an 'executable' or 'library' function call.
-local func = token(l.FUNCTION, func_names * #P(l.space^0 * S('(')))
+-- A function call must be followed by an opening parenthesis. The matching of function calls
+-- instead of just their names is needed to avoid falsely highlighting function names that can
+-- also be keyword arguments. For example 'include_directories' can be a function call itself or a
+-- keyword argument of an 'executable' or 'library' function call.
+lex:add_rule('function', token(lexer.FUNCTION, func_names * #(lexer.space^0 * '(')))
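+-- e.g. include_directories('inc') matches here, but the keyword argument 'include_directories:' does not.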
-- Builtin objects.
-- https://mesonbuild.com/Reference-manual.html#builtin-objects
-local object = token('object', word_match{
- 'meson', 'build_machine', 'host_machine', 'target_machine',
-})
+lex:add_rule('object',
+ token('object', word_match('meson build_machine host_machine target_machine')))
+lex:add_style('object', lexer.styles.type)
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- 'false', 'true',
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('false true')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('()[]{}-=+/%:.,?<>'))
+-- Strings.
+local str = lexer.range("'", true)
+local multiline_str = lexer.range("'''")
+lex:add_rule('string', token(lexer.STRING, multiline_str + str))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'method', method},
- {'function', func},
- {'object', object},
- {'constant', constant},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'operator', operator},
-}
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-M._tokenstyles = {
- method = l.STYLE_FUNCTION,
- object = l.STYLE_TYPE,
-}
+-- Numbers.
+local dec = R('19') * R('09')^0 + '0' -- no leading zeros; allow a bare 0
+local bin = '0b' * S('01')^1
+local oct = '0o' * R('07')^1
+local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
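+-- e.g. 42, 0x2a, 0o52, 0b101010, -1 (Meson numbers are integers only).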
+lex:add_rule('number', token(lexer.NUMBER, integer))
-M._FOLDBYINDENTATION = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('()[]{}-=+/%:.,?<>')))
-return M
+return lex
diff --git a/lua/lexers/moonscript.lua b/lua/lexers/moonscript.lua
index c0a00d2..f9bc4dd 100644
--- a/lua/lexers/moonscript.lua
+++ b/lua/lexers/moonscript.lua
@@ -1,82 +1,71 @@
--- Copyright 2016-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE.
+-- Copyright 2016-2022 Alejandro Baez (https://keybase.io/baez). See LICENSE.
-- Moonscript LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, S, R = lpeg.P, lpeg.S, lpeg.R
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'moonscript'}
+local lex = lexer.new('moonscript', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[',
- function(input, index, eq)
- local _, e = input:find(']'..eq..']', index, true)
- return (e or #input) + 1
- end)
-
--- Comments.
-local line_comment = '--' * l.nonnewline^0
-local block_comment = '--' * longstring
-local comment = token(l.COMMENT, block_comment + line_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"', false, true)
-
-local string = token(l.STRING, sq_str + dq_str) +
- token('longstring', longstring)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- Table keys.
+lex:add_rule('tbl_key', token('tbl_key', lexer.word * ':' + ':' * lexer.word))
+lex:add_style('tbl_key', lexer.styles.regex)
-- Keywords.
-local keyword = token(l.KEYWORD, word_match {
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-- Lua.
- 'and', 'break', 'do', 'else', 'elseif', 'false', 'for', 'if', 'in', 'local',
- 'nil', 'not', 'or', 'return', 'then', 'true', 'while',
+ 'and', 'break', 'do', 'else', 'elseif', 'false', 'for', 'if', 'in', 'local', 'nil', 'not', 'or',
+ 'return', 'then', 'true', 'while',
-- Moonscript.
- 'continue', 'class', 'export', 'extends', 'from', 'import', 'super', 'switch',
- 'unless', 'using', 'when', 'with'
-})
+ 'continue', 'class', 'export', 'extends', 'from', 'import', 'super', 'switch', 'unless', 'using',
+ 'when', 'with'
+}))
--- Constants.
-local constant = token(l.CONSTANT, word_match{
- '_G', '_VERSION',
- -- Added in 5.2.
- '_ENV'
-})
+-- Error words.
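+-- 'function' and 'end' are Lua keywords that MoonScript does not use, so flag them as errors.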
+lex:add_rule('error', token(lexer.ERROR, word_match('function end')))
+
+-- Self reference.
+lex:add_rule('self_ref', token('self_ref', '@' * lexer.word^-1 + 'self'))
+lex:add_style('self_ref', lexer.styles.label)
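+-- Matches 'self', a bare '@', and '@name'-style field references.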
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'assert', 'collectgarbage', 'dofile', 'error', 'getmetatable', 'ipairs',
- 'load', 'loadfile', 'next', 'pairs', 'pcall', 'print', 'rawequal', 'rawget',
- 'rawset', 'require', 'select', 'setmetatable', 'tonumber', 'tostring', 'type',
- 'xpcall',
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'assert', 'collectgarbage', 'dofile', 'error', 'getmetatable', 'ipairs', 'load', 'loadfile',
+ 'next', 'pairs', 'pcall', 'print', 'rawequal', 'rawget', 'rawset', 'require', 'select',
+ 'setmetatable', 'tonumber', 'tostring', 'type', 'xpcall',
-- Added in 5.2.
'rawlen'
-})
+}))
+
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ '_G', '_VERSION',
+ -- Added in 5.2.
+ '_ENV'
+}))
-- Libraries.
-local library = token('library', word_match({
+lex:add_rule('library', token('library', word_match{
-- Coroutine.
- 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running',
- 'coroutine.status', 'coroutine.wrap', 'coroutine.yield',
+ 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running', 'coroutine.status',
+ 'coroutine.wrap', 'coroutine.yield',
-- Coroutine added in 5.3.
'coroutine.isyieldable',
-- Module.
- 'package', 'package.cpath', 'package.loaded', 'package.loadlib',
- 'package.path', 'package.preload',
+ 'package', 'package.cpath', 'package.loaded', 'package.loadlib', 'package.path',
+ 'package.preload',
-- Module added in 5.2.
'package.config', 'package.searchers', 'package.searchpath',
-- UTF-8 added in 5.3.
- 'utf8', 'utf8.char', 'utf8.charpattern', 'utf8.codepoint', 'utf8.codes',
- 'utf8.len', 'utf8.offset',
+ 'utf8', 'utf8.char', 'utf8.charpattern', 'utf8.codepoint', 'utf8.codes', 'utf8.len',
+ 'utf8.offset',
-- String.
- 'string', 'string.byte', 'string.char', 'string.dump', 'string.find',
- 'string.format', 'string.gmatch', 'string.gsub', 'string.len', 'string.lower',
- 'string.match', 'string.rep', 'string.reverse', 'string.sub', 'string.upper',
+ 'string', 'string.byte', 'string.char', 'string.dump', 'string.find', 'string.format',
+ 'string.gmatch', 'string.gsub', 'string.len', 'string.lower', 'string.match', 'string.rep',
+ 'string.reverse', 'string.sub', 'string.upper',
-- String added in 5.3.
'string.pack', 'string.packsize', 'string.unpack',
-- Table.
@@ -86,29 +75,24 @@ local library = token('library', word_match({
-- Table added in 5.3.
'table.move',
-- Math.
- 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil',
- 'math.cos', 'math.deg', 'math.exp', 'math.floor', 'math.fmod', 'math.huge',
- 'math.log', 'math.max', 'math.min', 'math.modf', 'math.pi', 'math.rad',
- 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt', 'math.tan',
+ 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil', 'math.cos', 'math.deg',
+ 'math.exp', 'math.floor', 'math.fmod', 'math.huge', 'math.log', 'math.max', 'math.min',
+ 'math.modf', 'math.pi', 'math.rad', 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt',
+ 'math.tan',
-- Math added in 5.3.
- 'math.maxinteger', 'math.mininteger', 'math.tointeger', 'math.type',
- 'math.ult',
+ 'math.maxinteger', 'math.mininteger', 'math.tointeger', 'math.type', 'math.ult',
-- IO.
- 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output',
- 'io.popen', 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile',
- 'io.type', 'io.write',
+ 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output', 'io.popen',
+ 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile', 'io.type', 'io.write',
-- OS.
- 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit',
- 'os.getenv', 'os.remove', 'os.rename', 'os.setlocale', 'os.time',
- 'os.tmpname',
+ 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit', 'os.getenv', 'os.remove',
+ 'os.rename', 'os.setlocale', 'os.time', 'os.tmpname',
-- Debug.
- 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal',
- 'debug.getmetatable', 'debug.getregistry', 'debug.getupvalue',
- 'debug.sethook', 'debug.setlocal', 'debug.setmetatable', 'debug.setupvalue',
- 'debug.traceback',
+ 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal', 'debug.getmetatable',
+ 'debug.getregistry', 'debug.getupvalue', 'debug.sethook', 'debug.setlocal', 'debug.setmetatable',
+ 'debug.setupvalue', 'debug.traceback',
-- Debug added in 5.2.
- 'debug.getuservalue', 'debug.setuservalue', 'debug.upvalueid',
- 'debug.upvaluejoin',
+ 'debug.getuservalue', 'debug.setuservalue', 'debug.upvalueid', 'debug.upvaluejoin',
--- MoonScript 0.3.1 standard library.
-- Printing functions.
@@ -120,51 +104,41 @@ local library = token('library', word_match({
-- Misc functions.
'fold',
-- Debug functions.
- 'debug.upvalue',
-}, '.'))
+ 'debug.upvalue'
+}))
+lex:add_style('library', lexer.styles.type)
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-local proper_ident = token('proper_ident', R('AZ') * l.word)
-local tbl_key = token('tbl_key', l.word * ':' + ':' * l.word )
+local identifier = token(lexer.IDENTIFIER, lexer.word)
+local proper_ident = token('proper_ident', lexer.upper * lexer.word)
+lex:add_rule('identifier', proper_ident + identifier)
+lex:add_style('proper_ident', lexer.styles.class)
-local fndef = token('fndef', P('->') + '=>')
-local err = token(l.ERROR, word_match{'function', 'end'})
+-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
+local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[', function(input, index, eq)
+ local _, e = input:find(']' .. eq .. ']', index, true)
+ return (e or #input) + 1
+end)
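+-- The match-time capture scans ahead for a closing bracket with the same number of '='s,
+-- so [[ ... ]] and [==[ ... ]==] are each consumed as a single token.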
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) + token('longstring', longstring))
+lex:add_style('longstring', lexer.styles.string)
+
+-- Comments.
+local line_comment = lexer.to_eol('--')
+local block_comment = '--' * longstring
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Function definition.
+lex:add_rule('fndef', token('fndef', P('->') + '=>'))
+lex:add_style('fndef', lexer.styles.preprocessor)
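+-- '->' is a plain function arrow; '=>' additionally binds self.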
-- Operators.
-local symbol = token('symbol', S('(){}[]'))
-local operator = token(l.OPERATOR, S('+-*!\\/%^#=<>;:,.'))
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*!\\/%^#=<>;:,.')))
+lex:add_rule('symbol', token('symbol', S('(){}[]')))
+lex:add_style('symbol', lexer.styles.embedded)
--- Self reference.
-local self_var = token('self_ref', '@' * l.word + 'self')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'error', err},
- {'self', self_var},
- {'function', func},
- {'constant', constant},
- {'library', library},
- {'identifier', proper_ident + tbl_key + identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'fndef', fndef},
- {'symbol', symbol},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- longstring = l.STYLE_STRING,
- library = l.STYLE_TYPE,
- self_ref = l.STYLE_LABEL,
- proper_ident = l.STYLE_CLASS,
- fndef = l.STYLE_PREPROCESSOR,
- symbol = l.STYLE_EMBEDDED,
- tbl_key = l.STYLE_REGEX,
-}
-
-M._FOLDBYINDENTATION = true
-
-return M
+return lex
diff --git a/lua/lexers/myrddin.lua b/lua/lexers/myrddin.lua
index a5b9b18..4b7f559 100644
--- a/lua/lexers/myrddin.lua
+++ b/lua/lexers/myrddin.lua
@@ -1,72 +1,52 @@
--- Copyright 2017 Michael Forney. See LICENSE
+-- Copyright 2017-2022 Michael Forney. See LICENSE
-- Myrddin LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'myrddin'}
+local lex = lexer.new('myrddin')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'break', 'const', 'continue', 'elif', 'else', 'extern', 'false', 'for', 'generic', 'goto', 'if',
+ 'impl', 'in', 'match', 'pkg', 'pkglocal', 'sizeof', 'struct', 'trait', 'true', 'type', 'union',
+ 'use', 'var', 'while'
+}))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'void', 'bool', 'char', 'byte', 'int', 'uint', 'int8', 'uint8', 'int16', 'uint16', 'int32',
+ 'uint32', 'int64', 'uint64', 'flt32', 'flt64'
+} + '@' * lexer.word))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = P{
- V'part' * P'*/'^-1,
- part = '/*' * (V'full' + (l.any - '/*' - '*/'))^0,
- full = V'part' * '*/',
-}
-local comment = token(l.COMMENT, line_comment + block_comment)
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
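+-- The trailing 'true' makes the range balanced, so nested /* */ comments match correctly.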
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Numbers.
-local digit = l.digit + '_'
-local bdigit = R'01' + '_'
-local xdigit = l.xdigit + '_'
-local odigit = R'07' + '_'
+local digit = lexer.digit + '_'
+local bdigit = S('01') + '_'
+local xdigit = lexer.xdigit + '_'
+local odigit = lpeg.R('07') + '_'
local integer = '0x' * xdigit^1 + '0o' * odigit^1 + '0b' * bdigit^1 + digit^1
-local float = digit^1 * (('.' * digit^1) * (S'eE' * S'+-'^-1 * digit^1)^-1 +
- ('.' * digit^1)^-1 * S'eE' * S'+-'^-1 * digit^1)
-local number = token(l.NUMBER, float + integer)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'const', 'continue', 'elif', 'else', 'extern', 'false', 'for',
- 'generic', 'goto', 'if', 'impl', 'in', 'match', 'pkg', 'pkglocal', 'sizeof',
- 'struct', 'trait', 'true', 'type', 'union', 'use', 'var', 'while',
-})
-
--- Types.
-local type = token(l.TYPE, word_match{
- 'void', 'bool', 'char', 'byte',
- 'int8', 'uint8',
- 'int16', 'uint16',
- 'int32', 'uint32',
- 'int64', 'uint64',
- 'int', 'uint',
- 'flt32', 'flt64',
-} + '@' * l.word)
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+local float = digit^1 * ((('.' * digit^1) * (S('eE') * S('+-')^-1 * digit^1)^-1) +
+ (('.' * digit^1)^-1 * S('eE') * S('+-')^-1 * digit^1))
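+-- A float needs a fraction, an exponent, or both, e.g. 1.5, 1e9, 1.5e-3; a bare 1 lexes as an integer.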
+lex:add_rule('number', token(lexer.NUMBER, float + integer))
-- Operators.
-local operator = token(l.OPERATOR, S'`#_+-/*%<>~!=^&|~:;,.()[]{}')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('`#_+-/*%<>~!=^&|:;,.()[]{}')))
-return M
+return lex
diff --git a/lua/lexers/nemerle.lua b/lua/lexers/nemerle.lua
index ee7c38a..96feb51 100644
--- a/lua/lexers/nemerle.lua
+++ b/lua/lexers/nemerle.lua
@@ -1,81 +1,65 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Nemerle LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'nemerle'}
+local lex = lexer.new('nemerle')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Preprocessor.
-local preproc_word = word_match{
- 'define', 'elif', 'else', 'endif', 'endregion', 'error', 'if', 'ifdef',
- 'ifndef', 'line', 'pragma', 'region', 'undef', 'using', 'warning'
-}
-local preproc = token(l.PREPROCESSOR,
- l.starts_line('#') * S('\t ')^0 * preproc_word)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- '_', 'abstract', 'and', 'array', 'as', 'base', 'catch', 'class', 'def', 'do',
- 'else', 'extends', 'extern', 'finally', 'foreach', 'for', 'fun', 'if',
- 'implements', 'in', 'interface', 'internal', 'lock', 'macro', 'match',
- 'module', 'mutable', 'namespace', 'new', 'out', 'override', 'params',
- 'private', 'protected', 'public', 'ref', 'repeat', 'sealed', 'static',
- 'struct', 'syntax', 'this', 'throw', 'try', 'type', 'typeof', 'unless',
- 'until', 'using', 'variant', 'virtual', 'when', 'where', 'while',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ '_', 'abstract', 'and', 'array', 'as', 'base', 'catch', 'class', 'def', 'do', 'else', 'extends',
+ 'extern', 'finally', 'foreach', 'for', 'fun', 'if', 'implements', 'in', 'interface', 'internal',
+ 'lock', 'macro', 'match', 'module', 'mutable', 'namespace', 'new', 'out', 'override', 'params',
+ 'private', 'protected', 'public', 'ref', 'repeat', 'sealed', 'static', 'struct', 'syntax', 'this',
+ 'throw', 'try', 'type', 'typeof', 'unless', 'until', 'using', 'variant', 'virtual', 'when',
+ 'where', 'while',
-- Values.
'null', 'true', 'false'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'list', 'long',
- 'object', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort', 'void'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'list', 'long', 'object', 'sbyte',
+ 'short', 'string', 'uint', 'ulong', 'ushort', 'void'
+}))
+
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Preprocessor.
+lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * S('\t ')^0 * word_match{
+ 'define', 'elif', 'else', 'endif', 'endregion', 'error', 'if', 'ifdef', 'ifndef', 'line',
+ 'pragma', 'region', 'undef', 'using', 'warning'
+}))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.PREPROCESSOR] = {
- region = 1, endregion = -1,
- ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
- },
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion')
+lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/networkd.lua b/lua/lexers/networkd.lua
index 3f27bae..ff9af93 100644
--- a/lua/lexers/networkd.lua
+++ b/lua/lexers/networkd.lua
@@ -1,274 +1,101 @@
--- Copyright 2016 Christian Hesse
+-- Copyright 2016-2022 Christian Hesse. See LICENSE.
-- systemd networkd file LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'networkd'}
+local lex = lexer.new('networkd', {lex_by_line = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, l.starts_line(S(';#')) * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local section_word = word_match{
- 'Address',
- 'Link',
- 'Match',
- 'Network',
- 'Route',
- 'DHCP',
- 'DHCPServer',
- 'Bridge',
- 'BridgeFDB',
- 'NetDev',
- 'VLAN',
- 'MACVLAN',
- 'MACVTAP',
- 'IPVLAN',
- 'VXLAN',
- 'Tunnel',
- 'Peer',
- 'Tun',
- 'Tap',
- 'Bond'
-}
-local string = token(l.STRING, sq_str + dq_str + '[' * section_word * ']')
-
--- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0
-local oct_num = '0' * S('01234567_')^1
-local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
-local number = token(l.NUMBER, (l.float + integer))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- -- boolean values
- 'true',
- 'false',
- 'on',
- 'off',
- 'yes',
- 'no',
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- Boolean values.
+ 'true', 'false', 'on', 'off', 'yes', 'no'
+}))
-- Options.
-local option_word = word_match{
- -- match section options
- 'MACAddress',
- 'OriginalName',
- 'Path',
- 'Driver',
- 'Type',
- 'Host',
- 'Name',
- 'Virtualization',
- 'KernelCommandLine',
- 'Architecture',
-
- -- link section options
- 'Description',
- 'Alias',
- 'MACAddressPolicy',
- 'MACAddress',
- 'NamePolicy',
- 'Name',
- 'MTUBytes',
- 'BitsPerSecond',
- 'Duplex',
- 'WakeOnLan',
-
- -- network section options
- 'Description',
- 'DHCP',
- 'DHCPServer',
- 'LinkLocalAddressing',
- 'IPv4LLRoute',
- 'IPv6Token',
- 'LLMNR',
- 'MulticastDNS',
- 'DNSSEC',
- 'DNSSECNegativeTrustAnchors',
- 'LLDP',
- 'BindCarrier',
- 'Address',
- 'Gateway',
- 'DNS',
- 'Domains',
- 'NTP',
- 'IPForward',
- 'IPMasquerade',
- 'IPv6PrivacyExtensions',
- 'IPv6AcceptRouterAdvertisements',
- 'IPv6DuplicateAddressDetection',
- 'IPv6HopLimit',
- 'Bridge',
- 'Bond',
- 'VLAN',
- 'MACVLAN',
- 'VXLAN',
- 'Tunnel',
-
- -- address section options
- 'Address',
- 'Peer',
- 'Broadcast',
- 'Label',
-
- -- route section options
- 'Gateway',
- 'Destination',
- 'Source',
- 'Metric',
- 'Scope',
- 'PreferredSource',
-
- -- dhcp section options
- 'UseDNS',
- 'UseNTP',
- 'UseMTU',
- 'SendHostname',
- 'UseHostname',
- 'Hostname',
- 'UseDomains',
- 'UseRoutes',
- 'UseTimezone',
- 'CriticalConnection',
- 'ClientIdentifier',
- 'VendorClassIdentifier',
- 'RequestBroadcast',
- 'RouteMetric',
-
- -- dhcpserver section options
- 'PoolOffset',
- 'PoolSize',
- 'DefaultLeaseTimeSec',
- 'MaxLeaseTimeSec',
- 'EmitDNS',
- 'DNS',
- 'EmitNTP',
- 'NTP',
- 'EmitTimezone',
- 'Timezone',
-
- -- bridge section options
- 'UnicastFlood',
- 'HairPin',
- 'UseBPDU',
- 'FastLeave',
- 'AllowPortToBeRoot',
- 'Cost',
-
- -- bridgefdb section options
- 'MACAddress',
- 'VLANId',
-
- -- netdev section options
- 'Description',
- 'Name',
- 'Kind',
- 'MTUBytes',
- 'MACAddress',
-
- -- bridge (netdev) section options
- 'HelloTimeSec',
- 'MaxAgeSec',
- 'ForwardDelaySec',
-
- -- vlan section options
+lex:add_rule('option', token(lexer.PREPROCESSOR, word_match{
+ -- Match section.
+ 'MACAddress', 'OriginalName', 'Path', 'Driver', 'Type', 'Host', 'Name', 'Virtualization',
+ 'KernelCommandLine', 'Architecture',
+ -- Link section.
+ 'Description', 'Alias', 'MACAddressPolicy', 'MACAddress', 'NamePolicy', 'Name', 'MTUBytes',
+ 'BitsPerSecond', 'Duplex', 'WakeOnLan',
+ -- Network section.
+ 'Description', 'DHCP', 'DHCPServer', 'LinkLocalAddressing', 'IPv4LLRoute', 'IPv6Token', 'LLMNR',
+ 'MulticastDNS', 'DNSSEC', 'DNSSECNegativeTrustAnchors', 'LLDP', 'BindCarrier', 'Address',
+ 'Gateway', 'DNS', 'Domains', 'NTP', 'IPForward', 'IPMasquerade', 'IPv6PrivacyExtensions',
+ 'IPv6AcceptRouterAdvertisements', 'IPv6DuplicateAddressDetection', 'IPv6HopLimit', 'Bridge',
+ 'Bond', 'VLAN', 'MACVLAN', 'VXLAN', 'Tunnel',
+ -- Address section.
+ 'Address', 'Peer', 'Broadcast', 'Label',
+ -- Route section.
+ 'Gateway', 'Destination', 'Source', 'Metric', 'Scope', 'PreferredSource',
+ -- DHCP section.
+ 'UseDNS', 'UseNTP', 'UseMTU', 'SendHostname', 'UseHostname', 'Hostname', 'UseDomains',
+ 'UseRoutes', 'UseTimezone', 'CriticalConnection', 'ClientIdentifier', 'VendorClassIdentifier',
+ 'RequestBroadcast', 'RouteMetric',
+ -- DHCPServer section.
+ 'PoolOffset', 'PoolSize', 'DefaultLeaseTimeSec', 'MaxLeaseTimeSec', 'EmitDNS', 'DNS', 'EmitNTP',
+ 'NTP', 'EmitTimezone', 'Timezone',
+ -- Bridge section.
+ 'UnicastFlood', 'HairPin', 'UseBPDU', 'FastLeave', 'AllowPortToBeRoot', 'Cost',
+  -- BridgeFDB section.
+ 'MACAddress', 'VLANId',
+ -- NetDev section.
+ 'Description', 'Name', 'Kind', 'MTUBytes', 'MACAddress',
+ -- Bridge (netdev) section.
+ 'HelloTimeSec', 'MaxAgeSec', 'ForwardDelaySec',
+ -- VLAN section.
'Id',
-
- -- macvlan, macvtap and ipvlan section options
+  -- MACVLAN, MACVTAP and IPVLAN sections.
'Mode',
-
- -- vxlan section options
- 'Id',
- 'Group',
- 'TOS',
- 'TTL',
- 'MacLearning',
- 'FDBAgeingSec',
- 'MaximumFDBEntries',
- 'ARPProxy',
- 'L2MissNotification',
- 'L3MissNotification',
- 'RouteShortCircuit',
- 'UDPCheckSum',
- 'UDP6ZeroChecksumTx',
- 'UDP6ZeroCheckSumRx',
- 'GroupPolicyExtension',
- 'DestinationPort',
+ -- VXLAN section.
+ 'Id', 'Group', 'TOS', 'TTL', 'MacLearning', 'FDBAgeingSec', 'MaximumFDBEntries', 'ARPProxy',
+ 'L2MissNotification', 'L3MissNotification', 'RouteShortCircuit', 'UDPCheckSum',
+ 'UDP6ZeroChecksumTx', 'UDP6ZeroCheckSumRx', 'GroupPolicyExtension', 'DestinationPort',
'PortRange',
+ -- Tunnel section.
+ 'Local', 'Remote', 'TOS', 'TTL', 'DiscoverPathMTU', 'IPv6FlowLabel', 'CopyDSCP',
+ 'EncapsulationLimit', 'Mode',
+ -- Peer section.
+ 'Name', 'MACAddress',
+ -- Tun and Tap section.
+ 'OneQueue', 'MultiQueue', 'PacketInfo', 'VNetHeader', 'User', 'Group',
+ -- Bond section.
+ 'Mode', 'TransmitHashPolicy', 'LACPTransmitRate', 'MIIMonitorSec', 'UpDelaySec', 'DownDelaySec',
+ 'LearnPacketIntervalSec', 'AdSelect', 'FailOverMACPolicy', 'ARPValidate', 'ARPIntervalSec',
+ 'ARPIPTargets', 'ARPAllTargets', 'PrimaryReselectPolicy', 'ResendIGMP', 'PacketsPerSlave',
+ 'GratuitousARP', 'AllSlavesActive', 'MinLinks'
+}))
- -- tunnel section options
- 'Local',
- 'Remote',
- 'TOS',
- 'TTL',
- 'DiscoverPathMTU',
- 'IPv6FlowLabel',
- 'CopyDSCP',
- 'EncapsulationLimit',
- 'Mode',
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0))
- -- peer section options
- 'Name',
- 'MACAddress',
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
- -- tun and tap section options
- 'OneQueue',
- 'MultiQueue',
- 'PacketInfo',
- 'VNetHeader',
- 'User',
- 'Group',
+-- Sections.
+lex:add_rule('section', token(lexer.LABEL, '[' * word_match{
+ 'Address', 'Link', 'Match', 'Network', 'Route', 'DHCP', 'DHCPServer', 'Bridge', 'BridgeFDB',
+ 'NetDev', 'VLAN', 'MACVLAN', 'MACVTAP', 'IPVLAN', 'VXLAN', 'Tunnel', 'Peer', 'Tun', 'Tap', 'Bond'
+} * ']'))
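+-- e.g. the '[Match]' and '[Network]' section headers.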
- -- bond section options
- 'Mode',
- 'TransmitHashPolicy',
- 'LACPTransmitRate',
- 'MIIMonitorSec',
- 'UpDelaySec',
- 'DownDelaySec',
- 'LearnPacketIntervalSec',
- 'AdSelect',
- 'FailOverMACPolicy',
- 'ARPValidate',
- 'ARPIntervalSec',
- 'ARPIPTargets',
- 'ARPAllTargets',
- 'PrimaryReselectPolicy',
- 'ResendIGMP',
- 'PacketsPerSlave',
- 'GratuitousARP',
- 'AllSlavesActive',
- 'MinLinks',
-}
-local preproc = token(l.PREPROCESSOR, option_word)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(lexer.to_eol(S(';#')))))
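+-- Only whole-line comments are recognized, e.g. '; note' or '# note'; a '#' after a value is not one.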
--- Identifiers.
-local word = (l.alpha + '_') * (l.alnum + S('_.'))^0
-local identifier = token(l.IDENTIFIER, word)
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Operators.
-local operator = token(l.OPERATOR, '=')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'string', string},
- {'preproc', preproc},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._LEXBYLINE = true
+lex:add_rule('operator', token(lexer.OPERATOR, '='))
-return M
+return lex
diff --git a/lua/lexers/nim.lua b/lua/lexers/nim.lua
index d99ef19..af333bb 100644
--- a/lua/lexers/nim.lua
+++ b/lua/lexers/nim.lua
@@ -1,124 +1,102 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Nim LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'nim'}
+local lex = lexer.new('nim', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local raw_dq_str = 'r' * l.delimited_range('"', false, true)
-local string = token(l.STRING, triple_dq_str + sq_str + dq_str + raw_dq_str)
-
--- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0
-local hex = '0' * S('xX') * l.xdigit^1 * ('_' * l.xdigit^1)^0
-local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
-local oct = '0o' * R('07')^1
-local integer = S('+-')^-1 * (bin + hex + oct + dec) *
- ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1
-local float = l.digit^1 * ('_' * l.digit^1)^0 * ('.' * ('_' * l.digit)^0)^-1 *
- S('eE') * S('+-')^-1 * l.digit^1 * ('_' * l.digit^1)^0
-local number = token(l.NUMBER, l.float + integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'addr', 'and', 'as', 'asm', 'atomic', 'bind', 'block', 'break', 'case',
- 'cast', 'const', 'continue', 'converter', 'discard', 'distinct', 'div', 'do',
- 'elif', 'else', 'end', 'enum', 'except', 'export', 'finally', 'for', 'from',
- 'generic', 'if', 'import', 'in', 'include', 'interface', 'is', 'isnot',
- 'iterator', 'lambda', 'let', 'macro', 'method', 'mixin', 'mod', 'nil', 'not',
- 'notin', 'object', 'of', 'or', 'out', 'proc', 'ptr', 'raise', 'ref', 'return',
- 'shared', 'shl', 'static', 'template', 'try', 'tuple', 'type', 'var', 'when',
- 'while', 'with', 'without', 'xor', 'yield'
-}, nil, true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'addr', 'and', 'as', 'asm', 'atomic', 'bind', 'block', 'break', 'case', 'cast', 'const',
+ 'continue', 'converter', 'discard', 'distinct', 'div', 'do', 'elif', 'else', 'end', 'enum',
+ 'except', 'export', 'finally', 'for', 'from', 'generic', 'if', 'import', 'in', 'include',
+ 'interface', 'is', 'isnot', 'iterator', 'lambda', 'let', 'macro', 'method', 'mixin', 'mod', 'nil',
+ 'not', 'notin', 'object', 'of', 'or', 'out', 'proc', 'ptr', 'raise', 'ref', 'return', 'shared',
+ 'shl', 'static', 'template', 'try', 'tuple', 'type', 'var', 'when', 'while', 'with', 'without',
+ 'xor', 'yield'
+}, true)))
-- Functions.
-local func = token(l.FUNCTION, word_match({
+lex:add_rule('function', token(lexer.FUNCTION, word_match({
-- Procs.
- 'defined', 'definedInScope', 'new', 'unsafeNew', 'internalNew', 'reset',
- 'high', 'low', 'sizeof', 'succ', 'pred', 'inc', 'dec', 'newSeq', 'len',
- 'incl', 'excl', 'card', 'ord', 'chr', 'ze', 'ze64', 'toU8', 'toU16', 'toU32',
- 'abs', 'min', 'max', 'contains', 'cmp', 'setLen', 'newString',
- 'newStringOfCap', 'add', 'compileOption', 'quit', 'shallowCopy', 'del',
- 'delete', 'insert', 'repr', 'toFloat', 'toBiggestFloat', 'toInt',
- 'toBiggestInt', 'addQuitProc', 'substr', 'zeroMem', 'copyMem', 'moveMem',
- 'equalMem', 'swap', 'getRefcount', 'clamp', 'isNil', 'find', 'contains',
- 'pop', 'each', 'map', 'GC_ref', 'GC_unref', 'echo', 'debugEcho',
- 'getTypeInfo', 'Open', 'repopen', 'Close', 'EndOfFile', 'readChar',
- 'FlushFile', 'readAll', 'readFile', 'writeFile', 'write', 'readLine',
- 'writeln', 'getFileSize', 'ReadBytes', 'ReadChars', 'readBuffer',
- 'writeBytes', 'writeChars', 'writeBuffer', 'setFilePos', 'getFilePos',
- 'fileHandle', 'cstringArrayToSeq', 'allocCStringArray', 'deallocCStringArray',
- 'atomicInc', 'atomicDec', 'compareAndSwap', 'setControlCHook',
- 'writeStackTrace', 'getStackTrace', 'alloc', 'alloc0', 'dealloc', 'realloc',
- 'getFreeMem', 'getTotalMem', 'getOccupiedMem', 'allocShared', 'allocShared0',
- 'deallocShared', 'reallocShared', 'IsOnStack', 'GC_addCycleRoot',
- 'GC_disable', 'GC_enable', 'GC_setStrategy', 'GC_enableMarkAndSweep',
- 'GC_disableMarkAndSweep', 'GC_fullCollect', 'GC_getStatistics',
- 'nimDestroyRange', 'getCurrentException', 'getCurrentExceptionMsg', 'onRaise',
- 'likely', 'unlikely', 'rawProc', 'rawEnv', 'finished', 'slurp', 'staticRead',
- 'gorge', 'staticExec', 'rand', 'astToStr', 'InstatiationInfo', 'raiseAssert',
- 'shallow', 'compiles', 'safeAdd', 'locals',
+ 'defined', 'definedInScope', 'new', 'unsafeNew', 'internalNew', 'reset', 'high', 'low', 'sizeof',
+ 'succ', 'pred', 'inc', 'dec', 'newSeq', 'len', 'incl', 'excl', 'card', 'ord', 'chr', 'ze', 'ze64',
+ 'toU8', 'toU16', 'toU32', 'abs', 'min', 'max', 'contains', 'cmp', 'setLen', 'newString',
+ 'newStringOfCap', 'add', 'compileOption', 'quit', 'shallowCopy', 'del', 'delete', 'insert',
+ 'repr', 'toFloat', 'toBiggestFloat', 'toInt', 'toBiggestInt', 'addQuitProc', 'substr', 'zeroMem',
+ 'copyMem', 'moveMem', 'equalMem', 'swap', 'getRefcount', 'clamp', 'isNil', 'find', 'contains',
+ 'pop', 'each', 'map', 'GC_ref', 'GC_unref', 'echo', 'debugEcho', 'getTypeInfo', 'Open', 'repopen',
+ 'Close', 'EndOfFile', 'readChar', 'FlushFile', 'readAll', 'readFile', 'writeFile', 'write',
+ 'readLine', 'writeln', 'getFileSize', 'ReadBytes', 'ReadChars', 'readBuffer', 'writeBytes',
+ 'writeChars', 'writeBuffer', 'setFilePos', 'getFilePos', 'fileHandle', 'cstringArrayToSeq',
+ 'allocCStringArray', 'deallocCStringArray', 'atomicInc', 'atomicDec', 'compareAndSwap',
+ 'setControlCHook', 'writeStackTrace', 'getStackTrace', 'alloc', 'alloc0', 'dealloc', 'realloc',
+ 'getFreeMem', 'getTotalMem', 'getOccupiedMem', 'allocShared', 'allocShared0', 'deallocShared',
+ 'reallocShared', 'IsOnStack', 'GC_addCycleRoot', 'GC_disable', 'GC_enable', 'GC_setStrategy',
+ 'GC_enableMarkAndSweep', 'GC_disableMarkAndSweep', 'GC_fullCollect', 'GC_getStatistics',
+ 'nimDestroyRange', 'getCurrentException', 'getCurrentExceptionMsg', 'onRaise', 'likely',
+ 'unlikely', 'rawProc', 'rawEnv', 'finished', 'slurp', 'staticRead', 'gorge', 'staticExec', 'rand',
+ 'astToStr', 'InstatiationInfo', 'raiseAssert', 'shallow', 'compiles', 'safeAdd', 'locals',
-- Iterators.
'countdown', 'countup', 'items', 'pairs', 'fields', 'fieldPairs', 'lines',
-- Templates.
- 'accumulateResult', 'newException', 'CurrentSourcePath', 'assert', 'doAssert',
- 'onFailedAssert', 'eval',
+ 'accumulateResult', 'newException', 'CurrentSourcePath', 'assert', 'doAssert', 'onFailedAssert',
+ 'eval',
-- Threads.
- 'running', 'joinThread', 'joinThreads', 'createThread', 'threadId',
- 'myThreadId',
+ 'running', 'joinThread', 'joinThreads', 'createThread', 'threadId', 'myThreadId',
-- Channels.
'send', 'recv', 'peek', 'ready'
-}, nil, true))
+}, true)))
-- Types.
-local type = token(l.TYPE , word_match({
- 'int', 'int8', 'int16', 'int32', 'int64', 'uint', 'uint8', 'uint16', 'uint32',
- 'uint64', 'float', 'float32', 'float64', 'bool', 'char', 'string', 'cstring',
- 'pointer', 'Ordinal', 'auto', 'any', 'TSignedInt', 'TUnsignedInt', 'TInteger',
- 'TOrdinal', 'TReal', 'TNumber', 'range', 'array', 'openarray', 'varargs',
- 'seq', 'set', 'TSlice', 'TThread', 'TChannel',
+lex:add_rule('type', token(lexer.TYPE, word_match({
+ 'int', 'int8', 'int16', 'int32', 'int64', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'float',
+ 'float32', 'float64', 'bool', 'char', 'string', 'cstring', 'pointer', 'Ordinal', 'auto', 'any',
+ 'TSignedInt', 'TUnsignedInt', 'TInteger', 'TOrdinal', 'TReal', 'TNumber', 'range', 'array',
+ 'openarray', 'varargs', 'seq', 'set', 'TSlice', 'TThread', 'TChannel',
-- Meta Types.
- 'expr', 'stmt', 'typeDesc', 'void',
-}, nil, true))
+ 'expr', 'stmt', 'typeDesc', 'void'
+}, true)))
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- 'on', 'off', 'isMainModule', 'CompileDate', 'CompileTime', 'NimVersion',
- 'NimMajor', 'NimMinor', 'NimPatch', 'cpuEndian', 'hostOS', 'hostCPU',
- 'appType', 'QuitSuccess', 'QuitFailure', 'inf', 'neginf', 'nan'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ 'on', 'off', 'isMainModule', 'CompileDate', 'CompileTime', 'NimVersion', 'NimMajor', 'NimMinor',
+ 'NimPatch', 'cpuEndian', 'hostOS', 'hostCPU', 'appType', 'QuitSuccess', 'QuitFailure', 'inf',
+ 'neginf', 'nan'
+}))
+
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+local raw_str = 'r' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=+-*/<>@$~&%|!?^.:\\`()[]{},;'))
+-- Comments.
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range('#[', ']#')
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
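+-- e.g. '# line comment' or '#[ block comment ]#'.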
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'type', type},
- {'constant', constant},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local hex = '0' * S('xX') * lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0
+local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0 * -lexer.xdigit
+local oct = '0o' * lpeg.R('07')^1
+local integer = S('+-')^-1 * (bin + hex + oct + dec) *
+ ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1
+local float = dec * ('.' * dec)^-1 * S('eE') * S('+-')^-1 * dec -- fraction optional, exponent required
+lex:add_rule('number', token(lexer.NUMBER, float + integer))
-M._FOLDBYINDENTATION = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=+-*/<>@$~&%|!?^.:\\`()[]{},;')))
-return M
+return lex
diff --git a/lua/lexers/nsis.lua b/lua/lexers/nsis.lua
index e690b59..9f57012 100644
--- a/lua/lexers/nsis.lua
+++ b/lua/lexers/nsis.lua
@@ -1,182 +1,148 @@
--- Copyright 2006-2017 Robert Gieseke. See LICENSE.
+-- Copyright 2006-2022 Robert Gieseke. See LICENSE.
-- NSIS LPeg lexer
-- Based on NSIS 2.46 docs: http://nsis.sourceforge.net/Docs/.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'nsis'}
+local lex = lexer.new('nsis')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments (4.1).
-local line_comment = (';' * l.nonnewline^0) + ('#' * l.nonnewline^0)
-local block_comment = '/*' * (l.any - '*/')^0 * '*/'
-local comment = token(l.COMMENT, line_comment + block_comment)
+local line_comment = lexer.to_eol(S(';#'))
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local ex_str = l.delimited_range('`')
-local string = token(l.STRING, sq_str + dq_str + ex_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.integer)
-
--- Variables (4.2).
-local variable = token(l.VARIABLE, word_match({
- '$0', '$1', '$2', '$3', '$4', '$5', '$6', '$7', '$8', '$9',
- '$R0', '$R1', '$R2', '$R3', '$R4', '$R5', '$R6', '$R7', '$R8', '$R9',
- '$INSTDIR', '$OUTDIR', '$CMDLINE', '$LANGUAGE',
- 'Var', '/GLOBAL'
-}, '$/') + ('$' * l.word))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
-- Constants (4.2.3).
-local constant = token(l.CONSTANT, word_match({
- '$PROGRAMFILES', '$PROGRAMFILES32', '$PROGRAMFILES64',
- '$COMMONFILES', '$COMMONFILES32', '$COMMONFILES64',
- '$DESKTOP', '$EXEDIR', '$EXEFILE', '$EXEPATH', '${NSISDIR}', '$WINDIR',
- '$SYSDIR', '$TEMP', '$STARTMENU', '$SMPROGRAMS', '$SMSTARTUP',
- '$QUICKLAUNCH','$DOCUMENTS', '$SENDTO', '$RECENT', '$FAVORITES', '$MUSIC',
- '$PICTURES', '$VIDEOS', '$NETHOOD', '$FONTS', '$TEMPLATES', '$APPDATA',
- '$LOCALAPPDATA', '$PRINTHOOD', '$INTERNET_CACHE', '$COOKIES', '$HISTORY',
- '$PROFILE', '$ADMINTOOLS', '$RESOURCES', '$RESOURCES_LOCALIZED',
- '$CDBURN_AREA', '$HWNDPARENT', '$PLUGINSDIR',
-}, '$_{}'))
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ '$PROGRAMFILES', '$PROGRAMFILES32', '$PROGRAMFILES64', '$COMMONFILES', '$COMMONFILES32',
+ '$COMMONFILES64', '$DESKTOP', '$EXEDIR', '$EXEFILE', '$EXEPATH', '${NSISDIR}', '$WINDIR',
+  '$SYSDIR', '$TEMP', '$STARTMENU', '$SMPROGRAMS', '$SMSTARTUP', '$QUICKLAUNCH', '$DOCUMENTS',
+ '$SENDTO', '$RECENT', '$FAVORITES', '$MUSIC', '$PICTURES', '$VIDEOS', '$NETHOOD', '$FONTS',
+ '$TEMPLATES', '$APPDATA', '$LOCALAPPDATA', '$PRINTHOOD', '$INTERNET_CACHE', '$COOKIES',
+ '$HISTORY', '$PROFILE', '$ADMINTOOLS', '$RESOURCES', '$RESOURCES_LOCALIZED', '$CDBURN_AREA',
+ '$HWNDPARENT', '$PLUGINSDIR'
+}))
-- TODO? Constants used in strings: $$ $\r $\n $\t
--- Labels (4.3).
-local label = token(l.LABEL, l.word * ':')
+-- Variables (4.2).
+lex:add_rule('variable', token(lexer.VARIABLE, word_match{
+ '$0', '$1', '$2', '$3', '$4', '$5', '$6', '$7', '$8', '$9', '$R0', '$R1', '$R2', '$R3', '$R4',
+ '$R5', '$R6', '$R7', '$R8', '$R9', '$INSTDIR', '$OUTDIR', '$CMDLINE', '$LANGUAGE', 'Var',
+ '/GLOBAL'
+} + '$' * lexer.word))
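+-- The trailing '$' * lexer.word also matches user variables declared with Var, e.g. $MyVar.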
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
--- Pages (4.5).
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- Pages (4.5).
'Page', 'UninstPage', 'PageEx', 'PageEnd', 'PageExEnd',
--- Section commands (4.6).
- 'AddSize', 'Section', 'SectionEnd', 'SectionIn', 'SectionGroup',
- 'SectionGroupEnd',
--- Functions (4.7).
+ -- Section commands (4.6).
+ 'AddSize', 'Section', 'SectionEnd', 'SectionIn', 'SectionGroup', 'SectionGroupEnd',
+ -- Functions (4.7).
'Function', 'FunctionEnd',
--- Callbacks (4.7.2).
- '.onGUIInit', '.onInit', '.onInstFailed', '.onInstSuccess', '.onGUIEnd',
- '.onMouseOverSection', '.onRebootFailed', '.onSelChange', '.onUserAbort',
- '.onVerifyInstDir', 'un.onGUIInit', 'un.onInit', 'un.onUninstFailed',
- 'un.onUninstSuccess', 'un.onGUIEnd', 'un.onRebootFailed', 'un.onSelChange',
- 'un.onUserAbort',
--- General Attributes (4.8.1).
- 'AddBrandingImage', 'AllowRootDirInstall', 'AutoCloseWindow', 'BGFont',
- 'BGFont', 'BrandingText', '/TRIMLEFT', '/TRIMRIGHT', '/TRIMCENTER', 'Caption',
- 'ChangeUI', 'CheckBitmap', 'CompletedText', 'ComponentText', 'CRCCheck',
- 'DetailsButtonText', 'DirText', 'DirVar', 'DirVerify', 'FileErrorText',
- 'Icon', 'InstallButtonText', 'InstallColors', 'InstallDir',
- 'InstallDirRegKey', 'InstProgressFlags', 'InstType', 'LicenseBkColor',
- 'LicenseData', 'LicenseForceSelection', 'LicenseText', 'MiscButtonText',
- 'Name', 'OutFile', 'RequestExecutionLevel', 'SetFont', 'ShowInstDetails',
- 'ShowUninstDetails', 'SilentInstall', 'SilentUnInstall', 'SpaceTexts',
- 'SubCaption', 'UninstallButtonText', 'UninstallCaption', 'UninstallIcon',
- 'UninstallSubCaption', 'UninstallText', 'WindowIcon', 'XPStyle', 'admin',
- 'auto', 'bottom', 'checkbox', 'false', 'force', 'height', 'hide', 'highest',
- 'leave', 'left', 'nevershow', 'none', 'normal', 'off', 'on', 'radiobuttons',
- 'right', 'show', 'silent', 'silentlog', 'top', 'true', 'user', 'width',
--- Compiler Flags (4.8.2).
- 'AllowSkipFiles', 'FileBufSize', 'SetCompress', 'SetCompressor',
- '/SOLID', '/FINAL', 'zlib', 'bzip2', 'lzma', 'SetCompressorDictSize',
- 'SetDatablockOptimize', 'SetDateSave', 'SetOverwrite', 'ifnewer', 'ifdiff',
- 'lastused', 'try',
--- Version Information (4.8.3).
- 'VIAddVersionKey', 'VIProductVersion', '/LANG',
- 'ProductName', 'Comments', 'CompanyName', 'LegalCopyright', 'FileDescription',
- 'FileVersion', 'ProductVersion', 'InternalName', 'LegalTrademarks',
- 'OriginalFilename', 'PrivateBuild', 'SpecialBuild',
--- Basic Instructions (4.9.1).
- 'Delete', '/REBOOTOK', 'Exec', 'ExecShell', 'ExecShell', 'File', '/nonfatal',
- 'Rename', 'ReserveFile', 'RMDir', 'SetOutPath',
--- Registry, INI, File Instructions (4.9.2).
- 'DeleteINISec', 'DeleteINIStr', 'DeleteRegKey', '/ifempty',
- 'DeleteRegValue', 'EnumRegKey', 'EnumRegValue', 'ExpandEnvStrings',
- 'FlushINI', 'ReadEnvStr', 'ReadINIStr', 'ReadRegDWORD', 'ReadRegStr',
- 'WriteINIStr', 'WriteRegBin', 'WriteRegDWORD', 'WriteRegStr',
- 'WriteRegExpandStr', 'HKCR', 'HKEY_CLASSES_ROOT', 'HKLM', 'HKEY_LOCAL_MACHINE',
- 'HKCU', 'HKEY_CURRENT_USER', 'HKU', 'HKEY_USERS', 'HKCC',
- 'HKEY_CURRENT_CONFIG', 'HKDD', 'HKEY_DYN_DATA', 'HKPD',
+ -- Callbacks (4.7.2).
+ '.onGUIInit', '.onInit', '.onInstFailed', '.onInstSuccess', '.onGUIEnd', '.onMouseOverSection',
+ '.onRebootFailed', '.onSelChange', '.onUserAbort', '.onVerifyInstDir', 'un.onGUIInit',
+ 'un.onInit', 'un.onUninstFailed', 'un.onUninstSuccess', 'un.onGUIEnd', 'un.onRebootFailed',
+ 'un.onSelChange', 'un.onUserAbort',
+ -- General Attributes (4.8.1).
+  'AddBrandingImage', 'AllowRootDirInstall', 'AutoCloseWindow', 'BGFont', 'BrandingText',
+ '/TRIMLEFT', '/TRIMRIGHT', '/TRIMCENTER', 'Caption', 'ChangeUI', 'CheckBitmap', 'CompletedText',
+ 'ComponentText', 'CRCCheck', 'DetailsButtonText', 'DirText', 'DirVar', 'DirVerify',
+ 'FileErrorText', 'Icon', 'InstallButtonText', 'InstallColors', 'InstallDir', 'InstallDirRegKey',
+ 'InstProgressFlags', 'InstType', 'LicenseBkColor', 'LicenseData', 'LicenseForceSelection',
+ 'LicenseText', 'MiscButtonText', 'Name', 'OutFile', 'RequestExecutionLevel', 'SetFont',
+ 'ShowInstDetails', 'ShowUninstDetails', 'SilentInstall', 'SilentUnInstall', 'SpaceTexts',
+ 'SubCaption', 'UninstallButtonText', 'UninstallCaption', 'UninstallIcon', 'UninstallSubCaption',
+ 'UninstallText', 'WindowIcon', 'XPStyle', 'admin', 'auto', 'bottom', 'checkbox', 'false', 'force',
+ 'height', 'hide', 'highest', 'leave', 'left', 'nevershow', 'none', 'normal', 'off', 'on',
+ 'radiobuttons', 'right', 'show', 'silent', 'silentlog', 'top', 'true', 'user', 'width',
+ -- Compiler Flags (4.8.2).
+ 'AllowSkipFiles', 'FileBufSize', 'SetCompress', 'SetCompressor', '/SOLID', '/FINAL', 'zlib',
+ 'bzip2', 'lzma', 'SetCompressorDictSize', 'SetDatablockOptimize', 'SetDateSave', 'SetOverwrite',
+ 'ifnewer', 'ifdiff', 'lastused', 'try',
+ -- Version Information (4.8.3).
+ 'VIAddVersionKey', 'VIProductVersion', '/LANG', 'ProductName', 'Comments', 'CompanyName',
+ 'LegalCopyright', 'FileDescription', 'FileVersion', 'ProductVersion', 'InternalName',
+ 'LegalTrademarks', 'OriginalFilename', 'PrivateBuild', 'SpecialBuild',
+ -- Basic Instructions (4.9.1).
+  'Delete', '/REBOOTOK', 'Exec', 'ExecShell', 'File', '/nonfatal', 'Rename',
+ 'ReserveFile', 'RMDir', 'SetOutPath',
+  -- Registry, INI, File Instructions (4.9.2).
+ 'DeleteINISec', 'DeleteINIStr', 'DeleteRegKey', '/ifempty', 'DeleteRegValue', 'EnumRegKey',
+ 'EnumRegValue', 'ExpandEnvStrings', 'FlushINI', 'ReadEnvStr', 'ReadINIStr', 'ReadRegDWORD',
+ 'ReadRegStr', 'WriteINIStr', 'WriteRegBin', 'WriteRegDWORD', 'WriteRegStr', 'WriteRegExpandStr',
+ 'HKCR', 'HKEY_CLASSES_ROOT', 'HKLM', 'HKEY_LOCAL_MACHINE', 'HKCU', 'HKEY_CURRENT_USER', 'HKU',
+ 'HKEY_USERS', 'HKCC', 'HKEY_CURRENT_CONFIG', 'HKDD', 'HKEY_DYN_DATA', 'HKPD',
'HKEY_PERFORMANCE_DATA', 'SHCTX', 'SHELL_CONTEXT',
-
--- General Purpose Instructions (4.9.3).
- 'CallInstDLL', 'CopyFiles',
- '/SILENT', '/FILESONLY', 'CreateDirectory', 'CreateShortCut', 'GetDLLVersion',
- 'GetDLLVersionLocal', 'GetFileTime', 'GetFileTimeLocal', 'GetFullPathName',
- '/SHORT', 'GetTempFileName', 'SearchPath', 'SetFileAttributes', 'RegDLL',
- 'UnRegDLL',
--- Flow Control Instructions (4.9.4).
- 'Abort', 'Call', 'ClearErrors', 'GetCurrentAddress', 'GetFunctionAddress',
- 'GetLabelAddress', 'Goto', 'IfAbort', 'IfErrors', 'IfFileExists',
- 'IfRebootFlag', 'IfSilent', 'IntCmp', 'IntCmpU', 'MessageBox', 'MB_OK',
- 'MB_OKCANCEL', 'MB_ABORTRETRYIGNORE', 'MB_RETRYCANCEL', 'MB_YESNO',
- 'MB_YESNOCANCEL', 'MB_ICONEXCLAMATION', 'MB_ICONINFORMATION',
- 'MB_ICONQUESTION', 'MB_ICONSTOP', 'MB_USERICON', 'MB_TOPMOST',
- 'MB_SETFOREGROUND', 'MB_RIGHT', 'MB_RTLREADING', 'MB_DEFBUTTON1',
- 'MB_DEFBUTTON2', 'MB_DEFBUTTON3', 'MB_DEFBUTTON4', 'IDABORT', 'IDCANCEL',
- 'IDIGNORE', 'IDNO', 'IDOK', 'IDRETRY', 'IDYES', 'Return', 'Quit', 'SetErrors',
- 'StrCmp', 'StrCmpS',
--- File Instructions (4.9.5).
- 'FileClose', 'FileOpen', 'FileRead', 'FileReadByte', 'FileSeek', 'FileWrite',
- 'FileWriteByte', 'FindClose', 'FindFirst', 'FindNext',
--- Uninstaller Instructions (4.9.6).
+ -- General Purpose Instructions (4.9.3).
+ 'CallInstDLL', 'CopyFiles', '/SILENT', '/FILESONLY', 'CreateDirectory', 'CreateShortCut',
+ 'GetDLLVersion', 'GetDLLVersionLocal', 'GetFileTime', 'GetFileTimeLocal', 'GetFullPathName',
+ '/SHORT', 'GetTempFileName', 'SearchPath', 'SetFileAttributes', 'RegDLL', 'UnRegDLL',
+ -- Flow Control Instructions (4.9.4).
+ 'Abort', 'Call', 'ClearErrors', 'GetCurrentAddress', 'GetFunctionAddress', 'GetLabelAddress',
+ 'Goto', 'IfAbort', 'IfErrors', 'IfFileExists', 'IfRebootFlag', 'IfSilent', 'IntCmp', 'IntCmpU',
+ 'MessageBox', 'MB_OK', 'MB_OKCANCEL', 'MB_ABORTRETRYIGNORE', 'MB_RETRYCANCEL', 'MB_YESNO',
+ 'MB_YESNOCANCEL', 'MB_ICONEXCLAMATION', 'MB_ICONINFORMATION', 'MB_ICONQUESTION', 'MB_ICONSTOP',
+ 'MB_USERICON', 'MB_TOPMOST', 'MB_SETFOREGROUND', 'MB_RIGHT', 'MB_RTLREADING', 'MB_DEFBUTTON1',
+ 'MB_DEFBUTTON2', 'MB_DEFBUTTON3', 'MB_DEFBUTTON4', 'IDABORT', 'IDCANCEL', 'IDIGNORE', 'IDNO',
+ 'IDOK', 'IDRETRY', 'IDYES', 'Return', 'Quit', 'SetErrors', 'StrCmp', 'StrCmpS',
+ -- File Instructions (4.9.5).
+ 'FileClose', 'FileOpen', 'FileRead', 'FileReadByte', 'FileSeek', 'FileWrite', 'FileWriteByte',
+ 'FindClose', 'FindFirst', 'FindNext',
+ -- Uninstaller Instructions (4.9.6).
'WriteUninstaller',
--- Miscellaneous Instructions (4.9.7).
- 'GetErrorLevel', 'GetInstDirError', 'InitPluginsDir', 'Nop', 'SetErrorLevel',
- 'SetRegView', 'SetShellVarContext', 'all', 'current', 'Sleep',
--- String Manipulation Instructions (4.9.8).
+ -- Miscellaneous Instructions (4.9.7).
+ 'GetErrorLevel', 'GetInstDirError', 'InitPluginsDir', 'Nop', 'SetErrorLevel', 'SetRegView',
+ 'SetShellVarContext', 'all', 'current', 'Sleep',
+ -- String Manipulation Instructions (4.9.8).
'StrCpy', 'StrLen',
--- Stack Support (4.9.9).
+ -- Stack Support (4.9.9).
'Exch', 'Pop', 'Push',
--- Integer Support (4.9.10).
+ -- Integer Support (4.9.10).
'IntFmt', 'IntOp',
--- Reboot Instructions (4.9.11).
+ -- Reboot Instructions (4.9.11).
'Reboot', 'SetRebootFlag',
--- Install Logging Instructions (4.9.12).
+ -- Install Logging Instructions (4.9.12).
'LogSet', 'LogText',
--- Section Management (4.9.13).
- 'SectionSetFlags', 'SectionGetFlags', 'SectionGetFlags',
- 'SectionSetText', 'SectionGetText', 'SectionSetInstTypes',
- 'SectionGetInstTypes', 'SectionSetSize', 'SectionGetSize', 'SetCurInstType',
- 'GetCurInstType', 'InstTypeSetText', 'InstTypeGetText',
--- User Interface Instructions (4.9.14).
- 'BringToFront', 'CreateFont', 'DetailPrint', 'EnableWindow', 'FindWindow',
- 'GetDlgItem', 'HideWindow', 'IsWindow', 'LockWindow', 'SendMessage',
- 'SetAutoClose', 'SetBrandingImage', 'SetDetailsView', 'SetDetailsPrint',
- 'listonly','textonly', 'both', 'SetCtlColors', '/BRANDING', 'SetSilent',
- 'ShowWindow',
--- Multiple Languages Instructions (4.9.15).
+ -- Section Management (4.9.13).
+  'SectionSetFlags', 'SectionGetFlags', 'SectionSetText', 'SectionGetText',
+ 'SectionSetInstTypes', 'SectionGetInstTypes', 'SectionSetSize', 'SectionGetSize',
+ 'SetCurInstType', 'GetCurInstType', 'InstTypeSetText', 'InstTypeGetText',
+ -- User Interface Instructions (4.9.14).
+ 'BringToFront', 'CreateFont', 'DetailPrint', 'EnableWindow', 'FindWindow', 'GetDlgItem',
+ 'HideWindow', 'IsWindow', 'LockWindow', 'SendMessage', 'SetAutoClose', 'SetBrandingImage',
+  'SetDetailsView', 'SetDetailsPrint', 'listonly', 'textonly', 'both', 'SetCtlColors', '/BRANDING',
+ 'SetSilent', 'ShowWindow',
+ -- Multiple Languages Instructions (4.9.15).
'LoadLanguageFile', 'LangString', 'LicenseLangString',
--- Compile time commands (5).
- '!include', '!addincludedir', '!addplugindir', '!appendfile', '!cd',
- '!delfile', '!echo', '!error', '!execute', '!packhdr', '!system', '!tempfile',
- '!warning', '!verbose', '{__FILE__}', '{__LINE__}', '{__DATE__}',
- '{__TIME__}', '{__TIMESTAMP__}', '{NSIS_VERSION}', '!define', '!undef',
- '!ifdef', '!ifndef', '!if', '!ifmacrodef', '!ifmacrondef', '!else', '!endif',
- '!insertmacro', '!macro', '!macroend', '!searchparse', '!searchreplace',
-}, '/!.{}_'))
+ -- Compile time commands (5).
+ '!include', '!addincludedir', '!addplugindir', '!appendfile', '!cd', '!delfile', '!echo',
+ '!error', '!execute', '!packhdr', '!system', '!tempfile', '!warning', '!verbose', '{__FILE__}',
+ '{__LINE__}', '{__DATE__}', '{__TIME__}', '{__TIMESTAMP__}', '{NSIS_VERSION}', '!define',
+ '!undef', '!ifdef', '!ifndef', '!if', '!ifmacrodef', '!ifmacrondef', '!else', '!endif',
+ '!insertmacro', '!macro', '!macroend', '!searchparse', '!searchreplace'
+}))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
-- Operators.
-local operator = token(l.OPERATOR, S('+-*/%|&^~!<>'))
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%|&^~!<>')))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Labels (4.3).
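+-- e.g. a jump target such as 'done:'.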
+lex:add_rule('label', token(lexer.LABEL, lexer.word * ':'))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'constant', constant},
- {'variable', variable},
- {'keyword', keyword},
- {'number', number},
- {'operator', operator},
- {'label', label},
- {'identifier', identifier},
-}
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-return M
+return lex
diff --git a/lua/lexers/null.lua b/lua/lexers/null.lua
index d5e14c5..ac61ab8 100644
--- a/lua/lexers/null.lua
+++ b/lua/lexers/null.lua
@@ -1,6 +1,4 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Null LPeg lexer.
-local M = {_NAME = 'null'}
-
-return M
+return require('lexer').new('null')
diff --git a/lua/lexers/objective_c.lua b/lua/lexers/objective_c.lua
index 3071482..101cb90 100644
--- a/lua/lexers/objective_c.lua
+++ b/lua/lexers/objective_c.lua
@@ -1,87 +1,68 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Objective C LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'objective_c'}
+local lex = lexer.new('objective_c')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Preprocessor.
-local preproc_word = word_match{
- 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef',
- 'ifndef', 'import', 'include', 'line', 'pragma', 'undef',
- 'warning'
-}
-local preproc = token(l.PREPROCESSOR,
- l.starts_line('#') * S('\t ')^0 * preproc_word *
- (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-- From C.
- 'asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else',
- 'extern', 'false', 'for', 'goto', 'if', 'inline', 'register', 'return',
- 'sizeof', 'static', 'switch', 'true', 'typedef', 'void', 'volatile', 'while',
- 'restrict', '_Bool', '_Complex', '_Pragma', '_Imaginary',
+ 'asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'extern', 'false',
+ 'for', 'goto', 'if', 'inline', 'register', 'return', 'sizeof', 'static', 'switch', 'true',
+ 'typedef', 'void', 'volatile', 'while', 'restrict', '_Bool', '_Complex', '_Pragma', '_Imaginary',
-- Objective C.
'oneway', 'in', 'out', 'inout', 'bycopy', 'byref', 'self', 'super',
-- Preprocessor directives.
- '@interface', '@implementation', '@protocol', '@end', '@private',
- '@protected', '@public', '@class', '@selector', '@encode', '@defs',
- '@synchronized', '@try', '@throw', '@catch', '@finally',
+ '@interface', '@implementation', '@protocol', '@end', '@private', '@protected', '@public',
+ '@class', '@selector', '@encode', '@defs', '@synchronized', '@try', '@throw', '@catch',
+ '@finally',
-- Constants.
'TRUE', 'FALSE', 'YES', 'NO', 'NULL', 'nil', 'Nil', 'METHOD_NULL'
-}, '@'))
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'apply_t', 'id', 'Class', 'MetaClass', 'Object', 'Protocol', 'retval_t',
- 'SEL', 'STR', 'IMP', 'BOOL', 'TypedStream'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'apply_t id Class MetaClass Object Protocol retval_t SEL STR IMP BOOL TypedStream')))
+
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Preprocessor.
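+-- Matches line-initial directives such as '#include' or '# ifdef'.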
+lex:add_rule('preprocessor',
+ #lexer.starts_line('#') * token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * word_match{
+ 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'import', 'include',
+ 'line', 'pragma', 'undef', 'warning'
+ }))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'string', string},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.PREPROCESSOR] = {
- region = 1, endregion = -1,
- ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
- },
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold symbols.
+lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion')
+lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif')
+lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/pascal.lua b/lua/lexers/pascal.lua
index fab4bdb..05e9f62 100644
--- a/lua/lexers/pascal.lua
+++ b/lua/lexers/pascal.lua
@@ -1,78 +1,62 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Pascal LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'pascal'}
+local lex = lexer.new('pascal')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local bblock_comment = '{' * (l.any - '}')^0 * P('}')^-1
-local pblock_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1
-local comment = token(l.COMMENT, line_comment + bblock_comment + pblock_comment)
-
--- Strings.
-local string = token(l.STRING, S('uUrR')^-1 *
- l.delimited_range("'", true, true))
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('LlDdFf')^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class', 'const',
- 'constructor', 'destructor', 'dispinterface', 'div', 'do', 'downto', 'else',
- 'end', 'except', 'exports', 'file', 'final', 'finalization', 'finally', 'for',
- 'function', 'goto', 'if', 'implementation', 'in', 'inherited',
- 'initialization', 'inline', 'interface', 'is', 'label', 'mod', 'not',
- 'object', 'of', 'on', 'or', 'out', 'packed', 'procedure', 'program',
- 'property', 'raise', 'record', 'repeat', 'resourcestring', 'set', 'sealed',
- 'shl', 'shr', 'static', 'string', 'then', 'threadvar', 'to', 'try', 'type',
- 'unit', 'unsafe', 'until', 'uses', 'var', 'while', 'with', 'xor',
- 'absolute', 'abstract', 'assembler', 'automated', 'cdecl', 'contains',
- 'default', 'deprecated', 'dispid', 'dynamic', 'export', 'external', 'far',
- 'forward', 'implements', 'index', 'library', 'local', 'message', 'name',
- 'namespaces', 'near', 'nodefault', 'overload', 'override', 'package',
- 'pascal', 'platform', 'private', 'protected', 'public', 'published', 'read',
- 'readonly', 'register', 'reintroduce', 'requires', 'resident', 'safecall',
- 'stdcall', 'stored', 'varargs', 'virtual', 'write', 'writeln', 'writeonly',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class', 'const', 'constructor', 'destructor',
+ 'dispinterface', 'div', 'do', 'downto', 'else', 'end', 'except', 'exports', 'file', 'final',
+ 'finalization', 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', 'inherited',
+ 'initialization', 'inline', 'interface', 'is', 'label', 'mod', 'not', 'object', 'of', 'on', 'or',
+ 'out', 'packed', 'procedure', 'program', 'property', 'raise', 'record', 'repeat',
+ 'resourcestring', 'set', 'sealed', 'shl', 'shr', 'static', 'string', 'then', 'threadvar', 'to',
+ 'try', 'type', 'unit', 'unsafe', 'until', 'uses', 'var', 'while', 'with', 'xor', 'absolute',
+ 'abstract', 'assembler', 'automated', 'cdecl', 'contains', 'default', 'deprecated', 'dispid',
+ 'dynamic', 'export', 'external', 'far', 'forward', 'implements', 'index', 'library', 'local',
+ 'message', 'name', 'namespaces', 'near', 'nodefault', 'overload', 'override', 'package', 'pascal',
+ 'platform', 'private', 'protected', 'public', 'published', 'read', 'readonly', 'register',
+ 'reintroduce', 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', 'virtual',
+ 'write', 'writeln', 'writeonly', --
'false', 'nil', 'self', 'true'
-}, nil, true))
+}, true)))
-- Functions.
-local func = token(l.FUNCTION, word_match({
- 'chr', 'ord', 'succ', 'pred', 'abs', 'round', 'trunc', 'sqr', 'sqrt',
- 'arctan', 'cos', 'sin', 'exp', 'ln', 'odd', 'eof', 'eoln'
-}, nil, true))
+lex:add_rule('function', token(lexer.FUNCTION, word_match({
+ 'chr', 'ord', 'succ', 'pred', 'abs', 'round', 'trunc', 'sqr', 'sqrt', 'arctan', 'cos', 'sin',
+ 'exp', 'ln', 'odd', 'eof', 'eoln'
+}, true)))
-- Types.
-local type = token(l.TYPE, word_match({
- 'shortint', 'byte', 'char', 'smallint', 'integer', 'word', 'longint',
- 'cardinal', 'boolean', 'bytebool', 'wordbool', 'longbool', 'real', 'single',
- 'double', 'extended', 'comp', 'currency', 'pointer'
-}, nil, true))
+lex:add_rule('type', token(lexer.TYPE, word_match({
+ 'shortint', 'byte', 'char', 'smallint', 'integer', 'word', 'longint', 'cardinal', 'boolean',
+ 'bytebool', 'wordbool', 'longbool', 'real', 'single', 'double', 'extended', 'comp', 'currency',
+ 'pointer'
+}, true)))
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, S('uUrR')^-1 * lexer.range("'", true, false)))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('.,;^@:=<>+-/*()[]'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local bblock_comment = lexer.range('{', '}')
+local pblock_comment = lexer.range('(*', '*)')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + bblock_comment + pblock_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'type', type},
- {'string', string},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlDdFf')^-1))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('.,;^@:=<>+-/*()[]')))
-return M
+return lex
diff --git a/lua/lexers/perl.lua b/lua/lexers/perl.lua
index b490c7f..40727a4 100644
--- a/lua/lexers/perl.lua
+++ b/lua/lexers/perl.lua
@@ -1,164 +1,150 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Perl LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'perl'}
+local lex = lexer.new('perl')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '#' * l.nonnewline_esc^0
-local block_comment = l.starts_line('=') * l.alpha *
- (l.any - l.newline * '=cut')^0 * (l.newline * '=cut')^-1
-local comment = token(l.COMMENT, block_comment + line_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT', --
+ 'require', 'use', --
+ 'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if', 'last', 'local', 'my',
+ 'next', 'our', 'package', 'return', 'sub', 'unless', 'until', 'while', '__FILE__', '__LINE__',
+ '__PACKAGE__', --
+ 'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
+}))
+-- Markers.
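+-- Everything after a __DATA__ or __END__ marker is consumed as a single comment token.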
+lex:add_rule('marker', token(lexer.COMMENT, word_match('__DATA__ __END__') * lexer.any^0))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir', 'chmod',
+ 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir', 'close', 'connect', 'cos', 'crypt',
+ 'dbmclose', 'dbmopen', 'defined', 'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent',
+ 'endnetent', 'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit',
+ 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
+ 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin', 'getnetbyaddr',
+ 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
+ 'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
+ 'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime', 'goto', 'grep',
+ 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'lcfirst', 'lc', 'length',
+ 'link', 'listen', 'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
+ 'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop', 'pos', 'printf', 'print',
+ 'prototype', 'push', 'quotemeta', 'rand', 'readdir', 'read', 'readlink', 'recv', 'redo', 'ref',
+ 'rename', 'reset', 'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek',
+ 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent', 'setpgrp',
+ 'setpriority', 'setprotoent', 'setpwent', 'setservent', 'setsockopt', 'shift', 'shmctl', 'shmget',
+ 'shmread', 'shmwrite', 'shutdown', 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice',
+ 'split', 'sprintf', 'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
+ 'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time', 'times', 'truncate',
+ 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie', 'utime', 'values',
+ 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'
+}))
+
+-- Strings.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
-local literal_delimitted = P(function(input, index) -- for single delimiter sets
+local literal_delimited = P(function(input, index) -- for single delimiter sets
local delimiter = input:sub(index, index)
if not delimiter:find('%w') then -- only non alpha-numerics
- local match_pos, patt
+ local patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = l.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = l.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
- match_pos = lpeg.match(patt, input, index)
+ local match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
end
end)
-local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
+local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets
local delimiter = input:sub(index, index)
- -- Only consider non-alpha-numerics and non-spaces as delimiters. The
- -- non-spaces are used to ignore operators like "-s".
+ -- Only consider non-alpha-numerics and non-spaces as delimiters. The non-spaces are used to
+ -- ignore operators like "-s".
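+ -- e.g. this matches both halves of two-part literals like s/foo/bar/ or tr{a-z}{A-Z}.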
if not delimiter:find('[%w ]') then
- local match_pos, patt
+ local patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = l.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = l.delimited_range(delimiter)
- end
- first_match_pos = lpeg.match(patt, input, index)
- if not first_match_pos then
- return #input + 1
+ patt = lexer.range(delimiter)
end
- final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
+ local first_match_pos = lpeg.match(patt, input, index)
+ local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
if not final_match_pos then -- using (), [], {}, or <> notation
- final_match_pos = lpeg.match(l.space^0 * patt, input, first_match_pos)
+ final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
end
return final_match_pos or #input + 1
end
end)
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local cmd_str = l.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local cmd_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
if s == index and delimiter then
local end_heredoc = '[\n\r\f]+'
- local _, e = input:find(end_heredoc..delimiter, e)
+ e = select(2, input:find(end_heredoc .. delimiter, e))
return e and e + 1 or #input + 1
end
end)
-local lit_str = 'q' * P('q')^-1 * literal_delimitted
-local lit_array = 'qw' * literal_delimitted
-local lit_cmd = 'qx' * literal_delimitted
-local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
-local regex_str = #P('/') * l.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
- l.delimited_range('/', true) * S('imosx')^0
-local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
-local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
-local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-local string = token(l.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
- lit_array + lit_cmd + lit_tr) +
- token(l.REGEX, regex_str + lit_regex + lit_match + lit_sub)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+local lit_str = 'q' * P('q')^-1 * literal_delimited
+local lit_array = 'qw' * literal_delimited
+local lit_cmd = 'qx' * literal_delimited
+local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
+local string = token(lexer.STRING,
+ sq_str + dq_str + cmd_str + heredoc + lit_str + lit_array + lit_cmd + lit_tr)
+local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
+ lexer.range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimited * S('imosx')^0
+local lit_match = 'm' * literal_delimited * S('cgimosx')^0
+local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)
+lex:add_rule('string', string + regex)
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT',
- 'require', 'use',
- 'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if',
- 'last', 'local', 'my', 'next', 'our', 'package', 'return', 'sub', 'unless',
- 'until', 'while', '__FILE__', '__LINE__', '__PACKAGE__',
- 'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
-})
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Functions.
-local func = token(l.FUNCTION, word_match({
- 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller',
- 'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir',
- 'close', 'connect', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined',
- 'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent', 'endnetent',
- 'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists',
- 'exit', 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
- 'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname',
- 'gethostent', 'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent',
- 'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
- 'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid',
- 'getservbyname', 'getservbyport', 'getservent', 'getsockname', 'getsockopt',
- 'glob', 'gmtime', 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl',
- 'join', 'keys', 'kill', 'lcfirst', 'lc', 'length', 'link', 'listen',
- 'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
- 'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop',
- 'pos', 'printf', 'print', 'prototype', 'push', 'quotemeta', 'rand', 'readdir',
- 'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
- 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek', 'select',
- 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
- 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
- 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
- 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf',
- 'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
- 'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time',
- 'times', 'truncate', 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack',
- 'unshift', 'untie', 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray',
- 'warn', 'write'
-}, '2'))
+-- Comments.
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha), lexer.starts_line('=cut'))
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
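+-- Integer digit groups may be separated by underscores, e.g. 1_000_000 or 0xDEAD_BEEF.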
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local hex = '0' * S('xX') * lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0
+local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0 * -lexer.xdigit
+local integer = S('+-')^-1 * (hex + bin + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Variables.
-local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
- S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
- ':' * (l.any - ':') + P('$') * -l.word + l.digit^1)
-local plain_var = ('$#' + S('$@%')) * P('$')^0 * l.word + '$#'
-local variable = token(l.VARIABLE, special_var + plain_var)
+-- LuaFormatter off
+local special_var = '$' * (
+ '^' * S('ADEFHILMOPSTWX')^-1 +
+ S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+ ':' * (lexer.any - ':') +
+ P('$') * -lexer.word +
+ lexer.digit^1)
+-- LuaFormatter on
+local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
+lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))
-- Operators.
-local operator = token(l.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}'))
-
--- Markers.
-local marker = token(l.COMMENT, word_match{'__DATA__', '__END__'} * l.any^0)
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'marker', marker},
- {'function', func},
- {'string', string},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('-<>+*!~\\=/%&|^.,?:;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[%[%]{}]', '#'},
- [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
diff --git a/lua/lexers/php.lua b/lua/lexers/php.lua
index 5653880..f1fbda7 100644
--- a/lua/lexers/php.lua
+++ b/lua/lexers/php.lua
@@ -1,132 +1,101 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- PHP LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'php'}
+local lex = lexer.new('php')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = (P('//') + '#') * (l.nonnewline - '?>')^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, block_comment + line_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- Reserved words (http://php.net/manual/en/reserved.keywords.php)
+ '__halt_compiler', 'abstract', 'and', 'array', 'as', 'break', 'callable', 'case', 'catch',
+ 'class', 'clone', 'const', 'continue', 'declare', 'default', 'die', 'do', 'echo', 'else',
+ 'elseif', 'empty', 'enddeclare', 'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile', 'eval',
+ 'exit', 'extends', 'final', 'finally', 'fn', 'for', 'foreach', 'function', 'global', 'goto', 'if',
+ 'implements', 'include', 'include_once', 'instanceof', 'insteadof', 'interface', 'isset', 'list',
+ 'namespace', 'new', 'or', 'print', 'private', 'protected', 'public', 'require', 'require_once',
+ 'return', 'static', 'switch', 'throw', 'trait', 'try', 'unset', 'use', 'var', 'while', 'xor',
+ 'yield', 'from',
+ -- Reserved classes (http://php.net/manual/en/reserved.classes.php)
+ 'Directory', 'stdClass', '__PHP_Incomplete_Class', 'Exception', 'ErrorException',
+ 'php_user_filter', 'Closure', 'Generator', 'ArithmeticError', 'AssertionError',
+ 'DivisionByZeroError', 'Error', 'Throwable', 'ParseError', 'TypeError', 'self', 'static', 'parent'
+}))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE,
+ word_match('int float bool string true false null void iterable object')))
+
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ -- Compile-time (https://www.php.net/manual/en/reserved.keywords.php)
+ '__CLASS__', '__DIR__', '__FILE__', '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__',
+ '__TRAIT__',
+ -- Reserved (https://www.php.net/manual/en/reserved.constants.php)
+ 'PHP_VERSION', 'PHP_MAJOR_VERSION', 'PHP_MINOR_VERSION', 'PHP_RELEASE_VERSION', 'PHP_VERSION_ID',
+ 'PHP_EXTRA_VERSION', 'PHP_ZTS', 'PHP_DEBUG', 'PHP_MAXPATHLEN', 'PHP_OS', 'PHP_OS_FAMILY',
+ 'PHP_SAPI', 'PHP_EOL', 'PHP_INT_MAX', 'PHP_INT_MIN', 'PHP_INT_SIZE', 'PHP_FLOAT_DIG',
+ 'PHP_FLOAT_EPSILON', 'PHP_FLOAT_MIN', 'PHP_FLOAT_MAX', 'DEFAULT_INCLUDE_PATH', 'PEAR_INSTALL_DIR',
+ 'PEAR_EXTENSION_DIR', 'PHP_EXTENSION_DIR', 'PHP_PREFIX', 'PHP_BINDIR', 'PHP_BINARY', 'PHP_MANDIR',
+ 'PHP_LIBDIR', 'PHP_DATADIR', 'PHP_SYSCONFDIR', 'PHP_LOCALSTATEDIR', 'PHP_CONFIG_FILE_PATH',
+ 'PHP_CONFIG_FILE_SCAN_DIR', 'PHP_SHLIB_SUFFIX', 'PHP_FD_SETSIZE', 'E_ERROR', 'E_WARNING',
+ 'E_PARSE', 'E_NOTICE', 'E_CORE_ERROR', 'E_CORE_WARNING', 'E_COMPILE_ERROR', 'E_USER_ERROR',
+ 'E_USER_WARNING', 'E_USER_NOTICE', 'E_DEPRECATED', 'E_USER_DEPRECATED', 'E_ALL',
+ 'E_STRICT', '__COMPILER_HALT_OFFSET__'
+}))
+
+-- Identifiers.
+local word = (lexer.alpha + '_' + lpeg.R('\127\255')) * (lexer.alnum + '_' + lpeg.R('\127\255'))^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
+
+-- Variables.
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * word))
-- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local bt_str = l.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
local heredoc = '<<<' * P(function(input, index)
local _, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f]+', index)
if delimiter then
- local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ e = select(2, input:find('[\n\r\f]+' .. delimiter, e))
return e and e + 1
end
end)
-local string = token(l.STRING, sq_str + dq_str + bt_str + heredoc)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str + heredoc))
-- TODO: interpolated code.
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- -- http://php.net/manual/en/reserved.keywords.php
- '__halt_compiler', 'abstract', 'and', 'array', 'as', 'break',
- 'callable', 'case', 'catch', 'class', 'clone', 'const',
- 'continue', 'declare', 'default', 'die', 'do', 'echo', 'else',
- 'elseif', 'empty', 'enddeclare', 'endfor', 'endforeach',
- 'endif', 'endswitch', 'endwhile', 'eval', 'exit', 'extends',
- 'final', 'for', 'foreach', 'function', 'global', 'goto',
- 'if', 'implements', 'include', 'list', 'namespace', 'new',
- 'or', 'print', 'private', 'protected', 'public', 'require',
- 'require_once', 'return', 'static', 'switch', 'throw', 'trait',
- 'try', 'unset', 'use', 'var', 'while', 'xor',
- -- http://php.net/manual/en/reserved.classes.php
- 'directory', 'stdclass', '__php_incomplete_class', 'exception',
- 'errorexception', 'closure', 'generator', 'arithmeticerror',
- 'assertionerror', 'divisionbyzeroerror', 'error', 'throwable',
- 'parseerror', 'typeerror', 'self', 'parent',
- -- http://php.net/manual/en/reserved.other-reserved-words.php
- 'int', 'float', 'bool', 'string', 'true', 'false', 'null',
- 'void', 'iterable', 'resource', 'object', 'mixed', 'numeric'
-})
-
--- Constants.
-local constant = token(l.CONSTANT, word_match{
- -- Compile-time constants
- -- http://php.net/manual/en/reserved.keywords.php
- '__CLASS__', '__DIR__', '__FILE__', '__FUNCTION__', '__LINE__',
- '__METHOD__', '__NAMESPACE__', '__TRAIT__',
- -- http://php.net/manual/en/reserved.constants.php
- 'PHP_VERSION', 'PHP_MAJOR_VERSION', 'PHP_MINOR_VERSION',
- 'PHP_RELEASE_VERSION', 'PHP_VERSION_ID', 'PHP_EXTRA_VERSION',
- 'PHP_ZTS', 'PHP_DEBUG', 'PHP_MAXPATHLEN', 'PHP_OS',
- 'PHP_OS_FAMILY', 'PHP_SAPI', 'PHP_EOL', 'PHP_INT_MAX',
- 'PHP_INT_MIN', 'PHP_INT_SIZE', 'PHP_FLOAT_DIG',
- 'PHP_FLOAT_EPSILON', 'PHP_FLOAT_MIN', 'PHP_FLOAT_MAX',
- 'DEFAULT_INCLUDE_PATH', 'PEAR_INSTALL_DIR', 'PEAR_EXTENSION_DIR',
- 'PHP_EXTENSION_DIR', 'PHP_PREFIX', 'PHP_BINDIR',
- 'PHP_BINARY', 'PHP_MANDIR', 'PHP_LIBDIR', 'PHP_DATADIR',
- 'PHP_SYSCONFDIR', 'PHP_LOCALSTATEDIR', 'PHP_CONFIG_FILE_PATH',
- 'PHP_CONFIG_FILE_SCAN_DIR', 'PHP_SHLIB_SUFFIX', 'PHP_FD_SETSIZE',
- 'E_ERROR', 'E_WARNING', 'E_PARSE', 'E_NOTICE', 'E_CORE_ERROR',
- 'E_CORE_WARNING', 'E_COMPILE_ERROR', 'E_USER_ERROR',
- 'E_USER_WARNING', 'E_USER_NOTICE', 'E_DEPRECATED',
- 'E_DEPRECATED', 'E_USER_DEPRECATED', 'E_ALL', 'E_STRICT',
- '__COMPILER_HALT_OFFSET__',
-})
-
--- Variables.
-local word = (l.alpha + '_' + R('\127\255')) * (l.alnum + '_' + R('\127\255'))^0
-local variable = token(l.VARIABLE, '$' * word)
+-- Comments.
+local line_comment = lexer.to_eol(P('//') + '#')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('!@%^*&()-+=|/.,;:<>[]{}') + '?' * -P('>'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'constant', constant},
- {'identifier', identifier},
- {'string', string},
- {'variable', variable},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('!@%^*&()-+=|/?.,;:<>[]{}')))
-- Embedded in HTML.
-local html = l.load('html')
+local html = lexer.load('html')
-- Embedded PHP.
-local php_start_rule = token('php_tag', '<?' * ('php' * l.space)^-1)
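+-- Both '<?' and '<?php' (followed by whitespace) open an embedded PHP region.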
+local php_start_rule = token('php_tag', '<?' * ('php' * lexer.space)^-1)
local php_end_rule = token('php_tag', '?>')
-l.embed_lexer(html, M, php_start_rule, php_end_rule)
-
-M._tokenstyles = {
- php_tag = l.STYLE_EMBEDDED
-}
-
-local _foldsymbols = html._foldsymbols
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%?'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%?>'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '/%*'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%*/'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '//'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '#'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '[{}()]'
-_foldsymbols.php_tag = {['<?'] = 1, ['?>'] = -1}
-_foldsymbols[l.COMMENT]['/*'], _foldsymbols[l.COMMENT]['*/'] = 1, -1
-_foldsymbols[l.COMMENT]['//'] = l.fold_line_comments('//')
-_foldsymbols[l.COMMENT]['#'] = l.fold_line_comments('#')
-_foldsymbols[l.OPERATOR] = {['{'] = 1, ['}'] = -1, ['('] = 1, [')'] = -1}
-M._foldsymbols = _foldsymbols
-
-return M
+html:embed(lex, php_start_rule, php_end_rule)
+lex:add_style('php_tag', lexer.styles.embedded)
+
+-- Fold points.
+lex:add_fold_point('php_tag', '<?', '?>')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+
+return lex
diff --git a/lua/lexers/pico8.lua b/lua/lexers/pico8.lua
index d6df3e2..03f260e 100644
--- a/lua/lexers/pico8.lua
+++ b/lua/lexers/pico8.lua
@@ -1,53 +1,37 @@
--- Copyright 2016-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE.
--- PICO-8 Lexer.
+-- Copyright 2016-2022 Alejandro Baez (https://keybase.io/baez). See LICENSE.
+-- PICO-8 lexer.
-- http://www.lexaloffle.com/pico-8.php
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'pico8'}
+local lex = lexer.new('pico8')
-- Whitespace
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments
-local comment = token(l.COMMENT, '//' * l.nonnewline_esc^0)
-
--- Numbers
-local number = token(l.NUMBER, l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords
-local keyword = token(l.KEYWORD, word_match{
- '__lua__', '__gfx__', '__gff__', '__map__', '__sfx__', '__music__'
-})
+lex:add_rule('keyword',
+ token(lexer.KEYWORD, word_match('__lua__ __gfx__ __gff__ __map__ __sfx__ __music__')))
-- Identifiers
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators
-local operator = token(l.OPERATOR, S('_'))
+-- Comments
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//', true)))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers
+lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
--- Embed Lua into PICO-8.
-local lua = l.load('lua')
+-- Operators
+lex:add_rule('operator', token(lexer.OPERATOR, '_'))
+-- Embed Lua into PICO-8.
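+-- The embedded Lua region runs from the __lua__ tag up to the __gfx__ tag.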
+local lua = lexer.load('lua')
local lua_start_rule = token('pico8_tag', '__lua__')
-local lua_end_rule = token('pico8_tag', '__gfx__' )
-l.embed_lexer(M, lua, lua_start_rule, lua_end_rule)
-
-M._tokenstyles = {
- pico8_tag = l.STYLE_EMBEDDED
-}
-
-M._foldsymbols = lua._foldsymbols
+local lua_end_rule = token('pico8_tag', '__gfx__')
+lex:embed(lua, lua_start_rule, lua_end_rule)
+lex:add_style('pico8_tag', lexer.styles.embedded)
-return M
+return lex
diff --git a/lua/lexers/pike.lua b/lua/lexers/pike.lua
index f5ec13a..79efd53 100644
--- a/lua/lexers/pike.lua
+++ b/lua/lexers/pike.lua
@@ -1,70 +1,53 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Pike LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'pike'}
+local lex = lexer.new('pike')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local nested_comment = l.nested_pair('/*', '*/')
-local comment = token(l.COMMENT, line_comment + nested_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local lit_str = '#' * l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str + lit_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('lLdDfF')^-1)
-
--- Preprocessors.
-local preproc = token(l.PREPROCESSOR, l.starts_line('#') * l.nonnewline^0)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'for',
- 'foreach', 'gauge', 'if', 'lambda', 'return', 'sscanf', 'switch', 'while',
- 'import', 'inherit',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'for', 'foreach', 'gauge', 'if',
+ 'lambda', 'return', 'sscanf', 'switch', 'while', 'import', 'inherit',
-- Type modifiers.
- 'constant', 'extern', 'final', 'inline', 'local', 'nomask', 'optional',
- 'private', 'protected', 'public', 'static', 'variant'
-})
+ 'constant', 'extern', 'final', 'inline', 'local', 'nomask', 'optional', 'private', 'protected',
+ 'public', 'static', 'variant'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'array', 'class', 'float', 'function', 'int', 'mapping', 'mixed', 'multiset',
- 'object', 'program', 'string', 'void'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'array class float function int mapping mixed multiset object program string void')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('<>=!+-/*%&|^~@`.,:;()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = P('#')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfF')^-1))
+
+-- Preprocessors.
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.to_eol(lexer.starts_line('#'))))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('<>=!+-/*%&|^~@`.,:;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/pkgbuild.lua b/lua/lexers/pkgbuild.lua
index f08f114..eae3703 100644
--- a/lua/lexers/pkgbuild.lua
+++ b/lua/lexers/pkgbuild.lua
@@ -1,131 +1,78 @@
--- Copyright 2006-2013 gwash. See LICENSE.
+-- Copyright 2006-2022 gwash. See LICENSE.
-- Archlinux PKGBUILD LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'pkgbuild'}
+local lex = lexer.new('pkgbuild')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"')
-local ex_str = l.delimited_range('`')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+local ex_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
- local s, e, _, delimiter =
- input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
+ local s, e, _, delimiter = input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
if s == index and delimiter then
- local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ e = select(2, input:find('[\n\r\f]+' .. delimiter, e))
return e and e + 1 or #input + 1
end
end)
-local string = token(l.STRING, sq_str + dq_str + ex_str + heredoc)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc))
-- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'patch', 'cd', 'make', 'patch', 'mkdir', 'cp', 'sed', 'install', 'rm',
- 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
- 'do', 'done', 'continue', 'local', 'return', 'git', 'svn', 'co', 'clone',
- 'gconf-merge-schema', 'msg', 'echo', 'ln',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'patch', 'cd', 'make', 'patch', 'mkdir', 'cp', 'sed', 'install', 'rm', 'if', 'then', 'elif',
+ 'else', 'fi', 'case', 'in', 'esac', 'while', 'for', 'do', 'done', 'continue', 'local', 'return',
+ 'git', 'svn', 'co', 'clone', 'gconf-merge-schema', 'msg', 'echo', 'ln',
-- Operators.
- '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
- '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
- '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge', '-Np', '-i'
-}, '-'))
+ '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t', '-u', '-w', '-x',
+ '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o', '-z', '-n', '-eq', '-ne', '-lt', '-le',
+ '-gt', '-ge', '-Np', '-i'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'build',
- 'check',
- 'package',
- 'pkgver',
- 'prepare'
-} * '()')
+lex:add_rule('function',
+ token(lexer.FUNCTION, word_match('build check package pkgver prepare') * '()'))
-- Constants.
-local constants = {
- -- We do *not* list pkgver, srcdir and startdir here.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ -- We do *not* list pkgver, srcdir and startdir here.
-- These are defined by makepkg but user should not alter them.
- 'arch',
- 'backup',
- 'changelog',
- 'epoch',
- 'groups',
- 'install',
- 'license',
- 'noextract',
- 'options',
- 'pkgbase',
- 'pkgdesc',
- 'pkgname',
- 'pkgrel',
- 'pkgver',
- 'url',
- 'validpgpkeys'
-}
-local arch_specific = {
- 'checkdepends',
- 'conflicts',
- 'depends',
- 'makedepends',
- 'md5sums',
- 'optdepends',
- 'provides',
- 'replaces',
- 'sha1sums',
- 'sha256sums',
- 'sha384sums',
- 'sha512sums',
- 'source'
-}
-for _, field in ipairs(arch_specific) do
- for _,arch in ipairs({ '', 'i686', 'x86_64' }) do
- table.insert(constants, field..(arch ~= '' and '_'..arch or ''))
- end
-end
-local constant = token(l.CONSTANT, word_match(constants))
+ 'arch', 'backup', 'changelog', 'checkdepends', 'conflicts', 'depends', 'epoch', 'groups',
+ 'install', 'license', 'makedepends', 'md5sums', 'noextract', 'optdepends', 'options', 'pkgbase',
+ 'pkgdesc', 'pkgname', 'pkgrel', 'pkgver', 'provides', 'replaces', 'sha1sums', 'sha256sums',
+ 'sha384sums', 'sha512sums', 'source', 'url', 'validpgpkeys'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variables.
-local variable = token(l.VARIABLE,
- '$' * (S('!#?*@$') +
- l.delimited_range('()', true, true) +
- l.delimited_range('[]', true, true) +
- l.delimited_range('{}', true, true) +
- l.delimited_range('`', true, true) +
- l.digit^1 + l.word))
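+-- e.g. $@, $1, ${var}, $(cmd) or $pkgname.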
+local symbol = S('!#?*@$')
+local parens = lexer.range('(', ')', true)
+local brackets = lexer.range('[', ']', true)
+local braces = lexer.range('{', '}', true)
+local backticks = lexer.range('`', true, false)
+local number = lexer.dec_num
+lex:add_rule('variable', token(lexer.VARIABLE, '$' *
+ (symbol + parens + brackets + braces + backticks + number + lexer.word)))
-- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/*^~.,:;?()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'keyword', keyword},
- {'function', func},
- {'constant', constant},
- {'identifier', identifier},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^~.,:;?()[]{}')))
-M._foldsymbols = {
- _patterns = {'[%(%){}]', '#'},
- [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
diff --git a/lua/lexers/pony.lua b/lua/lexers/pony.lua
index 5cb07a1..b02f2a7 100644
--- a/lua/lexers/pony.lua
+++ b/lua/lexers/pony.lua
@@ -1,116 +1,94 @@
--- Copyright 2017 Murray Calavera. See LICENSE.
+-- Copyright 2017-2022 Murray Calavera. See LICENSE.
-- Pony LPeg lexer.
-local l = require('lexer')
-local token = l.token
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local P, S = lpeg.P, lpeg.S
-local function pword(words)
- return l.word_match(words, "'")
-end
-
-local ws = token(l.WHITESPACE, l.space^1)
-
-local comment_line = '//' * l.nonnewline^0
-local comment_block = l.nested_pair('/*', '*/')
-local comment = token(l.COMMENT, comment_line + comment_block)
-
-local annotation = token(l.PREPROCESSOR, l.delimited_range('\\', false, true))
-
-local lit_bool = token(l.CONSTANT, pword{'true', 'false'})
-
-local nq = l.any - P'"'
-local lit_str = token(l.STRING,
- P'"""' * (nq + (P'"' * #(nq + (P'"' * nq))))^0 * P'"""'
- + l.delimited_range('"')
- + l.delimited_range("'")
-)
-
-local function num(digit)
- return digit * (digit^0 * P'_')^0 * digit^1 + digit
-end
-
-local int = num(l.digit)
-local frac = P('.') * int
-local exp = S('eE') * (P('-') + P('+'))^-1 * int
-
-local lit_num = token(l.NUMBER,
- P('0x') * num(l.xdigit)
- + P('0b') * num(S('01'))
- + int * frac^-1 * exp^-1
-)
-
-local keyword = token(l.KEYWORD, pword{
- 'actor', 'as', 'be', 'break', 'class', 'compile_error', 'compile_intrinsic',
- 'continue', 'consume', 'do', 'else', 'elseif', 'embed', 'end', 'error',
- 'for', 'fun', 'if', 'ifdef', 'iftype', 'in', 'interface', 'is', 'isnt',
- 'lambda', 'let', 'match', 'new', 'object', 'primitive', 'recover', 'repeat',
- 'return', 'struct', 'then', 'this', 'trait', 'try', 'type', 'until', 'use',
- 'var', 'where', 'while', 'with'})
-local capability = token(l.LABEL, pword{
- 'box', 'iso', 'ref', 'tag', 'trn', 'val'})
-local qualifier = token(l.LABEL,
- P'#' * pword{'read', 'send', 'share', 'any', 'alias'})
-
-local operator = token(l.OPERATOR,
- pword{'and', 'or', 'xor', 'not', 'addressof', 'digestof'}
- + lpeg.Cmt(S('+-*/%<>=!~')^1, function(input, index, op)
- local ops = {
- ['+'] = true, ['-'] = true, ['*'] = true, ['/'] = true, ['%'] = true,
- ['+~'] = true, ['-~'] = true, ['*~'] = true, ['/~'] = true,
- ['%~'] = true, ['<<'] = true, ['>>'] = true, ['<<~'] = true,
- ['>>~'] = true, ['=='] = true, ['!='] = true, ['<'] = true,
- ['<='] = true, ['>='] = true, ['>'] = true, ['==~'] = true,
- ['!=~'] = true, ['<~'] = true, ['<=~'] = true, ['>=~'] = true,
- ['>~'] = true
- }
- return ops[op] and index or nil
- end)
-)
-
--- there is no suitable token name for this, change this if ever one is added
-local punctuation = token(l.OPERATOR,
- P'=>' + P'.>' + P'<:' + P'->'
- + S('=.,:;()[]{}!?~^&|_@'))
-
--- highlight functions with syntax sugar at declaration
-local func
- = token(l.KEYWORD, pword{'fun', 'new', 'be'}) * ws^-1
- * annotation^-1 * ws^-1
- * capability^-1 * ws^-1
- * token(l.FUNCTION, pword{
- 'create', 'dispose', '_final', 'apply', 'update',
- 'add', 'sub', 'mul', 'div', 'mod', 'add_unsafe', 'sub_unsafe',
- 'mul_unsafe', 'div_unsafe', 'mod_unsafe', 'shl', 'shr', 'shl_unsafe',
- 'shr_unsafe', 'op_and', 'op_or', 'op_xor', 'eq', 'ne', 'lt', 'le', 'ge',
- 'gt', 'eq_unsafe', 'ne_unsafe', 'lt_unsafe', 'le_unsafe', 'ge_unsafe',
- 'gt_unsafe', 'neg', 'neg_unsafe', 'op_not',
- 'has_next', 'next',
- '_serialise_space', '_serialise', '_deserialise'})
-
-local id_suffix = (l.alnum + P("'") + P('_'))^0
-local type = token(l.TYPE, P('_')^-1 * l.upper * id_suffix)
-local identifier = token(l.IDENTIFIER, P('_')^-1 * l.lower * id_suffix)
-local tuple_lookup = token(l.IDENTIFIER, P('_') * l.digit^1)
-
-local M = {_NAME = 'pony'}
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'annotation', annotation},
- {'boolean', lit_bool},
- {'number', lit_num},
- {'string', lit_str},
- {'function', func},
- {'keyword', keyword},
- {'capability', capability},
- {'qualifier', qualifier},
- {'operator', operator},
- {'type', type},
- {'identifier', identifier},
- {'lookup', tuple_lookup},
- {'punctuation', punctuation}
+local lex = lexer.new('pony')
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
+
+-- Capabilities.
+local capability = token(lexer.LABEL, word_match('box iso ref tag trn val'))
+lex:add_rule('capability', capability)
+
+-- Annotations.
+local annotation = token(lexer.PREPROCESSOR, lexer.range('\\', false, false))
+lex:add_rule('annotation', annotation)
+
+-- Functions.
+-- Highlight functions with syntax sugar at declaration.
+lex:add_rule('function',
+ token(lexer.KEYWORD, word_match('fun new be')) * ws^-1 * annotation^-1 * ws^-1 * capability^-1 *
+ ws^-1 * token(lexer.FUNCTION, word_match{
+ 'create', 'dispose', '_final', 'apply', 'update', 'add', 'sub', 'mul', 'div', 'mod',
+ 'add_unsafe', 'sub_unsafe', 'mul_unsafe', 'div_unsafe', 'mod_unsafe', 'shl', 'shr',
+ 'shl_unsafe', 'shr_unsafe', 'op_and', 'op_or', 'op_xor', 'eq', 'ne', 'lt', 'le', 'ge', 'gt',
+ 'eq_unsafe', 'ne_unsafe', 'lt_unsafe', 'le_unsafe', 'ge_unsafe', 'gt_unsafe', 'neg',
+ 'neg_unsafe', 'op_not', --
+ 'has_next', 'next', --
+ '_serialise_space', '_serialise', '_deserialise'
+ }))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'actor', 'as', 'be', 'break', 'class', 'compile_error', 'compile_intrinsic', 'continue',
+ 'consume', 'do', 'else', 'elseif', 'embed', 'end', 'error', 'for', 'fun', 'if', 'ifdef', 'iftype',
+ 'in', 'interface', 'is', 'isnt', 'lambda', 'let', 'match', 'new', 'object', 'primitive',
+ 'recover', 'repeat', 'return', 'struct', 'then', 'this', 'trait', 'try', 'type', 'until', 'use',
+ 'var', 'where', 'while', 'with'
+}))
+
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false')))
+
+-- Operators.
+local ops = {
+ ['+'] = true, ['-'] = true, ['*'] = true, ['/'] = true, ['%'] = true, ['+~'] = true,
+ ['-~'] = true, ['*~'] = true, ['/~'] = true, ['%~'] = true, ['<<'] = true, ['>>'] = true,
+ ['<<~'] = true, ['>>~'] = true, ['=='] = true, ['!='] = true, ['<'] = true, ['<='] = true,
+ ['>='] = true, ['>'] = true, ['==~'] = true, ['!=~'] = true, ['<~'] = true, ['<=~'] = true,
+ ['>=~'] = true, ['>~'] = true
}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, word_match('and or xor not addressof digestof') +
+ lpeg.Cmt(S('+-*/%<>=!~')^1, function(input, index, op) return ops[op] and index or nil end)))
+
+-- Identifiers.
+local id_suffix = (lexer.alnum + "'" + '_')^0
+lex:add_rule('type', token(lexer.TYPE, P('_')^-1 * lexer.upper * id_suffix))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, P('_')^-1 * lexer.lower * id_suffix))
+lex:add_rule('lookup', token(lexer.IDENTIFIER, '_' * lexer.digit^1))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local tq_str = lexer.range('"""')
+lex:add_rule('string', token(lexer.STRING, sq_str + tq_str + dq_str))
+
+-- Numbers.
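+-- Digit groups may be separated by underscores, e.g. 1_000_000 or 0xFF_FF.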
+local function num(digit) return digit * (digit^0 * '_')^0 * digit^1 + digit end
+local int = num(lexer.digit)
+local frac = '.' * int
+local exp = S('eE') * (P('-') + '+')^-1 * int
+local hex = '0x' * num(lexer.xdigit)
+local bin = '0b' * num(S('01'))
+local float = int * frac^-1 * exp^-1
+lex:add_rule('number', token(lexer.NUMBER, hex + bin + float))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Punctuation.
+-- There is no suitable token name for this, change this if ever one is added.
+lex:add_rule('punctuation',
+ token(lexer.OPERATOR, P('=>') + '.>' + '<:' + '->' + S('=.,:;()[]{}!?~^&|_@')))
+
+-- Qualifiers.
+lex:add_rule('qualifier', token(lexer.LABEL, '#' * word_match('read send share any alias')))
+
+return lex
diff --git a/lua/lexers/powershell.lua b/lua/lexers/powershell.lua
index 6ee33fd..f499cdb 100644
--- a/lua/lexers/powershell.lua
+++ b/lua/lexers/powershell.lua
@@ -1,82 +1,60 @@
--- Copyright 2015-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2015-2022 Mitchell. See LICENSE.
-- PowerShell LPeg lexer.
-- Contributed by Jeff Stone.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'powershell'}
+local lex = lexer.new('powershell')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'Begin', 'Break', 'Continue', 'Do', 'Else', 'End', 'Exit', 'For', 'ForEach',
- 'ForEach-Object', 'Get-Date', 'Get-Random', 'If', 'Param', 'Pause',
- 'Powershell', 'Process', 'Read-Host', 'Return', 'Switch', 'While',
- 'Write-Host'
-}, '-', true))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'Begin', 'Break', 'Continue', 'Do', 'Else', 'End', 'Exit', 'For', 'ForEach', 'ForEach-Object',
+ 'Get-Date', 'Get-Random', 'If', 'Param', 'Pause', 'Powershell', 'Process', 'Read-Host', 'Return',
+ 'Switch', 'While', 'Write-Host'
+}, true)))
-- Comparison Operators.
-local comparison = token(l.KEYWORD, '-' * word_match({
- 'and', 'as', 'band', 'bor', 'contains', 'eq', 'ge', 'gt', 'is', 'isnot', 'le',
- 'like', 'lt', 'match', 'ne', 'nomatch', 'not', 'notcontains', 'notlike', 'or',
- 'replace'
-}, nil, true))
+lex:add_rule('comparison', token(lexer.KEYWORD, '-' * word_match({
+ 'and', 'as', 'band', 'bor', 'contains', 'eq', 'ge', 'gt', 'is', 'isnot', 'le', 'like', 'lt',
+ 'match', 'ne', 'nomatch', 'not', 'notcontains', 'notlike', 'or', 'replace'
+}, true)))
-- Parameters.
-local parameter = token(l.KEYWORD, '-' * word_match({
- 'Confirm', 'Debug', 'ErrorAction', 'ErrorVariable', 'OutBuffer',
- 'OutVariable', 'Verbose', 'WhatIf'
-}, nil, true))
+lex:add_rule('parameter', token(lexer.KEYWORD, '-' *
+ word_match('Confirm Debug ErrorAction ErrorVariable OutBuffer OutVariable Verbose WhatIf', true)))
-- Properties.
-local property = token(l.KEYWORD, '.' * word_match({
- 'day', 'dayofweek', 'dayofyear', 'hour', 'millisecond', 'minute', 'month',
- 'second', 'timeofday', 'year'
-}, nil, true))
+lex:add_rule('property', token(lexer.KEYWORD, '.' *
+ word_match('day dayofweek dayofyear hour millisecond minute month second timeofday year', true)))
-- Types.
-local type = token(l.KEYWORD, '[' * word_match({
- 'array', 'boolean', 'byte', 'char', 'datetime', 'decimal', 'double',
- 'hashtable', 'int', 'long', 'single', 'string', 'xml'
-}, nil, true) * ']')
+lex:add_rule('type', token(lexer.KEYWORD, '[' * word_match({
+ 'array', 'boolean', 'byte', 'char', 'datetime', 'decimal', 'double', 'hashtable', 'int', 'long',
+ 'single', 'string', 'xml'
+}, true) * ']'))
-- Variables.
-local variable = token(l.VARIABLE, '$' * (l.digit^1 + l.word +
- l.delimited_range('{}', true, true)))
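+-- Matches $123, $name, and single-line ${...} references.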
+lex:add_rule('variable', token(lexer.VARIABLE,
+ '$' * (lexer.digit^1 + lexer.word + lexer.range('{', '}', true))))
-- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
-- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}%`'))
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}%`')))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'comparison', comparison},
- {'parameter', parameter},
- {'property', property},
- {'type', type},
- {'variable', variable},
- {'string', string},
- {'number', number},
- {'operator', operator},
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
-M._foldsymbols = {
- _patterns = {'[{}]'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
-}
-
-return M
+return lex
diff --git a/lua/lexers/prolog.lua b/lua/lexers/prolog.lua
index 2999674..53e25c8 100644
--- a/lua/lexers/prolog.lua
+++ b/lua/lexers/prolog.lua
@@ -1,167 +1,353 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
+-- Lexer enhanced by Michael T. Richter to conform to the realities of
+-- Prologs on the ground. Copyright is explicitly assigned back to Mitchell.
-- Prolog LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+--[[
+ Prologs are notoriously fractious with many barely-compatible dialects. To
+ make Textadept more useful for these cases, directives and keywords are
+ grouped by dialect. Selecting a dialect is a simple matter of setting the
+ buffer/lexer property "prolog.dialect" in init.lua. Dialects currently in
+ the lexer file are:
+ - 'iso': the generic ISO standard without modules.
+ - 'gprolog': GNU Prolog.
+ - 'swipl': SWI-Prolog.
-local M = {_NAME = 'prolog'}
+ The default dialect is 'iso' if none is defined. (You probably don't want
+ this.)
+
+ Note that there will undoubtedly be duplicated entries in various categories
+ because of the flexibility of Prolog and the automated tools used to gather
+ most of this information. This is not an issue, however, because directives
+ override arity-0 predicates, which override arity-1+ predicates, which
+ override bifs, which override operators.
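+
+ For example, to select the SWI-Prolog dialect (a sketch; how this property is
+ set from init.lua depends on your setup):
+
+   lexer.property['prolog.dialect'] = 'swipl'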
+]]
+
+local lexer = require('lexer')
+
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+
+local lex = lexer.new('prolog')
+
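+-- Map the "prolog.dialect" property to a known dialect; unknown or unset
+-- values fall back to 'iso' via the __index metamethod.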
+local dialects = setmetatable({gprolog = 'gprolog', swipl = 'swipl'},
+ {__index = function(_, _) return 'iso' end})
+local dialect = dialects[lexer.property['prolog.dialect']]
+
+-- Directives.
+local directives = {}
+directives.iso = [[
+ -- Gathered by inspection of GNU Prolog documentation.
+ dynamic multifile discontiguous include ensure_loaded op char_conversion
+ set_prolog_flag initialization
+]]
+directives.gprolog = directives.iso .. [[
+ -- Gathered by inspection of GNU Prolog documentation.
+ public ensure_linked built_in if else endif elif foreign
+]]
+directives.swipl = directives.iso .. [[
+ -- Gathered by liberal use of grep on the SWI source and libraries.
+ coinductive current_predicate_option expects_dialect http_handler listen
+ module multifile use_foreign_library use_module dynamic http_handler
+ initialization json_object multifile record use_module abolish
+ arithmetic_function asserta at_halt begin_tests chr_constraint chr_option
+ chr_type clear_cache constraints consult create_prolog_flag
+ current_prolog_flag debug discontiguous dynamic elif else encoding end_tests
+ endif expects_dialect export forall format format_predicate html_meta
+ html_resource http_handler http_request_expansion if include
+ init_color_term_flag init_options initialization json_object
+ lazy_list_iterator license listen load_extensions load_files
+ load_foreign_library meta_predicate mode module module_transparent multifile
+ noprofile op pce_begin_class pce_end_class pce_global pce_group persistent
+ pop_operators pred predicate_options print_message prolog_load_context prompt
+ public push_hprolog_library push_ifprolog_library push_operators
+ push_sicstus_library push_xsb_library push_yap_library quasi_quotation_syntax
+ record redefine_system_predicate reexport register_iri_scheme residual_goals
+ retract set_module set_prolog_flag set_script_dir set_test_options setenv
+ setting style_check table thread_local thread_local message type
+ use_class_template use_foreign_library use_module utter volatile build_schema
+ chr_constraint chr_option chr_type cql_option determinate discontiguous
+ dynamic endif format_predicate if initialization license meta_predicate mode
+ module multifile op reexport thread_local use_module volatile
+]]
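+-- A directive is ':-' at the start of a line (after optional indentation),
+-- followed by a dialect-specific directive name.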
+lex:add_rule('directive',
+ token(lexer.WHITESPACE, lexer.starts_line(S(' \t'))^0) *
+ token(lexer.OPERATOR, ':-') *
+ token(lexer.WHITESPACE, S(' \t')^0) *
+ token(lexer.PREPROCESSOR, word_match(directives[dialect])))
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '%' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+local zero_arity_keywords = {}
+zero_arity_keywords.iso = [[
+ -- eyeballed from GNU Prolog documentation
+ true fail pi float_overflow int_overflow int_underflow undefined asserta
+ assertz retract retractall clause abolish current_predicate findall bagof
+ setof at_end_of_stream flush_output nl halt false
+]]
+zero_arity_keywords.gprolog = [[
+ -- Collected automatically via current_predicate/1 with some cleanup.
+ at_end_of_stream wam_debug listing flush_output fail told false top_level
+ shell trace debugging seen repeat abort nl statistics halt notrace randomize
+ true nospyall nodebug debug stop break
+]]
+zero_arity_keywords.swipl = [[
+ -- Collected automatically via current_predicate/1 with some cleanup.
+ noprotocol compiling ttyflush true abort license known_licenses
+ print_toplevel_variables initialize mutex_statistics break reset_profiler
+ win_has_menu version prolog abolish_nonincremental_tables false halt undefined
+ abolish_all_tables reload_library_index garbage_collect repeat nospyall
+ tracing trace notrace trim_stacks garbage_collect_clauses
+ garbage_collect_atoms mutex_unlock_all seen told nl debugging fail
+ at_end_of_stream attach_packs flush_output true
+]]
+local one_plus_arity_keywords = {}
+one_plus_arity_keywords.iso = [[
+ -- eyeballed from GNU Prolog documentation
+ call catch throw var nonvar atom integer float number atomic compound
+ callable ground unify_with_occurs_check compare functor arg copy_term
+ term_variables subsumes_term acyclic_term predicate_property current_input
+ current_output set_input set_output open close current_stream stream_property
+ set_stream_position get_char get_code is peek_char peek_code put_char put_code
+ get_byte peek_byte read_term read write_term write writeq write_canonical
+ char_conversion current_char_conversion call once repeat atom_length
+ atom_concat sub_atom char_code atom_chars atom_codes
+]]
+one_plus_arity_keywords.gprolog = [[
+ -- Collected automatically via current_predicate/1 with some cleanup.
+ abolish absolute_file_name acyclic_term add_linedit_completion
+ add_stream_alias add_stream_mirror append architecture arg argument_counter
+ argument_list argument_value asserta assertz at_end_of_stream atom atom_chars
+ atom_codes atom_concat atom_length atom_property atomic bagof between
+ bind_variables call call_det call_with_args callable catch change_directory
+ char_code char_conversion character_count clause close close_input_atom_stream
+ close_input_chars_stream close_input_codes_stream close_output_atom_stream
+ close_output_chars_stream close_output_codes_stream compare compound consult
+ copy_term cpu_time create_pipe current_alias current_atom current_bip_name
+ current_char_conversion current_input current_mirror current_op current_output
+ current_predicate current_prolog_flag current_stream date_time
+ decompose_file_name delete delete_directory delete_file directory_files
+ display display_to_atom display_to_chars display_to_codes environ exec
+ expand_term fd_all_different fd_at_least_one fd_at_most_one fd_atleast
+ fd_atmost fd_cardinality fd_dom fd_domain fd_domain_bool fd_element
+ fd_element_var fd_exactly fd_has_extra_cstr fd_has_vector fd_labeling
+ fd_labelingff fd_max fd_max_integer fd_maximize fd_min fd_minimize
+ fd_not_prime fd_only_one fd_prime fd_reified_in fd_relation fd_relationc
+ fd_set_vector_max fd_size fd_use_vector fd_var fd_vector_max file_exists
+ file_permission file_property find_linedit_completion findall flatten float
+ flush_output for forall fork_prolog format format_to_atom format_to_chars
+ format_to_codes functor g_array_size g_assign g_assignb g_dec g_deco g_inc
+ g_inco g_link g_read g_reset_bit g_set_bit g_test_reset_bit g_test_set_bit
+ generic_var get get_byte get_char get_code get_key get_key_no_echo
+ get_linedit_prompt get_print_stream get_seed get0 ground halt host_name
+ hostname_address integer is_absolute_file_name is_list is_relative_file_name
+ keysort last last_read_start_line_column leash length line_count line_position
+ list list_or_partial_list listing load lower_upper make_directory maplist
+ max_list member memberchk min_list msort name name_query_vars
+ name_singleton_vars new_atom nl non_fd_var non_generic_var nonvar nospy nth
+ nth0 nth1 number number_atom number_chars number_codes numbervars once op open
+ open_input_atom_stream open_input_chars_stream open_input_codes_stream
+ open_output_atom_stream open_output_chars_stream open_output_codes_stream
+ os_version partial_list peek_byte peek_char peek_code permutation phrase popen
+ portray_clause predicate_property prefix print print_to_atom print_to_chars
+ print_to_codes prolog_file_name prolog_pid put put_byte put_char put_code
+ random read read_atom read_from_atom read_from_chars read_from_codes
+ read_integer read_number read_pl_state_file read_term read_term_from_atom
+ read_term_from_chars read_term_from_codes read_token read_token_from_atom
+ read_token_from_chars read_token_from_codes real_time remove_stream_mirror
+ rename_file retract retractall reverse see seeing seek select send_signal
+ set_bip_name set_input set_linedit_prompt set_output set_prolog_flag set_seed
+ set_stream_buffering set_stream_eof_action set_stream_line_column
+ set_stream_position set_stream_type setarg setof shell skip sleep socket
+ socket_accept socket_bind socket_close socket_connect socket_listen sort spawn
+ spy spypoint_condition sr_change_options sr_close sr_current_descriptor
+ sr_error_from_exception sr_get_error_counters sr_get_file_name
+ sr_get_include_list sr_get_include_stream_list sr_get_module sr_get_position
+ sr_get_size_counters sr_get_stream sr_new_pass sr_open sr_read_term
+ sr_set_error_counters sr_write_error sr_write_message statistics
+ stream_line_column stream_position stream_property sub_atom sublist
+ subsumes_term subtract succ suffix sum_list syntax_error_info system
+ system_time tab tell telling temporary_file temporary_name term_hash term_ref
+ term_variables throw unget_byte unget_char unget_code unify_with_occurs_check
+ unlink user_time var wait working_directory write write_canonical
+ write_canonical_to_atom write_canonical_to_chars write_canonical_to_codes
+ write_pl_state_file write_term write_term_to_atom write_term_to_chars
+ write_term_to_codes write_to_atom write_to_chars write_to_codes writeq
+ writeq_to_atom writeq_to_chars writeq_to_codes
+]]
+one_plus_arity_keywords.swipl = [[
+ -- Collected automatically via current_predicate/1 with some cleanup.
+ prolog_exception_hook term_expansion expand_answer message_property resource
+ help goal_expansion file_search_path prolog_clause_name thread_message_hook
+ prolog_file_type goal_expansion prolog_predicate_name exception writeln
+ term_expansion expand_query url_path message_hook library_directory resource
+ portray prolog_load_file prolog_list_goal ansi_format source_file_property
+ asserta call_dcg source_location wait_for_input locale_destroy set_locale
+ read_pending_codes thread_join open_dde_conversation win_folder protocol
+ copy_stream_data current_locale read_pending_chars win_add_dll_directory
+ protocola thread_property win_shell goal_expansion phrase gc_file_search_cache
+ dcg_translate_rule protocolling win_registry_get_value term_expansion
+ dcg_translate_rule assert copy_stream_data once bagof prompt1 tnot assertz
+ phrase sort ignore thread_statistics assert locale_create
+ win_remove_dll_directory term_expansion read_term asserta clause assertz
+ predicate_option_type is_thread get_single_char set_prolog_IO expand_goal
+ ground message_queue_create locale_property close_dde_conversation
+ goal_expansion clause zipper_open_new_file_in_zip term_to_atom with_output_to
+ module expand_term redefine_system_predicate thread_detach dde_execute
+ term_string read_clause compile_predicates predicate_option_mode noprofile
+ read_term_from_atom cancel_halt non_terminal atom_to_term line_position frozen
+ dde_request findnsols prolog_skip_level prolog_current_choice get get_attrs
+ license var_property nb_delete unwrap_predicate zipper_open_current put_attrs
+ dde_poke set_stream read_term zip_file_info_ memberchk seek expand_goal get0
+ call var integer attach_packs byte_count zipper_goto findnsols character_count
+ expand_term get_flag atom line_count set_flag atomic tab create_prolog_flag
+ copy_term import_module verbose_expansion b_setval duplicate_term
+ prolog_load_context attach_packs prolog_listen b_getval prolog_frame_attribute
+ prompt copy_term_nat nb_linkval tab prolog_choice_attribute set_prolog_flag
+ nb_getval prolog_skip_frame del_attrs skip sort license open_null_stream
+ nb_current prolog_listen msort is_list is_stream get keysort win_shell
+ prolog_unlisten notrace get0 add_import_module wildcard_match profiler
+ delete_directory trie_gen_compiled expand_file_name file_name_extension
+ delete_file writeq win_module_file call write get_dict win_exec
+ directory_files trie_insert make_directory engine_next_reified del_dict sleep
+ getenv call_continuation trie_gen_compiled prolog_to_os_filename
+ is_absolute_file_name trie_insert engine_fetch engine_create strip_module call
+ delete_import_module write_canonical compile_aux_clauses setenv callable
+ is_engine write_term call set_module call halt catch findall trie_gen
+ trie_destroy rename_file shift unify_with_occurs_check engine_yield forall
+ unsetenv trie_term file_directory_name version current_engine file_base_name
+ engine_self import trie_gen trie_lookup write_term trie_update freeze
+ engine_post export put_dict same_file trie_new call trie_delete start_tabling
+ is_trie residual_goals thread_peek_message thread_get_message dict_pairs
+ set_end_of_stream call_cleanup current_predicate arg dict_create
+ thread_setconcurrency read_link is_dict at_halt tmp_file not put_dict
+ setup_call_cleanup abolish_nonincremental_tables time_file
+ start_subsumptive_tabling char_conversion compound sub_atom access_file call
+ call_cleanup abolish nonvar current_functor abolish_module_tables
+ subsumes_term engine_post call retractall compare engine_next prolog_cut_to
+ size_file current_char_conversion predicate_property nonground engine_destroy
+ message_queue_property format abolish qcompile thread_send_message stream_pair
+ message_queue_create same_term number select_dict catch_with_backtrace
+ thread_get_message thread_send_message win_insert_menu_item message_queue_set
+ <meta-call> exists_directory copy_term nb_set_dict prolog_nodebug functor
+ current_table cyclic_term untable read exists_file thread_peek_message
+ b_set_dict engine_create prolog_debug acyclic_term writeln get_dict
+ compound_name_arity abolish_table_subgoals start_tabling trie_insert
+ nb_link_dict message_queue_destroy thread_get_message is_dict nth_clause
+ absolute_file_name term_singletons make_library_index set_output retract
+ context_module current_trie term_attvars load_files get_char ensure_loaded
+ current_input prolog_current_frame make_library_index term_variables
+ compound_name_arguments reexport autoload_path get_code set_input flag
+ thread_create use_module findall thread_join call_with_inference_limit
+ var_number dwim_match consult peek_code close nospy print_message
+ term_variables trie_property read_history get_byte default_module get_byte
+ print on_signal get_char call_residue_vars dwim_match atom_prefix unifiable
+ use_module numbervars load_files get_code open format_time
+ copy_predicate_clauses reexport leash current_output sub_string close
+ format_time atom_codes stamp_date_time require name open_shared_object open
+ atom_chars current_predicate format tmp_file_stream term_hash rational
+ source_file reset atom_concat atom_length current_prolog_flag rational
+ dwim_predicate date_time_stamp stream_property string_upper setlocale format
+ writeln current_module normalize_space writeq current_flag shell upcase_atom
+ qcompile char_code atomic_concat read string_lower write term_string
+ numbervars working_directory number_codes set_prolog_gc_thread downcase_atom
+ format_predicate number_string open_shared_object style_check char_type print
+ stream_position_data code_type write_canonical number_chars length
+ current_arithmetic_function atomic_list_concat del_attr read_string zip_unlock
+ open_resource string_length zip_lock see erase open_resource setof
+ atomic_list_concat current_format_predicate current_resource with_mutex
+ atomics_to_string term_hash absolute_file_name deterministic current_atom
+ thread_create collation_key get_attr variant_hash string_concat atom_number
+ put put_attr variant_sha1 thread_signal mutex_unlock tty_size current_key
+ mutex_create fill_buffer expand_file_search_path blob shell
+ register_iri_scheme skip fast_read divmod mutex_trylock thread_self put
+ mutex_property fast_write mutex_lock current_blob sub_atom_icasechk
+ mutex_destroy fast_term_serialized split_string set_stream_position recorda
+ telling setarg thread_exit zip_open_stream instance mutex_create statistics
+ append get_time zip_close_ tell atomics_to_string clause_property attvar
+ zip_clone seeing nth_integer_root_and_remainder recorda put_byte string_chars
+ spy recordz print_message_lines current_op put_char nl source_file
+ string_codes op setup_call_catcher_cleanup nb_linkarg recorded put_code
+ peek_byte apply module_property atom_string nb_setarg succ recordz
+ message_to_string close_shared_object peek_char between recorded visible plus
+ call_shared_object_function peek_code peek_byte set_prolog_stack float throw
+ at_end_of_stream get_string_code call_with_depth_limit random_property
+ flush_output peek_string open_xterm peek_char open_string string_code
+ set_random prolog_stack_property put_char unload_file nb_setval put_byte
+ current_signal put_code write_length string read_string text_to_string
+]]
+lex:add_rule('keyword', token(lexer.KEYWORD,
+ word_match(zero_arity_keywords[dialect]) +
+ word_match(one_plus_arity_keywords[dialect]) * #P('(')))
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
+-- BIFs.
+local bifs = {}
+bifs.iso = [[
+ -- eyeballed from GNU Prolog documentation
+ xor abs sign min max sqrt tan atan atan2 cos acos sin asin exp log float
+ ceiling floor round truncate float_fractional_part float_integer_part rem div
+ mod
+]]
+bifs.gprolog = bifs.iso .. [[
+ -- eyeballed from GNU Prolog documentation
+ inc dec lsb msb popcount gcd tanh atanh cosh acosh sinh asinh log10 rnd
+]]
+bifs.swipl = [[
+ -- Collected automatically via current_arithmetic_function/1 with some
+ -- cleanup.
+ abs acos acosh asinh atan atan atanh atan2 ceil ceiling copysign cos cosh
+ cputime div getbit e epsilon erf erfc eval exp float float_fractional_part
+ float_integer_part floor gcd inf integer lgamma log log10 lsb max min mod msb
+ nan pi popcount powm random random_float rational rationalize rdiv rem round
+ sign sin sinh sqrt tan tanh truncate xor
+]]
+lex:add_rule('bif', token(lexer.FUNCTION, word_match(bifs[dialect]) * #P('(')))
-- Numbers.
-local number = token(l.NUMBER, l.digit^1 * ('.' * l.digit^1)^-1)
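+-- Prolog numbers include 0'c character codes, 0b/0o/0x radix forms, and
+-- '_'-separated digit groups.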
+local decimal_group = S('+-')^-1 * (lexer.digit + '_')^1
+local binary_number = '0b' * (S('01') + '_')^1
+local character_code = '0\'' * S('\\')^-1 * lexer.graph
+local decimal_number = decimal_group * ('.' * decimal_group)^-1 *
+ ('e' * decimal_group)^-1
+local hexadecimal_number = '0x' * (lexer.xdigit + '_')^1
+local octal_number = '0o' * (S('01234567') + '_')^1
+lex:add_rule('number', token(lexer.NUMBER, character_code + binary_number +
+ hexadecimal_number + octal_number + decimal_number))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- -- Directives, by manual scanning of SWI-Prolog source code
- 'abolish', 'arithmetic_function', 'at_halt', 'create_prolog_flag',
- 'discontiguous', 'dynamic', 'elif', 'else', 'endif', 'format_predicate', 'if',
- 'initialization', 'lazy_list_iterator', 'listing', 'load_extensions',
- 'meta_predicate', 'mode', 'module', 'module_transparent', 'multifile', 'op',
- 'persistent', 'pop_operators', 'pred', 'predicate_options',
- 'prolog_load_context', 'public', 'push_operators', 'record',
- 'redefine_system_predicate', 'reexport', 'set_prolog_flag', 'setting',
- 'thread_local', 'type', 'use_foreign_library', 'use_module', 'volatile',
-
- -- Built-in predicates, generated in SWI-Prolog via current_predictate/1.
- 'abolish', 'abort', 'absolute_file_name', 'access_file', 'acyclic_term',
- 'add_import_module', 'append', 'apply', 'arg', 'assert', 'asserta', 'assertz',
- 'at_end_of_stream', 'at_halt', 'atom', 'atom_chars', 'atom_codes',
- 'atom_concat', 'atomic', 'atomic_concat', 'atomic_list_concat',
- 'atomics_to_string', 'atom_length', 'atom_number', 'atom_prefix',
- 'atom_string', 'atom_to_term', 'attach_packs', 'attvar', 'autoload_path',
- 'bagof', 'between', 'b_getval', 'blob', 'break', 'b_set_dict', 'b_setval',
- 'byte_count', 'call', 'callable', 'call_cleanup', 'call_continuation',
- 'call_dcg', 'call_residue_vars', 'call_shared_object_function',
- 'call_with_depth_limit', 'call_with_inference_limit', 'cancel_halt', 'catch',
- 'character_count', 'char_code', 'char_conversion', 'char_type', 'clause',
- 'clause_property', 'close', 'close_shared_object', 'code_type',
- 'collation_key', 'compare', 'compile_aux_clauses', 'compile_predicates',
- 'compiling', 'compound', 'compound_name_arguments', 'compound_name_arity',
- 'consult', 'context_module', 'copy_predicate_clauses', 'copy_stream_data',
- 'copy_term', 'copy_term_nat', 'create_prolog_flag',
- 'current_arithmetic_function', 'current_atom', 'current_blob',
- 'current_char_conversion', 'current_engine', 'current_flag',
- 'current_format_predicate', 'current_functor', 'current_input', 'current_key',
- 'current_locale', 'current_module', 'current_op', 'current_output',
- 'current_predicate', 'current_prolog_flag', 'current_resource',
- 'current_signal', 'current_trie', 'cwd', 'cyclic_term', 'date_time_stamp',
- 'dcg_translate_rule', 'debugging', 'default_module', 'del_attr', 'del_attrs',
- 'del_dict', 'delete_directory', 'delete_file', 'delete_import_module',
- 'deterministic', 'dict_create', 'dict_pairs', 'directory_files', 'divmod',
- 'downcase_atom', 'duplicate_term', 'dwim_match', 'dwim_predicate',
- 'engine_create', 'engine_destroy', 'engine_fetch', 'engine_next',
- 'engine_next_reified', 'engine_post', 'engine_self', 'engine_yield',
- 'ensure_loaded', 'erase', 'exception', 'exists_directory', 'exists_file',
- 'expand_answer', 'expand_file_name', 'expand_file_search_path', 'expand_goal',
- 'expand_query', 'expand_term', 'export', 'extern_indirect', 'fail', 'false',
- 'fast_read', 'fast_term_serialized', 'fast_write', 'file_base_name',
- 'file_directory_name', 'file_name_extension', 'file_search_path',
- 'fill_buffer', 'findall', 'findnsols', 'flag', 'float', 'flush_output',
- 'forall', 'format', 'format_predicate', 'format_time', 'freeze', 'frozen',
- 'functor', 'garbage_collect', 'garbage_collect_atoms',
- 'garbage_collect_clauses', 'gc_file_search_cache', 'get0', 'get', 'get_attr',
- 'get_attrs', 'get_byte', 'get_char', 'get_code', 'get_dict', 'getenv',
- 'get_flag', 'get_single_char', 'get_string_code', 'get_time',
- 'goal_expansion', 'ground', 'halt', 'ignore', 'import', 'import_module',
- 'instance', 'integer', 'intern_indirect', 'is_absolute_file_name', 'is_dict',
- 'is_engine', 'is_list', 'is_stream', 'is_thread', 'keysort', 'known_licenses',
- 'leash', 'length', 'library_directory', 'license', 'line_count',
- 'line_position', 'load_files', 'locale_create', 'locale_destroy',
- 'locale_property', 'make_directory', 'make_library_index', 'memberchk',
- 'message_hook', 'message_property', 'message_queue_create',
- 'message_queue_destroy', 'message_queue_property', 'message_to_string',
- 'module', 'module_property', 'msort', 'mutex_create', 'mutex_destroy',
- 'mutex_lock', 'mutex_property', 'mutex_statistics', 'mutex_trylock',
- 'mutex_unlock', 'mutex_unlock_all', 'name', 'nb_current', 'nb_delete',
- 'nb_getval', 'nb_linkarg', 'nb_link_dict', 'nb_linkval', 'nb_setarg',
- 'nb_set_dict', 'nb_setval', 'nl', 'nonvar', 'noprofile', 'noprotocol',
- 'normalize_space', 'nospy', 'nospyall', 'not', 'notrace', 'nth_clause',
- 'nth_integer_root_and_remainder', 'number', 'number_chars', 'number_codes',
- 'number_string', 'numbervars', 'once', 'on_signal', 'op', 'open',
- 'open_null_stream', 'open_resource', 'open_shared_object', 'open_string',
- 'open_xterm', 'peek_byte', 'peek_char', 'peek_code', 'peek_string', 'phrase',
- 'plus', 'portray', 'predicate_option_mode', 'predicate_option_type',
- 'predicate_property', 'print', 'print_message', 'print_message_lines',
- 'print_toplevel_variables', 'profiler', 'prolog', 'prolog_choice_attribute',
- 'prolog_current_choice', 'prolog_current_frame', 'prolog_cut_to',
- 'prolog_debug', 'prolog_event_hook', 'prolog_file_type',
- 'prolog_frame_attribute', 'prolog_list_goal', 'prolog_load_context',
- 'prolog_load_file', 'prolog_nodebug', 'prolog_skip_frame',
- 'prolog_skip_level', 'prolog_stack_property', 'prolog_to_os_filename',
- 'prompt1', 'prompt', 'protocol', 'protocola', 'protocolling', 'put',
- 'put_attr', 'put_attrs', 'put_byte', 'put_char', 'put_code', 'put_dict',
- 'pwd', 'qcompile', 'random_property', 'rational', 'read', 'read_clause',
- 'read_history', 'read_link', 'read_pending_chars', 'read_pending_codes',
- 'read_string', 'read_term', 'read_term_from_atom', 'recorda', 'recorded',
- 'recordz', 'redefine_system_predicate', 'reexport', 'reload_library_index',
- 'rename_file', 'repeat', 'require', 'reset', 'reset_profiler',
- 'residual_goals', 'resource', 'retract', 'retractall', 'same_file',
- 'same_term', 'see', 'seeing', 'seek', 'seen', 'select_dict', 'setarg',
- 'set_end_of_stream', 'setenv', 'set_flag', 'set_input', 'set_locale',
- 'setlocale', 'set_module', 'setof', 'set_output', 'set_prolog_flag',
- 'set_prolog_IO', 'set_prolog_stack', 'set_random', 'set_stream',
- 'set_stream_position', 'setup_call_catcher_cleanup', 'setup_call_cleanup',
- 'shell', 'shift', 'size_file', 'skip', 'sleep', 'sort', 'source_file',
- 'source_file_property', 'source_location', 'split_string', 'spy',
- 'stamp_date_time', 'statistics', 'stream_pair', 'stream_position_data',
- 'stream_property', 'string', 'string_chars', 'string_code', 'string_codes',
- 'string_concat', 'string_length', 'string_lower', 'string_upper',
- 'strip_module', 'style_check', 'sub_atom', 'sub_atom_icasechk', 'sub_string',
- 'subsumes_term', 'succ', 'swiplrc', 'tab', 'tell', 'telling', 'term_attvars',
- 'term_expansion', 'term_hash', 'term_string', 'term_to_atom',
- 'term_variables', 'text_to_string', 'thread_at_exit', 'thread_create',
- 'thread_detach', 'thread_exit', 'thread_get_message', 'thread_join',
- 'thread_message_hook', 'thread_peek_message', 'thread_property',
- 'thread_self', 'thread_send_message', 'thread_setconcurrency',
- 'thread_signal', 'thread_statistics', 'throw', 'time_file', 'tmp_file',
- 'tmp_file_stream', 'told', 'trace', 'tracing', 'trie_destroy', 'trie_gen',
- 'trie_insert', 'trie_insert_new', 'trie_lookup', 'trie_new', 'trie_property',
- 'trie_term', 'trim_stacks', 'true', 'ttyflush', 'tty_get_capability',
- 'tty_goto', 'tty_put', 'tty_size', 'unifiable', 'unify_with_occurs_check',
- 'unload_file', 'unsetenv', 'upcase_atom', 'use_module', 'var', 'variant_hash',
- 'variant_sha1', 'var_number', 'var_property', 'verbose_expansion', 'version',
- 'visible', 'wait_for_input', 'wildcard_match', 'with_mutex', 'with_output_to',
- 'working_directory', 'write', 'write_canonical', 'write_length', 'writeln',
- 'writeq', 'write_term',
-
- -- Built-in functions, generated in SWI-Prolog via current_arithmetic_function/1.
- 'xor', 'rem', 'rdiv', 'mod', 'div', 'abs', 'acos', 'acosh', 'asin', 'asinh',
- 'atan2', 'atan', 'atanh', 'ceil', 'ceiling', 'copysign', 'cos', 'cosh',
- 'cputime', 'e', 'epsilon', 'erf', 'erfc', 'eval', 'exp', 'float',
- 'float_fractional_part', 'float_integer_part', 'floor', 'gcd', 'getbit',
- 'inf', 'integer', 'lgamma', 'log10', 'log', 'lsb', 'max', 'min', 'msb',
- 'nan', 'pi', 'popcount', 'powm', 'random', 'random_float', 'rational',
- 'rationalize', 'round', 'sign', 'sin', 'sinh', 'sqrt', 'tan', 'tanh',
- 'truncate',
-})
+-- Comments.
+local line_comment = lexer.to_eol('%')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Operators.
+local operators = {}
+operators.iso = [[
+ -- Collected automatically via current_op/3 with some cleanup and comparison
+ -- to docs.
+ rem div mod is
+]]
+operators.gprolog = operators.iso -- GNU Prolog's textual operators are the same
+operators.swipl = [[
+ -- Collected automatically via current_op/3 with some cleanup.
+ is as volatile mod discontiguous div rdiv meta_predicate public xor
+ module_transparent multifile table dynamic thread_initialization thread_local
+ initialization rem
+]]
+lex:add_rule('operator', token(lexer.OPERATOR, word_match(operators[dialect]) +
+ S('-!+\\|=:;&<>()[]{}/*^@?.')))
+
+-- Variables.
+lex:add_rule('variable', token(lexer.VARIABLE, (lexer.upper + '_') *
+ (lexer.word^1 + lexer.digit^1 + P('_')^1)^0))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('-!+\\|=:;&<>()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-return M
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+return lex
diff --git a/lua/lexers/props.lua b/lua/lexers/props.lua
index 55e8777..a7b6723 100644
--- a/lua/lexers/props.lua
+++ b/lua/lexers/props.lua
@@ -1,47 +1,32 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Props LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'props'}
+local lex = lexer.new('props', {lex_by_line = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Colors.
+local xdigit = lexer.xdigit
+lex:add_rule('color', token('color', '#' * xdigit * xdigit * xdigit * xdigit * xdigit * xdigit))
+lex:add_style('color', lexer.styles.number)
-- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Equals.
-local equals = token(l.OPERATOR, '=')
+lex:add_rule('equals', token(lexer.OPERATOR, '='))
-- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, sq_str + dq_str)
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Variables.
-local variable = token(l.VARIABLE, '$(' * (l.any - ')')^1 * ')')
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * lexer.range('(', ')', true)))
--- Colors.
-local xdigit = l.xdigit
-local color = token('color', '#' * xdigit * xdigit * xdigit * xdigit * xdigit *
- xdigit)
-
-M._rules = {
- {'whitespace', ws},
- {'color', color},
- {'comment', comment},
- {'equals', equals},
- {'string', string},
- {'variable', variable},
-}
-
-M._tokenstyles = {
- color = l.STYLE_NUMBER
-}
-
-M._LEXBYLINE = true
-
-return M
+return lex
diff --git a/lua/lexers/protobuf.lua b/lua/lexers/protobuf.lua
index 41cba2a..df08d9f 100644
--- a/lua/lexers/protobuf.lua
+++ b/lua/lexers/protobuf.lua
@@ -1,58 +1,46 @@
--- Copyright 2016-2017 David B. Lamkins <david@lamkins.net>. See LICENSE.
+-- Copyright 2016-2022 David B. Lamkins <david@lamkins.net>. See LICENSE.
-- Protocol Buffer IDL LPeg lexer.
-- <https://developers.google.com/protocol-buffers/>
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'protobuf'}
+local lex = lexer.new('protobuf')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'contained', 'syntax', 'import', 'option', 'package', 'message', 'group',
- 'oneof', 'optional', 'required', 'repeated', 'default', 'extend',
- 'extensions', 'to', 'max', 'reserved', 'service', 'rpc', 'returns'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'contained', 'syntax', 'import', 'option', 'package', 'message', 'group', 'oneof', 'optional',
+ 'required', 'repeated', 'default', 'extend', 'extensions', 'to', 'max', 'reserved', 'service',
+ 'rpc', 'returns'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64', 'fixed32',
- 'fixed64', 'sfixed32', 'sfixed64', 'float', 'double', 'bool', 'string',
- 'bytes', 'enum', 'true', 'false'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64', 'fixed32', 'fixed64', 'sfixed32',
+ 'sfixed64', 'float', 'double', 'bool', 'string', 'bytes', 'enum', 'true', 'false'
+}))
+
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S('<>=|;,.()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('<>=|;,.()[]{}')))
+
+return lex
diff --git a/lua/lexers/ps.lua b/lua/lexers/ps.lua
index 662e0d4..4161539 100644
--- a/lua/lexers/ps.lua
+++ b/lua/lexers/ps.lua
@@ -1,61 +1,47 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Postscript LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'ps'}
+local lex = lexer.new('ps')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '%' * l.nonnewline^0)
-
--- Strings.
-local arrow_string = l.delimited_range('<>')
-local nested_string = l.delimited_range('()', false, false, true)
-local string = token(l.STRING, arrow_string + nested_string)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'pop', 'exch', 'dup', 'copy', 'roll', 'clear', 'count', 'mark', 'cleartomark',
- 'counttomark', 'exec', 'if', 'ifelse', 'for', 'repeat', 'loop', 'exit',
- 'stop', 'stopped', 'countexecstack', 'execstack', 'quit', 'start',
- 'true', 'false', 'NULL'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'pop', 'exch', 'dup', 'copy', 'roll', 'clear', 'count', 'mark', 'cleartomark', 'counttomark',
+ 'exec', 'if', 'ifelse', 'for', 'repeat', 'loop', 'exit', 'stop', 'stopped', 'countexecstack',
+ 'execstack', 'quit', 'start', 'true', 'false', 'NULL'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'add', 'div', 'idiv', 'mod', 'mul', 'sub', 'abs', 'ned', 'ceiling', 'floor',
- 'round', 'truncate', 'sqrt', 'atan', 'cos', 'sin', 'exp', 'ln', 'log', 'rand',
- 'srand', 'rrand'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'add', 'div', 'idiv', 'mod', 'mul', 'sub', 'abs', 'neg', 'ceiling', 'floor', 'round', 'truncate',
+ 'sqrt', 'atan', 'cos', 'sin', 'exp', 'ln', 'log', 'rand', 'srand', 'rrand'
+}))
-- Identifiers.
-local word = (l.alpha + '-') * (l.alnum + '-')^0
-local identifier = token(l.IDENTIFIER, word)
+local word = (lexer.alpha + '-') * (lexer.alnum + '-')^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
--- Operators.
-local operator = token(l.OPERATOR, S('[]{}'))
+-- Strings.
+local arrow_string = lexer.range('<', '>')
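+-- In nested_string below, the final 'true' enables balanced nested parentheses.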
+local nested_string = lexer.range('(', ')', false, false, true)
+lex:add_rule('string', token(lexer.STRING, arrow_string + nested_string))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Labels.
-local label = token(l.LABEL, '/' * word)
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'label', label},
- {'operator', operator},
-}
-
-return M
+lex:add_rule('label', token(lexer.LABEL, '/' * word))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('[]{}')))
+
+return lex
diff --git a/lua/lexers/pure.lua b/lua/lexers/pure.lua
index 56f002d..35e8a48 100644
--- a/lua/lexers/pure.lua
+++ b/lua/lexers/pure.lua
@@ -1,62 +1,48 @@
--- Copyright 2015-2017 David B. Lamkins <david@lamkins.net>. See LICENSE.
+-- Copyright 2015-2022 David B. Lamkins <david@lamkins.net>. See LICENSE.
-- pure LPeg lexer, see http://purelang.bitbucket.org/
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'pure'}
+local lex = lexer.new('pure')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '//' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'namespace', 'with', 'end', 'using', 'interface', 'extern', 'let', 'const', 'def', 'type',
+ 'public', 'private', 'nonfix', 'outfix', 'infix', 'infixl', 'infixr', 'prefix', 'postfix', 'if',
+ 'otherwise', 'when', 'case', 'of', 'then', 'else'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
local bin = '0' * S('Bb') * S('01')^1
-local hex = '0' * S('Xx') * (R('09') + R('af') + R('AF'))^1
-local dec = R('09')^1
+local hex = lexer.hex_num
+local dec = lexer.dec_num
local int = (bin + hex + dec) * P('L')^-1
-local rad = P('.') - P('..')
+local rad = P('.') - '..'
local exp = (S('Ee') * S('+-')^-1 * int)^-1
local flt = int * (rad * dec)^-1 * exp + int^-1 * rad * dec * exp
-local number = token(l.NUMBER, flt + int)
+lex:add_rule('number', token(lexer.NUMBER, flt + int))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'namespace', 'with', 'end', 'using', 'interface', 'extern', 'let', 'const',
- 'def', 'type', 'public', 'private', 'nonfix', 'outfix', 'infix', 'infixl',
- 'infixr', 'prefix', 'postfix', 'if', 'otherwise', 'when', 'case', 'of',
- 'then', 'else'
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Pragmas.
+local hashbang = lexer.starts_line('#!') * (lexer.nonnewline - '//')^0
+lex:add_rule('pragma', token(lexer.PREPROCESSOR, hashbang))
-- Operators.
-local punct = S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')
-local dots = P('..')
-local operator = token(l.OPERATOR, dots + punct)
+lex:add_rule('operator', token(lexer.OPERATOR, '..' + S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')))
--- Pragmas.
-local hashbang = l.starts_line('#!') * (l.nonnewline - P('//'))^0
-local pragma = token(l.PREPROCESSOR, hashbang)
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'pragma', pragma},
- {'keyword', keyword},
- {'number', number},
- {'operator', operator},
- {'identifier', identifier},
- {'string', string},
-}
-
-return M
+return lex
diff --git a/lua/lexers/python.lua b/lua/lexers/python.lua
index 63e2e82..901ccba 100644
--- a/lua/lexers/python.lua
+++ b/lua/lexers/python.lua
@@ -1,135 +1,106 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Python LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'python'}
+local lex = lexer.new('python', {fold_by_indentation = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
-
--- Strings.
-local sq_str = P('u')^-1 * l.delimited_range("'", true)
-local dq_str = P('U')^-1 * l.delimited_range('"', true)
-local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
-local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
--- TODO: raw_strs cannot end in single \.
-local raw_sq_str = P('u')^-1 * 'r' * l.delimited_range("'", false, true)
-local raw_dq_str = P('U')^-1 * 'R' * l.delimited_range('"', false, true)
-local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str + dq_str +
- raw_sq_str + raw_dq_str)
-
--- Numbers.
-local dec = l.digit^1 * S('Ll')^-1
-local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
-local oct = '0' * R('07')^1 * S('Ll')^-1
-local integer = S('+-')^-1 * (bin + l.hex_num + oct + dec)
-local number = token(l.NUMBER, l.float + integer)
+-- Classes.
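+-- Highlight the identifier following the 'class' keyword as a class name.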
+lex:add_rule('classdef', token(lexer.KEYWORD, 'class') * ws * token(lexer.CLASS, lexer.word))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'async', 'await',
- 'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
- 'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
- 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'print', 'raise',
- 'return', 'try', 'while', 'with', 'yield',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'as', 'assert', 'async', 'await', 'break', 'continue', 'def', 'del', 'elif', 'else',
+ 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda',
+ 'nonlocal', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try', 'while', 'with', 'yield',
-- Descriptors/attr access.
'__get__', '__set__', '__delete__', '__slots__',
-- Class.
- '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__',
- '__index__', '__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__',
- '__hash__', '__nonzero__', '__getattr__', '__getattribute__', '__setattr__',
- '__delattr__', '__call__',
+ '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__', '__index__', '__lt__',
+ '__le__', '__gt__', '__ge__', '__eq__', '__ne__', '__hash__', '__nonzero__', '__getattr__',
+ '__getattribute__', '__setattr__', '__delattr__', '__call__',
-- Operator.
- '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__',
- '__divmod__', '__pow__', '__and__', '__xor__', '__or__', '__lshift__',
- '__rshift__', '__nonzero__', '__neg__', '__pos__', '__abs__', '__invert__',
- '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__', '__imod__',
- '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
+ '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__', '__divmod__', '__pow__',
+ '__and__', '__xor__', '__or__', '__lshift__', '__rshift__', '__nonzero__', '__neg__', '__pos__',
+ '__abs__', '__invert__', '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__',
+ '__imod__', '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
-- Conversions.
- '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__',
- '__coerce__',
+ '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__', '__coerce__',
-- Containers.
- '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__',
- '__contains__', '__iter__', '__getslice__', '__setslice__', '__delslice__',
+ '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__', '__contains__', '__iter__',
+ '__getslice__', '__setslice__', '__delslice__',
-- Module and class attribs.
- '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__',
- '__bases__', '__class__', '__self__',
+ '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__', '__bases__', '__class__',
+ '__self__',
-- Stdlib/sys.
- '__builtin__', '__future__', '__main__', '__import__', '__stdin__',
- '__stdout__', '__stderr__',
+ '__builtin__', '__future__', '__main__', '__import__', '__stdin__', '__stdout__', '__stderr__',
-- Other.
'__debug__', '__doc__', '__import__', '__name__'
-})
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable',
- 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'copyright',
- 'credits', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
- 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
- 'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern',
- 'isinstance', 'issubclass', 'iter', 'len', 'license', 'list', 'locals',
- 'long', 'map', 'max', 'min', 'object', 'oct', 'open', 'ord', 'pow',
- 'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
- 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
- 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange',
- 'zip'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable', 'chr', 'classmethod',
+ 'cmp', 'coerce', 'compile', 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
+ 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
+ 'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
+ 'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min', 'object',
+ 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
+ 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
+ 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'
+}))
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- 'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException',
- 'DeprecationWarning', 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception',
- 'False', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError',
- 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
- 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
- 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError',
- 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
- 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
- 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
- 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError',
- 'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
- 'ValueError', 'Warning', 'ZeroDivisionError'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ 'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'DeprecationWarning',
+ 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False', 'FloatingPointError',
+ 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
+ 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
+ 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning',
+ 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
+ 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
+ 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
+ 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
+ 'ZeroDivisionError'
+}))
-- Self.
-local self = token('self', P('self'))
+lex:add_rule('self', token('self', 'self'))
+lex:add_style('self', lexer.styles.type)
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Decorators.
-local decorator = token('decorator', l.starts_line('@') * l.nonnewline^0)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'constant', constant},
- {'self', self},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'decorator', decorator},
- {'operator', operator},
-}
+-- Strings.
+local sq_str = P('u')^-1 * lexer.range("'", true)
+local dq_str = P('U')^-1 * lexer.range('"', true)
+local tq_str = lexer.range("'''") + lexer.range('"""')
+-- TODO: raw_strs cannot end in single \.
+local raw_sq_str = P('u')^-1 * 'r' * lexer.range("'", false, false)
+local raw_dq_str = P('U')^-1 * 'R' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_sq_str + raw_dq_str))
+-- Numbers.
+local dec = lexer.dec_num * S('Ll')^-1
+local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
+local oct = lexer.oct_num * S('Ll')^-1
+local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-M._tokenstyles = {
- self = l.STYLE_TYPE,
- decorator = l.STYLE_PREPROCESSOR
-}
+-- Decorators.
+lex:add_rule('decorator', token('decorator', lexer.to_eol('@')))
+lex:add_style('decorator', lexer.styles.preprocessor)
-M._FOLDBYINDENTATION = true
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))
-return M
+return lex
diff --git a/lua/lexers/rails.lua b/lua/lexers/rails.lua
index a36ed56..9f750c6 100644
--- a/lua/lexers/rails.lua
+++ b/lua/lexers/rails.lua
@@ -1,65 +1,43 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Ruby on Rails LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local table = _G.table
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rails'}
+local lex = lexer.new('rails', {inherit = lexer.load('ruby')})
-- Whitespace
-local ws = token(l.WHITESPACE, l.space^1)
+lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Functions.
-
-local actionpack = token(l.FUNCTION, word_match{
- 'before_filter', 'skip_before_filter', 'skip_after_filter', 'after_filter',
- 'around_filter', 'filter', 'filter_parameter_logging', 'layout',
- 'require_dependency', 'render', 'render_action', 'render_text', 'render_file',
- 'render_template', 'render_nothing', 'render_component',
- 'render_without_layout', 'rescue_from', 'url_for', 'redirect_to',
- 'redirect_to_path', 'redirect_to_url', 'respond_to', 'helper',
- 'helper_method', 'model', 'service', 'observer', 'serialize', 'scaffold',
- 'verify', 'hide_action'
-})
-
-local view_helpers = token(l.FUNCTION, word_match{
- 'check_box', 'content_for', 'error_messages_for', 'form_for', 'fields_for',
- 'file_field', 'hidden_field', 'image_submit_tag', 'label', 'link_to',
- 'password_field', 'radio_button', 'submit', 'text_field', 'text_area'
-})
-
-local activerecord = token(l.FUNCTION, word_match{
- 'after_create', 'after_destroy', 'after_save', 'after_update',
- 'after_validation', 'after_validation_on_create',
- 'after_validation_on_update', 'before_create', 'before_destroy',
- 'before_save', 'before_update', 'before_validation',
- 'before_validation_on_create', 'before_validation_on_update', 'composed_of',
- 'belongs_to', 'has_one', 'has_many', 'has_and_belongs_to_many', 'validate',
- 'validates', 'validate_on_create', 'validates_numericality_of',
- 'validate_on_update', 'validates_acceptance_of', 'validates_associated',
- 'validates_confirmation_of', 'validates_each', 'validates_format_of',
+lex:modify_rule('function', token(lexer.FUNCTION, word_match{
+ -- ActionPack.
+ 'before_filter', 'skip_before_filter', 'skip_after_filter', 'after_filter', 'around_filter',
+ 'filter', 'filter_parameter_logging', 'layout', 'require_dependency', 'render', 'render_action',
+ 'render_text', 'render_file', 'render_template', 'render_nothing', 'render_component',
+ 'render_without_layout', 'rescue_from', 'url_for', 'redirect_to', 'redirect_to_path',
+ 'redirect_to_url', 'respond_to', 'helper', 'helper_method', 'model', 'service', 'observer',
+ 'serialize', 'scaffold', 'verify', 'hide_action',
+ -- View helpers.
+ 'check_box', 'content_for', 'error_messages_for', 'form_for', 'fields_for', 'file_field',
+ 'hidden_field', 'image_submit_tag', 'label', 'link_to', 'password_field', 'radio_button',
+ 'submit', 'text_field', 'text_area',
+ -- ActiveRecord.
+ 'after_create', 'after_destroy', 'after_save', 'after_update', 'after_validation',
+ 'after_validation_on_create', 'after_validation_on_update', 'before_create', 'before_destroy',
+ 'before_save', 'before_update', 'before_validation', 'before_validation_on_create',
+ 'before_validation_on_update', 'composed_of', 'belongs_to', 'has_one', 'has_many',
+ 'has_and_belongs_to_many', 'validate', 'validates', 'validate_on_create',
+ 'validates_numericality_of', 'validate_on_update', 'validates_acceptance_of',
+ 'validates_associated', 'validates_confirmation_of', 'validates_each', 'validates_format_of',
'validates_inclusion_of', 'validates_exclusion_of', 'validates_length_of',
- 'validates_presence_of', 'validates_size_of', 'validates_uniqueness_of',
- 'attr_protected', 'attr_accessible', 'attr_readonly',
- 'accepts_nested_attributes_for', 'default_scope', 'scope'
-})
-
-local active_support = token(l.FUNCTION, word_match{
- 'alias_method_chain', 'alias_attribute', 'delegate', 'cattr_accessor',
- 'mattr_accessor', 'returning', 'memoize'
-})
-
--- Extend Ruby lexer to include Rails methods.
-local ruby = l.load('ruby')
-local _rules = ruby._rules
-_rules[1] = {'whitespace', ws}
-table.insert(_rules, 3, {'actionpack', actionpack})
-table.insert(_rules, 4, {'view_helpers', view_helpers})
-table.insert(_rules, 5, {'activerecord', activerecord})
-table.insert(_rules, 6, {'active_support', active_support})
-M._rules = _rules
-M._foldsymbols = ruby._foldsymbols
-
-return M
+ 'validates_presence_of', 'validates_size_of', 'validates_uniqueness_of', --
+ 'attr_protected', 'attr_accessible', 'attr_readonly', 'accepts_nested_attributes_for',
+ 'default_scope', 'scope',
+ -- ActiveSupport.
+ 'alias_method_chain', 'alias_attribute', 'delegate', 'cattr_accessor', 'mattr_accessor',
+ 'returning', 'memoize'
+}) + lex:get_rule('function'))
+
+return lex
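The rails.lua rewrite is the inheritance idiom of the new API in miniature: load the parent via the inherit option, then splice extra alternatives in front of an inherited rule with modify_rule() and get_rule(). A hedged sketch of the same pattern, with hypothetical names (mydsl, my_helper):

local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match

local lex = lexer.new('mydsl', {inherit = lexer.load('ruby')})

-- Prepend DSL-specific names so they match before the inherited rule.
lex:modify_rule('function',
  token(lexer.FUNCTION, word_match('my_helper my_other_helper')) + lex:get_rule('function'))

return lex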
diff --git a/lua/lexers/rc.lua b/lua/lexers/rc.lua
index 3e61be8..7fb0ade 100644
--- a/lua/lexers/rc.lua
+++ b/lua/lexers/rc.lua
@@ -1,66 +1,51 @@
--- Copyright 2017 Michael Forney. See LICENSE.
+-- Copyright 2017-2022 Michael Forney. See LICENSE.
-- rc LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rc'}
+local lex = lexer.new('rc')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'for', 'in', 'while', 'if', 'not', 'switch', 'case', 'fn', 'builtin', 'cd', 'eval', 'exec',
+ 'exit', 'flag', 'rfork', 'shift', 'ulimit', 'umask', 'wait', 'whatis', '.', '~'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local str = l.delimited_range("'", false, true)
+local str = lexer.range("'", false, false)
local heredoc = '<<' * P(function(input, index)
- local s, e, _, delimiter =
- input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1', index)
+ local s, e, _, delimiter = input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1', index)
if s == index and delimiter then
delimiter = delimiter:gsub('[%%+-.?]', '%%%1')
- local _, e = input:find('[\n\r]'..delimiter..'[\n\r]', e)
+ e = select(2, input:find('[\n\r]' .. delimiter .. '[\n\r]', e))
return e and e + 1 or #input + 1
end
end)
-local string = token(l.STRING, str + heredoc)
-
--- Numbers.
-local number = token(l.NUMBER, l.integer + l.float)
+lex:add_rule('string', token(lexer.STRING, str + heredoc))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'for', 'in', 'while', 'if', 'not', 'switch', 'case', 'fn',
- 'builtin', 'cd', 'eval', 'exec', 'exit', 'flag', 'rfork', 'shift', 'ulimit',
- 'umask', 'wait', 'whatis', '.', '~',
-}, '!"%*+,-./:?@[]~'))
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Variables.
-local variable = token(l.VARIABLE,
- '$' * S('"#')^-1 * ('*' + l.digit^1 + l.word))
+lex:add_rule('variable',
+ token(lexer.VARIABLE, '$' * S('"#')^-1 * ('*' + lexer.digit^1 + lexer.word)))
-- Operators.
-local operator = token(l.OPERATOR, S('@`=!<>*&^|;?()[]{}') + '\\\n')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('@`=!<>*&^|;?()[]{}') + '\\\n'))
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
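The rc heredoc above is built on LPeg match-time functions: P(function(input, index) ... end) is called with the whole subject and the current position, and must return the position after the match (or nil to fail). A toy standalone version, assuming only the lpeg module:

local lpeg = require('lpeg')
local P = lpeg.P

-- Toy heredoc: '<<' followed by everything up to and including a blank line.
local heredoc = P('<<') * P(function(input, index)
  local _, e = input:find('\n\n', index, true)
  return e and e + 1 or #input + 1
end)

assert(heredoc:match('<<EOF\nbody\n\nrest') == 13)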
diff --git a/lua/lexers/reason.lua b/lua/lexers/reason.lua
index b67e340..4607c25 100644
--- a/lua/lexers/reason.lua
+++ b/lua/lexers/reason.lua
@@ -1,83 +1,65 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2018-2022 Hugo O. Rivera. See LICENSE.
-- Reason (https://reasonml.github.io/) LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'reason'}
+local lex = lexer.new('reason')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, l.nested_pair('/*', '*/'))
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done',
- 'downto', 'else', 'end', 'exception', 'external', 'failwith', 'false',
- 'flush', 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
- 'inherit', 'incr', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor',
- 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open', 'option',
- 'or', 'parser', 'private', 'ref', 'rec', 'raise', 'regexp', 'sig', 'struct',
- 'stdout', 'stdin', 'stderr', 'switch', 'then', 'to', 'true', 'try', 'type',
- 'val', 'virtual', 'when', 'while', 'with'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done', 'downto', 'else',
+ 'end', 'exception', 'external', 'failwith', 'false', 'flush', 'for', 'fun', 'function', 'functor',
+ 'if', 'in', 'include', 'inherit', 'incr', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor',
+ 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open', 'option', 'or', 'parser',
+ 'private', 'ref', 'rec', 'raise', 'regexp', 'sig', 'struct', 'stdout', 'stdin', 'stderr',
+ 'switch', 'then', 'to', 'true', 'try', 'type', 'val', 'virtual', 'when', 'while', 'with'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'int', 'float', 'bool', 'char', 'string', 'unit'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match('int float bool char string unit')))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'raise', 'invalid_arg', 'failwith', 'compare', 'min', 'max', 'succ', 'pred',
- 'mod', 'abs', 'max_int', 'min_int', 'sqrt', 'exp', 'log', 'log10', 'cos',
- 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'cosh', 'sinh', 'tanh', 'ceil',
- 'floor', 'abs_float', 'mod_float', 'frexp', 'ldexp', 'modf', 'float',
- 'float_of_int', 'truncate', 'int_of_float', 'infinity', 'nan', 'max_float',
- 'min_float', 'epsilon_float', 'classify_float', 'int_of_char', 'char_of_int',
- 'ignore', 'string_of_bool', 'bool_of_string', 'string_of_int',
- 'int_of_string', 'string_of_float', 'float_of_string', 'fst', 'snd', 'stdin',
- 'stdout', 'stderr', 'print_char', 'print_string', 'print_int', 'print_float',
- 'print_endline', 'print_newline', 'prerr_char', 'prerr_string', 'prerr_int',
- 'prerr_float', 'prerr_endline', 'prerr_newline', 'read_line', 'read_int',
- 'read_float', 'open_out', 'open_out_bin', 'open_out_gen', 'flush',
- 'flush_all', 'output_char', 'output_string', 'output', 'output_byte',
- 'output_binary_int', 'output_value', 'seek_out', 'pos_out',
- 'out_channel_length', 'close_out', 'close_out_noerr', 'set_binary_mode_out',
- 'open_in', 'open_in_bin', 'open_in_gen', 'input_char', 'input_line', 'input',
- 'really_input', 'input_byte', 'input_binary_int', 'input_value', 'seek_in',
- 'pos_in', 'in_channel_length', 'close_in', 'close_in_noerr',
- 'set_binary_mode_in', 'incr', 'decr', 'string_of_format', 'format_of_string',
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'raise', 'invalid_arg', 'failwith', 'compare', 'min', 'max', 'succ', 'pred', 'mod', 'abs',
+ 'max_int', 'min_int', 'sqrt', 'exp', 'log', 'log10', 'cos', 'sin', 'tan', 'acos', 'asin', 'atan',
+ 'atan2', 'cosh', 'sinh', 'tanh', 'ceil', 'floor', 'abs_float', 'mod_float', 'frexp', 'ldexp',
+ 'modf', 'float', 'float_of_int', 'truncate', 'int_of_float', 'infinity', 'nan', 'max_float',
+ 'min_float', 'epsilon_float', 'classify_float', 'int_of_char', 'char_of_int', 'ignore',
+ 'string_of_bool', 'bool_of_string', 'string_of_int', 'int_of_string', 'string_of_float',
+ 'float_of_string', 'fst', 'snd', 'stdin', 'stdout', 'stderr', 'print_char', 'print_string',
+ 'print_int', 'print_float', 'print_endline', 'print_newline', 'prerr_char', 'prerr_string',
+ 'prerr_int', 'prerr_float', 'prerr_endline', 'prerr_newline', 'read_line', 'read_int',
+ 'read_float', 'open_out', 'open_out_bin', 'open_out_gen', 'flush', 'flush_all', 'output_char',
+ 'output_string', 'output', 'output_byte', 'output_binary_int', 'output_value', 'seek_out',
+ 'pos_out', 'out_channel_length', 'close_out', 'close_out_noerr', 'set_binary_mode_out', 'open_in',
+ 'open_in_bin', 'open_in_gen', 'input_char', 'input_line', 'input', 'really_input', 'input_byte',
+ 'input_binary_int', 'input_value', 'seek_in', 'pos_in', 'in_channel_length', 'close_in',
+ 'close_in_noerr', 'set_binary_mode_in', 'incr', 'decr', 'string_of_format', 'format_of_string',
'exit', 'at_exit'
-})
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')))
-return M
+return lex
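reason.lua exercises the consolidated lexer.range() helper that replaces delimited_range() and nested_pair(). Reading the calls in this diff, the signature is range(s[, e][, single_line][, escapes][, balanced]); a sketch of the flag positions, assuming Scintillua's lexer module is loadable standalone:

local lexer = require('lexer')

local dq_str = lexer.range('"', true)          -- single-line; '\' escapes by default
local raw_str = lexer.range("'", false, false) -- multi-line, escapes disabled
local nested = lexer.range('/*', '*/', false, false, true) -- balanced pairs nest

assert(nested:match('/* a /* nested */ comment */'))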
diff --git a/lua/lexers/rebol.lua b/lua/lexers/rebol.lua
index be817c5..4ece279 100644
--- a/lua/lexers/rebol.lua
+++ b/lua/lexers/rebol.lua
@@ -1,129 +1,102 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Rebol LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rebol'}
+local lex = lexer.new('rebol')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local line_comment = ';' * l.nonnewline^0;
-local block_comment = 'comment' * P(' ')^-1 *
- l.delimited_range('{}', false, true)
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sl_string = l.delimited_range('"', true)
-local ml_string = l.delimited_range('{}')
-local lit_string = "'" * l.word
-local string = token(l.STRING, sl_string + ml_string + lit_string)
+local line_comment = lexer.to_eol(';')
+local block_comment = 'comment' * P(' ')^-1 * lexer.range('{', '}')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'abs', 'absolute', 'add', 'and~', 'at', 'back', 'change', 'clear',
- 'complement', 'copy', 'cp', 'divide', 'fifth', 'find', 'first', 'fourth',
- 'head', 'insert', 'last', 'make', 'max', 'maximum', 'min', 'minimum',
- 'multiply', 'negate', 'next', 'or~', 'pick', 'poke', 'power', 'random',
- 'remainder', 'remove', 'second', 'select', 'skip', 'sort', 'subtract', 'tail',
- 'third', 'to', 'trim', 'xor~', 'alias', 'all', 'any', 'arccosine', 'arcsine',
- 'arctangent', 'bind', 'break', 'browse', 'call', 'caret-to-offset', 'catch',
- 'checksum', 'close', 'comment', 'compose', 'compress', 'cosine', 'debase',
- 'decompress', 'dehex', 'detab', 'dh-compute-key', 'dh-generate-key',
- 'dh-make-key', 'difference', 'disarm', 'do', 'dsa-generate-key',
- 'dsa-make-key', 'dsa-make-signature', 'dsa-verify-signature', 'either',
- 'else', 'enbase', 'entab', 'exclude', 'exit', 'exp', 'foreach', 'form',
- 'free', 'get', 'get-modes', 'halt', 'hide', 'if', 'in', 'intersect', 'load',
- 'log-10', 'log-2', 'log-e', 'loop', 'lowercase', 'maximum-of', 'minimum-of',
- 'mold', 'not', 'now', 'offset-to-caret', 'open', 'parse', 'prin', 'print',
- 'protect', 'q', 'query', 'quit', 'read', 'read-io', 'recycle', 'reduce',
- 'repeat', 'return', 'reverse', 'rsa-encrypt', 'rsa-generate-key',
- 'rsa-make-key', 'save', 'secure', 'set', 'set-modes', 'show', 'sine',
- 'size-text', 'square-root', 'tangent', 'textinfo', 'throw', 'to-hex',
- 'to-local-file', 'to-rebol-file', 'trace', 'try', 'union', 'unique',
- 'unprotect', 'unset', 'until', 'update', 'uppercase', 'use', 'wait', 'while',
- 'write', 'write-io', 'basic-syntax-header', 'crlf', 'font-fixed',
- 'font-sans-serif', 'font-serif', 'list-words', 'outstr', 'val', 'value',
- 'about', 'alert', 'alter', 'append', 'array', 'ask', 'boot-prefs',
- 'build-tag', 'center-face', 'change-dir', 'charset', 'choose', 'clean-path',
- 'clear-fields', 'confine', 'confirm', 'context', 'cvs-date', 'cvs-version',
- 'decode-cgi', 'decode-url', 'deflag-face', 'delete', 'demo', 'desktop',
- 'dirize', 'dispatch', 'do-boot', 'do-events', 'do-face', 'do-face-alt',
- 'does', 'dump-face', 'dump-pane', 'echo', 'editor', 'emailer', 'emit',
- 'extract', 'find-by-type', 'find-key-face', 'find-window', 'flag-face',
- 'flash', 'focus', 'for', 'forall', 'forever', 'forskip', 'func', 'function',
- 'get-net-info', 'get-style', 'has', 'help', 'hide-popup', 'import-email',
- 'inform', 'input', 'insert-event-func', 'join', 'launch', 'launch-thru',
- 'layout', 'license', 'list-dir', 'load-image', 'load-prefs', 'load-thru',
- 'make-dir', 'make-face', 'net-error', 'open-events', 'parse-email-addrs',
- 'parse-header', 'parse-header-date', 'parse-xml', 'path-thru', 'probe',
- 'protect-system', 'read-net', 'read-thru', 'reboot', 'reform', 'rejoin',
- 'remold', 'remove-event-func', 'rename', 'repend', 'replace', 'request',
- 'request-color', 'request-date', 'request-download', 'request-file',
- 'request-list', 'request-pass', 'request-text', 'resend', 'save-prefs',
- 'save-user', 'scroll-para', 'send', 'set-font', 'set-net', 'set-para',
- 'set-style', 'set-user', 'set-user-name', 'show-popup', 'source',
- 'split-path', 'stylize', 'switch', 'throw-on-error', 'to-binary',
- 'to-bitset', 'to-block', 'to-char', 'to-date', 'to-decimal', 'to-email',
- 'to-event', 'to-file', 'to-get-word', 'to-hash', 'to-idate', 'to-image',
- 'to-integer', 'to-issue', 'to-list', 'to-lit-path', 'to-lit-word', 'to-logic',
- 'to-money', 'to-none', 'to-pair', 'to-paren', 'to-path', 'to-refinement',
- 'to-set-path', 'to-set-word', 'to-string', 'to-tag', 'to-time', 'to-tuple',
- 'to-url', 'to-word', 'unfocus', 'uninstall', 'unview', 'upgrade', 'Usage',
- 'vbug', 'view', 'view-install', 'view-prefs', 'what', 'what-dir',
- 'write-user', 'return', 'at', 'space', 'pad', 'across', 'below', 'origin',
- 'guide', 'tabs', 'indent', 'style', 'styles', 'size', 'sense', 'backcolor',
- 'do', 'none',
- 'action?', 'any-block?', 'any-function?', 'any-string?', 'any-type?',
- 'any-word?', 'binary?', 'bitset?', 'block?', 'char?', 'datatype?', 'date?',
- 'decimal?', 'email?', 'empty?', 'equal?', 'error?', 'even?', 'event?',
- 'file?', 'function?', 'get-word?', 'greater-or-equal?', 'greater?', 'hash?',
- 'head?', 'image?', 'index?', 'integer?', 'issue?', 'length?',
- 'lesser-or-equal?', 'lesser?', 'library?', 'list?', 'lit-path?', 'lit-word?',
- 'logic?', 'money?', 'native?', 'negative?', 'none?', 'not-equal?', 'number?',
- 'object?', 'odd?', 'op?', 'pair?', 'paren?', 'path?', 'port?', 'positive?',
- 'refinement?', 'routine?', 'same?', 'series?', 'set-path?', 'set-word?',
- 'strict-equal?', 'strict-not-equal?', 'string?', 'struct?', 'tag?', 'tail?',
- 'time?', 'tuple?', 'unset?', 'url?', 'word?', 'zero?', 'connected?',
- 'crypt-strength?', 'exists-key?', 'input?', 'script?', 'type?', 'value?', '?',
- '??', 'dir?', 'exists-thru?', 'exists?', 'flag-face?', 'found?', 'in-window?',
- 'info?', 'inside?', 'link-app?', 'link?', 'modified?', 'offset?', 'outside?',
- 'screen-offset?', 'size?', 'span?', 'view?', 'viewed?', 'win-offset?',
- 'within?',
- 'action!', 'any-block!', 'any-function!', 'any-string!', 'any-type!',
- 'any-word!', 'binary!', 'bitset!', 'block!', 'char!', 'datatype!', 'date!',
- 'decimal!', 'email!', 'error!', 'event!', 'file!', 'function!', 'get-word!',
- 'hash!', 'image!', 'integer!', 'issue!', 'library!', 'list!', 'lit-path!',
- 'lit-word!', 'logic!', 'money!', 'native!', 'none!', 'number!', 'object!',
- 'op!', 'pair!', 'paren!', 'path!', 'port!', 'refinement!', 'routine!',
- 'series!', 'set-path!', 'set-word!', 'string!', 'struct!', 'symbol!', 'tag!',
- 'time!', 'tuple!', 'unset!', 'url!', 'word!',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abs', 'absolute', 'add', 'and~', 'at', 'back', 'change', 'clear', 'complement', 'copy', 'cp',
+ 'divide', 'fifth', 'find', 'first', 'fourth', 'head', 'insert', 'last', 'make', 'max', 'maximum',
+ 'min', 'minimum', 'multiply', 'negate', 'next', 'or~', 'pick', 'poke', 'power', 'random',
+ 'remainder', 'remove', 'second', 'select', 'skip', 'sort', 'subtract', 'tail', 'third', 'to',
+ 'trim', 'xor~', --
+ 'alias', 'all', 'any', 'arccosine', 'arcsine', 'arctangent', 'bind', 'break', 'browse', 'call',
+ 'caret-to-offset', 'catch', 'checksum', 'close', 'comment', 'compose', 'compress', 'cosine',
+ 'debase', 'decompress', 'dehex', 'detab', 'dh-compute-key', 'dh-generate-key', 'dh-make-key',
+ 'difference', 'disarm', 'do', 'dsa-generate-key', 'dsa-make-key', 'dsa-make-signature',
+ 'dsa-verify-signature', 'either', 'else', 'enbase', 'entab', 'exclude', 'exit', 'exp', 'foreach',
+ 'form', 'free', 'get', 'get-modes', 'halt', 'hide', 'if', 'in', 'intersect', 'load', 'log-10',
+ 'log-2', 'log-e', 'loop', 'lowercase', 'maximum-of', 'minimum-of', 'mold', 'not', 'now',
+ 'offset-to-caret', 'open', 'parse', 'prin', 'print', 'protect', 'q', 'query', 'quit', 'read',
+ 'read-io', 'recycle', 'reduce', 'repeat', 'return', 'reverse', 'rsa-encrypt', 'rsa-generate-key',
+ 'rsa-make-key', 'save', 'secure', 'set', 'set-modes', 'show', 'sine', 'size-text', 'square-root',
+ 'tangent', 'textinfo', 'throw', 'to-hex', 'to-local-file', 'to-rebol-file', 'trace', 'try',
+ 'union', 'unique', 'unprotect', 'unset', 'until', 'update', 'uppercase', 'use', 'wait', 'while',
+ 'write', 'write-io', --
+ 'basic-syntax-header', 'crlf', 'font-fixed', 'font-sans-serif', 'font-serif', 'list-words',
+ 'outstr', 'val', 'value', --
+ 'about', 'alert', 'alter', 'append', 'array', 'ask', 'boot-prefs', 'build-tag', 'center-face',
+ 'change-dir', 'charset', 'choose', 'clean-path', 'clear-fields', 'confine', 'confirm', 'context',
+ 'cvs-date', 'cvs-version', 'decode-cgi', 'decode-url', 'deflag-face', 'delete', 'demo', 'desktop',
+ 'dirize', 'dispatch', 'do-boot', 'do-events', 'do-face', 'do-face-alt', 'does', 'dump-face',
+ 'dump-pane', 'echo', 'editor', 'emailer', 'emit', 'extract', 'find-by-type', 'find-key-face',
+ 'find-window', 'flag-face', 'flash', 'focus', 'for', 'forall', 'forever', 'forskip', 'func',
+ 'function', 'get-net-info', 'get-style', 'has', 'help', 'hide-popup', 'import-email', 'inform',
+ 'input', 'insert-event-func', 'join', 'launch', 'launch-thru', 'layout', 'license', 'list-dir',
+ 'load-image', 'load-prefs', 'load-thru', 'make-dir', 'make-face', 'net-error', 'open-events',
+ 'parse-email-addrs', 'parse-header', 'parse-header-date', 'parse-xml', 'path-thru', 'probe',
+ 'protect-system', 'read-net', 'read-thru', 'reboot', 'reform', 'rejoin', 'remold',
+ 'remove-event-func', 'rename', 'repend', 'replace', 'request', 'request-color', 'request-date',
+ 'request-download', 'request-file', 'request-list', 'request-pass', 'request-text', 'resend',
+ 'save-prefs', 'save-user', 'scroll-para', 'send', 'set-font', 'set-net', 'set-para', 'set-style',
+ 'set-user', 'set-user-name', 'show-popup', 'source', 'split-path', 'stylize', 'switch',
+ 'throw-on-error', 'to-binary', 'to-bitset', 'to-block', 'to-char', 'to-date', 'to-decimal',
+ 'to-email', 'to-event', 'to-file', 'to-get-word', 'to-hash', 'to-idate', 'to-image', 'to-integer',
+ 'to-issue', 'to-list', 'to-lit-path', 'to-lit-word', 'to-logic', 'to-money', 'to-none', 'to-pair',
+ 'to-paren', 'to-path', 'to-refinement', 'to-set-path', 'to-set-word', 'to-string', 'to-tag',
+ 'to-time', 'to-tuple', 'to-url', 'to-word', 'unfocus', 'uninstall', 'unview', 'upgrade', 'Usage',
+ 'vbug', 'view', 'view-install', 'view-prefs', 'what', 'what-dir', 'write-user', 'return', 'at',
+ 'space', 'pad', 'across', 'below', 'origin', 'guide', 'tabs', 'indent', 'style', 'styles', 'size',
+ 'sense', 'backcolor', 'do', 'none', --
+ 'action?', 'any-block?', 'any-function?', 'any-string?', 'any-type?', 'any-word?', 'binary?',
+ 'bitset?', 'block?', 'char?', 'datatype?', 'date?', 'decimal?', 'email?', 'empty?', 'equal?',
+ 'error?', 'even?', 'event?', 'file?', 'function?', 'get-word?', 'greater-or-equal?', 'greater?',
+ 'hash?', 'head?', 'image?', 'index?', 'integer?', 'issue?', 'length?', 'lesser-or-equal?',
+ 'lesser?', 'library?', 'list?', 'lit-path?', 'lit-word?', 'logic?', 'money?', 'native?',
+ 'negative?', 'none?', 'not-equal?', 'number?', 'object?', 'odd?', 'op?', 'pair?', 'paren?',
+ 'path?', 'port?', 'positive?', 'refinement?', 'routine?', 'same?', 'series?', 'set-path?',
+ 'set-word?', 'strict-equal?', 'strict-not-equal?', 'string?', 'struct?', 'tag?', 'tail?', 'time?',
+ 'tuple?', 'unset?', 'url?', 'word?', 'zero?', 'connected?', 'crypt-strength?', 'exists-key?',
+ 'input?', 'script?', 'type?', 'value?', '?', '??', 'dir?', 'exists-thru?', 'exists?',
+ 'flag-face?', 'found?', 'in-window?', 'info?', 'inside?', 'link-app?', 'link?', 'modified?',
+ 'offset?', 'outside?', 'screen-offset?', 'size?', 'span?', 'view?', 'viewed?', 'win-offset?',
+ 'within?', 'action!', 'any-block!', 'any-function!', 'any-string!', 'any-type!', 'any-word!',
+ 'binary!', 'bitset!', 'block!', 'char!', 'datatype!', 'date!', 'decimal!', 'email!', 'error!',
+ 'event!', 'file!', 'function!', 'get-word!', 'hash!', 'image!', 'integer!', 'issue!', 'library!',
+ 'list!', 'lit-path!', 'lit-word!', 'logic!', 'money!', 'native!', 'none!', 'number!', 'object!',
+ 'op!', 'pair!', 'paren!', 'path!', 'port!', 'refinement!', 'routine!', 'series!', 'set-path!',
+ 'set-word!', 'string!', 'struct!', 'symbol!', 'tag!', 'time!', 'tuple!', 'unset!', 'url!',
+ 'word!', --
'true', 'false', 'self'
-}, '~-?!'))
+}))
-- Identifiers.
-local word = (l.alpha + '-') * (l.alnum + '-')^0
-local identifier = token(l.IDENTIFIER, word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '-') * (lexer.alnum + '-')^0))
--- Operators.
-local operator = token(l.OPERATOR, S('=<>+/*:()[]'))
+-- Strings.
+local dq_str = lexer.range('"', true)
+local br_str = lexer.range('{', '}', false, false, true)
+local word_str = "'" * lexer.word
+lex:add_rule('string', token(lexer.STRING, dq_str + br_str + word_str))
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+/*:()[]')))
-M._foldsymbols = {
- _patterns = {'[%[%]{}]', ';'},
- [l.COMMENT] = {['{'] = 1, ['}'] = -1, [';'] = l.fold_line_comments(';')},
- [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
-return M
+return lex
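A quiet change in the rebol hunk: the old word_match took its extra word characters as a second argument ('~-?!' above), while the new API derives the word-character set from the words themselves. A quick check, assuming Scintillua's lexer.word_match:

local lexer = require('lexer')

local kw = lexer.word_match('to-hex xor~ zero? unset!')
assert(kw:match('to-hex'))
assert(kw:match('zero?'))
assert(not kw:match('to')) -- whole words only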
diff --git a/lua/lexers/rest.lua b/lua/lexers/rest.lua
index be5b839..e7bf467 100644
--- a/lua/lexers/rest.lua
+++ b/lua/lexers/rest.lua
@@ -1,9 +1,9 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- reStructuredText LPeg lexer.
local l = require('lexer')
local token, word_match, starts_line = l.token, l.word_match, l.starts_line
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local P, S = lpeg.P, lpeg.S
local M = {_NAME = 'rest'}
@@ -15,11 +15,11 @@ local any_indent = S(' \t')^0
local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
local adornment = lpeg.C(adornment_chars^2 * any_indent) * (l.newline + -1)
local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
- if not adm:find('^%'..c..'+%s*$') then return nil end
+ if not adm:find('^%' .. c .. '+%s*$') then return nil end
local rest = input:sub(index)
local lines = 1
for line, e in rest:gmatch('([^\r\n]+)()') do
- if lines > 1 and line:match('^(%'..c..'+)%s*$') == adm then
+ if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then
return index + e - 1
end
if lines > 3 or #line > #adm then return nil end
@@ -28,7 +28,7 @@ local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
return #input + 1
end)
local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
- local pos = adm:match('^%'..c..'+%s*()$')
+ local pos = adm:match('^%' .. c .. '+%s*()$')
return pos and index - #adm + pos - 1 or nil
end)
-- Token needs to be a predefined one in order for folder to work.
@@ -37,16 +37,15 @@ local title = token(l.CONSTANT, overline + underline)
-- Lists.
local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
local enum_list = P('(')^-1 *
- (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)')
+ (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)')
local field_list = ':' * (l.any - ':')^1 * P(':')^-1
local option_word = l.alnum * (l.alnum + '-')^0
local option = S('-/') * option_word * (' ' * option_word)^-1 +
- '--' * option_word * ('=' * option_word)^-1
+ '--' * option_word * ('=' * option_word)^-1
local option_list = option * (',' * l.space^1 * option)^-1
local list = #(l.space^0 * (S('*+-:/') + enum_list)) *
- starts_line(token('list', l.space^0 * (option_list + bullet_list +
- enum_list + field_list) *
- l.space))
+ starts_line(token('list', l.space^0 *
+ (option_list + bullet_list + enum_list + field_list) * l.space))
-- Literal block.
local block = P('::') * (l.newline + -1) * function(input, index)
@@ -55,7 +54,7 @@ local block = P('::') * (l.newline + -1) * function(input, index)
for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
local no_indent = (indent - pos < level and line ~= ' ' or level == 0)
local quoted = no_indent and line:find(quote or '^%s*%W')
- if quoted and not quote then quote = '^%s*%'..line:match('^%s*(%W)') end
+ if quoted and not quote then quote = '^%s*%' .. line:match('^%s*(%W)') end
if no_indent and not quoted and pos > 1 then return index + pos - 1 end
end
return #input + 1
@@ -74,8 +73,7 @@ local footnote = token('footnote_block', prefix * footnote_label * l.space)
local citation_label = '[' * word * ']'
local citation = token('citation_block', prefix * citation_label * l.space)
local link = token('link_block', prefix * '_' *
- (l.delimited_range('`') + (P('\\') * 1 +
- l.nonnewline - ':')^1) * ':' * l.space)
+ (l.range('`') + (P('\\') * 1 + l.nonnewline - ':')^1) * ':' * l.space)
local markup_block = #prefix * starts_line(footnote + citation + link)
-- Directives.
@@ -102,8 +100,8 @@ local directive_type = word_match({
'include', 'raw', 'class', 'role', 'default-role', 'title',
'restructuredtext-test-directive',
}, '-')
-local known_directive = token('directive',
- prefix * directive_type * '::' * l.space)
+local known_directive = token('directive', prefix * directive_type * '::' *
+ l.space)
local sphinx_directive_type = word_match({
-- The TOC tree.
'toctree',
@@ -115,12 +113,12 @@ local sphinx_directive_type = word_match({
-- Miscellaneous
'sectionauthor', 'index', 'only', 'tabularcolumns'
}, '-')
-local sphinx_directive = token('sphinx_directive',
- prefix * sphinx_directive_type * '::' * l.space)
-local unknown_directive = token('unknown_directive',
- prefix * word * '::' * l.space)
+local sphinx_directive = token('sphinx_directive', prefix *
+ sphinx_directive_type * '::' * l.space)
+local unknown_directive = token('unknown_directive', prefix * word * '::' *
+ l.space)
local directive = #prefix * starts_line(known_directive + sphinx_directive +
- unknown_directive)
+ unknown_directive)
-- Sphinx code block.
local indented_block = function(input, index)
@@ -134,42 +132,37 @@ local indented_block = function(input, index)
return #input + 1
end
local code_block = prefix * 'code-block::' * S(' \t')^1 * l.nonnewline^0 *
- (l.newline + -1) * indented_block
+ (l.newline + -1) * indented_block
local sphinx_block = #prefix * token('code_block', starts_line(code_block))
-- Substitution definitions.
-local substitution = #prefix *
- token('substitution',
- starts_line(prefix * l.delimited_range('|') *
- l.space^1 * word * '::' * l.space))
+local substitution = #prefix * token('substitution',
+ starts_line(prefix * l.range('|') * l.space^1 * word * '::' * l.space))
-- Comments.
-local line_comment = prefix * l.nonnewline^0
+local line_comment = l.to_eol(prefix)
local bprefix = any_indent * '..'
local block_comment = bprefix * l.newline * indented_block
-local comment = #bprefix *
- token(l.COMMENT, starts_line(line_comment + block_comment))
+local comment = #bprefix * token(l.COMMENT, starts_line(line_comment +
+ block_comment))
-- Inline markup.
-local em = token('em', l.delimited_range('*'))
-local strong = token('strong', ('**' * (l.any - '**')^0 * P('**')^-1))
+local em = token('em', l.range('*'))
+local strong = token('strong', l.range('**', '**'))
local role = token('role', ':' * word * ':' * (word * ':')^-1)
-local interpreted = role^-1 * token('interpreted', l.delimited_range('`')) *
- role^-1
-local inline_literal = token('inline_literal',
- '``' * (l.any - '``')^0 * P('``')^-1)
-local link_ref = token('link',
- (word + l.delimited_range('`')) * '_' * P('_')^-1 +
- '_' * l.delimited_range('`'))
+local interpreted = role^-1 * token('interpreted', l.range('`')) * role^-1
+local inline_literal = token('inline_literal', l.range('``', '``'))
+local postfix_link = (word + l.range('`')) * '_' * P('_')^-1
+local prefix_link = '_' * l.range('`')
+local link_ref = token('link', postfix_link + prefix_link)
local footnote_ref = token('footnote', footnote_label * '_')
local citation_ref = token('citation', citation_label * '_')
-local substitution_ref = token('substitution', l.delimited_range('|', true) *
- ('_' * P('_')^-1)^-1)
+local substitution_ref = token('substitution', l.range('|', true) *
+ ('_' * P('_')^-1)^-1)
local link = token('link', l.alpha * (l.alnum + S('-.'))^1 * ':' *
- (l.alnum + S('/.+-%@'))^1)
+ (l.alnum + S('/.+-%@'))^1)
local inline_markup = (strong + em + inline_literal + link_ref + interpreted +
- footnote_ref + citation_ref + substitution_ref + link) *
- -l.alnum
+ footnote_ref + citation_ref + substitution_ref + link) * -l.alnum
-- Other.
local non_space = token(l.DEFAULT, l.alnum * (l.any - l.space)^0)
@@ -193,14 +186,14 @@ M._rules = {
M._tokenstyles = {
list = l.STYLE_TYPE,
- literal_block = l.STYLE_EMBEDDED..',eolfilled',
+ literal_block = l.STYLE_EMBEDDED .. ',eolfilled',
footnote_block = l.STYLE_LABEL,
citation_block = l.STYLE_LABEL,
link_block = l.STYLE_LABEL,
directive = l.STYLE_KEYWORD,
- sphinx_directive = l.STYLE_KEYWORD..',bold',
- unknown_directive = l.STYLE_KEYWORD..',italics',
- code_block = l.STYLE_EMBEDDED..',eolfilled',
+ sphinx_directive = l.STYLE_KEYWORD .. ',bold',
+ unknown_directive = l.STYLE_KEYWORD .. ',italics',
+ code_block = l.STYLE_EMBEDDED .. ',eolfilled',
substitution = l.STYLE_VARIABLE,
strong = 'bold',
em = 'italics',
@@ -219,7 +212,7 @@ local sphinx_levels = {
-- Section-based folding.
M._fold = function(text, start_pos, start_line, start_level)
local folds, line_starts = {}, {}
- for pos in (text..'\n'):gmatch('().-\r?\n') do
+ for pos in (text .. '\n'):gmatch('().-\r?\n') do
line_starts[#line_starts + 1] = pos
end
local style_at, CONSTANT, level = l.style_at, l.CONSTANT, start_level
@@ -231,7 +224,7 @@ M._fold = function(text, start_pos, start_line, start_level)
local c = text:sub(pos, pos)
local line_num = start_line + i - 1
folds[line_num] = level
- if style_at[start_pos + pos] == CONSTANT and c:find('^[^%w%s]') then
+ if style_at[start_pos + pos - 1] == CONSTANT and c:find('^[^%w%s]') then
local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels)
level = not sphinx and level - 1 or sphinx_level
if level < FOLD_BASE then level = FOLD_BASE end
@@ -249,11 +242,11 @@ l.property['fold.by.sphinx.convention'] = '0'
--[[ Embedded languages.
local bash = l.load('bash')
local bash_indent_level
-local start_rule = #(prefix * 'code-block' * '::' * l.space^1 * 'bash' *
- (l.newline + -1)) * sphinx_directive *
- token('bash_begin', P(function(input, index)
- bash_indent_level = #input:match('^([ \t]*)', index)
- return index
- end))]]
+local start_rule =
+ #(prefix * 'code-block' * '::' * l.space^1 * 'bash' * (l.newline + -1)) *
+ sphinx_directive * token('bash_begin', P(function(input, index)
+ bash_indent_level = #input:match('^([ \t]*)', index)
+ return index
+ end))]]
return M
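Two details of the rest.lua folder are easy to miss. The pattern '().-\r?\n' collects line-start offsets via an empty position capture, and the style_at change fixes an off-by-one: with 1-based positions, the character at chunk offset pos sits at document position start_pos + pos - 1. A plain-Lua illustration of the position captures:

local line_starts = {}
for pos in ('one\ntwo\nthree\n'):gmatch('().-\r?\n') do
  line_starts[#line_starts + 1] = pos
end
-- line_starts is now {1, 5, 9}: the offset of each line's first character.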
diff --git a/lua/lexers/rexx.lua b/lua/lexers/rexx.lua
index ed11b42..6766789 100644
--- a/lua/lexers/rexx.lua
+++ b/lua/lexers/rexx.lua
@@ -1,97 +1,77 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Rexx LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rexx'}
+local lex = lexer.new('rexx')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '--' * l.nonnewline_esc^0
-local block_comment = l.nested_pair('/*', '*/')
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true, true)
-local dq_str = l.delimited_range('"', true, true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Preprocessor.
-local preproc = token(l.PREPROCESSOR, l.starts_line('#') * l.nonnewline^0)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'address', 'arg', 'by', 'call', 'class', 'do', 'drop', 'else', 'end', 'exit',
- 'expose', 'forever', 'forward', 'guard', 'if', 'interpret', 'iterate',
- 'leave', 'method', 'nop', 'numeric', 'otherwise', 'parse', 'procedure',
- 'pull', 'push', 'queue', 'raise', 'reply', 'requires', 'return', 'routine',
- 'result', 'rc', 'say', 'select', 'self', 'sigl', 'signal', 'super', 'then',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'address', 'arg', 'by', 'call', 'class', 'do', 'drop', 'else', 'end', 'exit', 'expose', 'forever',
+ 'forward', 'guard', 'if', 'interpret', 'iterate', 'leave', 'method', 'nop', 'numeric',
+ 'otherwise', 'parse', 'procedure', 'pull', 'push', 'queue', 'raise', 'reply', 'requires',
+ 'return', 'routine', 'result', 'rc', 'say', 'select', 'self', 'sigl', 'signal', 'super', 'then',
'to', 'trace', 'use', 'when', 'while', 'until'
-}, nil, true))
+}, true)))
-- Functions.
-local func = token(l.FUNCTION, word_match({
- 'abbrev', 'abs', 'address', 'arg', 'beep', 'bitand', 'bitor', 'bitxor', 'b2x',
- 'center', 'changestr', 'charin', 'charout', 'chars', 'compare', 'consition',
- 'copies', 'countstr', 'c2d', 'c2x', 'datatype', 'date', 'delstr', 'delword',
- 'digits', 'directory', 'd2c', 'd2x', 'errortext', 'filespec', 'form',
- 'format', 'fuzz', 'insert', 'lastpos', 'left', 'length', 'linein', 'lineout',
- 'lines', 'max', 'min', 'overlay', 'pos', 'queued', 'random', 'reverse',
- 'right', 'sign', 'sourceline', 'space', 'stream', 'strip', 'substr',
- 'subword', 'symbol', 'time', 'trace', 'translate', 'trunc', 'value', 'var',
- 'verify', 'word', 'wordindex', 'wordlength', 'wordpos', 'words', 'xrange',
- 'x2b', 'x2c', 'x2d', 'rxfuncadd', 'rxfuncdrop', 'rxfuncquery', 'rxmessagebox',
- 'rxwinexec', 'sysaddrexxmacro', 'sysbootdrive', 'sysclearrexxmacrospace',
- 'syscloseeventsem', 'sysclosemutexsem', 'syscls', 'syscreateeventsem',
- 'syscreatemutexsem', 'syscurpos', 'syscurstate', 'sysdriveinfo',
- 'sysdrivemap', 'sysdropfuncs', 'sysdroprexxmacro', 'sysdumpvariables',
- 'sysfiledelete', 'sysfilesearch', 'sysfilesystemtype', 'sysfiletree',
- 'sysfromunicode', 'systounicode', 'sysgeterrortext', 'sysgetfiledatetime',
- 'sysgetkey', 'sysini', 'sysloadfuncs', 'sysloadrexxmacrospace', 'sysmkdir',
- 'sysopeneventsem', 'sysopenmutexsem', 'sysposteventsem', 'syspulseeventsem',
- 'sysqueryprocess', 'sysqueryrexxmacro', 'sysreleasemutexsem',
+lex:add_rule('function', token(lexer.FUNCTION, word_match({
+ 'abbrev', 'abs', 'address', 'arg', 'beep', 'bitand', 'bitor', 'bitxor', 'b2x', 'center',
+ 'changestr', 'charin', 'charout', 'chars', 'compare', 'condition', 'copies', 'countstr', 'c2d',
+ 'c2x', 'datatype', 'date', 'delstr', 'delword', 'digits', 'directory', 'd2c', 'd2x', 'errortext',
+ 'filespec', 'form', 'format', 'fuzz', 'insert', 'lastpos', 'left', 'length', 'linein', 'lineout',
+ 'lines', 'max', 'min', 'overlay', 'pos', 'queued', 'random', 'reverse', 'right', 'sign',
+ 'sourceline', 'space', 'stream', 'strip', 'substr', 'subword', 'symbol', 'time', 'trace',
+ 'translate', 'trunc', 'value', 'var', 'verify', 'word', 'wordindex', 'wordlength', 'wordpos',
+ 'words', 'xrange', 'x2b', 'x2c', 'x2d', --
+ 'rxfuncadd', 'rxfuncdrop', 'rxfuncquery', 'rxmessagebox', 'rxwinexec', 'sysaddrexxmacro',
+ 'sysbootdrive', 'sysclearrexxmacrospace', 'syscloseeventsem', 'sysclosemutexsem', 'syscls',
+ 'syscreateeventsem', 'syscreatemutexsem', 'syscurpos', 'syscurstate', 'sysdriveinfo',
+ 'sysdrivemap', 'sysdropfuncs', 'sysdroprexxmacro', 'sysdumpvariables', 'sysfiledelete',
+ 'sysfilesearch', 'sysfilesystemtype', 'sysfiletree', 'sysfromunicode', 'systounicode',
+ 'sysgeterrortext', 'sysgetfiledatetime', 'sysgetkey', 'sysini', 'sysloadfuncs',
+ 'sysloadrexxmacrospace', 'sysmkdir', 'sysopeneventsem', 'sysopenmutexsem', 'sysposteventsem',
+ 'syspulseeventsem', 'sysqueryprocess', 'sysqueryrexxmacro', 'sysreleasemutexsem',
'sysreorderrexxmacro', 'sysrequestmutexsem', 'sysreseteventsem', 'sysrmdir',
- 'syssaverexxmacrospace', 'syssearchpath', 'syssetfiledatetime',
- 'syssetpriority', 'syssleep', 'sysstemcopy', 'sysstemdelete', 'syssteminsert',
- 'sysstemsort', 'sysswitchsession', 'syssystemdirectory', 'systempfilename',
- 'systextscreenread', 'systextscreensize', 'sysutilversion', 'sysversion',
- 'sysvolumelabel', 'syswaiteventsem', 'syswaitnamedpipe', 'syswindecryptfile',
- 'syswinencryptfile', 'syswinver'
-}, '2', true))
+ 'syssaverexxmacrospace', 'syssearchpath', 'syssetfiledatetime', 'syssetpriority', 'syssleep',
+ 'sysstemcopy', 'sysstemdelete', 'syssteminsert', 'sysstemsort', 'sysswitchsession',
+ 'syssystemdirectory', 'systempfilename', 'systextscreenread', 'systextscreensize',
+ 'sysutilversion', 'sysversion', 'sysvolumelabel', 'syswaiteventsem', 'syswaitnamedpipe',
+ 'syswindecryptfile', 'syswinencryptfile', 'syswinver'
+}, true)))
-- Identifiers.
-local word = l.alpha * (l.alnum + S('@#$\\.!?_'))^0
-local identifier = token(l.IDENTIFIER, word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * (lexer.alnum + S('@#$\\.!?_'))^0))
--- Operators.
-local operator = token(l.OPERATOR, S('=!<>+-/\\*%&|^~.,:;(){}'))
+-- Strings.
+local sq_str = lexer.range("'", true, false)
+local dq_str = lexer.range('"', true, false)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('--', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Preprocessor.
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.to_eol(lexer.starts_line('#'))))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'preproc', preproc},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/\\*%&|^~.,:;(){}')))
-M._foldsymbols = {
- _patterns = {'[a-z]+', '/%*', '%*/', '%-%-', ':'},
- [l.KEYWORD] = {['do'] = 1, select = 1, ['end'] = -1, ['return'] = -1},
- [l.COMMENT] = {
- ['/*'] = 1, ['*/'] = -1, ['--'] = l.fold_line_comments('--')
- },
- [l.OPERATOR] = {[':'] = 1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'do', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'select', 'return')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('--'))
+-- lex:add_fold_point(lexer.OPERATOR, ':', ?)
-return M
+return lex
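rexx.lua also shows the relocated case-insensitivity flag: word_match({...}, nil, true) becomes word_match({...}, true) now that the word-characters parameter is gone. A quick check, assuming Scintillua's lexer.word_match:

local lexer = require('lexer')

local kw = lexer.word_match({'do', 'end', 'select', 'return'}, true)
assert(kw:match('DO'))     -- case-insensitive
assert(kw:match('Select'))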
diff --git a/lua/lexers/rhtml.lua b/lua/lexers/rhtml.lua
index 8506bff..16c7706 100644
--- a/lua/lexers/rhtml.lua
+++ b/lua/lexers/rhtml.lua
@@ -1,29 +1,20 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- RHTML LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rhtml'}
-
--- Embedded in HTML.
-local html = l.load('html')
+local lex = lexer.new('rhtml', {inherit = lexer.load('html')})
-- Embedded Ruby.
-local ruby = l.load('rails')
+local ruby = lexer.load('rails')
local ruby_start_rule = token('rhtml_tag', '<%' * P('=')^-1)
local ruby_end_rule = token('rhtml_tag', '%>')
-l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
-
-M._tokenstyles = {
- rhtml_tag = l.STYLE_EMBEDDED
-}
+lex:embed(ruby, ruby_start_rule, ruby_end_rule)
+lex:add_style('rhtml_tag', lexer.styles.embedded)
-local _foldsymbols = html._foldsymbols
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%'
-_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>'
-_foldsymbols.rhtml_tag = {['<%'] = 1, ['%>'] = -1}
-M._foldsymbols = _foldsymbols
+-- Fold points.
+lex:add_fold_point('rhtml_tag', '<%', '%>')
-return M
+return lex
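rhtml.lua is the template for embedding one lexer inside another: lex:embed() replaces l.embed_lexer(), with start and end tokens bracketing the child. A hedged sketch with hypothetical names (myhtml, my_tag); lpeg is assumed to be provided globally by the host, as in the lexer files above:

local lexer = require('lexer')
local token = lexer.token
local P = lpeg.P

local lex = lexer.new('myhtml', {inherit = lexer.load('html')})

-- Hand control to the Lua lexer between '<?lua' and '?>'.
local lua = lexer.load('lua')
local start_rule = token('my_tag', '<?' * P('lua')^-1)
local end_rule = token('my_tag', '?>')
lex:embed(lua, start_rule, end_rule)
lex:add_style('my_tag', lexer.styles.embedded)

return lex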
diff --git a/lua/lexers/routeros.lua b/lua/lexers/routeros.lua
index 7544393..f301e8b 100644
--- a/lua/lexers/routeros.lua
+++ b/lua/lexers/routeros.lua
@@ -1,116 +1,59 @@
--- Copyright 2020 Christian Hesse
+-- Copyright 2020-2022 Christian Hesse. See LICENSE.
-- Mikrotik RouterOS script LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'routeros'}
+local lex = lexer.new('routeros')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- Control.
+ ':delay', ':do', 'on-error', 'while', ':error', ':foreach', 'in', 'do', ':for', 'from', 'to',
+ 'step', ':if', 'do', 'else', ':return', ':while', 'do',
+ -- Menu-specific commands.
+ 'add', 'disable', 'edit', 'enable', 'export', 'find', 'get', 'info', 'monitor', 'print', 'append',
+ 'as-value', 'brief', 'count-only', 'detail', 'file', 'follow', 'follow-only', 'from', 'interval',
+ 'terse', 'value-list', 'where', 'without-paging', 'remove', 'set',
+ -- Output & string handling.
+ ':beep', ':blink', ':environment', ':execute', ':find', ':len', ':log', 'alert', 'critical',
+ 'debug', 'emergency', 'error', 'info', 'notice', 'warning', ':parse', ':pick', ':put',
+ ':terminal', ':time', ':typeof',
+ -- Variable declaration.
+ ':global', ':local', ':set',
+ -- Variable casting.
+ ':toarray', ':tobool', ':toid', ':toip', ':toip6', ':tonum', ':tostr', ':totime',
+ -- Boolean values and logical operators.
+ 'false', 'no', 'true', 'yes', 'and', 'in', 'or',
+ -- Networking.
+ ':ping', ':resolve'
+}))
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- -- control
- ':delay',
- ':do', 'on-error', 'while',
- ':error',
- ':foreach', 'in', 'do',
- ':for', 'from', 'to', 'step',
- ':if', 'do', 'else',
- ':return',
- ':while', 'do',
- -- menu specific commands
- 'add',
- 'disable',
- 'edit',
- 'enable',
- 'export',
- 'find',
- 'get',
- 'info',
- 'monitor',
- 'print', 'append', 'as-value', 'brief', 'count-only', 'detail', 'file',
- 'follow', 'follow-only', 'from', 'interval', 'terse', 'value-list',
- 'where', 'without-paging',
- 'remove',
- 'set',
- -- output & string handling
- ':beep',
- ':blink',
- ':environment',
- ':execute',
- ':find',
- ':len',
- ':log', 'alert', 'critical', 'debug', 'emergency', 'error', 'info',
- 'notice', 'warning',
- ':parse',
- ':pick',
- ':put',
- ':terminal',
- ':time',
- ':typeof',
- -- variable declaration
- ':global',
- ':local',
- ':set',
- -- variable casting
- ':toarray',
- ':tobool',
- ':toid',
- ':toip',
- ':toip6',
- ':tonum',
- ':tostr',
- ':totime',
- -- boolean values and logical operators
- 'false', 'no',
- 'true', 'yes',
- 'and',
- 'in',
- 'or',
- -- networking
- ':ping',
- ':resolve'
-}, ':-'))
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-- Variables.
-local variable = token(l.VARIABLE,
- '$' * (S('!#?*@$') + l.digit^1 + l.word +
- l.delimited_range('{}', true, true, true)))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' *
+ (S('!#?*@$') + lexer.digit^1 + lexer.word + lexer.range('{', '}', true, false, true))))
-- Operators.
-local operator = token(l.OPERATOR, S('=!%<>+-/*&|~.,;()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!%<>+-/*&|~.,;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[a-z]+', '[{}]', '#'},
- [l.KEYWORD] = { },
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
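The RouterOS variable rule can be exercised standalone with plain-LPeg stand-ins for lexer.digit and lexer.word (the brace-delimited alternative is omitted for brevity):

local lpeg = require('lpeg')
local P, R, S = lpeg.P, lpeg.R, lpeg.S

local digit = R('09')
local word = (R('az', 'AZ') + P('_')) * (R('az', 'AZ', '09') + P('_'))^0
local variable = P('$') * (S('!#?*@$') + digit^1 + word)

assert(variable:match('$myVar'))
assert(variable:match('$1'))
assert(not variable:match('myVar')) -- no leading '$'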
diff --git a/lua/lexers/rstats.lua b/lua/lexers/rstats.lua
index ad9b5aa..165b693 100644
--- a/lua/lexers/rstats.lua
+++ b/lua/lexers/rstats.lua
@@ -1,53 +1,51 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- R LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'rstats'}
+local lex = lexer.new('rstats')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * P('i')^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'break', 'else', 'for', 'if', 'in', 'next', 'repeat', 'return', 'switch',
- 'try', 'while', 'Inf', 'NA', 'NaN', 'NULL', 'FALSE', 'TRUE'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'break', 'else', 'for', 'if', 'in', 'next', 'repeat', 'return', 'switch', 'try', 'while', --
+ 'Inf', 'NA', 'NaN', 'NULL', 'FALSE', 'TRUE', 'F', 'T',
+ -- Frequently used operators.
+ '|>', '%%', '%*%', '%/%', '%in%', '%o%', '%x%'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'array', 'character', 'complex', 'data.frame', 'double', 'factor', 'function',
- 'integer', 'list', 'logical', 'matrix', 'numeric', 'vector'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'array', 'character', 'closure', 'complex', 'data.frame', 'double', 'environment', 'expression',
+ 'externalptr', 'factor', 'function', 'integer', 'list', 'logical', 'matrix', 'numeric',
+ 'pairlist', 'promise', 'raw', 'symbol', 'vector'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, (lexer.number * P('i')^-1) * P('L')^-1))
-- Operators.
-local operator = token(l.OPERATOR, S('<->+*/^=.,:;|$()[]{}'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('<->+*/^=.,:;|$()[]{}')))
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+
+return lex
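The updated R number rule appends two optional suffixes to lexer.number: i for complex literals and L for integers. A simplified standalone version with a bare digit run standing in for lexer.number:

local lpeg = require('lpeg')
local P, R = lpeg.P, lpeg.R

local number = R('09')^1 * P('i')^-1 * P('L')^-1
assert(number:match('42'))
assert(number:match('3i'))  -- complex literal
assert(number:match('10L')) -- integer literal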
diff --git a/lua/lexers/ruby.lua b/lua/lexers/ruby.lua
index ec712c1..f6ba415 100644
--- a/lua/lexers/ruby.lua
+++ b/lua/lexers/ruby.lua
@@ -1,148 +1,128 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Ruby LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'ruby'}
+local lex = lexer.new('ruby')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do',
+ 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if', 'in', 'module', 'next', 'nil', 'not',
+ 'or', 'redo', 'rescue', 'retry', 'return', 'self', 'super', 'then', 'true', 'undef', 'unless',
+ 'until', 'when', 'while', 'yield', '__FILE__', '__LINE__'
+}))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'at_exit', 'autoload', 'binding', 'caller', 'catch', 'chop', 'chop!', 'chomp', 'chomp!', 'eval',
+ 'exec', 'exit', 'exit!', 'extend', 'fail', 'fork', 'format', 'gets', 'global_variables', 'gsub',
+ 'gsub!', 'include', 'iterator?', 'lambda', 'load', 'local_variables', 'loop', 'module_function',
+ 'open', 'p', 'print', 'printf', 'proc', 'putc', 'puts', 'raise', 'rand', 'readline', 'readlines',
+ 'require', 'require_relative', 'select', 'sleep', 'split', 'sprintf', 'srand', 'sub', 'sub!',
+ 'syscall', 'system', 'test', 'trace_var', 'trap', 'untrace_var'
+}) * -S('.:|'))
+
+-- Identifiers.
+local word_char = lexer.alnum + S('_!?')
+local word = (lexer.alpha + '_') * word_char^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Comments.
-local line_comment = '#' * l.nonnewline_esc^0
-local block_comment = l.starts_line('=begin') * (l.any - l.newline * '=end')^0 *
- (l.newline * '=end')^-1
-local comment = token(l.COMMENT, block_comment + line_comment)
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=begin'), lexer.starts_line('=end'))
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
+-- Strings.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'}
-local literal_delimitted = P(function(input, index)
+local literal_delimited = P(function(input, index)
local delimiter = input:sub(index, index)
if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics
local match_pos, patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = l.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = l.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
end
end)
--- Strings.
-local cmd_str = l.delimited_range('`')
-local lit_cmd = '%x' * literal_delimitted
-local lit_array = '%w' * literal_delimitted
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local lit_str = '%' * S('qQ')^-1 * literal_delimitted
+local cmd_str = lexer.range('`')
+local lit_cmd = '%x' * literal_delimited
+local lit_array = '%w' * literal_delimited
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local lit_str = '%' * S('qQ')^-1 * literal_delimited
local heredoc = '<<' * P(function(input, index)
- local s, e, indented, _, delimiter =
- input:find('([%-~]?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
+ local s, e, indented, _, delimiter = input:find('([%-~]?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
if s == index and delimiter then
local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
- local _, e = input:find(end_heredoc..delimiter, e)
+ e = select(2, input:find(end_heredoc .. delimiter, e))
return e and e + 1 or #input + 1
end
end)
+local string = token(lexer.STRING, (sq_str + dq_str + lit_str + heredoc + cmd_str + lit_cmd +
+ lit_array) * S('f')^-1)
-- TODO: regex_str fails with `obj.method /patt/` syntax.
-local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
- l.delimited_range('/', true, false) * S('iomx')^0
-local lit_regex = '%r' * literal_delimitted * S('iomx')^0
-local string = token(l.STRING, (sq_str + dq_str + lit_str + heredoc + cmd_str +
- lit_cmd + lit_array) * S('f')^-1) +
- token(l.REGEX, regex_str + lit_regex)
-
-local word_char = l.alnum + S('_!?')
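+-- A '/' starts a regex only when the last non-space character is an operator; this keeps
+-- division from being lexed as a regex (see lexer.last_char_includes()).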
+local regex_str =
+ #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * lexer.range('/', true) * S('iomx')^0
+local lit_regex = '%r' * literal_delimited * S('iomx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex)
+lex:add_rule('string', string + regex)
-- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0 * S('ri')^-1
-local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
-local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec)
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1
+local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 * -lexer.xdigit
+local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
-local numeric_literal = '?' * (l.any - l.space) * -word_char
-local number = token(l.NUMBER, l.float * S('ri')^-1 + integer + numeric_literal)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def',
- 'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if',
- 'in', 'module', 'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry',
- 'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until', 'when',
- 'while', 'yield', '__FILE__', '__LINE__'
-}, '?!'))
-
--- Functions.
-local func = token(l.FUNCTION, word_match({
- 'at_exit', 'autoload', 'binding', 'caller', 'catch', 'chop', 'chop!', 'chomp',
- 'chomp!', 'eval', 'exec', 'exit', 'exit!', 'extend', 'fail', 'fork', 'format', 'gets',
- 'global_variables', 'gsub', 'gsub!', 'include', 'iterator?', 'lambda', 'load',
- 'local_variables', 'loop', 'module_function', 'open', 'p', 'print', 'printf', 'proc', 'putc',
- 'puts', 'raise', 'rand', 'readline', 'readlines', 'require', 'require_relative', 'select',
- 'sleep', 'split', 'sprintf', 'srand', 'sub', 'sub!', 'syscall', 'system',
- 'test', 'trace_var', 'trap', 'untrace_var'
-}, '?!')) * -S('.:|')
-
--- Identifiers.
-local word = (l.alpha + '_') * word_char^0
-local identifier = token(l.IDENTIFIER, word)
+local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char
+lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + numeric_literal))
-- Variables.
-local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + l.digit + '-' *
- S('0FadiIKlpvw'))
+local global_var = '$' *
+ (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + '-' * S('0FadiIKlpvw'))
local class_var = '@@' * word
local inst_var = '@' * word
-local variable = token(l.VARIABLE, global_var + class_var + inst_var)
+lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + inst_var))
-- Symbols.
-local symbol = token('symbol', ':' * P(function(input, index)
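+-- The match-time check rejects a ':' preceded by another ':' so that scope resolution ('::')
+-- is not lexed as a symbol.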
+lex:add_rule('symbol', token('symbol', ':' * P(function(input, index)
if input:sub(index - 2, index - 2) ~= ':' then return index end
-end) * (word_char^1 + sq_str + dq_str))
+end) * (word_char^1 + sq_str + dq_str)))
+lex:add_style('symbol', lexer.styles.constant)
-- Operators.
-local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'identifier', identifier},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'variable', variable},
- {'symbol', symbol},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- symbol = l.STYLE_CONSTANT
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~')))
+-- Fold points.
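+-- 'if', 'while', 'unless', and 'until' open a fold only when they begin a line that is not a
+-- '\' continuation; used as trailing statement modifiers they do not.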
local function disambiguate(text, pos, line, s)
- return line:sub(1, s - 1):match('^%s*$') and
- not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
+ return line:sub(1, s - 1):match('^%s*$') and not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and
+ 1 or 0
end
-
-M._foldsymbols = {
- _patterns = {'%l+', '[%(%)%[%]{}]', '=begin', '=end', '#'},
- [l.KEYWORD] = {
- begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
- case = 1,
- ['if'] = disambiguate, ['while'] = disambiguate,
- ['unless'] = disambiguate, ['until'] = disambiguate,
- ['end'] = -1
- },
- [l.OPERATOR] = {
- ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {
- ['=begin'] = 1, ['=end'] = -1, ['#'] = l.fold_line_comments('#')
- }
-}
-
-return M
+lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'class', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'def', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'do', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'for', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'module', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'case', 'end')
+lex:add_fold_point(lexer.KEYWORD, 'if', disambiguate)
+lex:add_fold_point(lexer.KEYWORD, 'while', disambiguate)
+lex:add_fold_point(lexer.KEYWORD, 'unless', disambiguate)
+lex:add_fold_point(lexer.KEYWORD, 'until', disambiguate)
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '=begin', '=end')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+
+return lex
diff --git a/lua/lexers/rust.lua b/lua/lexers/rust.lua
index a5f8ecd..25555d6 100644
--- a/lua/lexers/rust.lua
+++ b/lua/lexers/rust.lua
@@ -1,88 +1,82 @@
--- Copyright 2015-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE.
+-- Copyright 2015-2022 Alejandro Baez (https://keybase.io/baez). See LICENSE.
-- Rust LPeg lexer.
-local l = require("lexer")
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require("lexer")
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+local C, Cmt = lpeg.C, lpeg.Cmt
-local M = {_NAME = 'rust'}
+local lex = lexer.new('rust')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'")
-local dq_str = P('L')^-1 * l.delimited_range('"')
-local raw_str = '#"' * (l.any - '#')^0 * P('#')^-1
-local string = token(l.STRING, dq_str + raw_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + (l.dec_num + "_")^1 +
- "0b" * (l.dec_num + "_")^1 + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'alignof', 'as', 'become', 'box',
- 'break', 'const', 'continue', 'crate', 'do',
- 'else', 'enum', 'extern', 'false', 'final',
- 'fn', 'for', 'if', 'impl', 'in',
- 'let', 'loop', 'macro', 'match', 'mod',
- 'move', 'mut', "offsetof", 'override', 'priv',
- 'proc', 'pub', 'pure', 'ref', 'return',
- 'Self', 'self', 'sizeof', 'static', 'struct',
- 'super', 'trait', 'true', 'type', 'typeof',
- 'unsafe', 'unsized', 'use', 'virtual', 'where',
- 'while', 'yield'
-})
+-- https://github.com/rust-lang/rust/blob/stable/src/libsyntax_pos/symbol.rs
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'Self', 'abstract', 'as', 'async', 'auto', 'await', 'become', 'box', 'break', 'catch', 'const',
+ 'continue', 'crate', 'default', 'do', 'dyn', 'else', 'enum', 'extern', 'false', 'final', 'fn',
+ 'for', 'if', 'impl', 'in', 'let', 'loop', 'macro', 'match', 'mod', 'move', 'mut', 'override',
+ 'priv', 'pub', 'ref', 'return', 'self', 'static', 'struct', 'super', 'trait', 'true', 'try',
+ 'type', 'typeof', 'union', 'unsafe', 'unsized', 'use', 'virtual', 'where', 'while', 'yield'
+}))
+
+-- Macro names.
+lex:add_rule('macro', token(lexer.FUNCTION, lexer.word * P('!')))
-- Library types
-local library = token(l.LABEL, l.upper * (l.lower + l.dec_num)^1)
-
--- syntax extensions
-local extension = l.word^1 * S("!")
+lex:add_rule('library', token(lexer.LABEL, lexer.upper * (lexer.lower + lexer.dec_num)^1))
-local func = token(l.FUNCTION, extension)
+-- Numbers.
+local identifier = P('r#')^-1 * lexer.word
+local digit = lexer.digit
+local decimal_literal = digit * (digit + '_')^0
+local function integer_suffix(digit) return P('_')^0 * digit * (digit + '_')^0 end
+local function opt_cap(patt) return C(patt^-1) end
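+-- A float needs a fractional part, an exponent, or an 'f32'/'f64' suffix (the Cmt rejects bare
+-- integers), or a trailing '.' that does not start '..', '._', or a method call.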
+local float = decimal_literal *
+ (Cmt(opt_cap('.' * decimal_literal) * opt_cap(S('eE') * S('+-')^-1 * integer_suffix(digit)) *
+ opt_cap(P('f32') + 'f64'), function(input, index, decimals, exponent, type)
+ return decimals ~= "" or exponent ~= "" or type ~= ""
+ end) + '.' * -(S('._') + identifier))
+local function prefixed_integer(prefix, digit) return P(prefix) * integer_suffix(digit) end
+local bin = prefixed_integer('0b', S('01'))
+local oct = prefixed_integer('0o', lpeg.R('07'))
+local hex = prefixed_integer('0x', lexer.xdigit)
+local integer = (bin + oct + hex + decimal_literal) *
+ (S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1
+lex:add_rule('number', token(lexer.NUMBER, float + integer))
-- Types.
-local type = token(l.TYPE, word_match{
- '()', 'bool', 'isize', 'usize', 'char', 'str',
- 'u8', 'u16', 'u32', 'u64', 'i8', 'i16', 'i32', 'i64',
- 'f32','f64',
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ '() bool isize usize char str u8 u16 u32 u64 u128 i8 i16 i32 i64 i128 f32 f64')))
+
+-- Strings.
+local sq_str = P('b')^-1 * lexer.range("'", true)
+local dq_str = P('b')^-1 * lexer.range('"')
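+-- Raw strings: capture the run of '#'s after 'r' and scan ahead for '"' followed by the same
+-- run, so r#"..."# through r##"..."## match correctly.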
+local raw_str = Cmt(P('b')^-1 * P('r') * C(P('#')^0) * '"', function(input, index, hashes)
+ local _, e = input:find('"' .. hashes, index, true)
+ return (e or #input) + 1
+end)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, identifier))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=`^~@&|?#~:;,.()[]{}'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Attributes.
-local attribute = token(l.PREPROCESSOR, "#[" *
- (l.nonnewline - ']')^0 * P("]")^-1)
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * lexer.range('[', ']', true)))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'library', library},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
- {'preprocessor', attribute},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=`^~@&|?#~:;,.()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')},
- [l.OPERATOR] = {['('] = 1, ['{'] = 1, [')'] = -1, ['}'] = -1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
-return M
+return lex
diff --git a/lua/lexers/sass.lua b/lua/lexers/sass.lua
index 1e1b4eb..bb14e1d 100644
--- a/lua/lexers/sass.lua
+++ b/lua/lexers/sass.lua
@@ -1,32 +1,24 @@
--- Copyright 2006-2017 Robert Gieseke. See LICENSE.
+-- Copyright 2006-2022 Robert Gieseke. See LICENSE.
-- Sass CSS preprocessor LPeg lexer.
-- http://sass-lang.com
-local l = require('lexer')
-local token = l.token
+local lexer = require('lexer')
+local token = lexer.token
local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'sass'}
+local lex = lexer.new('sass', {inherit = lexer.load('css')})
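+-- Inherit all CSS rules and styles; the Sass-specific rules below extend them.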
-- Line comments.
-local line_comment = token(l.COMMENT, '//' * l.nonnewline^0)
+lex:add_rule('line_comment', token(lexer.COMMENT, lexer.to_eol('//')))
-- Variables.
-local variable = token(l.VARIABLE, '$' * (l.alnum + S('_-'))^1)
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + S('_-'))^1))
-- Mixins.
-local mixin = token('mixin', P('@') * l.word)
+lex:add_rule('mixin', token('mixin', '@' * lexer.word))
+lex:add_style('mixin', lexer.styles['function'])
-local css = l.load('css')
-local _rules = css._rules
-table.insert(_rules, #_rules - 1, {'mixin', mixin})
-table.insert(_rules, #_rules - 1, {'line_comment', line_comment})
-table.insert(_rules, #_rules - 1, {'variable', variable})
-M._rules = _rules
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-M._tokenstyles = css._tokenstyles
-M._tokenstyles['mixin'] = l.STYLE_FUNCTION
-
-M._foldsymbols = css._foldsymbols
-
-return M
+return lex
diff --git a/lua/lexers/scala.lua b/lua/lexers/scala.lua
index 96fe344..87c9095 100644
--- a/lua/lexers/scala.lua
+++ b/lua/lexers/scala.lua
@@ -1,75 +1,60 @@
--- Copyright 2006-2017 JMS. See LICENSE.
--- Scala LPeg Lexer.
+-- Copyright 2006-2022 JMS. See LICENSE.
+-- Scala LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'scala'}
+local lex = lexer.new('scala')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local symbol = "'" * l.word
-local dq_str = l.delimited_range('"', true)
-local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local string = token(l.STRING, tq_str + symbol + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1)
+-- Classes.
+lex:add_rule('class', token(lexer.KEYWORD, 'class') * ws^1 * token(lexer.CLASS, lexer.word))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'abstract', 'case', 'catch', 'class', 'def', 'do', 'else', 'extends', 'false',
- 'final', 'finally', 'for', 'forSome', 'if', 'implicit', 'import', 'lazy',
- 'match', 'new', 'null', 'object', 'override', 'package', 'private',
- 'protected', 'return', 'sealed', 'super', 'this', 'throw', 'trait', 'try',
- 'true', 'type', 'val', 'var', 'while', 'with', 'yield'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstract', 'case', 'catch', 'class', 'def', 'do', 'else', 'extends', 'false', 'final', 'finally',
+ 'for', 'forSome', 'if', 'implicit', 'import', 'lazy', 'match', 'new', 'null', 'object',
+ 'override', 'package', 'private', 'protected', 'return', 'sealed', 'super', 'this', 'throw',
+ 'trait', 'try', 'true', 'type', 'val', 'var', 'while', 'with', 'yield'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'Array', 'Boolean', 'Buffer', 'Byte', 'Char', 'Collection', 'Double', 'Float',
- 'Int', 'Iterator', 'LinkedList', 'List', 'Long', 'Map', 'None', 'Option',
- 'Set', 'Short', 'SortedMap', 'SortedSet', 'String', 'TreeMap', 'TreeSet'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'Array', 'Boolean', 'Buffer', 'Byte', 'Char', 'Collection', 'Double', 'Float', 'Int', 'Iterator',
+ 'LinkedList', 'List', 'Long', 'Map', 'None', 'Option', 'Set', 'Short', 'SortedMap', 'SortedSet',
+ 'String', 'TreeMap', 'TreeSet'
+}))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+-- Strings.
+local symbol = "'" * lexer.word
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+lex:add_rule('string', token(lexer.STRING, tq_str + symbol + dq_str))
--- Functions.
-local func = token(l.FUNCTION, l.word) * #P('(')
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Classes.
-local class_sequence = token(l.KEYWORD, P('class')) * ws^1 *
- token(l.CLASS, l.word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1))
-M._rules = {
- {'whitespace', ws},
- {'class', class_sequence},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
diff --git a/lua/lexers/scheme.lua b/lua/lexers/scheme.lua
index 681f2fd..a19fa0f 100644
--- a/lua/lexers/scheme.lua
+++ b/lua/lexers/scheme.lua
@@ -1,236 +1,174 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Scheme LPeg lexer.
+-- Contributions by Murray Calavera.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'scheme'}
+local lex = lexer.new('scheme')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = ';' * l.nonnewline^0
-local block_comment = l.nested_pair('#|', '|#')
--- TODO: this should handle any datum and take into account "#\)", ";" etc.
-local datum_comment
- = P'#;' * l.space^0
- * (l.delimited_range("()", false, true, true) + (l.any - l.space)^1)
-local comment = token(l.COMMENT, datum_comment + line_comment + block_comment)
-
--- Strings.
-local character
- = P'#\\' * ( P'alarm' + P'backspace' + P'delete' + P'escape'
- + P'newline' + P'null' + P'return' + P'space' + P'tab')
- + P'#\\x' * l.xdigit^1
- + P'#\\' * P(1)
-local dq_str = l.delimited_range('"')
-local string = token(l.STRING, character + dq_str)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- "and", "or", "not", "else",
-
- "library", "define-library", "export", "include-library-declarations",
- "cond-expand", "import", "rename", "only", "except", "prefix", "include",
- "include-ci",
-
- "begin", "case", "case-lambda", "cond", "define", "define-record-type",
- "define-syntax", "define-values", "delay", "delay-force", "do", "if",
- "guard", "lambda", "let", "let*", "let*-values", "let-syntax", "let-values",
- "letrec", "letrec*", "letrec-syntax", "parameterize", "quasiquote", "quote",
- "set!", "unless", "unquote", "unquote-splicing", "when",
-
- "define-macro", "fluid-let"
-}, '.-+!$%&*/:<=>?@^_~'))
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'and', 'or', 'not', 'else',
+ --
+ 'library', 'define-library', 'export', 'include-library-declarations', 'cond-expand', 'import',
+ 'rename', 'only', 'except', 'prefix', 'include', 'include-ci',
+ --
+ 'begin', 'case', 'case-lambda', 'cond', 'define', 'define-record-type', 'define-syntax',
+ 'define-values', 'delay', 'delay-force', 'do', 'if', 'guard', 'lambda', 'let', 'let*',
+ 'let*-values', 'let-syntax', 'let-values', 'letrec', 'letrec*', 'letrec-syntax', 'parameterize',
+ 'quasiquote', 'quote', 'set!', 'unless', 'unquote', 'unquote-splicing', 'when',
+ --
+ 'define-macro', 'fluid-let'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match({
- "*", "+", "-", "/", "<", "<=", "=", "=>", ">", ">=", "abs", "append",
- "apply", "assoc", "assq", "assv", "binary-port?", "boolean=?", "boolean?",
- "bytevector", "bytevector-append", "bytevector-copy", "bytevector-copy!",
- "bytevector-length", "bytevector-u8-ref", "bytevector-u8-set!",
- "bytevector?", "caar", "cadr", "call-with-current-continuation",
- "call-with-port", "call-with-values", "call/cc", "car", "cdar", "cddr",
- "cdr", "ceiling", "char->integer", "char-ready?", "char<=?", "char<?",
- "char=?", "char>=?", "char>?", "char?", "close-input-port",
- "close-output-port", "close-port", "complex?", "cons", "current-error-port",
- "current-input-port", "current-output-port", "denominator", "dynamic-wind",
- "eof-object", "eof-object?", "eq?", "equal?", "eqv?", "error",
- "error-object-irritants", "error-object-message", "error-object?", "even?",
- "exact", "exact-integer-sqrt", "exact-integer?", "exact?", "expt",
- "features", "file-error?", "floor", "floor-quotient", "floor-remainder",
- "floor/", "flush-output-port", "for-each", "gcd", "get-output-bytevector",
- "get-output-string", "inexact", "inexact?", "input-port-open?",
- "input-port?", "integer->char", "integer?", "lcm", "length", "list",
- "list->string", "list->vector", "list-copy", "list-ref", "list-set!",
- "list-tail", "list?", "make-bytevector", "make-list", "make-parameter",
- "make-string", "make-vector", "map", "max", "member", "memq", "memv", "min",
- "modulo", "negative?", "newline", "null?", "number->string", "number?",
- "numerator", "odd?", "open-input-bytevector", "open-input-string",
- "open-output-bytevector", "open-output-string", "output-port-open?",
- "output-port?", "pair?", "peek-char", "peek-u8", "port?", "positive?",
- "procedure?", "quotient", "raise", "raise-continuable", "rational?",
- "rationalize", "read-bytevector", "read-bytevector!", "read-char",
- "read-error?", "read-line", "read-string", "read-u8", "real?", "remainder",
- "reverse", "round", "set-car!", "set-cdr!", "square", "string",
- "string->list", "string->number", "string->symbol", "string->utf8",
- "string->vector", "string-append", "string-copy", "string-copy!",
- "string-fill!", "string-for-each", "string-length", "string-map",
- "string-ref", "string-set!", "string<=?", "string<?", "string=?",
- "string>=?", "string>?", "string?", "substring", "symbol->string",
- "symbol=?", "symbol?", "syntax-error", "syntax-rules", "textual-port?",
- "truncate", "truncate-quotient", "truncate-remainder", "truncate/",
- "u8-ready?", "utf8->string", "values", "vector", "vector->list",
- "vector->string", "vector-append", "vector-copy", "vector-copy!",
- "vector-fill!", "vector-for-each", "vector-length", "vector-map",
- "vector-ref", "vector-set!", "vector?", "with-exception-handler",
- "write-bytevector", "write-char", "write-string", "write-u8", "zero?",
-
- "char-alphabetic?", "char-ci<=?", "char-ci<?", "char-ci=?", "char-ci>=?",
- "char-ci>?", "char-downcase", "char-foldcase", "char-lower-case?",
- "char-numeric?", "char-upcase", "char-upper-case?", "char-whitespace?",
- "digit-value", "string-ci<=?", "string-ci<?", "string-ci=?", "string-ci>=?",
- "string-ci>?", "string-downcase", "string-foldcase", "string-upcase",
-
- "angle", "imag-part", "magnitude", "make-polar", "make-rectangular",
- "real-part",
-
- "caaaar", "caaadr", "caaar", "caadar", "caaddr", "caadr", "cadaar", "cadadr",
- "cadar", "caddar", "cadddr", "caddr", "cdaaar", "cdaadr", "cdaar", "cdadar",
- "cdaddr", "cdadr", "cddaar", "cddadr", "cddar", "cdddar", "cddddr", "cdddr",
-
- "environment", "eval",
-
- "call-with-input-file", "call-with-output-file", "delete-file",
- "file-exists?", "open-binary-input-file", "open-binary-output-file",
- "open-input-file", "open-output-file", "with-input-from-file",
- "with-output-to-file",
-
- "acos", "asin", "atan", "cos", "exp", "finite?", "infinite?", "log", "nan?",
- "sin", "sqrt", "tan",
-
- "force", "make-promise", "promise?",
-
- "load",
-
- "command-line", "emergency-exit", "exit", "get-environment-variable",
- "get-environment-variables",
-
- "read",
-
- "interaction-environment",
-
- "current-jiffy", "current-second", "jiffies-per-second",
-
- "display", "write", "write-shared", "write-simple",
-
- "syntax-case", "er-macro-transformer", "sc-macro-transformer",
- "rsc-macro-transformer"
-}, '.-+!$%&*/:<=>?@^_~'))
-
-local directive = token(l.PREPROCESSOR, P'#!fold-case' + P'#!no-fold-case')
-local boolean = token(l.CONSTANT,
- word_match({'#t', '#f', '#true', '#false'}, '#'))
-
--- Identifiers.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ '*', '+', '-', '/', '<', '<=', '=', '=>', '>', '>=', 'abs', 'append', 'apply', 'assoc', 'assq',
+ 'assv', 'binary-port?', 'boolean=?', 'boolean?', 'bytevector', 'bytevector-append',
+ 'bytevector-copy', 'bytevector-copy!', 'bytevector-length', 'bytevector-u8-ref',
+ 'bytevector-u8-set!', 'bytevector?', 'caar', 'cadr', 'call-with-current-continuation',
+ 'call-with-port', 'call-with-values', 'call/cc', 'car', 'cdar', 'cddr', 'cdr', 'ceiling',
+ 'char->integer', 'char-ready?', 'char<=?', 'char<?', 'char=?', 'char>=?', 'char>?', 'char?',
+ 'close-input-port', 'close-output-port', 'close-port', 'complex?', 'cons', 'current-error-port',
+ 'current-input-port', 'current-output-port', 'denominator', 'dynamic-wind', 'eof-object',
+ 'eof-object?', 'eq?', 'equal?', 'eqv?', 'error', 'error-object-irritants', 'error-object-message',
+ 'error-object?', 'even?', 'exact', 'exact-integer-sqrt', 'exact-integer?', 'exact?', 'expt',
+ 'features', 'file-error?', 'floor', 'floor-quotient', 'floor-remainder', 'floor/',
+ 'flush-output-port', 'for-each', 'gcd', 'get-output-bytevector', 'get-output-string', 'inexact',
+ 'inexact?', 'input-port-open?', 'input-port?', 'integer->char', 'integer?', 'lcm', 'length',
+ 'list', 'list->string', 'list->vector', 'list-copy', 'list-ref', 'list-set!', 'list-tail',
+ 'list?', 'make-bytevector', 'make-list', 'make-parameter', 'make-string', 'make-vector', 'map',
+ 'max', 'member', 'memq', 'memv', 'min', 'modulo', 'negative?', 'newline', 'null?',
+ 'number->string', 'number?', 'numerator', 'odd?', 'open-input-bytevector', 'open-input-string',
+ 'open-output-bytevector', 'open-output-string', 'output-port-open?', 'output-port?', 'pair?',
+ 'peek-char', 'peek-u8', 'port?', 'positive?', 'procedure?', 'quotient', 'raise',
+ 'raise-continuable', 'rational?', 'rationalize', 'read-bytevector', 'read-bytevector!',
+ 'read-char', 'read-error?', 'read-line', 'read-string', 'read-u8', 'real?', 'remainder',
+ 'reverse', 'round', 'set-car!', 'set-cdr!', 'square', 'string', 'string->list', 'string->number',
+ 'string->symbol', 'string->utf8', 'string->vector', 'string-append', 'string-copy',
+ 'string-copy!', 'string-fill!', 'string-for-each', 'string-length', 'string-map', 'string-ref',
+ 'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?', 'string>?', 'string?',
+ 'substring', 'symbol->string', 'symbol=?', 'symbol?', 'syntax-error', 'syntax-rules',
+ 'textual-port?', 'truncate', 'truncate-quotient', 'truncate-remainder', 'truncate/', 'u8-ready?',
+ 'utf8->string', 'values', 'vector', 'vector->list', 'vector->string', 'vector-append',
+ 'vector-copy', 'vector-copy!', 'vector-fill!', 'vector-for-each', 'vector-length', 'vector-map',
+ 'vector-ref', 'vector-set!', 'vector?', 'with-exception-handler', 'write-bytevector',
+ 'write-char', 'write-string', 'write-u8', 'zero?',
+ --
+ 'char-alphabetic?', 'char-ci<=?', 'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?',
+ 'char-downcase', 'char-foldcase', 'char-lower-case?', 'char-numeric?', 'char-upcase',
+ 'char-upper-case?', 'char-whitespace?', 'digit-value', 'string-ci<=?', 'string-ci<?',
+ 'string-ci=?', 'string-ci>=?', 'string-ci>?', 'string-downcase', 'string-foldcase',
+ 'string-upcase',
+ --
+ 'angle', 'imag-part', 'magnitude', 'make-polar', 'make-rectangular', 'real-part',
+ --
+ 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr', 'cadaar', 'cadadr', 'cadar', 'caddar',
+ 'cadddr', 'caddr', 'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cddaar', 'cddadr',
+ 'cddar', 'cdddar', 'cddddr', 'cdddr',
+ --
+ 'environment', 'eval',
+ --
+ 'call-with-input-file', 'call-with-output-file', 'delete-file', 'file-exists?',
+ 'open-binary-input-file', 'open-binary-output-file', 'open-input-file', 'open-output-file',
+ 'with-input-from-file', 'with-output-to-file',
+ --
+ 'acos', 'asin', 'atan', 'cos', 'exp', 'finite?', 'infinite?', 'log', 'nan?', 'sin', 'sqrt', 'tan',
+ --
+ 'force', 'make-promise', 'promise?',
+ --
+ 'load',
+ --
+ 'command-line', 'emergency-exit', 'exit', 'get-environment-variable', 'get-environment-variables',
+ --
+ 'read',
+ --
+ 'interaction-environment',
+ --
+ 'current-jiffy', 'current-second', 'jiffies-per-second',
+ --
+ 'display', 'write', 'write-shared', 'write-simple',
+ --
+ 'syntax-case', 'er-macro-transformer', 'sc-macro-transformer', 'rsc-macro-transformer'
+}))
+
+-- Identifiers and symbols.
local explicit_sign = S('+-')
-
-local initial = l.alpha + S('!$%&*/:<=>?@^_~')
-local subsequent = initial + l.digit + explicit_sign + P'.'
-
+local initial = lexer.alpha + S('!$%&*/:<=>?@^_~')
+local subsequent = initial + lexer.digit + explicit_sign + '.'
local sign_subsequent = initial + explicit_sign
-local dot_subsequent = sign_subsequent + P'.'
+local dot_subsequent = sign_subsequent + '.'
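+-- R7RS 'peculiar identifiers': bare '+' or '-', '...', and names like '->x' that do not begin
+-- with a normal initial character.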
+-- LuaFormatter off
+local peculiar_identifier =
+ explicit_sign * '.' * dot_subsequent * subsequent^0 +
+ explicit_sign * sign_subsequent * subsequent^0 +
+ '.' * dot_subsequent * subsequent^0 +
+ explicit_sign
+-- LuaFormatter on
+local ident = lexer.range('|') + initial * subsequent^0 + peculiar_identifier
+lex:add_rule('identifier', token(lexer.IDENTIFIER, ident))
+lex:add_rule('symbol', token(lexer.CLASS, "'" * ident))
-local peculiar_identifier
- = explicit_sign * P'.' * dot_subsequent * subsequent^0
- + explicit_sign * sign_subsequent * subsequent^0
- + P'.' * dot_subsequent * subsequent^0
- + explicit_sign
+-- Strings.
+local character = '#\\' *
+ (word_match('alarm backspace delete escape newline null return space tab') + 'x' * lexer.xdigit^1 +
+ lexer.any)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, character + dq_str))
+
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('#t #f #true #false')))
-local ident
- = l.delimited_range('|')
- + initial * subsequent^0
- + peculiar_identifier
+-- Directives.
+lex:add_rule('directive', token(lexer.PREPROCESSOR, P('#!fold-case') + '#!no-fold-case'))
-local identifier = token(l.IDENTIFIER, ident)
-local symbol = token(l.CLASS, P"'" * ident)
+-- Comments.
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#|', '|#', false, false, true)
+-- TODO: this should handle any datum and take into account "#\)", ";" etc.
+local datum_comment = '#;' * lexer.space^0 *
+ (lexer.range('(', ')', false, true, true) + (lexer.any - lexer.space)^1)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment + datum_comment))
-- Numbers.
+local radixes = {[2] = P('#b'), [8] = P('#o'), [10] = P('#d')^-1, [16] = P('#x')}
+local digits = {[2] = S('01'), [8] = lpeg.R('07'), [10] = lexer.digit, [16] = lexer.xdigit}
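+-- Build the R7RS number pattern for radix r: exactness/radix prefix, rationals ('1/2'),
+-- decimals (radix 10 only), '+inf.0'-style values, and '@'/'i' complex forms.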
local function num(r)
- local exactness = (P'#i' + P'#e')^-1
-
- local radix = ({
- [2] = P'#b',
- [8] = P'#o',
- [10] = P('#d')^-1,
- [16] = P'#x'
- })[r]
-
- local digit = ({
- [2] = S'01',
- [8] = R'07',
- [10] = l.digit,
- [16] = l.xdigit
- })[r]
-
+ local exactness = (P('#i') + '#e')^-1
+ local radix, digit = radixes[r], digits[r]
local prefix = radix * exactness + exactness * radix
- local suffix = (P'e' * S('+-')^-1 * l.digit^1)^-1
-
- local infnan = P'+inf.0' + P'-inf.0' + P'+nan.0' + P'-nan.0'
-
- local decimal
- = l.digit^1 * suffix
- + P'.' * l.digit^1 * suffix
- + l.digit^1 * P'.' * l.digit^0 * suffix
-
- local ureal
- = digit^1 * P'/' * digit^1
- + (r == 10 and decimal or P(false))
- + digit^1
- local real
- = S('+-')^-1 * ureal
- + infnan
-
- local i = P'i'
- local complex
- = real * P'@' * real
- + real * S'+-' * ureal^-1 * i
- + real * infnan * i
- + infnan * i
- + real
- + S'+-' * ureal^-1 * i
-
+ local suffix = ('e' * S('+-')^-1 * lexer.digit^1)^-1
+ local infnan = S('+-') * word_match[[inf nan]] * '.0'
+ -- LuaFormatter off
+ local decimal = lexer.digit^1 * suffix +
+ '.' * lexer.digit^1 * suffix +
+ lexer.digit^1 * '.' * lexer.digit^0 * suffix
+ local ureal = digit^1 * '/' * digit^1 +
+ (r == 10 and decimal or P(false)) +
+ digit^1
+ local real = S('+-')^-1 * ureal + infnan
+ local i = P('i')
+ local complex = real * '@' * real +
+ real * S('+-') * ureal^-1 * i +
+ real * infnan * i +
+ infnan * i +
+ real +
+ S('+-') * ureal^-1 * i
+ -- LuaFormatter on
return prefix * complex
end
-
-local number = token(l.NUMBER, num(2) + num(8) + num(10) + num(16))
+lex:add_rule('number', token(lexer.NUMBER, num(2) + num(8) + num(10) + num(16)))
-- Operators.
-local operator = token(l.OPERATOR, P'#u8' + P',@' + S(".`'#(),"))
-
-M._rules = {
- {'whitespace', ws},
- {'directive', directive},
- {'boolean', boolean},
- {'comment', comment},
- {'string', string},
- {'number', number},
- {'keyword', keyword},
- {'func', func},
- {'identifier', identifier},
- {'symbol', symbol},
- {'operator', operator},
-}
-
+lex:add_rule('operator', token(lexer.OPERATOR, P('#u8') + ',@' + S(".`'#(),")))
-M._foldsymbols = {
- _patterns = {'[%(%)%[%]{}]', '#|', '|#', ';'},
- [l.OPERATOR] = {
- ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
- },
- [l.COMMENT] = {['#|'] = 1, ['|#'] = -1, [';'] = l.fold_line_comments(';')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.COMMENT, '#|', '|#')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines(';'))
-return M
+return lex
diff --git a/lua/lexers/smalltalk.lua b/lua/lexers/smalltalk.lua
index a5d22d5..982a514 100644
--- a/lua/lexers/smalltalk.lua
+++ b/lua/lexers/smalltalk.lua
@@ -1,62 +1,44 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Smalltalk LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'smalltalk'}
+local lex = lexer.new('smalltalk')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, l.delimited_range('"', false, true))
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local literal = '$' * l.word
-local string = token(l.STRING, sq_str + literal)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'true', 'false', 'nil', 'self', 'super', 'isNil', 'not', 'Smalltalk',
- 'Transcript'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match(
+ 'true false nil self super isNil not Smalltalk Transcript')))
-- Types.
-local type = token(l.TYPE, word_match{
- 'Date', 'Time', 'Boolean', 'True', 'False', 'Character', 'String', 'Array',
- 'Symbol', 'Integer', 'Object'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'Date Time Boolean True False Character String Array Symbol Integer Object')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local word_str = '$' * lexer.word
+lex:add_rule('string', token(lexer.STRING, sq_str + word_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('"', false, false)))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S(':=_<>+-/*!()[]'))
+lex:add_rule('operator', token(lexer.OPERATOR, S(':=_<>+-/*!()[]')))
-- Labels.
-local label = token(l.LABEL, '#' * l.word)
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'label', label},
- {'operator', operator},
-}
-
-M._foldsymbols = {
- _patterns = {'[%[%]]'},
- [l.OPERATOR] = {['['] = 1, [']'] = -1}
-}
-
-return M
+lex:add_rule('label', token(lexer.LABEL, '#' * lexer.word))
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+
+return lex
diff --git a/lua/lexers/sml.lua b/lua/lexers/sml.lua
index 093e67c..ba2015e 100644
--- a/lua/lexers/sml.lua
+++ b/lua/lexers/sml.lua
@@ -1,111 +1,91 @@
--- Copyright 2017 Murray Calavera. See LICENSE.
+-- Copyright 2017-2022 Murray Calavera. See LICENSE.
-- Standard ML LPeg lexer.
-local l = require('lexer')
-local token = l.token
-
-local function mlword(words)
- return l.word_match(words, "'")
-end
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+
+local lex = lexer.new('sml')
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
+
+-- Structures.
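+-- 'structure Foo = struct' opens a new structure and 'structure Foo = Bar' aliases one;
+-- qualified uses like 'Foo.bar' highlight the prefix as a class.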
+local id = (lexer.alnum + "'" + '_')^0
+local aid = lexer.alpha * id
+local longid = (aid * '.')^0 * aid
+local struct_dec = token(lexer.KEYWORD, 'structure') * ws * token(lexer.CLASS, aid) * ws *
+ token(lexer.OPERATOR, '=') * ws
+lex:add_rule('struct_new', struct_dec * token(lexer.KEYWORD, 'struct'))
+lex:add_rule('struct_alias', struct_dec * token(lexer.CLASS, longid))
+lex:add_rule('structure', token(lexer.CLASS, aid * '.'))
+
+-- Open.
+lex:add_rule('open', token(lexer.KEYWORD, word_match('open structure functor')) * ws *
+ token(lexer.CLASS, longid))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end', 'exception', 'fn',
+ 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'orelse',
+ 'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while', --
+ 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', 'struct', 'structure'
+}))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'int', 'real', 'word', 'bool', 'char', 'string', 'unit', 'array', 'exn', 'list', 'option',
+ 'order', 'ref', 'substring', 'vector'
+}))
+
+-- Functions.
+-- `real`, `vector` and `substring` are a problem: they are also in the type list above.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName', 'explode', 'floor', 'foldl',
+ 'foldr', 'getOpt', 'hd', 'ignore', 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord',
+ 'print', 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc', 'valOf', 'vector',
+ 'o', 'abs', 'mod', 'div'
+}))
-local ws = token(l.WHITESPACE, l.space^1)
+-- Constants.
+lex:add_rule('constant', token(lexer.CONSTANT, word_match('true false nil') + lexer.upper * id))
--- single line comments are valid in successor ml
-local cl = '(*)' * l.nonnewline^0
-local comment = token(l.COMMENT, cl + l.nested_pair('(*', '*)'))
+-- Identifiers (non-symbolic).
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * id))
-local string = token(l.STRING, lpeg.P('#')^-1 * l.delimited_range('"', true))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, P('#')^-1 * lexer.range('"', true)))
-local function num(digit)
- return digit * (digit^0 * lpeg.P('_'))^0 * digit^1 + digit
-end
+-- Comments.
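+-- '(*)' single-line comments are valid in Successor ML.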
+local line_comment = lexer.to_eol('(*)')
+local block_comment = lexer.range('(*', '*)', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-local int = num(l.digit)
-local frac = lpeg.P('.') * int
+-- Numbers.
+local function num(digit) return digit * (digit^0 * '_')^0 * digit^1 + digit end
+local int = num(lexer.digit)
+local frac = '.' * int
local minus = lpeg.P('~')^-1
local exp = lpeg.S('eE') * minus * int
local real = int * frac^-1 * exp + int * frac * exp^-1
-local hex = num(l.xdigit)
+local hex = num(lexer.xdigit)
local bin = num(lpeg.S('01'))
-
-local number = token(l.NUMBER,
- lpeg.P('0w') * int
- + (lpeg.P('0wx') + lpeg.P('0xw')) * hex
- + (lpeg.P('0wb') + lpeg.P('0bw')) * bin
- + minus * lpeg.P('0x') * hex
- + minus * lpeg.P('0b') * bin
- + minus * real
- + minus * int
-)
-
-local keyword = token(l.KEYWORD, mlword{
- 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end',
- 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let',
- 'local', 'nonfix', 'of', 'op', 'orelse', 'raise', 'rec', 'then',
- 'type', 'val', 'with', 'withtype', 'while',
-
- 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
- 'struct', 'structure'
-})
-
--- includes valid symbols for identifiers
-local operator = token(l.OPERATOR, lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))
-
-local type = token(l.TYPE, mlword{
- 'int', 'real', 'word', 'bool', 'char', 'string', 'unit',
- 'array', 'exn', 'list', 'option', 'order', 'ref', 'substring', 'vector'
-})
-
--- `real`, `vector` and `substring` are a problem
-local func = token(l.FUNCTION, mlword{
- 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName',
- 'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore',
- 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print',
- 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc',
- 'valOf', 'vector',
- 'o', 'abs', 'mod', 'div'
-})
-
--- non-symbolic identifiers only
-local id = (l.alnum + "'" + '_')^0
-local aid = l.alpha * id
-local longid = (aid * lpeg.P('.'))^0 * aid
-local identifier = token(l.IDENTIFIER, l.lower * id)
-local typevar = token(l.VARIABLE, "'" * id)
-local c = mlword{'true', 'false', 'nil'}
-local const = token(l.CONSTANT, l.upper * id + c)
-local structure = token(l.CLASS, aid * lpeg.P('.'))
-
-local open
- = token(l.KEYWORD, mlword{'open', 'structure', 'functor'})
- * ws * token(l.CLASS, longid)
-
-local struct_dec
- = token(l.KEYWORD, lpeg.P('structure')) * ws
- * token(l.CLASS, aid) * ws
- * token(l.OPERATOR, lpeg.P('=')) * ws
-
-local struct_new = struct_dec * token(l.KEYWORD, lpeg.P('struct'))
-local struct_alias = struct_dec * token(l.CLASS, longid)
-
-local M = {_NAME = 'sml'}
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'number', number},
- {'struct_new', struct_new},
- {'struct_alias', struct_alias},
- {'structure', structure},
- {'open', open},
- {'type', type},
- {'keyword', keyword},
- {'function', func},
- {'string', string},
- {'operator', operator},
- {'typevar', typevar},
- {'constant', const},
- {'identifier', identifier},
-}
-
-return M
+-- LuaFormatter off
+lex:add_rule('number', token(lexer.NUMBER,
+ '0w' * int +
+ (P('0wx') + '0xw') * hex +
+ (P('0wb') + '0bw') * bin +
+ minus * '0x' * hex +
+ minus * '0b' * bin +
+ minus * real +
+ minus * int))
+-- LuaFormatter on
+
+-- Type variables.
+lex:add_rule('typevar', token(lexer.VARIABLE, "'" * id))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')))
+
+return lex
diff --git a/lua/lexers/snobol4.lua b/lua/lexers/snobol4.lua
index edf085b..0293370 100644
--- a/lua/lexers/snobol4.lua
+++ b/lua/lexers/snobol4.lua
@@ -1,64 +1,69 @@
--- Copyright 2013-2017 Michael T. Richter. See LICENSE.
+-- Copyright 2013-2022 Michael T. Richter. See LICENSE.
-- SNOBOL4 lexer.
-- This lexer works with classic SNOBOL4 as well as the CSNOBOL4 extensions.
-local l = require 'lexer'
-local token, word_match = l.token, l.word_match
-local B, P, R, S, V = lpeg.B, lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local B, P, S = lpeg.B, lpeg.P, lpeg.S
-local M = { _NAME = 'snobol4' }
+local lex = lexer.new('snobol4')
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'ABORT', 'ARRAY', 'CONTINUE', 'DEFINE', 'END', 'FRETURN', 'INPUT', 'NRETURN', 'OUTPUT', 'PUNCH',
+ 'RETURN', 'SCONTINUE', 'TABLE'
+}, true) + '&' * lexer.word))
-- Helper patterns.
-local dotted_id = l.word * (P'.' * l.word)^0
+local dotted_id = lexer.word * ('.' * lexer.word)^0
+
+-- Labels.
+lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(dotted_id)))
-local dq_str = l.delimited_range('"', true, true)
-local sq_str = l.delimited_range("'", true, true)
+-- Targets.
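+-- Lookbehinds (B) anchor goto fields: unconditional ':(label)' plus the success/failure forms
+-- ':S(label)' and ':F(label)', including those chained directly after a ')'.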
+local branch = B(lexer.space * ':(') * dotted_id * #P(')')
+local sbranch = B(lexer.space * ':' * S('SsFf') * '(') * dotted_id * #P(')')
+local sbranchx = B(')' * S('SsFf') * '(') * dotted_id * #P(')')
+lex:add_rule('target', token(lexer.LABEL, branch + sbranch + sbranchx))
-local branch = B(l.space * P':(') * dotted_id * #P')'
-local sbranch = B(l.space * P':' * S'SF' * '(') * dotted_id * #P')'
-local sbranchx = B(P')' * S'SF' * P'(') * dotted_id * #P')'
+-- Patterns.
+lex:add_rule('pattern', lexer.token(lexer.CLASS, word_match({
+ -- Tokenized as CLASS to keep these distinct from built-in functions.
+ 'ABORT', 'ANY', 'ARB', 'ARBNO', 'BAL', 'BREAK', 'BREAKX', 'FAIL', 'FENCE', 'LEN', 'NOTANY', 'POS',
+ 'REM', 'RPOS', 'RTAB', 'SPAN', 'SUCCEED', 'TAB'
+}, true) * #P('(')))
-- Token definitions.
-local bif = token(l.FUNCTION, l.word_match({
- 'APPLY', 'ARRAY', 'CHAR', 'CONVERT', 'COPY', 'DATA', 'DATE', 'DIFFER', 'DUPL',
- 'EQ', 'EVAL', 'FILE_ABSPATH', 'FILE_ISDIR', 'FREEZE', 'FUNCTION', 'GE', 'GT',
- 'HOST', 'IDENT', 'INTEGER', 'IO_FINDUNIT', 'ITEM', 'LABEL', 'LOAD', 'LPAD',
- 'LE', 'LGT', 'LT', 'NE', 'OPSYN', 'ORD', 'PROTOTYPE', 'REMDR', 'REPLACE',
- 'REVERSE', 'RPAD', 'RSORT', 'SERV_LISTEN', 'SET', 'SETEXIT', 'SIZE', 'SORT',
- 'SQRT', 'SSET', 'SUBSTR', 'TABLE', 'THAW', 'TIME', 'TRACE', 'TRIM', 'UNLOAD',
- 'VALUE', 'VDIFFER',
-}, '', true) * #l.delimited_range('()', false, true, true))
-local comment = token(l.COMMENT, l.starts_line(S'*#|;!' * l.nonnewline^0))
-local control = token(l.PREPROCESSOR, l.starts_line(P'-' * l.word))
-local identifier = token(l.DEFAULT, dotted_id)
-local keyword = token(l.KEYWORD, l.word_match({
- 'ABORT', 'ARRAY', 'CONTINUE', 'DEFINE', 'END', 'FRETURN', 'INPUT', 'NRETURN',
- 'OUTPUT', 'PUNCH', 'RETURN', 'SCONTINUE', 'TABLE',
-}, '', true) + P'&' * l.word)
-local label = token(l.LABEL, l.starts_line(dotted_id))
-local number = token(l.NUMBER, l.float + l.integer)
-local operator = token(l.OPERATOR, S'¬?$.!%*/#+-@⊥&^~\\=')
-local pattern = l.token(l.CLASS, l.word_match({ -- "class" to keep distinct
- 'ABORT', 'ANY', 'ARB', 'ARBNO', 'BAL', 'BREAK', 'BREAKX', 'FAIL', 'FENCE',
- 'LEN', 'NOTANY', 'POS', 'REM', 'RPOS', 'RTAB', 'SPAN', 'SUCCEED', 'TAB',
-}, '', true) * #l.delimited_range('()', false, true, true))
-local str = token(l.STRING, sq_str + dq_str)
-local target = token(l.LABEL, branch + sbranch + sbranchx)
-local ws = token(l.WHITESPACE, l.space^1)
-
-M._rules = {
- { 'comment', comment },
- { 'control', control },
- { 'string', str },
- { 'number', number },
- { 'keyword', keyword },
- { 'label', label },
- { 'target', target },
- { 'pattern', pattern },
- { 'built-in', bif },
- { 'operator', operator },
- { 'identifier', identifier },
- { 'whitespace', ws },
-}
-
-return M
+lex:add_rule('built-in', token(lexer.FUNCTION, word_match({
+ 'APPLY', 'ARRAY', 'CHAR', 'CONVERT', 'COPY', 'DATA', 'DATE', 'DIFFER', 'DUPL', 'EQ', 'EVAL',
+ 'FILE_ABSPATH', 'FILE_ISDIR', 'FREEZE', 'FUNCTION', 'GE', 'GT', 'HOST', 'IDENT', 'INTEGER',
+ 'IO_FINDUNIT', 'ITEM', 'LABEL', 'LOAD', 'LPAD', 'LE', 'LGT', 'LT', 'NE', 'OPSYN', 'ORD',
+ 'PROTOTYPE', 'REMDR', 'REPLACE', 'REVERSE', 'RPAD', 'RSORT', 'SERV_LISTEN', 'SET', 'SETEXIT',
+ 'SIZE', 'SORT', 'SQRT', 'SSET', 'SUBSTR', 'TABLE', 'THAW', 'TIME', 'TRACE', 'TRIM', 'UNLOAD',
+ 'VALUE', 'VDIFFER'
+}, true) * #P('(')))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.DEFAULT, dotted_id))
+
+-- Strings.
+local dq_str = lexer.range('"', true, false)
+local sq_str = lexer.range("'", true, false)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(lexer.to_eol(S('*#|;!')))))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Control.
+lex:add_rule('control', token(lexer.PREPROCESSOR, lexer.starts_line('-' * lexer.word)))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('¬?$.!%*/#+-@⊥&^~\\=')))
+
+return lex
diff --git a/lua/lexers/spin.lua b/lua/lexers/spin.lua
index f5add67..c906289 100644
--- a/lua/lexers/spin.lua
+++ b/lua/lexers/spin.lua
@@ -1,135 +1,68 @@
--- Copyright 2017 David B. Lamkins <david@lamkins.net>. See LICENSE.
--- Spin LPeg lexer, see https://www.parallax.com/microcontrollers/propeller
+-- Copyright 2017-2022 David B. Lamkins <david@lamkins.net>. See LICENSE.
+-- Spin LPeg lexer, see https://www.parallax.com/microcontrollers/propeller.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local M = {_NAME = 'spin'}
+local lex = lexer.new('spin')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = (P("''") + P("'")) * l.nonnewline^0
-local block_comment = P('{') * (l.any - P('}'))^0 * P('}')^-1
-local block_doc_comment = P('{{') * (l.any - P('}}'))^0 * P('}}')^-1
-local comment = token(l.COMMENT, line_comment + block_doc_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ '_clkfreq', '_clkmode', '_free', '_stack', '_xinfreq', 'abort', 'abs', 'absneg', 'add', 'addabs',
+ 'adds', 'addsx', 'addx', 'and', 'andn', 'byte', 'bytefill', 'bytemove', 'call', 'case', 'chipver',
+ 'clkfreq', 'clkmode', 'clkset', 'cmp', 'cmps', 'cmpsub', 'cmpsx', 'cmpx', 'cnt', 'cogid',
+ 'coginit', 'cognew', 'cogstop', 'con', 'constant', 'ctra', 'ctrb', 'dat', 'dira', 'dirb', 'djnz',
+ 'else', 'elseif', 'elseifnot', 'enc', 'false', 'file', 'fit', 'float', 'from', 'frqa', 'frqb',
+ 'hubop', 'if', 'ifnot', 'if_a', 'if_ae', 'if_always', 'if_b', 'if_be', 'if_c', 'if_c_and_nz',
+ 'if_c_and_z', 'if_c_eq_z', 'if_c_ne_z', 'if_c_or_nz', 'if_c_or_z', 'if_e', 'if_nc',
+ 'if_nc_and_nz', 'if_nc_and_z', 'if_nc_or_nz', 'if_nc_or_z', 'if_ne', 'if_never', 'if_nz',
+ 'if_nz_and_c', 'if_nz_and_nc', 'if_nz_or_c', 'if_nz_or_nc', 'if_z', 'if_z_and_c', 'if_z_and_nc',
+ 'if_z_eq_c', 'if_z_ne_c', 'if_z_or_c', 'if_z_or_nc', 'ina', 'inb', 'jmp', 'jmpret', 'lockclr',
+ 'locknew', 'lockret', 'lockset', 'long', 'longfill', 'longmove', 'lookdown', 'lookdownz',
+ 'lookup', 'lookupz', 'max', 'maxs', 'min', 'mins', 'mov', 'movd', 'movi', 'movs', 'mul', 'muls',
+ 'muxc', 'muxnc', 'muxnz', 'muxz', 'neg', 'negc', 'negnc', 'negnz', 'negx', 'negz', 'next', 'nop',
+ 'not', 'nr', 'obj', 'ones', 'or', 'org', 'other', 'outa', 'outb', 'par', 'phsa', 'phsb', 'pi',
+ 'pll1x', 'pll2x', 'pll4x', 'pll8x', 'pll16x', 'posx', 'pri', 'pub', 'quit', 'rcfast', 'rcl',
+ 'rcr', 'rcslow', 'rdbyte', 'rdlong', 'rdword', 'reboot', 'repeat', 'res', 'result', 'ret',
+ 'return', 'rev', 'rol', 'ror', 'round', 'sar', 'shl', 'shr', 'spr', 'step', 'strcomp', 'string',
+ 'strsize', 'sub', 'subabs', 'subs', 'subsx', 'subx', 'sumc', 'sumnc', 'sumnz', 'sumz', 'test',
+ 'testn', 'tjnz', 'tjz', 'to', 'true', 'trunc', 'until', 'var', 'vcfg', 'vscl', 'waitcnt',
+ 'waitpeq', 'waitpne', 'waitvid', 'wc', 'while', 'word', 'wordfill', 'wordmove', 'wr', 'wrbyte',
+ 'wrlong', 'wz', 'xinput', 'xor', 'xtal1', 'xtal2', 'xtal3'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
+
+-- Comments.
+local line_comment = lexer.to_eol(P("''") + "'")
+local block_comment = lexer.range('{', '}')
+local block_doc_comment = lexer.range('{{', '}}')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_doc_comment + block_comment))
-- Numbers.
local bin = '%' * S('01_')^1
-local ter = P('%%') * (R('03') + P('_'))^1
-local hex = P('$') * (R('09') + R('af') + R('AF') + P('_'))^1
-local dec = (R('09') + P('_'))^1
+local ter = '%%' * (R('03') + '_')^1
+local hex = '$' * (lexer.xdigit + '_')^1
+local dec = (lexer.digit + '_')^1
local int = bin + ter + dec + hex
-local rad = P('.') - P('..')
+local rad = P('.') - '..'
local exp = (S('Ee') * S('+-')^-1 * int)^-1
local flt = dec * (rad * dec)^-1 * exp + dec^-1 * rad * dec * exp
-local number = token(l.NUMBER, flt + int)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- '_clkfreq', '_clkmode', '_free', '_stack', '_xinfreq', 'abort', 'abs',
- 'absneg', 'add', 'addabs', 'adds', 'addsx', 'addx', 'and', 'andn', 'byte',
- 'bytefill', 'bytemove', 'call', 'case', 'chipver', 'clkfreq', 'clkmode',
- 'clkset', 'cmp', 'cmps', 'cmpsub', 'cmpsx', 'cmpx', 'cnt', 'cogid',
- 'coginit', 'cognew', 'cogstop', 'con', 'constant', 'ctra', 'ctrb', 'dat',
- 'dira', 'dirb', 'djnz', 'else', 'elseif', 'elseifnot', 'enc', 'false',
- 'file', 'fit', 'float', 'from', 'frqa', 'frqb', 'hubop', 'if', 'ifnot',
- 'if_a', 'if_ae', 'if_always', 'if_b', 'if_be', 'if_c', 'if_c_and_nz',
- 'if_c_and_z', 'if_c_eq_z', 'if_c_ne_z', 'if_c_or_nz', 'if_c_or_z', 'if_e',
- 'if_nc', 'if_nc_and_nz', 'if_nc_and_z', 'if_nc_or_nz', 'if_nc_or_z',
- 'if_ne', 'if_never', 'if_nz', 'if_nz_and_c', 'if_nz_and_nc', 'if_nz_or_c',
- 'if_nz_or_nc', 'if_z', 'if_z_and_c', 'if_z_and_nc', 'if_z_eq_c',
- 'if_z_ne_c', 'if_z_or_c', 'if_z_or_nc', 'ina', 'inb', 'jmp', 'jmpret',
- 'lockclr', 'locknew', 'lockret', 'lockset', 'long', 'longfill', 'longmove',
- 'lookdown', 'lookdownz', 'lookup', 'lookupz', 'max', 'maxs', 'min', 'mins',
- 'mov', 'movd', 'movi', 'movs', 'mul', 'muls', 'muxc', 'muxnc', 'muxnz',
- 'muxz', 'neg', 'negc', 'negnc', 'negnz', 'negx', 'negz', 'next', 'nop',
- 'not', 'nr', 'obj', 'ones', 'or', 'org', 'other', 'outa', 'outb', 'par',
- 'phsa', 'phsb', 'pi', 'pll1x', 'pll2x', 'pll4x', 'pll8x', 'pll16x', 'posx',
- 'pri', 'pub', 'quit', 'rcfast', 'rcl', 'rcr', 'rcslow', 'rdbyte', 'rdlong',
- 'rdword', 'reboot', 'repeat', 'res', 'result', 'ret', 'return', 'rev',
- 'rol', 'ror', 'round', 'sar', 'shl', 'shr', 'spr', 'step', 'strcomp',
- 'string', 'strsize', 'sub', 'subabs', 'subs', 'subsx', 'subx', 'sumc',
- 'sumnc', 'sumnz', 'sumz', 'test', 'testn', 'tjnz', 'tjz', 'to', 'true',
- 'trunc', 'until', 'var', 'vcfg', 'vscl', 'waitcnt', 'waitpeq', 'waitpne',
- 'waitvid', 'wc', 'while', 'word', 'wordfill', 'wordmove', 'wr', 'wrbyte',
- 'wrlong', 'wz', 'xinput', 'xor', 'xtal1', 'xtal2', 'xtal3'
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('number', token(lexer.NUMBER, flt + int))
-- Operators.
-local punct = S('+-/*<>~!&=^|?:.()[]@#\\')
-local dec = P('--')
-local inc = P('++')
-local sqrt = P('^^')
-local abs = P('||')
-local sexw = P('~~')
-local decode = P('|<')
-local encode = P('>|')
-local objref = P('@@')
-local assign = P(':=')
-local plus_a = P('+=')
-local minus_a = P('-=')
-local mull_a = P('*=')
-local div_a = P('/=')
-local mulu = P('**')
-local mulu_a = P('**=')
-local mod = P('//')
-local mod_a = P('//=')
-local limmin = P('#>')
-local limmin_a = P('#>=')
-local limmax = P('<#')
-local limmax_a = P('<#=')
-local sar = P('~>')
-local sar_a = P('~>=')
-local shl = P('<<')
-local shl_a = P('<<=')
-local shr = P('>>')
-local shr_a = P('>>=')
-local rol = P('<-')
-local rol_a = P('<-=')
-local ror = P('->')
-local ror_a = P('->=')
-local rev = P('><')
-local rev_a = P('><=')
-local band_a = P('&=')
-local bor_a = P('|=')
-local sand_a = P('and=')
-local sor_a = P('or=')
-local equal = P('==')
-local equal_a = P('===')
-local nequal = P('<>')
-local nequal_a = P('<>=')
-local less_a = P('<=')
-local greater_a = P('>=')
-local leq = P('=<')
-local leq_a = P('=<=')
-local geq = P('=>')
-local geq_a = P('=>=')
-local dots = P('..')
-local operator = token(l.OPERATOR, dec + inc + sqrt + abs + sexw +
- decode + encode + objref + assign + plus_a + minus_a + mull_a + div_a +
- mulu + mulu_a + mod + mod_a + limmin + limmin_a + limmax + limmax_a +
- sar + sar_a + shl + shl_a + shr + shr_a + rol + rol_a + ror + ror_a +
- rev + rev_a + band_a + bor_a + sand_a + sor_a + equal + equal_a +
- nequal + nequal_a + less_a + greater_a + leq + leq_a + geq + geq_a +
- dots + punct)
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'keyword', keyword},
- {'number', number},
- {'operator', operator},
- {'identifier', identifier},
- {'string', string},
-}
+lex:add_rule('operator', token(lexer.OPERATOR,
+ P('--') + '++' + '^^' + '||' + '~~' + '|<' + '>|' + '@@' + ':=' + '+=' + '-=' + '*=' + '/=' + '**' +
+ '**=' + '//' + '//=' + '#>' + '#>=' + '<#' + '<#=' + '~>' + '~>=' + '<<' + '<<=' + '>>' + '>>=' +
+ '<-' + '<-=' + '->' + '->=' + '><' + '><=' + '&=' + '|=' + 'and=' + 'or=' + '==' + '===' + '<>' +
+ '<>=' + '<=' + '>=' + '=<' + '=<=' + '=>' + '=>=' + '..' + S('+-/*<>~!&=^|?:.()[]@#\\')))
-return M
+return lex
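
For reference, LPeg's `+` is an ordered choice tried left to right, so in the consolidated operator rule above a compound like `**=` lexes as `**` followed by `=` (both still operator tokens). A minimal standalone sketch of that behavior:

  local lpeg = require('lpeg')
  local P = lpeg.P
  -- Ordered choice commits to the first alternative that matches.
  print((P('**') + P('**=')):match('**='))  --> 3 (only '**' consumed)
  print((P('**=') + P('**')):match('**='))  --> 4 (the full token consumed)
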
diff --git a/lua/lexers/sql.lua b/lua/lexers/sql.lua
index f280e85..0789210 100644
--- a/lua/lexers/sql.lua
+++ b/lua/lexers/sql.lua
@@ -1,79 +1,62 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- SQL LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'sql'}
+local lex = lexer.new('sql')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = (P('--') + '#') * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
+ 'add', 'all', 'alter', 'analyze', 'and', 'as', 'asc', 'asensitive', 'before', 'between', 'bigint',
+ 'binary', 'blob', 'both', 'by', 'call', 'cascade', 'case', 'change', 'char', 'character', 'check',
+ 'collate', 'column', 'condition', 'connection', 'constraint', 'continue', 'convert', 'create',
+ 'cross', 'current_date', 'current_time', 'current_timestamp', 'current_user', 'cursor',
+ 'database', 'databases', 'day_hour', 'day_microsecond', 'day_minute', 'day_second', 'dec',
+ 'decimal', 'declare', 'default', 'delayed', 'delete', 'desc', 'describe', 'deterministic',
+ 'distinct', 'distinctrow', 'div', 'double', 'drop', 'dual', 'each', 'else', 'elseif', 'enclosed',
+ 'escaped', 'exists', 'exit', 'explain', 'false', 'fetch', 'float', 'for', 'force', 'foreign',
+ 'from', 'fulltext', 'goto', 'grant', 'group', 'having', 'high_priority', 'hour_microsecond',
+ 'hour_minute', 'hour_second', 'if', 'ignore', 'in', 'index', 'infile', 'inner', 'inout',
+ 'insensitive', 'insert', 'int', 'integer', 'interval', 'into', 'is', 'iterate', 'join', 'key',
+ 'keys', 'kill', 'leading', 'leave', 'left', 'like', 'limit', 'lines', 'load', 'localtime',
+ 'localtimestamp', 'lock', 'long', 'longblob', 'longtext', 'loop', 'low_priority', 'match',
+ 'mediumblob', 'mediumint', 'mediumtext', 'middleint', 'minute_microsecond', 'minute_second',
+ 'mod', 'modifies', 'natural', 'not', 'no_write_to_binlog', 'null', 'numeric', 'on', 'optimize',
+ 'option', 'optionally', 'or', 'order', 'out', 'outer', 'outfile', 'precision', 'primary',
+ 'procedure', 'purge', 'read', 'reads', 'real', 'references', 'regexp', 'rename', 'repeat',
+ 'replace', 'require', 'restrict', 'return', 'revoke', 'right', 'rlike', 'schema', 'schemas',
+ 'second_microsecond', 'select', 'sensitive', 'separator', 'set', 'show', 'smallint', 'soname',
+ 'spatial', 'specific', 'sql', 'sqlexception', 'sqlstate', 'sqlwarning', 'sql_big_result',
+ 'sql_calc_found_rows', 'sql_small_result', 'ssl', 'starting', 'straight_join', 'table',
+ 'terminated', 'text', 'then', 'tinyblob', 'tinyint', 'tinytext', 'to', 'trailing', 'trigger',
+ 'true', 'undo', 'union', 'unique', 'unlock', 'unsigned', 'update', 'usage', 'use', 'using',
+ 'utc_date', 'utc_time', 'utc_timestamp', 'values', 'varbinary', 'varchar', 'varcharacter',
+ 'varying', 'when', 'where', 'while', 'with', 'write', 'xor', 'year_month', 'zerofill'
+}, true)))
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local bt_str = l.delimited_range('`')
-local string = token(l.STRING, sq_str + dq_str + bt_str)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'add', 'all', 'alter', 'analyze', 'and', 'as', 'asc', 'asensitive', 'before',
- 'between', 'bigint', 'binary', 'blob', 'both', 'by', 'call', 'cascade',
- 'case', 'change', 'char', 'character', 'check', 'collate', 'column',
- 'condition', 'connection', 'constraint', 'continue', 'convert', 'create',
- 'cross', 'current_date', 'current_time', 'current_timestamp', 'current_user',
- 'cursor', 'database', 'databases', 'day_hour', 'day_microsecond',
- 'day_minute', 'day_second', 'dec', 'decimal', 'declare', 'default', 'delayed',
- 'delete', 'desc', 'describe', 'deterministic', 'distinct', 'distinctrow',
- 'div', 'double', 'drop', 'dual', 'each', 'else', 'elseif', 'enclosed',
- 'escaped', 'exists', 'exit', 'explain', 'false', 'fetch', 'float', 'for',
- 'force', 'foreign', 'from', 'fulltext', 'goto', 'grant', 'group', 'having',
- 'high_priority', 'hour_microsecond', 'hour_minute', 'hour_second', 'if',
- 'ignore', 'in', 'index', 'infile', 'inner', 'inout', 'insensitive', 'insert',
- 'int', 'integer', 'interval', 'into', 'is', 'iterate', 'join', 'key', 'keys',
- 'kill', 'leading', 'leave', 'left', 'like', 'limit', 'lines', 'load',
- 'localtime', 'localtimestamp', 'lock', 'long', 'longblob', 'longtext', 'loop',
- 'low_priority', 'match', 'mediumblob', 'mediumint', 'mediumtext', 'middleint',
- 'minute_microsecond', 'minute_second', 'mod', 'modifies', 'natural', 'not',
- 'no_write_to_binlog', 'null', 'numeric', 'on', 'optimize', 'option',
- 'optionally', 'or', 'order', 'out', 'outer', 'outfile', 'precision',
- 'primary', 'procedure', 'purge', 'read', 'reads', 'real', 'references',
- 'regexp', 'rename', 'repeat', 'replace', 'require', 'restrict', 'return',
- 'revoke', 'right', 'rlike', 'schema', 'schemas', 'second_microsecond',
- 'select', 'sensitive', 'separator', 'set', 'show', 'smallint', 'soname',
- 'spatial', 'specific', 'sql', 'sqlexception', 'sqlstate', 'sqlwarning',
- 'sql_big_result', 'sql_calc_found_rows', 'sql_small_result', 'ssl',
- 'starting', 'straight_join', 'table', 'terminated', 'text', 'then',
- 'tinyblob', 'tinyint', 'tinytext', 'to', 'trailing', 'trigger', 'true',
- 'undo', 'union', 'unique', 'unlock', 'unsigned', 'update', 'usage', 'use',
- 'using', 'utc_date', 'utc_time', 'utc_timestamp', 'values', 'varbinary',
- 'varchar', 'varcharacter', 'varying', 'when', 'where', 'while', 'with',
- 'write', 'xor', 'year_month', 'zerofill'
-}, nil, true))
+-- Comments.
+local line_comment = lexer.to_eol(P('--') + '#')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
-local operator = token(l.OPERATOR, S(',()'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S(',()')))
-return M
+return lex
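
For reference, the trailing `true` passed to word_match above enables case-insensitive matching (the old API spelled this `word_match(words, nil, true)`), so SELECT, Select and select all lex as keywords. A minimal sketch, assuming the bundled lexer module is on the package path:

  local lexer = require('lexer')
  local kw = lexer.word_match('select from where', true)
  print(kw:match('SELECT'))    --> 7 (case-insensitive hit)
  print(kw:match('selected'))  --> nil (whole words only)
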
diff --git a/lua/lexers/strace.lua b/lua/lexers/strace.lua
index 3547dd2..846c4fc 100644
--- a/lua/lexers/strace.lua
+++ b/lua/lexers/strace.lua
@@ -1,34 +1,35 @@
-- Copyright 2017-2021 Marc André Tanner. See LICENSE.
-- strace(1) output lexer
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
local S, B = lpeg.S, lpeg.B
-local M = {_NAME = 'strace'}
-
-local ws = token(l.WHITESPACE, l.space^1)
-local string = token(l.STRING, l.delimited_range('"', true) + l.delimited_range("'", true))
-local number = token(l.NUMBER, l.float + l.integer)
-local constant = token(l.CONSTANT, (l.upper + '_') * (l.upper + l.digit + '_')^0)
-local syscall = token(l.KEYWORD, l.starts_line(l.word))
-local operator = token(l.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}'))
-local comment = token(l.COMMENT, l.nested_pair('/*', '*/') + (l.delimited_range('()') * l.newline))
-local result = token(l.TYPE, B(' = ') * l.integer)
-local identifier = token(l.IDENTIFIER, l.word)
-
-M._rules = {
- {'whitespace', ws},
- {'syscall', syscall},
- {'constant', constant},
- {'string', string},
- {'comment', comment},
- {'result', result},
- {'identifier', identifier},
- {'number', number},
- {'operator', operator},
-}
-
-M._LEXBYLINE = true
-
-return M
+local lex = lexer.new('strace', {lex_by_line = true})
+
+-- Whitespace
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Syscall
+lex:add_rule('syscall', token(lexer.KEYWORD, lexer.starts_line(lexer.word)))
+
+-- Upper case constants
+lex:add_rule('constant', token(lexer.CONSTANT,
+ (lexer.upper + '_') * (lexer.upper + lexer.digit + '_')^0))
+
+-- Single and double quoted strings
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments and text in parentheses at the line end
+local comment = lexer.range('/*', '*/')
+local description = lexer.range('(', ')') * lexer.newline
+lex:add_rule('comment', token(lexer.COMMENT, comment + description))
+
+lex:add_rule('result', token(lexer.TYPE, B(' = ') * lexer.integer))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}')))
+
+return lex
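
For reference, lpeg.B in the result rule above is a zero-width look-behind: the return value of a syscall is styled only when the three characters before it are ' = '. A minimal sketch (match positions are 1-based):

  local lpeg = require('lpeg')
  local B, C, R = lpeg.B, lpeg.C, lpeg.R
  local result = B(' = ') * C(R('09')^1)
  print(result:match('close(3) = 0', 12))  --> 0 (look-behind sees ' = ')
  print(result:match('close(3) = 0', 1))   --> nil (nothing to look back at)
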
diff --git a/lua/lexers/systemd.lua b/lua/lexers/systemd.lua
index 5f9e618..8557a70 100644
--- a/lua/lexers/systemd.lua
+++ b/lua/lexers/systemd.lua
@@ -1,443 +1,133 @@
--- Copyright 2016 Christian Hesse
+-- Copyright 2016-2022 Christian Hesse. See LICENSE.
-- systemd unit file LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'systemd'}
+local lex = lexer.new('systemd', {lex_by_line = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, l.starts_line(S(';#')) * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local section_word = word_match{
- 'Automount',
- 'BusName',
- 'Install',
- 'Mount',
- 'Path',
- 'Service',
- 'Service',
- 'Socket',
- 'Timer',
- 'Unit'
-}
-local string = token(l.STRING, sq_str + dq_str + '[' * section_word * ']')
-
--- Numbers.
-local dec = l.digit^1 * ('_' * l.digit^1)^0
-local oct_num = '0' * S('01234567_')^1
-local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
-local number = token(l.NUMBER, (l.float + integer))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- -- boolean values
- 'true',
- 'false',
- 'on',
- 'off',
- 'yes',
- 'no',
-
- -- service types
- 'forking',
- 'simple',
- 'oneshot',
- 'dbus',
- 'notify',
- 'idle',
-
- -- special system units
- 'basic.target',
- 'ctrl-alt-del.target',
- 'cryptsetup.target',
- 'dbus.service',
- 'dbus.socket',
- 'default.target',
- 'display-manager.service',
- 'emergency.target',
- 'exit.target',
- 'final.target',
- 'getty.target',
- 'graphical.target',
- 'hibernate.target',
- 'hybrid-sleep.target',
- 'halt.target',
- 'initrd-fs.target',
- 'kbrequest.target',
- 'kexec.target',
- 'local-fs.target',
- 'multi-user.target',
- 'network-online.target',
- 'paths.target',
- 'poweroff.target',
- 'reboot.target',
- 'remote-fs.target',
- 'rescue.target',
- 'initrd-root-fs.target',
- 'runlevel2.target',
- 'runlevel3.target',
- 'runlevel4.target',
- 'runlevel5.target',
- 'shutdown.target',
- 'sigpwr.target',
- 'sleep.target',
- 'slices.target',
- 'sockets.target',
- 'suspend.target',
- 'swap.target',
- 'sysinit.target',
- 'syslog.socket',
- 'system-update.target',
- 'timers.target',
- 'umount.target',
-
- -- special system units for devices
- 'bluetooth.target',
- 'printer.target',
- 'smartcard.target',
- 'sound.target',
-
- -- special passive system units
- 'cryptsetup-pre.target',
- 'local-fs-pre.target',
- 'network.target',
- 'network-pre.target',
- 'nss-lookup.target',
- 'nss-user-lookup.target',
- 'remote-fs-pre.target',
- 'rpcbind.target',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- Boolean values.
+ 'true', 'false', 'on', 'off', 'yes', 'no',
+ -- Service types.
+ 'forking', 'simple', 'oneshot', 'dbus', 'notify', 'idle',
+ -- Special system units.
+ 'basic.target', 'ctrl-alt-del.target', 'cryptsetup.target', 'dbus.service', 'dbus.socket',
+ 'default.target', 'display-manager.service', 'emergency.target', 'exit.target', 'final.target',
+ 'getty.target', 'graphical.target', 'hibernate.target', 'hybrid-sleep.target', 'halt.target',
+ 'initrd-fs.target', 'kbrequest.target', 'kexec.target', 'local-fs.target', 'multi-user.target',
+ 'network-online.target', 'paths.target', 'poweroff.target', 'reboot.target', 'remote-fs.target',
+ 'rescue.target', 'initrd-root-fs.target', 'runlevel2.target', 'runlevel3.target',
+ 'runlevel4.target', 'runlevel5.target', 'shutdown.target', 'sigpwr.target', 'sleep.target',
+ 'slices.target', 'sockets.target', 'suspend.target', 'swap.target', 'sysinit.target',
+ 'syslog.socket', 'system-update.target', 'timers.target', 'umount.target',
+ -- Special system units for devices.
+ 'bluetooth.target', 'printer.target', 'smartcard.target', 'sound.target',
+ -- Special passive system units.
+ 'cryptsetup-pre.target', 'local-fs-pre.target', 'network.target', 'network-pre.target',
+ 'nss-lookup.target', 'nss-user-lookup.target', 'remote-fs-pre.target', 'rpcbind.target',
'time-sync.target',
-
- -- specail slice units
- '-.slice',
- 'system.slice',
- 'user.slice',
- 'machine.slice',
-
- -- environment variables
- 'PATH',
- 'LANG',
- 'USER',
- 'LOGNAME',
- 'HOME',
- 'SHELL',
- 'XDG_RUNTIME_DIR',
- 'XDG_SESSION_ID',
- 'XDG_SEAT',
- 'XDG_VTNR',
- 'MAINPID',
- 'MANAGERPID',
- 'LISTEN_FDS',
- 'LISTEN_PID',
- 'LISTEN_FDNAMES',
- 'NOTIFY_SOCKET',
- 'WATCHDOG_PID',
- 'WATCHDOG_USEC',
- 'TERM'
-}, '.-'))
+  -- Special slice units.
+ '-.slice', 'system.slice', 'user.slice', 'machine.slice',
+ -- Environment variables.
+ 'PATH', 'LANG', 'USER', 'LOGNAME', 'HOME', 'SHELL', 'XDG_RUNTIME_DIR', 'XDG_SESSION_ID',
+ 'XDG_SEAT', 'XDG_VTNR', 'MAINPID', 'MANAGERPID', 'LISTEN_FDS', 'LISTEN_PID', 'LISTEN_FDNAMES',
+ 'NOTIFY_SOCKET', 'WATCHDOG_PID', 'WATCHDOG_USEC', 'TERM'
+}))
-- Options.
-local option_word = word_match{
- -- unit section options
- 'Description',
- 'Documentation',
- 'Requires',
- 'Requisite',
- 'Wants',
- 'BindsTo',
- 'PartOf',
- 'Conflicts',
- 'Before',
- 'After',
- 'OnFailure',
- 'PropagatesReloadTo',
- 'ReloadPropagatedFrom',
- 'JoinsNamespaceOf',
- 'RequiresMountsFor',
- 'OnFailureJobMode',
- 'IgnoreOnIsolate',
- 'StopWhenUnneeded',
- 'RefuseManualStart',
- 'RefuseManualStop',
- 'AllowIsolate',
- 'DefaultDependencies',
- 'JobTimeoutSec',
- 'JobTimeoutAction',
- 'JobTimeoutRebootArgument',
- 'StartLimitInterval',
- 'StartLimitBurst',
- 'StartLimitAction',
- 'RebootArgument',
- 'ConditionArchitecture',
- 'ConditionVirtualization',
- 'ConditionHost',
- 'ConditionKernelCommandLine',
- 'ConditionSecurity',
- 'ConditionCapability',
- 'ConditionACPower',
- 'ConditionNeedsUpdate',
- 'ConditionFirstBoot',
- 'ConditionPathExists',
- 'ConditionPathExistsGlob',
- 'ConditionPathIsDirectory',
- 'ConditionPathIsSymbolicLink',
- 'ConditionPathIsMountPoint',
- 'ConditionPathIsReadWrite',
- 'ConditionDirectoryNotEmpty',
- 'ConditionFileNotEmpty',
- 'ConditionFileIsExecutable',
- 'AssertArchitecture',
- 'AssertVirtualization',
- 'AssertHost',
- 'AssertKernelCommandLine',
- 'AssertSecurity',
- 'AssertCapability',
- 'AssertACPower',
- 'AssertNeedsUpdate',
- 'AssertFirstBoot',
- 'AssertPathExists',
- 'AssertPathExistsGlob',
- 'AssertPathIsDirectory',
- 'AssertPathIsSymbolicLink',
- 'AssertPathIsMountPoint',
- 'AssertPathIsReadWrite',
- 'AssertDirectoryNotEmpty',
- 'AssertFileNotEmpty',
- 'AssertFileIsExecutable',
- 'SourcePath',
-
- -- install section options
- 'Alias',
- 'WantedBy',
- 'RequiredBy',
- 'Also',
- 'DefaultInstance',
-
- -- service section options
- 'Type',
- 'RemainAfterExit',
- 'GuessMainPID',
- 'PIDFile',
- 'BusName',
- 'BusPolicy',
- 'ExecStart',
- 'ExecStartPre',
- 'ExecStartPost',
- 'ExecReload',
- 'ExecStop',
- 'ExecStopPost',
- 'RestartSec',
- 'TimeoutStartSec',
- 'TimeoutStopSec',
- 'TimeoutSec',
- 'RuntimeMaxSec',
- 'WatchdogSec',
- 'Restart',
- 'SuccessExitStatus',
- 'RestartPreventExitStatus',
- 'RestartForceExitStatus',
- 'PermissionsStartOnly',
- 'RootDirectoryStartOnly',
- 'NonBlocking',
- 'NotifyAccess',
- 'Sockets',
- 'FailureAction',
- 'FileDescriptorStoreMax',
- 'USBFunctionDescriptors',
- 'USBFunctionStrings',
-
- -- socket section options
- 'ListenStream',
- 'ListenDatagram',
- 'ListenSequentialPacket',
- 'ListenFIFO',
- 'ListenSpecial',
- 'ListenNetlink',
- 'ListenMessageQueue',
- 'ListenUSBFunction',
- 'SocketProtocol',
- 'BindIPv6Only',
- 'Backlog',
- 'BindToDevice',
- 'SocketUser',
- 'SocketGroup',
- 'SocketMode',
- 'DirectoryMode',
- 'Accept',
- 'Writable',
- 'MaxConnections',
- 'KeepAlive',
- 'KeepAliveTimeSec',
- 'KeepAliveIntervalSec',
- 'KeepAliveProbes',
- 'NoDelay',
- 'Priority',
- 'DeferAcceptSec',
- 'ReceiveBuffer',
- 'SendBuffer',
- 'IPTOS',
- 'IPTTL',
- 'Mark',
- 'ReusePort',
- 'SmackLabel',
- 'SmackLabelIPIn',
- 'SmackLabelIPOut',
- 'SELinuxContextFromNet',
- 'PipeSize',
- 'MessageQueueMaxMessages',
- 'MessageQueueMessageSize',
- 'FreeBind',
- 'Transparent',
- 'Broadcast',
- 'PassCredentials',
- 'PassSecurity',
- 'TCPCongestion',
- 'ExecStartPre',
- 'ExecStartPost',
- 'ExecStopPre',
- 'ExecStopPost',
- 'TimeoutSec',
- 'Service',
- 'RemoveOnStop',
- 'Symlinks',
- 'FileDescriptorName',
+lex:add_rule('option', token(lexer.PREPROCESSOR, word_match{
+ -- Unit section.
+ 'Description', 'Documentation', 'Requires', 'Requisite', 'Wants', 'BindsTo', 'PartOf',
+ 'Conflicts', 'Before', 'After', 'OnFailure', 'PropagatesReloadTo', 'ReloadPropagatedFrom',
+ 'JoinsNamespaceOf', 'RequiresMountsFor', 'OnFailureJobMode', 'IgnoreOnIsolate',
+ 'StopWhenUnneeded', 'RefuseManualStart', 'RefuseManualStop', 'AllowIsolate',
+ 'DefaultDependencies', 'JobTimeoutSec', 'JobTimeoutAction', 'JobTimeoutRebootArgument',
+ 'StartLimitInterval', 'StartLimitBurst', 'StartLimitAction', 'RebootArgument',
+ 'ConditionArchitecture', 'ConditionVirtualization', 'ConditionHost', 'ConditionKernelCommandLine',
+ 'ConditionSecurity', 'ConditionCapability', 'ConditionACPower', 'ConditionNeedsUpdate',
+ 'ConditionFirstBoot', 'ConditionPathExists', 'ConditionPathExistsGlob',
+ 'ConditionPathIsDirectory', 'ConditionPathIsSymbolicLink', 'ConditionPathIsMountPoint',
+ 'ConditionPathIsReadWrite', 'ConditionDirectoryNotEmpty', 'ConditionFileNotEmpty',
+ 'ConditionFileIsExecutable', 'AssertArchitecture', 'AssertVirtualization', 'AssertHost',
+ 'AssertKernelCommandLine', 'AssertSecurity', 'AssertCapability', 'AssertACPower',
+ 'AssertNeedsUpdate', 'AssertFirstBoot', 'AssertPathExists', 'AssertPathExistsGlob',
+ 'AssertPathIsDirectory', 'AssertPathIsSymbolicLink', 'AssertPathIsMountPoint',
+ 'AssertPathIsReadWrite', 'AssertDirectoryNotEmpty', 'AssertFileNotEmpty',
+ 'AssertFileIsExecutable', 'SourcePath',
+ -- Install section.
+ 'Alias', 'WantedBy', 'RequiredBy', 'Also', 'DefaultInstance',
+ -- Service section.
+ 'Type', 'RemainAfterExit', 'GuessMainPID', 'PIDFile', 'BusName', 'BusPolicy', 'ExecStart',
+ 'ExecStartPre', 'ExecStartPost', 'ExecReload', 'ExecStop', 'ExecStopPost', 'RestartSec',
+ 'TimeoutStartSec', 'TimeoutStopSec', 'TimeoutSec', 'RuntimeMaxSec', 'WatchdogSec', 'Restart',
+ 'SuccessExitStatus', 'RestartPreventExitStatus', 'RestartForceExitStatus', 'PermissionsStartOnly',
+ 'RootDirectoryStartOnly', 'NonBlocking', 'NotifyAccess', 'Sockets', 'FailureAction',
+ 'FileDescriptorStoreMax', 'USBFunctionDescriptors', 'USBFunctionStrings',
+ -- Socket section.
+ 'ListenStream', 'ListenDatagram', 'ListenSequentialPacket', 'ListenFIFO', 'ListenSpecial',
+ 'ListenNetlink', 'ListenMessageQueue', 'ListenUSBFunction', 'SocketProtocol', 'BindIPv6Only',
+ 'Backlog', 'BindToDevice', 'SocketUser', 'SocketGroup', 'SocketMode', 'DirectoryMode', 'Accept',
+ 'Writable', 'MaxConnections', 'KeepAlive', 'KeepAliveTimeSec', 'KeepAliveIntervalSec',
+ 'KeepAliveProbes', 'NoDelay', 'Priority', 'DeferAcceptSec', 'ReceiveBuffer', 'SendBuffer',
+ 'IPTOS', 'IPTTL', 'Mark', 'ReusePort', 'SmackLabel', 'SmackLabelIPIn', 'SmackLabelIPOut',
+ 'SELinuxContextFromNet', 'PipeSize', 'MessageQueueMaxMessages', 'MessageQueueMessageSize',
+ 'FreeBind', 'Transparent', 'Broadcast', 'PassCredentials', 'PassSecurity', 'TCPCongestion',
+ 'ExecStartPre', 'ExecStartPost', 'ExecStopPre', 'ExecStopPost', 'TimeoutSec', 'Service',
+ 'RemoveOnStop', 'Symlinks', 'FileDescriptorName',
+ -- Mount section.
+ 'What', 'Where', 'Type', 'Options', 'SloppyOptions', 'DirectoryMode', 'TimeoutSec',
+ -- Path section.
+ 'PathExists', 'PathExistsGlob', 'PathChanged', 'PathModified', 'DirectoryNotEmpty', 'Unit',
+ 'MakeDirectory', 'DirectoryMode',
+ -- Timer section.
+ 'OnActiveSec', 'OnBootSec', 'OnStartupSec', 'OnUnitActiveSec', 'OnUnitInactiveSec', 'OnCalendar',
+ 'AccuracySec', 'RandomizedDelaySec', 'Unit', 'Persistent', 'WakeSystem', 'RemainAfterElapse',
+ -- Exec section.
+ 'WorkingDirectory', 'RootDirectory', 'User', 'Group', 'SupplementaryGroups', 'Nice',
+ 'OOMScoreAdjust', 'IOSchedulingClass', 'IOSchedulingPriority', 'CPUSchedulingPolicy',
+ 'CPUSchedulingPriority', 'CPUSchedulingResetOnFork', 'CPUAffinity', 'UMask', 'Environment',
+ 'EnvironmentFile', 'PassEnvironment', 'StandardInput', 'StandardOutput', 'StandardError',
+ 'TTYPath', 'TTYReset', 'TTYVHangup', 'TTYVTDisallocate', 'SyslogIdentifier', 'SyslogFacility',
+ 'SyslogLevel', 'SyslogLevelPrefix', 'TimerSlackNSec', 'LimitCPU', 'LimitFSIZE', 'LimitDATA',
+ 'LimitSTACK', 'LimitCORE', 'LimitRSS', 'LimitNOFILE', 'LimitAS', 'LimitNPROC', 'LimitMEMLOCK',
+ 'LimitLOCKS', 'LimitSIGPENDING', 'LimitMSGQUEUE', 'LimitNICE', 'LimitRTPRIO', 'LimitRTTIME',
+ 'PAMName', 'CapabilityBoundingSet', 'AmbientCapabilities', 'SecureBits', 'Capabilities',
+ 'ReadWriteDirectories', 'ReadOnlyDirectories', 'InaccessibleDirectories', 'PrivateTmp',
+ 'PrivateDevices', 'PrivateNetwork', 'ProtectSystem', 'ProtectHome', 'MountFlags',
+ 'UtmpIdentifier', 'UtmpMode', 'SELinuxContext', 'AppArmorProfile', 'SmackProcessLabel',
+ 'IgnoreSIGPIPE', 'NoNewPrivileges', 'SystemCallFilter', 'SystemCallErrorNumber',
+ 'SystemCallArchitectures', 'RestrictAddressFamilies', 'Personality', 'RuntimeDirectory',
+ 'RuntimeDirectoryMode'
+}))
- -- mount section options
- 'What',
- 'Where',
- 'Type',
- 'Options',
- 'SloppyOptions',
- 'DirectoryMode',
- 'TimeoutSec',
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0))
- -- path section options
- 'PathExists',
- 'PathExistsGlob',
- 'PathChanged',
- 'PathModified',
- 'DirectoryNotEmpty',
- 'Unit',
- 'MakeDirectory',
- 'DirectoryMode',
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
- -- timer section options
- 'OnActiveSec',
- 'OnBootSec',
- 'OnStartupSec',
- 'OnUnitActiveSec',
- 'OnUnitInactiveSec',
- 'OnCalendar',
- 'AccuracySec',
- 'RandomizedDelaySec',
- 'Unit',
- 'Persistent',
- 'WakeSystem',
- 'RemainAfterElapse',
+-- Sections.
+lex:add_rule('section', token(lexer.LABEL, '[' *
+  word_match('Automount BusName Install Mount Path Service Socket Timer Unit') * ']'))
- -- exec section options
- 'WorkingDirectory',
- 'RootDirectory',
- 'User',
- 'Group',
- 'SupplementaryGroups',
- 'Nice',
- 'OOMScoreAdjust',
- 'IOSchedulingClass',
- 'IOSchedulingPriority',
- 'CPUSchedulingPolicy',
- 'CPUSchedulingPriority',
- 'CPUSchedulingResetOnFork',
- 'CPUAffinity',
- 'UMask',
- 'Environment',
- 'EnvironmentFile',
- 'PassEnvironment',
- 'StandardInput',
- 'StandardOutput',
- 'StandardError',
- 'TTYPath',
- 'TTYReset',
- 'TTYVHangup',
- 'TTYVTDisallocate',
- 'SyslogIdentifier',
- 'SyslogFacility',
- 'SyslogLevel',
- 'SyslogLevelPrefix',
- 'TimerSlackNSec',
- 'LimitCPU',
- 'LimitFSIZE',
- 'LimitDATA',
- 'LimitSTACK',
- 'LimitCORE',
- 'LimitRSS',
- 'LimitNOFILE',
- 'LimitAS',
- 'LimitNPROC',
- 'LimitMEMLOCK',
- 'LimitLOCKS',
- 'LimitSIGPENDING',
- 'LimitMSGQUEUE',
- 'LimitNICE',
- 'LimitRTPRIO',
- 'LimitRTTIME',
- 'PAMName',
- 'CapabilityBoundingSet',
- 'AmbientCapabilities',
- 'SecureBits',
- 'Capabilities',
- 'ReadWriteDirectories',
- 'ReadOnlyDirectories',
- 'InaccessibleDirectories',
- 'PrivateTmp',
- 'PrivateDevices',
- 'PrivateNetwork',
- 'ProtectSystem',
- 'ProtectHome',
- 'MountFlags',
- 'UtmpIdentifier',
- 'UtmpMode',
- 'SELinuxContext',
- 'AppArmorProfile',
- 'SmackProcessLabel',
- 'IgnoreSIGPIPE',
- 'NoNewPrivileges',
- 'SystemCallFilter',
- 'SystemCallErrorNumber',
- 'SystemCallArchitectures',
- 'RestrictAddressFamilies',
- 'Personality',
- 'RuntimeDirectory',
- 'RuntimeDirectoryMode'
-}
-local preproc = token(l.PREPROCESSOR, option_word)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(lexer.to_eol(S(';#')))))
--- Identifiers.
-local word = (l.alpha + '_') * (l.alnum + S('_.'))^0
-local identifier = token(l.IDENTIFIER, word)
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Operators.
-local operator = token(l.OPERATOR, '=')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'string', string},
- {'preproc', preproc},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._LEXBYLINE = true
+lex:add_rule('operator', token(lexer.OPERATOR, '='))
-return M
+return lex
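
For reference, the number rule above combines lexer.hex_num with local octal and underscore-separated decimal patterns. A minimal sketch of what the integer pattern accepts:

  local lpeg = require('lpeg')
  local lexer = require('lexer')
  local S = lpeg.S
  local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
  local oct_num = '0' * S('01234567_')^1
  local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec)
  print(integer:match('1_000'))  --> 6
  print(integer:match('0755'))   --> 5
  print(integer:match('0xFF'))   --> 5
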
diff --git a/lua/lexers/taskpaper.lua b/lua/lexers/taskpaper.lua
index 3fb2632..4d77285 100644
--- a/lua/lexers/taskpaper.lua
+++ b/lua/lexers/taskpaper.lua
@@ -1,59 +1,39 @@
--- Copyright (c) 2016-2017 Larry Hynes. See LICENSE.
+-- Copyright (c) 2016-2022 Larry Hynes. See LICENSE.
-- Taskpaper LPeg lexer
-local l = require('lexer')
-local token = l.token
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token = lexer.token
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'taskpaper'}
+local lex = lexer.new('taskpaper', {lex_by_line = true})
local delimiter = P(' ') + P('\t')
--- Whitespace
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Tags
-local day_tag = token('day_tag', (P('@today') + P('@tomorrow')))
-
-local overdue_tag = token('overdue_tag', P('@overdue'))
-
-local plain_tag = token('plain_tag', P('@') * l.word)
-
-local extended_tag = token('extended_tag',
- P('@') * l.word * P('(') *
- (l.word + R('09') + P('-'))^1 * P(')'))
-
--- Projects
-local project = token('project',
- l.nested_pair(l.starts_line(l.alnum), ':') * l.newline)
-
--- Notes
-local note = token('note', delimiter^1 * l.alnum * l.nonnewline^0)
-
--- Tasks
-local task = token('task', delimiter^1 * P('-') + l.newline)
-
-M._rules = {
- {'note', note},
- {'task', task},
- {'project', project},
- {'extended_tag', extended_tag},
- {'day_tag', day_tag},
- {'overdue_tag', overdue_tag},
- {'plain_tag', plain_tag},
- {'whitespace', ws},
-}
-
-M._tokenstyles = {
- note = l.STYLE_CONSTANT,
- task = l.STYLE_FUNCTION,
- project = l.STYLE_TAG,
- extended_tag = l.STYLE_COMMENT,
- day_tag = l.STYLE_CLASS,
- overdue_tag = l.STYLE_PREPROCESSOR,
- plain_tag = l.STYLE_COMMENT,
-}
-
-M._LEXBYLINE = true
-
-return M
+-- Notes.
+lex:add_rule('note', token('note', delimiter^1 * lexer.to_eol(lexer.alnum)))
+lex:add_style('note', lexer.styles.constant)
+
+-- Tasks.
+lex:add_rule('task', token('task', delimiter^1 * '-' + lexer.newline))
+lex:add_style('task', lexer.styles['function'])
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Projects.
+lex:add_rule('project',
+ token('project', lexer.range(lexer.starts_line(lexer.alnum), ':') * lexer.newline))
+lex:add_style('project', lexer.styles.label)
+
+-- Tags.
+lex:add_rule('extended_tag', token('extended_tag', '@' * lexer.word * '(' *
+ (lexer.word + lexer.digit + '-')^1 * ')'))
+lex:add_style('extended_tag', lexer.styles.comment)
+lex:add_rule('day_tag', token('day_tag', (P('@today') + '@tomorrow')))
+lex:add_style('day_tag', lexer.styles.class)
+lex:add_rule('overdue_tag', token('overdue_tag', '@overdue'))
+lex:add_style('overdue_tag', lexer.styles.preprocessor)
+lex:add_rule('plain_tag', token('plain_tag', '@' * lexer.word))
+lex:add_style('plain_tag', lexer.styles.comment)
+
+return lex
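
For reference, custom token names such as 'note' or 'day_tag' are only colored once a matching add_style call registers them, and a base style can be extended by concatenating a table of properties, as the styles above do. A minimal sketch using a hypothetical 'shout' token:

  local lpeg = require('lpeg')
  local lexer = require('lexer')
  local lex = lexer.new('demo')
  lex:add_rule('shout', lexer.token('shout', lpeg.P('!')^1))
  -- Derive the style from a stock one; {bold = true} overlays the base.
  lex:add_style('shout', lexer.styles.comment .. {bold = true})
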
diff --git a/lua/lexers/tcl.lua b/lua/lexers/tcl.lua
index 665cbd5..8686795 100644
--- a/lua/lexers/tcl.lua
+++ b/lua/lexers/tcl.lua
@@ -1,59 +1,46 @@
--- Copyright 2014-2017 Joshua Krämer. See LICENSE.
+-- Copyright 2014-2022 Joshua Krämer. See LICENSE.
-- Tcl LPeg lexer.
-- This lexer follows the TCL dodekalogue (http://wiki.tcl.tk/10259).
-- It is based on the previous lexer by Mitchell.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'tcl'}
+local lex = lexer.new('tcl')
-- Whitespace.
-local whitespace = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Separator (semicolon).
-local separator = token(l.CLASS, P(';'))
+-- Comment.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' * P(function(input, index)
+ local i = index - 2
+ while i > 0 and input:find('^[ \t]', i) do i = i - 1 end
+ if i < 1 or input:find('^[\r\n;]', i) then return index end
+end))))
--- Delimiters.
-local braces = token(l.KEYWORD, S('{}'))
-local quotes = token(l.FUNCTION, '"')
-local brackets = token(l.VARIABLE, S('[]'))
+-- Separator (semicolon).
+lex:add_rule('separator', token(lexer.CLASS, ';'))
-- Argument expander.
-local expander = token(l.LABEL, P('{*}'))
+lex:add_rule('expander', token(lexer.LABEL, '{*}'))
+
+-- Delimiters.
+lex:add_rule('braces', token(lexer.KEYWORD, S('{}')))
+lex:add_rule('quotes', token(lexer.FUNCTION, '"'))
+lex:add_rule('brackets', token(lexer.VARIABLE, S('[]')))
-- Variable substitution.
-local variable = token(l.STRING, '$' * (l.alnum + '_' + P(':')^2)^0)
+lex:add_rule('variable', token(lexer.STRING, '$' * (lexer.alnum + '_' + P(':')^2)^0))
-- Backslash substitution.
-local backslash = token(l.TYPE, '\\' * ((l.digit * l.digit^-2) +
- ('x' * l.xdigit^1) + ('u' * l.xdigit * l.xdigit^-3) +
- ('U' * l.xdigit * l.xdigit^-7) + P(1)))
+local oct = lexer.digit * lexer.digit^-2
+local hex = 'x' * lexer.xdigit^1
+local unicode = 'u' * lexer.xdigit * lexer.xdigit^-3
+lex:add_rule('backslash', token(lexer.TYPE, '\\' * (oct + hex + unicode + 1)))
--- Comment.
-local comment = token(l.COMMENT, '#' * P(function(input, index)
- local i = index - 2
- while i > 0 and input:find('^[ \t]', i) do i = i - 1 end
- if i < 1 or input:find('^[\r\n;]', i) then return index end
-end) * l.nonnewline^0)
-
-M._rules = {
- {'whitespace', whitespace},
- {'comment', comment},
- {'separator', separator},
- {'expander', expander},
- {'braces', braces},
- {'quotes', quotes},
- {'brackets', brackets},
- {'variable', variable},
- {'backslash', backslash},
-}
-
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.KEYWORD] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
-
-return M
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
+
+return lex
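
For reference, the P(function(input, index) ... end) in the comment rule above is an LPeg match-time function: it receives the whole subject and the current position, and succeeds by returning a position (here, only when '#' is preceded by nothing but blanks back to a line start or a ';'). A minimal sketch of the contract:

  local lpeg = require('lpeg')
  local P = lpeg.P
  local comment_start = '#' * P(function(input, index)
    local i = index - 2
    while i > 0 and input:find('^[ \t]', i) do i = i - 1 end
    if i < 1 or input:find('^[\r\n;]', i) then return index end
  end)
  print(comment_start:match('# comment'))   --> 2 (line start: succeeds)
  print(comment_start:match('puts #x', 6))  --> nil (mid-command '#': fails)
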
diff --git a/lua/lexers/tex.lua b/lua/lexers/tex.lua
index 2c25d73..8388c22 100644
--- a/lua/lexers/tex.lua
+++ b/lua/lexers/tex.lua
@@ -1,45 +1,32 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Plain TeX LPeg lexer.
-- Modified by Robert Gieseke.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'tex'}
+local lex = lexer.new('tex')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- TeX environments.
-local environment = token('environment', '\\' * (P('begin') + 'end') * l.word)
+lex:add_rule('environment', token('environment', '\\' * (P('begin') + 'end') * lexer.word))
+lex:add_style('environment', lexer.styles.keyword)
-- Commands.
-local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}')))
+lex:add_rule('command', token(lexer.KEYWORD, '\\' * (lexer.alpha^1 + S('#$&~_^%{}'))))
-- Operators.
-local operator = token(l.OPERATOR, S('$&#{}[]'))
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'environment', environment},
- {'keyword', command},
- {'operator', operator},
-}
-
-M._tokenstyles = {
- environment = l.STYLE_KEYWORD
-}
-
-M._foldsymbols = {
- _patterns = {'\\begin', '\\end', '[{}]', '%%'},
- [l.COMMENT] = {['%'] = l.fold_line_comments('%')},
- ['environment'] = {['\\begin'] = 1, ['\\end'] = -1},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
-}
-
-return M
+lex:add_rule('operator', token(lexer.OPERATOR, S('$&#{}[]')))
+
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('%'))
+lex:add_fold_point('environment', '\\begin', '\\end')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+
+return lex
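
For reference, each add_fold_point call above names a token type plus a start/end delimiter pair (or a helper such as fold_consecutive_lines); folding triggers only when the delimiter is lexed as that token. A minimal sketch for a brace-folded toy lexer:

  local lpeg = require('lpeg')
  local lexer = require('lexer')
  local lex = lexer.new('demo')
  lex:add_rule('operator', lexer.token(lexer.OPERATOR, lpeg.S('{}')))
  -- '{' raises the fold level and '}' lowers it, but only in OPERATOR tokens.
  lex:add_fold_point(lexer.OPERATOR, '{', '}')
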
diff --git a/lua/lexers/texinfo.lua b/lua/lexers/texinfo.lua
index b19f131..68ddb69 100644
--- a/lua/lexers/texinfo.lua
+++ b/lua/lexers/texinfo.lua
@@ -1,15 +1,14 @@
--- Copyright 2014-2017 stef@ailleurs.land. See LICENSE.
+-- Copyright 2014-2022 stef@ailleurs.land. See LICENSE.
-- Plain Texinfo version 5.2 LPeg lexer
-- Freely inspired by Mitchell's work, with valuable help from him too!
--- Directives are processed (more or less) in the Reference Card Texinfo order
--- Reference Card page for each directive group is in comment for reference
+-- Directives are processed (more or less) in the Reference Card Texinfo order. The Reference
+-- Card page for each directive group is given in a comment for reference.
--[[
Note: Improving Fold Points use with Texinfo
-At the very beginning of your Texinfo file, it could be wised to insert theses
-alias :
+At the very beginning of your Texinfo file, it could be wise to insert these aliases:
@alias startchapter = comment
@alias endchapter = comment
@@ -21,51 +20,94 @@ Then use this to begin each chapter :
@startchapter ------------------------------------------------------------------
With the use of Scintilla's `SCI_FOLDALL(SC_FOLDACTION_TOGGLE)` or Textadept's
-`buffer:fold_all(buffer.FOLDACTION_TOGGLE)`, you have then a nice chapter
-folding, useful with large documents.
+`buffer:fold_all(buffer.FOLDACTION_TOGGLE)`, you then get nice per-chapter folding, useful with
+large documents.
]]
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'texinfo'}
-
--------------------------------------------------------------------------------
--- Common processing
--------------------------------------------------------------------------------
+local lex = lexer.new('texinfo')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '@c' * l.nonnewline_esc^0
---local line_comment_long = '@comment' * l.nonnewline_esc^0
-local block_comment = '@ignore' * (l.any - '@end ignore')^0 *
- P('@end ignore')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('{}', false, true, true))
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Identifiers
-local identifier = token(l.IDENTIFIER, l.word)
+-- Directives.
+local directives_base = word_match({
+ 'end',
+ -- Custom keywords for chapter folding
+ 'startchapter', 'endchapter',
+ -- List and tables (page 2, column 2)
+ 'itemize', 'enumerate',
+ -- Beginning a Texinfo document (page 1, column 1)
+ 'titlepage', 'copying',
+ -- Block environments (page 2, column 1)
+ 'cartouche',
+ -- Block environments > Displays using fixed-width fonts (page 2, column 2)
+ 'example', 'smallexample',
+ -- List and tables (page 2, column 2)
+ 'multitable',
+ -- Floating Displays (page 2, column 3)
+ 'float', 'listoffloats', 'caption', 'shortcaption', 'image',
+ -- Floating Displays > Footnotes (page 2, column 3)
+ 'footnote', 'footnotestyle',
+ -- Conditionally (in)visible text > Output formats (page 3, column 3)
+ 'ifdocbook', 'ifhtml', 'ifinfo', 'ifplaintext', 'iftex', 'ifxml', 'ifnotdocbook', 'ifnothtml',
+ 'ifnotplaintext', 'ifnottex', 'ifnotxml', 'ifnotinfo', 'inlinefmt', 'inlinefmtifelse',
+ -- Conditionally (in)visible text > Raw formatter text (page 4, column 1)
+ 'docbook', 'html', 'tex', 'xml', 'inlineraw',
+ -- Conditionally (in)visible text > Documents variables (page 4, column 1)
+ 'set', 'clear', 'value', 'ifset', 'ifclear', 'inlineifset', 'inlineifclear',
+ -- Conditionally (in)visible text > Testing for commands (page 4, column 1)
+ 'ifcommanddefined', 'ifcommandnotdefined', 'end',
+ -- Defining new Texinfo commands (page 4, column 1)
+ 'alias', 'macro', 'unmacro', 'definfounclose',
+ -- File inclusion (page 4, column 1)
+ 'include', 'verbatiminclude',
+ -- Formatting and headers footers for TeX (page 4, column 1)
+ 'allowcodebreaks', 'finalout', 'fonttextsize',
+ -- Formatting and headers footers for TeX > paper size (page 4, column 2)
+ 'smallbook', 'afourpaper', 'afivepaper', 'afourlatex', 'afourwide', 'pagesizes',
+ -- Formatting and headers footers for TeX > Page headers and footers (page 4, column 2)
+ -- not implemented
+ -- Document preferences (page 4, column 2)
+ -- not implemented
+ -- Ending a Texinfo document (page 4, column 2)
+ 'bye'
+}, true)
+lex:add_rule('directive', token('directives', ('@end' * lexer.space^1 + '@') * directives_base))
+lex:add_style('directives', lexer.styles['function'])
--------------------------------------------------------------------------------
--- Common keywords
--------------------------------------------------------------------------------
+-- Chapters.
+local chapters_base = word_match({
+ -- Chapter structuring (page 1, column 2)
+ 'lowersections', 'raisesections', 'part',
+ -- Chapter structuring > Numbered, included in contents (page 1, column 2)
+ 'chapter', 'centerchap',
+ -- Chapter structuring > Context-dependent, included in contents (page 1, column 2)
+ 'section', 'subsection', 'subsubsection',
+  -- Chapter structuring > Unnumbered, included in contents (page 1, column 2)
+ 'unnumbered', 'unnumberedsec', 'unnumberedsubsec', 'unnumberedsubsection', 'unnumberedsubsubsec',
+ 'unnumberedsubsubsection',
+ -- Chapter structuring > Letter and numbered, included in contents (page 1, column 2)
+ 'appendix', 'appendixsec', 'appendixsection', 'appendixsubsec', 'appendixsubsection',
+ 'appendixsubsubsec', 'appendixsubsubsection',
+  -- Chapter structuring > Unnumbered, not included in contents, no new page (page 1, column 3)
+ 'chapheading', 'majorheading', 'heading', 'subheading', 'subsubheading'
+}, true)
+lex:add_rule('chapter', token('chapters', ('@end' * lexer.space^1 + '@') * chapters_base))
+lex:add_style('chapters', lexer.styles.class)
+-- Common keywords.
local keyword_base = word_match({
'end',
-- Beginning a Texinfo document (page 1, column 1)
'setfilename', 'settitle', 'insertcopying',
  -- Beginning a Texinfo document > Internationalization (page 1, column 1)
'documentencoding', 'documentlanguage', 'frenchspacing',
- -- Beginning a Texinfo document > Info directory specification and HTML
- -- document description (page 1, column 1)
+ -- Beginning a Texinfo document > Info directory specification and HTML document description
+ -- (page 1, column 1)
'dircategory', 'direntry', 'documentdescription',
  -- Beginning a Texinfo document > Title pages (page 1, column 1)
'shorttitlepage', 'center', 'titlefont', 'title', 'subtitle', 'author',
@@ -83,8 +125,7 @@ local keyword_base = word_match({
-- Marking text > Markup for regular text (page 1, column 3)
'var', 'dfn', 'acronym', 'abbr',
  -- Marking text > Markup for literal text (page 1, column 3)
- 'code', 'file', 'command', 'env', 'option', 'kbd', 'key', 'email',
- 'indicateurl', 'samp', 'verb',
+ 'code', 'file', 'command', 'env', 'option', 'kbd', 'key', 'email', 'indicateurl', 'samp', 'verb',
-- Marking text > GUI sequences (page 2, column 1)
'clicksequence', 'click', 'clickstyle', 'arrow',
-- Marking text > Math (page 2, column 1)
@@ -93,33 +134,26 @@ local keyword_base = word_match({
'sc', 'r', 'i', 'slanted', 'b', 'sansserif', 't',
-- Block environments (page 2, column 1)
'noindent', 'indent', 'exdent',
- -- Block environments > Normally filled displays using regular text fonts
- -- (page 2, column 1)
- 'quotation', 'smallquotation', 'indentedblock', 'smallindentedblock',
- 'raggedright',
- -- Block environments > Line-for-line displays using regular test fonts (page
- -- 2, column 2)
+ -- Block environments > Normally filled displays using regular text fonts (page 2, column 1)
+ 'quotation', 'smallquotation', 'indentedblock', 'smallindentedblock', 'raggedright',
+  -- Block environments > Line-for-line displays using regular text fonts (page 2, column 2)
'format', 'smallformat', 'display', 'smalldisplay', 'flushleft', 'flushright',
-- Block environments > Displays using fixed-width fonts (page 2, column 2)
'lisp', 'smalllisp', 'verbatim',
-- List and tables (page 2, column 2)
- 'table', 'ftable', 'vtable', 'tab', 'item', 'itemx', 'headitem',
- 'headitemfont', 'asis',
+ 'table', 'ftable', 'vtable', 'tab', 'item', 'itemx', 'headitem', 'headitemfont', 'asis',
-- Indices (page 2, column 3)
- 'cindex', 'findex', 'vindex', 'kindex', 'pindex', 'tindex', 'defcodeindex',
- 'syncodeindex', 'synindex', 'printindex',
- -- Insertions within a paragraph > Characters special to Texinfo (page 2,
- -- column 3)
+ 'cindex', 'findex', 'vindex', 'kindex', 'pindex', 'tindex', 'defcodeindex', 'syncodeindex',
+ 'synindex', 'printindex',
+ -- Insertions within a paragraph > Characters special to Texinfo (page 2, column 3)
'@', '{', '}', 'backslashcar', 'comma', 'hashcar', ':', '.', '?', '!', 'dmn',
-- Insertions within a paragraph > Accents (page 3, column 1)
-- not implemented
-- Insertions within a paragraph > Non-English characters (page 3, column 1)
-- not implemented
- -- Insertions within a paragraph > Other text characters an logos (page 3,
- -- column 1)
- 'bullet', 'dots', 'enddots', 'euro', 'pounds', 'textdegree', 'copyright',
- 'registeredsymbol', 'TeX', 'LaTeX', 'today',
- 'guillemetleft', 'guillementright', 'guillemotleft', 'guillemotright',
+  -- Insertions within a paragraph > Other text characters and logos (page 3, column 1)
+ 'bullet', 'dots', 'enddots', 'euro', 'pounds', 'textdegree', 'copyright', 'registeredsymbol',
+  'TeX', 'LaTeX', 'today', 'guillemetleft', 'guillemetright', 'guillemotleft', 'guillemotright',
-- Insertions within a paragraph > Glyphs for code examples (page 3, column 2)
'equiv', 'error', 'expansion', 'point', 'print', 'result',
-- Making and preventing breaks (page 3, column 2)
@@ -128,143 +162,44 @@ local keyword_base = word_match({
'sp', 'page', 'need', 'group', 'vskip'
-- Definition commands (page 3, column 2)
-- not implemented
-}, nil, true)
-
-local keyword = token(l.KEYWORD, ('@end' * l.space^1 + '@') * keyword_base)
-
--------------------------------------------------------------------------------
--- Chapter structuring Keywords
--------------------------------------------------------------------------------
-
-local chapters_base = word_match({
- -- Chapter structuring (page 1, column 2)
- 'lowersections', 'raisesections', 'part',
- -- Chapter structuring > Numbered, included in contents (page 1, column 2)
- 'chapter', 'centerchap',
- -- Chapter structuring > Context-dependent, included in contents (page 1,
- -- column 2)
- 'section', 'subsection', 'subsubsection',
- -- Chapter structuring > Unumbered, included in contents (page 1, column 2)
- 'unnumbered', 'unnumberedsec', 'unnumberedsubsec', 'unnumberedsubsection',
- 'unnumberedsubsubsec', 'unnumberedsubsubsection',
- -- Chapter structuring > Letter and numbered, included in contents (page 1,
- -- column 2)
- 'appendix', 'appendixsec', 'appendixsection', 'appendixsubsec',
- 'appendixsubsection', 'appendixsubsubsec', 'appendixsubsubsection',
- -- Chapter structuring > Unumbered, not included in contents, no new page
- -- (page 1, column 3)
- 'chapheading', 'majorheading', 'heading', 'subheading', 'subsubheading'
-}, nil, true)
-
-local chapters = token('chapters', ('@end' * l.space^1 + '@') * chapters_base)
-
--------------------------------------------------------------------------------
--- Directives Keywords
--------------------------------------------------------------------------------
-
-local directives_base = word_match({
- 'end',
- -- Custom keywords for chapter folding
- 'startchapter', 'endchapter',
- -- List and tables (page 2, column 2)
- 'itemize', 'enumerate',
- -- Beginning a Texinfo document (page 1, column 1)
- 'titlepage', 'copying',
- -- Block environments (page 2, column 1)
- 'cartouche',
- -- Block environments > Displays using fixed-width fonts (page 2, column 2)
- 'example', 'smallexample',
- -- List and tables (page 2, column 2)
- 'multitable',
- -- Floating Displays (page 2, column 3)
- 'float', 'listoffloats', 'caption', 'shortcaption', 'image',
- -- Floating Displays > Footnotes (page 2, column 3)
- 'footnote', 'footnotestyle',
- -- Conditionally (in)visible text > Output formats (page 3, column 3)
- 'ifdocbook', 'ifhtml', 'ifinfo', 'ifplaintext', 'iftex', 'ifxml',
- 'ifnotdocbook', 'ifnothtml', 'ifnotplaintext',
- 'ifnottex', 'ifnotxml', 'ifnotinfo', 'inlinefmt', 'inlinefmtifelse',
- -- Conditionally (in)visible text > Raw formatter text (page 4, column 1)
- 'docbook', 'html', 'tex', 'xml', 'inlineraw',
- -- Conditionally (in)visible text > Documents variables (page 4, column 1)
- 'set', 'clear', 'value', 'ifset', 'ifclear', 'inlineifset', 'inlineifclear',
- -- Conditionally (in)visible text > Testing for commands (page 4, column 1)
- 'ifcommanddefined', 'ifcommandnotdefined', 'end',
- -- Defining new Texinfo commands (page 4, column 1)
- 'alias', 'macro', 'unmacro', 'definfounclose',
- -- File inclusion (page 4, column 1)
- 'include', 'verbatiminclude',
- -- Formatting and headers footers for TeX (page 4, column 1)
- 'allowcodebreaks', 'finalout', 'fonttextsize',
- -- Formatting and headers footers for TeX > paper size (page 4, column 2)
- 'smallbook', 'afourpaper', 'afivepaper', 'afourlatex', 'afourwide',
- 'pagesizes',
- -- Formatting and headers footers for TeX > Page headers and footers (page 4,
- -- column 2)
- -- not implemented
- -- Document preferences (page 4, column 2)
- -- not implemented
- -- Ending a Texinfo document (page 4, column 2)
- 'bye'
-}, nil, case_insensitive_tags)
-
-local directives = token('directives',
- ('@end' * l.space^1 + '@') * directives_base)
-
--------------------------------------------------------------------------------
--- Special keywords
--------------------------------------------------------------------------------
+}, true)
+lex:add_rule('keyword', token(lexer.KEYWORD, ('@end' * lexer.space^1 + '@') * keyword_base))
-- Italics
-local emph = token('emph', '@emph' * l.delimited_range('{}', false, true, true))
+local nested_braces = lexer.range('{', '}', false, false, true)
+lex:add_rule('emph', token('emph', '@emph' * nested_braces))
+lex:add_style('emph', lexer.styles.string .. {italics = true})
-- Bold
-local strong = token('strong',
- '@strong' * l.delimited_range('{}', false, true, true))
-
--------------------------------------------------------------------------------
--- Rules, Tokenstyles and Folds points
--------------------------------------------------------------------------------
-
-M._rules = {
- {'whitespace', ws},
- {'directives', directives},
- {'chapters', chapters},
- {'keyword', keyword},
- {'emph', emph},
- {'strong', strong},
- {'identifier', identifier},
- {'string', string},
- {'number', number},
- {'comment', comment},
-}
+lex:add_rule('strong', token('strong', '@strong' * nested_braces))
+lex:add_style('strong', lexer.styles.string .. {bold = true})
-M._tokenstyles = {
- directives = l.STYLE_FUNCTION,
- chapters = l.STYLE_CLASS,
- emph = l.STYLE_STRING..',italics',
- strong = l.STYLE_STRING..',bold'
-}
-
-M._foldsymbols = {
- _patterns = {'@end %l+', '@%l+'},
- directives = {
- ['@titlepage'] = 1, ['@end titlepage'] = -1,
- ['@copying'] = 1, ['@end copying'] = -1,
-
- ['@ifset'] = 1, ['@end ifset'] = -1,
- ['@tex'] = 1, ['@end tex'] = -1,
-
- ['@itemize'] = 1, ['@end itemize'] = -1,
- ['@enumerate'] = 1, ['@end enumerate'] = -1,
- ['@multitable'] = 1, ['@end multitable'] = -1,
+-- Identifiers
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
- ['@example'] = 1, ['@end example'] = -1,
- ['@smallexample'] = 1, ['@end smallexample'] = -1,
- ['@cartouche'] = 1, ['@end cartouche'] = -1,
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, nested_braces))
- ['@startchapter'] = 1, ['@endchapter'] = -1,
- }
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-return M
+-- Comments.
+local line_comment = lexer.to_eol('@c', true)
+-- local line_comment_long = lexer.to_eol('@comment', true)
+local block_comment = lexer.range('@ignore', '@end ignore')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Fold points.
+lex:add_fold_point('directives', '@titlepage', '@end titlepage')
+lex:add_fold_point('directives', '@copying', '@end copying')
+lex:add_fold_point('directives', '@ifset', '@end ifset')
+lex:add_fold_point('directives', '@tex', '@end tex')
+lex:add_fold_point('directives', '@itemize', '@end itemize')
+lex:add_fold_point('directives', '@enumerate', '@end enumerate')
+lex:add_fold_point('directives', '@multitable', '@end multitable')
+lex:add_fold_point('directives', '@example', '@end example')
+lex:add_fold_point('directives', '@smallexample', '@end smallexample')
+lex:add_fold_point('directives', '@cartouche', '@end cartouche')
+lex:add_fold_point('directives', '@startchapter', '@endchapter')
+
+return lex
diff --git a/lua/lexers/text.lua b/lua/lexers/text.lua
index cc41bfa..cdf7c3d 100644
--- a/lua/lexers/text.lua
+++ b/lua/lexers/text.lua
@@ -1,15 +1,10 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Text LPeg lexer.
-local l = require('lexer')
+local lexer = require('lexer')
-local M = {_NAME = 'text'}
+local lex = lexer.new('text')
--- Whitespace.
-local ws = l.token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', lexer.token(lexer.WHITESPACE, lexer.space^1))
-M._rules = {
- {'whitespace', ws},
-}
-
-return M
+return lex
diff --git a/lua/lexers/toml.lua b/lua/lexers/toml.lua
index 35629fc..ed61958 100644
--- a/lua/lexers/toml.lua
+++ b/lua/lexers/toml.lua
@@ -1,64 +1,48 @@
--- Copyright 2015-2017 Alejandro Baez (https://keybase.io/baez). See LICENSE.
+-- Copyright 2015-2022 Alejandro Baez (https://keybase.io/baez). See LICENSE.
-- TOML LPeg lexer.
-local l = require("lexer")
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require("lexer")
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'toml'}
+local lex = lexer.new('toml', {fold_by_indentation = true})
-- Whitespace
-local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range("'") + l.delimited_range('"'))
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Datetime.
-local ts = token('timestamp', l.digit * l.digit * l.digit * l.digit * -- year
- '-' * l.digit * l.digit^-1 * -- month
- '-' * l.digit * l.digit^-1 * -- day
- ((S(' \t')^1 + S('tT'))^-1 * -- separator
- l.digit * l.digit^-1 * -- hour
- ':' * l.digit * l.digit * -- minute
- ':' * l.digit * l.digit * -- second
- ('.' * l.digit^0)^-1 * -- fraction
- ('Z' + -- timezone
- S(' \t')^0 * S('-+') * l.digit * l.digit^-1 *
- (':' * l.digit * l.digit)^-1)^-1)^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'true', 'false'
-})
-
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match('true false')))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=+-,.{}[]()'))
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'identifier', identifier},
- {'operator', operator},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'timestamp', ts},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=+-,.{}[]()')))
-M._tokenstyles = {
- timestamp = l.STYLE_NUMBER,
-}
+-- Datetime.
+local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit
+local month = lexer.digit * lexer.digit^-1
+local day = lexer.digit * lexer.digit^-1
+local date = year * '-' * month * '-' * day
+local hours = lexer.digit * lexer.digit^-1
+local minutes = lexer.digit * lexer.digit
+local seconds = lexer.digit * lexer.digit
+local fraction = '.' * lexer.digit^0
+local time = hours * ':' * minutes * ':' * seconds * fraction^-1
+local T = S(' \t')^1 + S('tT')
+local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1
+lex:add_rule('datetime', token('timestamp', date * (T * time * zone^-1)))
+lex:add_style('timestamp', lexer.styles.number)
-M._FOLDBYINDENTATION = true
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-return M
+return lex
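
For reference, the datetime rule above assembles an RFC 3339-style timestamp from small date/time/zone pieces; note that, unlike the old lexer, the time part is no longer optional. A minimal sketch, rebuilt with the same definitions:

  local lpeg = require('lpeg')
  local lexer = require('lexer')
  local S, d = lpeg.S, lexer.digit
  local date = d * d * d * d * '-' * d * d^-1 * '-' * d * d^-1
  local time = d * d^-1 * ':' * d * d * ':' * d * d * ('.' * d^0)^-1
  local T = S(' \t')^1 + S('tT')
  local zone = 'Z' + S(' \t')^0 * S('-+') * d * d^-1 * (':' * d * d)^-1
  local ts = date * (T * time * zone^-1)
  print(ts:match('1979-05-27T07:32:00Z'))  --> 21 (end position + 1)
  print(ts:match('1979-05-27'))            --> nil (time required here)
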
diff --git a/lua/lexers/txt2tags.lua b/lua/lexers/txt2tags.lua
new file mode 100644
index 0000000..9db5b80
--- /dev/null
+++ b/lua/lexers/txt2tags.lua
@@ -0,0 +1,146 @@
+-- Copyright 2019-2022 Julien L. See LICENSE.
+-- txt2tags LPeg lexer.
+-- (developed and tested with Txt2tags Markup Rules
+-- [https://txt2tags.org/doc/english/rules.t2t])
+-- Contributed by Julien L.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+local nonspace = lexer.any - lexer.space
+
+local lex = lexer.new('txt2tags')
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, (lexer.space - lexer.newline)^1)
+
+-- Titles
+local alphanumeric = lexer.alnum + S('_-')
+local header_label = token('header_label_start', '[') * token('header_label', alphanumeric^1) *
+ token('header_label_end', ']')
+local function h(level)
+ local equal = string.rep('=', level) * (lexer.nonnewline - '=')^1 * string.rep('=', level)
+ local plus = string.rep('+', level) * (lexer.nonnewline - '+')^1 * string.rep('+', level)
+ return token('h' .. level, equal + plus) * header_label^-1
+end
+local header = h(5) + h(4) + h(3) + h(2) + h(1)
+
+-- Comments.
+local line_comment = lexer.to_eol(lexer.starts_line('%'))
+local block_comment = lexer.range(lexer.starts_line('%%%'))
+local comment = token(lexer.COMMENT, block_comment + line_comment)
+
+-- Inline.
+local function span(name, delimiter)
+ return token(name, (delimiter * nonspace * delimiter * S(delimiter)^0) +
+ (delimiter * nonspace * (lexer.nonnewline - nonspace * delimiter)^0 * nonspace * delimiter *
+ S(delimiter)^0))
+end
+local bold = span('bold', '**')
+local italic = span('italic', '//')
+local underline = span('underline', '__')
+local strike = span('strike', '--')
+local mono = span('mono', '``')
+local raw = span('raw', '""')
+local tagged = span('tagged', "''")
+local inline = bold + italic + underline + strike + mono + raw + tagged
+
+-- Link.
+local email = token('email',
+ (nonspace - '@')^1 * '@' * (nonspace - '.')^1 * ('.' * (nonspace - S('.?'))^1)^1 *
+ ('?' * nonspace^1)^-1)
+local host = token('host',
+ word_match('www ftp', true) * (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' *
+ (nonspace - S(',.'))^1)
+local url = token('url',
+ (nonspace - '://')^1 * '://' * (nonspace - ',' - '.')^1 * ('.' * (nonspace - S(',./?#'))^1)^1 *
+ ('/' * (nonspace - S('./?#'))^0 * ('.' * (nonspace - S(',.?#'))^1)^0)^0 *
+ ('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1)
+local label_with_address = token('label_start', '[') * lexer.space^0 *
+ token('address_label', ((nonspace - ']')^1 * lexer.space^1)^1) *
+ token('address', (nonspace - ']')^1) * token('label_end', ']')
+local link = label_with_address + url + host + email
+
+-- Line.
+local line = token('line', S('-=_')^20)
+
+-- Image.
+local image_only = token('image_start', '[') * token('image', (nonspace - ']')^1) *
+ token('image_end', ']')
+local image_link = token('image_link_start', '[') * image_only *
+ token('image_link_sep', lexer.space^1) * token('image_link', (nonspace - ']')^1) *
+ token('image_link_end', ']')
+local image = image_link + image_only
+
+-- Macro.
+local macro = token('macro', '%%' * (nonspace - '(')^1 * lexer.range('(', ')', true)^-1)
+
+-- Verbatim.
+local verbatim_line = lexer.to_eol(lexer.starts_line('```') * S(' \t'))
+local verbatim_block = lexer.range(lexer.starts_line('```'))
+local verbatim_area = token('verbatim_area', verbatim_block + verbatim_line)
+
+-- Raw.
+local raw_line = lexer.to_eol(lexer.starts_line('"""') * S(' \t'))
+local raw_block = lexer.range(lexer.starts_line('"""'))
+local raw_area = token('raw_area', raw_block + raw_line)
+
+-- Tagged.
+local tagged_line = lexer.to_eol(lexer.starts_line('\'\'\'') * S(' \t'))
+local tagged_block = lexer.range(lexer.starts_line('\'\'\''))
+local tagged_area = token('tagged_area', tagged_block + tagged_line)
+
+-- Table.
+local table_sep = token('table_sep', '|')
+local cell_content = inline + link + image + macro + token('cell_content', lexer.nonnewline - ' |')
+local header_cell_content = token('header_cell_content', lexer.nonnewline - ' |')
+local field_sep = ' ' * table_sep^1 * ' '
+local table_row_end = P(' ')^0 * table_sep^0
+local table_row = lexer.starts_line(P(' ')^0 * table_sep) * cell_content^0 *
+ (field_sep * cell_content^0)^0 * table_row_end
+local table_row_header =
+ lexer.starts_line(P(' ')^0 * table_sep * table_sep) * header_cell_content^0 *
+ (field_sep * header_cell_content^0)^0 * table_row_end
+local table = table_row_header + table_row
+
+lex:add_rule('table', table)
+lex:add_rule('link', link)
+lex:add_rule('line', line)
+lex:add_rule('header', header)
+lex:add_rule('comment', comment)
+lex:add_rule('whitespace', ws)
+lex:add_rule('image', image)
+lex:add_rule('macro', macro)
+lex:add_rule('inline', inline)
+lex:add_rule('verbatim_area', verbatim_area)
+lex:add_rule('raw_area', raw_area)
+lex:add_rule('tagged_area', tagged_area)
+
+lex:add_style('line', {bold = true})
+local font_size = tonumber(lexer.property_expanded['style.default']:match('size:(%d+)')) or 10
+for n = 5, 1, -1 do
+ lex:add_style('h' .. n, {fore = lexer.colors.red, size = font_size + (6 - n)})
+end
+lex:add_style('header_label', lexer.styles.label)
+lex:add_style('email', {underlined = true})
+lex:add_style('host', {underlined = true})
+lex:add_style('url', {underlined = true})
+lex:add_style('address_label', lexer.styles.label)
+lex:add_style('address', {underlined = true})
+lex:add_style('image', {fore = lexer.colors.green})
+lex:add_style('image_link', {underlined = true})
+lex:add_style('macro', lexer.styles.preprocessor)
+lex:add_style('bold', {bold = true})
+lex:add_style('italic', {italics = true})
+lex:add_style('underline', {underlined = true})
+lex:add_style('strike', {italics = true}) -- no strikethrough style is available
+lex:add_style('mono', {font = 'mono'})
+lex:add_style('raw', {back = lexer.colors.grey})
+lex:add_style('tagged', lexer.styles.embedded)
+lex:add_style('verbatim_area', {font = 'mono'}) -- for consistency with mono
+lex:add_style('raw_area', {back = lexer.colors.grey}) -- for consistency with raw
+lex:add_style('tagged_area', lexer.styles.embedded) -- for consistency with tagged
+lex:add_style('table_sep', {fore = lexer.colors.green})
+lex:add_style('header_cell_content', {fore = lexer.colors.green})
+
+return lex
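
For reference, a hedged sketch of driving this lexer directly: `lexer.load` and the `lex` method exist in the Scintillua-style module this file targets, though the exact shape of the returned table may vary between versions.

    local lexer = require('lexer')
    local t2t = lexer.load('txt2tags')
    -- Returns a flat table alternating token names and end positions.
    local tokens = t2t:lex('**bold** and //italic//\n')
    for i = 1, #tokens, 2 do print(tokens[i], tokens[i + 1]) end
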
diff --git a/lua/lexers/typescript.lua b/lua/lexers/typescript.lua
new file mode 100644
index 0000000..414acf1
--- /dev/null
+++ b/lua/lexers/typescript.lua
@@ -0,0 +1,23 @@
+-- Copyright 2021-2022 Mitchell. See LICENSE.
+-- TypeScript LPeg lexer.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
+
+local lex = lexer.new('typescript', {inherit = lexer.load('javascript')})
+
+-- Whitespace.
+lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:modify_rule('keyword', token(lexer.KEYWORD, word_match[[
+ abstract as constructor declare is module namespace require type
+]]) + lex:get_rule('keyword'))
+
+-- Types.
+lex:modify_rule('type', token(lexer.TYPE, word_match[[
+ boolean number bigint string unknown any void never symbol object
+]]) + lex:get_rule('type'))
+
+return lex
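
The inherit/modify_rule/get_rule pattern above generalizes to further dialects: prepending new alternatives with `+ lex:get_rule(...)` makes them win over the inherited rule. A sketch following the same shape (the 'tsx' name and the added keyword are purely illustrative):

    local lexer = require('lexer')
    local token, word_match = lexer.token, lexer.word_match

    local lex = lexer.new('tsx', {inherit = lexer.load('typescript')})

    -- New alternatives are prepended so they take precedence over the
    -- inherited keyword rule.
    lex:modify_rule('keyword',
      token(lexer.KEYWORD, word_match('satisfies')) + lex:get_rule('keyword'))

    return lex
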
diff --git a/lua/lexers/vala.lua b/lua/lexers/vala.lua
index 51e30fa..4ff50b0 100644
--- a/lua/lexers/vala.lua
+++ b/lua/lexers/vala.lua
@@ -1,75 +1,61 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Vala LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'vala'}
+local lex = lexer.new('vala')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
-local ml_str = '@' * l.delimited_range('"', false, true)
-local string = token(l.STRING, tq_str + sq_str + dq_str + ml_str)
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('uUlLfFdDmM')^-1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'class', 'delegate', 'enum', 'errordomain', 'interface', 'namespace',
- 'signal', 'struct', 'using',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'class', 'delegate', 'enum', 'errordomain', 'interface', 'namespace', 'signal', 'struct', 'using',
-- Modifiers.
- 'abstract', 'const', 'dynamic', 'extern', 'inline', 'out', 'override',
- 'private', 'protected', 'public', 'ref', 'static', 'virtual', 'volatile',
- 'weak',
+ 'abstract', 'const', 'dynamic', 'extern', 'inline', 'out', 'override', 'private', 'protected',
+ 'public', 'ref', 'static', 'virtual', 'volatile', 'weak',
-- Other.
- 'as', 'base', 'break', 'case', 'catch', 'construct', 'continue', 'default',
- 'delete', 'do', 'else', 'ensures', 'finally', 'for', 'foreach', 'get', 'if',
- 'in', 'is', 'lock', 'new', 'requires', 'return', 'set', 'sizeof', 'switch',
- 'this', 'throw', 'throws', 'try', 'typeof', 'value', 'var', 'void', 'while',
+ 'as', 'base', 'break', 'case', 'catch', 'construct', 'continue', 'default', 'delete', 'do',
+ 'else', 'ensures', 'finally', 'for', 'foreach', 'get', 'if', 'in', 'is', 'lock', 'new',
+ 'requires', 'return', 'set', 'sizeof', 'switch', 'this', 'throw', 'throws', 'try', 'typeof',
+ 'value', 'var', 'void', 'while',
-- Etc.
'null', 'true', 'false'
-})
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bool', 'char', 'double', 'float', 'int', 'int8', 'int16', 'int32', 'int64',
- 'long', 'short', 'size_t', 'ssize_t', 'string', 'uchar', 'uint', 'uint8',
- 'uint16', 'uint32', 'uint64', 'ulong', 'unichar', 'ushort'
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bool', 'char', 'double', 'float', 'int', 'int8', 'int16', 'int32', 'int64', 'long', 'short',
+ 'size_t', 'ssize_t', 'string', 'uchar', 'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'ulong',
+ 'unichar', 'ushort'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+local ml_str = '@' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + ml_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('uUlLfFdDmM')^-1))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
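
Note the second argument in `lexer.to_eol('//', true)`: it lets a backslash escape the newline so the line comment continues onto the next physical line. A rough standalone equivalent, assuming only lpeg:

    local lpeg = require('lpeg')
    local P, S = lpeg.P, lpeg.S
    local newline = S('\r\n')
    -- Approximation of lexer.to_eol('//', true): an escaped newline does
    -- not terminate the match.
    local to_eol_esc = P('//') * (P('\\') * newline + (1 - newline))^0
    local s = '// spans \\\ntwo lines'
    assert(lpeg.match(to_eol_esc, s) == #s + 1) -- consumed both physical lines
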
diff --git a/lua/lexers/vb.lua b/lua/lexers/vb.lua
index e01e6d1..5289558 100644
--- a/lua/lexers/vb.lua
+++ b/lua/lexers/vb.lua
@@ -1,64 +1,63 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- VisualBasic LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'vb'}
+local lex = lexer.new('vb', {case_insensitive_fold_points = true})
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, (P("'") + word_match({'rem'}, nil, true)) *
- l.nonnewline^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', true, true))
-
--- Numbers.
-local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match({
-- Control.
- 'If', 'Then', 'Else', 'ElseIf', 'While', 'Wend', 'For', 'To', 'Each',
- 'In', 'Step', 'Case', 'Select', 'Return', 'Continue', 'Do',
- 'Until', 'Loop', 'Next', 'With', 'Exit',
+ 'If', 'Then', 'Else', 'ElseIf', 'While', 'Wend', 'For', 'To', 'Each', 'In', 'Step', 'Case',
+ 'Select', 'Return', 'Continue', 'Do', 'Until', 'Loop', 'Next', 'With', 'Exit',
-- Operators.
'Mod', 'And', 'Not', 'Or', 'Xor', 'Is',
-- Storage types.
- 'Call', 'Class', 'Const', 'Dim', 'ReDim', 'Preserve', 'Function', 'Sub',
- 'Property', 'End', 'Set', 'Let', 'Get', 'New', 'Randomize', 'Option',
- 'Explicit', 'On', 'Error', 'Execute',
+ 'Call', 'Class', 'Const', 'Dim', 'ReDim', 'Preserve', 'Function', 'Sub', 'Property', 'End', 'Set',
+ 'Let', 'Get', 'New', 'Randomize', 'Option', 'Explicit', 'On', 'Error', 'Execute', 'Module',
-- Storage modifiers.
'Private', 'Public', 'Default',
-- Constants.
'Empty', 'False', 'Nothing', 'Null', 'True'
-}, nil, true))
+}, true)))
-- Types.
-local type = token(l.TYPE, word_match({
- 'Boolean', 'Byte', 'Char', 'Date', 'Decimal', 'Double', 'Long', 'Object',
- 'Short', 'Single', 'String'
-}, nil, true))
+lex:add_rule('type', token(lexer.TYPE, word_match(
+ 'Boolean Byte Char Date Decimal Double Long Object Short Single String', true)))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol("'" + word_match('rem', true))))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=><+-*^&:.,_()'))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false)))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'type', type},
- {'comment', comment},
- {'identifier', identifier},
- {'string', string},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2))
-return M
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()')))
+
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'If', 'End If')
+lex:add_fold_point(lexer.KEYWORD, 'Select', 'End Select')
+lex:add_fold_point(lexer.KEYWORD, 'For', 'Next')
+lex:add_fold_point(lexer.KEYWORD, 'While', 'End While')
+lex:add_fold_point(lexer.KEYWORD, 'While', 'Wend')
+lex:add_fold_point(lexer.KEYWORD, 'Do', 'Loop')
+lex:add_fold_point(lexer.KEYWORD, 'With', 'End With')
+lex:add_fold_point(lexer.KEYWORD, 'Sub', 'End Sub')
+lex:add_fold_point(lexer.KEYWORD, 'Function', 'End Function')
+lex:add_fold_point(lexer.KEYWORD, 'Property', 'End Property')
+lex:add_fold_point(lexer.KEYWORD, 'Module', 'End Module')
+lex:add_fold_point(lexer.KEYWORD, 'Class', 'End Class')
+lex:add_fold_point(lexer.KEYWORD, 'Try', 'End Try')
+
+return lex
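
Passing `true` to `word_match`, as the keyword and type rules above do, makes matching case-insensitive; the `case_insensitive_fold_points` option plays the same role for the `If`/`End If` pairs. A small sketch, assuming the lexer module is loadable:

    local lexer = require('lexer')
    local lpeg = require('lpeg')
    local kw = lexer.word_match('if then else', true) * -1
    assert(lpeg.match(kw, 'If'))      -- case-insensitive hit
    assert(lpeg.match(kw, 'THEN'))
    assert(not lpeg.match(kw, 'iff')) -- whole words only
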
diff --git a/lua/lexers/vbscript.lua b/lua/lexers/vbscript.lua
index e906a25..69e3dca 100644
--- a/lua/lexers/vbscript.lua
+++ b/lua/lexers/vbscript.lua
@@ -14,7 +14,7 @@ local ws = token(l.WHITESPACE, l.space^1)
local comment = token(l.COMMENT, (P("'") + word_match({'rem'}, nil, true)) * l.nonnewline^0)
-- Strings.
-local string = token(l.STRING, l.delimited_range('"', true, true))
+local string = token(l.STRING, l.range('"', true, true))
-- Numbers.
local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
diff --git a/lua/lexers/vcard.lua b/lua/lexers/vcard.lua
index f19473e..2ee82ba 100644
--- a/lua/lexers/vcard.lua
+++ b/lua/lexers/vcard.lua
@@ -1,97 +1,71 @@
--- Copyright (c) 2015-2017 Piotr Orzechowski [drzewo.org]. See LICENSE.
+-- Copyright (c) 2015-2022 Piotr Orzechowski [drzewo.org]. See LICENSE.
-- vCard 2.1, 3.0 and 4.0 LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'vcard'}
+local lex = lexer.new('vcard')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Begin vCard, end vCard.
+lex:add_rule('begin_sequence', token(lexer.KEYWORD, 'BEGIN') * token(lexer.OPERATOR, ':') *
+ token(lexer.COMMENT, 'VCARD'))
+lex:add_rule('end_sequence', token(lexer.KEYWORD, 'END') * token(lexer.OPERATOR, ':') *
+ token(lexer.COMMENT, 'VCARD'))
+
+-- vCard version (in v3.0 and v4.0 must appear immediately after BEGIN:VCARD).
+lex:add_rule('version_sequence', token(lexer.KEYWORD, 'VERSION') * token(lexer.OPERATOR, ':') *
+ token(lexer.CONSTANT, lexer.digit^1 * ('.' * lexer.digit^1)^-1))
-- Required properties.
-local required_property = token(l.KEYWORD, word_match({
- 'BEGIN', 'END', 'FN', 'N' --[[ Not required in v4.0. ]], 'VERSION'
-}, nil, true)) * #P(':')
+local required_property = token(lexer.KEYWORD, word_match({
+ 'BEGIN', 'END', 'FN', 'VERSION', --
+ 'N' -- Not required in v4.0.
+}, true)) * #P(':')
+lex:add_rule('required_property', required_property)
-- Supported properties.
-local supported_property = token(l.TYPE, word_match({
- 'ADR', 'AGENT' --[[ Not supported in v4.0. ]],
- 'ANNIVERSARY' --[[ Supported in v4.0 only. ]], 'BDAY',
- 'CALADRURI' --[[ Supported in v4.0 only. ]],
- 'CALURI' --[[ Supported in v4.0 only. ]], 'CATEGORIES',
- 'CLASS' --[[ Supported in v3.0 only. ]],
- 'CLIENTPIDMAP' --[[ Supported in v4.0 only. ]], 'EMAIL', 'END',
- 'FBURL' --[[ Supported in v4.0 only. ]],
- 'GENDER' --[[ Supported in v4.0 only. ]], 'GEO',
- 'IMPP' --[[ Not supported in v2.1. ]], 'KEY',
- 'KIND' --[[ Supported in v4.0 only. ]],
- 'LABEL' --[[ Not supported in v4.0. ]],
- 'LANG' --[[ Supported in v4.0 only. ]], 'LOGO',
- 'MAILER' --[[ Not supported in v4.0. ]],
- 'MEMBER' --[[ Supported in v4.0 only. ]],
- 'NAME' --[[ Supported in v3.0 only. ]],
- 'NICKNAME' --[[ Not supported in v2.1. ]], 'NOTE', 'ORG', 'PHOTO',
- 'PRODID' --[[ Not supported in v2.1. ]],
- 'PROFILE' --[[ Not supported in v4.0. ]],
- 'RELATED' --[[ Supported in v4.0 only. ]], 'REV', 'ROLE',
- 'SORT-STRING' --[[ Not supported in v4.0. ]], 'SOUND', 'SOURCE', 'TEL',
- 'TITLE', 'TZ', 'UID', 'URL', 'XML' --[[ Supported in v4.0 only. ]]
-}, nil, true)) * #S(':;')
+local supported_property = token(lexer.TYPE, word_match({
+ 'ADR', 'BDAY', 'CATEGORIES', 'EMAIL', 'END', 'GEO', 'KEY', 'LOGO', 'NOTE', 'ORG', 'PHOTO', 'REV',
+ 'ROLE', 'SOUND', 'SOURCE', 'TEL', 'TITLE', 'TZ', 'UID', 'URL',
+ -- Supported in v4.0 only.
+ 'ANNIVERSARY', 'CALADRURI', 'CALURI', 'CLIENTPIDMAP', 'FBURL', 'GENDER', 'KIND', 'LANG', 'MEMBER',
+ 'RELATED', 'XML',
+ -- Not supported in v4.0.
+ 'AGENT', 'LABEL', 'MAILER', 'PROFILE', 'SORT-STRING',
+ -- Supported in v3.0 only.
+ 'CLASS', 'NAME',
+ -- Not supported in v2.1.
+ 'IMPP', 'NICKNAME', 'PRODID'
+}, true)) * #S(':;')
+lex:add_rule('supported_property', supported_property)
-local identifier = l.alpha^1 * l.digit^0 * (P('-') * l.alnum^1)^0
+-- Group and property.
+local identifier = lexer.alpha^1 * lexer.digit^0 * ('-' * lexer.alnum^1)^0
+local property = required_property + supported_property +
+ lexer.token(lexer.TYPE, S('xX') * '-' * identifier) * #S(':;')
+lex:add_rule('group_sequence', token(lexer.CONSTANT, lexer.starts_line(identifier)) *
+ token(lexer.OPERATOR, '.') * property)
-- Extension.
-local extension = token(l.TYPE,
- l.starts_line(S('xX') * P('-') * identifier * #S(':;')))
+lex:add_rule('extension',
+ token(lexer.TYPE, lexer.starts_line(S('xX') * '-' * identifier * #S(':;'))))
-- Parameter.
-local parameter = token(l.IDENTIFIER, l.starts_line(identifier * #S(':='))) +
- token(l.STRING, identifier) * #S(':=')
+local parameter = (token(lexer.IDENTIFIER, lexer.starts_line(identifier)) +
+ token(lexer.STRING, identifier)) * #S(':=')
+lex:add_rule('parameter', parameter)
-- Operators.
-local operator = token(l.OPERATOR, S('.:;='))
-
--- Group and property.
-local group_sequence = token(l.CONSTANT, l.starts_line(identifier)) *
- token(l.OPERATOR, P('.')) *
- (required_property + supported_property +
- l.token(l.TYPE, S('xX') * P('-') * identifier) *
- #S(':;'))
--- Begin vCard, end vCard.
-local begin_sequence = token(l.KEYWORD, P('BEGIN')) *
- token(l.OPERATOR, P(':')) * token(l.COMMENT, P('VCARD'))
-local end_sequence = token(l.KEYWORD, P('END')) * token(l.OPERATOR, P(':')) *
- token(l.COMMENT, P('VCARD'))
-
--- vCard version (in v3.0 and v4.0 must appear immediately after BEGIN:VCARD).
-local version_sequence = token(l.KEYWORD, P('VERSION')) *
- token(l.OPERATOR, P(':')) *
- token(l.CONSTANT, l.digit^1 * (P('.') * l.digit^1)^-1)
+lex:add_rule('operator', token(lexer.OPERATOR, S('.:;=')))
-- Data.
-local data = token(l.IDENTIFIER, l.any)
-
--- Rules.
-M._rules = {
- {'whitespace', ws},
- {'begin_sequence', begin_sequence},
- {'end_sequence', end_sequence},
- {'version_sequence', version_sequence},
- {'group_sequence', group_sequence},
- {'required_property', required_property},
- {'supported_property', supported_property},
- {'extension', extension},
- {'parameter', parameter},
- {'operator', operator},
- {'data', data},
-}
+lex:add_rule('data', token(lexer.IDENTIFIER, lexer.any))
--- Folding.
-M._foldsymbols = {
- _patterns = {'BEGIN', 'END'},
- [l.KEYWORD] = {['BEGIN'] = 1, ['END'] = -1}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'BEGIN', 'END')
-return M
+return lex
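
The trailing lookaheads (`#P(':')`, `#S(':;')`) are what keep property names from matching as ordinary data: the name only counts when its delimiter follows, and the lookahead consumes nothing. In isolation:

    local lpeg = require('lpeg')
    local P, S = lpeg.P, lpeg.S
    -- A property name matches only when ':' or ';' follows it.
    local prop = P('EMAIL') * #S(':;')
    assert(lpeg.match(prop, 'EMAIL:john@example.org') == 6)
    assert(not lpeg.match(prop, 'EMAILED'))
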
diff --git a/lua/lexers/verilog.lua b/lua/lexers/verilog.lua
index 946098e..d7cb74b 100644
--- a/lua/lexers/verilog.lua
+++ b/lua/lexers/verilog.lua
@@ -1,101 +1,88 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Verilog LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'verilog'}
+local lex = lexer.new('verilog')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"'))
-
--- Numbers.
-local bin_suffix = S('bB') * S('01_xXzZ')^1
-local oct_suffix = S('oO') * S('01234567_xXzZ')^1
-local dec_suffix = S('dD') * S('0123456789_xXzZ')^1
-local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1
-local number = token(l.NUMBER, (l.digit + '_')^1 + "'" *
- (bin_suffix + oct_suffix + dec_suffix +
- hex_suffix))
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'always', 'assign', 'begin', 'case', 'casex', 'casez', 'default', 'deassign',
- 'disable', 'else', 'end', 'endcase', 'endfunction', 'endgenerate',
- 'endmodule', 'endprimitive', 'endspecify', 'endtable', 'endtask', 'for',
- 'force', 'forever', 'fork', 'function', 'generate', 'if', 'initial', 'join',
- 'macromodule', 'module', 'negedge', 'posedge', 'primitive', 'repeat',
- 'release', 'specify', 'table', 'task', 'wait', 'while',
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'always', 'assign', 'begin', 'case', 'casex', 'casez', 'default', 'deassign', 'disable', 'else',
+ 'end', 'endcase', 'endfunction', 'endgenerate', 'endmodule', 'endprimitive', 'endspecify',
+ 'endtable', 'endtask', 'for', 'force', 'forever', 'fork', 'function', 'generate', 'if', 'initial',
+ 'join', 'macromodule', 'module', 'negedge', 'posedge', 'primitive', 'repeat', 'release',
+ 'specify', 'table', 'task', 'wait', 'while',
-- Compiler directives.
- '`include', '`define', '`undef', '`ifdef', '`ifndef', '`else', '`endif',
- '`timescale', '`resetall', '`signed', '`unsigned', '`celldefine',
- '`endcelldefine', '`default_nettype', '`unconnected_drive',
- '`nounconnected_drive', '`protect', '`endprotect', '`protected',
- '`endprotected', '`remove_gatename', '`noremove_gatename', '`remove_netname',
- '`noremove_netname', '`expand_vectornets', '`noexpand_vectornets',
- '`autoexpand_vectornets',
+ '`include', '`define', '`undef', '`ifdef', '`ifndef', '`else', '`endif', '`timescale',
+ '`resetall', '`signed', '`unsigned', '`celldefine', '`endcelldefine', '`default_nettype',
+ '`unconnected_drive', '`nounconnected_drive', '`protect', '`endprotect', '`protected',
+ '`endprotected', '`remove_gatename', '`noremove_gatename', '`remove_netname', '`noremove_netname',
+ '`expand_vectornets', '`noexpand_vectornets', '`autoexpand_vectornets',
-- Signal strengths.
- 'strong0', 'strong1', 'pull0', 'pull1', 'weak0', 'weak1', 'highz0', 'highz1',
- 'small', 'medium', 'large'
-}, '`01'))
+ 'strong0', 'strong1', 'pull0', 'pull1', 'weak0', 'weak1', 'highz0', 'highz1', 'small', 'medium',
+ 'large'
+}))
-- Function.
-local func = token(l.FUNCTION, word_match({
- '$stop', '$finish', '$time', '$stime', '$realtime', '$settrace',
- '$cleartrace', '$showscopes', '$showvars', '$monitoron', '$monitoroff',
- '$random', '$printtimescale', '$timeformat', '$display',
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ '$stop', '$finish', '$time', '$stime', '$realtime', '$settrace', '$cleartrace', '$showscopes',
+ '$showvars', '$monitoron', '$monitoroff', '$random', '$printtimescale', '$timeformat', '$display',
-- Built-in primitives.
- 'and', 'nand', 'or', 'nor', 'xor', 'xnor', 'buf', 'bufif0', 'bufif1', 'not',
- 'notif0', 'notif1', 'nmos', 'pmos', 'cmos', 'rnmos', 'rpmos', 'rcmos', 'tran',
- 'tranif0', 'tranif1', 'rtran', 'rtranif0', 'rtranif1', 'pullup', 'pulldown'
-}, '$01'))
+ 'and', 'nand', 'or', 'nor', 'xor', 'xnor', 'buf', 'bufif0', 'bufif1', 'not', 'notif0', 'notif1',
+ 'nmos', 'pmos', 'cmos', 'rnmos', 'rpmos', 'rcmos', 'tran', 'tranif0', 'tranif1', 'rtran',
+ 'rtranif0', 'rtranif1', 'pullup', 'pulldown'
+}))
-- Types.
-local type = token(l.TYPE, word_match({
- 'integer', 'reg', 'time', 'realtime', 'defparam', 'parameter', 'event',
- 'wire', 'wand', 'wor', 'tri', 'triand', 'trior', 'tri0', 'tri1', 'trireg',
- 'vectored', 'scalared', 'input', 'output', 'inout',
- 'supply0', 'supply1'
-}, '01'))
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'integer', 'reg', 'time', 'realtime', 'defparam', 'parameter', 'event', 'wire', 'wand', 'wor',
+ 'tri', 'triand', 'trior', 'tri0', 'tri1', 'trireg', 'vectored', 'scalared', 'input', 'output',
+ 'inout', 'supply0', 'supply1'
+}))
-- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Operators.
-local operator = token(l.OPERATOR, S('=~+-/*<>%&|^~,:;()[]{}'))
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
+
+-- Comments.
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-M._rules = {
- {'whitespace', ws},
- {'number', number},
- {'keyword', keyword},
- {'function', func},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'operator', operator},
-}
+-- Numbers.
+local bin_suffix = S('bB') * S('01_xXzZ')^1 * -lexer.xdigit
+local oct_suffix = S('oO') * S('01234567_xXzZ')^1
+local dec_suffix = S('dD') * S('0123456789_xXzZ')^1
+local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1
+lex:add_rule('number', token(lexer.NUMBER, (lexer.digit + '_')^1 + "'" *
+ (bin_suffix + oct_suffix + dec_suffix + hex_suffix)))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=~+-/*<>%&|^~,:;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[a-z]+', '[%(%){}]', '/%*', '%*/', '//'},
- [l.KEYWORD] = {
- case = 1, casex = 1, casez = 1, endcase = -1, ['function'] = 1,
- endfunction = -1, fork = 1, join = -1, table = 1, endtable = -1, task = 1,
- endtask = -1, generate = 1, endgenerate = -1, specify = 1, endspecify = -1,
- primitive = 1, endprimitive = -1, ['module'] = 1, endmodule = -1, begin = 1,
- ['end'] = -1
- },
- [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.KEYWORD, 'case', 'endcase')
+lex:add_fold_point(lexer.KEYWORD, 'casex', 'endcase')
+lex:add_fold_point(lexer.KEYWORD, 'casez', 'endcase')
+lex:add_fold_point(lexer.KEYWORD, 'function', 'endfunction')
+lex:add_fold_point(lexer.KEYWORD, 'fork', 'join')
+lex:add_fold_point(lexer.KEYWORD, 'table', 'endtable')
+lex:add_fold_point(lexer.KEYWORD, 'task', 'endtask')
+lex:add_fold_point(lexer.KEYWORD, 'generate', 'endgenerate')
+lex:add_fold_point(lexer.KEYWORD, 'specify', 'endspecify')
+lex:add_fold_point(lexer.KEYWORD, 'primitive', 'endprimitive')
+lex:add_fold_point(lexer.KEYWORD, 'module', 'endmodule')
+lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')
+lex:add_fold_point(lexer.OPERATOR, '(', ')')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
-return M
+return lex
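
A standalone check of the number rule, trimmed to the binary and hex branches; the new `-lexer.xdigit` guard after the binary digits stops a malformed literal like 'b12 from half-matching. Assumes only lpeg:

    local lpeg = require('lpeg')
    local S, R = lpeg.S, lpeg.R
    local digit, xdigit = R('09'), R('09', 'af', 'AF')
    local bin_suffix = S('bB') * S('01_xXzZ')^1 * -xdigit
    local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1
    local number = (digit + '_')^1 + "'" * (bin_suffix + hex_suffix)
    assert(lpeg.match(number, "'b1010_x")) -- sized binary with unknowns
    assert(lpeg.match(number, "'hDEAD"))   -- hex literal
    assert(not lpeg.match(number, "'b12")) -- '2' is not a binary digit
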
diff --git a/lua/lexers/vhdl.lua b/lua/lexers/vhdl.lua
index 04cc417..5cb0bad 100644
--- a/lua/lexers/vhdl.lua
+++ b/lua/lexers/vhdl.lua
@@ -1,89 +1,70 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- VHDL LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'vhdl'}
+local lex = lexer.new('vhdl')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '--' * l.nonnewline^0)
-
--- Strings.
-local sq_str = l.delimited_range("'", true, true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'access', 'after', 'alias', 'all', 'architecture', 'array', 'assert',
- 'attribute', 'begin', 'block', 'body', 'buffer', 'bus', 'case', 'component',
- 'configuration', 'constant', 'disconnect', 'downto', 'else', 'elsif', 'end',
- 'entity', 'exit', 'file', 'for', 'function', 'generate', 'generic', 'group',
- 'guarded', 'if', 'impure', 'in', 'inertial', 'inout', 'is', 'label',
- 'library', 'linkage', 'literal', 'loop', 'map', 'new', 'next', 'null', 'of',
- 'on', 'open', 'others', 'out', 'package', 'port', 'postponed', 'procedure',
- 'process', 'pure', 'range', 'record', 'register', 'reject', 'report',
- 'return', 'select', 'severity', 'signal', 'shared', 'subtype', 'then', 'to',
- 'transport', 'type', 'unaffected', 'units', 'until', 'use', 'variable',
- 'wait', 'when', 'while', 'with', 'note', 'warning', 'error', 'failure',
- 'and', 'nand', 'or', 'nor', 'xor', 'xnor', 'rol', 'ror', 'sla', 'sll', 'sra',
- 'srl', 'mod', 'rem', 'abs', 'not',
- 'false', 'true'
-})
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'access', 'after', 'alias', 'all', 'architecture', 'array', 'assert', 'attribute', 'begin',
+ 'block', 'body', 'buffer', 'bus', 'case', 'component', 'configuration', 'constant', 'disconnect',
+ 'downto', 'else', 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function', 'generate',
+ 'generic', 'group', 'guarded', 'if', 'impure', 'in', 'inertial', 'inout', 'is', 'label',
+ 'library', 'linkage', 'literal', 'loop', 'map', 'new', 'next', 'null', 'of', 'on', 'open',
+ 'others', 'out', 'package', 'port', 'postponed', 'procedure', 'process', 'pure', 'range',
+ 'record', 'register', 'reject', 'report', 'return', 'select', 'severity', 'signal', 'shared',
+ 'subtype', 'then', 'to', 'transport', 'type', 'unaffected', 'units', 'until', 'use', 'variable',
+ 'wait', 'when', 'while', 'with', --
+ 'note', 'warning', 'error', 'failure', --
+ 'and', 'nand', 'or', 'nor', 'xor', 'xnor', 'rol', 'ror', 'sla', 'sll', 'sra', 'srl', 'mod', 'rem', --
+ 'abs', 'not', 'false', 'true'
+}))
-- Functions.
-local func = token(l.FUNCTION, word_match{
- 'rising_edge', 'shift_left', 'shift_right', 'rotate_left', 'rotate_right',
- 'resize', 'std_match', 'to_integer', 'to_unsigned', 'to_signed', 'unsigned',
- 'signed', 'to_bit', 'to_bitvector', 'to_stdulogic', 'to_stdlogicvector',
- 'to_stdulogicvector'
-})
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'rising_edge', 'shift_left', 'shift_right', 'rotate_left', 'rotate_right', 'resize', 'std_match',
+ 'to_integer', 'to_unsigned', 'to_signed', 'unsigned', 'signed', 'to_bit', 'to_bitvector',
+ 'to_stdulogic', 'to_stdlogicvector', 'to_stdulogicvector'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'bit', 'bit_vector', 'character', 'boolean', 'integer', 'real', 'time',
- 'string', 'severity_level', 'positive', 'natural', 'signed', 'unsigned',
- 'line', 'text', 'std_logic', 'std_logic_vector', 'std_ulogic',
- 'std_ulogic_vector', 'qsim_state', 'qsim_state_vector', 'qsim_12state',
- 'qsim_12state_vector', 'qsim_strength', 'mux_bit', 'mux_vectory', 'reg_bit',
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'bit', 'bit_vector', 'character', 'boolean', 'integer', 'real', 'time', 'string',
+ 'severity_level', 'positive', 'natural', 'signed', 'unsigned', 'line', 'text', 'std_logic',
+ 'std_logic_vector', 'std_ulogic', 'std_ulogic_vector', 'qsim_state', 'qsim_state_vector',
+ 'qsim_12state', 'qsim_12state_vector', 'qsim_strength', 'mux_bit', 'mux_vectory', 'reg_bit',
'reg_vector', 'wor_bit', 'wor_vector'
-})
+}))
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- 'EVENT', 'BASE', 'LEFT', 'RIGHT', 'LOW', 'HIGH', 'ASCENDING', 'IMAGE',
- 'VALUE', 'POS', 'VAL', 'SUCC', 'VAL', 'POS', 'PRED', 'VAL', 'POS', 'LEFTOF',
- 'RIGHTOF', 'LEFT', 'RIGHT', 'LOW', 'HIGH', 'RANGE', 'REVERSE', 'LENGTH',
- 'ASCENDING', 'DELAYED', 'STABLE', 'QUIET', 'TRANSACTION', 'EVENT', 'ACTIVE',
- 'LAST', 'LAST', 'LAST', 'DRIVING', 'DRIVING', 'SIMPLE', 'INSTANCE', 'PATH'
-})
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ 'EVENT', 'BASE', 'LEFT', 'RIGHT', 'LOW', 'HIGH', 'ASCENDING', 'IMAGE', 'VALUE', 'POS', 'VAL',
+ 'SUCC', 'VAL', 'POS', 'PRED', 'VAL', 'POS', 'LEFTOF', 'RIGHTOF', 'LEFT', 'RIGHT', 'LOW', 'HIGH',
+ 'RANGE', 'REVERSE', 'LENGTH', 'ASCENDING', 'DELAYED', 'STABLE', 'QUIET', 'TRANSACTION', 'EVENT',
+ 'ACTIVE', 'LAST', 'LAST', 'LAST', 'DRIVING', 'DRIVING', 'SIMPLE', 'INSTANCE', 'PATH'
+}))
-- Identifiers.
-local word = (l.alpha + "'") * (l.alnum + "_" + "'")^1
-local identifier = token(l.IDENTIFIER, word)
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + "'") * (lexer.alnum + S("_'"))^1))
--- Operators.
-local operator = token(l.OPERATOR, S('=/!:;<>+-/*%&|^~()'))
+-- Strings.
+local sq_str = lexer.range("'", true, false)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--')))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'function', func},
- {'type', type},
- {'constant', constant},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('=/!:;<>+-/*%&|^~()')))
-return M
+return lex
diff --git a/lua/lexers/wsf.lua b/lua/lexers/wsf.lua
index 37cb33e..6972cfe 100644
--- a/lua/lexers/wsf.lua
+++ b/lua/lexers/wsf.lua
@@ -1,102 +1,90 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- WSF LPeg lexer (based on XML).
-- Contributed by Jeff Stone.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'wsf'}
+local lex = lexer.new('wsf')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Comments.
-local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
-
--- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"', false, true)
-local string = #S('\'"') * l.last_char_includes('=') *
- token(l.STRING, sq_str + dq_str)
-
-local in_tag = #P((1 - S'><')^0 * '>')
-
--- Numbers.
-local number = #l.digit * l.last_char_includes('=') *
- token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
-
-local alpha = R('az', 'AZ', '\127\255')
-local word_char = l.alnum + S('_-:.??')
-local identifier = (l.alpha + S('_-:.??')) * word_char^0
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
-- Elements.
+local alpha = lpeg.R('az', 'AZ', '\127\255')
+local word_char = lexer.alnum + S('_-:.?')
+local identifier = (alpha + S('_-:.?')) * word_char^0
local element = token('element', '<' * P('/')^-1 * identifier)
-
--- Attributes.
-local attribute = token('attribute', identifier) * #(l.space^0 * '=')
+lex:add_rule('element', element)
+lex:add_style('element', lexer.styles.keyword)
-- Closing tags.
local tag_close = token('element', P('/')^-1 * '>')
+lex:add_rule('tag_close', tag_close)
+
+-- Attributes.
+local attribute = token('attribute', identifier) * #(lexer.space^0 * '=')
+lex:add_rule('attribute', attribute)
+lex:add_style('attribute', lexer.styles.type)
-- Equals.
-local equals = token(l.OPERATOR, '=') * in_tag
+local in_tag = P(function(input, index)
+ local before = input:sub(1, index - 1)
+ local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+ if s and e then return s > e and index or nil end
+ if s then return index end
+ return input:find('^[^<]->', index) and index or nil
+end)
+
+local equals = token(lexer.OPERATOR, '=') * in_tag
+lex:add_rule('equals', equals)
+
+-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
+local string = #S('\'"') * lexer.last_char_includes('=') * token(lexer.STRING, sq_str + dq_str)
+lex:add_rule('string', string)
+
+-- Numbers.
+local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1)
+lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number * in_tag)
-- Entities.
-local entity = token('entity', '&' * word_match{
- 'lt', 'gt', 'amp', 'apos', 'quot'
-} * ';')
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'element', element},
- {'tag_close', tag_close},
- {'attribute', attribute},
- {'equals', equals},
- {'string', string},
- {'number', number},
- {'entity', entity}
-}
-
-M._tokenstyles = {
- element = l.STYLE_KEYWORD,
- attribute = l.STYLE_TYPE,
- entity = l.STYLE_OPERATOR
-}
-
-M._foldsymbols = {
- _patterns = {'</?', '/>', '<!%-%-', '%-%->'},
- element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
- [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
-}
+lex:add_rule('entity', token('entity', '&' * word_match('lt gt amp apos quot') * ';'))
+lex:add_style('entity', lexer.styles.operator)
+
+-- Fold points.
+local function disambiguate_lt(text, pos, line, s) return not line:find('^</', s) and 1 or -1 end
+lex:add_fold_point('element', '<', disambiguate_lt)
+lex:add_fold_point('element', '/>', -1)
+lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
-- Finally, add JavaScript and VBScript as embedded languages
-- Tags that start embedded languages.
-M.embed_start_tag = element *
- (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 *
- ws^0 * tag_close
-M.embed_end_tag = element * tag_close
+local embed_start_tag = element * (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * ws^0 *
+ tag_close
+local embed_end_tag = element * tag_close
-- Embedded JavaScript.
-local js = l.load('javascript')
+local js = lexer.load('javascript')
local js_start_rule = #(P('<script') * (P(function(input, index)
- if input:find('^%s+language%s*=%s*(["\'])[jJ][ava]*[sS]cript%1', index) then
- return index
- end
-end) + '>')) * M.embed_start_tag -- <script language="javascript">
-local js_end_rule = #('</script' * ws^0 * '>') * M.embed_end_tag -- </script>
-l.embed_lexer(M, js, js_start_rule, js_end_rule)
+ if input:find('^%s+language%s*=%s*(["\'])[jJ][ava]*[sS]cript%1', index) then return index end
+end) + '>')) * embed_start_tag -- <script language="javascript">
+local js_end_rule = #('</script' * ws^0 * '>') * embed_end_tag -- </script>
+lex:embed(js, js_start_rule, js_end_rule)
-- Embedded VBScript.
-local vbs = l.load('vbscript')
+local vbs = lexer.load('vb', 'vbscript')
local vbs_start_rule = #(P('<script') * (P(function(input, index)
- if input:find('^%s+language%s*=%s*(["\'])[vV][bB][sS]cript%1', index) then
- return index
- end
-end) + '>')) * M.embed_start_tag -- <script language="vbscript">
-local vbs_end_rule = #('</script' * ws^0 * '>') * M.embed_end_tag -- </script>
-l.embed_lexer(M, vbs, vbs_start_rule, vbs_end_rule)
-
-return M
+ if input:find('^%s+language%s*=%s*(["\'])[vV][bB][sS]cript%1', index) then return index end
+end) + '>')) * embed_start_tag -- <script language="vbscript">
+local vbs_end_rule = #('</script' * ws^0 * '>') * embed_end_tag -- </script>
+lex:embed(vbs, vbs_start_rule, vbs_end_rule)
+
+return lex
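
The new in_tag match-time function replaces the old one-line lookahead; it succeeds only when the current position lies between an unclosed '<' and its '>'. Exercised in isolation, assuming only lpeg:

    local lpeg = require('lpeg')
    local P = lpeg.P
    local in_tag = P(function(input, index)
      local before = input:sub(1, index - 1)
      local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
      if s and e then return s > e and index or nil end
      if s then return index end
      return input:find('^[^<]->', index) and index or nil
    end)
    assert(lpeg.match(in_tag, '<job id="x">', 9))        -- inside the tag
    assert(not lpeg.match(in_tag, '<job id="x"> y', 14)) -- past the '>'
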
diff --git a/lua/lexers/xml.lua b/lua/lexers/xml.lua
index e0098e5..640e924 100644
--- a/lua/lexers/xml.lua
+++ b/lua/lexers/xml.lua
@@ -1,93 +1,78 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- XML LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'xml'}
+local lex = lexer.new('xml')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Comments and CDATA.
-local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
-local cdata = token('cdata', '<![CDATA[' * (l.any - ']]>')^0 * P(']]>')^-1)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
+lex:add_rule('cdata', token('cdata', lexer.range('<![CDATA[', ']]>')))
+lex:add_style('cdata', lexer.styles.comment)
--- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"', false, true)
-local string = #S('\'"') * l.last_char_includes('=') *
- token(l.STRING, sq_str + dq_str)
-
-local in_tag = #P((1 - S'><')^0 * '>')
-
--- Numbers.
-local number = #l.digit * l.last_char_includes('=') *
- token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
+-- Doctypes and other markup tags.
+local alpha = lpeg.R('az', 'AZ', '\127\255')
+local word_char = lexer.alnum + S('_-:.?')
+local identifier = (alpha + S('_-:.?')) * word_char^0
+local doctype = token('doctype', '<!DOCTYPE') * ws * token('doctype', identifier) *
+ (ws * identifier)^-1 * (1 - P('>'))^0 * token('doctype', '>')
+lex:add_rule('doctype', doctype)
+lex:add_style('doctype', lexer.styles.comment)
-local alpha = R('az', 'AZ', '\127\255')
-local word_char = l.alnum + S('_-:.??')
-local identifier = (l.alpha + S('_-:.??')) * word_char^0
-local namespace = token(l.OPERATOR, ':') * token('namespace', identifier)
+-- Processing instructions.
+lex:add_rule('proc_insn', token('proc_insn', '<?' * (1 - P('?>'))^0 * P('?>')^-1))
+lex:add_style('proc_insn', lexer.styles.comment)
-- Elements.
-local element = token('element', '<' * P('/')^-1 * identifier) * namespace^-1
-
--- Attributes.
-local attribute = token('attribute', identifier) * namespace^-1 *
- #(l.space^0 * '=')
+local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier)
+lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) * namespace^-1)
+lex:add_style('element', lexer.styles.keyword)
+lex:add_style('namespace', lexer.styles.class)
-- Closing tags.
-local close_tag = token('element', P('/')^-1 * '>')
+lex:add_rule('close_tag', token('element', P('/')^-1 * '>'))
+
+-- Attributes.
+lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 * #(lexer.space^0 * '='))
+lex:add_style('attribute', lexer.styles.type)
-- Equals.
-local equals = token(l.OPERATOR, '=') * in_tag
+-- TODO: performance is terrible on large files.
+local in_tag = P(function(input, index)
+ local before = input:sub(1, index - 1)
+ local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+ if s and e then return s > e and index or nil end
+ if s then return index end
+ return input:find('^[^<]->', index) and index or nil
+end)
+
+-- lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag
+
+-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
+lex:add_rule('string',
+ #S('\'"') * lexer.last_char_includes('=') * token(lexer.STRING, sq_str + dq_str))
+
+-- Numbers.
+local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1)
+lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number) -- *in_tag)
-- Entities.
-local entity = token('entity', '&' * word_match{
- 'lt', 'gt', 'amp', 'apos', 'quot'
-} * ';')
+lex:add_rule('entity', token('entity', '&' * word_match('lt gt amp apos quot') * ';'))
+lex:add_style('entity', lexer.styles.operator)
--- Doctypes and other markup tags.
-local doctype = token('doctype', P('<!DOCTYPE')) * ws *
- token('doctype', identifier) * (ws * identifier)^-1 *
- (1 - P('>'))^0 * token('doctype', '>')
+-- Fold Points.
+local function disambiguate_lt(text, pos, line, s) return not line:find('^</', s) and 1 or -1 end
+lex:add_fold_point('element', '<', disambiguate_lt)
+lex:add_fold_point('element', '/>', -1)
+lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
+lex:add_fold_point('cdata', '<![CDATA[', ']]>')
--- Processing instructions.
-local proc_insn = token('proc_insn', P('<?') * (1 - P('?>'))^0 * P('?>')^-1)
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'cdata', cdata},
- {'doctype', doctype},
- {'proc_insn', proc_insn},
- {'element', element},
- {'close_tag', close_tag},
- {'attribute', attribute},
- {'equals', equals},
- {'string', string},
- {'number', number},
- {'entity', entity},
-}
-
-M._tokenstyles = {
- element = l.STYLE_KEYWORD,
- namespace = l.STYLE_CLASS,
- attribute = l.STYLE_TYPE,
- cdata = l.STYLE_COMMENT,
- entity = l.STYLE_OPERATOR,
- doctype = l.STYLE_COMMENT,
- proc_insn = l.STYLE_COMMENT,
- --markup = l.STYLE_COMMENT
-}
-
-M._foldsymbols = {
- _patterns = {'</?', '/>', '<!%-%-', '%-%->', '<!%[CDATA%[', '%]%]>'},
- element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
- [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
- cdata = {['<![CDATA['] = 1, [']]>'] = -1}
-}
-
-return M
+return lex
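
The fold disambiguator's signature follows the add_fold_point callback convention (text, pos, line, s), and in isolation it is easy to test:

    -- '<' opens a fold (+1) unless the line is a closing tag, which ends
    -- one (-1).
    local function disambiguate_lt(text, pos, line, s)
      return not line:find('^</', s) and 1 or -1
    end
    assert(disambiguate_lt(nil, nil, '<book>', 1) == 1)
    assert(disambiguate_lt(nil, nil, '</book>', 1) == -1)
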
diff --git a/lua/lexers/xs.lua b/lua/lexers/xs.lua
index 2243a6f..492590a 100644
--- a/lua/lexers/xs.lua
+++ b/lua/lexers/xs.lua
@@ -1,75 +1,59 @@
--- Copyright 2017 Michael Forney. See LICENSE.
--- Copyright 2017 David B. Lamkins. See LICENSE.
+-- Copyright 2017-2022 David B. Lamkins. See LICENSE.
-- xs LPeg lexer.
+-- Adapted from rc lexer by Michael Forney.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'xs'}
+local lex = lexer.new('xs')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'access', 'alias', 'catch', 'cd', 'dirs', 'echo', 'else', 'escape', 'eval', 'exec', 'exit',
+ 'false', 'fn-', 'fn', 'for', 'forever', 'fork', 'history', 'if', 'jobs', 'let', 'limit', 'local',
+ 'map', 'omap', 'popd', 'printf', 'pushd', 'read', 'result', 'set-', 'switch', 'throw', 'time',
+ 'true', 'umask', 'until', 'unwind-protect', 'var', 'vars', 'wait', 'whats', 'while', ':lt', ':le',
+ ':gt', ':ge', ':eq', ':ne', '~', '~~', '...', '.'
+}))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local str = l.delimited_range("'", false, true)
+local str = lexer.range("'", false, true)
local herestr = '<<<' * str
local heredoc = '<<' * P(function(input, index)
- local s, e, _, delimiter =
- input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1', index)
+ local s, e, _, delimiter = input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1', index)
if s == index and delimiter then
delimiter = delimiter:gsub('[%%+-.?]', '%%%1')
- local _, e = input:find('[\n\r]'..delimiter..'[\n\r]', e)
+ e = select(2, input:find('[\n\r]' .. delimiter .. '[\n\r]', e))
return e and e + 1 or #input + 1
end
end)
-local string = token(l.STRING, str + herestr + heredoc)
+lex:add_rule('string', token(lexer.STRING, str + herestr + heredoc))
--- Numbers.
-local number = token(l.NUMBER, l.integer + l.float)
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- 'access', 'alias', 'catch', 'cd', 'dirs', 'echo', 'else', 'escape', 'eval',
- 'exec', 'exit', 'false', 'fn-', 'fn', 'for', 'forever', 'fork', 'history',
- 'if', 'jobs', 'let', 'limit', 'local', 'map', 'omap', 'popd', 'printf',
- 'pushd', 'read', 'result', 'set-', 'switch', 'throw', 'time', 'true',
- 'umask', 'until', 'unwind-protect', 'var', 'vars', 'wait', 'whats', 'while',
- ':lt', ':le', ':gt', ':ge', ':eq', ':ne', '~', '~~', '...', '.',
-}, '!"%*+,-./:?@[]~'))
+-- Numbers.
+-- lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Constants.
-local constant = token(l.CONSTANT, '$&' * l.word)
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('constant', token(lexer.CONSTANT, '$&' * lexer.word))
-- Variables.
-local variable = token(l.VARIABLE,
- '$' * S('"#')^-1 * ('*' + l.digit^1 + l.word))
+lex:add_rule('variable',
+ token(lexer.VARIABLE, '$' * S('"#')^-1 * ('*' + lexer.digit^1 + lexer.word)))
-- Operators.
-local operator = token(l.OPERATOR, S('@`=!<>*&^|;?()[]{}') + '\\\n')
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'constant', constant},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('@`=!<>*&^|;?()[]{}') + '\\\n'))
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex
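
The heredoc matcher above is the trickiest rule in this file: the match-time function first reads the delimiter word at the current position, then scans ahead for a line consisting of that delimiter and accepts everything up to it. A standalone run, assuming only lpeg:

    local lpeg = require('lpeg')
    local P = lpeg.P
    local heredoc = '<<' * P(function(input, index)
      local s, e, _, delimiter = input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1', index)
      if s == index and delimiter then
        delimiter = delimiter:gsub('[%%+-.?]', '%%%1')
        e = select(2, input:find('[\n\r]' .. delimiter .. '[\n\r]', e))
        return e and e + 1 or #input + 1
      end
    end)
    assert(lpeg.match(heredoc, '<<EOF\nhello world\nEOF\n'))
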
diff --git a/lua/lexers/xtend.lua b/lua/lexers/xtend.lua
index e761979..c22d079 100644
--- a/lua/lexers/xtend.lua
+++ b/lua/lexers/xtend.lua
@@ -1,112 +1,90 @@
--- Copyright (c) 2014-2017 Piotr Orzechowski [drzewo.org]. See LICENSE.
+-- Copyright (c) 2014-2022 Piotr Orzechowski [drzewo.org]. See LICENSE.
-- Xtend LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'xtend'}
+local lex = lexer.new('xtend')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
-local comment = token(l.COMMENT, line_comment + block_comment)
+-- Classes.
+lex:add_rule('class', token(lexer.KEYWORD, 'class') * ws^1 * token(lexer.CLASS, lexer.word))
--- Strings.
-local sq_str = l.delimited_range("'", true)
-local dq_str = l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ -- General.
+ 'abstract', 'annotation', 'as', 'case', 'catch', 'class', 'create', 'def', 'default', 'dispatch',
+ 'do', 'else', 'enum', 'extends', 'extension', 'final', 'finally', 'for', 'if', 'implements',
+ 'import', 'interface', 'instanceof', 'it', 'new', 'override', 'package', 'private', 'protected',
+ 'public', 'return', 'self', 'static', 'super', 'switch', 'synchronized', 'this', 'throw',
+ 'throws', 'try', 'typeof', 'val', 'var', 'while',
+ -- Templates.
+ 'AFTER', 'BEFORE', 'ENDFOR', 'ENDIF', 'FOR', 'IF', 'SEPARATOR',
+ -- Literals.
+ 'true', 'false', 'null'
+}))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void', 'Boolean', 'Byte',
+ 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short', 'String'
+}))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Templates.
-local templ_str = "'''" * (l.any - P("'''"))^0 * P("'''")^-1
-local template = token('template', templ_str, true)
+lex:add_rule('template', token('template', lexer.range("'''")))
+lex:add_style('template', lexer.styles.embedded)
+
+-- Strings.
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
local small_suff = S('lL')
-local med_suff = P(S('bB') * S('iI'))
-local large_suff = S('dD') + S('fF') + P(S('bB') * S('dD'))
-local exp = S('eE') * l.digit^1
+local med_suff = S('bB') * S('iI')
+local large_suff = S('dD') + S('fF') + S('bB') * S('dD')
+local exp = S('eE') * lexer.digit^1
-local dec_inf = ('_' * l.digit^1)^0
-local hex_inf = ('_' * l.xdigit^1)^0
-local float_pref = l.digit^1 * '.' * l.digit^1
+local dec_inf = ('_' * lexer.digit^1)^0
+local hex_inf = ('_' * lexer.xdigit^1)^0
+local float_pref = lexer.digit^1 * '.' * lexer.digit^1
local float_suff = exp^-1 * med_suff^-1 * large_suff^-1
-local dec = l.digit * dec_inf * (small_suff^-1 + float_suff)
-local hex = l.hex_num * hex_inf * P('#' * (small_suff + med_suff))^-1
+local dec = lexer.digit * dec_inf * (small_suff^-1 + float_suff)
+local hex = lexer.hex_num * hex_inf * P('#' * (small_suff + med_suff))^-1
local float = float_pref * dec_inf * float_suff
-local number = token(l.NUMBER, float + hex + dec)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- -- General.
- 'abstract', 'annotation', 'as', 'case', 'catch', 'class', 'create', 'def',
- 'default', 'dispatch', 'do', 'else', 'enum', 'extends', 'extension', 'final',
- 'finally', 'for', 'if', 'implements', 'import', 'interface', 'instanceof',
- 'it', 'new', 'override', 'package', 'private', 'protected', 'public',
- 'return', 'self', 'static', 'super', 'switch', 'synchronized', 'this',
- 'throw', 'throws', 'try', 'typeof', 'val', 'var', 'while',
- -- Templates.
- -- 'AFTER', 'BEFORE', 'ENDFOR', 'ENDIF', 'FOR', 'IF', 'SEPARATOR',
- -- Literals.
- 'true', 'false', 'null'
-})
+lex:add_rule('number', token(lexer.NUMBER, float + hex + dec))
--- Types.
-local type = token(l.TYPE, word_match{
- 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void',
- 'Boolean', 'Byte', 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short',
- 'String'
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Annotations.
+lex:add_rule('annotation', token('annotation', '@' * lexer.word))
+lex:add_style('annotation', lexer.styles.preprocessor)
-- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#'))
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#')))
--- Annotations.
-local annotation = token('annotation', '@' * l.word)
+-- Error.
+lex:add_rule('error', token(lexer.ERROR, lexer.any))
--- Functions.
-local func = token(l.FUNCTION, l.word) * #P('(')
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point(lexer.KEYWORD, lexer.fold_consecutive_lines('import'))
--- Classes.
-local class = token(l.KEYWORD, P('class')) * ws^1 * token(l.CLASS, l.word)
-
--- Rules.
-M._rules = {
- {'whitespace', ws},
- {'class', class},
- {'keyword', keyword},
- {'type', type},
- {'function', func},
- {'identifier', identifier},
- {'template', template},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'annotation', annotation},
- {'operator', operator},
- {'error', token(l.ERROR, l.any)},
-}
-
--- Token styles.
-M._tokenstyles = {
- annotation = l.STYLE_PREPROCESSOR,
- template = l.STYLE_EMBEDDED
-}
-
--- Folding.
-M._foldsymbols = {
- _patterns = {'[{}]', '/%*', '%*/', '//', 'import'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')},
- [l.KEYWORD] = {['import'] = l.fold_line_comments('import')}
-}
-
-return M
+return lex
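
A standalone check of the float shape defined above: underscores are accepted only after the fractional digits, and the suffix chain allows an exponent plus a BigInteger/BigDecimal marker. Assumes only lpeg:

    local lpeg = require('lpeg')
    local S, R = lpeg.S, lpeg.R
    local digit = R('09')
    local exp = S('eE') * digit^1
    local med_suff = S('bB') * S('iI')
    local large_suff = S('dD') + S('fF') + S('bB') * S('dD')
    local float_suff = exp^-1 * med_suff^-1 * large_suff^-1
    local dec_inf = ('_' * digit^1)^0
    local float = digit^1 * '.' * digit^1 * dec_inf * float_suff * -1
    assert(lpeg.match(float, '3.14'))      -- plain float
    assert(lpeg.match(float, '1.0e3bd'))   -- exponent + BigDecimal suffix
    assert(lpeg.match(float, '2.500_000')) -- grouped fraction digits
    assert(not lpeg.match(float, '1.'))    -- fraction digits are required
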
diff --git a/lua/lexers/yaml.lua b/lua/lexers/yaml.lua
index 7b22451..ebf90cf 100644
--- a/lua/lexers/yaml.lua
+++ b/lua/lexers/yaml.lua
@@ -1,110 +1,84 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- YAML LPeg lexer.
-- It does not keep track of indentation perfectly.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S, B = lpeg.P, lpeg.S, lpeg.B
-local M = {_NAME = 'yaml'}
+local lex = lexer.new('yaml', {fold_by_indentation = true})
-- Whitespace.
-local indent = #l.starts_line(S(' \t')) *
- (token(l.WHITESPACE, ' ') + token('indent_error', '\t'))^1
-local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
+local indent = #lexer.starts_line(S(' \t')) *
+ (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1
+lex:add_rule('indent', indent)
+lex:add_style('indent_error', {back = lexer.colors.red})
+lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- Keys.
+local word = (lexer.alpha + '-' * -lexer.space) * (lexer.alnum + '-')^0
+lex:add_rule('key', token(lexer.KEYWORD, word * (S(' \t_')^1 * word^-1)^0) * #(':' * lexer.space))
+
+-- Constants.
+lex:add_rule('constant', B(lexer.space) * token(lexer.CONSTANT, word_match('null true false', true)))
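
The B(lexer.space) guard above is LPeg's zero-width look-behind: the constant only matches when the character before it is whitespace, so identifiers merely ending in 'true' are not re-tokenized. A standalone illustration (plain LPeg; the input strings are made up):

    local lpeg = require('lpeg')
    local B, P = lpeg.B, lpeg.P
    local const = B(' ') * P('true')              -- 'true' only after a space
    assert(lpeg.match(const, ' true', 2) == 6)    -- preceded by ' ': matches
    assert(lpeg.match(const, '_true', 2) == nil)  -- preceded by '_': rejected
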
-- Strings.
-local string = token(l.STRING, l.delimited_range("'") + l.delimited_range('"'))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Numbers.
-local integer = l.dec_num + l.hex_num + '0' * S('oO') * R('07')^1
-local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
-local number = token(l.NUMBER, special_num + l.float + integer)
+-- Comments.
+lex:add_rule('comment', B(lexer.space) * token(lexer.COMMENT, lexer.to_eol('#')))
-- Timestamps.
-local ts = token('timestamp', l.digit * l.digit * l.digit * l.digit * -- year
- '-' * l.digit * l.digit^-1 * -- month
- '-' * l.digit * l.digit^-1 * -- day
- ((S(' \t')^1 + S('tT'))^-1 * -- separator
- l.digit * l.digit^-1 * -- hour
- ':' * l.digit * l.digit * -- minute
- ':' * l.digit * l.digit * -- second
- ('.' * l.digit^0)^-1 * -- fraction
- ('Z' + -- timezone
- S(' \t')^0 * S('-+') * l.digit * l.digit^-1 *
- (':' * l.digit * l.digit)^-1)^-1)^-1)
+local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit
+local month = lexer.digit * lexer.digit^-1
+local day = lexer.digit * lexer.digit^-1
+local date = year * '-' * month * '-' * day
+local hours = lexer.digit * lexer.digit^-1
+local minutes = lexer.digit * lexer.digit
+local seconds = lexer.digit * lexer.digit
+local fraction = '.' * lexer.digit^0
+local time = hours * ':' * minutes * ':' * seconds * fraction^-1
+local T = S(' \t')^1 + S('tT')
+local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1
+lex:add_rule('timestamp', token('timestamp', date * (T * time * zone^-1)^-1))
+lex:add_style('timestamp', lexer.styles.number)
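
Decomposing the old single expression into named parts makes the grammar auditable. The same shape can be checked standalone (plain LPeg, with digit standing in for lexer.digit; the samples are the usual YAML 1.1 timestamp forms):

    local lpeg = require('lpeg')
    local S, R = lpeg.S, lpeg.R
    local digit = R('09')
    local year = digit * digit * digit * digit
    local month, day = digit * digit^-1, digit * digit^-1
    local date = year * '-' * month * '-' * day
    local hours, minutes, seconds = digit * digit^-1, digit * digit, digit * digit
    local time = hours * ':' * minutes * ':' * seconds * ('.' * digit^0)^-1
    local sep = S(' \t')^1 + S('tT')
    local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1
    local ts = date * (sep * time * zone^-1)^-1 * -1
    assert(lpeg.match(ts, '2001-12-14'))                    -- date only
    assert(lpeg.match(ts, '2001-12-14t21:59:43.10-05:00'))  -- canonical form
    assert(lpeg.match(ts, '2001-12-14 21:59:43.10 -5'))     -- space separated
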
--- Constants.
-local constant = token(l.CONSTANT,
- word_match({'null', 'true', 'false'}, nil, true))
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local hex = '0' * S('xX') * (P('_')^-1 * lexer.xdigit^1)^1 -- allow '0xFF' as well as '0x_FF'
+local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
+local integer = S('+-')^-1 * (hex + bin + dec)
+local float = S('+-')^-1 *
+ ((dec^-1 * '.' * dec + dec * '.' * dec^-1 * -P('.')) * (S('eE') * S('+-')^-1 * dec)^-1 +
+ (dec * S('eE') * S('+-')^-1 * dec))
+local special_num = S('+-')^-1 * '.' * word_match('inf nan', true)
+lex:add_rule('number', B(lexer.space) * token(lexer.NUMBER, special_num + float + integer))
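
The rewritten number grammar admits '_' digit separators and binary literals. A quick standalone check of the integer forms (plain LPeg; the patterns are restated so the snippet runs on its own):

    local lpeg = require('lpeg')
    local P, S, R = lpeg.P, lpeg.S, lpeg.R
    local digit, xdigit = R('09'), R('09', 'af', 'AF')
    local dec = digit^1 * ('_' * digit^1)^0
    local hex = '0' * S('xX') * (P('_')^-1 * xdigit^1)^1
    local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
    local integer = S('+-')^-1 * (hex + bin + dec) * -1
    assert(lpeg.match(integer, '1_000_000'))
    assert(lpeg.match(integer, '0xDEAD_BEEF'))
    assert(lpeg.match(integer, '-0b1010_1010'))
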
-- Types.
-local type = token(l.TYPE, '!!' * word_match({
+lex:add_rule('type', token(lexer.TYPE, '!!' * word_match({
-- Collection types.
'map', 'omap', 'pairs', 'set', 'seq',
-- Scalar types.
- 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
- 'value', 'yaml'
-}, nil, true) + '!' * l.delimited_range('<>'))
+ 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp', 'value', 'yaml'
+}, true) + '!' * lexer.range('<', '>', true)))
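
lexer.range('<', '>', true) replaces the old delimited_range('<>'); the third argument confines the match to a single line. A standalone sketch (assuming lexer.lua loads outside the editor):

    local lexer = require('lexer')
    local lpeg = require('lpeg')
    local verbatim_tag = '!' * lexer.range('<', '>', true)
    assert(lpeg.match(verbatim_tag, '!<tag:yaml.org,2002:str>'))
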
-- Document boundaries.
-local doc_bounds = token('document', l.starts_line(P('---') + '...'))
+lex:add_rule('doc_bounds', token('document', lexer.starts_line(P('---') + '...')))
+lex:add_style('document', lexer.styles.constant)
-- Directives
-local directive = token('directive', l.starts_line('%') * l.nonnewline^1)
-
-local word = (l.alpha + '-' * -l.space) * (l.alnum + '-')^0
-
--- Keys and literals.
-local colon = S(' \t')^0 * ':' * (l.space + -1)
-local key = token(l.KEYWORD, (l.alnum + '_' + '-')^1 * #(':' * l.space))
-local value = #word * (l.nonnewline - l.space^0 * S(',]}'))^1
-local block = S('|>') * S('+-')^-1 * (l.newline + -1) * function(input, index)
- local rest = input:sub(index)
- local level = #rest:match('^( *)')
- for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do
- if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then
- return index + pos - 1
- end
- end
- return #input + 1
-end
-local literal = token('literal', value + block)
+lex:add_rule('directive', token('directive', lexer.starts_line(lexer.to_eol('%'))))
+lex:add_style('directive', lexer.styles.preprocessor)
-- Indicators.
-local anchor = token(l.LABEL, '&' * word)
-local alias = token(l.VARIABLE, '*' * word)
+local anchor = B(lexer.space) * token(lexer.LABEL, '&' * word)
+local alias = token(lexer.VARIABLE, '*' * word)
local tag = token('tag', '!' * word * P('!')^-1)
-local reserved = token(l.ERROR, S('@`') * word)
-local indicator_chars = token(l.OPERATOR, S('-?:,[]{}!'))
-
-M._rules = {
- {'indent', indent},
- {'whitespace', ws},
- {'comment', comment},
- {'doc_bounds', doc_bounds},
- {'key', key},
- {'literal', literal},
- {'timestamp', ts},
- {'number', number},
- {'constant', constant},
- {'type', type},
- {'indicator', tag + indicator_chars + alias + anchor + reserved},
- {'directive', directive},
-}
-
-M._tokenstyles = {
- indent_error = 'back:red',
- document = l.STYLE_CONSTANT,
- literal = l.STYLE_DEFAULT,
- timestamp = l.STYLE_NUMBER,
- tag = l.STYLE_CLASS,
- directive = l.STYLE_PREPROCESSOR,
-}
-
-M._FOLDBYINDENTATION = true
+local reserved = token(lexer.ERROR, S('@`') * word)
+local indicator_chars = token(lexer.OPERATOR, S('-?:,>|[]{}!'))
+lex:add_rule('indicator', tag + indicator_chars + alias + anchor + reserved)
+lex:add_style('tag', lexer.styles.class)
-return M
+return lex
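
With the object API in place, the lexer can be exercised from a plain Lua prompt. A sketch, assuming this tree's lexer.lua and LPeg are on package.path/cpath (the sample input is made up):

    local lexer = require('lexer')
    local yaml = lexer.load('yaml')
    -- lex() returns a flat list alternating token names and end positions.
    local tokens = yaml:lex('key: true\n')
    for i = 1, #tokens, 2 do
      print(tokens[i], tokens[i + 1])
    end
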
diff --git a/lua/lexers/zig.lua b/lua/lexers/zig.lua
index f8eb74b..efc3a1f 100644
--- a/lua/lexers/zig.lua
+++ b/lua/lexers/zig.lua
@@ -1,130 +1,93 @@
--- Copyright 2020 Karchnu karchnu@karchnu.fr.
+-- Copyright 2020-2022 Karchnu karchnu@karchnu.fr. See LICENSE.
-- Zig LPeg lexer.
--- (Based on the C++ LPeg lexer from Mitchell mitchell.att.foicica.com.)
+-- (Based on the C++ LPeg lexer from Mitchell.)
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'zig'}
+local lex = lexer.new('zig')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local comment = token(l.COMMENT, line_comment)
--- For documentation, I took the liberty of using the preprocessor coloration,
--- since it doesn't exist in Zig anyway.
-local doc_comment = '///' * l.nonnewline_esc^0
-local preprocessor = token(l.PREPROCESSOR, doc_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Keywords.
-local keyword = token(l.KEYWORD, word_match{
-
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-- Keywords.
- 'inline', 'pub', 'fn', 'comptime', 'const',
- 'extern', 'return', 'var', 'usingnamespace',
-
+ 'inline', 'pub', 'fn', 'comptime', 'const', 'extern', 'return', 'var', 'usingnamespace',
-- Defering code blocks.
'defer', 'errdefer',
-
-- Functions and structures related keywords.
- 'align', 'allowzero',
- 'noalias', 'noinline',
- 'callconv', 'packed', 'linksection',
- 'unreachable',
- 'test',
- 'asm', 'volatile',
-
+ 'align', 'allowzero', 'noalias', 'noinline', 'callconv', 'packed', 'linksection', 'unreachable',
+ 'test', 'asm', 'volatile',
-- Parallelism and concurrency related keywords.
- 'async', 'await', 'noasync',
- 'suspend', 'nosuspend', 'resume',
- 'threadlocal','anyframe',
-
+ 'async', 'await', 'noasync', 'suspend', 'nosuspend', 'resume', 'threadlocal', 'anyframe',
-- Control flow: conditions and loops.
- 'if', 'else', 'orelse',
- 'or', 'and',
- 'while', 'for', 'switch', 'continue', 'break',
- 'catch', 'try',
-
- -- Not keyword, but overly used variable name with always the same semantic.
- 'self',
-})
+ 'if', 'else', 'orelse', 'or', 'and', 'while', 'for', 'switch', 'continue', 'break', 'catch',
+ 'try',
+ -- Not a keyword, but a heavily used variable name with a consistent meaning.
+ 'self'
+}))
-- Types.
-local type = token(l.TYPE, word_match{
- 'enum', 'struct', 'union',
- 'i8', 'u8', 'i16', 'u16', 'i32', 'u32', 'i64', 'u64', 'i128', 'u128',
- 'isize', 'usize',
- 'c_short', 'c_ushort', 'c_int', 'c_uint',
- 'c_long', 'c_ulong', 'c_longlong', 'c_ulonglong', 'c_longdouble',
- 'c_void',
- 'f16', 'f32', 'f64', 'f128',
- 'bool',
- 'void',
- 'noreturn',
- 'type', 'anytype', 'error', 'anyerror',
- 'comptime_int', 'comptime_float',
-})
+lex:add_rule('type', token(lexer.TYPE, word_match{
+ 'enum', 'struct', 'union', --
+ 'i8', 'u8', 'i16', 'u16', 'i32', 'u32', 'i64', 'u64', 'i128', 'u128', --
+ 'isize', 'usize', --
+ 'c_short', 'c_ushort', 'c_int', 'c_uint', --
+ 'c_long', 'c_ulong', 'c_longlong', 'c_ulonglong', 'c_longdouble', --
+ 'c_void', --
+ 'f16', 'f32', 'f64', 'f128', --
+ 'bool', 'void', 'noreturn', 'type', 'anytype', 'error', 'anyerror', --
+ 'comptime_int', 'comptime_float'
+}))
-- Constants.
-local constant = token(l.CONSTANT, word_match{
- -- special values
- 'false', 'true', 'null', 'undefined',
-})
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+lex:add_rule('constant', token(lexer.CONSTANT, word_match{
+ -- Special values.
+ 'false', 'true', 'null', 'undefined'
+}))
-- Built-in functions.
-local functions = token(l.FUNCTION, word_match{
+lex:add_rule('function', token(lexer.FUNCTION, '@' * word_match{
'addWithOverflow', 'alignCast', 'alignOf', 'as', 'asyncCall', 'atomicLoad', 'atomicRmw',
'atomicStore', 'bitCast', 'bitOffsetOf', 'boolToInt', 'bitSizeOf', 'breakpoint', 'mulAdd',
'byteSwap', 'bitReverse', 'byteOffsetOf', 'call', 'cDefine', 'cImport', 'cInclude', 'clz',
'cmpxchgStrong', 'cmpxchgWeak', 'compileError', 'compileLog', 'ctz', 'cUndef', 'divExact',
- 'divFloor', 'divTrunc', 'embedFile', 'enumToInt', 'errorName', 'errorReturnTrace',
- 'errorToInt', 'errSetCast', 'export', 'fence', 'field', 'fieldParentPtr', 'floatCast',
- 'floatToInt', 'frame', 'Frame', 'frameAddress', 'frameSize', 'hasDecl', 'hasField', 'import',
- 'intCast', 'intToEnum', 'intToError', 'intToFloat', 'intToPtr', 'memcpy', 'memset', 'wasmMemorySize',
- 'wasmMemoryGrow', 'mod', 'mulWithOverflow', 'panic', 'popCount', 'ptrCast', 'ptrToInt', 'rem',
- 'returnAddress', 'setAlignStack', 'setCold', 'setEvalBranchQuota', 'setFloatMode', 'setRuntimeSafety',
- 'shlExact', 'shlWithOverflow', 'shrExact', 'shuffle', 'sizeOf', 'splat', 'reduce',
- 'src', 'sqrt', 'sin', 'cos', 'exp', 'exp2', 'log', 'log2', 'log10', 'fabs', 'floor',
- 'ceil', 'trunc', 'round', 'subWithOverflow', 'tagName', 'TagType', 'This', 'truncate',
- 'Type', 'typeInfo', 'typeName', 'TypeOf', 'unionInit',
-})
+ 'divFloor', 'divTrunc', 'embedFile', 'enumToInt', 'errorName', 'errorReturnTrace', 'errorToInt',
+ 'errSetCast', 'export', 'fence', 'field', 'fieldParentPtr', 'floatCast', 'floatToInt', 'frame',
+ 'Frame', 'frameAddress', 'frameSize', 'hasDecl', 'hasField', 'import', 'intCast', 'intToEnum',
+ 'intToError', 'intToFloat', 'intToPtr', 'memcpy', 'memset', 'wasmMemorySize', 'wasmMemoryGrow',
+ 'mod', 'mulWithOverflow', 'panic', 'popCount', 'ptrCast', 'ptrToInt', 'rem', 'returnAddress',
+ 'setAlignStack', 'setCold', 'setEvalBranchQuota', 'setFloatMode', 'setRuntimeSafety', 'shlExact',
+ 'shlWithOverflow', 'shrExact', 'shuffle', 'sizeOf', 'splat', 'reduce', 'src', 'sqrt', 'sin',
+ 'cos', 'exp', 'exp2', 'log', 'log2', 'log10', 'fabs', 'floor', 'ceil', 'trunc', 'round',
+ 'subWithOverflow', 'tagName', 'TagType', 'This', 'truncate', 'Type', 'typeInfo', 'typeName',
+ 'TypeOf', 'unionInit'
+}))
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'constant', constant},
- {'function', functions},
- {'type', type},
- {'identifier', identifier},
- {'string', string},
- {'preprocessor', preprocessor},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
+-- Comments.
+lex:add_rule('doc_comment', token('doc_comment', lexer.to_eol('///', true)))
+lex:add_style('doc_comment', lexer.styles.comment)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//', true)))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')))
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))
+lex:add_fold_point('doc_comment', lexer.fold_consecutive_lines('///'))
-return M
+return lex
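
One caveat the keyword hunk illustrates: word_match treats each whitespace-separated entry as a distinct word, so a missing delimiter between two entries silently fuses them into a single keyword that will never match real code. A standalone check (assuming lexer.lua's string form of word_match):

    local lexer = require('lexer')
    local lpeg = require('lpeg')
    local kw = lexer.word_match('threadlocal anyframe')
    assert(lpeg.match(kw, 'threadlocal'))
    assert(lpeg.match(kw, 'anyframe'))
    assert(not lpeg.match(kw, 'threadlocalanyframe'))
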