-rw-r--r--  lexers/LICENSE | 21
-rw-r--r--  lexers/actionscript.lua | 75
-rw-r--r--  lexers/ada.lua | 68
-rw-r--r--  lexers/ansi_c.lua | 72
-rw-r--r--  lexers/antlr.lua | 74
-rw-r--r--  lexers/apdl.lua | 102
-rw-r--r--  lexers/applescript.lua | 82
-rw-r--r--  lexers/asm.lua | 476
-rw-r--r--  lexers/asp.lua | 42
-rw-r--r--  lexers/awk.lua | 334
-rw-r--r--  lexers/bash.lua | 74
-rw-r--r--  lexers/batch.lua | 71
-rw-r--r--  lexers/bibtex.lua | 58
-rw-r--r--  lexers/boo.lua | 81
-rw-r--r--  lexers/caml.lua | 83
-rw-r--r--  lexers/chuck.lua | 115
-rw-r--r--  lexers/cmake.lua | 173
-rw-r--r--  lexers/coffeescript.lua | 62
-rw-r--r--  lexers/container.lua | 7
-rw-r--r--  lexers/context.lua | 59
-rw-r--r--  lexers/cpp.lua | 87
-rw-r--r--  lexers/csharp.lua | 84
-rw-r--r--  lexers/css.lua | 166
-rw-r--r--  lexers/cuda.lua | 92
-rw-r--r--  lexers/dart.lua | 77
-rw-r--r--  lexers/desktop.lua | 62
-rw-r--r--  lexers/diff.lua | 44
-rw-r--r--  lexers/django.lua | 77
-rw-r--r--  lexers/dmd.lua | 176
-rw-r--r--  lexers/dot.lua | 71
-rw-r--r--  lexers/eiffel.lua | 69
-rw-r--r--  lexers/elixir.lua | 122
-rw-r--r--  lexers/erlang.lua | 100
-rw-r--r--  lexers/fish.lua | 76
-rw-r--r--  lexers/forth.lua | 57
-rw-r--r--  lexers/fortran.lua | 91
-rw-r--r--  lexers/fsharp.lua | 76
-rw-r--r--  lexers/gap.lua | 56
-rw-r--r--  lexers/gettext.lua | 39
-rw-r--r--  lexers/glsl.lua | 132
-rw-r--r--  lexers/gnuplot.lua | 80
-rw-r--r--  lexers/go.lua | 78
-rw-r--r--  lexers/groovy.lua | 89
-rw-r--r--  lexers/gtkrc.lua | 71
-rw-r--r--  lexers/haskell.lua | 60
-rw-r--r--  lexers/html.lua | 166
-rw-r--r--  lexers/idl.lua | 68
-rw-r--r--  lexers/inform.lua | 96
-rw-r--r--  lexers/ini.lua | 52
-rw-r--r--  lexers/io_lang.lua | 66
-rw-r--r--  lexers/java.lua | 86
-rw-r--r--  lexers/javascript.lua | 62
-rw-r--r--  lexers/json.lua | 47
-rw-r--r--  lexers/jsp.lua | 29
-rw-r--r--  lexers/latex.lua | 73
-rw-r--r--  lexers/less.lua | 27
-rw-r--r--  lexers/lexer.lua | 1587
-rw-r--r--  lexers/lilypond.lua | 40
-rw-r--r--  lexers/lisp.lua | 84
-rw-r--r--  lexers/litcoffee.lua | 21
-rw-r--r--  lexers/lua.lua | 190
-rw-r--r--  lexers/makefile.lua | 108
-rw-r--r--  lexers/markdown.lua | 109
-rw-r--r--  lexers/matlab.lua | 105
-rw-r--r--  lexers/nemerle.lua | 81
-rw-r--r--  lexers/nim.lua | 124
-rw-r--r--  lexers/nsis.lua | 182
-rw-r--r--  lexers/null.lua | 6
-rw-r--r--  lexers/objective_c.lua | 87
-rw-r--r--  lexers/pascal.lua | 78
-rw-r--r--  lexers/perl.lua | 161
-rw-r--r--  lexers/php.lua | 99
-rw-r--r--  lexers/pike.lua | 70
-rw-r--r--  lexers/pkgbuild.lua | 89
-rw-r--r--  lexers/powershell.lua | 82
-rw-r--r--  lexers/prolog.lua | 64
-rw-r--r--  lexers/props.lua | 47
-rw-r--r--  lexers/ps.lua | 61
-rw-r--r--  lexers/python.lua | 134
-rw-r--r--  lexers/rails.lua | 65
-rw-r--r--  lexers/rebol.lua | 129
-rw-r--r--  lexers/rest.lua | 259
-rw-r--r--  lexers/rexx.lua | 97
-rw-r--r--  lexers/rhtml.lua | 29
-rw-r--r--  lexers/rstats.lua | 53
-rw-r--r--  lexers/ruby.lua | 148
-rw-r--r--  lexers/rust.lua | 87
-rw-r--r--  lexers/sass.lua | 32
-rw-r--r--  lexers/scala.lua | 75
-rw-r--r--  lexers/scheme.lua | 104
-rw-r--r--  lexers/smalltalk.lua | 62
-rw-r--r--  lexers/sql.lua | 79
-rw-r--r--  lexers/tcl.lua | 59
-rw-r--r--  lexers/template.txt | 20
-rw-r--r--  lexers/tex.lua | 45
-rw-r--r--  lexers/texinfo.lua | 270
-rw-r--r--  lexers/text.lua | 6
-rw-r--r--  lexers/toml.lua | 68
-rw-r--r--  lexers/vala.lua | 75
-rw-r--r--  lexers/vb.lua | 64
-rw-r--r--  lexers/vbscript.lua | 63
-rw-r--r--  lexers/vcard.lua | 97
-rw-r--r--  lexers/verilog.lua | 101
-rw-r--r--  lexers/vhdl.lua | 89
-rw-r--r--  lexers/wsf.lua | 108
-rw-r--r--  lexers/xml.lua | 99
-rw-r--r--  lexers/xtend.lua | 112
-rw-r--r--  lexers/yaml.lua | 115
108 files changed, 11252 insertions(+), 0 deletions(-)
diff --git a/lexers/LICENSE b/lexers/LICENSE
new file mode 100644
index 0000000..13cd311
--- /dev/null
+++ b/lexers/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2007-2015 Mitchell
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/lexers/actionscript.lua b/lexers/actionscript.lua
new file mode 100644
index 0000000..0aafe63
--- /dev/null
+++ b/lexers/actionscript.lua
@@ -0,0 +1,75 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- ActionScript LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'actionscript'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local ml_str = '<![CDATA[' * (l.any - ']]>')^0 * ']]>'
+local string = token(l.STRING, sq_str + dq_str + ml_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'break', 'continue', 'delete', 'do', 'else', 'for', 'function', 'if', 'in',
+ 'new', 'on', 'return', 'this', 'typeof', 'var', 'void', 'while', 'with',
+ 'NaN', 'Infinity', 'false', 'null', 'true', 'undefined',
+ -- Reserved for future use.
+ 'abstract', 'case', 'catch', 'class', 'const', 'debugger', 'default',
+ 'export', 'extends', 'final', 'finally', 'goto', 'implements', 'import',
+ 'instanceof', 'interface', 'native', 'package', 'private', 'Void',
+  'protected', 'public', 'dynamic', 'static', 'super', 'switch',
+  'synchronized',
+ 'throw', 'throws', 'transient', 'try', 'volatile'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'Array', 'Boolean', 'Color', 'Date', 'Function', 'Key', 'MovieClip', 'Math',
+ 'Mouse', 'Number', 'Object', 'Selection', 'Sound', 'String', 'XML', 'XMLNode',
+ 'XMLSocket',
+ -- Reserved for future use.
+ 'boolean', 'byte', 'char', 'double', 'enum', 'float', 'int', 'long', 'short'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//', '<!%[CDATA%[', '%]%]>'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {
+ ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')
+ },
+ [l.STRING] = {['<![CDATA['] = 1, [']]>'] = -1}
+}
+
+return M
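
A minimal driver sketch for consuming a lexer like the one above (an assumption, not part of this patch: it presumes the lexer.lua module from this commit is on package.path and that LPeg is installed, since these lexers reference a global `lpeg`):

  -- Hypothetical usage of the ActionScript lexer defined above.
  lpeg = require('lpeg')            -- the lexers expect a global `lpeg`
  local lexer = require('lexer')
  local as = lexer.load('actionscript')
  -- lex() returns a flat list alternating token names and end positions.
  local tokens = lexer.lex(as, 'var n = 1 // count\n')
  for i = 1, #tokens, 2 do
    print(tokens[i], tokens[i + 1])
  end
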
diff --git a/lexers/ada.lua b/lexers/ada.lua
new file mode 100644
index 0000000..cbcb651
--- /dev/null
+++ b/lexers/ada.lua
@@ -0,0 +1,68 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Ada LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ada'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '--' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true, true))
+
+-- Numbers.
+local hex_num = '0' * S('xX') * (l.xdigit + '_')^1
+local integer = l.digit^1 * ('_' * l.digit^1)^0
+local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer
+local number = token(l.NUMBER, hex_num + S('+-')^-1 * (float + integer) *
+ S('LlUuFf')^-3)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abort', 'abs', 'accept', 'all', 'and', 'begin', 'body', 'case', 'declare',
+ 'delay', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'for',
+ 'generic', 'goto', 'if', 'in', 'is', 'loop', 'mod', 'new', 'not', 'null',
+ 'or', 'others', 'out', 'protected', 'raise', 'record', 'rem', 'renames',
+ 'requeue', 'reverse', 'select', 'separate', 'subtype', 'task', 'terminate',
+ 'then', 'type', 'until', 'when', 'while', 'xor',
+ -- Preprocessor.
+ 'package', 'pragma', 'use', 'with',
+ -- Function
+ 'function', 'procedure', 'return',
+ -- Storage class.
+ 'abstract', 'access', 'aliased', 'array', 'at', 'constant', 'delta', 'digits',
+ 'interface', 'limited', 'of', 'private', 'range', 'tagged', 'synchronized',
+ -- Boolean.
+ 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'boolean', 'character', 'count', 'duration', 'float', 'integer', 'long_float',
+ 'long_integer', 'priority', 'short_float', 'short_integer', 'string'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(':;=<>&+-*/.()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
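
The `(true, true)` arguments to delimited_range() above are easy to misread: the second parameter keeps the range on a single line and the third disables backslash escapes, which suits Ada strings (they cannot span lines and use doubled quotes rather than escapes). A short sketch under that reading of the API:

  -- Single-line, no-escape double-quoted range, as in the Ada lexer above.
  local ada_str = l.delimited_range('"', true, true)
  -- By contrast, C-style strings keep backslash escapes enabled:
  local c_str = l.delimited_range('"', true)
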
diff --git a/lexers/ansi_c.lua b/lexers/ansi_c.lua
new file mode 100644
index 0000000..e7e04d5
--- /dev/null
+++ b/lexers/ansi_c.lua
@@ -0,0 +1,72 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- C LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ansi_c'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = P('L')^-1 * l.delimited_range("'", true)
+local dq_str = P('L')^-1 * l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'define', 'elif', 'else', 'endif', 'if', 'ifdef', 'ifndef', 'include', 'line',
+ 'pragma', 'undef'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * S('\t ')^0 * preproc_word)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else',
+ 'extern', 'for', 'goto', 'if', 'inline', 'register', 'restrict', 'return',
+ 'sizeof', 'static', 'switch', 'typedef', 'volatile', 'while'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed', 'struct',
+ 'union', 'unsigned', 'void', '_Bool', '_Complex', '_Imaginary'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
+ [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
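
The _foldsymbols tables in these lexers share one convention: _patterns lists Lua patterns scanned on each line, and when a match's token type has an entry, the mapped number adjusts the fold level (1 opens a fold, -1 closes one), while fold_line_comments() folds runs of consecutive line comments. A hypothetical table for a Pascal-like language, to make the shape concrete:

  -- Illustrative only; 'begin'/'end' folding is not part of ansi_c.lua.
  M._foldsymbols = {
    _patterns = {'%l+', '[{}]', '//'},
    [l.KEYWORD] = {['begin'] = 1, ['end'] = -1},
    [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
    [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
  }
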
diff --git a/lexers/antlr.lua b/lexers/antlr.lua
new file mode 100644
index 0000000..7c9e444
--- /dev/null
+++ b/lexers/antlr.lua
@@ -0,0 +1,74 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- ANTLR LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'antlr'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range("'", true))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else',
+ 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof',
+ 'native', 'new', 'private', 'protected', 'public', 'return', 'static',
+ 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile',
+ 'while', 'package', 'import', 'header', 'options', 'tokens', 'strictfp',
+ 'false', 'null', 'super', 'this', 'true'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface',
+ 'long', 'short', 'void'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, 'assert')
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}'))
+
+-- Actions.
+local action = #P('{') * operator * token('action', (1 - P('}'))^0) *
+ (#P('}') * operator)^-1
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'action', action},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ action = l.STYLE_NOTHING
+}
+
+M._foldsymbols = {
+ _patterns = {'[:;%(%){}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {
+ [':'] = 1, [';'] = -1, ['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1
+ },
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/apdl.lua b/lexers/apdl.lua
new file mode 100644
index 0000000..97d8d2c
--- /dev/null
+++ b/lexers/apdl.lua
@@ -0,0 +1,102 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- APDL LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'apdl'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '!' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range("'", true, true))
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ '*abbr', '*abb', '*afun', '*afu', '*ask', '*cfclos', '*cfc', '*cfopen',
+ '*cfo', '*cfwrite', '*cfw', '*create', '*cre', '*cycle', '*cyc', '*del',
+ '*dim', '*do', '*elseif', '*else', '*enddo', '*endif', '*end', '*eval',
+ '*eva', '*exit', '*exi', '*get', '*go', '*if', '*list', '*lis', '*mfouri',
+ '*mfo', '*mfun', '*mfu', '*mooney', '*moo', '*moper', '*mop', '*msg',
+ '*repeat', '*rep', '*set', '*status', '*sta', '*tread', '*tre', '*ulib',
+ '*uli', '*use', '*vabs', '*vab', '*vcol', '*vco', '*vcum', '*vcu', '*vedit',
+ '*ved', '*vfact', '*vfa', '*vfill', '*vfi', '*vfun', '*vfu', '*vget', '*vge',
+ '*vitrp', '*vit', '*vlen', '*vle', '*vmask', '*vma', '*voper', '*vop',
+ '*vplot', '*vpl', '*vput', '*vpu', '*vread', '*vre', '*vscfun', '*vsc',
+ '*vstat', '*vst', '*vwrite', '*vwr', '/anfile', '/anf', '/angle', '/ang',
+ '/annot', '/ann', '/anum', '/anu', '/assign', '/ass', '/auto', '/aut',
+ '/aux15', '/aux2', '/aux', '/axlab', '/axl', '/batch', '/bat', '/clabel',
+ '/cla', '/clear', '/cle', '/clog', '/clo', '/cmap', '/cma', '/color', '/col',
+ '/com', '/config', '/contour', '/con', '/copy', '/cop', '/cplane', '/cpl',
+ '/ctype', '/cty', '/cval', '/cva', '/delete', '/del', '/devdisp', '/device',
+ '/dev', '/dist', '/dis', '/dscale', '/dsc', '/dv3d', '/dv3', '/edge', '/edg',
+ '/efacet', '/efa', '/eof', '/erase', '/era', '/eshape', '/esh', '/exit',
+ '/exi', '/expand', '/exp', '/facet', '/fac', '/fdele', '/fde', '/filname',
+ '/fil', '/focus', '/foc', '/format', '/for', '/ftype', '/fty', '/gcmd',
+ '/gcm', '/gcolumn', '/gco', '/gfile', '/gfi', '/gformat', '/gfo', '/gline',
+ '/gli', '/gmarker', '/gma', '/golist', '/gol', '/gopr', '/gop', '/go',
+ '/graphics', '/gra', '/gresume', '/gre', '/grid', '/gri', '/gropt', '/gro',
+ '/grtyp', '/grt', '/gsave', '/gsa', '/gst', '/gthk', '/gth', '/gtype', '/gty',
+ '/header', '/hea', '/input', '/inp', '/larc', '/lar', '/light', '/lig',
+ '/line', '/lin', '/lspec', '/lsp', '/lsymbol', '/lsy', '/menu', '/men',
+ '/mplib', '/mpl', '/mrep', '/mre', '/mstart', '/mst', '/nerr', '/ner',
+ '/noerase', '/noe', '/nolist', '/nol', '/nopr', '/nop', '/normal', '/nor',
+ '/number', '/num', '/opt', '/output', '/out', '/page', '/pag', '/pbc', '/pbf',
+ '/pcircle', '/pci', '/pcopy', '/pco', '/plopts', '/plo', '/pmacro', '/pma',
+ '/pmeth', '/pme', '/pmore', '/pmo', '/pnum', '/pnu', '/polygon', '/pol',
+ '/post26', '/post1', '/pos', '/prep7', '/pre', '/psearch', '/pse', '/psf',
+ '/pspec', '/psp', '/pstatus', '/pst', '/psymb', '/psy', '/pwedge', '/pwe',
+ '/quit', '/qui', '/ratio', '/rat', '/rename', '/ren', '/replot', '/rep',
+ '/reset', '/res', '/rgb', '/runst', '/run', '/seclib', '/sec', '/seg',
+ '/shade', '/sha', '/showdisp', '/show', '/sho', '/shrink', '/shr', '/solu',
+ '/sol', '/sscale', '/ssc', '/status', '/sta', '/stitle', '/sti', '/syp',
+ '/sys', '/title', '/tit', '/tlabel', '/tla', '/triad', '/tri', '/trlcy',
+ '/trl', '/tspec', '/tsp', '/type', '/typ', '/ucmd', '/ucm', '/uis', '/ui',
+ '/units', '/uni', '/user', '/use', '/vcone', '/vco', '/view', '/vie',
+ '/vscale', '/vsc', '/vup', '/wait', '/wai', '/window', '/win', '/xrange',
+ '/xra', '/yrange', '/yra', '/zoom', '/zoo'
+}, '*/', true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Functions.
+local func = token(l.FUNCTION, l.delimited_range('%', true, true))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-*/$=,;()'))
+
+-- Labels.
+local label = token(l.LABEL, l.starts_line(':') * l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'number', number},
+ {'function', func},
+ {'label', label},
+ {'comment', comment},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'%*[A-Za-z]+', '!'},
+ [l.KEYWORD] = {
+ ['*if'] = 1, ['*IF'] = 1, ['*do'] = 1, ['*DO'] = 1, ['*dowhile'] = 1,
+ ['*DOWHILE'] = 1,
+ ['*endif'] = -1, ['*ENDIF'] = -1, ['*enddo'] = -1, ['*ENDDO'] = -1
+ },
+ [l.COMMENT] = {['!'] = l.fold_line_comments('!')}
+}
+
+return M
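
The trailing `'*/', true` arguments to word_match() above are what allow APDL keywords to contain '*' and '/' and to match case-insensitively: judging from its use here, the second parameter adds extra characters to the word set and the third enables case-insensitive matching. Sketch:

  -- Hypothetical: matches '*do', '*DO', '/exit', '/EXIT', and so on.
  local cmd = l.word_match({'*do', '/exit'}, '*/', true)
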
diff --git a/lexers/applescript.lua b/lexers/applescript.lua
new file mode 100644
index 0000000..08b3432
--- /dev/null
+++ b/lexers/applescript.lua
@@ -0,0 +1,82 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- AppleScript LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'applescript'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '--' * l.nonnewline^0
+local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true))
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'script', 'property', 'prop', 'end', 'copy', 'to', 'set', 'global', 'local',
+ 'on', 'to', 'of', 'in', 'given', 'with', 'without', 'return', 'continue',
+ 'tell', 'if', 'then', 'else', 'repeat', 'times', 'while', 'until', 'from',
+ 'exit', 'try', 'error', 'considering', 'ignoring', 'timeout', 'transaction',
+ 'my', 'get', 'put', 'into', 'is',
+ -- References.
+ 'each', 'some', 'every', 'whose', 'where', 'id', 'index', 'first', 'second',
+ 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth',
+ 'last', 'front', 'back', 'st', 'nd', 'rd', 'th', 'middle', 'named', 'through',
+ 'thru', 'before', 'after', 'beginning', 'the',
+ -- Commands.
+ 'close', 'copy', 'count', 'delete', 'duplicate', 'exists', 'launch', 'make',
+ 'move', 'open', 'print', 'quit', 'reopen', 'run', 'save', 'saving',
+ -- Operators.
+ 'div', 'mod', 'and', 'not', 'or', 'as', 'contains', 'equal', 'equals',
+ 'isn\'t',
+}, "'", true))
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match({
+ 'case', 'diacriticals', 'expansion', 'hyphens', 'punctuation',
+ -- Predefined variables.
+ 'it', 'me', 'version', 'pi', 'result', 'space', 'tab', 'anything',
+ -- Text styles.
+ 'bold', 'condensed', 'expanded', 'hidden', 'italic', 'outline', 'plain',
+ 'shadow', 'strikethrough', 'subscript', 'superscript', 'underline',
+ -- Save options.
+ 'ask', 'no', 'yes',
+ -- Booleans.
+ 'false', 'true',
+ -- Date and time.
+ 'weekday', 'monday', 'mon', 'tuesday', 'tue', 'wednesday', 'wed', 'thursday',
+ 'thu', 'friday', 'fri', 'saturday', 'sat', 'sunday', 'sun', 'month',
+ 'january', 'jan', 'february', 'feb', 'march', 'mar', 'april', 'apr', 'may',
+ 'june', 'jun', 'july', 'jul', 'august', 'aug', 'september', 'sep', 'october',
+ 'oct', 'november', 'nov', 'december', 'dec', 'minutes', 'hours', 'days',
+ 'weeks'
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, (l.alpha + '_') * l.alnum^0)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-^*/&<>=:,(){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'constant', constant},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/asm.lua b/lexers/asm.lua
new file mode 100644
index 0000000..1cba511
--- /dev/null
+++ b/lexers/asm.lua
@@ -0,0 +1,476 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- NASM Assembly LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'asm'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, ';' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer * S('hqb')^-1)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'arg', 'assign', 'clear', 'define', 'defstr', 'deftok', 'depend', 'elif',
+ 'elifctx', 'elifdef', 'elifempty', 'elifenv', 'elifid', 'elifidn', 'elifidni',
+ 'elifmacro', 'elifn', 'elifnctx', 'elifndef', 'elifnempty', 'elifnenv',
+ 'elifnid', 'elifnidn', 'elifnidni', 'elifnmacro', 'elifnnum', 'elifnstr',
+ 'elifntoken', 'elifnum', 'elifstr', 'eliftoken', 'else', 'endif', 'endmacro',
+ 'endrep', 'endwhile', 'error', 'exitmacro', 'exitrep', 'exitwhile', 'fatal',
+ 'final', 'idefine', 'idefstr', 'ideftok', 'if', 'ifctx', 'ifdef', 'ifempty',
+ 'ifenv', 'ifid', 'ifidn', 'ifidni', 'ifmacro', 'ifn', 'ifnctx', 'ifndef',
+ 'ifnempty', 'ifnenv', 'ifnid', 'ifnidn', 'ifnidni', 'ifnmacro', 'ifnnum',
+ 'ifnstr', 'ifntoken', 'ifnum', 'ifstr', 'iftoken', 'imacro', 'include',
+ 'ixdefine', 'line', 'local', 'macro', 'pathsearch', 'pop', 'push', 'rep',
+ 'repl', 'rmacro', 'rotate', 'stacksize', 'strcat', 'strlen', 'substr',
+ 'undef', 'unmacro', 'use', 'warning', 'while', 'xdefine',
+}
+local preproc_symbol = '??' + S('!$+?') + '%' * -l.space + R('09')^1
+local preproc = token(l.PREPROCESSOR, '%' * (preproc_word + preproc_symbol))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ -- Preprocessor macros.
+ 'struc', 'endstruc', 'istruc', 'at', 'iend', 'align', 'alignb', 'sectalign',
+ '.nolist',
+ -- Preprocessor Packages.
+ --'altreg', 'smartalign', 'fp', 'ifunc'
+ -- Directives.
+  'absolute', 'bits', 'class', 'common', 'cpu', 'default', 'export',
+ 'extern', 'float', 'global', 'group', 'import', 'osabi', 'overlay', 'private',
+ 'public', '__SECT__', 'section', 'segment', 'stack', 'use16', 'use32',
+ 'use64',
+ -- Section Names.
+ '.bss', '.comment', '.data', '.lbss', '.ldata', '.lrodata', '.rdata',
+ '.rodata', '.tbss', '.tdata', '.text',
+ -- Section Qualifiers.
+ 'alloc', 'bss', 'code', 'exec', 'data', 'noalloc', 'nobits', 'noexec',
+ 'nowrite', 'progbits', 'rdata', 'tls', 'write',
+ -- Operators.
+ 'abs', 'rel', 'seg', 'wrt', 'strict',
+ '__utf16__', '__utf16be__', '__utf16le__', '__utf32__', '__utf32be__',
+ '__utf32le__',
+}, '.'))
+
+-- Instructions.
+-- awk '{print $1}'|uniq|tr '[:upper:]' '[:lower:]'|
+-- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 78
+local instruction = token('instruction', word_match{
+ -- Special Instructions.
+ 'db', 'dd', 'do', 'dq', 'dt', 'dw', 'dy', 'resb', 'resd', 'reso', 'resq',
+ 'rest', 'resw', 'resy',
+ -- Conventional Instructions.
+ 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bb0_reset',
+ 'bb1_reset', 'bound', 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts',
+ 'call', 'cbw', 'cdq', 'cdqe', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmp',
+ 'cmpsb', 'cmpsd', 'cmpsq', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b',
+ 'cmpxchg16b', 'cpuid', 'cpu_read', 'cpu_write', 'cqo', 'cwd', 'cwde', 'daa',
+ 'das', 'dec', 'div', 'dmint', 'emms', 'enter', 'equ', 'f2xm1', 'fabs',
+ 'fadd', 'faddp', 'fbld', 'fbstp', 'fchs', 'fclex', 'fcmovb', 'fcmovbe',
+ 'fcmove', 'fcmovnb', 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu', 'fcom',
+ 'fcomi', 'fcomip', 'fcomp', 'fcompp', 'fcos', 'fdecstp', 'fdisi', 'fdiv',
+ 'fdivp', 'fdivr', 'fdivrp', 'femms', 'feni', 'ffree', 'ffreep', 'fiadd',
+ 'ficom', 'ficomp', 'fidiv', 'fidivr', 'fild', 'fimul', 'fincstp', 'finit',
+ 'fist', 'fistp', 'fisttp', 'fisub', 'fisubr', 'fld', 'fld1', 'fldcw',
+ 'fldenv', 'fldl2e', 'fldl2t', 'fldlg2', 'fldln2', 'fldpi', 'fldz', 'fmul',
+ 'fmulp', 'fnclex', 'fndisi', 'fneni', 'fninit', 'fnop', 'fnsave', 'fnstcw',
+ 'fnstenv', 'fnstsw', 'fpatan', 'fprem', 'fprem1', 'fptan', 'frndint',
+ 'frstor', 'fsave', 'fscale', 'fsetpm', 'fsin', 'fsincos', 'fsqrt',
+ 'fst', 'fstcw', 'fstenv', 'fstp', 'fstsw', 'fsub', 'fsubp', 'fsubr',
+ 'fsubrp', 'ftst', 'fucom', 'fucomi', 'fucomip', 'fucomp', 'fucompp',
+ 'fxam', 'fxch', 'fxtract', 'fyl2x', 'fyl2xp1', 'hlt', 'ibts', 'icebp',
+ 'idiv', 'imul', 'in', 'inc', 'incbin', 'insb', 'insd', 'insw', 'int',
+ 'int01', 'int1', 'int03', 'int3', 'into', 'invd', 'invpcid', 'invlpg',
+ 'invlpga', 'iret', 'iretd', 'iretq', 'iretw', 'jcxz', 'jecxz', 'jrcxz',
+ 'jmp', 'jmpe', 'lahf', 'lar', 'lds', 'lea', 'leave', 'les', 'lfence',
+ 'lfs', 'lgdt', 'lgs', 'lidt', 'lldt', 'lmsw', 'loadall', 'loadall286',
+ 'lodsb', 'lodsd', 'lodsq', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz',
+ 'loopz', 'lsl', 'lss', 'ltr', 'mfence', 'monitor', 'mov', 'movd', 'movq',
+ 'movsb', 'movsd', 'movsq', 'movsw', 'movsx', 'movsxd', 'movsx', 'movzx',
+ 'mul', 'mwait', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd', 'outsw',
+ 'packssdw', 'packsswb', 'packuswb', 'paddb', 'paddd', 'paddsb', 'paddsiw',
+ 'paddsw', 'paddusb', 'paddusw', 'paddw', 'pand', 'pandn', 'pause', 'paveb',
+ 'pavgusb', 'pcmpeqb', 'pcmpeqd', 'pcmpeqw', 'pcmpgtb', 'pcmpgtd', 'pcmpgtw',
+ 'pdistib', 'pf2id', 'pfacc', 'pfadd', 'pfcmpeq', 'pfcmpge', 'pfcmpgt',
+ 'pfmax', 'pfmin', 'pfmul', 'pfrcp', 'pfrcpit1', 'pfrcpit2', 'pfrsqit1',
+ 'pfrsqrt', 'pfsub', 'pfsubr', 'pi2fd', 'pmachriw', 'pmaddwd', 'pmagw',
+ 'pmulhriw', 'pmulhrwa', 'pmulhrwc', 'pmulhw', 'pmullw', 'pmvgezb', 'pmvlzb',
+ 'pmvnzb', 'pmvzb', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd',
+ 'popfq', 'popfw', 'por', 'prefetch', 'prefetchw', 'pslld', 'psllq',
+ 'psllw', 'psrad', 'psraw', 'psrld', 'psrlq', 'psrlw', 'psubb', 'psubd',
+ 'psubsb', 'psubsiw', 'psubsw', 'psubusb', 'psubusw', 'psubw', 'punpckhbw',
+ 'punpckhdq', 'punpckhwd', 'punpcklbw', 'punpckldq', 'punpcklwd', 'push',
+ 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfq', 'pushfw', 'pxor',
+ 'rcl', 'rcr', 'rdshr', 'rdmsr', 'rdpmc', 'rdtsc', 'rdtscp', 'ret', 'retf',
+ 'retn', 'rol', 'ror', 'rdm', 'rsdc', 'rsldt', 'rsm', 'rsts', 'sahf', 'sal',
+ 'salc', 'sar', 'sbb', 'scasb', 'scasd', 'scasq', 'scasw', 'sfence', 'sgdt',
+ 'shl', 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'skinit', 'smi', 'smint',
+ 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosq', 'stosw',
+ 'str', 'sub', 'svdc', 'svldt', 'svts', 'swapgs', 'syscall', 'sysenter',
+ 'sysexit', 'sysret', 'test', 'ud0', 'ud1', 'ud2b', 'ud2', 'ud2a', 'umov',
+ 'verr', 'verw', 'fwait', 'wbinvd', 'wrshr', 'wrmsr', 'xadd', 'xbts',
+ 'xchg', 'xlatb', 'xlat', 'xor', 'cmovcc', 'jcc', 'setcc',
+ -- Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI, XMM, MMX2).
+ 'addps', 'addss', 'andnps', 'andps', 'cmpeqps', 'cmpeqss', 'cmpleps',
+ 'cmpless', 'cmpltps', 'cmpltss', 'cmpneqps', 'cmpneqss', 'cmpnleps',
+ 'cmpnless', 'cmpnltps', 'cmpnltss', 'cmpordps', 'cmpordss', 'cmpunordps',
+ 'cmpunordss', 'cmpps', 'cmpss', 'comiss', 'cvtpi2ps', 'cvtps2pi', 'cvtsi2ss',
+ 'cvtss2si', 'cvttps2pi', 'cvttss2si', 'divps', 'divss', 'ldmxcsr', 'maxps',
+ 'maxss', 'minps', 'minss', 'movaps', 'movhps', 'movlhps', 'movlps',
+ 'movhlps', 'movmskps', 'movntps', 'movss', 'movups', 'mulps', 'mulss',
+ 'orps', 'rcpps', 'rcpss', 'rsqrtps', 'rsqrtss', 'shufps', 'sqrtps', 'sqrtss',
+ 'stmxcsr', 'subps', 'subss', 'ucomiss', 'unpckhps', 'unpcklps', 'xorps',
+ -- Introduced in Deschutes but necessary for SSE support.
+ 'fxrstor', 'fxrstor64', 'fxsave', 'fxsave64',
+ -- XSAVE group (AVX and extended state).
+ 'xgetbv', 'xsetbv', 'xsave', 'xsave64', 'xsaveopt', 'xsaveopt64', 'xrstor',
+ 'xrstor64',
+ -- Generic memory operations.
+ 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2', 'sfence',
+ -- New MMX instructions introduced in Katmai.
+ 'maskmovq', 'movntq', 'pavgb', 'pavgw', 'pextrw', 'pinsrw', 'pmaxsw',
+ 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw', 'psadbw', 'pshufw',
+ -- AMD Enhanced 3DNow! (Athlon) instructions.
+ 'pf2iw', 'pfnacc', 'pfpnacc', 'pi2fw', 'pswapd',
+ -- Willamette SSE2 Cacheability Instructions.
+ 'maskmovdqu', 'clflush', 'movntdq', 'movnti', 'movntpd', 'lfence', 'mfence',
+ -- Willamette MMX instructions (SSE2 SIMD Integer Instructions).
+ 'movd', 'movdqa', 'movdqu', 'movdq2q', 'movq', 'movq2dq', 'packsswb',
+ 'packssdw', 'packuswb', 'paddb', 'paddw', 'paddd', 'paddq', 'paddsb',
+ 'paddsw', 'paddusb', 'paddusw', 'pand', 'pandn', 'pavgb', 'pavgw', 'pcmpeqb',
+ 'pcmpeqw', 'pcmpeqd', 'pcmpgtb', 'pcmpgtw', 'pcmpgtd', 'pextrw', 'pinsrw',
+ 'pmaddwd', 'pmaxsw', 'pmaxub', 'pminsw', 'pminub', 'pmovmskb', 'pmulhuw',
+ 'pmulhw', 'pmullw', 'pmuludq', 'por', 'psadbw', 'pshufd', 'pshufhw',
+ 'pshuflw', 'pslldq', 'psllw', 'pslld', 'psllq', 'psraw', 'psrad', 'psrldq',
+ 'psrlw', 'psrld', 'psrlq', 'psubb', 'psubw', 'psubd', 'psubq', 'psubsb',
+ 'psubsw', 'psubusb', 'psubusw', 'punpckhbw', 'punpckhwd', 'punpckhdq',
+ 'punpckhqdq', 'punpcklbw', 'punpcklwd', 'punpckldq', 'punpcklqdq', 'pxor',
+ -- Willamette Streaming SIMD instructions (SSE2).
+ 'addpd', 'addsd', 'andnpd', 'andpd', 'cmpeqpd', 'cmpeqsd', 'cmplepd',
+ 'cmplesd', 'cmpltpd', 'cmpltsd', 'cmpneqpd', 'cmpneqsd', 'cmpnlepd',
+ 'cmpnlesd', 'cmpnltpd', 'cmpnltsd', 'cmpordpd', 'cmpordsd', 'cmpunordpd',
+ 'cmpunordsd', 'cmppd', 'cmpsd', 'comisd', 'cvtdq2pd', 'cvtdq2ps',
+ 'cvtpd2dq', 'cvtpd2pi', 'cvtpd2ps', 'cvtpi2pd', 'cvtps2dq', 'cvtps2pd',
+ 'cvtsd2si', 'cvtsd2ss', 'cvtsi2sd', 'cvtss2sd', 'cvttpd2pi', 'cvttpd2dq',
+ 'cvttps2dq', 'cvttsd2si', 'divpd', 'divsd', 'maxpd', 'maxsd', 'minpd',
+ 'minsd', 'movapd', 'movhpd', 'movlpd', 'movmskpd', 'movsd', 'movupd',
+ 'mulpd', 'mulsd', 'orpd', 'shufpd', 'sqrtpd', 'sqrtsd', 'subpd', 'subsd',
+ 'ucomisd', 'unpckhpd', 'unpcklpd', 'xorpd',
+ -- Prescott New Instructions (SSE3).
+ 'addsubpd', 'addsubps', 'haddpd', 'haddps', 'hsubpd', 'hsubps', 'lddqu',
+ 'movddup', 'movshdup', 'movsldup',
+ -- VMX/SVM Instructions.
+ 'clgi', 'stgi', 'vmcall', 'vmclear', 'vmfunc', 'vmlaunch', 'vmload',
+ 'vmmcall', 'vmptrld', 'vmptrst', 'vmread', 'vmresume', 'vmrun', 'vmsave',
+ 'vmwrite', 'vmxoff', 'vmxon',
+ -- Extended Page Tables VMX instructions.
+ 'invept', 'invvpid',
+ -- Tejas New Instructions (SSSE3).
+ 'pabsb', 'pabsw', 'pabsd', 'palignr', 'phaddw', 'phaddd', 'phaddsw',
+ 'phsubw', 'phsubd', 'phsubsw', 'pmaddubsw', 'pmulhrsw', 'pshufb', 'psignb',
+ 'psignw', 'psignd',
+ -- AMD SSE4A.
+ 'extrq', 'insertq', 'movntsd', 'movntss',
+ -- New instructions in Barcelona.
+ 'lzcnt',
+ -- Penryn New Instructions (SSE4.1).
+ 'blendpd', 'blendps', 'blendvpd', 'blendvps', 'dppd', 'dpps', 'extractps',
+ 'insertps', 'movntdqa', 'mpsadbw', 'packusdw', 'pblendvb', 'pblendw',
+ 'pcmpeqq', 'pextrb', 'pextrd', 'pextrq', 'pextrw', 'phminposuw', 'pinsrb',
+ 'pinsrd', 'pinsrq', 'pmaxsb', 'pmaxsd', 'pmaxud', 'pmaxuw', 'pminsb',
+ 'pminsd', 'pminud', 'pminuw', 'pmovsxbw', 'pmovsxbd', 'pmovsxbq', 'pmovsxwd',
+ 'pmovsxwq', 'pmovsxdq', 'pmovzxbw', 'pmovzxbd', 'pmovzxbq', 'pmovzxwd',
+ 'pmovzxwq', 'pmovzxdq', 'pmuldq', 'pmulld', 'ptest', 'roundpd', 'roundps',
+ 'roundsd', 'roundss',
+ -- Nehalem New Instructions (SSE4.2).
+ 'crc32', 'pcmpestri', 'pcmpestrm', 'pcmpistri', 'pcmpistrm', 'pcmpgtq',
+ 'popcnt',
+ -- Intel SMX.
+ 'getsec',
+ -- Geode (Cyrix) 3DNow! additions.
+ 'pfrcpv', 'pfrsqrtv',
+ -- Intel new instructions in ???.
+ 'movbe',
+ -- Intel AES instructions.
+ 'aesenc', 'aesenclast', 'aesdec', 'aesdeclast', 'aesimc', 'aeskeygenassist',
+ -- Intel AVX AES instructions.
+ 'vaesenc', 'vaesenclast', 'vaesdec', 'vaesdeclast', 'vaesimc',
+ 'vaeskeygenassist',
+ -- Intel AVX instructions.
+ 'vaddpd', 'vaddps', 'vaddsd', 'vaddss', 'vaddsubpd', 'vaddsubps',
+ 'vandpd', 'vandps', 'vandnpd', 'vandnps', 'vblendpd', 'vblendps',
+ 'vblendvpd', 'vblendvps', 'vbroadcastss', 'vbroadcastsd', 'vbroadcastf128',
+ 'vcmpeq_ospd', 'vcmpeqpd', 'vcmplt_ospd', 'vcmpltpd', 'vcmple_ospd',
+ 'vcmplepd', 'vcmpunord_qpd', 'vcmpunordpd', 'vcmpneq_uqpd', 'vcmpneqpd',
+ 'vcmpnlt_uspd', 'vcmpnltpd', 'vcmpnle_uspd', 'vcmpnlepd', 'vcmpord_qpd',
+ 'vcmpordpd', 'vcmpeq_uqpd', 'vcmpnge_uspd', 'vcmpngepd', 'vcmpngt_uspd',
+ 'vcmpngtpd', 'vcmpfalse_oqpd', 'vcmpfalsepd', 'vcmpneq_oqpd', 'vcmpge_ospd',
+ 'vcmpgepd', 'vcmpgt_ospd', 'vcmpgtpd', 'vcmptrue_uqpd', 'vcmptruepd',
+ 'vcmpeq_ospd', 'vcmplt_oqpd', 'vcmple_oqpd', 'vcmpunord_spd', 'vcmpneq_uspd',
+ 'vcmpnlt_uqpd', 'vcmpnle_uqpd', 'vcmpord_spd', 'vcmpeq_uspd', 'vcmpnge_uqpd',
+ 'vcmpngt_uqpd', 'vcmpfalse_ospd', 'vcmpneq_ospd', 'vcmpge_oqpd',
+ 'vcmpgt_oqpd', 'vcmptrue_uspd', 'vcmppd', 'vcmpeq_osps', 'vcmpeqps',
+ 'vcmplt_osps', 'vcmpltps', 'vcmple_osps', 'vcmpleps', 'vcmpunord_qps',
+ 'vcmpunordps', 'vcmpneq_uqps', 'vcmpneqps', 'vcmpnlt_usps', 'vcmpnltps',
+ 'vcmpnle_usps', 'vcmpnleps', 'vcmpord_qps', 'vcmpordps', 'vcmpeq_uqps',
+ 'vcmpnge_usps', 'vcmpngeps', 'vcmpngt_usps', 'vcmpngtps', 'vcmpfalse_oqps',
+ 'vcmpfalseps', 'vcmpneq_oqps', 'vcmpge_osps', 'vcmpgeps', 'vcmpgt_osps',
+ 'vcmpgtps', 'vcmptrue_uqps', 'vcmptrueps', 'vcmpeq_osps', 'vcmplt_oqps',
+ 'vcmple_oqps', 'vcmpunord_sps', 'vcmpneq_usps', 'vcmpnlt_uqps',
+ 'vcmpnle_uqps', 'vcmpord_sps', 'vcmpeq_usps', 'vcmpnge_uqps',
+ 'vcmpngt_uqps', 'vcmpfalse_osps', 'vcmpneq_osps', 'vcmpge_oqps',
+ 'vcmpgt_oqps', 'vcmptrue_usps', 'vcmpps', 'vcmpeq_ossd', 'vcmpeqsd',
+ 'vcmplt_ossd', 'vcmpltsd', 'vcmple_ossd', 'vcmplesd', 'vcmpunord_qsd',
+ 'vcmpunordsd', 'vcmpneq_uqsd', 'vcmpneqsd', 'vcmpnlt_ussd', 'vcmpnltsd',
+ 'vcmpnle_ussd', 'vcmpnlesd', 'vcmpord_qsd', 'vcmpordsd', 'vcmpeq_uqsd',
+ 'vcmpnge_ussd', 'vcmpngesd', 'vcmpngt_ussd', 'vcmpngtsd', 'vcmpfalse_oqsd',
+ 'vcmpfalsesd', 'vcmpneq_oqsd', 'vcmpge_ossd', 'vcmpgesd', 'vcmpgt_ossd',
+ 'vcmpgtsd', 'vcmptrue_uqsd', 'vcmptruesd', 'vcmpeq_ossd', 'vcmplt_oqsd',
+ 'vcmple_oqsd', 'vcmpunord_ssd', 'vcmpneq_ussd', 'vcmpnlt_uqsd',
+ 'vcmpnle_uqsd', 'vcmpord_ssd', 'vcmpeq_ussd', 'vcmpnge_uqsd',
+ 'vcmpngt_uqsd', 'vcmpfalse_ossd', 'vcmpneq_ossd', 'vcmpge_oqsd',
+ 'vcmpgt_oqsd', 'vcmptrue_ussd', 'vcmpsd', 'vcmpeq_osss', 'vcmpeqss',
+ 'vcmplt_osss', 'vcmpltss', 'vcmple_osss', 'vcmpless', 'vcmpunord_qss',
+ 'vcmpunordss', 'vcmpneq_uqss', 'vcmpneqss', 'vcmpnlt_usss', 'vcmpnltss',
+ 'vcmpnle_usss', 'vcmpnless', 'vcmpord_qss', 'vcmpordss', 'vcmpeq_uqss',
+ 'vcmpnge_usss', 'vcmpngess', 'vcmpngt_usss', 'vcmpngtss', 'vcmpfalse_oqss',
+ 'vcmpfalsess', 'vcmpneq_oqss', 'vcmpge_osss', 'vcmpgess', 'vcmpgt_osss',
+ 'vcmpgtss', 'vcmptrue_uqss', 'vcmptruess', 'vcmpeq_osss', 'vcmplt_oqss',
+ 'vcmple_oqss', 'vcmpunord_sss', 'vcmpneq_usss', 'vcmpnlt_uqss',
+ 'vcmpnle_uqss', 'vcmpord_sss', 'vcmpeq_usss', 'vcmpnge_uqss',
+ 'vcmpngt_uqss', 'vcmpfalse_osss', 'vcmpneq_osss', 'vcmpge_oqss',
+ 'vcmpgt_oqss', 'vcmptrue_usss', 'vcmpss', 'vcomisd', 'vcomiss',
+ 'vcvtdq2pd', 'vcvtdq2ps', 'vcvtpd2dq', 'vcvtpd2ps', 'vcvtps2dq',
+ 'vcvtps2pd', 'vcvtsd2si', 'vcvtsd2ss', 'vcvtsi2sd', 'vcvtsi2ss',
+ 'vcvtss2sd', 'vcvtss2si', 'vcvttpd2dq', 'vcvttps2dq', 'vcvttsd2si',
+ 'vcvttss2si', 'vdivpd', 'vdivps', 'vdivsd', 'vdivss', 'vdppd', 'vdpps',
+ 'vextractf128', 'vextractps', 'vhaddpd', 'vhaddps', 'vhsubpd', 'vhsubps',
+ 'vinsertf128', 'vinsertps', 'vlddqu', 'vldqqu', 'vlddqu', 'vldmxcsr',
+ 'vmaskmovdqu', 'vmaskmovps', 'vmaskmovpd', 'vmaxpd', 'vmaxps', 'vmaxsd',
+ 'vmaxss', 'vminpd', 'vminps', 'vminsd', 'vminss', 'vmovapd', 'vmovaps',
+ 'vmovd', 'vmovq', 'vmovddup', 'vmovdqa', 'vmovqqa', 'vmovdqa', 'vmovdqu',
+ 'vmovqqu', 'vmovdqu', 'vmovhlps', 'vmovhpd', 'vmovhps', 'vmovlhps',
+ 'vmovlpd', 'vmovlps', 'vmovmskpd', 'vmovmskps', 'vmovntdq', 'vmovntqq',
+ 'vmovntdq', 'vmovntdqa', 'vmovntpd', 'vmovntps', 'vmovsd', 'vmovshdup',
+ 'vmovsldup', 'vmovss', 'vmovupd', 'vmovups', 'vmpsadbw', 'vmulpd',
+ 'vmulps', 'vmulsd', 'vmulss', 'vorpd', 'vorps', 'vpabsb', 'vpabsw',
+ 'vpabsd', 'vpacksswb', 'vpackssdw', 'vpackuswb', 'vpackusdw', 'vpaddb',
+ 'vpaddw', 'vpaddd', 'vpaddq', 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw',
+ 'vpalignr', 'vpand', 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw',
+ 'vpcmpestri', 'vpcmpestrm', 'vpcmpistri', 'vpcmpistrm', 'vpcmpeqb',
+ 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd',
+ 'vpcmpgtq', 'vpermilpd', 'vpermilps', 'vperm2f128', 'vpextrb', 'vpextrw',
+ 'vpextrd', 'vpextrq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphminposuw',
+ 'vphsubw', 'vphsubd', 'vphsubsw', 'vpinsrb', 'vpinsrw', 'vpinsrd',
+ 'vpinsrq', 'vpmaddwd', 'vpmaddubsw', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd',
+ 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub',
+ 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq',
+ 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq',
+ 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmulhuw', 'vpmulhrsw', 'vpmulhw',
+ 'vpmullw', 'vpmulld', 'vpmuludq', 'vpmuldq', 'vpor', 'vpsadbw', 'vpshufb',
+ 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd',
+ 'vpslldq', 'vpsrldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad',
+ 'vpsrlw', 'vpsrld', 'vpsrlq', 'vptest', 'vpsubb', 'vpsubw', 'vpsubd',
+ 'vpsubq', 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw',
+ 'vpunpckhwd', 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd',
+ 'vpunpckldq', 'vpunpcklqdq', 'vpxor', 'vrcpps', 'vrcpss', 'vrsqrtps',
+ 'vrsqrtss', 'vroundpd', 'vroundps', 'vroundsd', 'vroundss', 'vshufpd',
+ 'vshufps', 'vsqrtpd', 'vsqrtps', 'vsqrtsd', 'vsqrtss', 'vstmxcsr', 'vsubpd',
+ 'vsubps', 'vsubsd', 'vsubss', 'vtestps', 'vtestpd', 'vucomisd', 'vucomiss',
+ 'vunpckhpd', 'vunpckhps', 'vunpcklpd', 'vunpcklps', 'vxorpd', 'vxorps',
+ 'vzeroall', 'vzeroupper',
+ -- Intel Carry-Less Multiplication instructions (CLMUL).
+ 'pclmullqlqdq', 'pclmulhqlqdq', 'pclmullqhqdq', 'pclmulhqhqdq', 'pclmulqdq',
+ -- Intel AVX Carry-Less Multiplication instructions (CLMUL).
+ 'vpclmullqlqdq', 'vpclmulhqlqdq', 'vpclmullqhqdq', 'vpclmulhqhqdq',
+ 'vpclmulqdq',
+ -- Intel Fused Multiply-Add instructions (FMA).
+ 'vfmadd132ps', 'vfmadd132pd', 'vfmadd312ps', 'vfmadd312pd', 'vfmadd213ps',
+ 'vfmadd213pd', 'vfmadd123ps', 'vfmadd123pd', 'vfmadd231ps', 'vfmadd231pd',
+ 'vfmadd321ps', 'vfmadd321pd', 'vfmaddsub132ps', 'vfmaddsub132pd',
+ 'vfmaddsub312ps', 'vfmaddsub312pd', 'vfmaddsub213ps', 'vfmaddsub213pd',
+ 'vfmaddsub123ps', 'vfmaddsub123pd', 'vfmaddsub231ps', 'vfmaddsub231pd',
+ 'vfmaddsub321ps', 'vfmaddsub321pd', 'vfmsub132ps', 'vfmsub132pd',
+ 'vfmsub312ps', 'vfmsub312pd', 'vfmsub213ps', 'vfmsub213pd', 'vfmsub123ps',
+ 'vfmsub123pd', 'vfmsub231ps', 'vfmsub231pd', 'vfmsub321ps', 'vfmsub321pd',
+ 'vfmsubadd132ps', 'vfmsubadd132pd', 'vfmsubadd312ps', 'vfmsubadd312pd',
+ 'vfmsubadd213ps', 'vfmsubadd213pd', 'vfmsubadd123ps', 'vfmsubadd123pd',
+ 'vfmsubadd231ps', 'vfmsubadd231pd', 'vfmsubadd321ps', 'vfmsubadd321pd',
+ 'vfnmadd132ps', 'vfnmadd132pd', 'vfnmadd312ps', 'vfnmadd312pd',
+ 'vfnmadd213ps', 'vfnmadd213pd', 'vfnmadd123ps', 'vfnmadd123pd',
+ 'vfnmadd231ps', 'vfnmadd231pd', 'vfnmadd321ps', 'vfnmadd321pd',
+ 'vfnmsub132ps', 'vfnmsub132pd', 'vfnmsub312ps', 'vfnmsub312pd',
+ 'vfnmsub213ps', 'vfnmsub213pd', 'vfnmsub123ps', 'vfnmsub123pd',
+ 'vfnmsub231ps', 'vfnmsub231pd', 'vfnmsub321ps', 'vfnmsub321pd',
+ 'vfmadd132ss', 'vfmadd132sd', 'vfmadd312ss', 'vfmadd312sd', 'vfmadd213ss',
+ 'vfmadd213sd', 'vfmadd123ss', 'vfmadd123sd', 'vfmadd231ss', 'vfmadd231sd',
+ 'vfmadd321ss', 'vfmadd321sd', 'vfmsub132ss', 'vfmsub132sd', 'vfmsub312ss',
+ 'vfmsub312sd', 'vfmsub213ss', 'vfmsub213sd', 'vfmsub123ss', 'vfmsub123sd',
+ 'vfmsub231ss', 'vfmsub231sd', 'vfmsub321ss', 'vfmsub321sd', 'vfnmadd132ss',
+ 'vfnmadd132sd', 'vfnmadd312ss', 'vfnmadd312sd', 'vfnmadd213ss',
+ 'vfnmadd213sd', 'vfnmadd123ss', 'vfnmadd123sd', 'vfnmadd231ss',
+ 'vfnmadd231sd', 'vfnmadd321ss', 'vfnmadd321sd', 'vfnmsub132ss',
+ 'vfnmsub132sd', 'vfnmsub312ss', 'vfnmsub312sd', 'vfnmsub213ss',
+ 'vfnmsub213sd', 'vfnmsub123ss', 'vfnmsub123sd', 'vfnmsub231ss',
+ 'vfnmsub231sd', 'vfnmsub321ss', 'vfnmsub321sd',
+ -- Intel post-32 nm processor instructions.
+ 'rdfsbase', 'rdgsbase', 'rdrand', 'wrfsbase', 'wrgsbase', 'vcvtph2ps',
+ 'vcvtps2ph', 'adcx', 'adox', 'rdseed', 'clac', 'stac',
+ -- VIA (Centaur) security instructions.
+ 'xstore', 'xcryptecb', 'xcryptcbc', 'xcryptctr', 'xcryptcfb', 'xcryptofb',
+ 'montmul', 'xsha1', 'xsha256',
+ -- AMD Lightweight Profiling (LWP) instructions.
+ 'llwpcb', 'slwpcb', 'lwpval', 'lwpins',
+ -- AMD XOP and FMA4 instructions (SSE5).
+ 'vfmaddpd', 'vfmaddps', 'vfmaddsd', 'vfmaddss', 'vfmaddsubpd',
+ 'vfmaddsubps', 'vfmsubaddpd', 'vfmsubaddps', 'vfmsubpd', 'vfmsubps',
+ 'vfmsubsd', 'vfmsubss', 'vfnmaddpd', 'vfnmaddps', 'vfnmaddsd', 'vfnmaddss',
+ 'vfnmsubpd', 'vfnmsubps', 'vfnmsubsd', 'vfnmsubss', 'vfrczpd', 'vfrczps',
+ 'vfrczsd', 'vfrczss', 'vpcmov', 'vpcomb', 'vpcomd', 'vpcomq', 'vpcomub',
+ 'vpcomud', 'vpcomuq', 'vpcomuw', 'vpcomw', 'vphaddbd', 'vphaddbq',
+ 'vphaddbw', 'vphadddq', 'vphaddubd', 'vphaddubq', 'vphaddubw', 'vphaddudq',
+ 'vphadduwd', 'vphadduwq', 'vphaddwd', 'vphaddwq', 'vphsubbw', 'vphsubdq',
+ 'vphsubwd', 'vpmacsdd', 'vpmacsdqh', 'vpmacsdql', 'vpmacssdd', 'vpmacssdqh',
+ 'vpmacssdql', 'vpmacsswd', 'vpmacssww', 'vpmacswd', 'vpmacsww', 'vpmadcsswd',
+ 'vpmadcswd', 'vpperm', 'vprotb', 'vprotd', 'vprotq', 'vprotw', 'vpshab',
+ 'vpshad', 'vpshaq', 'vpshaw', 'vpshlb', 'vpshld', 'vpshlq', 'vpshlw',
+ -- Intel AVX2 instructions.
+ 'vmpsadbw', 'vpabsb', 'vpabsw', 'vpabsd', 'vpacksswb', 'vpackssdw',
+ 'vpackusdw', 'vpackuswb', 'vpaddb', 'vpaddw', 'vpaddd', 'vpaddq',
+ 'vpaddsb', 'vpaddsw', 'vpaddusb', 'vpaddusw', 'vpalignr', 'vpand',
+ 'vpandn', 'vpavgb', 'vpavgw', 'vpblendvb', 'vpblendw', 'vpcmpeqb',
+ 'vpcmpeqw', 'vpcmpeqd', 'vpcmpeqq', 'vpcmpgtb', 'vpcmpgtw', 'vpcmpgtd',
+ 'vpcmpgtq', 'vphaddw', 'vphaddd', 'vphaddsw', 'vphsubw', 'vphsubd',
+ 'vphsubsw', 'vpmaddubsw', 'vpmaddwd', 'vpmaxsb', 'vpmaxsw', 'vpmaxsd',
+ 'vpmaxub', 'vpmaxuw', 'vpmaxud', 'vpminsb', 'vpminsw', 'vpminsd', 'vpminub',
+ 'vpminuw', 'vpminud', 'vpmovmskb', 'vpmovsxbw', 'vpmovsxbd', 'vpmovsxbq',
+ 'vpmovsxwd', 'vpmovsxwq', 'vpmovsxdq', 'vpmovzxbw', 'vpmovzxbd', 'vpmovzxbq',
+ 'vpmovzxwd', 'vpmovzxwq', 'vpmovzxdq', 'vpmuldq', 'vpmulhrsw', 'vpmulhuw',
+ 'vpmulhw', 'vpmullw', 'vpmulld', 'vpmuludq', 'vpor', 'vpsadbw', 'vpshufb',
+ 'vpshufd', 'vpshufhw', 'vpshuflw', 'vpsignb', 'vpsignw', 'vpsignd',
+ 'vpslldq', 'vpsllw', 'vpslld', 'vpsllq', 'vpsraw', 'vpsrad', 'vpsrldq',
+ 'vpsrlw', 'vpsrld', 'vpsrlq', 'vpsubb', 'vpsubw', 'vpsubd', 'vpsubq',
+ 'vpsubsb', 'vpsubsw', 'vpsubusb', 'vpsubusw', 'vpunpckhbw', 'vpunpckhwd',
+ 'vpunpckhdq', 'vpunpckhqdq', 'vpunpcklbw', 'vpunpcklwd', 'vpunpckldq',
+ 'vpunpcklqdq', 'vpxor', 'vmovntdqa', 'vbroadcastss', 'vbroadcastsd',
+ 'vbroadcasti128', 'vpblendd', 'vpbroadcastb', 'vpbroadcastw', 'vpbroadcastd',
+ 'vpbroadcastq', 'vpermd', 'vpermpd', 'vpermps', 'vpermq', 'vperm2i128',
+ 'vextracti128', 'vinserti128', 'vpmaskmovd', 'vpmaskmovq', 'vpmaskmovd',
+ 'vpmaskmovq', 'vpsllvd', 'vpsllvq', 'vpsllvd', 'vpsllvq', 'vpsravd',
+ 'vpsrlvd', 'vpsrlvq', 'vpsrlvd', 'vpsrlvq', 'vgatherdpd', 'vgatherqpd',
+ 'vgatherdpd', 'vgatherqpd', 'vgatherdps', 'vgatherqps', 'vgatherdps',
+ 'vgatherqps', 'vpgatherdd', 'vpgatherqd', 'vpgatherdd', 'vpgatherqd',
+ 'vpgatherdq', 'vpgatherqq', 'vpgatherdq', 'vpgatherqq',
+ -- Transactional Synchronization Extensions (TSX).
+ 'xabort', 'xbegin', 'xend', 'xtest',
+ -- Intel BMI1 and BMI2 instructions, AMD TBM instructions.
+ 'andn', 'bextr', 'blci', 'blcic', 'blsi', 'blsic', 'blcfill', 'blsfill',
+ 'blcmsk', 'blsmsk', 'blsr', 'blcs', 'bzhi', 'mulx', 'pdep', 'pext', 'rorx',
+ 'sarx', 'shlx', 'shrx', 'tzcnt', 'tzmsk', 't1mskc',
+ -- Systematic names for the hinting nop instructions.
+ 'hint_nop0', 'hint_nop1', 'hint_nop2', 'hint_nop3', 'hint_nop4',
+ 'hint_nop5', 'hint_nop6', 'hint_nop7', 'hint_nop8', 'hint_nop9',
+ 'hint_nop10', 'hint_nop11', 'hint_nop12', 'hint_nop13', 'hint_nop14',
+ 'hint_nop15', 'hint_nop16', 'hint_nop17', 'hint_nop18', 'hint_nop19',
+ 'hint_nop20', 'hint_nop21', 'hint_nop22', 'hint_nop23', 'hint_nop24',
+ 'hint_nop25', 'hint_nop26', 'hint_nop27', 'hint_nop28', 'hint_nop29',
+ 'hint_nop30', 'hint_nop31', 'hint_nop32', 'hint_nop33', 'hint_nop34',
+ 'hint_nop35', 'hint_nop36', 'hint_nop37', 'hint_nop38', 'hint_nop39',
+ 'hint_nop40', 'hint_nop41', 'hint_nop42', 'hint_nop43', 'hint_nop44',
+ 'hint_nop45', 'hint_nop46', 'hint_nop47', 'hint_nop48', 'hint_nop49',
+ 'hint_nop50', 'hint_nop51', 'hint_nop52', 'hint_nop53', 'hint_nop54',
+ 'hint_nop55', 'hint_nop56', 'hint_nop57', 'hint_nop58', 'hint_nop59',
+ 'hint_nop60', 'hint_nop61', 'hint_nop62', 'hint_nop63',
+})
+
+-- Types.
+local sizes = word_match{
+ 'byte', 'word', 'dword', 'qword', 'tword', 'oword', 'yword',
+ 'a16', 'a32', 'a64', 'o16', 'o32', 'o64' -- instructions
+}
+local wrt_types = '..' * word_match{
+ 'start', 'gotpc', 'gotoff', 'gottpoff', 'got', 'plt', 'sym', 'tlsie'
+}
+local type = token(l.TYPE, sizes + wrt_types)
+
+-- Registers.
+local register = token('register', word_match{
+ -- 32-bit registers.
+ 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cx', 'dh', 'di', 'dl',
+  'dx', 'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'esi', 'esp', 'fs', 'mm0',
+ 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'si', 'st0', 'st1', 'st2',
+ 'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4',
+ 'xmm5', 'xmm6', 'xmm7', 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5',
+ 'ymm6', 'ymm7',
+ -- 64-bit registers.
+ 'bpl', 'dil', 'gs', 'r8', 'r8b', 'r8w', 'r9', 'r9b', 'r9w', 'r10', 'r10b',
+ 'r10w', 'r11', 'r11b', 'r11w', 'r12', 'r12b', 'r12w', 'r13', 'r13b', 'r13w',
+ 'r14', 'r14b', 'r14w', 'r15', 'r15b', 'r15w', 'rax', 'rbp', 'rbx', 'rcx',
+ 'rdi', 'rdx', 'rsi', 'rsp', 'sil', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12',
+ 'xmm13', 'xmm14', 'xmm15', 'ymm8', 'ymm9', 'ymm10', 'ymm11', 'ymm12', 'ymm13',
+ 'ymm14', 'ymm15'
+})
+
+local word = (l.alpha + S('$._?')) * (l.alnum + S('$._?#@~'))^0
+
+-- Labels.
+local label = token(l.LABEL, word * ':')
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, word)
+
+-- Constants.
+local constants = word_match{
+ '__float8__', '__float16__', '__float32__', '__float64__', '__float80m__',
+ '__float80e__', '__float128l__', '__float128h__', '__Infinity__', '__QNaN__',
+ '__NaN__', '__SNaN__'
+}
+local constant = token(l.CONSTANT, constants + '$' * P('$')^-1 * -identifier)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|~:,()[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'instruction', instruction},
+ {'register', register},
+ {'type', type},
+ {'constant', constant},
+ {'label', label},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ instruction = l.STYLE_FUNCTION,
+ register = l.STYLE_CONSTANT,
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '//'},
+ [l.PREPROCESSOR] = {
+ ['if'] = 1, endif = -1, macro = 1, endmacro = -1, rep = 1, endrep = -1,
+ ['while'] = 1, endwhile = -1,
+ },
+ [l.KEYWORD] = {struc = 1, endstruc = -1},
+ [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
+}
+
+return M
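
asm.lua illustrates custom token classes: token() accepts any unique name (here 'instruction' and 'register'), provided _tokenstyles maps that name to a style. A hypothetical extension in the same style (the 'directive' token is invented for illustration, not part of this patch):

  -- Tokens with custom names need both a rule entry and a style mapping.
  local directive = token('directive', '.' * l.word)
  M._rules[#M._rules + 1] = {'directive', directive}
  M._tokenstyles.directive = l.STYLE_PREPROCESSOR
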
diff --git a/lexers/asp.lua b/lexers/asp.lua
new file mode 100644
index 0000000..044966c
--- /dev/null
+++ b/lexers/asp.lua
@@ -0,0 +1,42 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- ASP LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'asp'}
+
+-- Embedded in HTML.
+local html = l.load('html')
+
+-- Embedded VB.
+local vb = l.load('vb')
+local vb_start_rule = token('asp_tag', '<%' * P('=')^-1)
+local vb_end_rule = token('asp_tag', '%>')
+l.embed_lexer(html, vb, vb_start_rule, vb_end_rule)
+
+-- Embedded VBScript.
+local vbs = l.load('vbscript')
+local script_element = word_match({'script'}, nil, html.case_insensitive_tags)
+local vbs_start_rule = #(P('<') * script_element * (P(function(input, index)
+ if input:find('^%s+language%s*=%s*(["\'])vbscript%1', index) or
+ input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then
+ return index
+ end
+end) + '>')) * html.embed_start_tag -- <script language="vbscript">
+local vbs_end_rule = #('</' * script_element * l.space^0 * '>') *
+ html.embed_end_tag -- </script>
+l.embed_lexer(html, vbs, vbs_start_rule, vbs_end_rule)
+
+M._tokenstyles = {
+ asp_tag = l.STYLE_EMBEDDED
+}
+
+local _foldsymbols = html._foldsymbols
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>'
+_foldsymbols.asp_tag = {['<%'] = 1, ['%>'] = -1}
+M._foldsymbols = _foldsymbols
+
+return M
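
asp.lua is the template for multi-language lexers in this set: embed_lexer(parent, child, start_rule, end_rule) activates the child grammar between matches of the two delimiter rules, and the parent's _foldsymbols is extended for those delimiters as done above. A hypothetical embedding with the same shape (the 'lua_tag' token and '<?lua' delimiters are invented for illustration):

  local parent = l.load('html')
  local child = l.load('lua')
  -- The child lexer runs between these two delimiter tokens.
  local start_rule = token('lua_tag', '<?lua')
  local end_rule = token('lua_tag', '?>')
  l.embed_lexer(parent, child, start_rule, end_rule)
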
diff --git a/lexers/awk.lua b/lexers/awk.lua
new file mode 100644
index 0000000..7948d21
--- /dev/null
+++ b/lexers/awk.lua
@@ -0,0 +1,334 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- AWK LPeg lexer.
+-- Modified by Wolfgang Seeberg 2012, 2013.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'awk'}
+
+local LEFTBRACKET = '['
+local RIGHTBRACKET = ']'
+local SLASH = '/'
+local BACKSLASH = '\\'
+local CARET = '^'
+local CR = '\r'
+local LF = '\n'
+local CRLF = CR .. LF
+local DQUOTE = '"'
+local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'}
+local COMPANION = {['('] = '[', ['['] = '('}
+local CC = {
+ alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1,
+ print = 1, punct = 1, space = 1, upper = 1, xdigit = 1
+}
+local LastRegexEnd = 0
+local BackslashAtCommentEnd = 0
+local KW_BEFORE_RX = {
+ case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1,
+ ['return'] = 1
+}
+
+local function findKeyword(input, e)
+ local i = e
+ while i > 0 and input:find("^[%l]", i) do i = i - 1 end
+ local w = input:sub(i + 1, e)
+ if i == 0 then
+ return KW_BEFORE_RX[w] == 1
+ elseif input:find("^[%u%d_]", i) then
+ return false
+ else
+ return KW_BEFORE_RX[w] == 1
+ end
+end
+
+local function isRegex(input, i)
+ while i >= 1 and input:find('^[ \t]', i) do i = i - 1 end
+ if i < 1 then return true end
+ if input:find("^[-!%%&(*+,:;<=>?[^{|}~\f]", i) or findKeyword(input, i) then
+ return true
+ elseif input:sub(i, i) == SLASH then
+ return i ~= LastRegexEnd -- deals with /xx/ / /yy/.
+ elseif input:find('^[]%w)."]', i) then
+ return false
+ elseif input:sub(i, i) == LF then
+ if i == 1 then return true end
+ i = i - 1
+ if input:sub(i, i) == CR then
+ if i == 1 then return true end
+ i = i - 1
+ end
+ elseif input:sub(i, i) == CR then
+ if i == 1 then return true end
+ i = i - 1
+ else
+ return false
+ end
+ if input:sub(i, i) == BACKSLASH and i ~= BackslashAtCommentEnd then
+ return isRegex(input, i - 1)
+ else
+ return true
+ end
+end
+
+local function eatCharacterClass(input, s, e)
+ local i = s
+ while i <= e do
+ if input:find('^[\r\n]', i) then
+ return false
+ elseif input:sub(i, i + 1) == ':]' then
+ local str = input:sub(s, i - 1)
+ return CC[str] == 1 and i + 1
+ end
+ i = i + 1
+ end
+ return false
+end
+
+local function eatBrackets(input, i, e)
+ if input:sub(i, i) == CARET then i = i + 1 end
+ if input:sub(i, i) == RIGHTBRACKET then i = i + 1 end
+ while i <= e do
+ if input:find('^[\r\n]', i) then
+ return false
+ elseif input:sub(i, i) == RIGHTBRACKET then
+ return i
+ elseif input:sub(i, i + 1) == '[:' then
+ i = eatCharacterClass(input, i + 2, e)
+ if not i then return false end
+ elseif input:sub(i, i) == BACKSLASH then
+ i = i + 1
+ if input:sub(i, i + 1) == CRLF then i = i + 1 end
+ end
+ i = i + 1
+ end
+ return false
+end
+
+local function eatRegex(input, i)
+ local e = #input
+ while i <= e do
+ if input:find('^[\r\n]', i) then
+ return false
+ elseif input:sub(i, i) == SLASH then
+ LastRegexEnd = i
+ return i
+ elseif input:sub(i, i) == LEFTBRACKET then
+ i = eatBrackets(input, i + 1, e)
+ if not i then return false end
+ elseif input:sub(i, i) == BACKSLASH then
+ i = i + 1
+ if input:sub(i, i + 1) == CRLF then i = i + 1 end
+ end
+ i = i + 1
+ end
+ return false
+end
+
+local ScanRegexResult
+local function scanGawkRegex(input, index)
+ if isRegex(input, index - 2) then
+ local i = eatRegex(input, index)
+ if not i then
+ ScanRegexResult = false
+ return false
+ end
+ local rx = input:sub(index - 1, i)
+ for bs in rx:gmatch("[^\\](\\+)[BSsWwy<>`']") do
+ -- /\S/ is special, but /\\S/ is not.
+ if #bs % 2 == 1 then return i + 1 end
+ end
+ ScanRegexResult = i + 1
+ else
+ ScanRegexResult = false
+ end
+ return false
+end
+-- This is only called immediately after scanGawkRegex().
+local function scanRegex()
+ return ScanRegexResult
+end
+
+local function scanString(input, index)
+ local i = index
+ local e = #input
+ while i <= e do
+ if input:find('^[\r\n]', i) then
+ return false
+ elseif input:sub(i, i) == DQUOTE then
+ return i + 1
+ elseif input:sub(i, i) == BACKSLASH then
+ i = i + 1
+ -- l.delimited_range() doesn't handle CRLF.
+ if input:sub(i, i + 1) == CRLF then i = i + 1 end
+ end
+ i = i + 1
+ end
+ return false
+end
+
+-- Purpose: prevent isRegex() from entering a comment line that ends with a
+-- backslash.
+local function scanComment(input, index)
+ local _, i = input:find('[^\r\n]*', index)
+ if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end
+ return i + 1
+end
+
+local function scanFieldDelimiters(input, index)
+ local i = index
+ local e = #input
+ local left = input:sub(i - 1, i - 1)
+ local count = 1
+ local right = DELIMITER_MATCHES[left]
+ local left2 = COMPANION[left]
+ local count2 = 0
+ local right2 = DELIMITER_MATCHES[left2]
+ while i <= e do
+ if input:find('^[#\r\n]', i) then
+ return false
+ elseif input:sub(i, i) == right then
+ count = count - 1
+ if count == 0 then return count2 == 0 and i + 1 end
+ elseif input:sub(i, i) == left then
+ count = count + 1
+ elseif input:sub(i, i) == right2 then
+ count2 = count2 - 1
+ if count2 < 0 then return false end
+ elseif input:sub(i, i) == left2 then
+ count2 = count2 + 1
+ elseif input:sub(i, i) == DQUOTE then
+ i = scanString(input, i + 1)
+ if not i then return false end
+ i = i - 1
+ elseif input:sub(i, i) == SLASH then
+ if isRegex(input, i - 1) then
+ i = eatRegex(input, i + 1)
+ if not i then return false end
+ end
+ elseif input:sub(i, i) == BACKSLASH then
+ if input:sub(i + 1, i + 2) == CRLF then
+ i = i + 2
+ elseif input:find('^[\r\n]', i + 1) then
+ i = i + 1
+ end
+ end
+ i = i + 1
+ end
+ return false
+end
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * P(scanComment))
+
+-- Strings.
+local string = token(l.STRING, DQUOTE * P(scanString))
+
+-- Regular expressions.
+-- Slash-delimited regular expressions follow most operators or the keywords
+-- 'print' and 'case', possibly on a preceding line. They can contain
+-- unescaped slashes as well as brackets inside bracket expressions. Escape
+-- sequences like '\S' and '\s' have special meanings in Gawk, so tokens that
+-- contain them are displayed differently.
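+--
+-- For example (illustrative awk input):
+--   x = a / b          # '/' follows an identifier, so it is division.
+--   print /foo\/[()]/  # '/' follows 'print', so it begins a regex.
+--   /\S+/ { print }    # '\S' is gawk-only, so the token is 'gawkRegex'.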
+local regex = token(l.REGEX, SLASH * P(scanRegex))
+local gawkRegex = token('gawkRegex', SLASH * P(scanGawkRegex))
+
+-- No leading sign, since '+' or '-' there may be a binary operator; matches
+-- forms like '1', '1.', '.5', and '6.02e23'.
+local float = ((l.digit ^ 1 * ('.' * l.digit ^ 0) ^ -1) +
+ ('.' * l.digit ^ 1)) * (S('eE') * S('+-') ^ -1 * l.digit ^ 1) ^ -1
+-- Numbers.
+local number = token(l.NUMBER, float)
+local gawkNumber = token('gawkNumber', l.hex_num + l.oct_num)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))
+local gawkOperator = token('gawkOperator', P("|&") + "@" + "**=" + "**")
+
+-- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc.
+local field = token('field', P('$') * S('$+-') ^ 0 *
+ (float + (l.word ^ 0 * '(' * P(scanFieldDelimiters)) +
+ (l.word ^ 1 * ('[' * P(scanFieldDelimiters)) ^ -1) +
+ ('"' * P(scanString)) + ('/' * P(eatRegex) * '/')))
+
+-- Functions.
+local func = token(l.FUNCTION, l.word * #P('('))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do',
+ 'else', 'exit', 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if',
+ 'in', 'index', 'int', 'length', 'log', 'match', 'next', 'nextfile', 'print',
+ 'printf', 'rand', 'return', 'sin', 'split', 'sprintf', 'sqrt', 'srand', 'sub',
+ 'substr', 'system', 'tolower', 'toupper', 'while'
+})
+
+local gawkKeyword = token('gawkKeyword', word_match{
+ 'BEGINFILE', 'ENDFILE', 'adump', 'and', 'asort', 'asorti', 'bindtextdomain',
+ 'case', 'compl', 'dcgettext', 'dcngettext', 'default', 'extension', 'func',
+ 'gensub', 'include', 'isarray', 'load', 'lshift', 'mktime', 'or', 'patsplit',
+ 'rshift', 'stopme', 'strftime', 'strtonum', 'switch', 'systime', 'xor'
+})
+
+local builtInVariable = token('builtInVariable', word_match{
+ 'ARGC', 'ARGV', 'CONVFMT', 'ENVIRON', 'FILENAME', 'FNR', 'FS', 'NF', 'NR',
+ 'OFMT', 'OFS', 'ORS', 'RLENGTH', 'RS', 'RSTART', 'SUBSEP'
+})
+
+local gawkBuiltInVariable = token('gawkBuiltInVariable', word_match {
+ 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE',
+ 'LINT', 'PREC', 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN'
+})
+
+-- Within each group order matters, but the groups themselves (except the
+-- last) can be in any order.
+M._rules = {
+ {'whitespace', ws},
+
+ {'comment', comment},
+
+ {'string', string},
+
+ {'field', field},
+
+ {'gawkRegex', gawkRegex},
+ {'regex', regex},
+ {'gawkOperator', gawkOperator},
+ {'operator', operator},
+
+ {'gawkNumber', gawkNumber},
+ {'number', number},
+
+ {'keyword', keyword},
+ {'builtInVariable', builtInVariable},
+ {'gawkKeyword', gawkKeyword},
+ {'gawkBuiltInVariable', gawkBuiltInVariable},
+ {'function', func},
+ {'identifier', identifier},
+}
+
+M._tokenstyles = {
+ builtInVariable = l.STYLE_CONSTANT,
+ default = l.STYLE_ERROR,
+ field = l.STYLE_LABEL,
+ gawkBuiltInVariable = l.STYLE_CONSTANT..',underlined',
+ gawkKeyword = l.STYLE_KEYWORD..',underlined',
+ gawkNumber = l.STYLE_NUMBER..',underlined',
+ gawkOperator = l.STYLE_OPERATOR..',underlined',
+ gawkRegex = l.STYLE_PREPROCESSOR..',underlined',
+ regex = l.STYLE_PREPROCESSOR
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '#'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/bash.lua b/lexers/bash.lua
new file mode 100644
index 0000000..f15e51a
--- /dev/null
+++ b/lexers/bash.lua
@@ -0,0 +1,74 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Shell LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'bash'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", false, true)
+local dq_str = l.delimited_range('"')
+local ex_str = l.delimited_range('`')
+local heredoc = '<<' * P(function(input, index)
+ local s, e, _, delimiter =
+ input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
+ if s == index and delimiter then
+ local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ return e and e + 1 or #input + 1
+ end
+end)
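+-- e.g. the match-time function above consumes an entire heredoc
+-- (illustrative):
+--   cat <<EOF
+--   hello
+--   EOF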
+local string = token(l.STRING, sq_str + dq_str + ex_str + heredoc)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
+ 'do', 'done', 'continue', 'local', 'return', 'select',
+ -- Operators.
+ '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
+ '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
+ '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
+}, '-'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Variables.
+local variable = token(l.VARIABLE,
+ '$' * (S('!#?*@$') + l.digit^1 + l.word +
+ l.delimited_range('{}', true, true)))
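+-- e.g. matches '$#', '$1', '$name', and '${name}' (illustrative).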
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'variable', variable},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '[{}]', '#'},
+ [l.KEYWORD] = {
+ ['if'] = 1, fi = -1, case = 1, esac = -1, ['do'] = 1, done = -1
+ },
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/batch.lua b/lexers/batch.lua
new file mode 100644
index 0000000..d3e1ece
--- /dev/null
+++ b/lexers/batch.lua
@@ -0,0 +1,71 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Batch LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'batch'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local rem = (P('REM') + 'rem') * l.space
+local comment = token(l.COMMENT, (rem + '::') * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'cd', 'chdir', 'md', 'mkdir', 'cls', 'for', 'if', 'echo', 'echo.', 'move',
+ 'copy', 'ren', 'del', 'set', 'call', 'exit', 'setlocal', 'shift',
+ 'endlocal', 'pause', 'defined', 'exist', 'errorlevel', 'else', 'in', 'do',
+ 'NUL', 'AUX', 'PRN', 'not', 'goto', 'pushd', 'popd'
+}, nil, true))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ 'APPEND', 'ATTRIB', 'CHKDSK', 'CHOICE', 'DEBUG', 'DEFRAG', 'DELTREE',
+ 'DISKCOMP', 'DISKCOPY', 'DOSKEY', 'DRVSPACE', 'EMM386', 'EXPAND', 'FASTOPEN',
+ 'FC', 'FDISK', 'FIND', 'FORMAT', 'GRAPHICS', 'KEYB', 'LABEL', 'LOADFIX',
+ 'MEM', 'MODE', 'MORE', 'MOVE', 'MSCDEX', 'NLSFUNC', 'POWER', 'PRINT', 'RD',
+ 'REPLACE', 'RESTORE', 'SETVER', 'SHARE', 'SORT', 'SUBST', 'SYS', 'TREE',
+ 'UNDELETE', 'UNFORMAT', 'VSAFE', 'XCOPY'
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Variables.
+local variable = token(l.VARIABLE,
+ '%' * (l.digit + '%' * l.alpha) +
+ l.delimited_range('%', true, true))
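+-- e.g. matches '%1', '%%i', and '%PATH%' (illustrative).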
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+|&!<>='))
+
+-- Labels.
+local label = token(l.LABEL, ':' * l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'comment', comment},
+ {'identifier', identifier},
+ {'string', string},
+ {'variable', variable},
+ {'label', label},
+ {'operator', operator},
+}
+
+M._LEXBYLINE = true
+
+M._foldsymbols = {
+ _patterns = {'[A-Za-z]+'},
+ [l.KEYWORD] = {setlocal = 1, endlocal = -1, SETLOCAL = 1, ENDLOCAL = -1}
+}
+
+return M
diff --git a/lexers/bibtex.lua b/lexers/bibtex.lua
new file mode 100644
index 0000000..992667a
--- /dev/null
+++ b/lexers/bibtex.lua
@@ -0,0 +1,58 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Bibtex LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'bibtex'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"') +
+ l.delimited_range('{}', false, true, true))
+
+-- Fields.
+local field = token('field', word_match{
+ 'author', 'title', 'journal', 'year', 'volume', 'number', 'pages', 'month',
+ 'note', 'key', 'publisher', 'editor', 'series', 'address', 'edition',
+ 'howpublished', 'booktitle', 'organization', 'chapter', 'school',
+ 'institution', 'type', 'isbn', 'issn', 'affiliation', 'issue', 'keyword',
+ 'url'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(',='))
+
+M._rules = {
+ {'whitespace', ws},
+ {'field', field},
+ {'identifier', identifier},
+ {'string', string},
+ {'operator', operator},
+}
+
+-- Embedded in Latex.
+local latex = l.load('latex')
+
+-- Embedded Bibtex.
+local entry = token('entry', P('@') * word_match({
+ 'book', 'article', 'booklet', 'conference', 'inbook', 'incollection',
+ 'inproceedings', 'manual', 'mastersthesis', 'lambda', 'misc', 'phdthesis',
+ 'proceedings', 'techreport', 'unpublished'
+}, nil, true))
+local bibtex_start_rule = entry * ws^0 * token(l.OPERATOR, P('{'))
+local bibtex_end_rule = token(l.OPERATOR, P('}'))
+l.embed_lexer(latex, M, bibtex_start_rule, bibtex_end_rule)
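+-- e.g. '@book{' (illustrative) switches from the LaTeX lexer into this one;
+-- the matching '}' switches back.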
+
+M._tokenstyles = {
+ field = l.STYLE_CONSTANT,
+ entry = l.STYLE_PREPROCESSOR
+}
+
+return M
diff --git a/lexers/boo.lua b/lexers/boo.lua
new file mode 100644
index 0000000..54ad1a6
--- /dev/null
+++ b/lexers/boo.lua
@@ -0,0 +1,81 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Boo LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'boo'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '#' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local regex_str = #('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
+ l.delimited_range('/', true)
+local string = token(l.STRING, triple_dq_str + sq_str + dq_str) +
+ token(l.REGEX, regex_str)
+
+-- Numbers. In the suffix, 'ms' must come before the single-character
+-- alternatives, or the ordered choice would never reach it.
+local number = token(l.NUMBER, (l.float + l.integer) *
+                               (P('ms') + S('msdhsfFlL'))^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'and', 'break', 'cast', 'continue', 'elif', 'else', 'ensure', 'except', 'for',
+ 'given', 'goto', 'if', 'in', 'isa', 'is', 'not', 'or', 'otherwise', 'pass',
+ 'raise', 'ref', 'try', 'unless', 'when', 'while',
+ -- Definitions.
+ 'abstract', 'callable', 'class', 'constructor', 'def', 'destructor', 'do',
+ 'enum', 'event', 'final', 'get', 'interface', 'internal', 'of', 'override',
+ 'partial', 'private', 'protected', 'public', 'return', 'set', 'static',
+ 'struct', 'transient', 'virtual', 'yield',
+ -- Namespaces.
+ 'as', 'from', 'import', 'namespace',
+ -- Other.
+ 'self', 'super', 'null', 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'byte', 'char', 'date', 'decimal', 'double', 'duck', 'float', 'int',
+ 'long', 'object', 'operator', 'regex', 'sbyte', 'short', 'single', 'string',
+ 'timespan', 'uint', 'ulong', 'ushort'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'array', 'assert', 'checked', 'enumerate', '__eval__', 'filter', 'getter',
+ 'len', 'lock', 'map', 'matrix', 'max', 'min', 'normalArrayIndexing', 'print',
+ 'property', 'range', 'rawArrayIndexing', 'required', '__switch__', 'typeof',
+ 'unchecked', 'using', 'yieldAll', 'zip'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/caml.lua b/lexers/caml.lua
new file mode 100644
index 0000000..b701306
--- /dev/null
+++ b/lexers/caml.lua
@@ -0,0 +1,83 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- OCaml LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'caml'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, l.nested_pair('(*', '*)'))
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'and', 'as', 'asr', 'begin', 'class', 'closed', 'constraint', 'do', 'done',
+ 'downto', 'else', 'end', 'exception', 'external', 'failwith', 'false',
+ 'flush', 'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
+ 'inherit', 'incr', 'land', 'let', 'load', 'los', 'lsl', 'lsr', 'lxor',
+ 'match', 'method', 'mod', 'module', 'mutable', 'new', 'not', 'of', 'open',
+ 'option', 'or', 'parser', 'private', 'ref', 'rec', 'raise', 'regexp', 'sig',
+ 'struct', 'stdout', 'stdin', 'stderr', 'then', 'to', 'true', 'try', 'type',
+ 'val', 'virtual', 'when', 'while', 'with'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'int', 'float', 'bool', 'char', 'string', 'unit'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'raise', 'invalid_arg', 'failwith', 'compare', 'min', 'max', 'succ', 'pred',
+ 'mod', 'abs', 'max_int', 'min_int', 'sqrt', 'exp', 'log', 'log10', 'cos',
+ 'sin', 'tan', 'acos', 'asin', 'atan', 'atan2', 'cosh', 'sinh', 'tanh', 'ceil',
+ 'floor', 'abs_float', 'mod_float', 'frexp', 'ldexp', 'modf', 'float',
+ 'float_of_int', 'truncate', 'int_of_float', 'infinity', 'nan', 'max_float',
+ 'min_float', 'epsilon_float', 'classify_float', 'int_of_char', 'char_of_int',
+ 'ignore', 'string_of_bool', 'bool_of_string', 'string_of_int',
+ 'int_of_string', 'string_of_float', 'float_of_string', 'fst', 'snd', 'stdin',
+ 'stdout', 'stderr', 'print_char', 'print_string', 'print_int', 'print_float',
+ 'print_endline', 'print_newline', 'prerr_char', 'prerr_string', 'prerr_int',
+ 'prerr_float', 'prerr_endline', 'prerr_newline', 'read_line', 'read_int',
+ 'read_float', 'open_out', 'open_out_bin', 'open_out_gen', 'flush',
+ 'flush_all', 'output_char', 'output_string', 'output', 'output_byte',
+ 'output_binary_int', 'output_value', 'seek_out', 'pos_out',
+ 'out_channel_length', 'close_out', 'close_out_noerr', 'set_binary_mode_out',
+ 'open_in', 'open_in_bin', 'open_in_gen', 'input_char', 'input_line', 'input',
+ 'really_input', 'input_byte', 'input_binary_int', 'input_value', 'seek_in',
+ 'pos_in', 'in_channel_length', 'close_in', 'close_in_noerr',
+ 'set_binary_mode_in', 'incr', 'decr', 'string_of_format', 'format_of_string',
+ 'exit', 'at_exit'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/chuck.lua b/lexers/chuck.lua
new file mode 100644
index 0000000..3efe704
--- /dev/null
+++ b/lexers/chuck.lua
@@ -0,0 +1,115 @@
+--------------------------------------------------------------------------------
+-- The MIT License
+--
+-- Copyright (c) 2010 Martin Morawetz
+--
+-- Permission is hereby granted, free of charge, to any person obtaining a copy
+-- of this software and associated documentation files (the "Software"), to deal
+-- in the Software without restriction, including without limitation the rights
+-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+-- copies of the Software, and to permit persons to whom the Software is
+-- furnished to do so, subject to the following conditions:
+--
+-- The above copyright notice and this permission notice shall be included in
+-- all copies or substantial portions of the Software.
+--
+-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+-- THE SOFTWARE.
+--------------------------------------------------------------------------------
+
+-- Based on lexer code from Mitchell mitchell.att.foicica.com.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'chuck'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = P('L')^-1 * l.delimited_range("'", true)
+local dq_str = P('L')^-1 * l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ -- special values
+ 'false', 'maybe', 'me', 'null', 'NULL', 'pi', 'true'
+})
+
+-- The special value 'now'. The lookahead keeps identifiers like 'nowhere'
+-- from matching.
+local now = token('now', P('now') * -(l.alnum + P('_')))
+
+-- Times.
+local time = token('time', word_match{
+ 'samp', 'ms', 'second', 'minute', 'hour', 'day', 'week'
+})
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ -- Control structures.
+ 'break', 'continue', 'else', 'for', 'if', 'repeat', 'return', 'switch',
+ 'until', 'while',
+ -- Other chuck keywords.
+ 'function', 'fun', 'spork', 'const', 'new'
+})
+
+-- Classes.
+local class = token(l.CLASS, word_match{
+ -- Class keywords.
+ 'class', 'extends', 'implements', 'interface', 'private', 'protected',
+ 'public', 'pure', 'super', 'static', 'this'
+})
+
+-- Types.
+local types = token(l.TYPE, word_match{
+ 'float', 'int', 'time', 'dur', 'void', 'same'
+})
+
+-- Global ugens.
+local ugen = token('ugen', word_match{'dac', 'adc', 'blackhole'})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'string', string},
+ {'keyword', keyword},
+ {'constant', constant},
+ {'type', types},
+ {'class', class},
+ {'ugen', ugen},
+ {'time', time},
+ {'now', now},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ ugen = l.STYLE_CONSTANT,
+ time = l.STYLE_NUMBER,
+ now = l.STYLE_CONSTANT..',bold'
+}
+
+return M
diff --git a/lexers/cmake.lua b/lexers/cmake.lua
new file mode 100644
index 0000000..fcc3493
--- /dev/null
+++ b/lexers/cmake.lua
@@ -0,0 +1,173 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- CMake LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'cmake'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"'))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'IF', 'ENDIF', 'FOREACH', 'ENDFOREACH', 'WHILE', 'ENDWHILE', 'ELSE', 'ELSEIF'
+}, nil, true))
+
+-- Commands.
+local command = token(l.FUNCTION, word_match({
+ 'ADD_CUSTOM_COMMAND', 'ADD_CUSTOM_TARGET', 'ADD_DEFINITIONS',
+ 'ADD_DEPENDENCIES', 'ADD_EXECUTABLE', 'ADD_LIBRARY', 'ADD_SUBDIRECTORY',
+ 'ADD_TEST', 'AUX_SOURCE_DIRECTORY', 'BUILD_COMMAND', 'BUILD_NAME',
+ 'CMAKE_MINIMUM_REQUIRED', 'CONFIGURE_FILE', 'CREATE_TEST_SOURCELIST',
+ 'ENABLE_LANGUAGE', 'ENABLE_TESTING', 'ENDMACRO', 'EXECUTE_PROCESS',
+ 'EXEC_PROGRAM', 'EXPORT_LIBRARY_DEPENDENCIES', 'FILE', 'FIND_FILE',
+ 'FIND_LIBRARY', 'FIND_PACKAGE', 'FIND_PATH', 'FIND_PROGRAM', 'FLTK_WRAP_UI',
+ 'GET_CMAKE_PROPERTY', 'GET_DIRECTORY_PROPERTY', 'GET_FILENAME_COMPONENT',
+ 'GET_SOURCE_FILE_PROPERTY', 'GET_TARGET_PROPERTY', 'GET_TEST_PROPERTY',
+ 'INCLUDE', 'INCLUDE_DIRECTORIES', 'INCLUDE_EXTERNAL_MSPROJECT',
+ 'INCLUDE_REGULAR_EXPRESSION', 'INSTALL', 'INSTALL_FILES', 'INSTALL_PROGRAMS',
+ 'INSTALL_TARGETS', 'LINK_DIRECTORIES', 'LINK_LIBRARIES', 'LIST', 'LOAD_CACHE',
+ 'LOAD_COMMAND', 'MACRO', 'MAKE_DIRECTORY', 'MARK_AS_ADVANCED', 'MATH',
+ 'MESSAGE', 'OPTION', 'OUTPUT_REQUIRED_FILES', 'PROJECT', 'QT_WRAP_CPP',
+ 'QT_WRAP_UI', 'REMOVE', 'REMOVE_DEFINITIONS', 'SEPARATE_ARGUMENTS', 'SET',
+ 'SET_DIRECTORY_PROPERTIES', 'SET_SOURCE_FILES_PROPERTIES',
+ 'SET_TARGET_PROPERTIES', 'SET_TESTS_PROPERTIES', 'SITE_NAME', 'SOURCE_GROUP',
+ 'STRING', 'SUBDIRS', 'SUBDIR_DEPENDS', 'TARGET_LINK_LIBRARIES', 'TRY_COMPILE',
+ 'TRY_RUN', 'USE_MANGLED_MESA', 'UTILITY_SOURCE', 'VARIABLE_REQUIRES',
+ 'VTK_MAKE_INSTANTIATOR', 'VTK_WRAP_JAVA', 'VTK_WRAP_PYTHON', 'VTK_WRAP_TCL',
+ 'WRITE_FILE',
+}, nil, true))
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match({
+ 'BOOL', 'CACHE', 'FALSE', 'N', 'NO', 'ON', 'OFF', 'NOTFOUND', 'TRUE'
+}, nil, true))
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'APPLE', 'BORLAND', 'CMAKE_AR', 'CMAKE_BACKWARDS_COMPATIBILITY',
+ 'CMAKE_BASE_NAME', 'CMAKE_BINARY_DIR', 'CMAKE_BUILD_TOOL', 'CMAKE_BUILD_TYPE',
+ 'CMAKE_CACHEFILE_DIR', 'CMAKE_CACHE_MAJOR_VERSION',
+ 'CMAKE_CACHE_MINOR_VERSION', 'CMAKE_CACHE_RELEASE_VERSION',
+ 'CMAKE_CFG_INTDIR', 'CMAKE_COLOR_MAKEFILE', 'CMAKE_COMMAND',
+ 'CMAKE_COMPILER_IS_GNUCC', 'CMAKE_COMPILER_IS_GNUCC_RUN',
+ 'CMAKE_COMPILER_IS_GNUCXX', 'CMAKE_COMPILER_IS_GNUCXX_RUN',
+ 'CMAKE_CTEST_COMMAND', 'CMAKE_CURRENT_BINARY_DIR', 'CMAKE_CURRENT_SOURCE_DIR',
+ 'CMAKE_CXX_COMPILER', 'CMAKE_CXX_COMPILER_ARG1', 'CMAKE_CXX_COMPILER_ENV_VAR',
+ 'CMAKE_CXX_COMPILER_FULLPATH', 'CMAKE_CXX_COMPILER_LOADED',
+ 'CMAKE_CXX_COMPILER_WORKS', 'CMAKE_CXX_COMPILE_OBJECT',
+ 'CMAKE_CXX_CREATE_SHARED_LIBRARY',
+ 'CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS',
+ 'CMAKE_CXX_CREATE_SHARED_MODULE', 'CMAKE_CXX_CREATE_STATIC_LIBRARY',
+ 'CMAKE_CXX_FLAGS', 'CMAKE_CXX_FLAGS_DEBUG', 'CMAKE_CXX_FLAGS_DEBUG_INIT',
+ 'CMAKE_CXX_FLAGS_INIT', 'CMAKE_CXX_FLAGS_MINSIZEREL',
+ 'CMAKE_CXX_FLAGS_MINSIZEREL_INIT', 'CMAKE_CXX_FLAGS_RELEASE',
+ 'CMAKE_CXX_FLAGS_RELEASE_INIT', 'CMAKE_CXX_FLAGS_RELWITHDEBINFO',
+ 'CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_CXX_IGNORE_EXTENSIONS',
+ 'CMAKE_CXX_INFORMATION_LOADED', 'CMAKE_CXX_LINKER_PREFERENCE',
+ 'CMAKE_CXX_LINK_EXECUTABLE', 'CMAKE_CXX_LINK_FLAGS',
+ 'CMAKE_CXX_OUTPUT_EXTENSION', 'CMAKE_CXX_SOURCE_FILE_EXTENSIONS',
+ 'CMAKE_C_COMPILER', 'CMAKE_C_COMPILER_ARG1', 'CMAKE_C_COMPILER_ENV_VAR',
+ 'CMAKE_C_COMPILER_FULLPATH', 'CMAKE_C_COMPILER_LOADED',
+ 'CMAKE_C_COMPILER_WORKS', 'CMAKE_C_COMPILE_OBJECT',
+ 'CMAKE_C_CREATE_SHARED_LIBRARY',
+ 'CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS',
+ 'CMAKE_C_CREATE_SHARED_MODULE', 'CMAKE_C_CREATE_STATIC_LIBRARY',
+ 'CMAKE_C_FLAGS', 'CMAKE_C_FLAGS_DEBUG', 'CMAKE_C_FLAGS_DEBUG_INIT',
+ 'CMAKE_C_FLAGS_INIT', 'CMAKE_C_FLAGS_MINSIZEREL',
+ 'CMAKE_C_FLAGS_MINSIZEREL_INIT', 'CMAKE_C_FLAGS_RELEASE',
+ 'CMAKE_C_FLAGS_RELEASE_INIT', 'CMAKE_C_FLAGS_RELWITHDEBINFO',
+ 'CMAKE_C_FLAGS_RELWITHDEBINFO_INIT', 'CMAKE_C_IGNORE_EXTENSIONS',
+ 'CMAKE_C_INFORMATION_LOADED', 'CMAKE_C_LINKER_PREFERENCE',
+ 'CMAKE_C_LINK_EXECUTABLE', 'CMAKE_C_LINK_FLAGS', 'CMAKE_C_OUTPUT_EXTENSION',
+ 'CMAKE_C_SOURCE_FILE_EXTENSIONS', 'CMAKE_DL_LIBS', 'CMAKE_EDIT_COMMAND',
+ 'CMAKE_EXECUTABLE_SUFFIX', 'CMAKE_EXE_LINKER_FLAGS',
+ 'CMAKE_EXE_LINKER_FLAGS_DEBUG', 'CMAKE_EXE_LINKER_FLAGS_MINSIZEREL',
+ 'CMAKE_EXE_LINKER_FLAGS_RELEASE', 'CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO',
+ 'CMAKE_FILES_DIRECTORY', 'CMAKE_FIND_APPBUNDLE', 'CMAKE_FIND_FRAMEWORK',
+ 'CMAKE_FIND_LIBRARY_PREFIXES', 'CMAKE_FIND_LIBRARY_SUFFIXES',
+ 'CMAKE_GENERATOR', 'CMAKE_HOME_DIRECTORY', 'CMAKE_INCLUDE_FLAG_C',
+ 'CMAKE_INCLUDE_FLAG_CXX', 'CMAKE_INCLUDE_FLAG_C_SEP', 'CMAKE_INIT_VALUE',
+ 'CMAKE_INSTALL_PREFIX', 'CMAKE_LIBRARY_PATH_FLAG', 'CMAKE_LINK_LIBRARY_FLAG',
+ 'CMAKE_LINK_LIBRARY_SUFFIX', 'CMAKE_MAJOR_VERSION', 'CMAKE_MAKE_PROGRAM',
+ 'CMAKE_MINOR_VERSION', 'CMAKE_MODULE_EXISTS', 'CMAKE_MODULE_LINKER_FLAGS',
+ 'CMAKE_MODULE_LINKER_FLAGS_DEBUG', 'CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL',
+ 'CMAKE_MODULE_LINKER_FLAGS_RELEASE',
+ 'CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO',
+ 'CMAKE_MacOSX_Content_COMPILE_OBJECT', 'CMAKE_NUMBER_OF_LOCAL_GENERATORS',
+ 'CMAKE_OSX_ARCHITECTURES', 'CMAKE_OSX_SYSROOT', 'CMAKE_PARENT_LIST_FILE',
+ 'CMAKE_PATCH_VERSION', 'CMAKE_PLATFORM_HAS_INSTALLNAME',
+ 'CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES', 'CMAKE_PLATFORM_ROOT_BIN',
+ 'CMAKE_PROJECT_NAME', 'CMAKE_RANLIB', 'CMAKE_ROOT',
+ 'CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS',
+ 'CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS',
+ 'CMAKE_SHARED_LIBRARY_CXX_FLAGS', 'CMAKE_SHARED_LIBRARY_C_FLAGS',
+ 'CMAKE_SHARED_LIBRARY_LINK_C_FLAGS', 'CMAKE_SHARED_LIBRARY_PREFIX',
+ 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG',
+ 'CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP',
+ 'CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG', 'CMAKE_SHARED_LIBRARY_SONAME_C_FLAG',
+ 'CMAKE_SHARED_LIBRARY_SUFFIX', 'CMAKE_SHARED_LINKER_FLAGS',
+ 'CMAKE_SHARED_LINKER_FLAGS_DEBUG', 'CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL',
+ 'CMAKE_SHARED_LINKER_FLAGS_RELEASE',
+ 'CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO',
+ 'CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS', 'CMAKE_SHARED_MODULE_CREATE_C_FLAGS',
+ 'CMAKE_SHARED_MODULE_PREFIX', 'CMAKE_SHARED_MODULE_SUFFIX',
+ 'CMAKE_SIZEOF_VOID_P', 'CMAKE_SKIP_RPATH', 'CMAKE_SOURCE_DIR',
+ 'CMAKE_STATIC_LIBRARY_PREFIX', 'CMAKE_STATIC_LIBRARY_SUFFIX', 'CMAKE_SYSTEM',
+ 'CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE',
+ 'CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE', 'CMAKE_SYSTEM_APPBUNDLE_PATH',
+ 'CMAKE_SYSTEM_FRAMEWORK_PATH', 'CMAKE_SYSTEM_INCLUDE_PATH',
+ 'CMAKE_SYSTEM_INFO_FILE', 'CMAKE_SYSTEM_LIBRARY_PATH', 'CMAKE_SYSTEM_LOADED',
+ 'CMAKE_SYSTEM_NAME', 'CMAKE_SYSTEM_PROCESSOR', 'CMAKE_SYSTEM_PROGRAM_PATH',
+ 'CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED', 'CMAKE_SYSTEM_VERSION',
+ 'CMAKE_UNAME', 'CMAKE_USE_RELATIVE_PATHS', 'CMAKE_VERBOSE_MAKEFILE', 'CYGWIN',
+ 'EXECUTABLE_OUTPUT_PATH', 'FORCE', 'HAVE_CMAKE_SIZEOF_VOID_P',
+ 'LIBRARY_OUTPUT_PATH', 'MACOSX_BUNDLE', 'MINGW', 'MSVC60', 'MSVC70', 'MSVC71',
+ 'MSVC80', 'MSVC', 'MSVC_IDE', 'PROJECT_BINARY_DIR', 'PROJECT_NAME',
+  'PROJECT_SOURCE_DIR', 'RUN_CONFIGURE', 'UNIX', 'WIN32',
+  '_CMAKE_OSX_MACHINE',
+ -- More variables.
+ 'LOCATION', 'TARGET', 'POST_BUILD', 'PRE_BUILD', 'ARGS'
+} + P('$') * l.delimited_range('{}', false, true))
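+-- The trailing alternative also matches dereferences such as
+-- '${CMAKE_SOURCE_DIR}' (illustrative).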
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, word_match({
+ 'AND', 'COMMAND', 'DEFINED', 'DOC', 'EQUAL', 'EXISTS', 'GREATER', 'INTERNAL',
+ 'LESS', 'MATCHES', 'NAME', 'NAMES', 'NAME_WE', 'NOT', 'OR', 'PATH', 'PATHS',
+ 'PROGRAM', 'STREQUAL', 'STRGREATER', 'STRINGS', 'STRLESS'
+}) + S('=(){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'command', command},
+ {'constant', constant},
+ {'variable', variable},
+ {'operator', operator},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+}
+
+M._foldsymbols = {
+ _patterns = {'[A-Z]+', '[%(%){}]', '#'},
+ [l.KEYWORD] = {
+ IF = 1, ENDIF = -1, FOREACH = 1, ENDFOREACH = -1, WHILE = 1, ENDWHILE = -1
+ },
+ [l.FUNCTION] = {MACRO = 1, ENDMACRO = -1},
+ [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/coffeescript.lua b/lexers/coffeescript.lua
new file mode 100644
index 0000000..b1c649e
--- /dev/null
+++ b/lexers/coffeescript.lua
@@ -0,0 +1,62 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- CoffeeScript LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, S = lpeg.P, lpeg.S
+
+local M = {_NAME = 'coffeescript'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local block_comment = '###' * (l.any - '###')^0 * P('###')^-1
+local line_comment = '#' * l.nonnewline_esc^0
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local regex_str = #P('/') * l.last_char_includes('+-*%<>!=^&|?~:;,([{') *
+ l.delimited_range('/', true) * S('igm')^0
+local string = token(l.STRING, sq_str + dq_str) + token(l.REGEX, regex_str)
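+-- e.g. in 'x = /\d+/gi' the '/' follows '=', so a regex is lexed; in
+-- 'a / b' it is an operator (illustrative).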
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'all', 'and', 'bind', 'break', 'by', 'case', 'catch', 'class', 'const',
+ 'continue', 'default', 'delete', 'do', 'each', 'else', 'enum', 'export',
+ 'extends', 'false', 'for', 'finally', 'function', 'if', 'import', 'in',
+ 'instanceof', 'is', 'isnt', 'let', 'loop', 'native', 'new', 'no', 'not', 'of',
+ 'off', 'on', 'or', 'return', 'super', 'switch', 'then', 'this', 'throw',
+ 'true', 'try', 'typeof', 'unless', 'until', 'var', 'void', 'with', 'when',
+ 'while', 'yes'
+})
+
+-- Fields: object properties and methods.
+local field = token(l.FUNCTION, '.' * (S('_$') + l.alpha) *
+ (S('_$') + l.alnum)^0)
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'field', field},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'string', string},
+ {'operator', operator},
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lexers/container.lua b/lexers/container.lua
new file mode 100644
index 0000000..6223f2e
--- /dev/null
+++ b/lexers/container.lua
@@ -0,0 +1,7 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Container LPeg lexer.
+-- This is SciTE's plain text lexer.
+
+local M = {_NAME = 'container'}
+
+return M
diff --git a/lexers/context.lua b/lexers/context.lua
new file mode 100644
index 0000000..30ec1b0
--- /dev/null
+++ b/lexers/context.lua
@@ -0,0 +1,59 @@
+-- Copyright 2006-2013 Robert Gieseke. See LICENSE.
+-- ConTeXt LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'context'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+
+-- Commands.
+local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}')))
+
+-- Sections.
+local section = token('section', '\\' * word_match{
+ 'part', 'chapter', 'section', 'subsection', 'subsubsection', 'title',
+ 'subject', 'subsubject', 'subsubsubject'
+})
+
+-- ConTeXt environments.
+local environment = token('environment', '\\' * (P('start') + 'stop') * l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('$&#{}[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'environment', environment},
+ {'section', section},
+ {'keyword', command},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ environment = l.STYLE_KEYWORD,
+ section = l.STYLE_CLASS
+}
+
+M._foldsymbols = {
+ _patterns = {'\\start', '\\stop', '[{}]', '%%'},
+ ['environment'] = {['\\start'] = 1, ['\\stop'] = -1},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['%'] = l.fold_line_comments('%')}
+}
+
+-- Embedded Lua.
+local luatex = l.load('lua')
+local luatex_start_rule = #P('\\startluacode') * environment
+local luatex_end_rule = #P('\\stopluacode') * environment
+l.embed_lexer(M, luatex, luatex_start_rule, luatex_end_rule)
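+-- e.g. (illustrative ConTeXt source):
+--   \startluacode
+--     context("Hello")
+--   \stopluacode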
+
+return M
diff --git a/lexers/cpp.lua b/lexers/cpp.lua
new file mode 100644
index 0000000..3d64c93
--- /dev/null
+++ b/lexers/cpp.lua
@@ -0,0 +1,87 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- C++ LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'cpp'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = P('L')^-1 * l.delimited_range("'", true)
+local dq_str = P('L')^-1 * l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'import',
+ 'include', 'line', 'pragma', 'undef', 'using', 'warning'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * S('\t ')^0 * preproc_word)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'asm', 'auto', 'break', 'case', 'catch', 'class', 'const', 'const_cast',
+ 'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else', 'explicit',
+ 'export', 'extern', 'false', 'for', 'friend', 'goto', 'if', 'inline',
+ 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public',
+ 'register', 'reinterpret_cast', 'return', 'sizeof', 'static', 'static_cast',
+ 'switch', 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid',
+ 'typename', 'using', 'virtual', 'volatile', 'while',
+ -- Operators
+ 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq',
+ 'xor', 'xor_eq',
+ -- C++11
+ 'alignas', 'alignof', 'constexpr', 'decltype', 'final', 'noexcept',
+ 'override', 'static_assert', 'thread_local'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'char', 'double', 'enum', 'float', 'int', 'long', 'short', 'signed',
+ 'struct', 'union', 'unsigned', 'void', 'wchar_t',
+ -- C++11
+ 'char16_t', 'char32_t', 'nullptr'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
+ [l.PREPROCESSOR] = {
+ region = 1, endregion = -1,
+ ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
+ },
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/csharp.lua b/lexers/csharp.lua
new file mode 100644
index 0000000..6f7ab79
--- /dev/null
+++ b/lexers/csharp.lua
@@ -0,0 +1,84 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- C# LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'csharp'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local ml_str = P('@')^-1 * l.delimited_range('"', false, true)
+local string = token(l.STRING, sq_str + dq_str + ml_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('lLdDfFMm')^-1)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'define', 'elif', 'else', 'endif', 'error', 'if', 'line', 'undef', 'warning',
+ 'region', 'endregion'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * S('\t ')^0 * preproc_word *
+ (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'class', 'delegate', 'enum', 'event', 'interface', 'namespace', 'struct',
+ 'using', 'abstract', 'const', 'explicit', 'extern', 'fixed', 'implicit',
+ 'internal', 'lock', 'out', 'override', 'params', 'partial', 'private',
+ 'protected', 'public', 'ref', 'sealed', 'static', 'readonly', 'unsafe',
+ 'virtual', 'volatile', 'add', 'as', 'assembly', 'base', 'break', 'case',
+ 'catch', 'checked', 'continue', 'default', 'do', 'else', 'finally', 'for',
+ 'foreach', 'get', 'goto', 'if', 'in', 'is', 'new', 'remove', 'return', 'set',
+ 'sizeof', 'stackalloc', 'super', 'switch', 'this', 'throw', 'try', 'typeof',
+ 'unchecked', 'value', 'void', 'while', 'yield',
+ 'null', 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'long', 'object',
+ 'operator', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
+ [l.PREPROCESSOR] = {
+ region = 1, endregion = -1,
+ ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
+ },
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/css.lua b/lexers/css.lua
new file mode 100644
index 0000000..a753cc7
--- /dev/null
+++ b/lexers/css.lua
@@ -0,0 +1,166 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- CSS LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'css'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.digit^1)
+
+-- Keywords.
+local css1_property = word_match({
+ 'color', 'background-color', 'background-image', 'background-repeat',
+ 'background-attachment', 'background-position', 'background', 'font-family',
+ 'font-style', 'font-variant', 'font-weight', 'font-size', 'font',
+ 'word-spacing', 'letter-spacing', 'text-decoration', 'vertical-align',
+ 'text-transform', 'text-align', 'text-indent', 'line-height', 'margin-top',
+ 'margin-right', 'margin-bottom', 'margin-left', 'margin', 'padding-top',
+ 'padding-right', 'padding-bottom', 'padding-left', 'padding',
+ 'border-top-width', 'border-right-width', 'border-bottom-width',
+ 'border-left-width', 'border-width', 'border-top', 'border-right',
+ 'border-bottom', 'border-left', 'border', 'border-color', 'border-style',
+ 'width', 'height', 'float', 'clear', 'display', 'white-space',
+ 'list-style-type', 'list-style-image', 'list-style-position', 'list-style'
+}, '-')
+local css1_value = word_match({
+ 'auto', 'none', 'normal', 'italic', 'oblique', 'small-caps', 'bold', 'bolder',
+ 'lighter', 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large',
+ 'xx-large', 'larger', 'smaller', 'transparent', 'repeat', 'repeat-x',
+ 'repeat-y', 'no-repeat', 'scroll', 'fixed', 'top', 'bottom', 'left', 'center',
+ 'right', 'justify', 'both', 'underline', 'overline', 'line-through', 'blink',
+ 'baseline', 'sub', 'super', 'text-top', 'middle', 'text-bottom', 'capitalize',
+ 'uppercase', 'lowercase', 'thin', 'medium', 'thick', 'dotted', 'dashed',
+ 'solid', 'double', 'groove', 'ridge', 'inset', 'outset', 'block', 'inline',
+ 'list-item', 'pre', 'no-wrap', 'inside', 'outside', 'disc', 'circle',
+ 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha',
+ 'upper-alpha', 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime',
+ 'maroon', 'navy', 'olive', 'purple', 'red', 'silver', 'teal', 'white',
+ 'yellow'
+}, '-')
+local css2_property = word_match({
+ 'border-top-color', 'border-right-color', 'border-bottom-color',
+ 'border-left-color', 'border-color', 'border-top-style', 'border-right-style',
+ 'border-bottom-style', 'border-left-style', 'border-style', 'top', 'right',
+ 'bottom', 'left', 'position', 'z-index', 'direction', 'unicode-bidi',
+ 'min-width', 'max-width', 'min-height', 'max-height', 'overflow', 'clip',
+ 'visibility', 'content', 'quotes', 'counter-reset', 'counter-increment',
+ 'marker-offset', 'size', 'marks', 'page-break-before', 'page-break-after',
+ 'page-break-inside', 'page', 'orphans', 'widows', 'font-stretch',
+ 'font-size-adjust', 'unicode-range', 'units-per-em', 'src', 'panose-1',
+ 'stemv', 'stemh', 'slope', 'cap-height', 'x-height', 'ascent', 'descent',
+ 'widths', 'bbox', 'definition-src', 'baseline', 'centerline', 'mathline',
+ 'topline', 'text-shadow', 'caption-side', 'table-layout', 'border-collapse',
+ 'border-spacing', 'empty-cells', 'speak-header', 'cursor', 'outline',
+ 'outline-width', 'outline-style', 'outline-color', 'volume', 'speak',
+ 'pause-before', 'pause-after', 'pause', 'cue-before', 'cue-after', 'cue',
+ 'play-during', 'azimuth', 'elevation', 'speech-rate', 'voice-family', 'pitch',
+ 'pitch-range', 'stress', 'richness', 'speak-punctuation', 'speak-numeral'
+}, '-')
+local css2_value = word_match({
+ 'inherit', 'run-in', 'compact', 'marker', 'table', 'inline-table',
+ 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row',
+ 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'static',
+ 'relative', 'absolute', 'fixed', 'ltr', 'rtl', 'embed', 'bidi-override',
+ 'visible', 'hidden', 'scroll', 'collapse', 'open-quote', 'close-quote',
+ 'no-open-quote', 'no-close-quote', 'decimal-leading-zero', 'lower-greek',
+ 'lower-latin', 'upper-latin', 'hebrew', 'armenian', 'georgian',
+ 'cjk-ideographic', 'hiragana', 'katakana', 'hiragana-iroha', 'katakana-iroha',
+ 'landscape', 'portrait', 'crop', 'cross', 'always', 'avoid', 'wider',
+ 'narrower', 'ultra-condensed', 'extra-condensed', 'condensed',
+ 'semi-condensed', 'semi-expanded', 'expanded', 'extra-expanded',
+ 'ultra-expanded', 'caption', 'icon', 'menu', 'message-box', 'small-caption',
+ 'status-bar', 'separate', 'show', 'hide', 'once', 'crosshair', 'default',
+ 'pointer', 'move', 'text', 'wait', 'help', 'e-resize', 'ne-resize',
+ 'nw-resize', 'n-resize', 'se-resize', 'sw-resize', 's-resize', 'w-resize',
+ 'ActiveBorder', 'ActiveCaption', 'AppWorkspace', 'Background', 'ButtonFace',
+ 'ButtonHighlight', 'ButtonShadow', 'InactiveCaptionText', 'ButtonText',
+ 'CaptionText', 'GrayText', 'Highlight', 'HighlightText', 'InactiveBorder',
+ 'InactiveCaption', 'InfoBackground', 'InfoText', 'Menu', 'MenuText',
+ 'Scrollbar', 'ThreeDDarkShadow', 'ThreeDFace', 'ThreeDHighlight',
+ 'ThreeDLightShadow', 'ThreeDShadow', 'Window', 'WindowFrame', 'WindowText',
+ 'silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud', 'spell-out', 'mix',
+ 'left-side', 'far-left', 'center-left', 'center-right', 'far-right',
+ 'right-side', 'behind', 'leftwards', 'rightwards', 'below', 'level', 'above',
+ 'higher', 'lower', 'x-slow', 'slow', 'medium', 'fast', 'x-fast', 'faster',
+ 'slower', 'male', 'female', 'child', 'x-low', 'low', 'high', 'x-high', 'code',
+  'digits', 'continuous'
+}, '-')
+local property = token(l.KEYWORD, css1_property + css2_property)
+local value = token('value', css1_value + css2_value)
+local keyword = property + value
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('~!#*>+=|.,:;()[]{}'))
+
+-- At rule.
+local at_rule = token('at_rule', P('@') * word_match{
+ 'charset', 'font-face', 'media', 'page', 'import'
+})
+
+-- Colors.
+local xdigit = l.xdigit
+local hex_color = '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1
+local color_name = word_match{
+ 'aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy',
+ 'olive', 'orange', 'purple', 'red', 'silver', 'teal', 'white', 'yellow'
+}
+local color = token('color', hex_color + color_name)
+
+-- Pseudo.
+local pseudo = token(l.CONSTANT, word_match({
+ -- Pseudo elements.
+ 'first-line', 'first-letter', 'before', 'after',
+ -- Pseudo classes.
+ 'first-child', 'link', 'visited', 'hover', 'active', 'focus', 'lang',
+}, '-'))
+
+-- Units.
+local unit = token('unit', word_match{
+ 'em', 'ex', 'px', 'pt', 'pc', 'in', 'ft', 'mm', 'cm', 'kHz', 'Hz', 'deg',
+ 'rad', 'grad', 'ms', 's'
+} + '%')
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'pseudo', pseudo},
+ {'color', color},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number * unit^-1},
+ {'operator', operator},
+ {'at_rule', at_rule},
+}
+
+M._tokenstyles = {
+ unit = l.STYLE_LABEL,
+ value = l.STYLE_CONSTANT,
+ color = l.STYLE_NUMBER,
+ at_rule = l.STYLE_PREPROCESSOR
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1}
+}
+
+return M
diff --git a/lexers/cuda.lua b/lexers/cuda.lua
new file mode 100644
index 0000000..0cf22e6
--- /dev/null
+++ b/lexers/cuda.lua
@@ -0,0 +1,92 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- CUDA LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local table = _G.table
+
+local M = {_NAME = 'cuda'}
+
+-- Whitespace
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ '__global__', '__host__', '__device__', '__constant__', '__shared__'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ -- Atom.
+ 'atomicAdd', 'atomicAnd', 'atomicCAS', 'atomicDec', 'atomicExch', 'atomicInc',
+ 'atomicMax', 'atomicMin', 'atomicOr', 'atomicSub', 'atomicXor',
+ -- Dev.
+ 'tex1D', 'tex1Dfetch', 'tex2D', '__float_as_int', '__int_as_float',
+ '__float2int_rn', '__float2int_rz', '__float2int_ru', '__float2int_rd',
+ '__float2uint_rn', '__float2uint_rz', '__float2uint_ru', '__float2uint_rd',
+ '__int2float_rn', '__int2float_rz', '__int2float_ru', '__int2float_rd',
+ '__uint2float_rn', '__uint2float_rz', '__uint2float_ru', '__uint2float_rd',
+ '__fadd_rz', '__fmul_rz', '__fdividef', '__mul24', '__umul24', '__mulhi',
+ '__umulhi', '__mul64hi', '__umul64hi', 'min', 'umin', 'fminf', 'fmin', 'max',
+ 'umax', 'fmaxf', 'fmax', 'abs', 'fabsf', 'fabs', 'sqrtf', 'sqrt', 'sinf',
+ '__sinf', 'sin', 'cosf', '__cosf', 'cos', 'sincosf', '__sincosf', 'expf',
+ '__expf', 'exp', 'logf', '__logf', 'log',
+ -- Runtime.
+ 'cudaBindTexture', 'cudaBindTextureToArray', 'cudaChooseDevice',
+ 'cudaConfigureCall', 'cudaCreateChannelDesc', 'cudaD3D10GetDevice',
+ 'cudaD3D10MapResources', 'cudaD3D10RegisterResource',
+ 'cudaD3D10ResourceGetMappedArray', 'cudaD3D10ResourceGetMappedPitch',
+ 'cudaD3D10ResourceGetMappedPointer', 'cudaD3D10ResourceGetMappedSize',
+ 'cudaD3D10ResourceGetSurfaceDimensions', 'cudaD3D10ResourceSetMapFlags',
+ 'cudaD3D10SetDirect3DDevice', 'cudaD3D10UnmapResources',
+ 'cudaD3D10UnregisterResource', 'cudaD3D9GetDevice',
+ 'cudaD3D9GetDirect3DDevice', 'cudaD3D9MapResources',
+ 'cudaD3D9RegisterResource', 'cudaD3D9ResourceGetMappedArray',
+ 'cudaD3D9ResourceGetMappedPitch', 'cudaD3D9ResourceGetMappedPointer',
+ 'cudaD3D9ResourceGetMappedSize', 'cudaD3D9ResourceGetSurfaceDimensions',
+ 'cudaD3D9ResourceSetMapFlags', 'cudaD3D9SetDirect3DDevice',
+ 'cudaD3D9UnmapResources', 'cudaD3D9UnregisterResource', 'cudaEventCreate',
+ 'cudaEventDestroy', 'cudaEventElapsedTime', 'cudaEventQuery',
+ 'cudaEventRecord', 'cudaEventSynchronize', 'cudaFree', 'cudaFreeArray',
+ 'cudaFreeHost', 'cudaGetChannelDesc', 'cudaGetDevice', 'cudaGetDeviceCount',
+ 'cudaGetDeviceProperties', 'cudaGetErrorString', 'cudaGetLastError',
+ 'cudaGetSymbolAddress', 'cudaGetSymbolSize', 'cudaGetTextureAlignmentOffset',
+ 'cudaGetTextureReference', 'cudaGLMapBufferObject',
+ 'cudaGLRegisterBufferObject', 'cudaGLSetGLDevice', 'cudaGLUnmapBufferObject',
+ 'cudaGLUnregisterBufferObject', 'cudaLaunch', 'cudaMalloc', 'cudaMalloc3D',
+ 'cudaMalloc3DArray', 'cudaMallocArray', 'cudaMallocHost', 'cudaMallocPitch',
+ 'cudaMemcpy', 'cudaMemcpy2D', 'cudaMemcpy2DArrayToArray',
+ 'cudaMemcpy2DFromArray', 'cudaMemcpy2DToArray', 'cudaMemcpy3D',
+ 'cudaMemcpyArrayToArray', 'cudaMemcpyFromArray', 'cudaMemcpyFromSymbol',
+ 'cudaMemcpyToArray', 'cudaMemcpyToSymbol', 'cudaMemset', 'cudaMemset2D',
+ 'cudaMemset3D', 'cudaSetDevice', 'cudaSetupArgument', 'cudaStreamCreate',
+ 'cudaStreamDestroy', 'cudaStreamQuery', 'cudaStreamSynchronize',
+ 'cudaThreadExit', 'cudaThreadSynchronize', 'cudaUnbindTexture'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'uint', 'int1', 'uint1', 'int2', 'uint2', 'int3', 'uint3', 'int4', 'uint4',
+ 'float1', 'float2', 'float3', 'float4', 'char1', 'char2', 'char3', 'char4',
+ 'uchar1', 'uchar2', 'uchar3', 'uchar4', 'short1', 'short2', 'short3',
+ 'short4', 'dim1', 'dim2', 'dim3', 'dim4'
+})
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'gridDim', 'blockIdx', 'blockDim', 'threadIdx'
+})
+
+-- Extend cpp lexer to include CUDA elements.
+local cpp = l.load('cpp')
+local _rules = cpp._rules
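+-- Splice the CUDA rules in ahead of the C++ ones so that, e.g., '__global__'
+-- is claimed as a CUDA keyword before cpp's identifier rule can match it.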
+_rules[1] = {'whitespace', ws}
+table.insert(_rules, 2, {'cuda_keyword', keyword})
+table.insert(_rules, 3, {'cuda_function', func})
+table.insert(_rules, 4, {'cuda_type', type})
+table.insert(_rules, 5, {'cuda_variable', variable})
+M._rules = _rules
+M._foldsymbols = cpp._foldsymbols
+
+return M
diff --git a/lexers/dart.lua b/lexers/dart.lua
new file mode 100644
index 0000000..c237128
--- /dev/null
+++ b/lexers/dart.lua
@@ -0,0 +1,77 @@
+-- Dart LPeg lexer.
+-- Written by Brian Schott (@Hackerpilot on Github).
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'dart'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local nested_comment = l.nested_pair('/*', '*/')
+local comment = token(l.COMMENT, line_comment + nested_comment)
+
+-- Strings.
+local sq_str = S('r')^-1 * l.delimited_range("'", true)
+local dq_str = S('r')^-1 * l.delimited_range('"', true)
+local sq_str_multiline = S('r')^-1 * l.delimited_range("'''")
+local dq_str_multiline = S('r')^-1 * l.delimited_range('"""')
+local string = token(l.STRING,
+ sq_str + dq_str + sq_str_multiline + dq_str_multiline)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.hex_num))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue', 'default',
+ 'do', 'else', 'enum', 'extends', 'false', 'final' , 'finally', 'for', 'if',
+ 'in', 'is', 'new', 'null', 'rethrow', 'return', 'super', 'switch', 'this',
+ 'throw', 'true', 'try', 'var', 'void', 'while', 'with',
+})
+
+local builtin_identifiers = token(l.CONSTANT, word_match{
+ 'abstract', 'as', 'dynamic', 'export', 'external', 'factory', 'get',
+ 'implements', 'import', 'library', 'operator', 'part', 'set', 'static',
+ 'typedef'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}'))
+
+-- Annotations (styled like preprocessor directives).
+local annotation = token('annotation', '@' * l.word^1)
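+-- e.g. '@override' and '@deprecated' (illustrative).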
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'constant', builtin_identifiers},
+ {'string', string},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+ {'annotation', annotation},
+}
+
+M._tokenstyles = {
+ annotation = l.STYLE_PREPROCESSOR,
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/[*+]', '[*+]/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {
+ ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1,
+ ['//'] = l.fold_line_comments('//')
+ }
+}
+
+return M
diff --git a/lexers/desktop.lua b/lexers/desktop.lua
new file mode 100644
index 0000000..267f366
--- /dev/null
+++ b/lexers/desktop.lua
@@ -0,0 +1,62 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Desktop Entry LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'desktop'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"'))
+
+-- Group headers.
+local group_header = l.starts_line(token(l.STRING,
+ l.delimited_range('[]', false, true)))
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{'true', 'false'})
+
+-- Locales.
+local locale = token(l.CLASS, l.delimited_range('[]', false, true))
+
+-- Keys.
+local key = token(l.VARIABLE, word_match{
+ 'Type', 'Version', 'Name', 'GenericName', 'NoDisplay', 'Comment', 'Icon',
+  'Hidden', 'OnlyShowIn', 'NotShowIn', 'TryExec', 'Exec', 'Path',
+ 'Terminal', 'MimeType', 'Categories', 'StartupNotify', 'StartupWMClass', 'URL'
+})
+
+-- Field codes.
+local code = l.token(l.CONSTANT, P('%') * S('fFuUdDnNickvm'))
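+-- e.g. the '%f' in 'Exec=editor %f' (illustrative).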
+
+-- Identifiers.
+local identifier = l.token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('='))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'key', key},
+ {'identifier', identifier},
+ {'group_header', group_header},
+ {'locale', locale},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'code', code},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/diff.lua b/lexers/diff.lua
new file mode 100644
index 0000000..53b7f17
--- /dev/null
+++ b/lexers/diff.lua
@@ -0,0 +1,44 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Diff LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'diff'}
+
+-- Text, separators, and file headers.
+local index = token(l.COMMENT, 'Index: ' * l.any^0 * P(-1))
+local separator = token(l.COMMENT, ('---' + P('*')^4 + P('=')^1) * l.space^0 *
+ -1)
+local header = token('header', (P('*** ') + '--- ' + '+++ ') * l.any^1)
+
+-- Location.
+local location = token(l.NUMBER, ('@@' + l.digit^1 + '****') * l.any^1)
+
+-- Additions, deletions, and changes.
+local addition = token('addition', S('>+') * l.any^0)
+local deletion = token('deletion', S('<-') * l.any^0)
+local change = token('change', '! ' * l.any^0)
+
+M._rules = {
+ {'index', index},
+ {'separator', separator},
+ {'header', header},
+ {'location', location},
+ {'addition', addition},
+ {'deletion', deletion},
+ {'change', change},
+ {'any_line', token('default', l.any^1)},
+}
+
+M._tokenstyles = {
+ header = l.STYLE_COMMENT,
+ addition = 'fore:$(color.green)',
+ deletion = 'fore:$(color.red)',
+ change = 'fore:$(color.yellow)'
+}
+
+M._LEXBYLINE = true
+
+return M
diff --git a/lexers/django.lua b/lexers/django.lua
new file mode 100644
index 0000000..13a5341
--- /dev/null
+++ b/lexers/django.lua
@@ -0,0 +1,77 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Django LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'django'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '{#' * (l.any - l.newline - '#}')^0 *
+ P('#}')^-1)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', false, true))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'as', 'block', 'blocktrans', 'by', 'endblock', 'endblocktrans', 'comment',
+ 'endcomment', 'cycle', 'date', 'debug', 'else', 'extends', 'filter',
+ 'endfilter', 'firstof', 'for', 'endfor', 'if', 'endif', 'ifchanged',
+ 'endifchanged', 'ifnotequal', 'endifnotequal', 'in', 'load', 'not', 'now',
+ 'or', 'parsed', 'regroup', 'ssi', 'trans', 'with', 'widthratio'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'add', 'addslashes', 'capfirst', 'center', 'cut', 'date', 'default',
+ 'dictsort', 'dictsortreversed', 'divisibleby', 'escape', 'filesizeformat',
+ 'first', 'fix_ampersands', 'floatformat', 'get_digit', 'join', 'length',
+ 'length_is', 'linebreaks', 'linebreaksbr', 'linenumbers', 'ljust', 'lower',
+ 'make_list', 'phone2numeric', 'pluralize', 'pprint', 'random', 'removetags',
+ 'rjust', 'slice', 'slugify', 'stringformat', 'striptags', 'time', 'timesince',
+ 'title', 'truncatewords', 'unordered_list', 'upper', 'urlencode', 'urlize',
+ 'urlizetrunc', 'wordcount', 'wordwrap', 'yesno',
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(':,.|'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'operator', operator},
+}
+
+-- Embedded in HTML.
+local html = l.load('html')
+
+-- Embedded Django.
+local django_start_rule = token('django_tag', '{' * S('{%'))
+local django_end_rule = token('django_tag', S('%}') * '}')
+l.embed_lexer(html, M, django_start_rule, django_end_rule)
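+-- HTML is the parent lexer here: lexing proceeds with HTML rules until a
+-- '{{' or '{%' tag switches to the Django rules above, and the matching
+-- '}}' or '%}' switches back.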
+-- Modify HTML patterns to embed Django.
+html._RULES['comment'] = html._RULES['comment'] + comment
+
+M._tokenstyles = {
+ django_tag = l.STYLE_EMBEDDED
+}
+
+local _foldsymbols = html._foldsymbols
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '{[%%{]'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '[%%}]}'
+_foldsymbols.django_tag = {['{{'] = 1, ['}}'] = -1, ['{%'] = 1, ['%}'] = -1}
+M._foldsymbols = _foldsymbols
+
+return M
diff --git a/lexers/dmd.lua b/lexers/dmd.lua
new file mode 100644
index 0000000..f419cfe
--- /dev/null
+++ b/lexers/dmd.lua
@@ -0,0 +1,176 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- D LPeg lexer.
+-- Heavily modified by Brian Schott (@Hackerpilot on GitHub).
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'dmd'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local nested_comment = l.nested_pair('/+', '+/')
+local comment = token(l.COMMENT, line_comment + block_comment + nested_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true) * S('cwd')^-1
+local dq_str = l.delimited_range('"') * S('cwd')^-1
+local lit_str = 'r' * l.delimited_range('"', false, true) * S('cwd')^-1
+local bt_str = l.delimited_range('`', false, true) * S('cwd')^-1
+local hex_str = 'x' * l.delimited_range('"') * S('cwd')^-1
+local other_hex_str = '\\x' * (l.xdigit * l.xdigit)^1
+local del_str = l.nested_pair('q"[', ']"') * S('cwd')^-1 +
+ l.nested_pair('q"(', ')"') * S('cwd')^-1 +
+ l.nested_pair('q"{', '}"') * S('cwd')^-1 +
+ l.nested_pair('q"<', '>"') * S('cwd')^-1 +
+ P('q') * l.nested_pair('{', '}') * S('cwd')^-1
+local string = token(l.STRING, del_str + sq_str + dq_str + lit_str + bt_str +
+ hex_str + other_hex_str)
+
+-- Numbers.
+local dec = l.digit^1 * ('_' * l.digit^1)^0
+local hex_num = l.hex_num * ('_' * l.xdigit^1)^0
+local bin_num = '0' * S('bB') * S('01_')^1
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec)
+local number = token(l.NUMBER, (l.float + integer) * S('uUlLdDfFi')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast',
+ 'catch', 'const', 'continue', 'debug', 'default', 'delete',
+ 'deprecated', 'do', 'else', 'extern', 'export', 'false', 'final', 'finally',
+ 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable',
+ 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow',
+ 'null', 'out', 'override', 'pragma', 'private', 'protected', 'public', 'pure',
+ 'ref', 'return', 'scope', 'shared', 'static', 'super', 'switch',
+ 'synchronized', 'this', 'throw','true', 'try', 'typeid', 'typeof', 'unittest',
+ 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread',
+ '__traits', '__vector', '__parameters'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class',
+ 'creal', 'dchar', 'delegate', 'double', 'enum', 'float', 'function',
+ 'idouble', 'ifloat', 'int', 'interface', 'ireal', 'long', 'module', 'package',
+ 'ptrdiff_t', 'real', 'short', 'size_t', 'struct', 'template', 'typedef',
+ 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar',
+ 'string', 'wstring', 'dstring', 'hash_t', 'equals_t'
+})
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__',
+ '__VENDOR__', '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__',
+ '__MODULE__',
+})
+
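+-- Class declarations: highlight the word following 'class' or 'struct' as a
+-- class name, e.g. 'Foo' in 'struct Foo'.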
+local class_sequence = token(l.TYPE, P('class') + P('struct')) * ws^1 *
+ token(l.CLASS, l.word)
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('?=!<>+-*$/%&|^~.,;()[]{}'))
+
+-- Properties.
+local properties = (type + identifier + operator) * token(l.OPERATOR, '.') *
+ token(l.VARIABLE, word_match{
+ 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity',
+ 'keys', 'length', 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp',
+ 'min', 'min_normal', 'min_10_exp', 'min_exp', 'nan', 'offsetof', 'ptr',
+ 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof', 'tupleof',
+ 'values'
+ })
+
+-- Preprocs.
+local annotation = token('annotation', '@' * l.word^1)
+local preproc = token(l.PREPROCESSOR, '#' * l.nonnewline^0)
+
+-- Traits.
+local traits_list = token('traits', word_match{
+ 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers',
+ 'getAttributes', 'getMember', 'getOverloads', 'getProtection', 'getUnitTests',
+ 'getVirtualFunctions', 'getVirtualIndex', 'getVirtualMethods', 'hasMember',
+ 'identifier', 'isAbstractClass', 'isAbstractFunction', 'isArithmetic',
+ 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating',
+ 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD',
+ 'isRef', 'isSame', 'isScalar', 'isStaticArray', 'isStaticFunction',
+ 'isUnsigned', 'isVirtualFunction', 'isVirtualMethod', 'parent'
+})
+
+local scopes_list = token('scopes', word_match{'exit', 'success', 'failure'})
+
+-- Versions.
+local versions_list = token('versions', word_match{
+ 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat',
+ 'Android', 'ARM', 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb',
+ 'assert', 'BigEndian', 'BSD', 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat',
+ 'DigitalMars', 'D_InlineAsm_X86', 'D_InlineAsm_X86_64', 'D_LP64',
+ 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD', 'D_SoftFloat',
+ 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd',
+ 'IA64', 'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI',
+ 'MIPS_HardFloat', 'MIPS_N32', 'MIPS_N64', 'MIPS_O32', 'MIPS_O64',
+ 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix', 'PPC', 'PPC64',
+ 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64',
+ 'SkyOS', 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat',
+ 'SPARC_V8Plus', 'SysV3', 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows',
+ 'X86', 'X86_64'
+})
+
+local versions = token(l.KEYWORD, 'version') * l.space^0 *
+ token(l.OPERATOR, '(') * l.space^0 * versions_list
+
+local scopes = token(l.KEYWORD, 'scope') * l.space^0 *
+ token(l.OPERATOR, '(') * l.space^0 * scopes_list
+
+local traits = token(l.KEYWORD, '__traits') * l.space^0 *
+ token(l.OPERATOR, '(') * l.space^0 * traits_list
+
+local func = token(l.FUNCTION, l.word) *
+ #(l.space^0 * (P('!') * l.word^-1 * l.space^-1)^-1 * P('('))
+
+M._rules = {
+ {'whitespace', ws},
+ {'class', class_sequence},
+ {'traits', traits},
+ {'versions', versions},
+ {'scopes', scopes},
+ {'keyword', keyword},
+ {'variable', properties},
+ {'type', type},
+ {'function', func},
+ {'constant', constant},
+ {'string', string},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+ {'annotation', annotation},
+}
+
+M._tokenstyles = {
+ annotation = l.STYLE_PREPROCESSOR,
+ traits = 'fore:$(color.yellow)',
+ versions = l.STYLE_CONSTANT,
+ scopes = l.STYLE_CONSTANT
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/[*+]', '[*+]/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {
+ ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1,
+ ['//'] = l.fold_line_comments('//')
+ }
+}
+
+return M
diff --git a/lexers/dot.lua b/lexers/dot.lua
new file mode 100644
index 0000000..5ff845b
--- /dev/null
+++ b/lexers/dot.lua
@@ -0,0 +1,71 @@
+-- Copyright 2006-2013 Brian "Sir Alaran" Schott. See LICENSE.
+-- Dot LPeg lexer.
+-- Based on lexer code by Mitchell.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'dot'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.digit^1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'graph', 'node', 'edge', 'digraph', 'fontsize', 'rankdir',
+ 'fontname', 'shape', 'label', 'arrowhead', 'arrowtail', 'arrowsize',
+ 'color', 'comment', 'constraint', 'decorate', 'dir', 'headlabel', 'headport',
+ 'headURL', 'labelangle', 'labeldistance', 'labelfloat', 'labelfontcolor',
+ 'labelfontname', 'labelfontsize', 'layer', 'lhead', 'ltail', 'minlen',
+ 'samehead', 'sametail', 'style', 'taillabel', 'tailport', 'tailURL', 'weight',
+ 'subgraph'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'box', 'polygon', 'ellipse', 'circle', 'point', 'egg', 'triangle',
+ 'plaintext', 'diamond', 'trapezium', 'parallelogram', 'house', 'pentagon',
+ 'hexagon', 'septagon', 'octagon', 'doublecircle', 'doubleoctagon',
+ 'tripleoctagon', 'invtriangle', 'invtrapezium', 'invhouse', 'Mdiamond',
+ 'Msquare', 'Mcircle', 'rect', 'rectangle', 'none', 'note', 'tab', 'folder',
+ 'box3d', 'record'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('->()[]{};'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'number', number},
+ {'string', string},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/eiffel.lua b/lexers/eiffel.lua
new file mode 100644
index 0000000..871ed8f
--- /dev/null
+++ b/lexers/eiffel.lua
@@ -0,0 +1,69 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Eiffel LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'eiffel'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '--' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'alias', 'all', 'and', 'as', 'check', 'class', 'creation', 'debug',
+ 'deferred', 'do', 'else', 'elseif', 'end', 'ensure', 'expanded', 'export',
+ 'external', 'feature', 'from', 'frozen', 'if', 'implies', 'indexing', 'infix',
+ 'inherit', 'inspect', 'invariant', 'is', 'like', 'local', 'loop', 'not',
+ 'obsolete', 'old', 'once', 'or', 'prefix', 'redefine', 'rename', 'require',
+ 'rescue', 'retry', 'select', 'separate', 'then', 'undefine', 'until',
+ 'variant', 'when', 'xor',
+ 'current', 'false', 'precursor', 'result', 'strip', 'true', 'unique', 'void'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'character', 'string', 'bit', 'boolean', 'integer', 'real', 'none', 'any'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '%-%-'},
+ [l.KEYWORD] = {
+ check = 1, debug = 1, deferred = 1, ['do'] = 1, from = 1, ['if'] = 1,
+ inspect = 1, once = 1, class = function(text, pos, line, s)
+ return line:find('deferred%s+class') and 0 or 1
+ end, ['end'] = -1
+ },
+ [l.COMMENT] = {['--'] = l.fold_line_comments('--')}
+}
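+-- Fold values may be functions: 'class' contributes no fold level when the
+-- line reads 'deferred class', since the preceding 'deferred' keyword has
+-- already opened that fold.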
+
+return M
diff --git a/lexers/elixir.lua b/lexers/elixir.lua
new file mode 100644
index 0000000..015f29b
--- /dev/null
+++ b/lexers/elixir.lua
@@ -0,0 +1,122 @@
+-- Copyright 2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Contributed by Richard Philips.
+-- Elixir LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local B, P, R, S = lpeg.B, lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'elixir'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
+
+-- Strings.
+local dq_str = l.delimited_range('"', false)
+local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local string = token(l.STRING, triple_dq_str + dq_str)
+
+-- Numbers.
+local dec = l.digit * (l.digit + P("_"))^0
+local bin = '0b' * S('01')^1
+local oct = '0o' * R('07')^1
+local integer = bin + l.hex_num + oct + dec
+local float = l.digit^1 * P(".") * l.digit^1 *
+ (S("eE") * S('+-')^-1 * l.digit^1)^-1
+local number_token = B(1 - R('az', 'AZ', '__')) *
+ (S('+-')^-1) * token(l.NUMBER, (float + integer))
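+-- The lpeg.B() back assertion above ensures the preceding character is not a
+-- letter or underscore, so the '2' in 'x2' is not lexed as a number; note
+-- the optional sign is matched outside the NUMBER token.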
+
+-- Keywords.
+local keyword_token = token(l.KEYWORD, word_match{
+ "is_atom", "is_binary", "is_bitstring", "is_boolean", "is_float",
+ "is_function", "is_integer", "is_list", "is_map", "is_number", "is_pid",
+ "is_port", "is_record", "is_reference", "is_tuple", "is_exception", "case",
+ "when", "cond", "for", "if", "unless", "try", "receive", "send", "exit",
+ "raise", "throw", "after", "rescue", "catch", "else", "do", "end", "quote",
+ "unquote", "super", "import", "require", "alias", "use", "self"
+})
+
+-- Functions.
+local function_token = token(l.FUNCTION, word_match{
+ "defstruct", "defrecordp", "defrecord", "defprotocol", "defp",
+ "defoverridable", "defmodule", "defmacrop", "defmacro", "defimpl",
+ "defexception", "defdelegate", "defcallback", "def"
+})
+
+-- Sigils.
+local sigil11 = P("~") * S("CRSW") * l.delimited_range('<>', false, true)
+local sigil12 = P("~") * S("CRSW") * l.delimited_range('{}', false, true)
+local sigil13 = P("~") * S("CRSW") * l.delimited_range('[]', false, true)
+local sigil14 = P("~") * S("CRSW") * l.delimited_range('()', false, true)
+local sigil15 = P("~") * S("CRSW") * l.delimited_range('|', false, true)
+local sigil16 = P("~") * S("CRSW") * l.delimited_range('/', false, true)
+local sigil17 = P("~") * S("CRSW") * l.delimited_range('"', false, true)
+local sigil18 = P("~") * S("CRSW") * l.delimited_range("'", false, true)
+local sigil19 = P("~") * S("CRSW") * '"""' * (l.any - '"""')^0 * P('"""')^-1
+local sigil10 = P("~") * S("CRSW") * "'''" * (l.any - "'''")^0 * P("'''")^-1
+local sigil21 = P("~") * S("crsw") * l.delimited_range('<>', false, false)
+local sigil22 = P("~") * S("crsw") * l.delimited_range('{}', false, false)
+local sigil23 = P("~") * S("crsw") * l.delimited_range('[]', false, false)
+local sigil24 = P("~") * S("crsw") * l.delimited_range('()', false, false)
+local sigil25 = P("~") * S("crsw") * l.delimited_range('|', false, false)
+local sigil26 = P("~") * S("crsw") * l.delimited_range('/', false, false)
+local sigil27 = P("~") * S("crsw") * l.delimited_range('"', false, false)
+local sigil28 = P("~") * S("crsw") * l.delimited_range("'", false, false)
+local sigil29 = P("~") * S("csrw") * '"""' * (l.any - '"""')^0 * P('"""')^-1
+local sigil20 = P("~") * S("csrw") * "'''" * (l.any - "'''")^0 * P("'''")^-1
+local sigil_token = token(l.REGEX, sigil10 + sigil19 + sigil11 + sigil12 +
+ sigil13 + sigil14 + sigil15 + sigil16 +
+ sigil17 + sigil18 + sigil20 + sigil29 +
+ sigil21 + sigil22 + sigil23 + sigil24 +
+ sigil25 + sigil26 + sigil27 + sigil28)
+local sigiladdon_token = token(l.EMBEDDED, R('az', 'AZ')^0)
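+-- The 20 delimited sigil patterns above vary only in their delimiter and in
+-- whether escapes apply (uppercase sigils are raw, lowercase ones allow
+-- escapes). A minimal, equivalent sketch (assuming the same
+-- l.delimited_range() semantics) could build them in a loop; the heredoc
+-- forms (sigil10/19/20/29) would still need their own patterns:
+--
+--   local sigils = P(false)
+--   for _, d in ipairs{'<>', '{}', '[]', '()', '|', '/', '"', "'"} do
+--     sigils = sigils + P('~') * S('CRSW') * l.delimited_range(d, false, true)
+--                     + P('~') * S('crsw') * l.delimited_range(d, false, false)
+--   end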
+
+-- Attributes.
+local attribute_token = token(l.LABEL, B(1 - R('az', 'AZ', '__')) * P('@') *
+ R('az','AZ') * R('az','AZ','09','__')^0)
+
+-- Booleans.
+local boolean_token = token(l.NUMBER,
+ P(':')^-1 * word_match{"true", "false", "nil"})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, R('az', '__') *
+ R('az', 'AZ', '__', '09')^0 * S('?!')^-1)
+
+-- Atoms.
+local atom1 = B(1 - P(':')) * P(':') * dq_str
+local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') *
+ R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
+local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) *
+ R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
+local atom_token = token(l.CONSTANT, atom1 + atom2 + atom3)
+
+-- Operators.
+local operator1 = word_match{"and", "or", "not", "when", "xor", "in"}
+local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' +
+ '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' +
+ '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' +
+ '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' +
+ '/' + '~~~' + '@'
+local operator_token = token(l.OPERATOR, operator1 + operator2)
+
+M._rules = {
+ {'sigil', sigil_token * sigiladdon_token},
+ {'atom', atom_token},
+ {'string', string},
+ {'comment', comment},
+ {'attribute', attribute_token},
+ {'boolean', boolean_token},
+ {'function', function_token},
+ {'keyword', keyword_token},
+ {'operator', operator_token},
+ {'identifier', identifier},
+ {'number', number_token},
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lexers/erlang.lua b/lexers/erlang.lua
new file mode 100644
index 0000000..73321fa
--- /dev/null
+++ b/lexers/erlang.lua
@@ -0,0 +1,100 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Erlang LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'erlang'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"')
+local literal = '$' * l.any * l.alnum^0
+local string = token(l.STRING, sq_str + dq_str + literal)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of',
+ 'query', 'receive', 'when',
+ -- Operators.
+ 'div', 'rem', 'or', 'xor', 'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not',
+ 'bnot',
+ 'badarg', 'nocookie', 'false', 'true'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'abs', 'alive', 'apply', 'atom_to_list', 'binary_to_list', 'binary_to_term',
+ 'concat_binary', 'date', 'disconnect_node', 'element', 'erase', 'exit',
+ 'float', 'float_to_list', 'get', 'get_keys', 'group_leader', 'halt', 'hd',
+ 'integer_to_list', 'is_alive', 'length', 'link', 'list_to_atom',
+ 'list_to_binary', 'list_to_float', 'list_to_integer', 'list_to_pid',
+ 'list_to_tuple', 'load_module', 'make_ref', 'monitor_node', 'node', 'nodes',
+ 'now', 'open_port', 'pid_to_list', 'process_flag', 'process_info', 'process',
+ 'put', 'register', 'registered', 'round', 'self', 'setelement', 'size',
+ 'spawn', 'spawn_link', 'split_binary', 'statistics', 'term_to_binary',
+ 'throw', 'time', 'tl', 'trunc', 'tuple_to_list', 'unlink', 'unregister',
+ 'whereis',
+ -- Others.
+ 'atom', 'binary', 'constant', 'function', 'integer', 'list', 'number', 'pid',
+ 'ports', 'port_close', 'port_info', 'reference', 'record',
+ -- Erlang:.
+ 'check_process_code', 'delete_module', 'get_cookie', 'hash', 'math',
+ 'module_loaded', 'preloaded', 'processes', 'purge_module', 'set_cookie',
+ 'set_node',
+ -- Math.
+ 'acos', 'asin', 'atan', 'atan2', 'cos', 'cosh', 'exp', 'log', 'log10', 'pi',
+ 'pow', 'power', 'sin', 'sinh', 'sqrt', 'tan', 'tanh'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('-<>.;=/|#+*:,?!()[]{}'))
+
+-- Directives.
+local directive = token('directive', '-' * word_match{
+ 'author', 'compile', 'copyright', 'define', 'doc', 'else', 'endif', 'export',
+ 'file', 'ifdef', 'ifndef', 'import', 'include_lib', 'include', 'module',
+ 'record', 'undef'
+})
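+-- e.g. in '-module(foo).', '-module' is lexed as a directive token and
+-- styled as a preprocessor line (see M._tokenstyles below).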
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'identifier', identifier},
+ {'directive', directive},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ directive = l.STYLE_PREPROCESSOR
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '[%(%)%[%]{}]', '%%'},
+ [l.KEYWORD] = {
+ case = 1, fun = 1, ['if'] = 1, query = 1, receive = 1, ['end'] = -1
+ },
+ [l.OPERATOR] = {
+ ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
+ },
+ [l.COMMENT] = {['%'] = l.fold_line_comments('%')}
+}
+
+return M
diff --git a/lexers/fish.lua b/lexers/fish.lua
new file mode 100644
index 0000000..9142d60
--- /dev/null
+++ b/lexers/fish.lua
@@ -0,0 +1,76 @@
+-- Copyright 2015 Jason Schindler. See LICENSE.
+-- Fish (http://fishshell.com/) script LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'fish'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Shebang.
+local shebang = token('shebang', '#!/' * l.nonnewline^0)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", false, true)
+local dq_str = l.delimited_range('"')
+
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'alias', 'and', 'begin', 'bg', 'bind', 'block', 'break', 'breakpoint',
+ 'builtin', 'case', 'cd', 'command', 'commandline', 'complete', 'contains',
+ 'continue', 'count', 'dirh', 'dirs', 'echo', 'else', 'emit', 'end', 'eval',
+ 'exec', 'exit', 'fg', 'fish', 'fish_config', 'fish_indent', 'fish_pager',
+ 'fish_prompt', 'fish_right_prompt', 'fish_update_completions', 'fishd', 'for',
+ 'funced', 'funcsave', 'function', 'functions', 'help', 'history', 'if', 'in',
+ 'isatty', 'jobs', 'math', 'mimedb', 'nextd', 'not', 'open', 'or', 'popd',
+ 'prevd', 'psub', 'pushd', 'pwd', 'random', 'read', 'return', 'set',
+ 'set_color', 'source', 'status', 'switch', 'test', 'trap', 'type', 'ulimit',
+ 'umask', 'vared', 'while'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Variables.
+local variable = token(l.VARIABLE,
+ '$' * l.word + '$' * l.delimited_range('{}', true, true))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'shebang', shebang},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'variable', variable},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ shebang = l.STYLE_LABEL
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+'},
+ [l.KEYWORD] = {
+ begin = 1, ['for'] = 1, ['function'] = 1, ['if'] = 1, switch = 1,
+ ['while'] = 1, ['end'] = -1
+ }
+}
+
+return M
diff --git a/lexers/forth.lua b/lexers/forth.lua
new file mode 100644
index 0000000..450f528
--- /dev/null
+++ b/lexers/forth.lua
@@ -0,0 +1,57 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Forth LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'forth'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = S('|\\') * l.nonnewline^0
+local block_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local s_str = 's' * l.delimited_range('"', true, true)
+local dot_str = '.' * l.delimited_range('"', true, true)
+local f_str = 'f' * l.delimited_range('"', true, true)
+local dq_str = l.delimited_range('"', true, true)
+local string = token(l.STRING, s_str + dot_str + f_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'swap', 'drop', 'dup', 'nip', 'over', 'rot', '-rot', '2dup', '2drop', '2over',
+ '2swap', '>r', 'r>',
+ 'and', 'or', 'xor', '>>', '<<', 'not', 'negate', 'mod', '/mod', '1+', '1-',
+ 'base', 'hex', 'decimal', 'binary', 'octal',
+ '@', '!', 'c@', 'c!', '+!', 'cell+', 'cells', 'char+', 'chars',
+ 'create', 'does>', 'variable', 'variable,', 'literal', 'last', '1,', '2,',
+ '3,', ',', 'here', 'allot', 'parse', 'find', 'compile',
+ -- Operators.
+ 'if', '=if', '<if', '>if', '<>if', 'then', 'repeat', 'until', 'forth', 'macro'
+}, '2><1-@!+3,='))
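+-- The second word_match() argument lists extra characters ('2', '>', '<',
+-- etc.) that count as word characters, so entries like '2dup' and '>r'
+-- match as whole words.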
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, (l.alnum + S('+-*=<>.?/\'%,_$'))^1)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(':;<>+*-/()[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'string', string},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/fortran.lua b/lexers/fortran.lua
new file mode 100644
index 0000000..c85cbff
--- /dev/null
+++ b/lexers/fortran.lua
@@ -0,0 +1,91 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Fortran LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'fortran'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local c_comment = l.starts_line(S('Cc')) * l.nonnewline^0
+local d_comment = l.starts_line(S('Dd')) * l.nonnewline^0
+local ex_comment = l.starts_line('!') * l.nonnewline^0
+local ast_comment = l.starts_line('*') * l.nonnewline^0
+local line_comment = '!' * l.nonnewline^0
+local comment = token(l.COMMENT, c_comment + d_comment + ex_comment +
+ ast_comment + line_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true, true)
+local dq_str = l.delimited_range('"', true, true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * -l.alpha)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'include', 'program', 'module', 'subroutine', 'function', 'contains', 'use',
+ 'call', 'return',
+ -- Statements.
+ 'case', 'select', 'default', 'continue', 'cycle', 'do', 'while', 'else', 'if',
+ 'elseif', 'then', 'elsewhere', 'end', 'endif', 'enddo', 'forall', 'where',
+ 'exit', 'goto', 'pause', 'stop',
+ -- Operators.
+ '.not.', '.and.', '.or.', '.xor.', '.eqv.', '.neqv.', '.eq.', '.ne.', '.gt.',
+ '.ge.', '.lt.', '.le.',
+ -- Logical.
+ '.false.', '.true.'
+}, '.', true))
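+-- Here '.' is an extra word character, covering dotted operators like
+-- '.and.', and the trailing 'true' makes matching case-insensitive, since
+-- Fortran keywords may be written in any case.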
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ -- I/O.
+ 'backspace', 'close', 'endfile', 'inquire', 'open', 'print', 'read', 'rewind',
+ 'write', 'format',
+ -- Type conversion, utility, and math.
+ 'aimag', 'aint', 'amax0', 'amin0', 'anint', 'ceiling', 'cmplx', 'conjg',
+ 'dble', 'dcmplx', 'dfloat', 'dim', 'dprod', 'float', 'floor', 'ifix', 'imag',
+ 'int', 'logical', 'modulo', 'nint', 'real', 'sign', 'sngl', 'transfer',
+ 'zext', 'abs', 'acos', 'aimag', 'aint', 'alog', 'alog10', 'amax0', 'amax1',
+ 'amin0', 'amin1', 'amod', 'anint', 'asin', 'atan', 'atan2', 'cabs', 'ccos',
+ 'char', 'clog', 'cmplx', 'conjg', 'cos', 'cosh', 'csin', 'csqrt', 'dabs',
+ 'dacos', 'dasin', 'datan', 'datan2', 'dble', 'dcos', 'dcosh', 'ddim', 'dexp',
+ 'dim', 'dint', 'dlog', 'dlog10', 'dmax1', 'dmin1', 'dmod', 'dnint', 'dprod',
+ 'dreal', 'dsign', 'dsin', 'dsinh', 'dsqrt', 'dtan', 'dtanh', 'exp', 'float',
+ 'iabs', 'ichar', 'idim', 'idint', 'idnint', 'ifix', 'index', 'int', 'isign',
+ 'len', 'lge', 'lgt', 'lle', 'llt', 'log', 'log10', 'max', 'max0', 'max1',
+ 'min', 'min0', 'min1', 'mod', 'nint', 'real', 'sign', 'sin', 'sinh', 'sngl',
+ 'sqrt', 'tan', 'tanh'
+}, nil, true))
+
+-- Types.
+local type = token(l.TYPE, word_match({
+ 'implicit', 'explicit', 'none', 'data', 'parameter', 'allocate',
+ 'allocatable', 'allocated', 'deallocate', 'integer', 'real', 'double',
+ 'precision', 'complex', 'logical', 'character', 'dimension', 'kind',
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.alnum^1)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('<>=&+-/*,()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'keyword', keyword},
+ {'function', func},
+ {'type', type},
+ {'number', number},
+ {'identifier', identifier},
+ {'string', string},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/fsharp.lua b/lexers/fsharp.lua
new file mode 100644
index 0000000..b620552
--- /dev/null
+++ b/lexers/fsharp.lua
@@ -0,0 +1,76 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- F# LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'fsharp'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = P('//') * l.nonnewline^0
+local block_comment = l.nested_pair('(*', '*)')
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer * S('uUlL')^-1))
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'ifndef', 'ifdef', 'if', 'else', 'endif', 'light', 'region', 'endregion'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * S('\t ')^0 * preproc_word *
+ (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'and', 'as', 'assert', 'asr', 'begin', 'class', 'default',
+ 'delegate', 'do', 'done', 'downcast', 'downto', 'else', 'end', 'enum',
+ 'exception', 'false', 'finally', 'for', 'fun', 'function', 'if', 'in',
+ 'inherit', 'interface', 'land', 'lazy', 'let', 'lor', 'lsl', 'lsr', 'lxor',
+ 'match', 'member', 'mod', 'module', 'mutable', 'namespace', 'new', 'null',
+ 'of', 'open', 'or', 'override', 'sig', 'static', 'struct', 'then', 'to',
+ 'true', 'try', 'type', 'val', 'when', 'inline', 'upcast', 'while', 'with',
+ 'async', 'atomic', 'break', 'checked', 'component', 'const', 'constructor',
+ 'continue', 'eager', 'event', 'external', 'fixed', 'functor', 'include',
+ 'method', 'mixin', 'process', 'property', 'protected', 'public', 'pure',
+ 'readonly', 'return', 'sealed', 'switch', 'virtual', 'void', 'volatile',
+ 'where',
+ -- Booleans.
+ 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'byte', 'sbyte', 'int16', 'uint16', 'int', 'uint32', 'int64',
+ 'uint64', 'nativeint', 'unativeint', 'char', 'string', 'decimal', 'unit',
+ 'void', 'float32', 'single', 'float', 'double'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=<>+-*/^.,:;~!@#%^&|?[](){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/gap.lua b/lexers/gap.lua
new file mode 100644
index 0000000..78a4b77
--- /dev/null
+++ b/lexers/gap.lua
@@ -0,0 +1,56 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Gap LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'gap'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.digit^1 * -l.alpha)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'and', 'break', 'continue', 'do', 'elif', 'else', 'end', 'fail', 'false',
+ 'fi', 'for', 'function', 'if', 'in', 'infinity', 'local', 'not', 'od', 'or',
+ 'rec', 'repeat', 'return', 'then', 'true', 'until', 'while'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('*+-,./:;<=>~^#()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '#'},
+ [l.KEYWORD] = {
+ ['function'] = 1, ['end'] = -1, ['do'] = 1, od = -1, ['if'] = 1, fi = -1,
+ ['repeat'] = 1, ['until'] = -1
+ },
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/gettext.lua b/lexers/gettext.lua
new file mode 100644
index 0000000..d63ef41
--- /dev/null
+++ b/lexers/gettext.lua
@@ -0,0 +1,39 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Gettext LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'gettext'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * S(': .~') * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'msgid', 'msgid_plural', 'msgstr', 'fuzzy', 'c-format', 'no-c-format'
+}, '-', true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Variables.
+local variable = token(l.VARIABLE, S('%$@') * l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'string', string},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'variable', variable},
+}
+
+return M
diff --git a/lexers/glsl.lua b/lexers/glsl.lua
new file mode 100644
index 0000000..65f8303
--- /dev/null
+++ b/lexers/glsl.lua
@@ -0,0 +1,132 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- GLSL LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local table = _G.table
+
+local M = {_NAME = 'glsl'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'attribute', 'const', 'in', 'inout', 'out', 'uniform', 'varying', 'invariant',
+ 'centroid', 'flat', 'smooth', 'noperspective', 'layout', 'patch', 'sample',
+ 'subroutine', 'lowp', 'mediump', 'highp', 'precision',
+ -- Macros.
+ '__VERSION__', '__LINE__', '__FILE__',
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'radians', 'degrees', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh',
+ 'cosh', 'tanh', 'asinh', 'acosh', 'atanh', 'pow', 'exp', 'log', 'exp2',
+ 'log2', 'sqrt', 'inversesqrt', 'abs', 'sign', 'floor', 'trunc', 'round',
+ 'roundEven', 'ceil', 'fract', 'mod', 'modf', 'min', 'max', 'clamp', 'mix',
+ 'step', 'smoothstep', 'isnan', 'isinf', 'floatBitsToInt', 'floatBitsToUint',
+ 'intBitsToFloat', 'uintBitsToFloat', 'fma', 'frexp', 'ldexp', 'packUnorm2x16',
+ 'packUnorm4x8', 'packSnorm4x8', 'unpackUnorm2x16', 'unpackUnorm4x8',
+ 'unpackSnorm4x8', 'packDouble2x32', 'unpackDouble2x32', 'length', 'distance',
+ 'dot', 'cross', 'normalize', 'ftransform', 'faceforward', 'reflect',
+ 'refract', 'matrixCompMult', 'outerProduct', 'transpose', 'determinant',
+ 'inverse', 'lessThan', 'lessThanEqual', 'greaterThan', 'greaterThanEqual',
+ 'equal', 'notEqual', 'any', 'all', 'not', 'uaddCarry', 'usubBorrow',
+ 'umulExtended', 'imulExtended', 'bitfieldExtract', 'bitfieldInsert',
+ 'bitfieldReverse', 'bitCount', 'findLSB', 'findMSB', 'textureSize',
+ 'textureQueryLOD', 'texture', 'textureProj', 'textureLod', 'textureOffset',
+ 'texelFetch', 'texelFetchOffset', 'textureProjOffset', 'textureLodOffset',
+ 'textureProjLod', 'textureProjLodOffset', 'textureGrad', 'textureGradOffset',
+ 'textureProjGrad', 'textureProjGradOffset', 'textureGather',
+ 'textureGatherOffset', 'texture1D', 'texture2D', 'texture3D', 'texture1DProj',
+ 'texture2DProj', 'texture3DProj', 'texture1DLod', 'texture2DLod',
+ 'texture3DLod', 'texture1DProjLod', 'texture2DProjLod', 'texture3DProjLod',
+ 'textureCube', 'textureCubeLod', 'shadow1D', 'shadow2D', 'shadow1DProj',
+ 'shadow2DProj', 'shadow1DLod', 'shadow2DLod', 'shadow1DProjLod',
+ 'shadow2DProjLod', 'dFdx', 'dFdy', 'fwidth', 'interpolateAtCentroid',
+ 'interpolateAtSample', 'interpolateAtOffset', 'noise1', 'noise2', 'noise3',
+ 'noise4', 'EmitStreamVertex', 'EndStreamPrimitive', 'EmitVertex',
+ 'EndPrimitive', 'barrier'
+})
+
+-- Types.
+local type = token(l.TYPE,
+ S('bdiu')^-1 * 'vec' * R('24') +
+ P('d')^-1 * 'mat' * R('24') * ('x' * R('24'))^-1 +
+ S('iu')^-1 * 'sampler' * R('13') * 'D' +
+ 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
+ S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + word_match{
+ 'Cube', '2DRect', 'Buffer', '2DMS', '2DMSArray',
+ '2DMSCubeArray'
+ }) +
+ word_match{
+ 'samplerCubeShadow', 'sampler2DRectShadow',
+ 'samplerCubeArrayShadow'
+ })
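+-- The composed pattern above matches, e.g., 'vec3', 'ivec4', 'dmat3x2',
+-- 'usampler2D', and 'samplerCubeShadow'.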
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'gl_VertexID', 'gl_InstanceID', 'gl_Position', 'gl_PointSize',
+ 'gl_ClipDistance', 'gl_PrimitiveIDIn', 'gl_InvocationID', 'gl_PrimitiveID',
+ 'gl_Layer', 'gl_PatchVerticesIn', 'gl_TessLevelOuter', 'gl_TessLevelInner',
+ 'gl_TessCoord', 'gl_FragCoord', 'gl_FrontFacing', 'gl_PointCoord',
+ 'gl_SampleID', 'gl_SamplePosition', 'gl_FragColor', 'gl_FragData',
+ 'gl_FragDepth', 'gl_SampleMask', 'gl_ClipVertex', 'gl_FrontColor',
+ 'gl_BackColor', 'gl_FrontSecondaryColor', 'gl_BackSecondaryColor',
+ 'gl_TexCoord', 'gl_FogFragCoord', 'gl_Color', 'gl_SecondaryColor',
+ 'gl_Normal', 'gl_Vertex', 'gl_MultiTexCoord0', 'gl_MultiTexCoord1',
+ 'gl_MultiTexCoord2', 'gl_MultiTexCoord3', 'gl_MultiTexCoord4',
+ 'gl_MultiTexCoord5', 'gl_MultiTexCoord6', 'gl_MultiTexCoord7', 'gl_FogCoord'
+})
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ 'gl_MaxVertexAttribs', 'gl_MaxVertexUniformComponents', 'gl_MaxVaryingFloats',
+ 'gl_MaxVaryingComponents', 'gl_MaxVertexOutputComponents',
+ 'gl_MaxGeometryInputComponents', 'gl_MaxGeometryOutputComponents',
+ 'gl_MaxFragmentInputComponents', 'gl_MaxVertexTextureImageUnits',
+ 'gl_MaxCombinedTextureImageUnits', 'gl_MaxTextureImageUnits',
+ 'gl_MaxFragmentUniformComponents', 'gl_MaxDrawBuffers', 'gl_MaxClipDistances',
+ 'gl_MaxGeometryTextureImageUnits', 'gl_MaxGeometryOutputVertices',
+ 'gl_MaxGeometryTotalOutputComponents', 'gl_MaxGeometryUniformComponents',
+ 'gl_MaxGeometryVaryingComponents', 'gl_MaxTessControlInputComponents',
+ 'gl_MaxTessControlOutputComponents', 'gl_MaxTessControlTextureImageUnits',
+ 'gl_MaxTessControlUniformComponents',
+ 'gl_MaxTessControlTotalOutputComponents',
+ 'gl_MaxTessEvaluationInputComponents', 'gl_MaxTessEvaluationOutputComponents',
+ 'gl_MaxTessEvaluationTextureImageUnits',
+ 'gl_MaxTessEvaluationUniformComponents', 'gl_MaxTessPatchComponents',
+ 'gl_MaxPatchVertices', 'gl_MaxTessGenLevel', 'gl_MaxTextureUnits',
+ 'gl_MaxTextureCoords', 'gl_MaxClipPlanes',
+
+ 'gl_DepthRange', 'gl_ModelViewMatrix', 'gl_ProjectionMatrix',
+ 'gl_ModelViewProjectionMatrix', 'gl_TextureMatrix', 'gl_NormalMatrix',
+ 'gl_ModelViewMatrixInverse', 'gl_ProjectionMatrixInverse',
+ 'gl_ModelViewProjectionMatrixInverse', 'gl_TextureMatrixInverse',
+ 'gl_ModelViewMatrixTranspose', 'gl_ProjectionMatrixTranspose',
+ 'gl_ModelViewProjectionMatrixTranspose', 'gl_TextureMatrixTranspose',
+ 'gl_ModelViewMatrixInverseTranspose', 'gl_ProjectionMatrixInverseTranspose',
+ 'gl_ModelViewProjectionMatrixInverseTranspose',
+ 'gl_TextureMatrixInverseTranspose', 'gl_NormalScale', 'gl_ClipPlane',
+ 'gl_Point', 'gl_FrontMaterial', 'gl_BackMaterial', 'gl_LightSource',
+ 'gl_LightModel', 'gl_FrontLightModelProduct', 'gl_BackLightModelProduct',
+ 'gl_FrontLightProduct', 'gl_BackLightProduct', 'gl_TextureEnvColor',
+ 'gl_EyePlaneS', 'gl_EyePlaneT', 'gl_EyePlaneR', 'gl_EyePlaneQ',
+ 'gl_ObjectPlaneS', 'gl_ObjectPlaneT', 'gl_ObjectPlaneR', 'gl_ObjectPlaneQ',
+ 'gl_Fog'
+})
+
+-- Extend cpp lexer to include GLSL elements.
+local cpp = l.load('cpp')
+local _rules = cpp._rules
+_rules[1] = {'whitespace', ws}
+table.insert(_rules, 2, {'glsl_keyword', keyword})
+table.insert(_rules, 3, {'glsl_function', func})
+table.insert(_rules, 4, {'glsl_type', type})
+table.insert(_rules, 5, {'glsl_variable', variable})
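+-- The GLSL rules are spliced in ahead of the inherited C/C++ rules so that,
+-- for example, 'vec3' is lexed as a GLSL type before cpp's identifier rule
+-- can consume it.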
+M._rules = _rules
+M._foldsymbols = cpp._foldsymbols
+
+return M
diff --git a/lexers/gnuplot.lua b/lexers/gnuplot.lua
new file mode 100644
index 0000000..8561812
--- /dev/null
+++ b/lexers/gnuplot.lua
@@ -0,0 +1,80 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Gnuplot LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'gnuplot'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local bk_str = l.delimited_range('[]', true)
+local bc_str = l.delimited_range('{}', true)
+local string = token(l.STRING, sq_str + dq_str + bk_str + bc_str)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'cd', 'call', 'clear', 'exit', 'fit', 'help', 'history', 'if', 'load',
+ 'pause', 'plot', 'using', 'with', 'index', 'every', 'smooth', 'thru', 'print',
+ 'pwd', 'quit', 'replot', 'reread', 'reset', 'save', 'set', 'show', 'unset',
+ 'shell', 'splot', 'system', 'test', 'update'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'abs', 'acos', 'acosh', 'arg', 'asin', 'asinh', 'atan', 'atan2', 'atanh',
+ 'besj0', 'besj1', 'besy0', 'besy1', 'ceil', 'cos', 'cosh', 'erf', 'erfc',
+ 'exp', 'floor', 'gamma', 'ibeta', 'inverf', 'igamma', 'imag', 'invnorm',
+ 'int', 'lambertw', 'lgamma', 'log', 'log10', 'norm', 'rand', 'real', 'sgn',
+ 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'column', 'defined', 'tm_hour',
+ 'tm_mday', 'tm_min', 'tm_mon', 'tm_sec', 'tm_wday', 'tm_yday', 'tm_year',
+ 'valid'
+})
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'angles', 'arrow', 'autoscale', 'bars', 'bmargin', 'border', 'boxwidth',
+ 'clabel', 'clip', 'cntrparam', 'colorbox', 'contour', 'datafile',
+ 'decimalsign', 'dgrid3d', 'dummy', 'encoding', 'fit', 'fontpath', 'format',
+ 'functions', 'function', 'grid', 'hidden3d', 'historysize', 'isosamples',
+ 'key', 'label', 'lmargin', 'loadpath', 'locale', 'logscale', 'mapping',
+ 'margin', 'mouse', 'multiplot', 'mx2tics', 'mxtics', 'my2tics', 'mytics',
+ 'mztics', 'offsets', 'origin', 'output', 'parametric', 'plot', 'pm3d',
+ 'palette', 'pointsize', 'polar', 'print', 'rmargin', 'rrange', 'samples',
+ 'size', 'style', 'surface', 'terminal', 'tics', 'ticslevel', 'ticscale',
+ 'timestamp', 'timefmt', 'title', 'tmargin', 'trange', 'urange', 'variables',
+ 'version', 'view', 'vrange', 'x2data', 'x2dtics', 'x2label', 'x2mtics',
+ 'x2range', 'x2tics', 'x2zeroaxis', 'xdata', 'xdtics', 'xlabel', 'xmtics',
+ 'xrange', 'xtics', 'xzeroaxis', 'y2data', 'y2dtics', 'y2label', 'y2mtics',
+ 'y2range', 'y2tics', 'y2zeroaxis', 'ydata', 'ydtics', 'ylabel', 'ymtics',
+ 'yrange', 'ytics', 'yzeroaxis', 'zdata', 'zdtics', 'cbdata', 'cbdtics',
+ 'zero', 'zeroaxis', 'zlabel', 'zmtics', 'zrange', 'ztics', 'cblabel',
+ 'cbmtics', 'cbrange', 'cbtics'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('-+~!$*%=<>&|^?:()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'variable', variable},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/go.lua b/lexers/go.lua
new file mode 100644
index 0000000..9cc7984
--- /dev/null
+++ b/lexers/go.lua
@@ -0,0 +1,78 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Go LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'go'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * '*/'
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local raw_str = l.delimited_range('`', false, true)
+local string = token(l.STRING, sq_str + dq_str + raw_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * P('i')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'break', 'case', 'chan', 'const', 'continue', 'default', 'defer', 'else',
+ 'fallthrough', 'for', 'func', 'go', 'goto', 'if', 'import', 'interface',
+ 'map', 'package', 'range', 'return', 'select', 'struct', 'switch', 'type',
+ 'var'
+})
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ 'true', 'false', 'iota', 'nil'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'byte', 'complex64', 'complex128', 'error', 'float32', 'float64',
+ 'int', 'int8', 'int16', 'int32', 'int64', 'rune', 'string', 'uint', 'uint8',
+ 'uint16', 'uint32', 'uint64', 'uintptr'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'append', 'cap', 'close', 'complex', 'copy', 'delete', 'imag', 'len', 'make',
+ 'new', 'panic', 'print', 'println', 'real', 'recover'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'constant', constant},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/groovy.lua b/lexers/groovy.lua
new file mode 100644
index 0000000..92013a7
--- /dev/null
+++ b/lexers/groovy.lua
@@ -0,0 +1,89 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Groovy LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'groovy'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
+local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local regex_str = #P('/') * l.last_char_includes('=~|!<>+-*?&,:;([{') *
+ l.delimited_range('/', true)
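+-- A '/' only starts a regex when the preceding non-whitespace character
+-- suggests an expression context (an operator or opening bracket); after an
+-- identifier or literal it is treated as division instead.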
+local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str +
+ dq_str) +
+ token(l.REGEX, regex_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else',
+ 'extends', 'final', 'finally', 'for', 'if', 'implements', 'instanceof',
+ 'native', 'new', 'private', 'protected', 'public', 'return', 'static',
+ 'switch', 'synchronized', 'throw', 'throws', 'transient', 'try', 'volatile',
+ 'while', 'strictfp', 'package', 'import', 'as', 'assert', 'def', 'mixin',
+ 'property', 'test', 'using', 'in',
+ 'false', 'null', 'super', 'this', 'true', 'it'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'abs', 'any', 'append', 'asList', 'asWritable', 'call', 'collect',
+ 'compareTo', 'count', 'div', 'dump', 'each', 'eachByte', 'eachFile',
+ 'eachLine', 'every', 'find', 'findAll', 'flatten', 'getAt', 'getErr', 'getIn',
+ 'getOut', 'getText', 'grep', 'immutable', 'inject', 'inspect', 'intersect',
+ 'invokeMethods', 'isCase', 'join', 'leftShift', 'minus', 'multiply',
+ 'newInputStream', 'newOutputStream', 'newPrintWriter', 'newReader',
+ 'newWriter', 'next', 'plus', 'pop', 'power', 'previous', 'print', 'println',
+ 'push', 'putAt', 'read', 'readBytes', 'readLines', 'reverse', 'reverseEach',
+ 'round', 'size', 'sort', 'splitEachLine', 'step', 'subMap', 'times',
+ 'toInteger', 'toList', 'tokenize', 'upto', 'waitForOrKill', 'withPrintWriter',
+ 'withReader', 'withStream', 'withWriter', 'withWriterAppend', 'write',
+ 'writeLine'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'boolean', 'byte', 'char', 'class', 'double', 'float', 'int', 'interface',
+ 'long', 'short', 'void'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'type', type},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/gtkrc.lua b/lexers/gtkrc.lua
new file mode 100644
index 0000000..8aec8c3
--- /dev/null
+++ b/lexers/gtkrc.lua
@@ -0,0 +1,71 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Gtkrc LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'gtkrc'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.digit^1 * ('.' * l.digit^1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'binding', 'class', 'include', 'module_path', 'pixmap_path', 'im_module_file',
+ 'style', 'widget', 'widget_class'
+})
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'bg', 'fg', 'base', 'text', 'xthickness', 'ythickness', 'bg_pixmap', 'font',
+ 'fontset', 'font_name', 'stock', 'color', 'engine'
+})
+
+-- States.
+local state = token(l.CONSTANT, word_match{
+ 'ACTIVE', 'SELECTED', 'NORMAL', 'PRELIGHT', 'INSENSITIVE', 'TRUE', 'FALSE'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'mix', 'shade', 'lighter', 'darker'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.alpha * (l.alnum + S('_-'))^0)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(':=,*()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'variable', variable},
+ {'state', state},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '#'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/haskell.lua b/lexers/haskell.lua
new file mode 100644
index 0000000..46b5434
--- /dev/null
+++ b/lexers/haskell.lua
@@ -0,0 +1,60 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Haskell LPeg lexer.
+-- Modified by Alex Suraci.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'haskell'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '--' * l.nonnewline_esc^0
+local block_comment = '{-' * (l.any - '-}')^0 * P('-}')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"'))
+
+-- Chars.
+local char = token(l.STRING, l.delimited_range("'", true))
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'if', 'import',
+ 'in', 'infix', 'infixl', 'infixr', 'instance', 'let', 'module', 'newtype',
+ 'of', 'then', 'type', 'where', '_', 'as', 'qualified', 'hiding'
+})
+
+-- Identifiers.
+local word = (l.alnum + S("._'#"))^0
+local identifier = token(l.IDENTIFIER, (l.alpha + '_') * word)
+
+-- Operators.
+local op = l.punct - S('()[]{}')
+local operator = token(l.OPERATOR, op)
+
+-- Types & type constructors.
+local constructor = token(l.TYPE, (l.upper * word) + (P(":") * (op^1 - P(":"))))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', constructor},
+ {'identifier', identifier},
+ {'string', string},
+ {'char', char},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lexers/html.lua b/lexers/html.lua
new file mode 100644
index 0000000..6034702
--- /dev/null
+++ b/lexers/html.lua
@@ -0,0 +1,166 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- HTML LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'html'}
+
+local case_insensitive_tags = true
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = #S('\'"') * l.last_char_includes('=') *
+ token(l.STRING, sq_str + dq_str)
+
+-- TODO: performance is terrible on large files.
+local in_tag = P(function(input, index)
+ local before = input:sub(1, index - 1)
+ local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+ if s and e then return s > e and index or nil end
+ if s then return index end
+ return input:find('^[^<]->', index) and index or nil
+end)
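+-- in_tag is a zero-width match-time predicate: it succeeds only when the
+-- current position lies between an unclosed '<' and its closing '>'. The
+-- sub()/find() calls rescan the input on every attempt, which is why it is
+-- left disabled in the rules below.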
+
+-- Numbers.
+local number = #l.digit * l.last_char_includes('=') *
+ token(l.NUMBER, l.digit^1 * P('%')^-1) --* in_tag
+
+-- Elements.
+local known_element = token('element', '<' * P('/')^-1 * word_match({
+ 'a', 'abbr', 'address', 'area', 'article', 'aside', 'audio', 'b', 'base',
+ 'bdi', 'bdo', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption',
+ 'cite', 'code', 'col', 'colgroup', 'content', 'data', 'datalist', 'dd',
+ 'decorator', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'element', 'em',
+ 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
+ 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe', 'img',
+ 'input', 'ins', 'kbd', 'keygen', 'label', 'legend', 'li', 'link', 'main',
+ 'map', 'mark', 'menu', 'menuitem', 'meta', 'meter', 'nav', 'noscript',
+ 'object', 'ol', 'optgroup', 'option', 'output', 'p', 'param', 'pre',
+ 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'section',
+ 'select', 'shadow', 'small', 'source', 'spacer', 'span', 'strong',
+ 'style', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'template',
+ 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'u', 'ul',
+ 'var', 'video', 'wbr'
+}, nil, case_insensitive_tags))
+local unknown_element = token('unknown_element', '<' * P('/')^-1 * l.word)
+local element = known_element + unknown_element
+
+-- Attributes.
+local known_attribute = token('attribute', word_match({
+ 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 'async',
+ 'autocomplete', 'autofocus', 'autoplay', 'bgcolor', 'border', 'buffered',
+ 'challenge', 'charset', 'checked', 'cite', 'class', 'code', 'codebase',
+ 'color', 'cols', 'colspan', 'content', 'contenteditable', 'contextmenu',
+ 'controls', 'coords', 'data', 'data-', 'datetime', 'default', 'defer', 'dir',
+ 'dirname', 'disabled', 'download', 'draggable', 'dropzone', 'enctype', 'for',
+ 'form', 'headers', 'height', 'hidden', 'high', 'href', 'hreflang',
+ 'http-equiv', 'icon', 'id', 'ismap', 'itemprop', 'keytype', 'kind', 'label',
+ 'lang', 'language', 'list', 'loop', 'low', 'manifest', 'max', 'maxlength',
+ 'media', 'method', 'min', 'multiple', 'name', 'novalidate', 'open', 'optimum',
+ 'pattern', 'ping', 'placeholder', 'poster', 'preload', 'pubdate',
+ 'radiogroup', 'readonly', 'rel', 'required', 'reversed', 'role', 'rows',
+ 'rowspan', 'sandbox', 'spellcheck', 'scope', 'scoped', 'seamless', 'selected',
+ 'shape', 'size', 'sizes', 'span', 'src', 'srcdoc', 'srclang', 'start',
+ 'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'usemap',
+ 'value', 'width', 'wrap'
+}, '-', case_insensitive_tags) + ((P('data-') + 'aria-') * (l.alnum + '-')^1))
+local unknown_attribute = token('unknown_attribute', l.word)
+local attribute = (known_attribute + unknown_attribute) * #(l.space^0 * '=')
+
+-- Closing tags.
+local tag_close = token('element', P('/')^-1 * '>')
+
+-- Equals.
+local equals = token(l.OPERATOR, '=') --* in_tag
+
+-- Entities.
+local entity = token('entity', '&' * (l.any - l.space - ';')^1 * ';')
+
+-- Doctype.
+local doctype = token('doctype', '<!' *
+ word_match({'doctype'}, nil, case_insensitive_tags) *
+ (l.any - '>')^1 * '>')
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'doctype', doctype},
+ {'element', element},
+ {'tag_close', tag_close},
+ {'attribute', attribute},
+-- {'equals', equals},
+ {'string', string},
+ {'number', number},
+ {'entity', entity},
+}
+
+M._tokenstyles = {
+ element = l.STYLE_KEYWORD,
+ unknown_element = l.STYLE_KEYWORD..',italics',
+ attribute = l.STYLE_TYPE,
+ unknown_attribute = l.STYLE_TYPE..',italics',
+ entity = l.STYLE_OPERATOR,
+ doctype = l.STYLE_COMMENT
+}
+
+-- Tags that start embedded languages.
+M.embed_start_tag = element *
+ (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 *
+ ws^0 * tag_close
+M.embed_end_tag = element * tag_close
+
+-- Embedded CSS.
+local css = l.load('css')
+local style_element = word_match({'style'}, nil, case_insensitive_tags)
+local css_start_rule = #(P('<') * style_element *
+ ('>' + P(function(input, index)
+ if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then
+ return index
+ end
+end))) * M.embed_start_tag -- <style type="text/css">
+local css_end_rule = #('</' * style_element * ws^0 * '>') *
+ M.embed_end_tag -- </style>
+l.embed_lexer(M, css, css_start_rule, css_end_rule)
+
+-- Embedded JavaScript.
+local js = l.load('javascript')
+local script_element = word_match({'script'}, nil, case_insensitive_tags)
+local js_start_rule = #(P('<') * script_element *
+ ('>' + P(function(input, index)
+ if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then
+ return index
+ end
+end))) * M.embed_start_tag -- <script type="text/javascript">
+local js_end_rule = #('</' * script_element * ws^0 * '>') *
+ M.embed_end_tag -- </script>
+l.embed_lexer(M, js, js_start_rule, js_end_rule)
+
+-- Embedded CoffeeScript.
+local cs = l.load('coffeescript')
+local script_element = word_match({'script'}, nil, case_insensitive_tags)
+local cs_start_rule = #(P('<') * script_element * P(function(input, index)
+ if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then
+ return index
+ end
+end)) * M.embed_start_tag -- <script type="text/coffeescript">
+local cs_end_rule = #('</' * script_element * ws^0 * '>') *
+ M.embed_end_tag -- </script>
+l.embed_lexer(M, cs, cs_start_rule, cs_end_rule)
+
+M._foldsymbols = {
+ _patterns = {'</?', '/>', '<!%-%-', '%-%->'},
+ element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
+ unknown_element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
+ [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1}
+}
+
+return M
diff --git a/lexers/idl.lua b/lexers/idl.lua
new file mode 100644
index 0000000..bc5cbff
--- /dev/null
+++ b/lexers/idl.lua
@@ -0,0 +1,68 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- IDL LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'idl'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'define', 'undef', 'ifdef', 'ifndef', 'if', 'elif', 'else', 'endif',
+ 'include', 'warning', 'pragma'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * preproc_word * l.nonnewline^0)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'attribute', 'case', 'const', 'context', 'custom', 'default',
+ 'exception', 'enum', 'factory', 'FALSE', 'in', 'inout', 'interface', 'local',
+ 'module', 'native', 'oneway', 'out', 'private', 'public', 'raises',
+ 'readonly', 'struct', 'support', 'switch', 'TRUE', 'truncatable', 'typedef',
+ 'union', 'valuetype'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'any', 'boolean', 'char', 'double', 'fixed', 'float', 'long', 'Object',
+ 'octet', 'sequence', 'short', 'string', 'unsigned', 'ValueBase', 'void',
+ 'wchar', 'wstring'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preprocessor', preproc},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/inform.lua b/lexers/inform.lua
new file mode 100644
index 0000000..94049fa
--- /dev/null
+++ b/lexers/inform.lua
@@ -0,0 +1,96 @@
+-- Inform LPeg lexer for Scintillua.
+-- JMS 2010-04-25.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'inform'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '!' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local inform_hex = '$' * l.xdigit^1
+local inform_bin = '$$' * S('01')^1
+local number = token(l.NUMBER, l.integer + inform_hex + inform_bin)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'Abbreviate', 'Array', 'Attribute', 'Class', 'Constant', 'Default', 'End',
+ 'Endif', 'Extend', 'Global', 'Ifdef', 'Iffalse', 'Ifndef', 'Ifnot', 'Iftrue',
+ 'Import', 'Include', 'Link', 'Lowstring', 'Message', 'Object', 'Property',
+ 'Release', 'Replace', 'Serial', 'StartDaemon', 'Statusline', 'StopDaemon',
+ 'Switches', 'Verb', 'absent', 'action', 'actor', 'add_to_scope', 'address',
+ 'additive', 'after', 'and', 'animate', 'article', 'articles', 'before',
+ 'bold', 'box', 'break', 'cant_go', 'capacity', 'char', 'class', 'child',
+ 'children', 'clothing', 'concealed', 'container', 'continue', 'creature',
+ 'daemon', 'deadflag', 'default', 'describe', 'description', 'do', 'door',
+ 'door_dir', 'door_to', 'd_to', 'd_obj', 'e_to', 'e_obj', 'each_turn',
+ 'edible', 'else', 'enterable', 'false', 'female', 'first', 'font', 'for',
+ 'found_in', 'general', 'give', 'grammar', 'has', 'hasnt', 'held', 'if', 'in',
+ 'in_to', 'in_obj', 'initial', 'inside_description', 'invent', 'jump', 'last',
+ 'life', 'light', 'list_together', 'location', 'lockable', 'locked', 'male',
+ 'move', 'moved', 'multi', 'multiexcept', 'multiheld', 'multiinside', 'n_to',
+ 'n_obj', 'ne_to', 'ne_obj', 'nw_to', 'nw_obj', 'name', 'neuter', 'new_line',
+ 'nothing', 'notin', 'noun', 'number', 'objectloop', 'ofclass', 'off', 'on',
+ 'only', 'open', 'openable', 'or', 'orders', 'out_to', 'out_obj', 'parent',
+ 'parse_name', 'player', 'plural', 'pluralname', 'print', 'print_ret',
+ 'private', 'proper', 'provides', 'random', 'react_after', 'react_before',
+ 'remove', 'replace', 'return', 'reverse', 'rfalse','roman', 'rtrue', 's_to',
+ 's_obj', 'se_to', 'se_obj', 'sw_to', 'sw_obj', 'scenery', 'scope', 'score',
+ 'scored', 'second', 'self', 'short_name', 'short_name_indef', 'sibling',
+ 'spaces', 'static', 'string', 'style', 'supporter', 'switch', 'switchable',
+ 'talkable', 'thedark', 'time_left', 'time_out', 'to', 'topic', 'transparent',
+ 'true', 'underline', 'u_to', 'u_obj', 'visited', 'w_to', 'w_obj',
+ 'when_closed', 'when_off', 'when_on', 'when_open', 'while', 'with',
+ 'with_key', 'workflag', 'worn'
+})
+
+-- Library actions.
+local action = token('action', word_match{
+ 'Answer', 'Ask', 'AskFor', 'Attack', 'Blow', 'Burn', 'Buy', 'Climb', 'Close',
+ 'Consult', 'Cut', 'Dig', 'Disrobe', 'Drink', 'Drop', 'Eat', 'Empty', 'EmptyT',
+ 'Enter', 'Examine', 'Exit', 'Fill', 'FullScore', 'GetOff', 'Give', 'Go',
+ 'GoIn', 'Insert', 'Inv', 'InvTall', 'InvWide', 'Jump', 'JumpOver', 'Kiss',
+ 'LetGo', 'Listen', 'LMode1', 'LMode2', 'LMode3', 'Lock', 'Look', 'LookUnder',
+ 'Mild', 'No', 'NotifyOff', 'NotifyOn', 'Objects', 'Open', 'Order', 'Places',
+ 'Pray', 'Pronouns', 'Pull', 'Push', 'PushDir', 'PutOn', 'Quit', 'Receive',
+ 'Remove', 'Restart', 'Restore', 'Rub', 'Save', 'Score', 'ScriptOff',
+ 'ScriptOn', 'Search', 'Set', 'SetTo', 'Show', 'Sing', 'Sleep', 'Smell',
+ 'Sorry', 'Squeeze', 'Strong', 'Swim', 'Swing', 'SwitchOff', 'SwitchOn',
+ 'Take', 'Taste', 'Tell', 'Think', 'ThrowAt', 'ThrownAt', 'Tie', 'Touch',
+ 'Transfer', 'Turn', 'Unlock', 'VagueGo', 'Verify', 'Version', 'Wake',
+ 'WakeOther', 'Wait', 'Wave', 'WaveHands', 'Wear', 'Yes'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('@~=+-*/%^#=<>;:,.{}[]()&|?'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'keyword', keyword},
+ {'action', action},
+ {'identifier', identifier},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ action = l.STYLE_VARIABLE
+}
+
+return M
diff --git a/lexers/ini.lua b/lexers/ini.lua
new file mode 100644
index 0000000..15ea7fa
--- /dev/null
+++ b/lexers/ini.lua
@@ -0,0 +1,52 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Ini LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ini'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, l.starts_line(S(';#')) * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
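+-- Bracketed section labels (e.g. "[section]") are also lexed as strings.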
+local label = l.delimited_range('[]', true, true)
+local string = token(l.STRING, sq_str + dq_str + label)
+
+-- Numbers.
+local dec = l.digit^1 * ('_' * l.digit^1)^0
+local oct_num = '0' * S('01234567_')^1
+local integer = S('+-')^-1 * (l.hex_num + oct_num + dec)
+local number = token(l.NUMBER, (l.float + integer))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'true', 'false', 'on', 'off', 'yes', 'no'
+})
+
+-- Identifiers.
+local word = (l.alpha + '_') * (l.alnum + S('_.'))^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, '=')
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._LEXBYLINE = true
+
+return M
diff --git a/lexers/io_lang.lua b/lexers/io_lang.lua
new file mode 100644
index 0000000..540628c
--- /dev/null
+++ b/lexers/io_lang.lua
@@ -0,0 +1,66 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Io LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'io_lang'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = (P('#') + '//') * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local string = token(l.STRING, tq_str + sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'block', 'method', 'while', 'foreach', 'if', 'else', 'do', 'super', 'self',
+ 'clone', 'proto', 'setSlot', 'hasSlot', 'type', 'write', 'print', 'forward'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'Block', 'Buffer', 'CFunction', 'Date', 'Duration', 'File', 'Future', 'List',
+ 'LinkedList', 'Map', 'Nop', 'Message', 'Nil', 'Number', 'Object', 'String',
+ 'WeakLink'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('`~@$%^&*-+/=\\<>?.,:;()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[%(%)]', '/%*', '%*/', '#', '//'},
+ [l.OPERATOR] = {['('] = 1, [')'] = -1},
+ [l.COMMENT] = {
+ ['/*'] = 1, ['*/'] = -1, ['#'] = l.fold_line_comments('#'),
+ ['//'] = l.fold_line_comments('//')
+ }
+}
+
+return M
diff --git a/lexers/java.lua b/lexers/java.lua
new file mode 100644
index 0000000..a94ae6d
--- /dev/null
+++ b/lexers/java.lua
@@ -0,0 +1,86 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Java LPeg lexer.
+-- Modified by Brian Schott.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'java'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'assert', 'break', 'case', 'catch', 'class', 'const', 'continue',
+ 'default', 'do', 'else', 'enum', 'extends', 'final', 'finally', 'for', 'goto',
+ 'if', 'implements', 'import', 'instanceof', 'interface', 'native', 'new',
+ 'package', 'private', 'protected', 'public', 'return', 'static', 'strictfp',
+ 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient',
+ 'try', 'while', 'volatile',
+ -- Literals.
+ 'true', 'false', 'null'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void',
+ 'Boolean', 'Byte', 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short',
+ 'String'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+
+-- Annotations.
+local annotation = token('annotation', '@' * l.word)
+
+-- Functions.
+local func = token(l.FUNCTION, l.word) * #P('(')
+
+-- Classes.
+local class_sequence = token(l.KEYWORD, P('class')) * ws^1 *
+ token(l.CLASS, l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'class', class_sequence},
+ {'keyword', keyword},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'annotation', annotation},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ annotation = l.STYLE_PREPROCESSOR
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/javascript.lua b/lexers/javascript.lua
new file mode 100644
index 0000000..a9f469b
--- /dev/null
+++ b/lexers/javascript.lua
@@ -0,0 +1,62 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- JavaScript LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'javascript'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
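+-- A '/' begins a regex literal only when the preceding non-whitespace
+-- character is one of the listed punctuation characters; this distinguishes
+-- regexes from division.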
+local regex_str = #P('/') * l.last_char_includes('+-*%^!=&|?:;,([{<>') *
+ l.delimited_range('/', true) * S('igm')^0
+local string = token(l.STRING, sq_str + dq_str) + token(l.REGEX, regex_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class',
+ 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'double', 'else',
+ 'enum', 'export', 'extends', 'false', 'final', 'finally', 'float', 'for',
+ 'function', 'goto', 'if', 'implements', 'import', 'in', 'instanceof', 'int',
+ 'interface', 'let', 'long', 'native', 'new', 'null', 'package', 'private',
+ 'protected', 'public', 'return', 'short', 'static', 'super', 'switch',
+ 'synchronized', 'this', 'throw', 'throws', 'transient', 'true', 'try',
+ 'typeof', 'var', 'void', 'volatile', 'while', 'with', 'yield'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'string', string},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/json.lua b/lexers/json.lua
new file mode 100644
index 0000000..6dcebec
--- /dev/null
+++ b/lexers/json.lua
@@ -0,0 +1,47 @@
+-- Copyright 2006-2013 Brian "Sir Alaran" Schott. See LICENSE.
+-- JSON LPeg lexer.
+-- Based off of lexer code by Mitchell.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'json'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
+
+-- Strings.
+local sq_str = P('u')^-1 * l.delimited_range("'", true)
+local dq_str = P('U')^-1 * l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local integer = S('+-')^-1 * l.digit^1 * S('Ll')^-1
+local number = token(l.NUMBER, l.float + integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{"true", "false", "null"})
+
+-- Operators.
+local operator = token(l.OPERATOR, S('[]{}:,'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'keyword', keyword},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[%[%]{}]', '/%*', '%*/'},
+ [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1}
+}
+
+return M
diff --git a/lexers/jsp.lua b/lexers/jsp.lua
new file mode 100644
index 0000000..946fe7e
--- /dev/null
+++ b/lexers/jsp.lua
@@ -0,0 +1,29 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- JSP LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'jsp'}
+
+-- Embedded in HTML.
+local html = l.load('html')
+
+-- Embedded Java.
+local java = l.load('java')
+local java_start_rule = token('jsp_tag', '<%' * P('=')^-1)
+local java_end_rule = token('jsp_tag', '%>')
+l.embed_lexer(html, java, java_start_rule, java_end_rule, true)
+
+M._tokenstyles = {
+ jsp_tag = l.STYLE_EMBEDDED
+}
+
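+-- Extend HTML's fold symbols with the JSP tag delimiters.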
+local _foldsymbols = html._foldsymbols
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>'
+_foldsymbols.jsp_tag = {['<%'] = 1, ['%>'] = -1}
+M._foldsymbols = _foldsymbols
+
+return M
diff --git a/lexers/latex.lua b/lexers/latex.lua
new file mode 100644
index 0000000..e3eaa27
--- /dev/null
+++ b/lexers/latex.lua
@@ -0,0 +1,73 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Latex LPeg lexer.
+-- Modified by Brian Schott.
+-- Modified by Robert Gieseke.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'latex'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '%' * l.nonnewline^0
+local block_comment = '\\begin' * P(' ')^0 * '{comment}' *
+ (l.any - '\\end' * P(' ')^0 * '{comment}')^0 *
+ P('\\end' * P(' ')^0 * '{comment}')^-1
+-- Note: need block_comment before line_comment or LPeg cannot compile rule.
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+-- Sections.
+local section = token('section', '\\' * word_match{
+ 'part', 'chapter', 'section', 'subsection', 'subsubsection', 'paragraph',
+ 'subparagraph'
+} * P('*')^-1)
+
+-- Math environments.
+local math_word = word_match{
+ 'align', 'displaymath', 'eqnarray', 'equation', 'gather', 'math', 'multline'
+}
+local math_begin_end = (P('begin') + P('end')) * P(' ')^0 *
+ '{' * math_word * P('*')^-1 * '}'
+local math = token('math', '$' + '\\' * (S('[]()') + math_begin_end))
+
+-- LaTeX environments.
+local environment = token('environment', '\\' * (P('begin') + P('end')) *
+ P(' ')^0 *
+ '{' * l.word * P('*')^-1 * '}')
+
+-- Commands.
+local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}')))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('&#{}[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'math', math},
+ {'environment', environment},
+ {'section', section},
+ {'keyword', command},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ environment = l.STYLE_KEYWORD,
+ math = l.STYLE_FUNCTION,
+ section = l.STYLE_CLASS
+}
+
+M._foldsymbols = {
+ _patterns = {'\\[a-z]+', '[{}]', '%%'},
+ [l.COMMENT] = {
+ ['\\begin'] = 1, ['\\end'] = -1, ['%'] = l.fold_line_comments('%')
+ },
+ ['environment'] = {['\\begin'] = 1, ['\\end'] = -1},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
+}
+
+return M
diff --git a/lexers/less.lua b/lexers/less.lua
new file mode 100644
index 0000000..2ca3c38
--- /dev/null
+++ b/lexers/less.lua
@@ -0,0 +1,27 @@
+-- Copyright 2006-2013 Robert Gieseke. See LICENSE.
+-- Less CSS LPeg lexer.
+-- http://lesscss.org
+
+local l = require('lexer')
+local token = l.token
+local S = lpeg.S
+
+local M = {_NAME = 'less'}
+
+-- Line comments.
+local line_comment = token(l.COMMENT, '//' * l.nonnewline^0)
+
+-- Variables.
+local variable = token(l.VARIABLE, '@' * (l.alnum + S('_-{}'))^1)
+
+local css = l.load('css')
+local _rules = css._rules
+table.insert(_rules, #_rules - 1, {'line_comment', line_comment})
+table.insert(_rules, #_rules - 1, {'variable', variable})
+M._rules = _rules
+
+M._tokenstyles = css._tokenstyles
+
+M._foldsymbols = css._foldsymbols
+
+return M
diff --git a/lexers/lexer.lua b/lexers/lexer.lua
new file mode 100644
index 0000000..0dc9674
--- /dev/null
+++ b/lexers/lexer.lua
@@ -0,0 +1,1587 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+
+local M = {}
+
+--[=[ This comment is for LuaDoc.
+---
+-- Lexes Scintilla documents with Lua and LPeg.
+--
+-- ## Overview
+--
+-- Lexers highlight the syntax of source code. Scintilla (the editing component
+-- behind [Textadept][] and [SciTE][]) traditionally uses static, compiled C++
+-- lexers which are notoriously difficult to create and/or extend. On the other
+-- hand, Lua makes it easy to rapidly create new lexers, extend existing
+-- ones, and embed lexers within one another. Lua lexers tend to be more
+-- readable than C++ lexers too.
+--
+-- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua
+-- [LPeg library][]. The following table comes from the LPeg documentation and
+-- summarizes all you need to know about constructing basic LPeg patterns. This
+-- module provides convenience functions for creating and working with other
+-- more advanced patterns and concepts.
+--
+-- Operator | Description
+-- ---------------------|------------
+-- `lpeg.P(string)` | Matches `string` literally.
+-- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters.
+-- `lpeg.S(string)` | Matches any character in set `string`.
+-- `lpeg.R("`_`xy`_`")` | Matches any character between range `x` and `y`.
+-- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`.
+-- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`.
+-- `patt1 * patt2` | Matches `patt1` followed by `patt2`.
+-- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice).
+-- `patt1 - patt2` | Matches `patt1` if `patt2` does not match.
+-- `-patt` | Equivalent to `("" - patt)`.
+-- `#patt` | Matches `patt` but consumes no input.
+--
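+-- For example, the following pattern composes several of these operators to
+-- match a C-style identifier (a letter or underscore followed by any number
+-- of letters, digits, or underscores):
+--
+--     local id = (lpeg.R('az', 'AZ') + '_') *
+--                (lpeg.R('az', 'AZ', '09') + '_')^0
+--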
+-- The first part of this document deals with rapidly constructing a simple
+-- lexer. The next part deals with more advanced techniques, such as custom
+-- coloring and embedding lexers within one another. Following that is a
+-- discussion about code folding, or being able to tell Scintilla which code
+-- blocks are "foldable" (temporarily hideable from view). After that are
+-- instructions on how to use LPeg lexers with the aforementioned Textadept and
+-- SciTE editors. Finally there are comments on lexer performance and
+-- limitations.
+--
+-- [LPeg library]: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html
+-- [Textadept]: http://foicica.com/textadept
+-- [SciTE]: http://scintilla.org/SciTE.html
+--
+-- ## Lexer Basics
+--
+-- The *lexers/* directory contains all lexers, including your new one. Before
+-- attempting to write one from scratch though, first determine if your
+-- programming language is similar to any of the 80+ languages supported. If so,
+-- you may be able to copy and modify that lexer, saving some time and effort.
+-- The filename of your lexer should be the name of your programming language in
+-- lower case followed by a *.lua* extension. For example, a new Lua lexer has
+-- the name *lua.lua*.
+--
+-- Note: Try to refrain from using one-character language names like "b", "c",
+-- or "d". For example, Scintillua uses "b_lang", "cpp", and "dmd",
+-- respectively.
+--
+-- ### New Lexer Template
+--
+-- There is a *lexers/template.txt* file that contains a simple template for a
+-- new lexer. Feel free to use it, replacing the '?'s with the name of your
+-- lexer:
+--
+-- -- ? LPeg lexer.
+--
+-- local l = require('lexer')
+-- local token, word_match = l.token, l.word_match
+-- local P, R, S = lpeg.P, lpeg.R, lpeg.S
+--
+-- local M = {_NAME = '?'}
+--
+-- -- Whitespace.
+-- local ws = token(l.WHITESPACE, l.space^1)
+--
+-- M._rules = {
+-- {'whitespace', ws},
+-- }
+--
+-- M._tokenstyles = {
+--
+-- }
+--
+-- return M
+--
+-- The first 4 lines of code simply define often-used convenience variables. The
+-- 5th and last lines define and return the lexer object Scintilla uses; they
+-- are very important and must be part of every lexer. The sixth line defines
+-- something called a "token", an essential building block of lexers. You will
+-- learn about tokens shortly. The rest of the code defines a set of grammar
+-- rules and token styles. You will learn about those later. Note, however, the
+-- `M.` prefix in front of `_rules` and `_tokenstyles`: not only do these tables
+-- belong to their respective lexers, but any non-local variables need the `M.`
+-- prefix too so as not to affect Lua's global environment. All in all, this is
+-- a minimal, working lexer that you can build on.
+--
+-- ### Tokens
+--
+-- Take a moment to think about your programming language's structure. What kind
+-- of key elements does it have? In the template shown earlier, one predefined
+-- element all languages have is whitespace. Your language probably also has
+-- elements like comments, strings, and keywords. Lexers refer to these elements
+-- as "tokens". Tokens are the fundamental "building blocks" of lexers. Lexers
+-- break down source code into tokens for coloring, which results in the syntax
+-- highlighting familiar to you. It is up to you how specific your lexer is when
+-- it comes to tokens. Perhaps only distinguishing between keywords and
+-- identifiers is necessary, or maybe recognizing constants and built-in
+-- functions, methods, or libraries is desirable. The Lua lexer, for example,
+-- defines 11 tokens: whitespace, comments, strings, numbers, keywords, built-in
+-- functions, constants, built-in libraries, identifiers, labels, and operators.
+-- Even though constants, built-in functions, and built-in libraries are subsets
+-- of identifiers, Lua programmers find it helpful for the lexer to distinguish
+-- between them all. It is perfectly acceptable to just recognize keywords and
+-- identifiers.
+--
+-- In a lexer, tokens consist of a token name and an LPeg pattern that matches a
+-- sequence of characters recognized as an instance of that token. Create tokens
+-- using the [`lexer.token()`]() function. Let us examine the "whitespace" token
+-- defined in the template shown earlier:
+--
+-- local ws = token(l.WHITESPACE, l.space^1)
+--
+-- At first glance, the first argument does not appear to be a string name and
+-- the second argument does not appear to be an LPeg pattern. Perhaps you
+-- expected something like:
+--
+-- local ws = token('whitespace', S('\t\v\f\n\r ')^1)
+--
+-- The `lexer` (`l`) module actually provides a convenient list of common token
+-- names and common LPeg patterns for you to use. Token names include
+-- [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](), [`lexer.COMMENT`](),
+-- [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](),
+-- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](),
+-- [`lexer.PREPROCESSOR`](), [`lexer.CONSTANT`](), [`lexer.VARIABLE`](),
+-- [`lexer.FUNCTION`](), [`lexer.CLASS`](), [`lexer.TYPE`](), [`lexer.LABEL`](),
+-- [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns include
+-- [`lexer.any`](), [`lexer.ascii`](), [`lexer.extend`](), [`lexer.alpha`](),
+-- [`lexer.digit`](), [`lexer.alnum`](), [`lexer.lower`](), [`lexer.upper`](),
+-- [`lexer.xdigit`](), [`lexer.cntrl`](), [`lexer.graph`](), [`lexer.print`](),
+-- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](),
+-- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](),
+-- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](),
+-- [`lexer.float`](), and [`lexer.word`](). You may use your own token names if
+-- none of the above fit your language, but an advantage to using predefined
+-- token names is that your lexer's tokens will inherit the universal syntax
+-- highlighting color theme used by your text editor.
+--
+-- #### Example Tokens
+--
+-- So, how might you define other tokens like comments, strings, and keywords?
+-- Here are some examples.
+--
+-- **Comments**
+--
+-- Line-style comments with a prefix character(s) are easy to express with LPeg:
+--
+-- local shell_comment = token(l.COMMENT, '#' * l.nonnewline^0)
+-- local c_line_comment = token(l.COMMENT, '//' * l.nonnewline_esc^0)
+--
+-- The comments above start with a '#' or "//" and go to the end of the line.
+-- The second comment recognizes the next line also as a comment if the current
+-- line ends with a '\' escape character.
+--
+-- C-style "block" comments with a start and end delimiter are also easy to
+-- express:
+--
+-- local c_comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
+--
+-- This comment starts with a "/\*" sequence and contains anything up to and
+-- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer
+-- can recognize unfinished comments as comments and highlight them properly.
+--
+-- **Strings**
+--
+-- It is tempting to think that a string is not much different from the block
+-- comment shown above in that both have start and end delimiters:
+--
+-- local dq_str = '"' * (l.any - '"')^0 * P('"')^-1
+-- local sq_str = "'" * (l.any - "'")^0 * P("'")^-1
+-- local simple_string = token(l.STRING, dq_str + sq_str)
+--
+-- However, most programming languages allow escape sequences in strings such
+-- that a sequence like "\\&quot;" in a double-quoted string indicates that the
+-- '&quot;' is not the end of the string. The above token incorrectly matches
+-- such a string. Instead, use the [`lexer.delimited_range()`]() convenience
+-- function.
+--
+-- local dq_str = l.delimited_range('"')
+-- local sq_str = l.delimited_range("'")
+-- local string = token(l.STRING, dq_str + sq_str)
+--
+-- In this case, the lexer treats '\' as an escape character in a string
+-- sequence.
+--
+-- **Keywords**
+--
+-- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered
+-- choices, use another convenience function: [`lexer.word_match()`](). It is
+-- much easier and more efficient to write word matches like:
+--
+-- local keyword = token(l.KEYWORD, l.word_match{
+-- 'keyword_1', 'keyword_2', ..., 'keyword_n'
+-- })
+--
+-- local case_insensitive_keyword = token(l.KEYWORD, l.word_match({
+-- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n'
+-- }, nil, true))
+--
+-- local hyphened_keyword = token(l.KEYWORD, l.word_match({
+-- 'keyword-1', 'keyword-2', ..., 'keyword-n'
+-- }, '-'))
+--
+-- By default, characters considered to be in keywords are in the set of
+-- alphanumeric characters and underscores. The last token demonstrates how to
+-- allow '-' (hyphen) characters to be in keywords as well.
+--
+-- **Numbers**
+--
+-- Most programming languages have the same format for integer and float tokens,
+-- so it might be as simple as using a couple of predefined LPeg patterns:
+--
+-- local number = token(l.NUMBER, l.float + l.integer)
+--
+-- However, some languages allow postfix characters on integers.
+--
+-- local integer = P('-')^-1 * (l.dec_num * S('lL')^-1)
+-- local number = token(l.NUMBER, l.float + l.hex_num + integer)
+--
+-- Your language may need other tweaks, but it is up to you how fine-grained you
+-- want your highlighting to be. After all, you are not writing a compiler or
+-- interpreter!
+--
+-- ### Rules
+--
+-- Programming languages have grammars, which specify valid token structure. For
+-- example, comments usually cannot appear within a string. Grammars consist of
+-- rules, which are simply combinations of tokens. Recall from the lexer
+-- template the `_rules` table, which defines all the rules used by the lexer
+-- grammar:
+--
+-- M._rules = {
+-- {'whitespace', ws},
+-- }
+--
+-- Each entry in a lexer's `_rules` table consists of a rule name and its
+-- associated pattern. Rule names are completely arbitrary and serve only to
+-- identify and distinguish between different rules. Rule order is important: if
+-- text does not match the first rule, the lexer tries the second rule, and so
+-- on. This simple grammar says to match whitespace tokens under a rule named
+-- "whitespace".
+--
+-- To illustrate the importance of rule order, here is an example of a
+-- simplified Lua grammar:
+--
+-- M._rules = {
+-- {'whitespace', ws},
+-- {'keyword', keyword},
+-- {'identifier', identifier},
+-- {'string', string},
+-- {'comment', comment},
+-- {'number', number},
+-- {'label', label},
+-- {'operator', operator},
+-- }
+--
+-- Note how identifiers come after keywords. In Lua, as with most programming
+-- languages, the characters allowed in keywords and identifiers are in the same
+-- set (alphanumerics plus underscores). If the lexer specified the "identifier"
+-- rule before the "keyword" rule, all keywords would match identifiers and thus
+-- incorrectly highlight as identifiers instead of keywords. The same idea
+-- applies to function, constant, etc. tokens that you may want to distinguish
+-- between: their rules should come before identifiers.
+--
+-- So what about text that does not match any rules? For example, in Lua, the '!'
+-- character is meaningless outside a string or comment. Normally the lexer
+-- skips over such text. If instead you want to highlight these "syntax errors",
+-- add an additional end rule:
+--
+-- M._rules = {
+-- {'whitespace', ws},
+-- {'error', token(l.ERROR, l.any)},
+-- }
+--
+-- This identifies and highlights any character not matched by an existing
+-- rule as a `lexer.ERROR` token.
+--
+-- Even though the rules defined in the examples above contain a single token,
+-- rules may consist of multiple tokens. For example, a rule for an HTML tag
+-- could consist of a tag token followed by an arbitrary number of attribute
+-- tokens, allowing the lexer to highlight all tokens separately. The rule might
+-- look something like this:
+--
+-- {'tag', tag_start * (ws * attributes)^0 * tag_end^-1}
+--
+-- Note however that lexers with complex rules like these are more prone to lose
+-- track of their state.
+--
+-- ### Summary
+--
+-- Lexers primarily consist of tokens and grammar rules. At your disposal are a
+-- number of convenience patterns and functions for rapidly creating a lexer. If
+-- you choose to use predefined token names for your tokens, you do not have to
+-- define how the lexer highlights them. The tokens will inherit the default
+-- syntax highlighting color theme your editor uses.
+--
+-- ## Advanced Techniques
+--
+-- ### Styles and Styling
+--
+-- The most basic form of syntax highlighting is assigning different colors to
+-- different tokens. Instead of highlighting with just colors, Scintilla allows
+-- for more rich highlighting, or "styling", with different fonts, font sizes,
+-- font attributes, and foreground and background colors, just to name a few.
+-- The unit of this rich highlighting is called a "style". Styles are simply
+-- strings of comma-separated property settings. By default, lexers associate
+-- predefined token names like `lexer.WHITESPACE`, `lexer.COMMENT`,
+-- `lexer.STRING`, etc. with particular styles as part of a universal color
+-- theme. These predefined styles include [`lexer.STYLE_CLASS`](),
+-- [`lexer.STYLE_COMMENT`](), [`lexer.STYLE_CONSTANT`](),
+-- [`lexer.STYLE_ERROR`](), [`lexer.STYLE_EMBEDDED`](),
+-- [`lexer.STYLE_FUNCTION`](), [`lexer.STYLE_IDENTIFIER`](),
+-- [`lexer.STYLE_KEYWORD`](), [`lexer.STYLE_LABEL`](), [`lexer.STYLE_NUMBER`](),
+-- [`lexer.STYLE_OPERATOR`](), [`lexer.STYLE_PREPROCESSOR`](),
+-- [`lexer.STYLE_REGEX`](), [`lexer.STYLE_STRING`](), [`lexer.STYLE_TYPE`](),
+-- [`lexer.STYLE_VARIABLE`](), and [`lexer.STYLE_WHITESPACE`](). Like with
+-- predefined token names and LPeg patterns, you may define your own styles. At
+-- their core, styles are just strings, so you may create new ones and/or modify
+-- existing ones. Each style consists of the following comma-separated settings:
+--
+-- Setting | Description
+-- ---------------|------------
+-- font:_name_ | The name of the font the style uses.
+-- size:_int_ | The size of the font the style uses.
+-- [not]bold | Whether or not the font face is bold.
+-- [not]italics | Whether or not the font face is italic.
+-- [not]underlined| Whether or not the font face is underlined.
+-- fore:_color_ | The foreground color of the font face.
+-- back:_color_ | The background color of the font face.
+-- [not]eolfilled | Whether or not the background color extends to the end of the line.
+-- case:_char_ | The case of the font ('u': upper, 'l': lower, 'm': normal).
+-- [not]visible | Whether or not the text is visible.
+-- [not]changeable| Whether the text is changeable or read-only.
+--
+-- Specify font colors in either "#RRGGBB" format, "0xBBGGRR" format, or the
+-- decimal equivalent of the latter. As with token names, LPeg patterns, and
+-- styles, there is a set of predefined color names, but they vary depending on
+-- the current color theme in use. Therefore, it is generally not a good idea to
+-- manually define colors within styles in your lexer since they might not fit
+-- into a user's chosen color theme. Try to refrain from even using predefined
+-- colors in a style because that color may be theme-specific. Instead, the best
+-- practice is to either use predefined styles or derive new color-agnostic
+-- styles from predefined ones. For example, Lua "longstring" tokens use the
+-- existing `lexer.STYLE_STRING` style instead of defining a new one.
+--
+-- #### Example Styles
+--
+-- Defining styles is pretty straightforward. An empty style that inherits the
+-- default theme settings is simply an empty string:
+--
+-- local style_nothing = ''
+--
+-- A similar style but with a bold font face looks like this:
+--
+-- local style_bold = 'bold'
+--
+-- If you want the same style, but also with an italic font face, define the new
+-- style in terms of the old one:
+--
+-- local style_bold_italic = style_bold..',italics'
+--
+-- This allows you to derive new styles from predefined ones without having to
+-- rewrite them. This operation leaves the old style unchanged. Thus if you
+-- had a "static variable" token whose style you wanted to base off of
+-- `lexer.STYLE_VARIABLE`, it would probably look like:
+--
+-- local style_static_var = l.STYLE_VARIABLE..',italics'
+--
+-- The color theme files in the *lexers/themes/* folder give more examples of
+-- style definitions.
+--
+-- ### Token Styles
+--
+-- Lexers use the `_tokenstyles` table to assign tokens to particular styles.
+-- Recall the token definition and `_tokenstyles` table from the lexer template:
+--
+-- local ws = token(l.WHITESPACE, l.space^1)
+--
+-- ...
+--
+-- M._tokenstyles = {
+--
+-- }
+--
+-- Why is a style not assigned to the `lexer.WHITESPACE` token? As mentioned
+-- earlier, lexers automatically associate tokens that use predefined token
+-- names with a particular style. Only tokens with custom token names need
+-- manual style associations. As an example, consider a custom whitespace token:
+--
+-- local ws = token('custom_whitespace', l.space^1)
+--
+-- Assigning a style to this token looks like:
+--
+-- M._tokenstyles = {
+-- custom_whitespace = l.STYLE_WHITESPACE
+-- }
+--
+-- Do not confuse token names with rule names. They are completely different
+-- entities. In the example above, the lexer assigns the "custom_whitespace"
+-- token the existing style for `WHITESPACE` tokens. If instead you want to
+-- color the background of whitespace a shade of grey, it might look like:
+--
+-- local custom_style = l.STYLE_WHITESPACE..',back:$(color.grey)'
+-- M._tokenstyles = {
+-- custom_whitespace = custom_style
+-- }
+--
+-- Notice that the lexer performs Scintilla/SciTE-style "$()" property expansion.
+-- You may also use "%()". Remember to refrain from assigning specific colors in
+-- styles, but in this case, all user color themes probably define the
+-- "color.grey" property.
+--
+-- ### Line Lexers
+--
+-- By default, lexers match the arbitrary chunks of text passed to them by
+-- Scintilla. These chunks may be a full document, only the visible part of a
+-- document, or even just portions of lines. Some lexers need to match whole
+-- lines. For example, a lexer for the output of a file "diff" needs to know if
+-- the line started with a '+' or '-' and then style the entire line
+-- accordingly. To indicate that your lexer matches by line, use the
+-- `_LEXBYLINE` field:
+--
+-- M._LEXBYLINE = true
+--
+-- Now the input text for the lexer is a single line at a time. Keep in mind
+-- that line lexers do not have the ability to look ahead at subsequent lines.
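+--
+-- As a minimal sketch, a line-based lexer for "diff"-like output (the
+-- "addition" and "deletion" token names are illustrative, not predefined)
+-- might define:
+--
+--     local added = token('addition', '+' * l.nonnewline^0)
+--     local removed = token('deletion', '-' * l.nonnewline^0)
+--     M._rules = {{'addition', added}, {'deletion', removed}}
+--     M._LEXBYLINE = true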
+--
+-- ### Embedded Lexers
+--
+-- Lexers embed within one another very easily, requiring minimal effort. In the
+-- following sections, the lexer being embedded is called the "child" lexer and
+-- the lexer a child is being embedded in is called the "parent". For example,
+-- consider an HTML lexer and a CSS lexer. Each lexer stands alone, styling
+-- its respective HTML or CSS files. However, CSS can be embedded inside
+-- HTML. In this specific case, the CSS lexer is the "child" lexer with the HTML
+-- lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This
+-- sounds a lot like the case with CSS, but there is a subtle difference: PHP
+-- _embeds itself_ into HTML while CSS is _embedded in_ HTML. This fundamental
+-- difference results in two types of embedded lexers: a parent lexer that
+-- embeds other child lexers in it (like HTML embedding CSS), and a child lexer
+-- that embeds itself within a parent lexer (like PHP embedding itself in HTML).
+--
+-- #### Parent Lexer
+--
+-- Before embedding a child lexer into a parent lexer, the parent lexer needs to
+-- load the child lexer. This is done with the [`lexer.load()`]() function. For
+-- example, loading the CSS lexer within the HTML lexer looks like:
+--
+-- local css = l.load('css')
+--
+-- The next part of the embedding process is telling the parent lexer when to
+-- switch over to the child lexer and when to switch back. The lexer refers to
+-- these indications as the "start rule" and "end rule", respectively, and are
+-- just LPeg patterns. Continuing with the HTML/CSS example, the transition from
+-- HTML to CSS is when the lexer encounters a "style" tag with a "type"
+-- attribute whose value is "text/css":
+--
+-- local css_tag = P('<style') * P(function(input, index)
+-- if input:find('^[^>]+type="text/css"', index) then
+-- return index
+-- end
+-- end)
+--
+-- This pattern looks for the beginning of a "style" tag and searches its
+-- attribute list for the text "`type="text/css"`". (In this simplified example,
+-- the Lua pattern does not allow whitespace around the '=', nor does it
+-- consider that using single quotes is valid.) If there is a match, the
+-- functional pattern returns a value instead of `nil`. In this case, the value
+-- returned does not matter because we ultimately want to style the "style" tag
+-- as an HTML tag, so the actual start rule looks like this:
+--
+-- local css_start_rule = #css_tag * tag
+--
+-- Now that the parent knows when to switch to the child, it needs to know when
+-- to switch back. In the case of HTML/CSS, the switch back occurs when the
+-- lexer encounters an ending "style" tag, though the lexer should still style
+-- the tag as an HTML tag:
+--
+-- local css_end_rule = #P('</style>') * tag
+--
+-- Once the parent loads the child lexer and defines the child's start and end
+-- rules, it embeds the child with the [`lexer.embed_lexer()`]() function:
+--
+-- l.embed_lexer(M, css, css_start_rule, css_end_rule)
+--
+-- The first parameter is the parent lexer object to embed the child in, which
+-- in this case is `M`. The other three parameters are the child lexer object
+-- loaded earlier followed by its start and end rules.
+--
+-- #### Child Lexer
+--
+-- The process for instructing a child lexer to embed itself into a parent is
+-- very similar to embedding a child into a parent: first, load the parent lexer
+-- into the child lexer with the [`lexer.load()`]() function and then create
+-- start and end rules for the child lexer. However, in this case, swap the
+-- lexer object arguments to [`lexer.embed_lexer()`](). For example, in the PHP
+-- lexer:
+--
+-- local html = l.load('html')
+-- local php_start_rule = token('php_tag', '<?php ')
+-- local php_end_rule = token('php_tag', '?>')
+-- l.embed_lexer(html, M, php_start_rule, php_end_rule)
+--
+-- ## Code Folding
+--
+-- When reading source code, it is occasionally helpful to temporarily hide
+-- blocks of code like functions, classes, comments, etc. This is the concept of
+-- "folding". In the Textadept and SciTE editors for example, little indicators
+-- in the editor margins appear next to code that can be folded at places called
+-- "fold points". When the user clicks an indicator, the editor hides the code
+-- associated with the indicator until the user clicks the indicator again. The
+-- lexer specifies these fold points and what code exactly to fold.
+--
+-- The fold points for most languages occur on keywords or character sequences.
+-- Examples of fold keywords are "if" and "end" in Lua and examples of fold
+-- character sequences are '{', '}', "/\*", and "\*/" in C for code block and
+-- comment delimiters, respectively. However, these fold points cannot occur
+-- just anywhere. For example, lexers should not recognize fold keywords that
+-- appear within strings or comments. The lexer's `_foldsymbols` table allows
+-- you to conveniently define fold points with such granularity. For example,
+-- consider C:
+--
+-- M._foldsymbols = {
+-- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+-- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1},
+-- _patterns = {'[{}]', '/%*', '%*/'}
+-- }
+--
+-- The first assignment states that any '{' or '}' that the lexer recognizes as
+-- a `lexer.OPERATOR` token is a fold point. The integer `1` indicates the
+-- match is a beginning fold point and `-1` indicates the match is an ending
+-- fold point. Likewise, the second assignment states that any "/\*" or "\*/"
+-- that the lexer recognizes as part of a `lexer.COMMENT` token is a fold point.
+-- The lexer does not consider any occurrences of these characters outside their
+-- defined tokens (such as in a string) as fold points. Finally, every
+-- `_foldsymbols` table must have a `_patterns` field that contains a list of
+-- [Lua patterns][] that match fold points. If the lexer encounters text that
+-- matches one of those patterns, the lexer looks up the matched text in its
+-- token's table to determine whether or not the text is a fold point. In the
+-- example above, the first Lua pattern matches any '{' or '}' characters. When
+-- the lexer comes across one of those characters, it checks if the match is a
+-- `lexer.OPERATOR` token. If so, the lexer identifies the match as a fold
+-- point. The same idea applies for the other patterns. (The '%' is in the other
+-- patterns because '\*' is a special character in Lua patterns that needs
+-- escaping.) How do you specify fold keywords? Here is an example for Lua:
+--
+-- M._foldsymbols = {
+-- [l.KEYWORD] = {
+-- ['if'] = 1, ['do'] = 1, ['function'] = 1,
+-- ['end'] = -1, ['repeat'] = 1, ['until'] = -1
+-- },
+-- _patterns = {'%l+'}
+-- }
+--
+-- Any time the lexer encounters a lower case word, if that word is a
+-- `lexer.KEYWORD` token and in the associated list of fold points, the lexer
+-- identifies the word as a fold point.
+--
+-- If your lexer needs to do some additional processing to determine if a match
+-- is a fold point, assign a function that returns an integer. Returning `1` or
+-- `-1` indicates the match is a fold point. Returning `0` indicates it is not.
+-- For example:
+--
+-- local function fold_strange_token(text, pos, line, s, match)
+-- if ... then
+-- return 1 -- beginning fold point
+-- elseif ... then
+-- return -1 -- ending fold point
+-- end
+-- return 0
+-- end
+--
+-- M._foldsymbols = {
+-- ['strange_token'] = {['|'] = fold_strange_token},
+-- _patterns = {'|'}
+-- }
+--
+-- Any time the lexer encounters a '|' that is a "strange_token", it calls the
+-- `fold_strange_token` function to determine if '|' is a fold point. The lexer
+-- calls these functions with the following arguments: the text to identify fold
+-- points in, the beginning position of the current line in the text to fold,
+-- the current line's text, the position in the current line the matched text
+-- starts at, and the matched text itself.
+--
+-- [Lua patterns]: http://www.lua.org/manual/5.2/manual.html#6.4.1
+--
+-- ### Fold by Indentation
+--
+-- Some languages have significant whitespace and/or no delimiters that indicate
+-- fold points. If your lexer falls into this category and you would like to
+-- mark fold points based on changes in indentation, use the
+-- `_FOLDBYINDENTATION` field:
+--
+-- M._FOLDBYINDENTATION = true
+--
+-- ## Using Lexers
+--
+-- ### Textadept
+--
+-- Put your lexer in your *~/.textadept/lexers/* directory so you do not
+-- overwrite it when upgrading Textadept. Also, lexers in this directory
+-- override default lexers. Thus, Textadept loads a user *lua* lexer instead of
+-- the default *lua* lexer. This is convenient for tweaking a default lexer to
+-- your liking. Then add a [file type][] for your lexer if necessary.
+--
+-- [file type]: _M.textadept.file_types.html
+--
+-- ### SciTE
+--
+-- Create a *.properties* file for your lexer and `import` it in either your
+-- *SciTEUser.properties* or *SciTEGlobal.properties*. The contents of the
+-- *.properties* file should contain:
+--
+-- file.patterns.[lexer_name]=[file_patterns]
+-- lexer.$(file.patterns.[lexer_name])=[lexer_name]
+--
+-- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension)
+-- and `[file_patterns]` is a set of file extensions to use your lexer for.
+--
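+-- For example, a hypothetical *lua.properties* file for the Lua lexer might
+-- contain:
+--
+--     file.patterns.lua=*.lua
+--     lexer.$(file.patterns.lua)=lua
+--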
+-- Please note that Lua lexers ignore any styling information in *.properties*
+-- files. Your theme file in the *lexers/themes/* directory contains styling
+-- information.
+--
+-- ## Considerations
+--
+-- ### Performance
+--
+-- There might be some slight overhead when initializing a lexer, but loading a
+-- file from disk into Scintilla is usually more expensive. On modern computer
+-- systems, I see no difference in speed between LPeg lexers and Scintilla's C++
+-- ones. Optimize lexers for speed by re-arranging rules in the `_rules` table
+-- so that the most common rules match first. Do keep in mind that order matters
+-- for similar rules.
+--
+-- ### Limitations
+--
+-- Embedded preprocessor languages like PHP cannot completely embed in their
+-- parent languages in that the parent's tokens do not support start and end
+-- rules. This mostly goes unnoticed, but code like
+--
+-- <div id="<?php echo $id; ?>">
+--
+-- or
+--
+-- <div <?php if ($odd) { echo 'class="odd"'; } ?>>
+--
+-- will not style correctly.
+--
+-- ### Troubleshooting
+--
+-- Errors in lexers can be tricky to debug. Lexers print Lua errors to
+-- `io.stderr` and `_G.print()` statements to `io.stdout`. Running your editor
+-- from a terminal is the easiest way to see errors as they occur.
+--
+-- ### Risks
+--
+-- Poorly written lexers have the ability to crash Scintilla (and thus its
+-- containing application), so unsaved data might be lost. However, I have only
+-- observed these crashes in early lexer development, when syntax errors or
+-- pattern errors are present. Once the lexer actually starts styling text
+-- (either correctly or incorrectly, it does not matter), I have not observed
+-- any crashes.
+--
+-- ### Acknowledgements
+--
+-- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list
+-- that inspired me, and thanks to Roberto Ierusalimschy for LPeg.
+--
+-- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html
+-- @field LEXERPATH (string)
+-- The path used to search for a lexer to load.
+-- Identical in format to Lua's `package.path` string.
+-- The default value is `package.path`.
+-- @field DEFAULT (string)
+-- The token name for default tokens.
+-- @field WHITESPACE (string)
+-- The token name for whitespace tokens.
+-- @field COMMENT (string)
+-- The token name for comment tokens.
+-- @field STRING (string)
+-- The token name for string tokens.
+-- @field NUMBER (string)
+-- The token name for number tokens.
+-- @field KEYWORD (string)
+-- The token name for keyword tokens.
+-- @field IDENTIFIER (string)
+-- The token name for identifier tokens.
+-- @field OPERATOR (string)
+-- The token name for operator tokens.
+-- @field ERROR (string)
+-- The token name for error tokens.
+-- @field PREPROCESSOR (string)
+-- The token name for preprocessor tokens.
+-- @field CONSTANT (string)
+-- The token name for constant tokens.
+-- @field VARIABLE (string)
+-- The token name for variable tokens.
+-- @field FUNCTION (string)
+-- The token name for function tokens.
+-- @field CLASS (string)
+-- The token name for class tokens.
+-- @field TYPE (string)
+-- The token name for type tokens.
+-- @field LABEL (string)
+-- The token name for label tokens.
+-- @field REGEX (string)
+-- The token name for regex tokens.
+-- @field STYLE_CLASS (string)
+-- The style typically used for class definitions.
+-- @field STYLE_COMMENT (string)
+-- The style typically used for code comments.
+-- @field STYLE_CONSTANT (string)
+-- The style typically used for constants.
+-- @field STYLE_ERROR (string)
+-- The style typically used for erroneous syntax.
+-- @field STYLE_FUNCTION (string)
+-- The style typically used for function definitions.
+-- @field STYLE_KEYWORD (string)
+-- The style typically used for language keywords.
+-- @field STYLE_LABEL (string)
+-- The style typically used for labels.
+-- @field STYLE_NUMBER (string)
+-- The style typically used for numbers.
+-- @field STYLE_OPERATOR (string)
+-- The style typically used for operators.
+-- @field STYLE_REGEX (string)
+-- The style typically used for regular expression strings.
+-- @field STYLE_STRING (string)
+-- The style typically used for strings.
+-- @field STYLE_PREPROCESSOR (string)
+-- The style typically used for preprocessor statements.
+-- @field STYLE_TYPE (string)
+-- The style typically used for static types.
+-- @field STYLE_VARIABLE (string)
+-- The style typically used for variables.
+-- @field STYLE_WHITESPACE (string)
+-- The style typically used for whitespace.
+-- @field STYLE_EMBEDDED (string)
+-- The style typically used for embedded code.
+-- @field STYLE_IDENTIFIER (string)
+-- The style typically used for identifier words.
+-- @field STYLE_DEFAULT (string)
+-- The style all styles are based off of.
+-- @field STYLE_LINENUMBER (string)
+-- The style used for all margins except fold margins.
+-- @field STYLE_BRACELIGHT (string)
+-- The style used for highlighted brace characters.
+-- @field STYLE_BRACEBAD (string)
+-- The style used for unmatched brace characters.
+-- @field STYLE_CONTROLCHAR (string)
+-- The style used for control characters.
+-- Color attributes are ignored.
+-- @field STYLE_INDENTGUIDE (string)
+-- The style used for indentation guides.
+-- @field STYLE_CALLTIP (string)
+-- The style used by call tips if [`buffer.call_tip_use_style`]() is set.
+-- Only the font name, size, and color attributes are used.
+-- @field any (pattern)
+-- A pattern that matches any single character.
+-- @field ascii (pattern)
+-- A pattern that matches any ASCII character (codes 0 to 127).
+-- @field extend (pattern)
+-- A pattern that matches any ASCII extended character (codes 0 to 255).
+-- @field alpha (pattern)
+-- A pattern that matches any alphabetic character ('A'-'Z', 'a'-'z').
+-- @field digit (pattern)
+-- A pattern that matches any digit ('0'-'9').
+-- @field alnum (pattern)
+-- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z',
+-- '0'-'9').
+-- @field lower (pattern)
+-- A pattern that matches any lower case character ('a'-'z').
+-- @field upper (pattern)
+-- A pattern that matches any upper case character ('A'-'Z').
+-- @field xdigit (pattern)
+-- A pattern that matches any hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f').
+-- @field cntrl (pattern)
+-- A pattern that matches any control character (ASCII codes 0 to 31).
+-- @field graph (pattern)
+-- A pattern that matches any graphical character ('!' to '~').
+-- @field print (pattern)
+-- A pattern that matches any printable character (' ' to '~').
+-- @field punct (pattern)
+-- A pattern that matches any punctuation character ('!' to '/', ':' to '@',
+-- '[' to '`', '{' to '~').
+-- @field space (pattern)
+-- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n',
+-- '\r', space).
+-- @field newline (pattern)
+-- A pattern that matches any set of end of line characters.
+-- @field nonnewline (pattern)
+-- A pattern that matches any single, non-newline character.
+-- @field nonnewline_esc (pattern)
+-- A pattern that matches any single, non-newline character or any set of end
+-- of line characters escaped with '\'.
+-- @field dec_num (pattern)
+-- A pattern that matches a decimal number.
+-- @field hex_num (pattern)
+-- A pattern that matches a hexadecimal number.
+-- @field oct_num (pattern)
+-- A pattern that matches an octal number.
+-- @field integer (pattern)
+-- A pattern that matches either a decimal, hexadecimal, or octal number.
+-- @field float (pattern)
+-- A pattern that matches a floating point number.
+-- @field word (pattern)
+-- A pattern that matches a typical word. Words begin with a letter or
+-- underscore and consist of alphanumeric and underscore characters.
+-- @field FOLD_BASE (number)
+-- The initial (root) fold level.
+-- @field FOLD_BLANK (number)
+-- Flag indicating that the line is blank.
+-- @field FOLD_HEADER (number)
+-- Flag indicating the line is a fold point.
+-- @field fold_level (table, Read-only)
+-- Table of fold level bit-masks for line numbers starting from zero.
+-- Fold level masks are composed of an integer level combined with any of the
+-- following bits:
+--
+-- * `lexer.FOLD_BASE`
+-- The initial fold level.
+-- * `lexer.FOLD_BLANK`
+-- The line is blank.
+-- * `lexer.FOLD_HEADER`
+-- The line is a header, or fold point.
+-- @field indent_amount (table, Read-only)
+-- Table of indentation amounts in character columns, for line numbers
+-- starting from zero.
+-- @field property (table)
+-- Map of key-value string pairs.
+-- @field property_expanded (table, Read-only)
+-- Map of key-value string pairs with `$()` and `%()` variable replacement
+-- performed in values.
+-- @field property_int (table, Read-only)
+-- Map of key-value pairs with values interpreted as numbers, or `0` if not
+-- found.
+-- @field style_at (table, Read-only)
+-- Table of style names at positions in the buffer starting from zero.
+module('lexer')]=]
+
+local lpeg = require('lpeg')
+local lpeg_P, lpeg_R, lpeg_S, lpeg_V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lpeg_Ct, lpeg_Cc, lpeg_Cp = lpeg.Ct, lpeg.Cc, lpeg.Cp
+local lpeg_Cmt, lpeg_C = lpeg.Cmt, lpeg.C
+local lpeg_match = lpeg.match
+
+M.LEXERPATH = package.path
+
+-- Table of loaded lexers.
+local lexers = {}
+
+-- Keep track of the last parent lexer loaded. This lexer's rules are used for
+-- proxy lexers (those that load parent and child lexers to embed) that do not
+-- declare a parent lexer.
+local parent_lexer
+
+if not package.searchpath then
+ -- Searches for the given *name* in the given *path*.
+ -- This is an implementation of Lua 5.2's `package.searchpath()` function for
+ -- Lua 5.1.
+ function package.searchpath(name, path)
+ local tried = {}
+ for part in path:gmatch('[^;]+') do
+ local filename = part:gsub('%?', name)
+ local f = io.open(filename, 'r')
+ if f then f:close() return filename end
+ tried[#tried + 1] = ("no file '%s'"):format(filename)
+ end
+ return nil, table.concat(tried, '\n')
+ end
+end
+
+-- Adds a rule to a lexer's current ordered list of rules.
+-- @param lexer The lexer to add the given rule to.
+-- @param id The name associated with this rule. It is used for other lexers
+-- to access this particular rule from the lexer's `_RULES` table. It does not
+-- have to be the same as the name passed to `token`.
+-- @param rule The LPeg pattern of the rule.
+local function add_rule(lexer, id, rule)
+ if not lexer._RULES then
+ lexer._RULES = {}
+ -- Contains an ordered list (by numerical index) of rule names. This is used
+ -- in conjunction with lexer._RULES for building _TOKENRULE.
+ lexer._RULEORDER = {}
+ end
+ lexer._RULES[id] = rule
+ lexer._RULEORDER[#lexer._RULEORDER + 1] = id
+end
+
+-- Adds a new Scintilla style to Scintilla.
+-- @param lexer The lexer to add the given style to.
+-- @param token_name The name of the token associated with this style.
+-- @param style A Scintilla style created from `style()`.
+-- @see style
+local function add_style(lexer, token_name, style)
+ local num_styles = lexer._numstyles
+ if num_styles == 32 then num_styles = num_styles + 8 end -- skip predefined
+ if num_styles >= 255 then print('Too many styles defined (255 MAX)') end
+ lexer._TOKENSTYLES[token_name], lexer._numstyles = num_styles, num_styles + 1
+ lexer._EXTRASTYLES[token_name] = style
+end
+
+-- (Re)constructs `lexer._TOKENRULE`.
+-- @param lexer The lexer whose rules to join.
+local function join_tokens(lexer)
+ local patterns, order = lexer._RULES, lexer._RULEORDER
+ local token_rule = patterns[order[1]]
+ for i = 2, #order do token_rule = token_rule + patterns[order[i]] end
+ lexer._TOKENRULE = token_rule + M.token(M.DEFAULT, M.any)
+ return lexer._TOKENRULE
+end
+
+-- Adds a given lexer and any of its embedded lexers to a given grammar.
+-- @param grammar The grammar to add the lexer to.
+-- @param lexer The lexer to add.
+local function add_lexer(grammar, lexer)
+ local token_rule = join_tokens(lexer)
+ local lexer_name = lexer._NAME
+ for _, child in ipairs(lexer._CHILDREN) do
+ if child._CHILDREN then add_lexer(grammar, child) end
+ local child_name = child._NAME
+ local rules = child._EMBEDDEDRULES[lexer_name]
+ local rules_token_rule = grammar['__'..child_name] or rules.token_rule
+ grammar[child_name] = (-rules.end_rule * rules_token_rule)^0 *
+ rules.end_rule^-1 * lpeg_V(lexer_name)
+ local embedded_child = '_'..child_name
+ grammar[embedded_child] = rules.start_rule * (-rules.end_rule *
+ rules_token_rule)^0 * rules.end_rule^-1
+ token_rule = lpeg_V(embedded_child) + token_rule
+ end
+ grammar['__'..lexer_name] = token_rule -- can contain embedded lexer rules
+ grammar[lexer_name] = token_rule^0
+end
+
+-- (Re)constructs `lexer._GRAMMAR`.
+-- @param lexer The parent lexer.
+-- @param initial_rule The name of the rule to start lexing with. The default
+-- value is `lexer._NAME`. Multilang lexers use this to start with a child
+-- rule if necessary.
+local function build_grammar(lexer, initial_rule)
+ local children = lexer._CHILDREN
+ if children then
+ local lexer_name = lexer._NAME
+ if not initial_rule then initial_rule = lexer_name end
+ local grammar = {initial_rule}
+ add_lexer(grammar, lexer)
+ lexer._INITIALRULE = initial_rule
+ lexer._GRAMMAR = lpeg_Ct(lpeg_P(grammar))
+ else
+ lexer._GRAMMAR = lpeg_Ct(join_tokens(lexer)^0)
+ end
+end
+
+local string_upper = string.upper
+-- Default styles.
+local default = {
+ 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword',
+ 'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable',
+ 'function', 'class', 'type', 'label', 'regex', 'embedded'
+}
+for _, v in ipairs(default) do
+ M[string_upper(v)], M['STYLE_'..string_upper(v)] = v, '$(style.'..v..')'
+end
+-- Predefined styles.
+local predefined = {
+ 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
+ 'indentguide', 'calltip'
+}
+for _, v in ipairs(predefined) do
+ M[string_upper(v)], M['STYLE_'..string_upper(v)] = v, '$(style.'..v..')'
+end
+
+---
+-- Initializes or loads and returns the lexer of string name *name*.
+-- Scintilla calls this function to load a lexer. Parent lexers also call this
+-- function to load child lexers and vice-versa. The user calls this function
+-- to load a lexer when using Scintillua as a Lua library.
+-- @param name The name of the lexing language.
+-- @param alt_name The alternate name of the lexing language. This is useful for
+-- embedding the same child lexer with multiple sets of start and end tokens.
+-- @return lexer object
+-- @name load
+function M.load(name, alt_name)
+ if lexers[alt_name or name] then return lexers[alt_name or name] end
+ parent_lexer = nil -- reset
+
+ -- When using Scintillua as a stand-alone module, the `property` and
+ -- `property_int` tables do not exist (they are not useful). Create them to
+ -- prevent errors from occurring.
+ if not M.property then
+ M.property, M.property_int = {}, setmetatable({}, {
+ __index = function(t, k)
+ return tostring(tonumber(M.property[k]) or 0)
+ end,
+ __newindex = function() error('read-only property') end
+ })
+ end
+
+ -- Load the language lexer with its rules, styles, etc.
+ M.WHITESPACE = (alt_name or name)..'_whitespace'
+ local lexer_file, error = package.searchpath(name, M.LEXERPATH)
+ local ok, lexer = pcall(dofile, lexer_file or '')
+ if not ok then
+ _G.print(error or lexer) -- error message
+ lexer = {_NAME = alt_name or name}
+ end
+ if alt_name then lexer._NAME = alt_name end
+
+ -- Create the initial maps for token names to style numbers and styles.
+ local token_styles = {}
+ for i = 1, #default do token_styles[default[i]] = i - 1 end
+ for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end
+ lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default
+ lexer._EXTRASTYLES = {}
+
+ -- If the lexer is a proxy (loads parent and child lexers to embed) and does
+ -- not declare a parent, try and find one and use its rules.
+ if not lexer._rules and not lexer._lexer then lexer._lexer = parent_lexer end
+
+ -- If the lexer is a proxy or a child that embedded itself, add its rules and
+ -- styles to the parent lexer. Then set the parent to be the main lexer.
+ if lexer._lexer then
+ local l, _r, _s = lexer._lexer, lexer._rules, lexer._tokenstyles
+ if not l._tokenstyles then l._tokenstyles = {} end
+ for _, r in ipairs(_r or {}) do
+ -- Prevent rule id clashes.
+ l._rules[#l._rules + 1] = {lexer._NAME..'_'..r[1], r[2]}
+ end
+ for token, style in pairs(_s or {}) do l._tokenstyles[token] = style end
+ lexer = l
+ end
+
+ -- Add the lexer's styles and build its grammar.
+ if lexer._rules then
+ for token, style in pairs(lexer._tokenstyles or {}) do
+ add_style(lexer, token, style)
+ end
+ for _, r in ipairs(lexer._rules) do add_rule(lexer, r[1], r[2]) end
+ build_grammar(lexer)
+ end
+ -- Add the lexer's unique whitespace style.
+ add_style(lexer, lexer._NAME..'_whitespace', M.STYLE_WHITESPACE)
+
+ -- Process the lexer's fold symbols.
+ if lexer._foldsymbols and lexer._foldsymbols._patterns then
+ local patterns = lexer._foldsymbols._patterns
+ for i = 1, #patterns do patterns[i] = '()('..patterns[i]..')' end
+ end
+
+ lexer.lex, lexer.fold = M.lex, M.fold
+ lexers[alt_name or name] = lexer
+ return lexer
+end
+
+---
+-- Lexes a chunk of text *text* (that has an initial style number of
+-- *init_style*) with lexer *lexer*.
+-- If *lexer* has a `_LEXBYLINE` flag set, the text is lexed one line at a time.
+-- Otherwise the text is lexed as a whole.
+-- @param lexer The lexer object to lex with.
+-- @param text The text in the buffer to lex.
+-- @param init_style The current style. Multiple-language lexers use this to
+-- determine which language to start lexing in.
+-- @return table of token names and positions.
+-- @name lex
+function M.lex(lexer, text, init_style)
+ if not lexer._LEXBYLINE then
+ -- For multilang lexers, build a new grammar whose initial_rule is the
+ -- current language.
+ if lexer._CHILDREN then
+ for style, style_num in pairs(lexer._TOKENSTYLES) do
+ if style_num == init_style then
+ local lexer_name = style:match('^(.+)_whitespace') or lexer._NAME
+ if lexer._INITIALRULE ~= lexer_name then
+ build_grammar(lexer, lexer_name)
+ end
+ break
+ end
+ end
+ end
+ return lpeg_match(lexer._GRAMMAR, text)
+ else
+ local tokens = {}
+ local function append(tokens, line_tokens, offset)
+ for i = 1, #line_tokens, 2 do
+ tokens[#tokens + 1] = line_tokens[i]
+ tokens[#tokens + 1] = line_tokens[i + 1] + offset
+ end
+ end
+ local offset = 0
+ local grammar = lexer._GRAMMAR
+ for line in text:gmatch('[^\r\n]*\r?\n?') do
+ local line_tokens = lpeg_match(grammar, line)
+ if line_tokens then append(tokens, line_tokens, offset) end
+ offset = offset + #line
+ -- Use the default style to the end of the line if none was specified.
+ if tokens[#tokens] ~= offset then
+ tokens[#tokens + 1], tokens[#tokens + 2] = 'default', offset + 1
+ end
+ end
+ return tokens
+ end
+end
+
+---
+-- Folds a chunk of text *text* with lexer *lexer*.
+-- Folds *text* starting at position *start_pos* on line number *start_line*
+-- with a beginning fold level of *start_level* in the buffer. If *lexer* has a
+-- `_fold` function or a `_foldsymbols` table, that field is used to perform
+-- folding. Otherwise, if *lexer* has a `_FOLDBYINDENTATION` field set, or if a
+-- `fold.by.indentation` property is set, folding by indentation is done.
+-- @param lexer The lexer object to fold with.
+-- @param text The text in the buffer to fold.
+-- @param start_pos The position in the buffer *text* starts at.
+-- @param start_line The line number *text* starts on.
+-- @param start_level The fold level *text* starts on.
+-- @return table of fold levels.
+-- @name fold
+function M.fold(lexer, text, start_pos, start_line, start_level)
+ local folds = {}
+ if text == '' then return folds end
+ local fold = M.property_int['fold'] > 0
+ local FOLD_BASE = M.FOLD_BASE
+ local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK
+ if fold and lexer._fold then
+ return lexer._fold(text, start_pos, start_line, start_level)
+ elseif fold and lexer._foldsymbols then
+ local lines = {}
+ for p, l in (text..'\n'):gmatch('()(.-)\r?\n') do
+ lines[#lines + 1] = {p, l}
+ end
+ local fold_zero_sum_lines = M.property_int['fold.on.zero.sum.lines'] > 0
+ local fold_symbols = lexer._foldsymbols
+ local fold_symbols_patterns = fold_symbols._patterns
+ local style_at, fold_level = M.style_at, M.fold_level
+ local line_num, prev_level = start_line, start_level
+ local current_level = prev_level
+ for i = 1, #lines do
+ local pos, line = lines[i][1], lines[i][2]
+ if line ~= '' then
+ local level_decreased = false
+ for j = 1, #fold_symbols_patterns do
+ for s, match in line:gmatch(fold_symbols_patterns[j]) do
+ local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
+ local l = symbols and symbols[match]
+ if type(l) == 'function' then l = l(text, pos, line, s, match) end
+ if type(l) == 'number' then
+ current_level = current_level + l
+ if l < 0 and current_level < prev_level then
+ -- Potential zero-sum line. If the level were to go back up on
+ -- the same line, the line may be marked as a fold header.
+ level_decreased = true
+ end
+ end
+ end
+ end
+ folds[line_num] = prev_level
+ if current_level > prev_level then
+ folds[line_num] = prev_level + FOLD_HEADER
+ elseif level_decreased and current_level == prev_level and
+ fold_zero_sum_lines then
+ if line_num > start_line then
+ folds[line_num] = prev_level - 1 + FOLD_HEADER
+ else
+ -- Typing within a zero-sum line.
+ local level = fold_level[line_num - 1] - 1
+ if level > FOLD_HEADER then level = level - FOLD_HEADER end
+ if level > FOLD_BLANK then level = level - FOLD_BLANK end
+ folds[line_num] = level + FOLD_HEADER
+ current_level = current_level + 1
+ end
+ end
+ if current_level < FOLD_BASE then current_level = FOLD_BASE end
+ prev_level = current_level
+ else
+ folds[line_num] = prev_level + FOLD_BLANK
+ end
+ line_num = line_num + 1
+ end
+ elseif fold and (lexer._FOLDBYINDENTATION or
+ M.property_int['fold.by.indentation'] > 0) then
+ -- Indentation based folding.
+ -- Calculate indentation per line.
+ local indentation = {}
+ for indent, line in (text..'\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do
+ indentation[#indentation + 1] = line ~= '' and #indent
+ end
+ -- Find the first non-blank line before start_line. If the current line is
+ -- indented, make that previous line a header and update the levels of any
+-- blank lines in between. If the current line is blank, match the level of
+ -- the previous non-blank line.
+ local current_level = start_level
+ for i = start_line - 1, 0, -1 do
+ local level = M.fold_level[i]
+ if level >= FOLD_HEADER then level = level - FOLD_HEADER end
+ if level < FOLD_BLANK then
+ local indent = M.indent_amount[i]
+ if indentation[1] and indentation[1] > indent then
+ folds[i] = FOLD_BASE + indent + FOLD_HEADER
+ for j = i + 1, start_line - 1 do
+ folds[j] = start_level + FOLD_BLANK
+ end
+ elseif not indentation[1] then
+ current_level = FOLD_BASE + indent
+ end
+ break
+ end
+ end
+ -- Iterate over lines, setting fold numbers and fold flags.
+ for i = 1, #indentation do
+ if indentation[i] then
+ current_level = FOLD_BASE + indentation[i]
+ folds[start_line + i - 1] = current_level
+ for j = i + 1, #indentation do
+ if indentation[j] then
+ if FOLD_BASE + indentation[j] > current_level then
+ folds[start_line + i - 1] = current_level + FOLD_HEADER
+ current_level = FOLD_BASE + indentation[j] -- for any blanks below
+ end
+ break
+ end
+ end
+ else
+ folds[start_line + i - 1] = current_level + FOLD_BLANK
+ end
+ end
+ else
+ -- No folding, reset fold levels if necessary.
+ local current_line = start_line
+ for _ in text:gmatch('\r?\n') do
+ folds[current_line] = start_level
+ current_line = current_line + 1
+ end
+ end
+ return folds
+end
+
+-- The following are utility functions lexers will have access to.
+
+-- Common patterns.
+M.any = lpeg_P(1)
+M.ascii = lpeg_R('\000\127')
+M.extend = lpeg_R('\000\255')
+M.alpha = lpeg_R('AZ', 'az')
+M.digit = lpeg_R('09')
+M.alnum = lpeg_R('AZ', 'az', '09')
+M.lower = lpeg_R('az')
+M.upper = lpeg_R('AZ')
+M.xdigit = lpeg_R('09', 'AF', 'af')
+M.cntrl = lpeg_R('\000\031')
+M.graph = lpeg_R('!~')
+M.print = lpeg_R(' ~')
+M.punct = lpeg_R('!/', ':@', '[\'', '{~')
+M.space = lpeg_S('\t\v\f\n\r ')
+
+M.newline = lpeg_S('\r\n\f')^1
+M.nonnewline = 1 - M.newline
+M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any
+
+M.dec_num = M.digit^1
+M.hex_num = '0' * lpeg_S('xX') * M.xdigit^1
+M.oct_num = '0' * lpeg_R('07')^1
+M.integer = lpeg_S('+-')^-1 * (M.hex_num + M.oct_num + M.dec_num)
+M.float = lpeg_S('+-')^-1 *
+ (M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0 +
+ M.digit^1) *
+ lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1
+M.word = (M.alpha + '_') * (M.alnum + '_')^0
+
+---
+-- Creates and returns a token pattern with token name *name* and pattern
+-- *patt*.
+-- If *name* is not a predefined token name, its style must be defined in the
+-- lexer's `_tokenstyles` table.
+-- @param name The name of token. If this name is not a predefined token name,
+-- then a style needs to be associated with it in the lexer's `_tokenstyles`
+-- table.
+-- @param patt The LPeg pattern associated with the token.
+-- @return pattern
+-- @usage local ws = token(l.WHITESPACE, l.space^1)
+-- @usage local annotation = token('annotation', '@' * l.word)
+-- @name token
+function M.token(name, patt)
+ return lpeg_Cc(name) * patt * lpeg_Cp()
+end
+
+---
+-- Creates and returns a pattern that matches a range of text bounded by
+-- *chars* characters.
+-- This is a convenience function for matching more complicated delimited ranges
+-- like strings with escape characters and balanced parentheses. *single_line*
+-- indicates whether or not the range must be on a single line, *no_escape*
+-- indicates whether or not to ignore '\' as an escape character, and *balanced*
+-- indicates whether or not to handle balanced ranges like parentheses and
+-- requires *chars* to be composed of two characters.
+-- @param chars The character(s) that bound the matched range.
+-- @param single_line Optional flag indicating whether or not the range must be
+-- on a single line.
+-- @param no_escape Optional flag indicating whether or not the range end
+-- character may be escaped by a '\\' character.
+-- @param balanced Optional flag indicating whether or not to match a balanced
+-- range, like the "%b" Lua pattern. This flag only applies if *chars*
+-- consists of two different characters (e.g. "()").
+-- @return pattern
+-- @usage local dq_str_escapes = l.delimited_range('"')
+-- @usage local dq_str_noescapes = l.delimited_range('"', false, true)
+-- @usage local unbalanced_parens = l.delimited_range('()')
+-- @usage local balanced_parens = l.delimited_range('()', false, false, true)
+-- @see nested_pair
+-- @name delimited_range
+function M.delimited_range(chars, single_line, no_escape, balanced)
+ local s = chars:sub(1, 1)
+ local e = #chars == 2 and chars:sub(2, 2) or s
+ local range
+ local b = balanced and s or ''
+ local n = single_line and '\n' or ''
+ if no_escape then
+ local invalid = lpeg_S(e..n..b)
+ range = M.any - invalid
+ else
+ local invalid = lpeg_S(e..n..b) + '\\'
+ range = M.any - invalid + '\\' * M.any
+ end
+ if balanced and s ~= e then
+ return lpeg_P{s * (range + lpeg_V(1))^0 * e}
+ else
+ return s * range^0 * lpeg_P(e)^-1
+ end
+end
+
+---
+-- Creates and returns a pattern that matches pattern *patt* only at the
+-- beginning of a line.
+-- @param patt The LPeg pattern to match on the beginning of a line.
+-- @return pattern
+-- @usage local preproc = token(l.PREPROCESSOR, l.starts_line('#') *
+-- l.nonnewline^0)
+-- @name starts_line
+function M.starts_line(patt)
+ return lpeg_Cmt(lpeg_C(patt), function(input, index, match, ...)
+ local pos = index - #match
+ if pos == 1 then return index, ... end
+ local char = input:sub(pos - 1, pos - 1)
+ if char == '\n' or char == '\r' or char == '\f' then return index, ... end
+ end)
+end
+
+---
+-- Creates and returns a pattern that succeeds, without consuming input, only
+-- when the first non-whitespace character preceding the current match
+-- position is in string set *s*.
+-- @param s String character set like one passed to `lpeg.S()`.
+-- @return pattern
+-- @usage local regex = l.last_char_includes('+-*!%^&|=,([{') *
+-- l.delimited_range('/')
+-- @name last_char_includes
+function M.last_char_includes(s)
+ s = '['..s:gsub('[-%%%[]', '%%%1')..']'
+ return lpeg_P(function(input, index)
+ if index == 1 then return index end
+ local i = index
+ while input:sub(i - 1, i - 1):match('[ \t\r\n\f]') do i = i - 1 end
+ if input:sub(i - 1, i - 1):match(s) then return index end
+ end)
+end
+
+---
+-- Returns a pattern that matches a balanced range of text that starts with
+-- string *start_chars* and ends with string *end_chars*.
+-- With single-character delimiters, this function is identical to
+-- `delimited_range(start_chars..end_chars, false, true, true)`.
+-- @param start_chars The string starting a nested sequence.
+-- @param end_chars The string ending a nested sequence.
+-- @return pattern
+-- @usage local nested_comment = l.nested_pair('/*', '*/')
+-- @see delimited_range
+-- @name nested_pair
+function M.nested_pair(start_chars, end_chars)
+ local s, e = start_chars, lpeg_P(end_chars)^-1
+ return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e}
+end
+
+---
+-- Creates and returns a pattern that matches any single word in list *words*.
+-- Words consist of alphanumeric and underscore characters, as well as the
+-- characters in string set *word_chars*. *case_insensitive* indicates whether
+-- or not to ignore case when matching words.
+-- This is a convenience function for simplifying a set of ordered choice word
+-- patterns.
+-- @param words A table of words.
+-- @param word_chars Optional string of additional characters considered to be
+-- part of a word. By default, word characters are alphanumerics and
+-- underscores ("%w_" in Lua). This parameter may be `nil` or the empty string
+-- to indicate no additional word characters.
+-- @param case_insensitive Optional boolean flag indicating whether or not the
+-- word match is case-insensitive. The default is `false`.
+-- @return pattern
+-- @usage local keyword = token(l.KEYWORD, word_match{'foo', 'bar', 'baz'})
+-- @usage local keyword = token(l.KEYWORD, word_match({'foo-bar', 'foo-baz',
+-- 'bar-foo', 'bar-baz', 'baz-foo', 'baz-bar'}, '-', true))
+-- @name word_match
+function M.word_match(words, word_chars, case_insensitive)
+ local word_list = {}
+ for _, word in ipairs(words) do
+ word_list[case_insensitive and word:lower() or word] = true
+ end
+ local chars = M.alnum + '_'
+ if word_chars then chars = chars + lpeg_S(word_chars) end
+ return lpeg_Cmt(chars^1, function(input, index, word)
+ if case_insensitive then word = word:lower() end
+ return word_list[word] and index or nil
+ end)
+end
+
+---
+-- Embeds child lexer *child* in parent lexer *parent* using patterns
+-- *start_rule* and *end_rule*, which signal the beginning and end of the
+-- embedded lexer, respectively.
+-- @param parent The parent lexer.
+-- @param child The child lexer.
+-- @param start_rule The pattern that signals the beginning of the embedded
+-- lexer.
+-- @param end_rule The pattern that signals the end of the embedded lexer.
+-- @usage l.embed_lexer(M, css, css_start_rule, css_end_rule)
+-- @usage l.embed_lexer(html, M, php_start_rule, php_end_rule)
+-- @usage l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
+-- @name embed_lexer
+function M.embed_lexer(parent, child, start_rule, end_rule)
+ -- Add child rules.
+ if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end
+ if not child._RULES then -- creating a child lexer to be embedded
+ if not child._rules then error('Cannot embed language with no rules') end
+ for _, r in ipairs(child._rules) do add_rule(child, r[1], r[2]) end
+ end
+ child._EMBEDDEDRULES[parent._NAME] = {
+ ['start_rule'] = start_rule,
+ token_rule = join_tokens(child),
+ ['end_rule'] = end_rule
+ }
+ if not parent._CHILDREN then parent._CHILDREN = {} end
+ local children = parent._CHILDREN
+ children[#children + 1] = child
+ -- Add child styles.
+ if not parent._tokenstyles then parent._tokenstyles = {} end
+ local tokenstyles = parent._tokenstyles
+ tokenstyles[child._NAME..'_whitespace'] = M.STYLE_WHITESPACE
+ for token, style in pairs(child._tokenstyles or {}) do
+ tokenstyles[token] = style
+ end
+ child._lexer = parent -- use parent's tokens if child is embedding itself
+ parent_lexer = parent -- use parent's tokens if the calling lexer is a proxy
+end
+
+-- Determines if the previous line is a comment.
+-- This is used for determining if the current comment line is a fold point.
+-- @param prefix The prefix string defining a comment.
+-- @param text The text passed to a fold function.
+-- @param pos The pos passed to a fold function.
+-- @param line The line passed to a fold function.
+-- @param s The s passed to a fold function.
+local function prev_line_is_comment(prefix, text, pos, line, s)
+ local start = line:find('%S')
+ if start < s and not line:find(prefix, start, true) then return false end
+ local p = pos - 1
+ if text:sub(p, p) == '\n' then
+ p = p - 1
+ if text:sub(p, p) == '\r' then p = p - 1 end
+ if text:sub(p, p) ~= '\n' then
+ while p > 1 and text:sub(p - 1, p - 1) ~= '\n' do p = p - 1 end
+ while text:sub(p, p):find('^[\t ]$') do p = p + 1 end
+ return text:sub(p, p + #prefix - 1) == prefix
+ end
+ end
+ return false
+end
+
+-- Determines if the next line is a comment.
+-- This is used for determining if the current comment line is a fold point.
+-- @param prefix The prefix string defining a comment.
+-- @param text The text passed to a fold function.
+-- @param pos The pos passed to a fold function.
+-- @param line The line passed to a fold function.
+-- @param s The s passed to a fold function.
+local function next_line_is_comment(prefix, text, pos, line, s)
+ local p = text:find('\n', pos + s)
+ if p then
+ p = p + 1
+ while text:sub(p, p):find('^[\t ]$') do p = p + 1 end
+ return text:sub(p, p + #prefix - 1) == prefix
+ end
+ return false
+end
+
+---
+-- Returns a fold function (to be used within the lexer's `_foldsymbols` table)
+-- that folds consecutive line comments that start with string *prefix*.
+-- @param prefix The prefix string defining a line comment.
+-- @usage [l.COMMENT] = {['--'] = l.fold_line_comments('--')}
+-- @usage [l.COMMENT] = {['//'] = l.fold_line_comments('//')}
+-- @name fold_line_comments
+function M.fold_line_comments(prefix)
+ local property_int = M.property_int
+ return function(text, pos, line, s)
+ if property_int['fold.line.comments'] == 0 then return 0 end
+ if s > 1 and line:match('^%s*()') < s then return 0 end
+ local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
+ local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
+ if not prev_line_comment and next_line_comment then return 1 end
+ if prev_line_comment and not next_line_comment then return -1 end
+ return 0
+ end
+end
+
+M.property_expanded = setmetatable({}, {
+ -- Returns the string property value associated with string property *key*,
+ -- replacing any "$()" and "%()" expressions with the values of their keys.
+ __index = function(t, key)
+ return M.property[key]:gsub('[$%%]%b()', function(key)
+ return t[key:sub(3, -2)]
+ end)
+ end,
+ __newindex = function() error('read-only property') end
+})
+
+--[[ The functions and fields below were defined in C.
+
+---
+-- Individual fields for a lexer instance.
+-- @field _NAME The string name of the lexer.
+-- @field _rules An ordered list of rules for a lexer grammar.
+-- Each rule is a table containing an arbitrary rule name and the LPeg pattern
+-- associated with the rule. The order of rules is important as rules are
+-- matched sequentially.
+-- Child lexers should not use this table to access and/or modify their
+-- parent's rules and vice-versa. Use the `_RULES` table instead.
+-- @field _tokenstyles A map of non-predefined token names to styles.
+-- Remember to use token names, not rule names. It is recommended to use
+-- predefined styles or color-agnostic styles derived from predefined styles
+-- to ensure compatibility with user color themes.
+-- @field _foldsymbols A table of recognized fold points for the lexer.
+-- Keys are token names with table values defining fold points. Those table
+-- values have string keys of keywords or characters that indicate a fold
+-- point whose values are integers. A value of `1` indicates a beginning fold
+-- point and a value of `-1` indicates an ending fold point. Values can also
+-- be functions that return `1`, `-1`, or `0` (indicating no fold point) for
+-- keys which need additional processing.
+-- There is also a required `_patterns` key whose value is a table containing
+-- Lua pattern strings that match all fold points (the string keys contained
+-- in token name table values). When the lexer encounters text that matches
+-- one of those patterns, the matched text is looked up in its token's table
+-- to determine whether or not it is a fold point.
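+-- For instance, a C-like lexer might declare (sketch only):
+--
+--     M._foldsymbols = {
+--       _patterns = {'[{}]', '/%*', '%*/'},
+--       [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+--       [l.COMMENT] = {['/*'] = 1, ['*/'] = -1}
+--     }
+--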
+-- @field _fold If this function exists in the lexer, it is called for folding
+-- the document instead of using `_foldsymbols` or indentation.
+-- @field _lexer The parent lexer object whose rules should be used. This field
+-- is only necessary to disambiguate a proxy lexer that loaded parent and
+-- child lexers for embedding and ended up having multiple parents loaded.
+-- @field _RULES A map of rule name keys with their associated LPeg pattern
+-- values for the lexer.
+-- This is constructed from the lexer's `_rules` table and accessible to other
+-- lexers for embedded lexer applications like modifying parent or child
+-- rules.
+-- @field _LEXBYLINE Indicates the lexer can only process one whole line of text
+-- (instead of an arbitrary chunk of text) at a time.
+-- The default value is `false`. Line lexers cannot look ahead to subsequent
+-- lines.
+-- @field _FOLDBYINDENTATION Declares the lexer does not define fold points and
+-- that fold points should be calculated based on changes in indentation.
+-- @class table
+-- @name lexer
+local lexer
+]]
+
+return M
diff --git a/lexers/lilypond.lua b/lexers/lilypond.lua
new file mode 100644
index 0000000..f5af771
--- /dev/null
+++ b/lexers/lilypond.lua
@@ -0,0 +1,40 @@
+-- Copyright 2006-2013 Robert Gieseke. See LICENSE.
+-- Lilypond LPeg lexer.
+-- TODO Embed Scheme; Notes?, Numbers?
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'lilypond'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '%' * l.nonnewline^0
+-- TODO: block comment.
+local comment = token(l.COMMENT, line_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', false, true))
+
+-- Keywords, commands.
+local keyword = token(l.KEYWORD, '\\' * l.word)
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S("{}'~<>|"))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'string', string},
+ {'keyword', keyword},
+ {'operator', operator},
+ {'identifier', identifier},
+}
+
+return M
diff --git a/lexers/lisp.lua b/lexers/lisp.lua
new file mode 100644
index 0000000..cdc7126
--- /dev/null
+++ b/lexers/lisp.lua
@@ -0,0 +1,84 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Lisp LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'lisp'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = ';' * l.nonnewline^0
+local block_comment = '#|' * (l.any - '|#')^0 * P('|#')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+local word = l.alpha * (l.alnum + '_' + '-')^0
+
+-- Strings.
+local literal = "'" * word
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, literal + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'defclass', 'defconstant', 'defgeneric', 'define-compiler-macro',
+ 'define-condition', 'define-method-combination', 'define-modify-macro',
+ 'define-setf-expander', 'define-symbol-macro', 'defmacro', 'defmethod',
+ 'defpackage', 'defparameter', 'defsetf', 'defstruct', 'deftype', 'defun',
+ 'defvar',
+ 'abort', 'assert', 'block', 'break', 'case', 'catch', 'ccase', 'cerror',
+ 'cond', 'ctypecase', 'declaim', 'declare', 'do', 'do*', 'do-all-symbols',
+ 'do-external-symbols', 'do-symbols', 'dolist', 'dotimes', 'ecase', 'error',
+ 'etypecase', 'eval-when', 'flet', 'handler-bind', 'handler-case', 'if',
+ 'ignore-errors', 'in-package', 'labels', 'lambda', 'let', 'let*', 'locally',
+ 'loop', 'macrolet', 'multiple-value-bind', 'proclaim', 'prog', 'prog*',
+ 'prog1', 'prog2', 'progn', 'progv', 'provide', 'require', 'restart-bind',
+ 'restart-case', 'restart-name', 'return', 'return-from', 'signal',
+ 'symbol-macrolet', 'tagbody', 'the', 'throw', 'typecase', 'unless',
+ 'unwind-protect', 'when', 'with-accessors', 'with-compilation-unit',
+ 'with-condition-restarts', 'with-hash-table-iterator',
+ 'with-input-from-string', 'with-open-file', 'with-open-stream',
+ 'with-output-to-string', 'with-package-iterator', 'with-simple-restart',
+ 'with-slots', 'with-standard-io-syntax',
+ 't', 'nil'
+}, '-'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('<>=*/+-`@%()'))
+
+-- Entities.
+local entity = token('entity', '&' * word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+ {'entity', entity},
+}
+
+M._tokenstyles = {
+ entity = l.STYLE_VARIABLE
+}
+
+M._foldsymbols = {
+ _patterns = {'[%(%)%[%]{}]', '#|', '|#', ';'},
+ [l.OPERATOR] = {
+ ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
+ },
+ [l.COMMENT] = {['#|'] = 1, ['|#'] = -1, [';'] = l.fold_line_comments(';')}
+}
+
+return M
diff --git a/lexers/litcoffee.lua b/lexers/litcoffee.lua
new file mode 100644
index 0000000..f31a2d1
--- /dev/null
+++ b/lexers/litcoffee.lua
@@ -0,0 +1,21 @@
+-- Copyright 2006-2013 Robert Gieseke. See LICENSE.
+-- Literate CoffeeScript LPeg lexer.
+-- http://coffeescript.org/#literate
+
+local l = require('lexer')
+local token = l.token
+local P = lpeg.P
+
+local M = {_NAME = 'litcoffee'}
+
+-- Embedded in Markdown.
+local markdown = l.load('markdown')
+M._lexer = markdown -- ensure markdown's rules are loaded, not HTML's
+
+-- Embedded CoffeeScript.
+local coffeescript = l.load('coffeescript')
+local coffee_start_rule = token(l.STYLE_EMBEDDED, (P(' ')^4 + P('\t')))
+local coffee_end_rule = token(l.STYLE_EMBEDDED, l.newline)
+l.embed_lexer(markdown, coffeescript, coffee_start_rule, coffee_end_rule)
+
+return M
diff --git a/lexers/lua.lua b/lexers/lua.lua
new file mode 100644
index 0000000..2ad1190
--- /dev/null
+++ b/lexers/lua.lua
@@ -0,0 +1,190 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Lua LPeg lexer.
+-- Original written by Peter Odding, 2007/04/04.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'lua'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[',
+ function(input, index, eq)
+ local _, e = input:find(']'..eq..']', index, true)
+ return (e or #input) + 1
+ end)
+
+-- Comments.
+local line_comment = '--' * l.nonnewline^0
+local block_comment = '--' * longstring
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, sq_str + dq_str) +
+ token('longstring', longstring)
+
+-- Numbers.
+local lua_integer = P('-')^-1 * (l.hex_num + l.dec_num)
+local number = token(l.NUMBER, l.float + lua_integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function',
+ 'goto', 'if', 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then',
+ 'true', 'until', 'while'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'assert', 'collectgarbage', 'dofile', 'error', 'getmetatable', 'ipairs',
+ 'load', 'loadfile', 'next', 'pairs', 'pcall', 'print', 'rawequal', 'rawget',
+ 'rawset', 'require', 'select', 'setmetatable', 'tonumber', 'tostring', 'type',
+ 'xpcall',
+ -- Added in 5.2.
+ 'rawlen'
+})
+
+-- Deprecated functions.
+local deprecated_func = token('deprecated_function', word_match{
+ -- Deprecated in 5.2.
+ 'getfenv', 'loadstring', 'module', 'setfenv', 'unpack'
+})
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ '_G', '_VERSION',
+ -- Added in 5.2.
+ '_ENV'
+})
+
+-- Libraries.
+local library = token('library', word_match({
+ -- Coroutine.
+ 'coroutine', 'coroutine.create', 'coroutine.resume', 'coroutine.running',
+ 'coroutine.status', 'coroutine.wrap', 'coroutine.yield',
+ -- Coroutine added in 5.3.
+ 'coroutine.isyieldable',
+ -- Module.
+ 'package', 'package.cpath', 'package.loaded', 'package.loadlib',
+ 'package.path', 'package.preload',
+ -- Module added in 5.2.
+ 'package.config', 'package.searchers', 'package.searchpath',
+ -- UTF-8 added in 5.3.
+ 'utf8', 'utf8.char', 'utf8.charpattern', 'utf8.codepoint', 'utf8.codes',
+ 'utf8.len', 'utf8.offset',
+ -- String.
+ 'string', 'string.byte', 'string.char', 'string.dump', 'string.find',
+ 'string.format', 'string.gmatch', 'string.gsub', 'string.len', 'string.lower',
+ 'string.match', 'string.rep', 'string.reverse', 'string.sub', 'string.upper',
+ -- String added in 5.3.
+ 'string.pack', 'string.packsize', 'string.unpack',
+ -- Table.
+ 'table', 'table.concat', 'table.insert', 'table.remove', 'table.sort',
+ -- Table added in 5.2.
+ 'table.pack', 'table.unpack',
+ -- Table added in 5.3.
+ 'table.move',
+ -- Math.
+ 'math', 'math.abs', 'math.acos', 'math.asin', 'math.atan', 'math.ceil',
+ 'math.cos', 'math.deg', 'math.exp', 'math.floor', 'math.fmod', 'math.huge',
+ 'math.log', 'math.max', 'math.min', 'math.modf', 'math.pi', 'math.rad',
+ 'math.random', 'math.randomseed', 'math.sin', 'math.sqrt', 'math.tan',
+ -- Math added in 5.3.
+ 'math.maxinteger', 'math.mininteger', 'math.tointeger', 'math.type',
+ 'math.ult',
+ -- IO.
+ 'io', 'io.close', 'io.flush', 'io.input', 'io.lines', 'io.open', 'io.output',
+ 'io.popen', 'io.read', 'io.stderr', 'io.stdin', 'io.stdout', 'io.tmpfile',
+ 'io.type', 'io.write',
+ -- OS.
+ 'os', 'os.clock', 'os.date', 'os.difftime', 'os.execute', 'os.exit',
+ 'os.getenv', 'os.remove', 'os.rename', 'os.setlocale', 'os.time',
+ 'os.tmpname',
+ -- Debug.
+ 'debug', 'debug.debug', 'debug.gethook', 'debug.getinfo', 'debug.getlocal',
+ 'debug.getmetatable', 'debug.getregistry', 'debug.getupvalue',
+ 'debug.sethook', 'debug.setlocal', 'debug.setmetatable', 'debug.setupvalue',
+ 'debug.traceback',
+ -- Debug added in 5.2.
+ 'debug.getuservalue', 'debug.setuservalue', 'debug.upvalueid',
+ 'debug.upvaluejoin',
+}, '.'))
+
+-- Deprecated libraries.
+local deprecated_library = token('deprecated_library', word_match({
+ -- Module deprecated in 5.2.
+ 'package.loaders', 'package.seeall',
+ -- Table deprecated in 5.2.
+ 'table.maxn',
+ -- Math deprecated in 5.2.
+ 'math.log10',
+ -- Math deprecated in 5.3.
+ 'math.atan2', 'math.cosh', 'math.frexp', 'math.ldexp', 'math.pow',
+ 'math.sinh', 'math.tanh',
+ -- Bit32 deprecated in 5.3.
+ 'bit32', 'bit32.arshift', 'bit32.band', 'bit32.bnot', 'bit32.bor',
+ 'bit32.btest', 'bit32.extract', 'bit32.lrotate', 'bit32.lshift',
+ 'bit32.replace', 'bit32.rrotate', 'bit32.rshift', 'bit32.xor',
+ -- Debug deprecated in 5.2.
+ 'debug.getfenv', 'debug.setfenv'
+}, '.'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Labels.
+local label = token(l.LABEL, '::' * l.word * '::')
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-*/%^#=<>&|~;:,.{}[]()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func + deprecated_func},
+ {'constant', constant},
+ {'library', library + deprecated_library},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'label', label},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ longstring = l.STYLE_STRING,
+ deprecated_function = l.STYLE_FUNCTION..',italics',
+ library = l.STYLE_TYPE,
+ deprecated_library = l.STYLE_TYPE..',italics'
+}
+
+local function fold_longcomment(text, pos, line, s, match)
+ if match == '[' then
+ if line:find('^%[=*%[', s) then return 1 end
+ elseif match == ']' then
+ if line:find('^%]=*%]', s) then return -1 end
+ end
+ return 0
+end
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[%({%)}]', '[%[%]]', '%-%-'},
+ [l.KEYWORD] = {
+ ['if'] = 1, ['do'] = 1, ['function'] = 1, ['end'] = -1, ['repeat'] = 1,
+ ['until'] = -1
+ },
+ [l.COMMENT] = {
+ ['['] = fold_longcomment, [']'] = fold_longcomment,
+ ['--'] = l.fold_line_comments('--')
+ },
+ longstring = {['['] = 1, [']'] = -1},
+ [l.OPERATOR] = {['('] = 1, ['{'] = 1, [')'] = -1, ['}'] = -1}
+}
+
+return M
diff --git a/lexers/makefile.lua b/lexers/makefile.lua
new file mode 100644
index 0000000..480e032
--- /dev/null
+++ b/lexers/makefile.lua
@@ -0,0 +1,108 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Makefile LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'makefile'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, P('!')^-1 * l.word_match({
+ -- GNU Make conditionals.
+ 'ifeq', 'ifneq', 'ifdef', 'ifndef', 'else', 'endif',
+ -- Other conditionals.
+ 'if', 'elseif', 'elseifdef', 'elseifndef',
+ -- Directives and other keywords.
+ 'define', 'endef', 'export', 'include', 'override', 'private', 'undefine',
+ 'unexport', 'vpath'
+}, nil, true))
+
+-- Functions.
+local func = token(l.FUNCTION, l.word_match({
+ -- Functions for String Substitution and Analysis.
+ 'subst', 'patsubst', 'strip', 'findstring', 'filter', 'filter-out', 'sort',
+ 'word', 'wordlist', 'words', 'firstword', 'lastword',
+ -- Functions for File Names.
+ 'dir', 'notdir', 'suffix', 'basename', 'addsuffix', 'addprefix', 'join',
+ 'wildcard', 'realpath', 'abspath',
+ -- Functions for Conditionals.
+ 'if', 'or', 'and',
+ -- Miscellaneous Functions.
+ 'foreach', 'call', 'value', 'eval', 'origin', 'flavor', 'shell',
+ -- Functions That Control Make.
+ 'error', 'warning', 'info'
+}, '-'))
+
+-- Variables.
+local word_char, assign = l.any - l.space - S(':#=(){}'), S(':+?')^-1 * '='
+local expanded_var = '$' * ('(' * word_char^1 * ')' + '{' * word_char^1 * '}')
+local auto_var = '$' * S('@%<?^+|*')
+local special_var = l.word_match({
+ 'MAKEFILE_LIST', '.DEFAULT_GOAL', 'MAKE_RESTARTS', '.RECIPEPREFIX',
+ '.VARIABLES', '.FEATURES', '.INCLUDE_DIRS',
+ 'GPATH', 'MAKECMDGOALS', 'MAKESHELL', 'SHELL', 'VPATH'
+}, '.') * #(ws^0 * assign)
+local implicit_var = l.word_match{
+ -- Some common variables.
+ 'AR', 'AS', 'CC', 'CXX', 'CPP', 'FC', 'M2C', 'PC', 'CO', 'GET', 'LEX', 'YACC',
+ 'LINT', 'MAKEINFO', 'TEX', 'TEXI2DVI', 'WEAVE', 'CWEAVE', 'TANGLE', 'CTANGLE',
+ 'RM',
+ -- Some common flag variables.
+ 'ARFLAGS', 'ASFLAGS', 'CFLAGS', 'CXXFLAGS', 'COFLAGS', 'CPPFLAGS', 'FFLAGS',
+ 'GFLAGS', 'LDFLAGS', 'LFLAGS', 'YFLAGS', 'PFLAGS', 'RFLAGS', 'LINTFLAGS',
+ -- Other.
+ 'DESTDIR', 'MAKE', 'MAKEFLAGS', 'MAKEOVERRIDES', 'MFLAGS'
+} * #(ws^0 * assign)
+local computed_var = token(l.OPERATOR, '$' * S('({')) * func
+local variable = token(l.VARIABLE,
+ expanded_var + auto_var + special_var + implicit_var) +
+ computed_var
+
+-- Targets.
+local special_target = token(l.CONSTANT, l.word_match({
+ '.PHONY', '.SUFFIXES', '.DEFAULT', '.PRECIOUS', '.INTERMEDIATE', '.SECONDARY',
+ '.SECONDEXPANSION', '.DELETE_ON_ERROR', '.IGNORE', '.LOW_RESOLUTION_TIME',
+ '.SILENT', '.EXPORT_ALL_VARIABLES', '.NOTPARALLEL', '.ONESHELL', '.POSIX'
+}, '.'))
+local normal_target = token('target', (l.any - l.space - S(':#='))^1)
+local target = l.starts_line((special_target + normal_target) * ws^0 *
+ #(':' * -P('=')))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, word_char^1)
+
+-- Operators.
+local operator = token(l.OPERATOR, assign + S(':$(){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'target', target},
+ {'variable', variable},
+ {'operator', operator},
+ {'identifier', identifier},
+ {'comment', comment},
+}
+
+M._tokenstyles = {
+ target = l.STYLE_LABEL
+}
+
+M._LEXBYLINE = true
+
+-- Embedded Bash.
+local bash = l.load('bash')
+bash._RULES['variable'] = token(l.VARIABLE, '$$' * word_char^1) +
+ bash._RULES['variable'] + variable
+local bash_start_rule = token(l.WHITESPACE, P('\t')) + token(l.OPERATOR, P(';'))
+local bash_end_rule = token(l.WHITESPACE, P('\n'))
+l.embed_lexer(M, bash, bash_start_rule, bash_end_rule)
+
+return M
diff --git a/lexers/markdown.lua b/lexers/markdown.lua
new file mode 100644
index 0000000..9ab1568
--- /dev/null
+++ b/lexers/markdown.lua
@@ -0,0 +1,109 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Markdown LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'markdown'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Block elements.
+local header = token('h6', l.starts_line('######') * l.nonnewline^0) +
+ token('h5', l.starts_line('#####') * l.nonnewline^0) +
+ token('h4', l.starts_line('####') * l.nonnewline^0) +
+ token('h3', l.starts_line('###') * l.nonnewline^0) +
+ token('h2', l.starts_line('##') * l.nonnewline^0) +
+ token('h1', l.starts_line('#') * l.nonnewline^0)
+
+local blockquote = token(l.STRING,
+ lpeg.Cmt(l.starts_line(S(' \t')^0 * '>'),
+ function(input, index)
+ local _, e = input:find('\n[ \t]*\r?\n',
+ index)
+ return (e or #input) + 1
+ end))
+
+local blockcode = token('code', l.starts_line(P(' ')^4 + P('\t')) * -P('<') *
+ l.nonnewline^0)
+
+local hr = token('hr', lpeg.Cmt(l.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))),
+ function(input, index, c)
+ local line = input:match('[^\n]*', index)
+ line = line:gsub('[ \t]', '')
+ if line:find('[^'..c..']') or #line < 2 then
+ return nil
+ end
+ return (input:find('\n', index) or #input) + 1
+ end))
+
+-- Span elements.
+local dq_str = token(l.STRING, l.delimited_range('"', false, true))
+local sq_str = token(l.STRING, l.delimited_range("'", false, true))
+local paren_str = token(l.STRING, l.delimited_range('()'))
+local link = token('link', P('!')^-1 * l.delimited_range('[]') *
+ (P('(') * (l.any - S(') \t'))^0 *
+ (S(' \t')^1 *
+ l.delimited_range('"', false, true))^-1 * ')' +
+ S(' \t')^0 * l.delimited_range('[]')) +
+ P('http://') * (l.any - l.space)^1)
+local link_label = token('link_label', l.delimited_range('[]') * ':') * ws *
+ token('link_url', (l.any - l.space)^1) *
+ (ws * (dq_str + sq_str + paren_str))^-1
+
+local strong = token('strong', (P('**') * (l.any - '**')^0 * P('**')^-1) +
+ (P('__') * (l.any - '__')^0 * P('__')^-1))
+local em = token('em',
+ l.delimited_range('*', true) + l.delimited_range('_', true))
+local code = token('code', (P('``') * (l.any - '``')^0 * P('``')^-1) +
+ l.delimited_range('`', true, true))
+
+local escape = token(l.DEFAULT, P('\\') * 1)
+
+local list = token('list',
+ l.starts_line(S(' \t')^0 * (S('*+-') + R('09')^1 * '.')) *
+ S(' \t'))
+
+M._rules = {
+ {'header', header},
+ {'blockquote', blockquote},
+ {'blockcode', blockcode},
+ {'hr', hr},
+ {'list', list},
+ {'whitespace', ws},
+ {'link_label', link_label},
+ {'escape', escape},
+ {'link', link},
+ {'strong', strong},
+ {'em', em},
+ {'code', code},
+}
+
+local font_size = 10
+local hstyle = 'fore:$(color.red)'
+M._tokenstyles = {
+ h6 = hstyle,
+ h5 = hstyle..',size:'..(font_size + 1),
+ h4 = hstyle..',size:'..(font_size + 2),
+ h3 = hstyle..',size:'..(font_size + 3),
+ h2 = hstyle..',size:'..(font_size + 4),
+ h1 = hstyle..',size:'..(font_size + 5),
+ code = l.STYLE_EMBEDDED..',eolfilled',
+ hr = 'back:$(color.black),eolfilled',
+ link = 'underlined',
+ link_url = 'underlined',
+ link_label = l.STYLE_LABEL,
+ strong = 'bold',
+ em = 'italics',
+ list = l.STYLE_CONSTANT,
+}
+
+-- Embedded HTML.
+local html = l.load('html')
+local start_rule = token('tag', l.starts_line(S(' \t')^0 * '<'))
+local end_rule = token(l.DEFAULT, P('\n')) -- TODO: l.WHITESPACE causes errors
+l.embed_lexer(M, html, start_rule, end_rule)
+
+return M
diff --git a/lexers/matlab.lua b/lexers/matlab.lua
new file mode 100644
index 0000000..5385a41
--- /dev/null
+++ b/lexers/matlab.lua
@@ -0,0 +1,105 @@
+-- Copyright 2006-2013 Martin Morawetz. See LICENSE.
+-- Matlab LPeg lexer.
+-- Based off of lexer code by Mitchell.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'matlab'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = (P('%') + '#') * l.nonnewline^0
+local block_comment = '%{' * (l.any - '%}')^0 * P('%}')^-1
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"')
+local bt_str = l.delimited_range('`')
+local string = token(l.STRING, sq_str + dq_str + bt_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer + l.dec_num + l.hex_num +
+ l.oct_num)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'break', 'case', 'catch', 'continue', 'do', 'else', 'elseif', 'end',
+ 'end_try_catch', 'end_unwind_protect', 'endfor', 'endif', 'endswitch',
+ 'endwhile', 'for', 'function', 'endfunction', 'global', 'if', 'otherwise',
+ 'persistent', 'replot', 'return', 'static', 'switch', 'try', 'until',
+ 'unwind_protect', 'unwind_protect_cleanup', 'varargin', 'varargout', 'while'
+}, nil, true))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'abs', 'any', 'argv','atan2', 'axes', 'axis', 'ceil', 'cla', 'clear', 'clf',
+ 'columns', 'cos', 'delete', 'diff', 'disp', 'doc', 'double', 'drawnow', 'exp',
+ 'figure', 'find', 'fix', 'floor', 'fprintf', 'gca', 'gcf', 'get', 'grid',
+ 'help', 'hist', 'hold', 'isempty', 'isnull', 'length', 'load', 'log', 'log10',
+ 'loglog', 'max', 'mean', 'median', 'min', 'mod', 'ndims', 'numel', 'num2str',
+ 'ones', 'pause', 'plot', 'printf', 'quit', 'rand', 'randn', 'rectangle',
+ 'rem', 'repmat', 'reshape', 'round', 'rows', 'save', 'semilogx', 'semilogy',
+ 'set', 'sign', 'sin', 'size', 'sizeof', 'size_equal', 'sort', 'sprintf',
+ 'squeeze', 'sqrt', 'std', 'strcmp', 'subplot', 'sum', 'tan', 'tic', 'title',
+ 'toc', 'uicontrol', 'who', 'xlabel', 'ylabel', 'zeros'
+})
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ 'EDITOR', 'I', 'IMAGEPATH', 'INFO_FILE', 'J', 'LOADPATH', 'OCTAVE_VERSION',
+ 'PAGER', 'PS1', 'PS2', 'PS4', 'PWD'
+})
+
+-- Variables.
+local variable = token(l.VARIABLE, word_match{
+ 'ans', 'automatic_replot', 'default_return_value', 'do_fortran_indexing',
+ 'define_all_return_values', 'empty_list_elements_ok', 'eps', 'false',
+ 'gnuplot_binary', 'ignore_function_time_stamp', 'implicit_str_to_num_ok',
+ 'Inf', 'inf', 'NaN', 'nan', 'ok_to_lose_imaginary_part',
+ 'output_max_field_width', 'output_precision', 'page_screen_output', 'pi',
+ 'prefer_column_vectors', 'prefer_zero_one_indexing', 'print_answer_id_name',
+ 'print_empty_dimensions', 'realmax', 'realmin', 'resize_on_range_error',
+ 'return_last_computed_value', 'save_precision', 'silent_functions',
+ 'split_long_rows', 'suppress_verbose_help_message', 'treat_neg_dim_as_zero',
+ 'true', 'warn_assign_as_truth_value', 'warn_comma_in_global_decl',
+ 'warn_divide_by_zero', 'warn_function_name_clash',
+ 'whitespace_in_literal_matrix'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/\\|:;.,?<>~`´'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'constant', constant},
+ {'variable', variable},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '[%(%)%[%]]', '%%[{}]?', '#'},
+ [l.KEYWORD] = {
+ ['if'] = 1, ['for'] = 1, ['while'] = 1, switch = 1, ['end'] = -1
+ },
+ [l.OPERATOR] = {['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1},
+ [l.COMMENT] = {
+ ['%{'] = 1, ['%}'] = -1, ['%'] = l.fold_line_comments('%'),
+ ['#'] = l.fold_line_comments('#')
+ }
+}
+
+return M
diff --git a/lexers/nemerle.lua b/lexers/nemerle.lua
new file mode 100644
index 0000000..d628c8a
--- /dev/null
+++ b/lexers/nemerle.lua
@@ -0,0 +1,81 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Nemerle LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'nemerle'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = P('L')^-1 * l.delimited_range("'", true)
+local dq_str = P('L')^-1 * l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'define', 'elif', 'else', 'endif', 'endregion', 'error', 'if', 'ifdef',
+ 'ifndef', 'line', 'pragma', 'region', 'undef', 'using', 'warning'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * S('\t ')^0 * preproc_word)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ '_', 'abstract', 'and', 'array', 'as', 'base', 'catch', 'class', 'def', 'do',
+ 'else', 'extends', 'extern', 'finally', 'foreach', 'for', 'fun', 'if',
+ 'implements', 'in', 'interface', 'internal', 'lock', 'macro', 'match',
+ 'module', 'mutable', 'namespace', 'new', 'out', 'override', 'params',
+ 'private', 'protected', 'public', 'ref', 'repeat', 'sealed', 'static',
+ 'struct', 'syntax', 'this', 'throw', 'try', 'type', 'typeof', 'unless',
+ 'until', 'using', 'variant', 'virtual', 'when', 'where', 'while',
+ -- Values.
+ 'null', 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'byte', 'char', 'decimal', 'double', 'float', 'int', 'list', 'long',
+ 'object', 'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort', 'void'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
+ [l.PREPROCESSOR] = {
+ region = 1, endregion = -1,
+ ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
+ },
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/nim.lua b/lexers/nim.lua
new file mode 100644
index 0000000..d830774
--- /dev/null
+++ b/lexers/nim.lua
@@ -0,0 +1,124 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Nim LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'nim'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local raw_dq_str = 'r' * l.delimited_range('"', false, true)
+local string = token(l.STRING, triple_dq_str + sq_str + dq_str + raw_dq_str)
+
+-- Numbers.
+local dec = l.digit^1 * ('_' * l.digit^1)^0
+local hex = '0' * S('xX') * l.xdigit^1 * ('_' * l.xdigit^1)^0
+local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
+local oct = '0o' * R('07')^1
+local integer = S('+-')^-1 * (bin + hex + oct + dec) *
+ ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1
+local float = l.digit^1 * ('_' * l.digit^1)^0 *
+ ('.' * l.digit^1 * ('_' * l.digit^1)^0)^-1 *
+ S('eE') * S('+-')^-1 * l.digit^1 * ('_' * l.digit^1)^0
+local number = token(l.NUMBER, float + l.float + integer)
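+-- For example, 1_000_000, 0xFF'u8, 0b1010'i16, and 1_000.5e3 are all matched
+-- by the patterns above (sample literals for illustration).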
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'addr', 'and', 'as', 'asm', 'atomic', 'bind', 'block', 'break', 'case',
+ 'cast', 'const', 'continue', 'converter', 'discard', 'distinct', 'div', 'do',
+ 'elif', 'else', 'end', 'enum', 'except', 'export', 'finally', 'for', 'from',
+ 'generic', 'if', 'import', 'in', 'include', 'interface', 'is', 'isnot',
+ 'iterator', 'lambda', 'let', 'macro', 'method', 'mixin', 'mod', 'nil', 'not',
+ 'notin', 'object', 'of', 'or', 'out', 'proc', 'ptr', 'raise', 'ref', 'return',
+ 'shared', 'shl', 'static', 'template', 'try', 'tuple', 'type', 'var', 'when',
+ 'while', 'with', 'without', 'xor', 'yield'
+}, nil, true))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ -- Procs.
+ 'defined', 'definedInScope', 'new', 'unsafeNew', 'internalNew', 'reset',
+ 'high', 'low', 'sizeof', 'succ', 'pred', 'inc', 'dec', 'newSeq', 'len',
+ 'incl', 'excl', 'card', 'ord', 'chr', 'ze', 'ze64', 'toU8', 'toU16', 'toU32',
+ 'abs', 'min', 'max', 'contains', 'cmp', 'setLen', 'newString',
+ 'newStringOfCap', 'add', 'compileOption', 'quit', 'shallowCopy', 'del',
+ 'delete', 'insert', 'repr', 'toFloat', 'toBiggestFloat', 'toInt',
+ 'toBiggestInt', 'addQuitProc', 'substr', 'zeroMem', 'copyMem', 'moveMem',
+ 'equalMem', 'swap', 'getRefcount', 'clamp', 'isNil', 'find', 'contains',
+ 'pop', 'each', 'map', 'GC_ref', 'GC_unref', 'echo', 'debugEcho',
+ 'getTypeInfo', 'Open', 'repopen', 'Close', 'EndOfFile', 'readChar',
+ 'FlushFile', 'readAll', 'readFile', 'writeFile', 'write', 'readLine',
+ 'writeln', 'getFileSize', 'ReadBytes', 'ReadChars', 'readBuffer',
+ 'writeBytes', 'writeChars', 'writeBuffer', 'setFilePos', 'getFilePos',
+ 'fileHandle', 'cstringArrayToSeq', 'allocCStringArray', 'deallocCStringArray',
+ 'atomicInc', 'atomicDec', 'compareAndSwap', 'setControlCHook',
+ 'writeStackTrace', 'getStackTrace', 'alloc', 'alloc0', 'dealloc', 'realloc',
+ 'getFreeMem', 'getTotalMem', 'getOccupiedMem', 'allocShared', 'allocShared0',
+ 'deallocShared', 'reallocShared', 'IsOnStack', 'GC_addCycleRoot',
+ 'GC_disable', 'GC_enable', 'GC_setStrategy', 'GC_enableMarkAndSweep',
+ 'GC_disableMarkAndSweep', 'GC_fullCollect', 'GC_getStatistics',
+ 'nimDestroyRange', 'getCurrentException', 'getCurrentExceptionMsg', 'onRaise',
+ 'likely', 'unlikely', 'rawProc', 'rawEnv', 'finished', 'slurp', 'staticRead',
+ 'gorge', 'staticExec', 'rand', 'astToStr', 'InstatiationInfo', 'raiseAssert',
+ 'shallow', 'compiles', 'safeAdd', 'locals',
+ -- Iterators.
+ 'countdown', 'countup', 'items', 'pairs', 'fields', 'fieldPairs', 'lines',
+ -- Templates.
+ 'accumulateResult', 'newException', 'CurrentSourcePath', 'assert', 'doAssert',
+ 'onFailedAssert', 'eval',
+ -- Threads.
+ 'running', 'joinThread', 'joinThreads', 'createThread', 'threadId',
+ 'myThreadId',
+ -- Channels.
+ 'send', 'recv', 'peek', 'ready'
+}, nil, true))
+
+-- Types.
+local type = token(l.TYPE , word_match({
+ 'int', 'int8', 'int16', 'int32', 'int64', 'uint', 'uint8', 'uint16', 'uint32',
+ 'uint64', 'float', 'float32', 'float64', 'bool', 'char', 'string', 'cstring',
+ 'pointer', 'Ordinal', 'auto', 'any', 'TSignedInt', 'TUnsignedInt', 'TInteger',
+ 'TOrdinal', 'TReal', 'TNumber', 'range', 'array', 'openarray', 'varargs',
+ 'seq', 'set', 'TSlice', 'TThread', 'TChannel',
+ -- Meta Types.
+ 'expr', 'stmt', 'typeDesc', 'void',
+}, nil, true))
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ 'on', 'off', 'isMainModule', 'CompileDate', 'CompileTime', 'NimVersion',
+ 'NimMajor', 'NimMinor', 'NimPatch', 'cpuEndian', 'hostOS', 'hostCPU',
+ 'appType', 'QuitSuccess', 'QuitFailure', 'inf', 'neginf', 'nan'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=+-*/<>@$~&%|!?^.:\\`()[]{},;'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'type', type},
+ {'constant', constant},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lexers/nsis.lua b/lexers/nsis.lua
new file mode 100644
index 0000000..184858a
--- /dev/null
+++ b/lexers/nsis.lua
@@ -0,0 +1,182 @@
+-- Copyright 2006-2013 Robert Gieseke. See LICENSE.
+-- NSIS LPeg lexer.
+-- Based on NSIS 2.46 docs: http://nsis.sourceforge.net/Docs/.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'nsis'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments (4.1).
+local line_comment = (';' * l.nonnewline^0) + ('#' * l.nonnewline^0)
+local block_comment = '/*' * (l.any - '*/')^0 * '*/'
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local ex_str = l.delimited_range('`')
+local string = token(l.STRING, sq_str + dq_str + ex_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.integer)
+
+-- Variables (4.2).
+local variable = token(l.VARIABLE, word_match({
+ '$0', '$1', '$2', '$3', '$4', '$5', '$6', '$7', '$8', '$9',
+ '$R0', '$R1', '$R2', '$R3', '$R4', '$R5', '$R6', '$R7', '$R8', '$R9',
+ '$INSTDIR', '$OUTDIR', '$CMDLINE', '$LANGUAGE',
+ 'Var', '/GLOBAL'
+}, '$/') + ('$' * l.word))
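+-- For example, $0, $R9, $INSTDIR, and user-defined variables such as $MyVar
+-- (a hypothetical name) all lex as variables here.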
+
+-- Constants (4.2.3).
+local constant = token(l.CONSTANT, word_match({
+ '$PROGRAMFILES', '$PROGRAMFILES32', '$PROGRAMFILES64',
+ '$COMMONFILES', '$COMMONFILES32', '$COMMONFILES64',
+ '$DESKTOP', '$EXEDIR', '$EXEFILE', '$EXEPATH', '${NSISDIR}', '$WINDIR',
+ '$SYSDIR', '$TEMP', '$STARTMENU', '$SMPROGRAMS', '$SMSTARTUP',
+ '$QUICKLAUNCH','$DOCUMENTS', '$SENDTO', '$RECENT', '$FAVORITES', '$MUSIC',
+ '$PICTURES', '$VIDEOS', '$NETHOOD', '$FONTS', '$TEMPLATES', '$APPDATA',
+ '$LOCALAPPDATA', '$PRINTHOOD', '$INTERNET_CACHE', '$COOKIES', '$HISTORY',
+ '$PROFILE', '$ADMINTOOLS', '$RESOURCES', '$RESOURCES_LOCALIZED',
+ '$CDBURN_AREA', '$HWNDPARENT', '$PLUGINSDIR',
+}, '$_{}'))
+-- TODO? Constants used in strings: $$ $\r $\n $\t
+
+-- Labels (4.3).
+local label = token(l.LABEL, l.word * ':')
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+-- Pages (4.5).
+ 'Page', 'UninstPage', 'PageEx', 'PageEnd', 'PageExEnd',
+-- Section commands (4.6).
+ 'AddSize', 'Section', 'SectionEnd', 'SectionIn', 'SectionGroup',
+ 'SectionGroupEnd',
+-- Functions (4.7).
+ 'Function', 'FunctionEnd',
+-- Callbacks (4.7.2).
+ '.onGUIInit', '.onInit', '.onInstFailed', '.onInstSuccess', '.onGUIEnd',
+ '.onMouseOverSection', '.onRebootFailed', '.onSelChange', '.onUserAbort',
+ '.onVerifyInstDir', 'un.onGUIInit', 'un.onInit', 'un.onUninstFailed',
+ 'un.onUninstSuccess', 'un.onGUIEnd', 'un.onRebootFailed', 'un.onSelChange',
+ 'un.onUserAbort',
+-- General Attributes (4.8.1).
+ 'AddBrandingImage', 'AllowRootDirInstall', 'AutoCloseWindow', 'BGFont',
+ 'BGGradient', 'BrandingText', '/TRIMLEFT', '/TRIMRIGHT', '/TRIMCENTER', 'Caption',
+ 'ChangeUI', 'CheckBitmap', 'CompletedText', 'ComponentText', 'CRCCheck',
+ 'DetailsButtonText', 'DirText', 'DirVar', 'DirVerify', 'FileErrorText',
+ 'Icon', 'InstallButtonText', 'InstallColors', 'InstallDir',
+ 'InstallDirRegKey', 'InstProgressFlags', 'InstType', 'LicenseBkColor',
+ 'LicenseData', 'LicenseForceSelection', 'LicenseText', 'MiscButtonText',
+ 'Name', 'OutFile', 'RequestExecutionLevel', 'SetFont', 'ShowInstDetails',
+ 'ShowUninstDetails', 'SilentInstall', 'SilentUnInstall', 'SpaceTexts',
+ 'SubCaption', 'UninstallButtonText', 'UninstallCaption', 'UninstallIcon',
+ 'UninstallSubCaption', 'UninstallText', 'WindowIcon', 'XPStyle', 'admin',
+ 'auto', 'bottom', 'checkbox', 'false', 'force', 'height', 'hide', 'highest',
+ 'leave', 'left', 'nevershow', 'none', 'normal', 'off', 'on', 'radiobuttons',
+ 'right', 'show', 'silent', 'silentlog', 'top', 'true', 'user', 'width',
+-- Compiler Flags (4.8.2).
+ 'AllowSkipFiles', 'FileBufSize', 'SetCompress', 'SetCompressor',
+ '/SOLID', '/FINAL', 'zlib', 'bzip2', 'lzma', 'SetCompressorDictSize',
+ 'SetDatablockOptimize', 'SetDateSave', 'SetOverwrite', 'ifnewer', 'ifdiff',
+ 'lastused', 'try',
+-- Version Information (4.8.3).
+ 'VIAddVersionKey', 'VIProductVersion', '/LANG',
+ 'ProductName', 'Comments', 'CompanyName', 'LegalCopyright', 'FileDescription',
+ 'FileVersion', 'ProductVersion', 'InternalName', 'LegalTrademarks',
+ 'OriginalFilename', 'PrivateBuild', 'SpecialBuild',
+-- Basic Instructions (4.9.1).
+ 'Delete', '/REBOOTOK', 'Exec', 'ExecShell', 'ExecWait', 'File', '/nonfatal',
+ 'Rename', 'ReserveFile', 'RMDir', 'SetOutPath',
+-- Registry, INI, File Instructions (4.9.2).
+ 'DeleteINISec', 'DeleteINIStr', 'DeleteRegKey', '/ifempty',
+ 'DeleteRegValue', 'EnumRegKey', 'EnumRegValue', 'ExpandEnvStrings',
+ 'FlushINI', 'ReadEnvStr', 'ReadINIStr', 'ReadRegDWORD', 'ReadRegStr',
+ 'WriteINIStr', 'WriteRegBin', 'WriteRegDWORD', 'WriteRegStr',
+ 'WriteRegExpandStr', 'HKCR', 'HKEY_CLASSES_ROOT', 'HKLM', 'HKEY_LOCAL_MACHINE',
+ 'HKCU', 'HKEY_CURRENT_USER', 'HKU', 'HKEY_USERS', 'HKCC',
+ 'HKEY_CURRENT_CONFIG', 'HKDD', 'HKEY_DYN_DATA', 'HKPD',
+ 'HKEY_PERFORMANCE_DATA', 'SHCTX', 'SHELL_CONTEXT',
+
+-- General Purpose Instructions (4.9.3).
+ 'CallInstDLL', 'CopyFiles',
+ '/SILENT', '/FILESONLY', 'CreateDirectory', 'CreateShortCut', 'GetDLLVersion',
+ 'GetDLLVersionLocal', 'GetFileTime', 'GetFileTimeLocal', 'GetFullPathName',
+ '/SHORT', 'GetTempFileName', 'SearchPath', 'SetFileAttributes', 'RegDLL',
+ 'UnRegDLL',
+-- Flow Control Instructions (4.9.4).
+ 'Abort', 'Call', 'ClearErrors', 'GetCurrentAddress', 'GetFunctionAddress',
+ 'GetLabelAddress', 'Goto', 'IfAbort', 'IfErrors', 'IfFileExists',
+ 'IfRebootFlag', 'IfSilent', 'IntCmp', 'IntCmpU', 'MessageBox', 'MB_OK',
+ 'MB_OKCANCEL', 'MB_ABORTRETRYIGNORE', 'MB_RETRYCANCEL', 'MB_YESNO',
+ 'MB_YESNOCANCEL', 'MB_ICONEXCLAMATION', 'MB_ICONINFORMATION',
+ 'MB_ICONQUESTION', 'MB_ICONSTOP', 'MB_USERICON', 'MB_TOPMOST',
+ 'MB_SETFOREGROUND', 'MB_RIGHT', 'MB_RTLREADING', 'MB_DEFBUTTON1',
+ 'MB_DEFBUTTON2', 'MB_DEFBUTTON3', 'MB_DEFBUTTON4', 'IDABORT', 'IDCANCEL',
+ 'IDIGNORE', 'IDNO', 'IDOK', 'IDRETRY', 'IDYES', 'Return', 'Quit', 'SetErrors',
+ 'StrCmp', 'StrCmpS',
+-- File Instructions (4.9.5).
+ 'FileClose', 'FileOpen', 'FileRead', 'FileReadByte', 'FileSeek', 'FileWrite',
+ 'FileWriteByte', 'FindClose', 'FindFirst', 'FindNext',
+-- Uninstaller Instructions (4.9.6).
+ 'WriteUninstaller',
+-- Miscellaneous Instructions (4.9.7).
+ 'GetErrorLevel', 'GetInstDirError', 'InitPluginsDir', 'Nop', 'SetErrorLevel',
+ 'SetRegView', 'SetShellVarContext', 'all', 'current', 'Sleep',
+-- String Manipulation Instructions (4.9.8).
+ 'StrCpy', 'StrLen',
+-- Stack Support (4.9.9).
+ 'Exch', 'Pop', 'Push',
+-- Integer Support (4.9.10).
+ 'IntFmt', 'IntOp',
+-- Reboot Instructions (4.9.11).
+ 'Reboot', 'SetRebootFlag',
+-- Install Logging Instructions (4.9.12).
+ 'LogSet', 'LogText',
+-- Section Management (4.9.13).
+ 'SectionSetFlags', 'SectionGetFlags',
+ 'SectionSetText', 'SectionGetText', 'SectionSetInstTypes',
+ 'SectionGetInstTypes', 'SectionSetSize', 'SectionGetSize', 'SetCurInstType',
+ 'GetCurInstType', 'InstTypeSetText', 'InstTypeGetText',
+-- User Interface Instructions (4.9.14).
+ 'BringToFront', 'CreateFont', 'DetailPrint', 'EnableWindow', 'FindWindow',
+ 'GetDlgItem', 'HideWindow', 'IsWindow', 'LockWindow', 'SendMessage',
+ 'SetAutoClose', 'SetBrandingImage', 'SetDetailsView', 'SetDetailsPrint',
+ 'listonly','textonly', 'both', 'SetCtlColors', '/BRANDING', 'SetSilent',
+ 'ShowWindow',
+-- Multiple Languages Instructions (4.9.15).
+ 'LoadLanguageFile', 'LangString', 'LicenseLangString',
+-- Compile time commands (5).
+ '!include', '!addincludedir', '!addplugindir', '!appendfile', '!cd',
+ '!delfile', '!echo', '!error', '!execute', '!packhdr', '!system', '!tempfile',
+ '!warning', '!verbose', '{__FILE__}', '{__LINE__}', '{__DATE__}',
+ '{__TIME__}', '{__TIMESTAMP__}', '{NSIS_VERSION}', '!define', '!undef',
+ '!ifdef', '!ifndef', '!if', '!ifmacrodef', '!ifmacrondef', '!else', '!endif',
+ '!insertmacro', '!macro', '!macroend', '!searchparse', '!searchreplace',
+}, '/!.{}_'))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-*/%|&^~!<>'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'string', string},
+ {'constant', constant},
+ {'variable', variable},
+ {'keyword', keyword},
+ {'number', number},
+ {'operator', operator},
+ {'label', label},
+ {'identifier', identifier},
+}
+
+return M
diff --git a/lexers/null.lua b/lexers/null.lua
new file mode 100644
index 0000000..d5a1131
--- /dev/null
+++ b/lexers/null.lua
@@ -0,0 +1,6 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Null LPeg lexer.
+
+local M = {_NAME = 'null'}
+
+return M
diff --git a/lexers/objective_c.lua b/lexers/objective_c.lua
new file mode 100644
index 0000000..db8a5a2
--- /dev/null
+++ b/lexers/objective_c.lua
@@ -0,0 +1,87 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Objective C LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'objective_c'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = P('L')^-1 * l.delimited_range("'", true)
+local dq_str = P('L')^-1 * l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Preprocessor.
+local preproc_word = word_match{
+ 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef',
+ 'ifndef', 'import', 'include', 'line', 'pragma', 'undef',
+ 'warning'
+}
+local preproc = token(l.PREPROCESSOR,
+ l.starts_line('#') * S('\t ')^0 * preproc_word *
+ (l.nonnewline_esc^1 + l.space * l.nonnewline_esc^0))
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ -- From C.
+ 'asm', 'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else',
+ 'extern', 'false', 'for', 'goto', 'if', 'inline', 'register', 'return',
+ 'sizeof', 'static', 'switch', 'true', 'typedef', 'void', 'volatile', 'while',
+ 'restrict', '_Bool', '_Complex', '_Pragma', '_Imaginary',
+ -- Objective C.
+ 'oneway', 'in', 'out', 'inout', 'bycopy', 'byref', 'self', 'super',
+ -- Preprocessor directives.
+ '@interface', '@implementation', '@protocol', '@end', '@private',
+ '@protected', '@public', '@class', '@selector', '@encode', '@defs',
+ '@synchronized', '@try', '@throw', '@catch', '@finally',
+ -- Constants.
+ 'TRUE', 'FALSE', 'YES', 'NO', 'NULL', 'nil', 'Nil', 'METHOD_NULL'
+}, '@'))
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'apply_t', 'id', 'Class', 'MetaClass', 'Object', 'Protocol', 'retval_t',
+ 'SEL', 'STR', 'IMP', 'BOOL', 'TypedStream'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'string', string},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
+ [l.PREPROCESSOR] = {
+ region = 1, endregion = -1,
+ ['if'] = 1, ifdef = 1, ifndef = 1, endif = -1
+ },
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/pascal.lua b/lexers/pascal.lua
new file mode 100644
index 0000000..20a7800
--- /dev/null
+++ b/lexers/pascal.lua
@@ -0,0 +1,78 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Pascal LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'pascal'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local bblock_comment = '{' * (l.any - '}')^0 * P('}')^-1
+local pblock_comment = '(*' * (l.any - '*)')^0 * P('*)')^-1
+local comment = token(l.COMMENT, line_comment + bblock_comment + pblock_comment)
+
+-- Strings.
+local string = token(l.STRING, S('uUrR')^-1 *
+ l.delimited_range("'", true, true))
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlDdFf')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'and', 'array', 'as', 'at', 'asm', 'begin', 'case', 'class', 'const',
+ 'constructor', 'destructor', 'dispinterface', 'div', 'do', 'downto', 'else',
+ 'end', 'except', 'exports', 'file', 'final', 'finalization', 'finally', 'for',
+ 'function', 'goto', 'if', 'implementation', 'in', 'inherited',
+ 'initialization', 'inline', 'interface', 'is', 'label', 'mod', 'not',
+ 'object', 'of', 'on', 'or', 'out', 'packed', 'procedure', 'program',
+ 'property', 'raise', 'record', 'repeat', 'resourcestring', 'set', 'sealed',
+ 'shl', 'shr', 'static', 'string', 'then', 'threadvar', 'to', 'try', 'type',
+ 'unit', 'unsafe', 'until', 'uses', 'var', 'while', 'with', 'xor',
+ 'absolute', 'abstract', 'assembler', 'automated', 'cdecl', 'contains',
+ 'default', 'deprecated', 'dispid', 'dynamic', 'export', 'external', 'far',
+ 'forward', 'implements', 'index', 'library', 'local', 'message', 'name',
+ 'namespaces', 'near', 'nodefault', 'overload', 'override', 'package',
+ 'pascal', 'platform', 'private', 'protected', 'public', 'published', 'read',
+ 'readonly', 'register', 'reintroduce', 'requires', 'resident', 'safecall',
+ 'stdcall', 'stored', 'varargs', 'virtual', 'write', 'writeln', 'writeonly',
+ 'false', 'nil', 'self', 'true'
+}, nil, true))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ 'chr', 'ord', 'succ', 'pred', 'abs', 'round', 'trunc', 'sqr', 'sqrt',
+ 'arctan', 'cos', 'sin', 'exp', 'ln', 'odd', 'eof', 'eoln'
+}, nil, true))
+
+-- Types.
+local type = token(l.TYPE, word_match({
+ 'shortint', 'byte', 'char', 'smallint', 'integer', 'word', 'longint',
+ 'cardinal', 'boolean', 'bytebool', 'wordbool', 'longbool', 'real', 'single',
+ 'double', 'extended', 'comp', 'currency', 'pointer'
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('.,;^@:=<>+-/*()[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'type', type},
+ {'string', string},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/perl.lua b/lexers/perl.lua
new file mode 100644
index 0000000..a80248c
--- /dev/null
+++ b/lexers/perl.lua
@@ -0,0 +1,161 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Perl LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'perl'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '#' * l.nonnewline_esc^0
+local block_comment = l.starts_line('=') * l.alpha *
+ (l.any - l.newline * '=cut')^0 * (l.newline * '=cut')^-1
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
+local literal_delimited = P(function(input, index) -- for single delimiter sets
+ local delimiter = input:sub(index, index)
+ if not delimiter:find('%w') then -- only non alpha-numerics
+ local match_pos, patt
+ if delimiter_matches[delimiter] then
+ -- Handle nested delimiter/matches in strings.
+ local s, e = delimiter, delimiter_matches[delimiter]
+ patt = l.delimited_range(s..e, false, false, true)
+ else
+ patt = l.delimited_range(delimiter)
+ end
+ match_pos = lpeg.match(patt, input, index)
+ return match_pos or #input + 1
+ end
+end)
+local literal_delimited2 = P(function(input, index) -- for two delimiter sets
+ local delimiter = input:sub(index, index)
+ -- Only consider non-alphanumeric, non-space characters as delimiters.
+ -- Excluding the space keeps operators like "-s" from being misread as the
+ -- start of a two-part literal.
+ if not delimiter:find('[%w ]') then
+ local patt, first_match_pos, final_match_pos
+ if delimiter_matches[delimiter] then
+ -- Handle nested delimiter/matches in strings.
+ local s, e = delimiter, delimiter_matches[delimiter]
+ patt = l.delimited_range(s..e, false, false, true)
+ else
+ patt = l.delimited_range(delimiter)
+ end
+ first_match_pos = lpeg.match(patt, input, index)
+ final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
+ if not final_match_pos then -- using (), [], {}, or <> notation
+ final_match_pos = lpeg.match(l.space^0 * patt, input, first_match_pos)
+ end
+ return final_match_pos or #input + 1
+ end
+end)
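+-- For example, literal_delimited matches single-part literals such as
+-- q(foo), qw[a b c], and qr{...}, while literal_delimited2 matches both
+-- parts of two-part literals such as s/foo/bar/ and tr|a-z|A-Z|.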
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local cmd_str = l.delimited_range('`')
+local heredoc = '<<' * P(function(input, index)
+ local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
+ if s == index and delimiter then
+ local end_heredoc = '[\n\r\f]+'
+ local _, e = input:find(end_heredoc..delimiter, e)
+ return e and e + 1 or #input + 1
+ end
+end)
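+-- For example, this matches a heredoc such as:
+--   print <<EOT;
+--   some text
+--   EOT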
+local lit_str = 'q' * P('q')^-1 * literal_delimited
+local lit_array = 'qw' * literal_delimited
+local lit_cmd = 'qx' * literal_delimited
+local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
+local regex_str = #P('/') * l.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
+ l.delimited_range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimited * S('imosx')^0
+local lit_match = 'm' * literal_delimited * S('cgimosx')^0
+local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
+local string = token(l.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
+ lit_array + lit_cmd + lit_tr) +
+ token(l.REGEX, regex_str + lit_regex + lit_match + lit_sub)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT',
+ 'require', 'use',
+ 'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if',
+ 'last', 'local', 'my', 'next', 'our', 'package', 'return', 'sub', 'unless',
+ 'until', 'while', '__FILE__', '__LINE__', '__PACKAGE__',
+ 'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller',
+ 'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir',
+ 'close', 'connect', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined',
+ 'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent', 'endnetent',
+ 'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists',
+ 'exit', 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
+ 'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname',
+ 'gethostent', 'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent',
+ 'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
+ 'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid',
+ 'getservbyname', 'getservbyport', 'getservent', 'getsockname', 'getsockopt',
+ 'glob', 'gmtime', 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl',
+ 'join', 'keys', 'kill', 'lcfirst', 'lc', 'length', 'link', 'listen',
+ 'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
+ 'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop',
+ 'pos', 'printf', 'print', 'prototype', 'push', 'quotemeta', 'rand', 'readdir',
+ 'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
+ 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek', 'select',
+ 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
+ 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
+ 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
+ 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf',
+ 'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
+ 'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time',
+ 'times', 'truncate', 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack',
+ 'unshift', 'untie', 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray',
+ 'warn', 'write'
+}, '2'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Variables.
+local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
+ S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+ ':' * (l.any - ':') + P('$') * -l.word + l.digit^1)
+local plain_var = ('$#' + S('$@%')) * P('$')^0 * l.word + '$#'
+local variable = token(l.VARIABLE, special_var + plain_var)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}'))
+
+-- Markers.
+local marker = token(l.COMMENT, word_match{'__DATA__', '__END__'} * l.any^0)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'marker', marker},
+ {'function', func},
+ {'string', string},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'number', number},
+ {'variable', variable},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[%[%]{}]', '#'},
+ [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/php.lua b/lexers/php.lua
new file mode 100644
index 0000000..feaa670
--- /dev/null
+++ b/lexers/php.lua
@@ -0,0 +1,99 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- PHP LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'php'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = (P('//') + '#') * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local bt_str = l.delimited_range('`')
+local heredoc = '<<<' * P(function(input, index)
+ local _, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f]+', index)
+ if delimiter then
+ local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ return e and e + 1
+ end
+end)
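+-- A PHP heredoc looks like:
+--   $greeting = <<<EOT
+--   Hello
+--   EOT;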
+local string = token(l.STRING, sq_str + dq_str + bt_str + heredoc)
+-- TODO: interpolated code.
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'and', 'array', 'as', 'bool', 'boolean', 'break', 'case',
+ 'cfunction', 'class', 'const', 'continue', 'declare', 'default',
+ 'die', 'directory', 'do', 'double', 'echo', 'else', 'elseif',
+ 'empty', 'enddeclare', 'endfor', 'endforeach', 'endif',
+ 'endswitch', 'endwhile', 'eval', 'exit', 'extends', 'false',
+ 'float', 'for', 'foreach', 'function', 'global', 'if', 'include',
+ 'include_once', 'int', 'integer', 'isset', 'list', 'new', 'null',
+ 'object', 'old_function', 'or', 'parent', 'print', 'real',
+ 'require', 'require_once', 'resource', 'return', 'static',
+ 'stdclass', 'string', 'switch', 'true', 'unset', 'use', 'var',
+ 'while', 'xor', '__class__', '__file__', '__function__',
+ '__line__', '__sleep', '__wakeup'
+})
+
+-- Variables.
+local word = (l.alpha + '_' + R('\127\255')) * (l.alnum + '_' + R('\127\255'))^0
+local variable = token(l.VARIABLE, '$' * word)
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!@%^*&()-+=|/.,;:<>[]{}') + '?' * -P('>'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'variable', variable},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+-- Embedded in HTML.
+local html = l.load('html')
+
+-- Embedded PHP.
+local php_start_rule = token('php_tag', '<?' * ('php' * l.space)^-1)
+local php_end_rule = token('php_tag', '?>')
+l.embed_lexer(html, M, php_start_rule, php_end_rule)
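+-- With this embedding, input such as '<p><?php echo $x; ?></p>' is lexed as
+-- HTML, and the region between the '<?php' and '?>' tags is handed to this
+-- lexer.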
+
+M._tokenstyles = {
+ php_tag = l.STYLE_EMBEDDED
+}
+
+local _foldsymbols = html._foldsymbols
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%?'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%?>'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '/%*'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%*/'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '//'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '#'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '[{}()]'
+_foldsymbols.php_tag = {['<?'] = 1, ['?>'] = -1}
+_foldsymbols[l.COMMENT]['/*'], _foldsymbols[l.COMMENT]['*/'] = 1, -1
+_foldsymbols[l.COMMENT]['//'] = l.fold_line_comments('//')
+_foldsymbols[l.COMMENT]['#'] = l.fold_line_comments('#')
+_foldsymbols[l.OPERATOR] = {['{'] = 1, ['}'] = -1, ['('] = 1, [')'] = -1}
+M._foldsymbols = _foldsymbols
+
+return M
diff --git a/lexers/pike.lua b/lexers/pike.lua
new file mode 100644
index 0000000..d5a130c
--- /dev/null
+++ b/lexers/pike.lua
@@ -0,0 +1,70 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Pike LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'pike'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local nested_comment = l.nested_pair('/*', '*/')
+local comment = token(l.COMMENT, line_comment + nested_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local lit_str = '#' * l.delimited_range('"')
+local string = token(l.STRING, sq_str + dq_str + lit_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('lLdDfF')^-1)
+
+-- Preprocessors.
+local preproc = token(l.PREPROCESSOR, l.starts_line('#') * l.nonnewline^0)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'for',
+ 'foreach', 'gauge', 'if', 'lambda', 'return', 'sscanf', 'switch', 'while',
+ 'import', 'inherit',
+ -- Type modifiers.
+ 'constant', 'extern', 'final', 'inline', 'local', 'nomask', 'optional',
+ 'private', 'protected', 'public', 'static', 'variant'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'array', 'class', 'float', 'function', 'int', 'mapping', 'mixed', 'multiset',
+ 'object', 'program', 'string', 'void'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('<>=!+-/*%&|^~@`.,:;()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/pkgbuild.lua b/lexers/pkgbuild.lua
new file mode 100644
index 0000000..22aa275
--- /dev/null
+++ b/lexers/pkgbuild.lua
@@ -0,0 +1,89 @@
+-- Copyright 2006-2013 gwash. See LICENSE.
+-- Archlinux PKGBUILD LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'pkgbuild'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", false, true)
+local dq_str = l.delimited_range('"')
+local ex_str = l.delimited_range('`')
+local heredoc = '<<' * P(function(input, index)
+ local s, e, _, delimiter =
+ input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
+ if s == index and delimiter then
+ local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ return e and e + 1 or #input + 1
+ end
+end)
+local string = token(l.STRING, sq_str + dq_str + ex_str + heredoc)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'patch', 'cd', 'make', 'mkdir', 'cp', 'sed', 'install', 'rm',
+ 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
+ 'do', 'done', 'continue', 'local', 'return', 'git', 'svn', 'co', 'clone',
+ 'gconf-merge-schema', 'msg', 'echo', 'ln',
+ -- Operators.
+ '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
+ '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
+ '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge', '-Np', '-i'
+}, '-'))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{'build'})
+
+local constant = token(l.CONSTANT, word_match{
+ 'pkgname', 'pkgver', 'pkgrel', 'pkgdesc', 'arch', 'url',
+ 'license', 'optdepends', 'depends', 'makedepends', 'provides',
+ 'conflicts', 'replaces', 'install', 'source', 'md5sums',
+ 'pkgdir', 'srcdir'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Variables.
+local variable = token(l.VARIABLE,
+ '$' * (S('!#?*@$') +
+ l.delimited_range('()', true, true) +
+ l.delimited_range('[]', true, true) +
+ l.delimited_range('{}', true, true) +
+ l.delimited_range('`', true, true) +
+ l.digit^1 + l.word))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*^~.,:;?()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'keyword', keyword},
+ {'function', func},
+ {'constant', constant},
+ {'identifier', identifier},
+ {'variable', variable},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[%(%){}]', '#'},
+ [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/powershell.lua b/lexers/powershell.lua
new file mode 100644
index 0000000..a94938b
--- /dev/null
+++ b/lexers/powershell.lua
@@ -0,0 +1,82 @@
+-- Copyright 2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- PowerShell LPeg lexer.
+-- Contributed by Jeff Stone.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'powershell'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'Begin', 'Break', 'Continue', 'Do', 'Else', 'End', 'Exit', 'For', 'ForEach',
+ 'ForEach-Object', 'Get-Date', 'Get-Random', 'If', 'Param', 'Pause',
+ 'Powershell', 'Process', 'Read-Host', 'Return', 'Switch', 'While',
+ 'Write-Host'
+}, '-', true))
+
+-- Comparison Operators.
+local comparison = token(l.KEYWORD, '-' * word_match({
+ 'and', 'as', 'band', 'bor', 'contains', 'eq', 'ge', 'gt', 'is', 'isnot', 'le',
+ 'like', 'lt', 'match', 'ne', 'nomatch', 'not', 'notcontains', 'notlike', 'or',
+ 'replace'
+}, nil, true))
+
+-- Parameters.
+local parameter = token(l.KEYWORD, '-' * word_match({
+ 'Confirm', 'Debug', 'ErrorAction', 'ErrorVariable', 'OutBuffer',
+ 'OutVariable', 'Verbose', 'WhatIf'
+}, nil, true))
+
+-- Properties.
+local property = token(l.KEYWORD, '.' * word_match({
+ 'day', 'dayofweek', 'dayofyear', 'hour', 'millisecond', 'minute', 'month',
+ 'second', 'timeofday', 'year'
+}, nil, true))
+
+-- Types.
+local type = token(l.KEYWORD, '[' * word_match({
+ 'array', 'boolean', 'byte', 'char', 'datetime', 'decimal', 'double',
+ 'hashtable', 'int', 'long', 'single', 'string', 'xml'
+}, nil, true) * ']')
+
+-- Variables.
+local variable = token(l.VARIABLE, '$' * (l.digit^1 + l.word +
+ l.delimited_range('{}', true, true)))
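+-- For example, $args, $1, and brace-delimited names such as ${my var} are
+-- all lexed as variables.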
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true))
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}%`'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'keyword', keyword},
+ {'comparison', comparison},
+ {'parameter', parameter},
+ {'property', property},
+ {'type', type},
+ {'variable', variable},
+ {'string', string},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
+}
+
+return M
diff --git a/lexers/prolog.lua b/lexers/prolog.lua
new file mode 100644
index 0000000..8fe63dc
--- /dev/null
+++ b/lexers/prolog.lua
@@ -0,0 +1,64 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Prolog LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'prolog'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '%' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.digit^1 * ('.' * l.digit^1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'module', 'meta_predicate', 'multifile', 'dynamic', 'abolish',
+ 'current_output', 'peek_code', 'append', 'current_predicate', 'put_byte',
+ 'arg', 'current_prolog_flag', 'put_char', 'asserta', 'assert', 'fail',
+ 'put_code', 'assertz', 'findall', 'read', 'at_end_of_stream', 'float',
+ 'read_term', 'atom', 'flush_output', 'repeat', 'atom_chars', 'functor',
+ 'retract', 'atom_codes', 'get_byte', 'set_input', 'atom_concat', 'get_char',
+ 'set_output', 'atom_length', 'get_code', 'set_prolog_flag', 'atomic', 'halt',
+ 'set_stream_position', 'bagof', 'integer', 'setof', 'call', 'is',
+ 'stream_property', 'catch', 'nl', 'sub_atom', 'char_code', 'nonvar', 'throw',
+ 'char_conversion', 'number', 'clause', 'number_chars',
+ 'unify_with_occurs_check', 'close', 'number_codes', 'var', 'compound', 'once',
+ 'copy_term', 'op', 'write', 'writeln', 'write_canonical', 'write_term',
+ 'writeq', 'current_char_conversion', 'open', 'current_input', 'peek_byte',
+ 'current_op', 'peek_char', 'false', 'true', 'consult', 'member', 'memberchk',
+ 'reverse', 'permutation', 'delete',
+ -- Math.
+ 'mod', 'div', 'abs', 'exp', 'ln', 'log', 'sqrt', 'round', 'trunc', 'val',
+ 'cos', 'sin', 'tan', 'arctan', 'random', 'randominit'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('-!+\\|=:;&<>()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/props.lua b/lexers/props.lua
new file mode 100644
index 0000000..ef5edad
--- /dev/null
+++ b/lexers/props.lua
@@ -0,0 +1,47 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Props LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'props'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Equals.
+local equals = token(l.OPERATOR, '=')
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Variables.
+local variable = token(l.VARIABLE, '$(' * (l.any - ')')^1 * ')')
+
+-- Colors.
+local xdigit = l.xdigit
+local color = token('color', '#' * xdigit * xdigit * xdigit * xdigit * xdigit *
+ xdigit)
+
+M._rules = {
+ {'whitespace', ws},
+ {'color', color},
+ {'comment', comment},
+ {'equals', equals},
+ {'string', string},
+ {'variable', variable},
+}
+
+M._tokenstyles = {
+ color = l.STYLE_NUMBER
+}
+
+M._LEXBYLINE = true
+
+return M
diff --git a/lexers/ps.lua b/lexers/ps.lua
new file mode 100644
index 0000000..5aa8d62
--- /dev/null
+++ b/lexers/ps.lua
@@ -0,0 +1,61 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Postscript LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ps'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+
+-- Strings.
+local arrow_string = l.delimited_range('<>')
+local nested_string = l.delimited_range('()', false, false, true)
+local string = token(l.STRING, arrow_string + nested_string)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'pop', 'exch', 'dup', 'copy', 'roll', 'clear', 'count', 'mark', 'cleartomark',
+ 'counttomark', 'exec', 'if', 'ifelse', 'for', 'repeat', 'loop', 'exit',
+ 'stop', 'stopped', 'countexecstack', 'execstack', 'quit', 'start',
+ 'true', 'false', 'NULL'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'add', 'div', 'idiv', 'mod', 'mul', 'sub', 'abs', 'ned', 'ceiling', 'floor',
+ 'round', 'truncate', 'sqrt', 'atan', 'cos', 'sin', 'exp', 'ln', 'log', 'rand',
+ 'srand', 'rrand'
+})
+
+-- Identifiers.
+local word = (l.alpha + '-') * (l.alnum + '-')^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('[]{}'))
+
+-- Labels.
+local label = token(l.LABEL, '/' * word)
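+-- For example, PostScript name literals such as /Helvetica and /my-proc are
+-- lexed as labels.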
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'label', label},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/python.lua b/lexers/python.lua
new file mode 100644
index 0000000..a96ac60
--- /dev/null
+++ b/lexers/python.lua
@@ -0,0 +1,134 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Python LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'python'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline_esc^0)
+
+-- Strings.
+local sq_str = S('uU')^-1 * l.delimited_range("'", true)
+local dq_str = S('uU')^-1 * l.delimited_range('"', true)
+local triple_sq_str = "'''" * (l.any - "'''")^0 * P("'''")^-1
+local triple_dq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+-- TODO: raw_strs cannot end in single \.
+local raw_sq_str = S('uU')^-1 * S('rR') * l.delimited_range("'", false, true)
+local raw_dq_str = S('uU')^-1 * S('rR') * l.delimited_range('"', false, true)
+local string = token(l.STRING, triple_sq_str + triple_dq_str + sq_str + dq_str +
+ raw_sq_str + raw_dq_str)
+
+-- Numbers.
+local dec = l.digit^1 * S('Ll')^-1
+local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
+local oct = '0' * R('07')^1 * S('Ll')^-1
+local integer = S('+-')^-1 * (bin + l.hex_num + oct + dec)
+local number = token(l.NUMBER, l.float + integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
+ 'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
+ 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'print', 'raise',
+ 'return', 'try', 'while', 'with', 'yield',
+ -- Descriptors/attr access.
+ '__get__', '__set__', '__delete__', '__slots__',
+ -- Class.
+ '__new__', '__init__', '__del__', '__repr__', '__str__', '__cmp__',
+ '__index__', '__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__',
+ '__hash__', '__nonzero__', '__getattr__', '__getattribute__', '__setattr__',
+ '__delattr__', '__call__',
+ -- Operator.
+ '__add__', '__sub__', '__mul__', '__div__', '__floordiv__', '__mod__',
+ '__divmod__', '__pow__', '__and__', '__xor__', '__or__', '__lshift__',
+ '__rshift__', '__neg__', '__pos__', '__abs__', '__invert__',
+ '__iadd__', '__isub__', '__imul__', '__idiv__', '__ifloordiv__', '__imod__',
+ '__ipow__', '__iand__', '__ixor__', '__ior__', '__ilshift__', '__irshift__',
+ -- Conversions.
+ '__int__', '__long__', '__float__', '__complex__', '__oct__', '__hex__',
+ '__coerce__',
+ -- Containers.
+ '__len__', '__getitem__', '__missing__', '__setitem__', '__delitem__',
+ '__contains__', '__iter__', '__getslice__', '__setslice__', '__delslice__',
+ -- Module and class attribs.
+ '__doc__', '__name__', '__dict__', '__file__', '__path__', '__module__',
+ '__bases__', '__class__', '__self__',
+ -- Stdlib/sys.
+ '__builtin__', '__future__', '__main__', '__import__', '__stdin__',
+ '__stdout__', '__stderr__',
+ -- Other.
+ '__debug__'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'abs', 'all', 'any', 'apply', 'basestring', 'bool', 'buffer', 'callable',
+ 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'copyright',
+ 'credits', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
+ 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr',
+ 'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern',
+ 'isinstance', 'issubclass', 'iter', 'len', 'license', 'list', 'locals',
+ 'long', 'map', 'max', 'min', 'object', 'oct', 'open', 'ord', 'pow',
+ 'property', 'quit', 'range', 'raw_input', 'reduce', 'reload', 'repr',
+ 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
+ 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange',
+ 'zip'
+})
+
+-- Constants.
+local constant = token(l.CONSTANT, word_match{
+ 'ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException',
+ 'DeprecationWarning', 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception',
+ 'False', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError',
+ 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
+ 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None',
+ 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError',
+ 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
+ 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
+ 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError',
+ 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError',
+ 'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
+ 'ValueError', 'Warning', 'ZeroDivisionError'
+})
+
+-- Self.
+local self = token('self', P('self'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))
+
+-- Decorators.
+local decorator = token('decorator', l.starts_line('@') * l.nonnewline^0)
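+-- For example, a line beginning with '@staticmethod' or '@app.route(...)' is
+-- lexed as a decorator.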
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'constant', constant},
+ {'self', self},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'decorator', decorator},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ self = l.STYLE_TYPE,
+ decorator = l.STYLE_PREPROCESSOR
+}
+
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lexers/rails.lua b/lexers/rails.lua
new file mode 100644
index 0000000..07f463c
--- /dev/null
+++ b/lexers/rails.lua
@@ -0,0 +1,65 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Ruby on Rails LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local table = _G.table
+
+local M = {_NAME = 'rails'}
+
+-- Whitespace
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Functions.
+
+local actionpack = token(l.FUNCTION, word_match{
+ 'before_filter', 'skip_before_filter', 'skip_after_filter', 'after_filter',
+ 'around_filter', 'filter', 'filter_parameter_logging', 'layout',
+ 'require_dependency', 'render', 'render_action', 'render_text', 'render_file',
+ 'render_template', 'render_nothing', 'render_component',
+ 'render_without_layout', 'rescue_from', 'url_for', 'redirect_to',
+ 'redirect_to_path', 'redirect_to_url', 'respond_to', 'helper',
+ 'helper_method', 'model', 'service', 'observer', 'serialize', 'scaffold',
+ 'verify', 'hide_action'
+})
+
+local view_helpers = token(l.FUNCTION, word_match{
+ 'check_box', 'content_for', 'error_messages_for', 'form_for', 'fields_for',
+ 'file_field', 'hidden_field', 'image_submit_tag', 'label', 'link_to',
+ 'password_field', 'radio_button', 'submit', 'text_field', 'text_area'
+})
+
+local activerecord = token(l.FUNCTION, word_match{
+ 'after_create', 'after_destroy', 'after_save', 'after_update',
+ 'after_validation', 'after_validation_on_create',
+ 'after_validation_on_update', 'before_create', 'before_destroy',
+ 'before_save', 'before_update', 'before_validation',
+ 'before_validation_on_create', 'before_validation_on_update', 'composed_of',
+ 'belongs_to', 'has_one', 'has_many', 'has_and_belongs_to_many', 'validate',
+ 'validates', 'validate_on_create', 'validates_numericality_of',
+ 'validate_on_update', 'validates_acceptance_of', 'validates_associated',
+ 'validates_confirmation_of', 'validates_each', 'validates_format_of',
+ 'validates_inclusion_of', 'validates_exclusion_of', 'validates_length_of',
+ 'validates_presence_of', 'validates_size_of', 'validates_uniqueness_of',
+ 'attr_protected', 'attr_accessible', 'attr_readonly',
+ 'accepts_nested_attributes_for', 'default_scope', 'scope'
+})
+
+local active_support = token(l.FUNCTION, word_match{
+ 'alias_method_chain', 'alias_attribute', 'delegate', 'cattr_accessor',
+ 'mattr_accessor', 'returning', 'memoize'
+})
+
+-- Extend Ruby lexer to include Rails methods.
+local ruby = l.load('ruby')
+local _rules = ruby._rules
+_rules[1] = {'whitespace', ws}
+table.insert(_rules, 3, {'actionpack', actionpack})
+table.insert(_rules, 4, {'view_helpers', view_helpers})
+table.insert(_rules, 5, {'activerecord', activerecord})
+table.insert(_rules, 6, {'active_support', active_support})
+M._rules = _rules
+M._foldsymbols = ruby._foldsymbols
+
+return M
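
The module above is the standard recipe for extending an existing lexer:
load the parent, splice extra rules into its `_rules` list, and re-export
them. Since rules are tried in order, the Rails word lists are inserted
right after 'keyword' so they win over the generic 'identifier' rule. A
minimal sketch of the same idiom (the 'mylib' rule name and its word list
are hypothetical):

local ruby = l.load('ruby')
local rules = ruby._rules
-- Insert ahead of 'identifier' so these names are not matched generically.
table.insert(rules, 3, {'mylib', token(l.FUNCTION, word_match{'my_helper'})})
M._rules = rules
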
diff --git a/lexers/rebol.lua b/lexers/rebol.lua
new file mode 100644
index 0000000..ecedd5a
--- /dev/null
+++ b/lexers/rebol.lua
@@ -0,0 +1,129 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Rebol LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'rebol'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = ';' * l.nonnewline^0
+local block_comment = 'comment' * P(' ')^-1 *
+ l.delimited_range('{}', false, true)
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sl_string = l.delimited_range('"', true)
+local ml_string = l.delimited_range('{}')
+local lit_string = "'" * l.word
+local string = token(l.STRING, sl_string + ml_string + lit_string)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'abs', 'absolute', 'add', 'and~', 'at', 'back', 'change', 'clear',
+ 'complement', 'copy', 'cp', 'divide', 'fifth', 'find', 'first', 'fourth',
+ 'head', 'insert', 'last', 'make', 'max', 'maximum', 'min', 'minimum',
+ 'multiply', 'negate', 'next', 'or~', 'pick', 'poke', 'power', 'random',
+ 'remainder', 'remove', 'second', 'select', 'skip', 'sort', 'subtract', 'tail',
+ 'third', 'to', 'trim', 'xor~', 'alias', 'all', 'any', 'arccosine', 'arcsine',
+ 'arctangent', 'bind', 'break', 'browse', 'call', 'caret-to-offset', 'catch',
+ 'checksum', 'close', 'comment', 'compose', 'compress', 'cosine', 'debase',
+ 'decompress', 'dehex', 'detab', 'dh-compute-key', 'dh-generate-key',
+ 'dh-make-key', 'difference', 'disarm', 'do', 'dsa-generate-key',
+ 'dsa-make-key', 'dsa-make-signature', 'dsa-verify-signature', 'either',
+ 'else', 'enbase', 'entab', 'exclude', 'exit', 'exp', 'foreach', 'form',
+ 'free', 'get', 'get-modes', 'halt', 'hide', 'if', 'in', 'intersect', 'load',
+ 'log-10', 'log-2', 'log-e', 'loop', 'lowercase', 'maximum-of', 'minimum-of',
+ 'mold', 'not', 'now', 'offset-to-caret', 'open', 'parse', 'prin', 'print',
+ 'protect', 'q', 'query', 'quit', 'read', 'read-io', 'recycle', 'reduce',
+ 'repeat', 'return', 'reverse', 'rsa-encrypt', 'rsa-generate-key',
+ 'rsa-make-key', 'save', 'secure', 'set', 'set-modes', 'show', 'sine',
+ 'size-text', 'square-root', 'tangent', 'textinfo', 'throw', 'to-hex',
+ 'to-local-file', 'to-rebol-file', 'trace', 'try', 'union', 'unique',
+ 'unprotect', 'unset', 'until', 'update', 'uppercase', 'use', 'wait', 'while',
+ 'write', 'write-io', 'basic-syntax-header', 'crlf', 'font-fixed',
+ 'font-sans-serif', 'font-serif', 'list-words', 'outstr', 'val', 'value',
+ 'about', 'alert', 'alter', 'append', 'array', 'ask', 'boot-prefs',
+ 'build-tag', 'center-face', 'change-dir', 'charset', 'choose', 'clean-path',
+ 'clear-fields', 'confine', 'confirm', 'context', 'cvs-date', 'cvs-version',
+ 'decode-cgi', 'decode-url', 'deflag-face', 'delete', 'demo', 'desktop',
+ 'dirize', 'dispatch', 'do-boot', 'do-events', 'do-face', 'do-face-alt',
+ 'does', 'dump-face', 'dump-pane', 'echo', 'editor', 'emailer', 'emit',
+ 'extract', 'find-by-type', 'find-key-face', 'find-window', 'flag-face',
+ 'flash', 'focus', 'for', 'forall', 'forever', 'forskip', 'func', 'function',
+ 'get-net-info', 'get-style', 'has', 'help', 'hide-popup', 'import-email',
+ 'inform', 'input', 'insert-event-func', 'join', 'launch', 'launch-thru',
+ 'layout', 'license', 'list-dir', 'load-image', 'load-prefs', 'load-thru',
+ 'make-dir', 'make-face', 'net-error', 'open-events', 'parse-email-addrs',
+ 'parse-header', 'parse-header-date', 'parse-xml', 'path-thru', 'probe',
+ 'protect-system', 'read-net', 'read-thru', 'reboot', 'reform', 'rejoin',
+ 'remold', 'remove-event-func', 'rename', 'repend', 'replace', 'request',
+ 'request-color', 'request-date', 'request-download', 'request-file',
+ 'request-list', 'request-pass', 'request-text', 'resend', 'save-prefs',
+ 'save-user', 'scroll-para', 'send', 'set-font', 'set-net', 'set-para',
+ 'set-style', 'set-user', 'set-user-name', 'show-popup', 'source',
+ 'split-path', 'stylize', 'switch', 'throw-on-error', 'to-binary',
+ 'to-bitset', 'to-block', 'to-char', 'to-date', 'to-decimal', 'to-email',
+ 'to-event', 'to-file', 'to-get-word', 'to-hash', 'to-idate', 'to-image',
+ 'to-integer', 'to-issue', 'to-list', 'to-lit-path', 'to-lit-word', 'to-logic',
+ 'to-money', 'to-none', 'to-pair', 'to-paren', 'to-path', 'to-refinement',
+ 'to-set-path', 'to-set-word', 'to-string', 'to-tag', 'to-time', 'to-tuple',
+ 'to-url', 'to-word', 'unfocus', 'uninstall', 'unview', 'upgrade', 'Usage',
+ 'vbug', 'view', 'view-install', 'view-prefs', 'what', 'what-dir',
+ 'write-user', 'return', 'at', 'space', 'pad', 'across', 'below', 'origin',
+ 'guide', 'tabs', 'indent', 'style', 'styles', 'size', 'sense', 'backcolor',
+ 'do', 'none',
+ 'action?', 'any-block?', 'any-function?', 'any-string?', 'any-type?',
+ 'any-word?', 'binary?', 'bitset?', 'block?', 'char?', 'datatype?', 'date?',
+ 'decimal?', 'email?', 'empty?', 'equal?', 'error?', 'even?', 'event?',
+ 'file?', 'function?', 'get-word?', 'greater-or-equal?', 'greater?', 'hash?',
+ 'head?', 'image?', 'index?', 'integer?', 'issue?', 'length?',
+ 'lesser-or-equal?', 'lesser?', 'library?', 'list?', 'lit-path?', 'lit-word?',
+ 'logic?', 'money?', 'native?', 'negative?', 'none?', 'not-equal?', 'number?',
+ 'object?', 'odd?', 'op?', 'pair?', 'paren?', 'path?', 'port?', 'positive?',
+ 'refinement?', 'routine?', 'same?', 'series?', 'set-path?', 'set-word?',
+ 'strict-equal?', 'strict-not-equal?', 'string?', 'struct?', 'tag?', 'tail?',
+ 'time?', 'tuple?', 'unset?', 'url?', 'word?', 'zero?', 'connected?',
+ 'crypt-strength?', 'exists-key?', 'input?', 'script?', 'type?', 'value?', '?',
+ '??', 'dir?', 'exists-thru?', 'exists?', 'flag-face?', 'found?', 'in-window?',
+ 'info?', 'inside?', 'link-app?', 'link?', 'modified?', 'offset?', 'outside?',
+ 'screen-offset?', 'size?', 'span?', 'view?', 'viewed?', 'win-offset?',
+ 'within?',
+ 'action!', 'any-block!', 'any-function!', 'any-string!', 'any-type!',
+ 'any-word!', 'binary!', 'bitset!', 'block!', 'char!', 'datatype!', 'date!',
+ 'decimal!', 'email!', 'error!', 'event!', 'file!', 'function!', 'get-word!',
+ 'hash!', 'image!', 'integer!', 'issue!', 'library!', 'list!', 'lit-path!',
+ 'lit-word!', 'logic!', 'money!', 'native!', 'none!', 'number!', 'object!',
+ 'op!', 'pair!', 'paren!', 'path!', 'port!', 'refinement!', 'routine!',
+ 'series!', 'set-path!', 'set-word!', 'string!', 'struct!', 'symbol!', 'tag!',
+ 'time!', 'tuple!', 'unset!', 'url!', 'word!',
+ 'true', 'false', 'self'
+}, '~-?!'))
+
+-- Identifiers.
+local word = (l.alpha + '-') * (l.alnum + '-')^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=<>+/*:()[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[%[%]{}]', ';'},
+ [l.COMMENT] = {['{'] = 1, ['}'] = -1, [';'] = l.fold_line_comments(';')},
+ [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1}
+}
+
+return M
diff --git a/lexers/rest.lua b/lexers/rest.lua
new file mode 100644
index 0000000..1e0b3b1
--- /dev/null
+++ b/lexers/rest.lua
@@ -0,0 +1,259 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- reStructuredText LPeg lexer.
+
+local l = require('lexer')
+local token, word_match, starts_line = l.token, l.word_match, l.starts_line
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'rest'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
+local any_indent = S(' \t')^0
+
+-- Section titles (2 or more characters).
+local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
+local adornment = lpeg.C(adornment_chars^2 * any_indent) * (l.newline + -1)
+local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
+ if not adm:find('^%'..c..'+%s*$') then return nil end
+ local rest = input:sub(index)
+ local lines = 1
+ for line, e in rest:gmatch('([^\r\n]+)()') do
+ if lines > 1 and line:match('^(%'..c..'+)%s*$') == adm then
+ return index + e - 1
+ end
+ if lines > 3 or #line > #adm then return nil end
+ lines = lines + 1
+ end
+ return #input + 1
+end)
+local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
+ local pos = adm:match('^%'..c..'+()%s*$')
+ return pos and index - #adm + pos - 1 or nil
+end)
+-- Token needs to be a predefined one in order for folder to work.
+local title = token(l.CONSTANT, overline + underline)
+
+-- Lists.
+local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
+local enum_list = P('(')^-1 *
+ (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)')
+local field_list = ':' * (l.any - ':')^1 * P(':')^-1
+local option_word = l.alnum * (l.alnum + '-')^0
+local option = S('-/') * option_word * (' ' * option_word)^-1 +
+ '--' * option_word * ('=' * option_word)^-1
+local option_list = option * (',' * l.space^1 * option)^-1
+local list = #(l.space^0 * (S('*+-:/') + enum_list)) *
+ starts_line(token('list', l.space^0 * (option_list + bullet_list +
+ enum_list + field_list) *
+ l.space))
+
+-- Literal block.
+local block = P('::') * (l.newline + -1) * function(input, index)
+ local rest = input:sub(index)
+ local level, quote = #rest:match('^([ \t]*)')
+ for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
+ local no_indent = (indent - pos < level and line ~= ' ' or level == 0)
+ local quoted = no_indent and line:find(quote or '^%s*%W')
+ if quoted and not quote then quote = '^%s*%'..line:match('^%s*(%W)') end
+ if no_indent and not quoted and pos > 1 then return index + pos - 1 end
+ end
+ return #input + 1
+end
+local literal_block = token('literal_block', block)
+
+-- Line block.
+local line_block_char = token(l.OPERATOR, starts_line(any_indent * '|'))
+
+local word = l.alpha * (l.alnum + S('-.+'))^0
+
+-- Explicit markup blocks.
+local prefix = any_indent * '.. '
+local footnote_label = '[' * (l.digit^1 + '#' * word^-1 + '*') * ']'
+local footnote = token('footnote_block', prefix * footnote_label * l.space)
+local citation_label = '[' * word * ']'
+local citation = token('citation_block', prefix * citation_label * l.space)
+local link = token('link_block', prefix * '_' *
+ (l.delimited_range('`') + (P('\\') * 1 +
+ l.nonnewline - ':')^1) * ':' * l.space)
+local markup_block = #prefix * starts_line(footnote + citation + link)
+
+-- Directives.
+local directive_type = word_match({
+ -- Admonitions
+ 'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip',
+ 'warning', 'admonition',
+ -- Images
+ 'image', 'figure',
+ -- Body elements
+ 'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric',
+ 'epigraph', 'highlights', 'pull-quote', 'compound', 'container',
+ -- Table
+ 'table', 'csv-table', 'list-table',
+ -- Document parts
+ 'contents', 'sectnum', 'section-autonumbering', 'header', 'footer',
+ -- References
+ 'target-notes', 'footnotes', 'citations',
+ -- HTML-specific
+ 'meta',
+ -- Directives for substitution definitions
+ 'replace', 'unicode', 'date',
+ -- Miscellaneous
+ 'include', 'raw', 'class', 'role', 'default-role', 'title',
+ 'restructuredtext-test-directive',
+}, '-')
+local known_directive = token('directive',
+ prefix * directive_type * '::' * l.space)
+local sphinx_directive_type = word_match({
+ -- The TOC tree.
+ 'toctree',
+ -- Paragraph-level markup.
+ 'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso',
+ 'rubric', 'centered', 'hlist', 'glossary', 'productionlist',
+ -- Showing code examples.
+ 'highlight', 'literalinclude',
+ -- Miscellaneous
+ 'sectionauthor', 'index', 'only', 'tabularcolumns'
+}, '-')
+local sphinx_directive = token('sphinx_directive',
+ prefix * sphinx_directive_type * '::' * l.space)
+local unknown_directive = token('unknown_directive',
+ prefix * word * '::' * l.space)
+local directive = #prefix * starts_line(known_directive + sphinx_directive +
+ unknown_directive)
+
+-- Sphinx code block.
+local indented_block = function(input, index)
+ local rest = input:sub(index)
+ local level = #rest:match('^([ \t]*)')
+ for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
+ if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then
+ return index + pos - 1
+ end
+ end
+ return #input + 1
+end
+local code_block = prefix * 'code-block::' * S(' \t')^1 * l.nonnewline^0 *
+ (l.newline + -1) * indented_block
+local sphinx_block = #prefix * token('code_block', starts_line(code_block))
+
+-- Substitution definitions.
+local substitution = #prefix *
+ token('substitution',
+ starts_line(prefix * l.delimited_range('|') *
+ l.space^1 * word * '::' * l.space))
+
+-- Comments.
+local line_comment = prefix * l.nonnewline^0
+local bprefix = any_indent * '..'
+local block_comment = bprefix * l.newline * indented_block
+local comment = #bprefix *
+ token(l.COMMENT, starts_line(line_comment + block_comment))
+
+-- Inline markup.
+local em = token('em', l.delimited_range('*'))
+local strong = token('strong', ('**' * (l.any - '**')^0 * P('**')^-1))
+local role = token('role', ':' * word * ':' * (word * ':')^-1)
+local interpreted = role^-1 * token('interpreted', l.delimited_range('`')) *
+ role^-1
+local inline_literal = token('inline_literal',
+ '``' * (l.any - '``')^0 * P('``')^-1)
+local link_ref = token('link',
+ (word + l.delimited_range('`')) * '_' * P('_')^-1 +
+ '_' * l.delimited_range('`'))
+local footnote_ref = token('footnote', footnote_label * '_')
+local citation_ref = token('citation', citation_label * '_')
+local substitution_ref = token('substitution', l.delimited_range('|', true) *
+ ('_' * P('_')^-1)^-1)
+local link = token('link', l.alpha * (l.alnum + S('-.'))^1 * ':' *
+ (l.alnum + S('/.+-%@'))^1)
+local inline_markup = (strong + em + inline_literal + link_ref + interpreted +
+ footnote_ref + citation_ref + substitution_ref + link) *
+ -l.alnum
+
+-- Other.
+local non_space = token(l.DEFAULT, l.alnum * (l.any - l.space)^0)
+local escape = token(l.DEFAULT, '\\' * l.any)
+
+M._rules = {
+ {'literal_block', literal_block},
+ {'list', list},
+ {'markup_block', markup_block},
+ {'code_block', sphinx_block},
+ {'directive', directive},
+ {'substitution', substitution},
+ {'comment', comment},
+ {'title', title},
+ {'line_block_char', line_block_char},
+ {'whitespace', ws},
+ {'inline_markup', inline_markup},
+ {'non_space', non_space},
+ {'escape', escape}
+}
+
+M._tokenstyles = {
+ list = l.STYLE_TYPE,
+ literal_block = l.STYLE_EMBEDDED..',eolfilled',
+ footnote_block = l.STYLE_LABEL,
+ citation_block = l.STYLE_LABEL,
+ link_block = l.STYLE_LABEL,
+ directive = l.STYLE_KEYWORD,
+ sphinx_directive = l.STYLE_KEYWORD..',bold',
+ unknown_directive = l.STYLE_KEYWORD..',italics',
+ code_block = l.STYLE_EMBEDDED..',eolfilled',
+ substitution = l.STYLE_VARIABLE,
+ strong = 'bold',
+ em = 'italics',
+ role = l.STYLE_CLASS,
+ interpreted = l.STYLE_STRING,
+ inline_literal = l.STYLE_EMBEDDED,
+ link = 'underlined',
+ footnote = 'underlined',
+ citation = 'underlined',
+}
+
+local sphinx_levels = {
+ ['#'] = 0, ['*'] = 1, ['='] = 2, ['-'] = 3, ['^'] = 4, ['"'] = 5
+}
+
+-- Section-based folding.
+M._fold = function(text, start_pos, start_line, start_level)
+ local folds, line_starts = {}, {}
+ for pos in (text..'\n'):gmatch('().-\r?\n') do
+ line_starts[#line_starts + 1] = pos
+ end
+ local style_at, CONSTANT, level = l.style_at, l.CONSTANT, start_level
+ local sphinx = l.property_int['fold.by.sphinx.convention'] > 0
+ local FOLD_BASE = l.FOLD_BASE
+ local FOLD_HEADER, FOLD_BLANK = l.FOLD_HEADER, l.FOLD_BLANK
+ for i = 1, #line_starts do
+ local pos, next_pos = line_starts[i], line_starts[i + 1]
+ local c = text:sub(pos, pos)
+ local line_num = start_line + i - 1
+ folds[line_num] = level
+ if style_at[start_pos + pos] == CONSTANT and c:find('^[^%w%s]') then
+ local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels)
+ level = not sphinx and level - 1 or sphinx_level
+ if level < FOLD_BASE then level = FOLD_BASE end
+ folds[line_num - 1], folds[line_num] = level, level + FOLD_HEADER
+ level = (not sphinx and level or sphinx_level) + 1
+ elseif c == '\r' or c == '\n' then
+ folds[line_num] = level + FOLD_BLANK
+ end
+ end
+ return folds
+end
+
+l.property['fold.by.sphinx.convention'] = '0'
+
+--[[ Embedded languages.
+local bash = l.load('bash')
+local bash_indent_level
+local start_rule = #(prefix * 'code-block' * '::' * l.space^1 * 'bash' *
+ (l.newline + -1)) * sphinx_directive *
+ token('bash_begin', P(function(input, index)
+ bash_indent_level = #input:match('^([ \t]*)', index)
+ return index
+ end))]]
+
+return M
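
Unlike most lexers here, reST folds with a function instead of a
`_foldsymbols` table: `_fold(text, start_pos, start_line, start_level)` must
return a table mapping line numbers to fold levels. Whether adornment
characters imply the fixed Sphinx section hierarchy is controlled by a
property the function reads through `l.property_int`, so a host wanting that
convention sets it before lexing:

-- Enable the Sphinx adornment order ('#' > '*' > '=' > '-' > '^' > '"').
l.property['fold.by.sphinx.convention'] = '1'
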
diff --git a/lexers/rexx.lua b/lexers/rexx.lua
new file mode 100644
index 0000000..6e13dd3
--- /dev/null
+++ b/lexers/rexx.lua
@@ -0,0 +1,97 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Rexx LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'rexx'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '--' * l.nonnewline_esc^0
+local block_comment = l.nested_pair('/*', '*/')
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Preprocessor.
+local preproc = token(l.PREPROCESSOR, l.starts_line('#') * l.nonnewline^0)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'address', 'arg', 'by', 'call', 'class', 'do', 'drop', 'else', 'end', 'exit',
+ 'expose', 'forever', 'forward', 'guard', 'if', 'interpret', 'iterate',
+ 'leave', 'method', 'nop', 'numeric', 'otherwise', 'parse', 'procedure',
+ 'pull', 'push', 'queue', 'raise', 'reply', 'requires', 'return', 'routine',
+ 'result', 'rc', 'say', 'select', 'self', 'sigl', 'signal', 'super', 'then',
+ 'to', 'trace', 'use', 'when', 'while', 'until'
+}, nil, true))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ 'abbrev', 'abs', 'address', 'arg', 'beep', 'bitand', 'bitor', 'bitxor', 'b2x',
+  'center', 'changestr', 'charin', 'charout', 'chars', 'compare', 'condition',
+ 'copies', 'countstr', 'c2d', 'c2x', 'datatype', 'date', 'delstr', 'delword',
+ 'digits', 'directory', 'd2c', 'd2x', 'errortext', 'filespec', 'form',
+ 'format', 'fuzz', 'insert', 'lastpos', 'left', 'length', 'linein', 'lineout',
+ 'lines', 'max', 'min', 'overlay', 'pos', 'queued', 'random', 'reverse',
+ 'right', 'sign', 'sourceline', 'space', 'stream', 'strip', 'substr',
+ 'subword', 'symbol', 'time', 'trace', 'translate', 'trunc', 'value', 'var',
+ 'verify', 'word', 'wordindex', 'wordlength', 'wordpos', 'words', 'xrange',
+ 'x2b', 'x2c', 'x2d', 'rxfuncadd', 'rxfuncdrop', 'rxfuncquery', 'rxmessagebox',
+ 'rxwinexec', 'sysaddrexxmacro', 'sysbootdrive', 'sysclearrexxmacrospace',
+ 'syscloseeventsem', 'sysclosemutexsem', 'syscls', 'syscreateeventsem',
+ 'syscreatemutexsem', 'syscurpos', 'syscurstate', 'sysdriveinfo',
+ 'sysdrivemap', 'sysdropfuncs', 'sysdroprexxmacro', 'sysdumpvariables',
+ 'sysfiledelete', 'sysfilesearch', 'sysfilesystemtype', 'sysfiletree',
+ 'sysfromunicode', 'systounicode', 'sysgeterrortext', 'sysgetfiledatetime',
+ 'sysgetkey', 'sysini', 'sysloadfuncs', 'sysloadrexxmacrospace', 'sysmkdir',
+ 'sysopeneventsem', 'sysopenmutexsem', 'sysposteventsem', 'syspulseeventsem',
+ 'sysqueryprocess', 'sysqueryrexxmacro', 'sysreleasemutexsem',
+ 'sysreorderrexxmacro', 'sysrequestmutexsem', 'sysreseteventsem', 'sysrmdir',
+ 'syssaverexxmacrospace', 'syssearchpath', 'syssetfiledatetime',
+ 'syssetpriority', 'syssleep', 'sysstemcopy', 'sysstemdelete', 'syssteminsert',
+ 'sysstemsort', 'sysswitchsession', 'syssystemdirectory', 'systempfilename',
+ 'systextscreenread', 'systextscreensize', 'sysutilversion', 'sysversion',
+ 'sysvolumelabel', 'syswaiteventsem', 'syswaitnamedpipe', 'syswindecryptfile',
+ 'syswinencryptfile', 'syswinver'
+}, '2', true))
+
+-- Identifiers.
+local word = l.alpha * (l.alnum + S('@#$\\.!?_'))^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=!<>+-/\\*%&|^~.,:;(){}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'preproc', preproc},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '/%*', '%*/', '%-%-', ':'},
+ [l.KEYWORD] = {['do'] = 1, select = 1, ['end'] = -1, ['return'] = -1},
+ [l.COMMENT] = {
+ ['/*'] = 1, ['*/'] = -1, ['--'] = l.fold_line_comments('--')
+ },
+ [l.OPERATOR] = {[':'] = 1}
+}
+
+return M
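
One subtlety above: `word_match`'s second argument extends the set of word
characters and its third argument enables case-insensitive matching, so the
'2' is what lets names like 'b2x' and 'c2d' lex as single words. A small
sketch:

local wm = l.word_match({'b2x', 'c2d'}, '2', true)
assert(lpeg.match(wm, 'B2X'))       -- matches despite the digit and the case
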
diff --git a/lexers/rhtml.lua b/lexers/rhtml.lua
new file mode 100644
index 0000000..00dee38
--- /dev/null
+++ b/lexers/rhtml.lua
@@ -0,0 +1,29 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- RHTML LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'rhtml'}
+
+-- Embedded in HTML.
+local html = l.load('html')
+
+-- Embedded Ruby.
+local ruby = l.load('rails')
+local ruby_start_rule = token('rhtml_tag', '<%' * P('=')^-1)
+local ruby_end_rule = token('rhtml_tag', '%>')
+l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
+
+M._tokenstyles = {
+ rhtml_tag = l.STYLE_EMBEDDED
+}
+
+local _foldsymbols = html._foldsymbols
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '<%%'
+_foldsymbols._patterns[#_foldsymbols._patterns + 1] = '%%>'
+_foldsymbols.rhtml_tag = {['<%'] = 1, ['%>'] = -1}
+M._foldsymbols = _foldsymbols
+
+return M
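
`l.embed_lexer(parent, child, start_rule, end_rule)` is the whole embedding
story: the child's grammar runs between matches of the start and end rules,
which are ordinary tokens (here styled via the custom 'rhtml_tag' name). The
same recipe works for any pair; a sketch with a hypothetical '<?lua' tag:

local parent, child = l.load('html'), l.load('lua')
local start_rule = token('lua_tag', P('<?lua'))
local end_rule = token('lua_tag', P('?>'))
l.embed_lexer(parent, child, start_rule, end_rule)
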
diff --git a/lexers/rstats.lua b/lexers/rstats.lua
new file mode 100644
index 0000000..a17bac0
--- /dev/null
+++ b/lexers/rstats.lua
@@ -0,0 +1,53 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- R LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'rstats'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * P('i')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'break', 'else', 'for', 'if', 'in', 'next', 'repeat', 'return', 'switch',
+ 'try', 'while', 'Inf', 'NA', 'NaN', 'NULL', 'FALSE', 'TRUE'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'array', 'character', 'complex', 'data.frame', 'double', 'factor', 'function',
+ 'integer', 'list', 'logical', 'matrix', 'numeric', 'vector'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('<->+*/^=.,:;|$()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/ruby.lua b/lexers/ruby.lua
new file mode 100644
index 0000000..910b906
--- /dev/null
+++ b/lexers/ruby.lua
@@ -0,0 +1,148 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Ruby LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'ruby'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '#' * l.nonnewline_esc^0
+local block_comment = l.starts_line('=begin') * (l.any - l.newline * '=end')^0 *
+ (l.newline * '=end')^-1
+local comment = token(l.COMMENT, block_comment + line_comment)
+
+local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'}
+local literal_delimited = P(function(input, index)
+  local delimiter = input:sub(index, index)
+  if not delimiter:find('[%w\r\n\f\t ]') then -- non-alphanumeric, non-space only
+ local match_pos, patt
+ if delimiter_matches[delimiter] then
+ -- Handle nested delimiter/matches in strings.
+ local s, e = delimiter, delimiter_matches[delimiter]
+ patt = l.delimited_range(s..e, false, false, true)
+ else
+ patt = l.delimited_range(delimiter)
+ end
+ match_pos = lpeg.match(patt, input, index)
+ return match_pos or #input + 1
+ end
+end)
+
+-- Strings.
+local cmd_str = l.delimited_range('`')
+local lit_cmd = '%x' * literal_delimited
+local lit_array = '%w' * literal_delimited
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local lit_str = '%' * S('qQ')^-1 * literal_delimited
+local heredoc = '<<' * P(function(input, index)
+ local s, e, indented, _, delimiter =
+ input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
+ if s == index and delimiter then
+ local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
+ local _, e = input:find(end_heredoc..delimiter, e)
+ return e and e + 1 or #input + 1
+ end
+end)
+-- TODO: regex_str fails with `obj.method /patt/` syntax.
+local regex_str = #P('/') * l.last_char_includes('!%^&*([{-=+|:;,?<>~') *
+ l.delimited_range('/', true, false) * S('iomx')^0
+local lit_regex = '%r' * literal_delimited * S('iomx')^0
+local string = token(l.STRING, (sq_str + dq_str + lit_str + heredoc + cmd_str +
+ lit_cmd + lit_array) * S('f')^-1) +
+ token(l.REGEX, regex_str + lit_regex)
+
+local word_char = l.alnum + S('_!?')
+
+-- Numbers.
+local dec = l.digit^1 * ('_' * l.digit^1)^0 * S('ri')^-1
+local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0
+local integer = S('+-')^-1 * (bin + l.hex_num + l.oct_num + dec)
+-- TODO: meta, control, etc. for numeric_literal.
+local numeric_literal = '?' * (l.any - l.space) * -word_char
+local number = token(l.NUMBER, l.float * S('ri')^-1 + integer + numeric_literal)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'BEGIN', 'END', 'alias', 'and', 'begin', 'break', 'case', 'class', 'def',
+ 'defined?', 'do', 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'if',
+ 'in', 'module', 'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry',
+ 'return', 'self', 'super', 'then', 'true', 'undef', 'unless', 'until', 'when',
+ 'while', 'yield', '__FILE__', '__LINE__'
+}, '?!'))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ 'at_exit', 'autoload', 'binding', 'caller', 'catch', 'chop', 'chop!', 'chomp',
+ 'chomp!', 'eval', 'exec', 'exit', 'exit!', 'fail', 'fork', 'format', 'gets',
+ 'global_variables', 'gsub', 'gsub!', 'iterator?', 'lambda', 'load',
+ 'local_variables', 'loop', 'open', 'p', 'print', 'printf', 'proc', 'putc',
+ 'puts', 'raise', 'rand', 'readline', 'readlines', 'require', 'select',
+ 'sleep', 'split', 'sprintf', 'srand', 'sub', 'sub!', 'syscall', 'system',
+ 'test', 'trace_var', 'trap', 'untrace_var'
+}, '?!')) * -S('.:|')
+
+-- Identifiers.
+local word = (l.alpha + '_') * word_char^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Variables.
+local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + l.digit + '-' *
+ S('0FadiIKlpvw'))
+local class_var = '@@' * word
+local inst_var = '@' * word
+local variable = token(l.VARIABLE, global_var + class_var + inst_var)
+
+-- Symbols.
+local symbol = token('symbol', ':' * P(function(input, index)
+ if input:sub(index - 2, index - 2) ~= ':' then return index end
+end) * (word_char^1 + sq_str + dq_str))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'identifier', identifier},
+ {'comment', comment},
+ {'string', string},
+ {'number', number},
+ {'variable', variable},
+ {'symbol', symbol},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ symbol = l.STYLE_CONSTANT
+}
+
+local function disambiguate(text, pos, line, s)
+ return line:sub(1, s - 1):match('^%s*$') and
+ not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
+end
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[%(%)%[%]{}]', '=begin', '=end', '#'},
+ [l.KEYWORD] = {
+ begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
+ case = 1,
+ ['if'] = disambiguate, ['while'] = disambiguate,
+ ['unless'] = disambiguate, ['until'] = disambiguate,
+ ['end'] = -1
+ },
+ [l.OPERATOR] = {
+ ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
+ },
+ [l.COMMENT] = {
+ ['=begin'] = 1, ['=end'] = -1, ['#'] = l.fold_line_comments('#')
+ }
+}
+
+return M
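
The heredoc and symbol rules above lean on LPeg match-time functions:
`P(function(input, index) ... end)` lets a rule inspect arbitrary context
and either fail (return nil) or report where matching should resume (return
an index). A self-contained sketch of the idiom:

local upper_run = lpeg.P(function(input, index)
  local s, e = input:find('^%u+', index) -- scan from the current position
  return e and e + 1 or nil              -- resume past the run, or fail
end)
assert(lpeg.match(upper_run, 'ABCdef') == 4)
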
diff --git a/lexers/rust.lua b/lexers/rust.lua
new file mode 100644
index 0000000..4fef3ae
--- /dev/null
+++ b/lexers/rust.lua
@@ -0,0 +1,87 @@
+-- Copyright 2015 Alejandro Baez (https://twitter.com/a_baez). See LICENSE.
+-- Rust LPeg lexer.
+
+local l = require("lexer")
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'rust'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = P('L')^-1 * l.delimited_range("'")
+local dq_str = P('L')^-1 * l.delimited_range('"')
+local raw_str = "##" * (l.any - '##')^0 * P("##")^-1
+local string = token(l.STRING, dq_str + raw_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float +
+ "0b" * (l.dec_num + "_")^1 + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'alignof', 'as', 'become', 'box',
+ 'break', 'const', 'continue', 'crate', 'do',
+ 'else', 'enum', 'extern', 'false', 'final',
+ 'fn', 'for', 'if', 'impl', 'in',
+ 'let', 'loop', 'macro', 'match', 'mod',
+ 'move', 'mut', "offsetof", 'override', 'priv',
+ 'pub', 'pure', 'ref', 'return', 'sizeof',
+ 'static', 'self', 'struct', 'super', 'true',
+ 'trait', 'type', 'typeof', 'unsafe', 'unsized',
+ 'use', 'virtual', 'where', 'while', 'yield'
+})
+
+-- Library types.
+local library = token(l.LABEL, l.upper * (l.lower + l.dec_num)^1)
+
+-- Syntax extensions (macro invocations such as `println!`).
+local extension = l.word * P('!')
+
+local func = token(l.FUNCTION, extension)
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ '()', 'bool', 'isize', 'usize', 'char', 'str',
+ 'u8', 'u16', 'u32', 'u64', 'i8', 'i16', 'i32', 'i64',
+ 'f32','f64',
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=`^~@&|?#~:;,.()[]{}'))
+
+-- Attributes.
+local attribute = token(l.PREPROCESSOR, "#[" *
+ (l.nonnewline - ']')^0 * P("]")^-1)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'library', library},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+ {'preprocessor', attribute},
+}
+
+M._foldsymbols = {
+ _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')},
+ [l.OPERATOR] = {['('] = 1, ['{'] = 1, [')'] = -1, ['}'] = -1}
+}
+
+return M
diff --git a/lexers/sass.lua b/lexers/sass.lua
new file mode 100644
index 0000000..002df27
--- /dev/null
+++ b/lexers/sass.lua
@@ -0,0 +1,32 @@
+-- Copyright 2006-2013 Robert Gieseke. See LICENSE.
+-- Sass CSS preprocessor LPeg lexer.
+-- http://sass-lang.com
+
+local l = require('lexer')
+local token = l.token
+local P, S = lpeg.P, lpeg.S
+
+local M = {_NAME = 'sass'}
+
+-- Line comments.
+local line_comment = token(l.COMMENT, '//' * l.nonnewline^0)
+
+-- Variables.
+local variable = token(l.VARIABLE, '$' * (l.alnum + S('_-'))^1)
+
+-- Mixins.
+local mixin = token('mixin', P('@') * l.word)
+
+local css = l.load('css')
+local _rules = css._rules
+table.insert(_rules, #_rules - 1, {'mixin', mixin})
+table.insert(_rules, #_rules - 1, {'line_comment', line_comment})
+table.insert(_rules, #_rules - 1, {'variable', variable})
+M._rules = _rules
+
+M._tokenstyles = css._tokenstyles
+M._tokenstyles['mixin'] = l.STYLE_FUNCTION
+
+M._foldsymbols = css._foldsymbols
+
+return M
diff --git a/lexers/scala.lua b/lexers/scala.lua
new file mode 100644
index 0000000..d455996
--- /dev/null
+++ b/lexers/scala.lua
@@ -0,0 +1,75 @@
+-- Copyright 2006-2013 JMS. See LICENSE.
+-- Scala LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'scala'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local symbol = "'" * l.word
+local dq_str = l.delimited_range('"', true)
+local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local string = token(l.STRING, tq_str + symbol + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlFfDd')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'abstract', 'case', 'catch', 'class', 'def', 'do', 'else', 'extends', 'false',
+ 'final', 'finally', 'for', 'forSome', 'if', 'implicit', 'import', 'lazy',
+ 'match', 'new', 'null', 'object', 'override', 'package', 'private',
+ 'protected', 'return', 'sealed', 'super', 'this', 'throw', 'trait', 'try',
+ 'true', 'type', 'val', 'var', 'while', 'with', 'yield'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'Array', 'Boolean', 'Buffer', 'Byte', 'Char', 'Collection', 'Double', 'Float',
+ 'Int', 'Iterator', 'LinkedList', 'List', 'Long', 'Map', 'None', 'Option',
+ 'Set', 'Short', 'SortedMap', 'SortedSet', 'String', 'TreeMap', 'TreeSet'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+
+-- Functions.
+local func = token(l.FUNCTION, l.word) * #P('(')
+
+-- Classes.
+local class_sequence = token(l.KEYWORD, P('class')) * ws^1 *
+ token(l.CLASS, l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'class', class_sequence},
+ {'keyword', keyword},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/scheme.lua b/lexers/scheme.lua
new file mode 100644
index 0000000..bec5377
--- /dev/null
+++ b/lexers/scheme.lua
@@ -0,0 +1,104 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Scheme LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'scheme'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = ';' * l.nonnewline^0
+local block_comment = '#|' * (l.any - '|#')^0 * P('|#')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local literal = (P("'") + '#' * S('\\bdox')) * l.word
+local dq_str = l.delimited_range('"')
+local string = token(l.STRING, literal + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, P('-')^-1 * l.digit^1 * (S('./') * l.digit^1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'and', 'begin', 'case', 'cond', 'cond-expand', 'define', 'define-macro',
+ 'delay', 'do', 'else', 'fluid-let', 'if', 'lambda', 'let', 'let*', 'letrec',
+ 'or', 'quasiquote', 'quote', 'set!',
+}, '-*!'))
+
+-- Functions.
+local func = token(l.FUNCTION, word_match({
+ 'abs', 'acos', 'angle', 'append', 'apply', 'asin', 'assoc', 'assq', 'assv',
+ 'atan', 'car', 'cdr', 'caar', 'cadr', 'cdar', 'cddr', 'caaar', 'caadr',
+ 'cadar', 'caddr', 'cdaar', 'cdadr', 'cddar', 'cdddr',
+ 'call-with-current-continuation', 'call-with-input-file',
+ 'call-with-output-file', 'call-with-values', 'call/cc', 'catch', 'ceiling',
+ 'char->integer', 'char-downcase', 'char-upcase', 'close-input-port',
+ 'close-output-port', 'cons', 'cos', 'current-input-port',
+ 'current-output-port', 'delete-file', 'display', 'dynamic-wind', 'eval',
+ 'exit', 'exact->inexact', 'exp', 'expt', 'file-or-directory-modify-seconds',
+ 'floor', 'force', 'for-each', 'gcd', 'gensym', 'get-output-string', 'getenv',
+ 'imag-part', 'integer->char', 'lcm', 'length', 'list', 'list->string',
+ 'list->vector', 'list-ref', 'list-tail', 'load', 'log', 'magnitude',
+ 'make-polar', 'make-rectangular', 'make-string', 'make-vector', 'map', 'max',
+ 'member', 'memq', 'memv', 'min', 'modulo', 'newline', 'nil', 'not',
+ 'number->string', 'open-input-file', 'open-input-string', 'open-output-file',
+ 'open-output-string', 'peek-char', 'quotient', 'read', 'read-char',
+ 'read-line', 'real-part', 'remainder', 'reverse', 'reverse!', 'round',
+ 'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
+ 'string->number', 'string->symbol', 'string-append', 'string-copy',
+ 'string-fill!', 'string-length', 'string-ref', 'string-set!', 'substring',
+ 'symbol->string', 'system', 'tan', 'truncate', 'values', 'vector',
+ 'vector->list', 'vector-fill!', 'vector-length', 'vector-ref', 'vector-set!',
+ 'with-input-from-file', 'with-output-to-file', 'write', 'write-char',
+ 'boolean?', 'char-alphabetic?', 'char-ci<=?', 'char-ci<?', 'char-ci=?',
+ 'char-ci>=?', 'char-ci>?', 'char-lower-case?', 'char-numeric?', 'char-ready?',
+ 'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
+ 'char>=?', 'char>?', 'char?', 'complex?', 'eof-object?', 'eq?', 'equal?',
+ 'eqv?', 'even?', 'exact?', 'file-exists?', 'inexact?', 'input-port?',
+ 'integer?', 'list?', 'negative?', 'null?', 'number?', 'odd?', 'output-port?',
+ 'pair?', 'port?', 'positive?', 'procedure?', 'rational?', 'real?',
+ 'string-ci<=?', 'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
+ 'string<=?', 'string<?', 'string=?', 'string>=?', 'string>?', 'string?',
+ 'symbol?', 'vector?', 'zero?',
+ '#t', '#f'
+}, '-/<>!?=#'))
+
+-- Identifiers.
+local word = (l.alpha + S('-!?')) * (l.alnum + S('-!?'))^0
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('<>=*/+-`@%:()'))
+
+-- Entity.
+local entity = token('entity', '&' * word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+ {'entity', entity},
+}
+
+M._tokenstyles = {
+ entity = l.STYLE_VARIABLE
+}
+
+M._foldsymbols = {
+ _patterns = {'[%(%)%[%]{}]', '#|', '|#', ';'},
+ [l.OPERATOR] = {
+ ['('] = 1, [')'] = -1, ['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1
+ },
+ [l.COMMENT] = {['#|'] = 1, ['|#'] = -1, [';'] = l.fold_line_comments(';')}
+}
+
+return M
diff --git a/lexers/smalltalk.lua b/lexers/smalltalk.lua
new file mode 100644
index 0000000..298173f
--- /dev/null
+++ b/lexers/smalltalk.lua
@@ -0,0 +1,62 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Smalltalk LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'smalltalk'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, l.delimited_range('"', false, true))
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local literal = '$' * l.word
+local string = token(l.STRING, sq_str + literal)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'true', 'false', 'nil', 'self', 'super', 'isNil', 'not', 'Smalltalk',
+ 'Transcript'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'Date', 'Time', 'Boolean', 'True', 'False', 'Character', 'String', 'Array',
+ 'Symbol', 'Integer', 'Object'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(':=_<>+-/*!()[]'))
+
+-- Labels.
+local label = token(l.LABEL, '#' * l.word)
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'label', label},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[%[%]]'},
+ [l.OPERATOR] = {['['] = 1, [']'] = -1}
+}
+
+return M
diff --git a/lexers/sql.lua b/lexers/sql.lua
new file mode 100644
index 0000000..1ff5a74
--- /dev/null
+++ b/lexers/sql.lua
@@ -0,0 +1,79 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- SQL LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'sql'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = (P('--') + '#') * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'")
+local dq_str = l.delimited_range('"')
+local bt_str = l.delimited_range('`')
+local string = token(l.STRING, sq_str + dq_str + bt_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'add', 'all', 'alter', 'analyze', 'and', 'as', 'asc', 'asensitive', 'before',
+ 'between', 'bigint', 'binary', 'blob', 'both', 'by', 'call', 'cascade',
+ 'case', 'change', 'char', 'character', 'check', 'collate', 'column',
+ 'condition', 'connection', 'constraint', 'continue', 'convert', 'create',
+ 'cross', 'current_date', 'current_time', 'current_timestamp', 'current_user',
+ 'cursor', 'database', 'databases', 'day_hour', 'day_microsecond',
+ 'day_minute', 'day_second', 'dec', 'decimal', 'declare', 'default', 'delayed',
+ 'delete', 'desc', 'describe', 'deterministic', 'distinct', 'distinctrow',
+ 'div', 'double', 'drop', 'dual', 'each', 'else', 'elseif', 'enclosed',
+ 'escaped', 'exists', 'exit', 'explain', 'false', 'fetch', 'float', 'for',
+ 'force', 'foreign', 'from', 'fulltext', 'goto', 'grant', 'group', 'having',
+ 'high_priority', 'hour_microsecond', 'hour_minute', 'hour_second', 'if',
+ 'ignore', 'in', 'index', 'infile', 'inner', 'inout', 'insensitive', 'insert',
+ 'int', 'integer', 'interval', 'into', 'is', 'iterate', 'join', 'key', 'keys',
+ 'kill', 'leading', 'leave', 'left', 'like', 'limit', 'lines', 'load',
+ 'localtime', 'localtimestamp', 'lock', 'long', 'longblob', 'longtext', 'loop',
+ 'low_priority', 'match', 'mediumblob', 'mediumint', 'mediumtext', 'middleint',
+ 'minute_microsecond', 'minute_second', 'mod', 'modifies', 'natural', 'not',
+ 'no_write_to_binlog', 'null', 'numeric', 'on', 'optimize', 'option',
+ 'optionally', 'or', 'order', 'out', 'outer', 'outfile', 'precision',
+ 'primary', 'procedure', 'purge', 'read', 'reads', 'real', 'references',
+ 'regexp', 'rename', 'repeat', 'replace', 'require', 'restrict', 'return',
+ 'revoke', 'right', 'rlike', 'schema', 'schemas', 'second_microsecond',
+ 'select', 'sensitive', 'separator', 'set', 'show', 'smallint', 'soname',
+ 'spatial', 'specific', 'sql', 'sqlexception', 'sqlstate', 'sqlwarning',
+ 'sql_big_result', 'sql_calc_found_rows', 'sql_small_result', 'ssl',
+ 'starting', 'straight_join', 'table', 'terminated', 'text', 'then',
+ 'tinyblob', 'tinyint', 'tinytext', 'to', 'trailing', 'trigger', 'true',
+ 'undo', 'union', 'unique', 'unlock', 'unsigned', 'update', 'usage', 'use',
+ 'using', 'utc_date', 'utc_time', 'utc_timestamp', 'values', 'varbinary',
+ 'varchar', 'varcharacter', 'varying', 'when', 'where', 'while', 'with',
+ 'write', 'xor', 'year_month', 'zerofill'
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S(',()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/tcl.lua b/lexers/tcl.lua
new file mode 100644
index 0000000..f014c35
--- /dev/null
+++ b/lexers/tcl.lua
@@ -0,0 +1,59 @@
+-- Copyright 2014-2015 Joshua Krämer. See LICENSE.
+-- Tcl LPeg lexer.
+-- This lexer follows the Tcl dodekalogue (http://wiki.tcl.tk/10259).
+-- It is based on the previous lexer by Mitchell.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'tcl'}
+
+-- Whitespace.
+local whitespace = token(l.WHITESPACE, l.space^1)
+
+-- Separator (semicolon).
+local separator = token(l.CLASS, P(';'))
+
+-- Delimiters.
+local braces = token(l.KEYWORD, S('{}'))
+local quotes = token(l.FUNCTION, '"')
+local brackets = token(l.VARIABLE, S('[]'))
+
+-- Argument expander.
+local expander = token(l.LABEL, P('{*}'))
+
+-- Variable substitution.
+local variable = token(l.STRING, '$' * (l.alnum + '_' + P(':')^2)^0)
+
+-- Backslash substitution.
+local backslash = token(l.TYPE, '\\' * ((l.digit * l.digit^-2) +
+ ('x' * l.xdigit^1) + ('u' * l.xdigit * l.xdigit^-3) +
+ ('U' * l.xdigit * l.xdigit^-7) + P(1)))
+
+-- Comment.
+local comment = token(l.COMMENT, '#' * P(function(input, index)
+ local i = index - 2
+ while i > 0 and input:find('^[ \t]', i) do i = i - 1 end
+ if i < 1 or input:find('^[\r\n;]', i) then return index end
+end) * l.nonnewline^0)
+
+M._rules = {
+ {'whitespace', whitespace},
+ {'comment', comment},
+ {'separator', separator},
+ {'expander', expander},
+ {'braces', braces},
+ {'quotes', quotes},
+ {'brackets', brackets},
+ {'variable', variable},
+ {'backslash', backslash},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '#'},
+ [l.KEYWORD] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
+}
+
+return M
diff --git a/lexers/template.txt b/lexers/template.txt
new file mode 100644
index 0000000..4f8c560
--- /dev/null
+++ b/lexers/template.txt
@@ -0,0 +1,20 @@
+-- ? LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = '?'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+M._rules = {
+ {'whitespace', ws},
+}
+
+M._tokenstyles = {
+
+}
+
+return M
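
The template above is the intended starting point for a new lexer: copy it,
set `_NAME`, then add token/rule pairs. A sketch of it filled in for a toy
language with only line comments and double-quoted strings (the 'toy' name
and its syntax are hypothetical):

local l = require('lexer')
local token = l.token

local M = {_NAME = 'toy'}

-- Whitespace.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments.
local comment = token(l.COMMENT, ';' * l.nonnewline^0)

-- Strings.
local string = token(l.STRING, l.delimited_range('"'))

M._rules = {
  {'whitespace', ws},
  {'comment', comment},
  {'string', string},
}

return M
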
diff --git a/lexers/tex.lua b/lexers/tex.lua
new file mode 100644
index 0000000..7eb9e75
--- /dev/null
+++ b/lexers/tex.lua
@@ -0,0 +1,45 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Plain TeX LPeg lexer.
+-- Modified by Robert Gieseke.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'tex'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '%' * l.nonnewline^0)
+
+-- TeX environments.
+local environment = token('environment', '\\' * (P('begin') + 'end') * l.word)
+
+-- Commands.
+local command = token(l.KEYWORD, '\\' * (l.alpha^1 + S('#$&~_^%{}')))
+
+-- Operators.
+local operator = token(l.OPERATOR, S('$&#{}[]'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'environment', environment},
+ {'keyword', command},
+ {'operator', operator},
+}
+
+M._tokenstyles = {
+ environment = l.STYLE_KEYWORD
+}
+
+M._foldsymbols = {
+ _patterns = {'\\begin', '\\end', '[{}]', '%%'},
+ [l.COMMENT] = {['%'] = l.fold_line_comments('%')},
+ ['environment'] = {['\\begin'] = 1, ['\\end'] = -1},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1}
+}
+
+return M
diff --git a/lexers/texinfo.lua b/lexers/texinfo.lua
new file mode 100644
index 0000000..f964424
--- /dev/null
+++ b/lexers/texinfo.lua
@@ -0,0 +1,270 @@
+-- Copyright 2014-2015 stef@ailleurs.land. See LICENSE.
+-- Plain Texinfo version 5.2 LPeg lexer.
+-- Freely inspired by Mitchell's work, with valuable help from him too!
+
+-- Directives are processed (more or less) in the order of the Texinfo
+-- Reference Card; the Reference Card page for each directive group is given
+-- in a comment for reference.
+
+--[[
+Note: Improving fold-point use with Texinfo
+
+At the very beginning of your Texinfo file, it can be wise to insert these
+aliases:
+
+@alias startchapter = comment
+@alias endchapter = comment
+
+Then use this to begin each chapter:
+
+@endchapter --------------------------------------------------------------------
+@chapter CHAPTER TITLE
+@startchapter ------------------------------------------------------------------
+
+With Scintilla's `SCI_FOLDALL(SC_FOLDACTION_TOGGLE)` or Textadept's
+`buffer:fold_all(buffer.FOLDACTION_TOGGLE)`, you then get nice chapter
+folding, which is useful with large documents.
+]]
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'texinfo'}
+
+-------------------------------------------------------------------------------
+-- Common processing
+-------------------------------------------------------------------------------
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '@c' * l.nonnewline_esc^0
+--local line_comment_long = '@comment' * l.nonnewline_esc^0
+local block_comment = '@ignore' * (l.any - '@end ignore')^0 *
+ P('@end ignore')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('{}', false, true, true))
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Identifiers
+local identifier = token(l.IDENTIFIER, l.word)
+
+-------------------------------------------------------------------------------
+-- Common keywords
+-------------------------------------------------------------------------------
+
+local keyword_base = word_match({
+ 'end',
+ -- Beginning a Texinfo document (page 1, column 1)
+ 'setfilename', 'settitle', 'insertcopying',
+  -- Beginning a Texinfo document > Internationalization (page 1, column 1)
+ 'documentencoding', 'documentlanguage', 'frenchspacing',
+ -- Beginning a Texinfo document > Info directory specification and HTML
+ -- document description (page 1, column 1)
+ 'dircategory', 'direntry', 'documentdescription',
+  -- Beginning a Texinfo document > Title pages (page 1, column 1)
+ 'shorttitlepage', 'center', 'titlefont', 'title', 'subtitle', 'author',
+ -- Beginning a Texinfo document > Tables of contents (page 1, column 2)
+ 'shortcontents', 'summarycontents', 'contents', 'setcontentsaftertitlepage',
+ 'setshortcontentsaftertitlepage',
+ -- Nodes (page 1, column 2)
+ 'node', 'top', 'anchor', 'novalidate',
+ -- Menus (page 1, column 2)
+ 'menu', 'detailmenu',
+ -- Cross references > Within the Info system (page 1, column 3)
+ 'xref', 'pxref', 'ref', 'inforef', 'xrefautomaticsectiontitle',
+ -- Cross references > Outside of info (page 1, column 3)
+ 'url', 'cite',
+ -- Marking text > Markup for regular text (page 1, column 3)
+ 'var', 'dfn', 'acronym', 'abbr',
+  -- Marking text > Markup for literal text (page 1, column 3)
+ 'code', 'file', 'command', 'env', 'option', 'kbd', 'key', 'email',
+ 'indicateurl', 'samp', 'verb',
+ -- Marking text > GUI sequences (page 2, column 1)
+ 'clicksequence', 'click', 'clickstyle', 'arrow',
+ -- Marking text > Math (page 2, column 1)
+ 'math', 'minus', 'geq', 'leq',
+ -- Marking text > Explicit font selection (page 2, column 1)
+ 'sc', 'r', 'i', 'slanted', 'b', 'sansserif', 't',
+ -- Block environments (page 2, column 1)
+ 'noindent', 'indent', 'exdent',
+ -- Block environments > Normally filled displays using regular text fonts
+ -- (page 2, column 1)
+ 'quotation', 'smallquotation', 'indentedblock', 'smallindentedblock',
+ 'raggedright',
+  -- Block environments > Line-for-line displays using regular text fonts (page
+ -- 2, column 2)
+ 'format', 'smallformat', 'display', 'smalldisplay', 'flushleft', 'flushright',
+ -- Block environments > Displays using fixed-width fonts (page 2, column 2)
+ 'lisp', 'smalllisp', 'verbatim',
+ -- List and tables (page 2, column 2)
+ 'table', 'ftable', 'vtable', 'tab', 'item', 'itemx', 'headitem',
+ 'headitemfont', 'asis',
+ -- Indices (page 2, column 3)
+ 'cindex', 'findex', 'vindex', 'kindex', 'pindex', 'tindex', 'defcodeindex',
+ 'syncodeindex', 'synindex', 'printindex',
+ -- Insertions within a paragraph > Characters special to Texinfo (page 2,
+ -- column 3)
+  '@', '{', '}', 'backslashchar', 'comma', 'hashchar', ':', '.', '?', '!', 'dmn',
+ -- Insertions within a paragraph > Accents (page 3, column 1)
+ -- not implemented
+ -- Insertions within a paragraph > Non-English characters (page 3, column 1)
+ -- not implemented
+  -- Insertions within a paragraph > Other text characters and logos (page 3,
+ -- column 1)
+ 'bullet', 'dots', 'enddots', 'euro', 'pounds', 'textdegree', 'copyright',
+ 'registeredsymbol', 'TeX', 'LaTeX', 'today',
+  'guillemetleft', 'guillemetright', 'guillemotleft', 'guillemotright',
+ -- Insertions within a paragraph > Glyphs for code examples (page 3, column 2)
+ 'equiv', 'error', 'expansion', 'point', 'print', 'result',
+ -- Making and preventing breaks (page 3, column 2)
+ '*', '/', '-', 'hyphenation', 'tie', 'w', 'refill',
+ -- Vertical space (page 3, column 2)
+ 'sp', 'page', 'need', 'group', 'vskip'
+ -- Definition commands (page 3, column 2)
+ -- not implemented
+}, nil, true)
+
+local keyword = token(l.KEYWORD, ('@end' * l.space^1 + '@') * keyword_base)
+
+-------------------------------------------------------------------------------
+-- Chapter structuring Keywords
+-------------------------------------------------------------------------------
+
+local chapters_base = word_match({
+ -- Chapter structuring (page 1, column 2)
+ 'lowersections', 'raisesections', 'part',
+ -- Chapter structuring > Numbered, included in contents (page 1, column 2)
+ 'chapter', 'centerchap',
+ -- Chapter structuring > Context-dependent, included in contents (page 1,
+ -- column 2)
+ 'section', 'subsection', 'subsubsection',
+  -- Chapter structuring > Unnumbered, included in contents (page 1, column 2)
+ 'unnumbered', 'unnumberedsec', 'unnumberedsubsec', 'unnumberedsubsection',
+ 'unnumberedsubsubsec', 'unnumberedsubsubsection',
+ -- Chapter structuring > Letter and numbered, included in contents (page 1,
+ -- column 2)
+ 'appendix', 'appendixsec', 'appendixsection', 'appendixsubsec',
+ 'appendixsubsection', 'appendixsubsubsec', 'appendixsubsubsection',
+  -- Chapter structuring > Unnumbered, not included in contents, no new page
+ -- (page 1, column 3)
+ 'chapheading', 'majorheading', 'heading', 'subheading', 'subsubheading'
+}, nil, true)
+
+local chapters = token('chapters', ('@end' * l.space^1 + '@') * chapters_base)
+
+-------------------------------------------------------------------------------
+-- Directives Keywords
+-------------------------------------------------------------------------------
+
+local directives_base = word_match({
+ 'end',
+ -- Custom keywords for chapter folding
+ 'startchapter', 'endchapter',
+ -- List and tables (page 2, column 2)
+ 'itemize', 'enumerate',
+ -- Beginning a Texinfo document (page 1, column 1)
+ 'titlepage', 'copying',
+ -- Block environments (page 2, column 1)
+ 'cartouche',
+ -- Block environments > Displays using fixed-width fonts (page 2, column 2)
+ 'example', 'smallexample',
+ -- List and tables (page 2, column 2)
+ 'multitable',
+ -- Floating Displays (page 2, column 3)
+ 'float', 'listoffloats', 'caption', 'shortcaption', 'image',
+ -- Floating Displays > Footnotes (page 2, column 3)
+ 'footnote', 'footnotestyle',
+ -- Conditionally (in)visible text > Output formats (page 3, column 3)
+ 'ifdocbook', 'ifhtml', 'ifinfo', 'ifplaintext', 'iftex', 'ifxml',
+ 'ifnotdocbook', 'ifnothtml', 'ifnotplaintext',
+ 'ifnottex', 'ifnotxml', 'ifnotinfo', 'inlinefmt', 'inlinefmtifelse',
+ -- Conditionally (in)visible text > Raw formatter text (page 4, column 1)
+ 'docbook', 'html', 'tex', 'xml', 'inlineraw',
+ -- Conditionally (in)visible text > Documents variables (page 4, column 1)
+ 'set', 'clear', 'value', 'ifset', 'ifclear', 'inlineifset', 'inlineifclear',
+ -- Conditionally (in)visible text > Testing for commands (page 4, column 1)
+  'ifcommanddefined', 'ifcommandnotdefined',
+ -- Defining new Texinfo commands (page 4, column 1)
+ 'alias', 'macro', 'unmacro', 'definfounclose',
+ -- File inclusion (page 4, column 1)
+ 'include', 'verbatiminclude',
+ -- Formatting and headers/footers for TeX (page 4, column 1)
+ 'allowcodebreaks', 'finalout', 'fonttextsize',
+ -- Formatting and headers/footers for TeX > Paper size (page 4, column 2)
+ 'smallbook', 'afourpaper', 'afivepaper', 'afourlatex', 'afourwide',
+ 'pagesizes',
+ -- Formatting and headers/footers for TeX > Page headers and footers (page 4,
+ -- column 2)
+ -- not implemented
+ -- Document preferences (page 4, column 2)
+ -- not implemented
+ -- Ending a Texinfo document (page 4, column 2)
+ 'bye'
+}, nil, case_insensitive_tags)
+
+local directives = token('directives',
+ ('@end' * l.space^1 + '@') * directives_base)
+
+-------------------------------------------------------------------------------
+-- Special Keywords
+-------------------------------------------------------------------------------
+
+-- Italics
+local emph = token('emph', '@emph' * l.delimited_range('{}', false, true, true))
+
+-- Bold
+local strong = token('strong',
+ '@strong' * l.delimited_range('{}', false, true, true))
+
+-------------------------------------------------------------------------------
+-- Rules, Token Styles and Fold Points
+-------------------------------------------------------------------------------
+
+M._rules = {
+ {'whitespace', ws},
+ {'directives', directives},
+ {'chapters', chapters},
+ {'keyword', keyword},
+ {'emph', emph},
+ {'strong', strong},
+ {'identifier', identifier},
+ {'string', string},
+ {'number', number},
+ {'comment', comment},
+}
+
+M._tokenstyles = {
+ directives = l.STYLE_FUNCTION,
+ chapters = l.STYLE_CLASS,
+ emph = l.STYLE_STRING..',italics',
+ strong = l.STYLE_STRING..',bold'
+}
+
+M._foldsymbols = {
+ _patterns = {'@end %l+', '@%l+'},
+ directives = {
+ ['@titlepage'] = 1, ['@end titlepage'] = -1,
+ ['@copying'] = 1, ['@end copying'] = -1,
+
+ ['@ifset'] = 1, ['@end ifset'] = -1,
+ ['@tex'] = 1, ['@end tex'] = -1,
+
+ ['@itemize'] = 1, ['@end itemize'] = -1,
+ ['@enumerate'] = 1, ['@end enumerate'] = -1,
+ ['@multitable'] = 1, ['@end multitable'] = -1,
+
+ ['@example'] = 1, ['@end example'] = -1,
+ ['@smallexample'] = 1, ['@end smallexample'] = -1,
+ ['@cartouche'] = 1, ['@end cartouche'] = -1,
+
+ ['@startchapter'] = 1, ['@endchapter'] = -1,
+ }
+}
+
+return M
diff --git a/lexers/text.lua b/lexers/text.lua
new file mode 100644
index 0000000..6331f93
--- /dev/null
+++ b/lexers/text.lua
@@ -0,0 +1,6 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Text LPeg lexer.
+
+local M = {_NAME = 'text'}
+
+return M
diff --git a/lexers/toml.lua b/lexers/toml.lua
new file mode 100644
index 0000000..d7d2960
--- /dev/null
+++ b/lexers/toml.lua
@@ -0,0 +1,68 @@
+-- Copyright 2015 Alejandro Baez (https://twitter.com/a_baez). See LICENSE.
+-- TOML LPeg lexer.
+
+local l = require("lexer")
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'toml'}
+
+-- Whitespace.
+local indent = #l.starts_line(S(' \t')) *
+ (token(l.WHITESPACE, ' ') + token('indent_error', '\t'))^1
+local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range("'") + l.delimited_range('"'))
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Datetime.
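+-- Meant to accept RFC 3339-style datetimes such as 1979-05-27T07:32:00Z or
+-- 1979-05-27 07:32:00.999-07:00, as well as bare dates like 1979-05-27.
+-- This rule must run before the number rule, or the leading year would be
+-- consumed as an integer.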
+local ts = token('timestamp', l.digit * l.digit * l.digit * l.digit * -- year
+ '-' * l.digit * l.digit^-1 * -- month
+ '-' * l.digit * l.digit^-1 * -- day
+ ((S(' \t')^1 + S('tT'))^-1 * -- separator
+ l.digit * l.digit^-1 * -- hour
+ ':' * l.digit * l.digit * -- minute
+ ':' * l.digit * l.digit * -- second
+ ('.' * l.digit^0)^-1 * -- fraction
+ ('Z' + -- timezone
+ S(' \t')^0 * S('-+') * l.digit * l.digit^-1 *
+ (':' * l.digit * l.digit)^-1)^-1)^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'true', 'false'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=+-,.{}[]()'))
+
+M._rules = {
+ {'indent', indent},
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'identifier', identifier},
+ {'operator', operator},
+ {'string', string},
+ {'comment', comment},
+ {'timestamp', ts},
+ {'number', number},
+}
+
+M._tokenstyles = {
+ indent_error = 'back:%(color.red)',
+ timestamp = l.STYLE_NUMBER,
+}
+
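+-- Fold points are computed from changes in indentation rather than from
+-- explicit fold symbols.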
+M._FOLDBYINDENTATION = true
+
+return M
diff --git a/lexers/vala.lua b/lexers/vala.lua
new file mode 100644
index 0000000..d9a0a3a
--- /dev/null
+++ b/lexers/vala.lua
@@ -0,0 +1,75 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Vala LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'vala'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local tq_str = '"""' * (l.any - '"""')^0 * P('"""')^-1
+local ml_str = '@' * l.delimited_range('"', false, true)
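+-- `tq_str` covers verbatim strings ("""...""") and `ml_str` covers string
+-- templates (@"...$(expr)...").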
+local string = token(l.STRING, tq_str + sq_str + dq_str + ml_str)
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('uUlLfFdDmM')^-1)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'class', 'delegate', 'enum', 'errordomain', 'interface', 'namespace',
+ 'signal', 'struct', 'using',
+ -- Modifiers.
+ 'abstract', 'const', 'dynamic', 'extern', 'inline', 'out', 'override',
+ 'private', 'protected', 'public', 'ref', 'static', 'virtual', 'volatile',
+ 'weak',
+ -- Other.
+ 'as', 'base', 'break', 'case', 'catch', 'construct', 'continue', 'default',
+ 'delete', 'do', 'else', 'ensures', 'finally', 'for', 'foreach', 'get', 'if',
+ 'in', 'is', 'lock', 'new', 'requires', 'return', 'set', 'sizeof', 'switch',
+ 'this', 'throw', 'throws', 'try', 'typeof', 'value', 'var', 'void', 'while',
+ -- Etc.
+ 'null', 'true', 'false'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bool', 'char', 'double', 'float', 'int', 'int8', 'int16', 'int32', 'int64',
+ 'long', 'short', 'size_t', 'ssize_t', 'string', 'uchar', 'uint', 'uint8',
+ 'uint16', 'uint32', 'uint64', 'ulong', 'unichar', 'ushort'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/vb.lua b/lexers/vb.lua
new file mode 100644
index 0000000..6541a92
--- /dev/null
+++ b/lexers/vb.lua
@@ -0,0 +1,64 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- VisualBasic LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'vb'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, (P("'") + word_match({'rem'}, nil, true)) *
+ l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true, true))
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ -- Control.
+ 'If', 'Then', 'Else', 'ElseIf', 'While', 'Wend', 'For', 'To', 'Each',
+ 'In', 'Step', 'Case', 'Select', 'Return', 'Continue', 'Do',
+ 'Until', 'Loop', 'Next', 'With', 'Exit',
+ -- Operators.
+ 'Mod', 'And', 'Not', 'Or', 'Xor', 'Is',
+ -- Storage types.
+ 'Call', 'Class', 'Const', 'Dim', 'ReDim', 'Preserve', 'Function', 'Sub',
+ 'Property', 'End', 'Set', 'Let', 'Get', 'New', 'Randomize', 'Option',
+ 'Explicit', 'On', 'Error', 'Execute',
+ -- Storage modifiers.
+ 'Private', 'Public', 'Default',
+ -- Constants.
+ 'Empty', 'False', 'Nothing', 'Null', 'True'
+}, nil, true))
+
+-- Types.
+local type = token(l.TYPE, word_match({
+ 'Boolean', 'Byte', 'Char', 'Date', 'Decimal', 'Double', 'Long', 'Object',
+ 'Short', 'Single', 'String'
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=><+-*^&:.,_()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'comment', comment},
+ {'identifier', identifier},
+ {'string', string},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/vbscript.lua b/lexers/vbscript.lua
new file mode 100644
index 0000000..f4d9f36
--- /dev/null
+++ b/lexers/vbscript.lua
@@ -0,0 +1,63 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- VBScript LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'vbscript'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, (P("'") + word_match({'rem'}, nil, true)) *
+ l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"', true, true))
+
+-- Numbers.
+local number = token(l.NUMBER, (l.float + l.integer) * S('LlUuFf')^-2)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ -- Control.
+ 'If', 'Then', 'Else', 'ElseIf', 'While', 'Wend', 'For', 'To', 'Each',
+ 'In', 'Step', 'Case', 'Select', 'Return', 'Continue', 'Do',
+ 'Until', 'Loop', 'Next', 'With', 'Exit',
+ -- Operators.
+ 'Mod', 'And', 'Not', 'Or', 'Xor', 'Is',
+ -- Storage types.
+ 'Call', 'Class', 'Const', 'Dim', 'ReDim', 'Preserve', 'Function', 'Sub',
+ 'Property', 'End', 'Set', 'Let', 'Get', 'New', 'Randomize', 'Option',
+ 'Explicit', 'On', 'Error', 'Execute',
+ -- Storage modifiers.
+ 'Private', 'Public', 'Default',
+ -- Constants.
+ 'Empty', 'False', 'Nothing', 'Null', 'True'
+}, nil, true))
+
+-- Types.
+local type = token(l.TYPE, word_match({
+ 'Boolean', 'Byte', 'Char', 'Date', 'Decimal', 'Double', 'Long', 'Object',
+ 'Short', 'Single', 'String'
+}, nil, true))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=><+-*^&:.,_()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'type', type},
+ {'comment', comment},
+ {'identifier', identifier},
+ {'string', string},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/vcard.lua b/lexers/vcard.lua
new file mode 100644
index 0000000..2057451
--- /dev/null
+++ b/lexers/vcard.lua
@@ -0,0 +1,97 @@
+-- Copyright (c) 2015 Piotr Orzechowski [drzewo.org]. See LICENSE.
+-- vCard 2.1, 3.0 and 4.0 LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'vcard'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Required properties.
+local required_property = token(l.KEYWORD, word_match({
+ 'BEGIN', 'END', 'FN', 'N' --[[ Not required in v4.0. ]], 'VERSION'
+}, nil, true)) * #P(':')
+
+-- Supported properties.
+local supported_property = token(l.TYPE, word_match({
+ 'ADR', 'AGENT' --[[ Not supported in v4.0. ]],
+ 'ANNIVERSARY' --[[ Supported in v4.0 only. ]], 'BDAY',
+ 'CALADRURI' --[[ Supported in v4.0 only. ]],
+ 'CALURI' --[[ Supported in v4.0 only. ]], 'CATEGORIES',
+ 'CLASS' --[[ Supported in v3.0 only. ]],
+ 'CLIENTPIDMAP' --[[ Supported in v4.0 only. ]], 'EMAIL', 'END',
+ 'FBURL' --[[ Supported in v4.0 only. ]],
+ 'GENDER' --[[ Supported in v4.0 only. ]], 'GEO',
+ 'IMPP' --[[ Not supported in v2.1. ]], 'KEY',
+ 'KIND' --[[ Supported in v4.0 only. ]],
+ 'LABEL' --[[ Not supported in v4.0. ]],
+ 'LANG' --[[ Supported in v4.0 only. ]], 'LOGO',
+ 'MAILER' --[[ Not supported in v4.0. ]],
+ 'MEMBER' --[[ Supported in v4.0 only. ]],
+ 'NAME' --[[ Supported in v3.0 only. ]],
+ 'NICKNAME' --[[ Not supported in v2.1. ]], 'NOTE', 'ORG', 'PHOTO',
+ 'PRODID' --[[ Not supported in v2.1. ]],
+ 'PROFILE' --[[ Not supported in v4.0. ]],
+ 'RELATED' --[[ Supported in v4.0 only. ]], 'REV', 'ROLE',
+ 'SORT-STRING' --[[ Not supported in v4.0. ]], 'SOUND', 'SOURCE', 'TEL',
+ 'TITLE', 'TZ', 'UID', 'URL', 'XML' --[[ Supported in v4.0 only. ]]
+}, nil, true)) * #S(':;')
+
+local identifier = l.alpha^1 * l.digit^0 * (P('-') * l.alnum^1)^0
+
+-- Extension.
+local extension = token(l.TYPE,
+ l.starts_line(S('xX') * P('-') * identifier * #S(':;')))
+
+-- Parameter.
+local parameter = token(l.IDENTIFIER, l.starts_line(identifier * #S(':='))) +
+ token(l.STRING, identifier) * #S(':=')
+
+-- Operators.
+local operator = token(l.OPERATOR, S('.:;='))
+
+-- Group and property.
+local group_sequence = token(l.CONSTANT, l.starts_line(identifier)) *
+ token(l.OPERATOR, P('.')) *
+ (required_property + supported_property +
+ l.token(l.TYPE, S('xX') * P('-') * identifier) *
+ #S(':;'))
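+-- e.g. the leading "item1" in a grouped line like
+-- "item1.EMAIL;TYPE=INTERNET:john@example.com".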
+-- Begin vCard, end vCard.
+local begin_sequence = token(l.KEYWORD, P('BEGIN')) *
+ token(l.OPERATOR, P(':')) * token(l.COMMENT, P('VCARD'))
+local end_sequence = token(l.KEYWORD, P('END')) * token(l.OPERATOR, P(':')) *
+ token(l.COMMENT, P('VCARD'))
+
+-- vCard version (in v3.0 and v4.0 must appear immediately after BEGIN:VCARD).
+local version_sequence = token(l.KEYWORD, P('VERSION')) *
+ token(l.OPERATOR, P(':')) *
+ token(l.CONSTANT, l.digit^1 * (P('.') * l.digit^1)^-1)
+
+-- Data.
+local data = token(l.IDENTIFIER, l.any)
+
+-- Rules.
+M._rules = {
+ {'whitespace', ws},
+ {'begin_sequence', begin_sequence},
+ {'end_sequence', end_sequence},
+ {'version_sequence', version_sequence},
+ {'group_sequence', group_sequence},
+ {'required_property', required_property},
+ {'supported_property', supported_property},
+ {'extension', extension},
+ {'parameter', parameter},
+ {'operator', operator},
+ {'data', data},
+}
+
+-- Folding.
+M._foldsymbols = {
+ _patterns = {'BEGIN', 'END'},
+ [l.KEYWORD] = {['BEGIN'] = 1, ['END'] = -1}
+}
+
+return M
diff --git a/lexers/verilog.lua b/lexers/verilog.lua
new file mode 100644
index 0000000..bfee3b6
--- /dev/null
+++ b/lexers/verilog.lua
@@ -0,0 +1,101 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Verilog LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'verilog'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range('"'))
+
+-- Numbers.
+local bin_suffix = S('bB') * S('01_xXzZ')^1
+local oct_suffix = S('oO') * S('01234567_xXzZ')^1
+local dec_suffix = S('dD') * S('0123456789_xXzZ')^1
+local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1
+local number = token(l.NUMBER, (l.digit + '_')^1 + "'" *
+ (bin_suffix + oct_suffix + dec_suffix +
+ hex_suffix))
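+-- Note that a sized literal such as 4'b1010 is matched as two adjacent
+-- number tokens: the size (4) and the based value ('b1010).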
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match({
+ 'always', 'assign', 'begin', 'case', 'casex', 'casez', 'default', 'deassign',
+ 'disable', 'else', 'end', 'endcase', 'endfunction', 'endgenerate',
+ 'endmodule', 'endprimitive', 'endspecify', 'endtable', 'endtask', 'for',
+ 'force', 'forever', 'fork', 'function', 'generate', 'if', 'initial', 'join',
+ 'macromodule', 'module', 'negedge', 'posedge', 'primitive', 'repeat',
+ 'release', 'specify', 'table', 'task', 'wait', 'while',
+ -- Compiler directives.
+ '`include', '`define', '`undef', '`ifdef', '`ifndef', '`else', '`endif',
+ '`timescale', '`resetall', '`signed', '`unsigned', '`celldefine',
+ '`endcelldefine', '`default_nettype', '`unconnected_drive',
+ '`nounconnected_drive', '`protect', '`endprotect', '`protected',
+ '`endprotected', '`remove_gatename', '`noremove_gatename', '`remove_netname',
+ '`noremove_netname', '`expand_vectornets', '`noexpand_vectornets',
+ '`autoexpand_vectornets',
+ -- Signal strengths.
+ 'strong0', 'strong1', 'pull0', 'pull1', 'weak0', 'weak1', 'highz0', 'highz1',
+ 'small', 'medium', 'large'
+}, '`01'))
+
+-- Function.
+local func = token(l.FUNCTION, word_match({
+ '$stop', '$finish', '$time', '$stime', '$realtime', '$settrace',
+ '$cleartrace', '$showscopes', '$showvars', '$monitoron', '$monitoroff',
+ '$random', '$printtimescale', '$timeformat', '$display',
+ -- Built-in primitives.
+ 'and', 'nand', 'or', 'nor', 'xor', 'xnor', 'buf', 'bufif0', 'bufif1', 'not',
+ 'notif0', 'notif1', 'nmos', 'pmos', 'cmos', 'rnmos', 'rpmos', 'rcmos', 'tran',
+ 'tranif0', 'tranif1', 'rtran', 'rtranif0', 'rtranif1', 'pullup', 'pulldown'
+}, '$01'))
+
+-- Types.
+local type = token(l.TYPE, word_match({
+ 'integer', 'reg', 'time', 'realtime', 'defparam', 'parameter', 'event',
+ 'wire', 'wand', 'wor', 'tri', 'triand', 'trior', 'tri0', 'tri1', 'trireg',
+ 'vectored', 'scalared', 'input', 'output', 'inout',
+ 'supply0', 'supply1'
+}, '01'))
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=~+-/*<>%&|^~,:;()[]{}'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'number', number},
+ {'keyword', keyword},
+ {'function', func},
+ {'type', type},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'operator', operator},
+}
+
+M._foldsymbols = {
+ _patterns = {'[a-z]+', '[%(%){}]', '/%*', '%*/', '//'},
+ [l.KEYWORD] = {
+ case = 1, casex = 1, casez = 1, endcase = -1, ['function'] = 1,
+ endfunction = -1, fork = 1, join = -1, table = 1, endtable = -1, task = 1,
+ endtask = -1, generate = 1, endgenerate = -1, specify = 1, endspecify = -1,
+ primitive = 1, endprimitive = -1, ['module'] = 1, endmodule = -1, begin = 1,
+ ['end'] = -1
+ },
+ [l.OPERATOR] = {['('] = 1, [')'] = -1, ['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
+}
+
+return M
diff --git a/lexers/vhdl.lua b/lexers/vhdl.lua
new file mode 100644
index 0000000..4cad5f7
--- /dev/null
+++ b/lexers/vhdl.lua
@@ -0,0 +1,89 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- VHDL LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'vhdl'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '--' * l.nonnewline^0)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true, true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Numbers.
+local number = token(l.NUMBER, l.float + l.integer)
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ 'access', 'after', 'alias', 'all', 'architecture', 'array', 'assert',
+ 'attribute', 'begin', 'block', 'body', 'buffer', 'bus', 'case', 'component',
+ 'configuration', 'constant', 'disconnect', 'downto', 'else', 'elsif', 'end',
+ 'entity', 'exit', 'file', 'for', 'function', 'generate', 'generic', 'group',
+ 'guarded', 'if', 'impure', 'in', 'inertial', 'inout', 'is', 'label',
+ 'library', 'linkage', 'literal', 'loop', 'map', 'new', 'next', 'null', 'of',
+ 'on', 'open', 'others', 'out', 'package', 'port', 'postponed', 'procedure',
+ 'process', 'pure', 'range', 'record', 'register', 'reject', 'report',
+ 'return', 'select', 'severity', 'signal', 'shared', 'subtype', 'then', 'to',
+ 'transport', 'type', 'unaffected', 'units', 'until', 'use', 'variable',
+ 'wait', 'when', 'while', 'with', 'note', 'warning', 'error', 'failure',
+ 'and', 'nand', 'or', 'nor', 'xor', 'xnor', 'rol', 'ror', 'sla', 'sll', 'sra',
+ 'srl', 'mod', 'rem', 'abs', 'not',
+ 'false', 'true'
+})
+
+-- Functions.
+local func = token(l.FUNCTION, word_match{
+ 'rising_edge', 'shift_left', 'shift_right', 'rotate_left', 'rotate_right',
+ 'resize', 'std_match', 'to_integer', 'to_unsigned', 'to_signed', 'unsigned',
+ 'signed', 'to_bit', 'to_bitvector', 'to_stdulogic', 'to_stdlogicvector',
+ 'to_stdulogicvector'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'bit', 'bit_vector', 'character', 'boolean', 'integer', 'real', 'time',
+ 'string', 'severity_level', 'positive', 'natural', 'signed', 'unsigned',
+ 'line', 'text', 'std_logic', 'std_logic_vector', 'std_ulogic',
+ 'std_ulogic_vector', 'qsim_state', 'qsim_state_vector', 'qsim_12state',
+ 'qsim_12state_vector', 'qsim_strength', 'mux_bit', 'mux_vectory', 'reg_bit',
+ 'reg_vector', 'wor_bit', 'wor_vector'
+})
+
+-- Constants.
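+-- Names used in VHDL attribute expressions, e.g. clk'EVENT or sig'STABLE.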
+local constant = token(l.CONSTANT, word_match{
+ 'EVENT', 'BASE', 'LEFT', 'RIGHT', 'LOW', 'HIGH', 'ASCENDING', 'IMAGE',
+ 'VALUE', 'POS', 'VAL', 'SUCC', 'PRED', 'LEFTOF', 'RIGHTOF', 'RANGE',
+ 'REVERSE', 'LENGTH', 'DELAYED', 'STABLE', 'QUIET', 'TRANSACTION', 'ACTIVE',
+ 'LAST', 'DRIVING', 'SIMPLE', 'INSTANCE', 'PATH'
+})
+
+-- Identifiers.
+local word = (l.alpha + "'") * (l.alnum + "_" + "'")^1
+local identifier = token(l.IDENTIFIER, word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('=/!:;<>+-/*%&|^~()'))
+
+M._rules = {
+ {'whitespace', ws},
+ {'keyword', keyword},
+ {'function', func},
+ {'type', type},
+ {'constant', constant},
+ {'identifier', identifier},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'operator', operator},
+}
+
+return M
diff --git a/lexers/wsf.lua b/lexers/wsf.lua
new file mode 100644
index 0000000..fc024b2
--- /dev/null
+++ b/lexers/wsf.lua
@@ -0,0 +1,108 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- WSF LPeg lexer (based on XML).
+-- Contributed by Jeff Stone.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'wsf'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
+
+-- Strings.
+local sq_str = l.delimited_range("'", false, true)
+local dq_str = l.delimited_range('"', false, true)
+local string = #S('\'"') * l.last_char_includes('=') *
+ token(l.STRING, sq_str + dq_str)
+
+local in_tag = P(function(input, index)
+ local before = input:sub(1, index - 1)
+ local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+ if s and e then return s > e and index or nil end
+ if s then return index end
+ return input:find('^[^<]->', index) and index or nil
+end)
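+-- `in_tag` is a match-time predicate that succeeds only when the current
+-- position appears to be inside a tag: the nearest '<' before it is more
+-- recent than any '>', or a '>' follows with no intervening '<'.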
+
+-- Numbers.
+local number = #l.digit * l.last_char_includes('=') *
+ token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
+
+local alpha = R('az', 'AZ', '\127\255')
+local word_char = l.alnum + S('_-:.??')
+local identifier = (l.alpha + S('_-:.??')) * word_char^0
+
+-- Elements.
+local element = token('element', '<' * P('/')^-1 * identifier)
+
+-- Attributes.
+local attribute = token('attribute', identifier) * #(l.space^0 * '=')
+
+-- Closing tags.
+local tag_close = token('element', P('/')^-1 * '>')
+
+-- Equals.
+local equals = token(l.OPERATOR, '=') * in_tag
+
+-- Entities.
+local entity = token('entity', '&' * word_match{
+ 'lt', 'gt', 'amp', 'apos', 'quot'
+} * ';')
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'element', element},
+ {'tag_close', tag_close},
+ {'attribute', attribute},
+ {'equals', equals},
+ {'string', string},
+ {'number', number},
+ {'entity', entity}
+}
+
+M._tokenstyles = {
+ element = l.STYLE_KEYWORD,
+ attribute = l.STYLE_TYPE,
+ entity = l.STYLE_OPERATOR
+}
+
+M._foldsymbols = {
+ _patterns = {'</?', '/>', '<!%-%-', '%-%->'},
+ element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
+ [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
+}
+
+-- Finally, add JavaScript and VBScript as embedded languages.
+
+-- Tags that start embedded languages.
+M.embed_start_tag = element *
+ (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 *
+ ws^0 * tag_close
+M.embed_end_tag = element * tag_close
+
+-- Embedded JavaScript.
+local js = l.load('javascript')
+local js_start_rule = #(P('<script') * (P(function(input, index)
+ if input:find('^%s+language%s*=%s*(["\'])[jJ][ava]*[sS]cript%1', index) then
+ return index
+ end
+end) + '>')) * M.embed_start_tag -- <script language="javascript">
+local js_end_rule = #('</script' * ws^0 * '>') * M.embed_end_tag -- </script>
+l.embed_lexer(M, js, js_start_rule, js_end_rule)
+
+-- Embedded VBScript.
+local vbs = l.load('vbscript')
+local vbs_start_rule = #(P('<script') * (P(function(input, index)
+ if input:find('^%s+language%s*=%s*(["\'])[vV][bB][sS]cript%1', index) then
+ return index
+ end
+end) + '>')) * M.embed_start_tag -- <script language="vbscript">
+local vbs_end_rule = #('</script' * ws^0 * '>') * M.embed_end_tag -- </script>
+l.embed_lexer(M, vbs, vbs_start_rule, vbs_end_rule)
+
+return M
diff --git a/lexers/xml.lua b/lexers/xml.lua
new file mode 100644
index 0000000..25e6e45
--- /dev/null
+++ b/lexers/xml.lua
@@ -0,0 +1,99 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- XML LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local M = {_NAME = 'xml'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments and CDATA.
+local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
+local cdata = token('cdata', '<![CDATA[' * (l.any - ']]>')^0 * P(']]>')^-1)
+
+-- Strings.
+local sq_str = l.delimited_range("'", false, true)
+local dq_str = l.delimited_range('"', false, true)
+local string = #S('\'"') * l.last_char_includes('=') *
+ token(l.STRING, sq_str + dq_str)
+
+local in_tag = P(function(input, index)
+ local before = input:sub(1, index - 1)
+ local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+ if s and e then return s > e and index or nil end
+ if s then return index end
+ return input:find('^[^<]->', index) and index or nil
+end)
+
+-- Numbers.
+local number = #l.digit * l.last_char_includes('=') *
+ token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
+
+local alpha = R('az', 'AZ', '\127\255')
+local word_char = l.alnum + S('_-:.??')
+local identifier = (l.alpha + S('_-:.??')) * word_char^0
+local namespace = token(l.OPERATOR, ':') * token('namespace', identifier)
+
+-- Elements.
+local element = token('element', '<' * P('/')^-1 * identifier) * namespace^-1
+
+-- Attributes.
+local attribute = token('attribute', identifier) * namespace^-1 *
+ #(l.space^0 * '=')
+
+-- Closing tags.
+local close_tag = token('element', P('/')^-1 * '>')
+
+-- Equals.
+local equals = token(l.OPERATOR, '=') * in_tag
+
+-- Entities.
+local entity = token('entity', '&' * word_match{
+ 'lt', 'gt', 'amp', 'apos', 'quot'
+} * ';')
+
+-- Doctypes and other markup tags.
+local doctype = token('doctype', P('<!DOCTYPE')) * ws *
+ token('doctype', identifier) * (ws * identifier)^-1 *
+ (1 - P('>'))^0 * token('doctype', '>')
+
+-- Processing instructions.
+local proc_insn = token('proc_insn', P('<?') * (1 - P('?>'))^0 * P('?>')^-1)
+
+M._rules = {
+ {'whitespace', ws},
+ {'comment', comment},
+ {'cdata', cdata},
+ {'doctype', doctype},
+ {'proc_insn', proc_insn},
+ {'element', element},
+ {'close_tag', close_tag},
+ {'attribute', attribute},
+ {'equals', equals},
+ {'string', string},
+ {'number', number},
+ {'entity', entity},
+}
+
+M._tokenstyles = {
+ element = l.STYLE_KEYWORD,
+ namespace = l.STYLE_CLASS,
+ attribute = l.STYLE_TYPE,
+ cdata = l.STYLE_COMMENT,
+ entity = l.STYLE_OPERATOR,
+ doctype = l.STYLE_COMMENT,
+ proc_insn = l.STYLE_COMMENT,
+ --markup = l.STYLE_COMMENT
+}
+
+M._foldsymbols = {
+ _patterns = {'</?', '/>', '<!%-%-', '%-%->', '<!%[CDATA%[', '%]%]>'},
+ element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
+ [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
+ cdata = {['<![CDATA['] = 1, [']]>'] = -1}
+}
+
+return M
diff --git a/lexers/xtend.lua b/lexers/xtend.lua
new file mode 100644
index 0000000..500dc33
--- /dev/null
+++ b/lexers/xtend.lua
@@ -0,0 +1,112 @@
+-- Copyright (c) 2014-2015 Piotr Orzechowski [drzewo.org]. See LICENSE.
+-- Xtend LPeg lexer.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'xtend'}
+
+-- Whitespace.
+local ws = token(l.WHITESPACE, l.space^1)
+
+-- Comments.
+local line_comment = '//' * l.nonnewline_esc^0
+local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
+local comment = token(l.COMMENT, line_comment + block_comment)
+
+-- Strings.
+local sq_str = l.delimited_range("'", true)
+local dq_str = l.delimited_range('"', true)
+local string = token(l.STRING, sq_str + dq_str)
+
+-- Templates.
+local templ_str = "'''" * (l.any - P("'''"))^0 * P("'''")^-1
+local template = token('template', templ_str, true)
+
+-- Numbers.
+local small_suff = S('lL')
+local med_suff = P(S('bB') * S('iI'))
+local large_suff = S('dD') + S('fF') + P(S('bB') * S('dD'))
+local exp = S('eE') * l.digit^1
+
+local dec_inf = ('_' * l.digit^1)^0
+local hex_inf = ('_' * l.xdigit^1)^0
+local float_pref = l.digit^1 * '.' * l.digit^1
+local float_suff = exp^-1 * med_suff^-1 * large_suff^-1
+
+local dec = l.digit * dec_inf * (small_suff^-1 + float_suff)
+local hex = l.hex_num * hex_inf * P('#' * (small_suff + med_suff))^-1
+local float = float_pref * dec_inf * float_suff
+
+local number = token(l.NUMBER, float + hex + dec)
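+-- Styles literals such as 1_000_000, 42L, 0xbeef#BI and 3.14e2bd.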
+
+-- Keywords.
+local keyword = token(l.KEYWORD, word_match{
+ -- General.
+ 'abstract', 'annotation', 'as', 'case', 'catch', 'class', 'create', 'def',
+ 'default', 'dispatch', 'do', 'else', 'enum', 'extends', 'extension', 'final',
+ 'finally', 'for', 'if', 'implements', 'import', 'interface', 'instanceof',
+ 'it', 'new', 'override', 'package', 'private', 'protected', 'public',
+ 'return', 'self', 'static', 'super', 'switch', 'synchronized', 'this',
+ 'throw', 'throws', 'try', 'typeof', 'val', 'var', 'while',
+ -- Templates.
+ -- 'AFTER', 'BEFORE', 'ENDFOR', 'ENDIF', 'FOR', 'IF', 'SEPARATOR',
+ -- Literals.
+ 'true', 'false', 'null'
+})
+
+-- Types.
+local type = token(l.TYPE, word_match{
+ 'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short', 'void',
+ 'Boolean', 'Byte', 'Character', 'Double', 'Float', 'Integer', 'Long', 'Short',
+ 'String'
+})
+
+-- Identifiers.
+local identifier = token(l.IDENTIFIER, l.word)
+
+-- Operators.
+local operator = token(l.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#'))
+
+-- Annotations.
+local annotation = token('annotation', '@' * l.word)
+
+-- Functions.
+local func = token(l.FUNCTION, l.word) * #P('(')
+
+-- Classes.
+local class = token(l.KEYWORD, P('class')) * ws^1 * token(l.CLASS, l.word)
+
+-- Rules.
+M._rules = {
+ {'whitespace', ws},
+ {'class', class},
+ {'keyword', keyword},
+ {'type', type},
+ {'function', func},
+ {'identifier', identifier},
+ {'template', template},
+ {'string', string},
+ {'comment', comment},
+ {'number', number},
+ {'annotation', annotation},
+ {'operator', operator},
+ {'error', token(l.ERROR, l.any)},
+}
+
+-- Token styles.
+M._tokenstyles = {
+ annotation = l.STYLE_PREPROCESSOR,
+ template = l.STYLE_EMBEDDED
+}
+
+-- Folding.
+M._foldsymbols = {
+ _patterns = {'[{}]', '/%*', '%*/', '//', 'import'},
+ [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
+ [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')},
+ [l.KEYWORD] = {['import'] = l.fold_line_comments('import')}
+}
+
+return M
diff --git a/lexers/yaml.lua b/lexers/yaml.lua
new file mode 100644
index 0000000..9c015e3
--- /dev/null
+++ b/lexers/yaml.lua
@@ -0,0 +1,115 @@
+-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- YAML LPeg lexer.
+-- It does not keep track of indentation perfectly.
+
+local l = require('lexer')
+local token, word_match = l.token, l.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'yaml'}
+
+-- Whitespace.
+local indent = #l.starts_line(S(' \t')) *
+ (token(l.WHITESPACE, ' ') + token('indent_error', '\t'))^1
+local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1)
+
+-- Comments.
+local comment = token(l.COMMENT, '#' * l.nonnewline^0)
+
+-- Strings.
+local string = token(l.STRING, l.delimited_range("'") + l.delimited_range('"'))
+
+-- Numbers.
+local integer = l.dec_num + l.hex_num + '0' * S('oO') * R('07')^1
+local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
+local number = token(l.NUMBER, special_num + l.float + integer)
+
+-- Timestamps.
+local ts = token('timestamp', l.digit * l.digit * l.digit * l.digit * -- year
+ '-' * l.digit * l.digit^-1 * -- month
+ '-' * l.digit * l.digit^-1 * -- day
+ ((S(' \t')^1 + S('tT'))^-1 * -- separator
+ l.digit * l.digit^-1 * -- hour
+ ':' * l.digit * l.digit * -- minute
+ ':' * l.digit * l.digit * -- second
+ ('.' * l.digit^0)^-1 * -- fraction
+ ('Z' + -- timezone
+ S(' \t')^0 * S('-+') * l.digit * l.digit^-1 *
+ (':' * l.digit * l.digit)^-1)^-1)^-1)
+
+-- Constants.
+local constant = token(l.CONSTANT,
+ word_match({'null', 'true', 'false'}, nil, true))
+
+-- Types.
+local type = token(l.TYPE, '!!' * word_match({
+ -- Collection types.
+ 'map', 'omap', 'pairs', 'set', 'seq',
+ -- Scalar types.
+ 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
+ 'value', 'yaml'
+}, nil, true) + '!' * l.delimited_range('<>'))
+
+-- Document boundaries.
+local doc_bounds = token('document', l.starts_line(P('---') + '...'))
+
+-- Directives.
+local directive = token('directive', l.starts_line('%') * l.nonnewline^1)
+
+local word = (l.alpha + '-' * -l.space) * (l.alnum + '-')^0
+
+-- Keys and literals.
+local colon = S(' \t')^0 * ':' * (l.space + -1)
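+-- Only the first `name:` on a line is highlighted as a key; the match-time
+-- function below rejects a candidate when another key already precedes it
+-- on the same line.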
+local key = token(l.KEYWORD,
+ #word * (l.nonnewline - colon)^1 * #colon *
+ P(function(input, index)
+ local line = input:sub(1, index - 1):match('[^\r\n]+$')
+ return not line:find('[%w-]+:') and index
+ end))
+local value = #word * (l.nonnewline - l.space^0 * S(',]}'))^1
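+-- A block scalar introduced by `|` or `>` extends (approximately) until the
+-- first following line that is indented less than the block's first line.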
+local block = S('|>') * S('+-')^-1 * (l.newline + -1) * function(input, index)
+ local rest = input:sub(index)
+ local level = #rest:match('^( *)')
+ for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do
+ if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then
+ return index + pos - 1
+ end
+ end
+ return #input + 1
+end
+local literal = token('literal', value + block)
+
+-- Indicators.
+local anchor = token(l.LABEL, '&' * word)
+local alias = token(l.VARIABLE, '*' * word)
+local tag = token('tag', '!' * word * P('!')^-1)
+local reserved = token(l.ERROR, S('@`') * word)
+local indicator_chars = token(l.OPERATOR, S('-?:,[]{}!'))
+
+M._rules = {
+ {'indent', indent},
+ {'whitespace', ws},
+ {'comment', comment},
+ {'doc_bounds', doc_bounds},
+ {'key', key},
+ {'literal', literal},
+ {'timestamp', ts},
+ {'number', number},
+ {'constant', constant},
+ {'type', type},
+ {'indicator', tag + indicator_chars + alias + anchor + reserved},
+ {'directive', directive},
+}
+
+M._tokenstyles = {
+ indent_error = 'back:%(color.red)',
+ document = l.STYLE_CONSTANT,
+ literal = l.STYLE_DEFAULT,
+ timestamp = l.STYLE_NUMBER,
+ tag = l.STYLE_CLASS,
+ directive = l.STYLE_PREPROCESSOR,
+}
+
+M._FOLDBYINDENTATION = true
+
+return M