author    qiu-x <alex@alexslomka.xyz>  2022-06-29 07:56:51 +0200
committer Felix Van der Jeugt <felix.vanderjeugt@posteo.net>  2022-11-29 21:57:18 +0100
commit    8a420ecc4c1ed50111464ec66901bd983eaf2dbd (patch)
tree      f31d2186cafaee6e7f18d32fe99144c3e8148c00 /lua/lexers/perl.lua
parent    981b90a203484182feace48471fe2b53dae7676f (diff)
Resync the lexers with Scintillua
- Resync the lexers with Scintillua
- Update the lexer readme
- Update `zenburn` theme to fix some highlighting issues
- lexers: redirect print function to vis:info()
- Fix support for custom style names
- As per error message: "lexer.delimited_range() is deprecated, use lexer.range()"
- Remove remaining `lexer.delimited_range()` call
- Set syntax to `nil` if the file type has no matching lexer
- Update Go lexer for Go 1.18
- lexers/dsv: convert to new lexer format
  (cherry picked from commit 9edbc3cd9ea1d7142b1305840432a3d2739e755a)
- lexers/gemini: disable legacy gemini lexer
  This reverts commit 468f9ee1b027a7ce98b1a249fa1af5888feeb989. It is in legacy
  format and of questionable quality. Ideally it should be contributed upstream,
  from where it will eventually trickle down to us.
- lexers/git-rebase: convert to new lexer format
  (cherry picked from commit 4000a4cc9ac4a4c2869dfae772b977a82aee8d8c)
- lexers/strace: convert to new lexer format
  (cherry picked from commit e420451320d97eb164f5629c1bcfab0b595be29d)
- lexers/typescript: add new upstream lexer revision 28e2b60
  (cherry picked from commit 7326e6deecdaa75fa94ae9ebdb653f9f907b33f2)
- use `package.searchpath` instead of a local `searchpath` function
- Restore `filetype: support filetype detection via hashbang`
- Remove redundant comment
- Restore gemini lexer
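For context, the "lexer.delimited_range() is deprecated, use lexer.range()" item corresponds to the call sites rewritten in the diff below. A minimal sketch of the migration, with illustrative pattern names; the argument order range(s, e, single_line, escapes, balanced) is taken from the changed calls:

    local lexer = require('lexer')
    -- old, deprecated form: delimited_range(chars, single_line, no_escape, balanced)
    -- local sq_str = lexer.delimited_range("'")
    -- new form: the end delimiter e defaults to s when omitted
    local sq_str = lexer.range("'")
    -- balanced pair with escapes, as used below for q(), qw(), m() style literals
    local balanced = lexer.range('(', ')', false, true, true)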
Diffstat (limited to 'lua/lexers/perl.lua')
-rw-r--r--  lua/lexers/perl.lua  220
1 file changed, 103 insertions(+), 117 deletions(-)
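The rewritten lexer follows the new Scintillua format: build a lexer object with lexer.new(), attach named rules with add_rule(), and declare fold points with add_fold_point(). A minimal standalone sketch of that shape (the 'example' name and its rules are hypothetical, not part of this commit):

    local lexer = require('lexer')
    local token, word_match = lexer.token, lexer.word_match

    local lex = lexer.new('example')
    -- rules are tried in the order they are added
    lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
    lex:add_rule('keyword', token(lexer.KEYWORD, word_match('if else while')))
    lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
    lex:add_fold_point(lexer.OPERATOR, '{', '}')
    return lex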
diff --git a/lua/lexers/perl.lua b/lua/lexers/perl.lua
index b490c7f..40727a4 100644
--- a/lua/lexers/perl.lua
+++ b/lua/lexers/perl.lua
@@ -1,164 +1,150 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- Perl LPeg lexer.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'perl'}
+local lex = lexer.new('perl')
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
--- Comments.
-local line_comment = '#' * l.nonnewline_esc^0
-local block_comment = l.starts_line('=') * l.alpha *
- (l.any - l.newline * '=cut')^0 * (l.newline * '=cut')^-1
-local comment = token(l.COMMENT, block_comment + line_comment)
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT', --
+ 'require', 'use', --
+ 'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if', 'last', 'local', 'my',
+ 'next', 'our', 'package', 'return', 'sub', 'unless', 'until', 'while', '__FILE__', '__LINE__',
+ '__PACKAGE__', --
+ 'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
+}))
+-- Markers.
+lex:add_rule('marker', token(lexer.COMMENT, word_match('__DATA__ __END__') * lexer.any^0))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match{
+ 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller', 'chdir', 'chmod',
+ 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir', 'close', 'connect', 'cos', 'crypt',
+ 'dbmclose', 'dbmopen', 'defined', 'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent',
+ 'endnetent', 'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists', 'exit',
+ 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline', 'getc', 'getgrent', 'getgrgid',
+ 'getgrnam', 'gethostbyaddr', 'gethostbyname', 'gethostent', 'getlogin', 'getnetbyaddr',
+ 'getnetbyname', 'getnetent', 'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
+ 'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid', 'getservbyname',
+ 'getservbyport', 'getservent', 'getsockname', 'getsockopt', 'glob', 'gmtime', 'goto', 'grep',
+ 'hex', 'import', 'index', 'int', 'ioctl', 'join', 'keys', 'kill', 'lcfirst', 'lc', 'length',
+ 'link', 'listen', 'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
+ 'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop', 'pos', 'printf', 'print',
+ 'prototype', 'push', 'quotemeta', 'rand', 'readdir', 'read', 'readlink', 'recv', 'redo', 'ref',
+ 'rename', 'reset', 'reverse', 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek',
+ 'select', 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent', 'setpgrp',
+ 'setpriority', 'setprotoent', 'setpwent', 'setservent', 'setsockopt', 'shift', 'shmctl', 'shmget',
+ 'shmread', 'shmwrite', 'shutdown', 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice',
+ 'split', 'sprintf', 'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
+ 'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time', 'times', 'truncate',
+ 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack', 'unshift', 'untie', 'utime', 'values',
+ 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'
+}))
+
+-- Strings.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
-local literal_delimitted = P(function(input, index) -- for single delimiter sets
+local literal_delimited = P(function(input, index) -- for single delimiter sets
local delimiter = input:sub(index, index)
if not delimiter:find('%w') then -- only non alpha-numerics
- local match_pos, patt
+ local patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = l.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = l.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
- match_pos = lpeg.match(patt, input, index)
+ local match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
end
end)
-local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
+local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets
local delimiter = input:sub(index, index)
- -- Only consider non-alpha-numerics and non-spaces as delimiters. The
- -- non-spaces are used to ignore operators like "-s".
+ -- Only consider non-alpha-numerics and non-spaces as delimiters. The non-spaces are used to
+ -- ignore operators like "-s".
if not delimiter:find('[%w ]') then
- local match_pos, patt
+ local patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = l.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = l.delimited_range(delimiter)
- end
- first_match_pos = lpeg.match(patt, input, index)
- if not first_match_pos then
- return #input + 1
+ patt = lexer.range(delimiter)
end
- final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
+ local first_match_pos = lpeg.match(patt, input, index)
+ local final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
if not final_match_pos then -- using (), [], {}, or <> notation
- final_match_pos = lpeg.match(l.space^0 * patt, input, first_match_pos)
+ final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
end
return final_match_pos or #input + 1
end
end)
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local cmd_str = l.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local cmd_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
if s == index and delimiter then
local end_heredoc = '[\n\r\f]+'
- local _, e = input:find(end_heredoc..delimiter, e)
+ e = select(2, input:find(end_heredoc .. delimiter, e))
return e and e + 1 or #input + 1
end
end)
-local lit_str = 'q' * P('q')^-1 * literal_delimitted
-local lit_array = 'qw' * literal_delimitted
-local lit_cmd = 'qx' * literal_delimitted
-local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
-local regex_str = #P('/') * l.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
- l.delimited_range('/', true) * S('imosx')^0
-local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
-local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
-local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-local string = token(l.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
- lit_array + lit_cmd + lit_tr) +
- token(l.REGEX, regex_str + lit_regex + lit_match + lit_sub)
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
+local lit_str = 'q' * P('q')^-1 * literal_delimited
+local lit_array = 'qw' * literal_delimited
+local lit_cmd = 'qx' * literal_delimited
+local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
+local string = token(lexer.STRING,
+ sq_str + dq_str + cmd_str + heredoc + lit_str + lit_array + lit_cmd + lit_tr)
+local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
+ lexer.range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimited * S('imosx')^0
+local lit_match = 'm' * literal_delimited * S('cgimosx')^0
+local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)
+lex:add_rule('string', string + regex)
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'STDIN', 'STDOUT', 'STDERR', 'BEGIN', 'END', 'CHECK', 'INIT',
- 'require', 'use',
- 'break', 'continue', 'do', 'each', 'else', 'elsif', 'foreach', 'for', 'if',
- 'last', 'local', 'my', 'next', 'our', 'package', 'return', 'sub', 'unless',
- 'until', 'while', '__FILE__', '__LINE__', '__PACKAGE__',
- 'and', 'or', 'not', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'
-})
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- Functions.
-local func = token(l.FUNCTION, word_match({
- 'abs', 'accept', 'alarm', 'atan2', 'bind', 'binmode', 'bless', 'caller',
- 'chdir', 'chmod', 'chomp', 'chop', 'chown', 'chr', 'chroot', 'closedir',
- 'close', 'connect', 'cos', 'crypt', 'dbmclose', 'dbmopen', 'defined',
- 'delete', 'die', 'dump', 'each', 'endgrent', 'endhostent', 'endnetent',
- 'endprotoent', 'endpwent', 'endservent', 'eof', 'eval', 'exec', 'exists',
- 'exit', 'exp', 'fcntl', 'fileno', 'flock', 'fork', 'format', 'formline',
- 'getc', 'getgrent', 'getgrgid', 'getgrnam', 'gethostbyaddr', 'gethostbyname',
- 'gethostent', 'getlogin', 'getnetbyaddr', 'getnetbyname', 'getnetent',
- 'getpeername', 'getpgrp', 'getppid', 'getpriority', 'getprotobyname',
- 'getprotobynumber', 'getprotoent', 'getpwent', 'getpwnam', 'getpwuid',
- 'getservbyname', 'getservbyport', 'getservent', 'getsockname', 'getsockopt',
- 'glob', 'gmtime', 'goto', 'grep', 'hex', 'import', 'index', 'int', 'ioctl',
- 'join', 'keys', 'kill', 'lcfirst', 'lc', 'length', 'link', 'listen',
- 'localtime', 'log', 'lstat', 'map', 'mkdir', 'msgctl', 'msgget', 'msgrcv',
- 'msgsnd', 'new', 'oct', 'opendir', 'open', 'ord', 'pack', 'pipe', 'pop',
- 'pos', 'printf', 'print', 'prototype', 'push', 'quotemeta', 'rand', 'readdir',
- 'read', 'readlink', 'recv', 'redo', 'ref', 'rename', 'reset', 'reverse',
- 'rewinddir', 'rindex', 'rmdir', 'scalar', 'seekdir', 'seek', 'select',
- 'semctl', 'semget', 'semop', 'send', 'setgrent', 'sethostent', 'setnetent',
- 'setpgrp', 'setpriority', 'setprotoent', 'setpwent', 'setservent',
- 'setsockopt', 'shift', 'shmctl', 'shmget', 'shmread', 'shmwrite', 'shutdown',
- 'sin', 'sleep', 'socket', 'socketpair', 'sort', 'splice', 'split', 'sprintf',
- 'sqrt', 'srand', 'stat', 'study', 'substr', 'symlink', 'syscall', 'sysread',
- 'sysseek', 'system', 'syswrite', 'telldir', 'tell', 'tied', 'tie', 'time',
- 'times', 'truncate', 'ucfirst', 'uc', 'umask', 'undef', 'unlink', 'unpack',
- 'unshift', 'untie', 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray',
- 'warn', 'write'
-}, '2'))
+-- Comments.
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha), lexer.starts_line('=cut'))
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
+local hex = '0' * S('xX') * lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0
+local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0 * -lexer.xdigit
+local integer = S('+-')^-1 * (hex + bin + dec)
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Variables.
-local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
- S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
- ':' * (l.any - ':') + P('$') * -l.word + l.digit^1)
-local plain_var = ('$#' + S('$@%')) * P('$')^0 * l.word + '$#'
-local variable = token(l.VARIABLE, special_var + plain_var)
+-- LuaFormatter off
+local special_var = '$' * (
+ '^' * S('ADEFHILMOPSTWX')^-1 +
+ S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+ ':' * (lexer.any - ':') +
+ P('$') * -lexer.word +
+ lexer.digit^1)
+-- LuaFormatter on
+local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
+lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))
-- Operators.
-local operator = token(l.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}'))
-
--- Markers.
-local marker = token(l.COMMENT, word_match{'__DATA__', '__END__'} * l.any^0)
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'marker', marker},
- {'function', func},
- {'string', string},
- {'identifier', identifier},
- {'comment', comment},
- {'number', number},
- {'variable', variable},
- {'operator', operator},
-}
+lex:add_rule('operator', token(lexer.OPERATOR, S('-<>+*!~\\=/%&|^.,?:;()[]{}')))
-M._foldsymbols = {
- _patterns = {'[%[%]{}]', '#'},
- [l.OPERATOR] = {['['] = 1, [']'] = -1, ['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex