about | summary | refs | log | tree | commit | diff
path: root/lua/lexers/awk.lua
diff options
context:
space:
mode:
author: qiu-x <alex@alexslomka.xyz> 2022-06-29 07:56:51 +0200
committer: Felix Van der Jeugt <felix.vanderjeugt@posteo.net> 2022-11-29 21:57:18 +0100
commit: 8a420ecc4c1ed50111464ec66901bd983eaf2dbd (patch)
tree: f31d2186cafaee6e7f18d32fe99144c3e8148c00 /lua/lexers/awk.lua
parent: 981b90a203484182feace48471fe2b53dae7676f (diff)
download: vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.gz
download: vis-8a420ecc4c1ed50111464ec66901bd983eaf2dbd.tar.xz
Resync the lexers with Scintillua
- Resync the lexers with Scintillua
- Update the lexer readme
- Update `zenburn` theme to fix some highlighting issues
- lexers: redirect print function to vis:info()
- Fix support for custom style names
- As per error message "lexer.delimited_range() is deprecated, use lexer.range()".
- Remove remaining `lexer.delimited_range()` call
- Set syntax to `nil` if the file type has no matching lexer
- Updated Go lexer for Go 1.18.
- lexers/dsv: convert to new lexer format (cherry picked from commit 9edbc3cd9ea1d7142b1305840432a3d2739e755a)
- lexers/gemini: disable legacy gemini lexer This reverts commit 468f9ee1b027a7ce98b1a249fa1af5888feeb989. It is in legacy format and of questionable quality. Ideally it should be contributed upstream from where it will eventually trickle down to us.
- lexers/git-rebase: convert to new lexer format (cherry picked from commit 4000a4cc9ac4a4c2869dfae772b977a82aee8d8c)
- lexers/strace: convert to new lexer format (cherry picked from commit e420451320d97eb164f5629c1bcfab0b595be29d)
- lexers/typescript: add new upstream lexer revision 28e2b60 (cherry picked from commit 7326e6deecdaa75fa94ae9ebdb653f9f907b33f2)
- use `package.searchpath` instead of a local `searchpath` function
- Restore `filetype: support filetype detection via hashbang`
- Remove redundant comment
- Restore gemini lexer
Diffstat (limited to 'lua/lexers/awk.lua')
-rw-r--r-- lua/lexers/awk.lua | 174
1 files changed, 62 insertions, 112 deletions
diff --git a/lua/lexers/awk.lua b/lua/lexers/awk.lua
index 87e39d9..0b3f9bf 100644
--- a/lua/lexers/awk.lua
+++ b/lua/lexers/awk.lua
@@ -1,12 +1,12 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
-- AWK LPeg lexer.
-- Modified by Wolfgang Seeberg 2012, 2013.
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
-local M = {_NAME = 'awk'}
+local lex = lexer.new('awk')
local LEFTBRACKET = '['
local RIGHTBRACKET = ']'
@@ -20,14 +20,13 @@ local DQUOTE = '"'
local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'}
local COMPANION = {['('] = '[', ['['] = '('}
local CC = {
- alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1,
- print = 1, punct = 1, space = 1, upper = 1, xdigit = 1
+ alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1, print = 1, punct = 1,
+ space = 1, upper = 1, xdigit = 1
}
local LastRegexEnd = 0
local BackslashAtCommentEnd = 0
local KW_BEFORE_RX = {
- case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1,
- ['return'] = 1
+ case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1, ['return'] = 1
}
local function findKeyword(input, e)
@@ -146,9 +145,7 @@ local function scanGawkRegex(input, index)
return false
end
-- Is only called immediately after scanGawkRegex().
-local function scanRegex()
- return ScanRegexResult
-end
+local function scanRegex() return ScanRegexResult end
local function scanString(input, index)
local i = index
@@ -160,7 +157,7 @@ local function scanString(input, index)
return i + 1
elseif input:sub(i, i) == BACKSLASH then
i = i + 1
- -- l.delimited_range() doesn't handle CRLF.
+ -- lexer.range() doesn't handle CRLF.
if input:sub(i, i + 1) == CRLF then i = i + 1 end
end
i = i + 1
@@ -168,8 +165,7 @@ local function scanString(input, index)
return false
end
--- purpose: prevent isRegex() from entering a comment line that ends with a
--- backslash.
+-- purpose: prevent isRegex() from entering a comment line that ends with a backslash.
local function scanComment(input, index)
local _, i = input:find('[^\r\n]*', index)
if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end
@@ -220,115 +216,69 @@ local function scanFieldDelimiters(input, index)
end
-- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(l.COMMENT, '#' * P(scanComment))
+lex:add_rule('comment', token(lexer.COMMENT, '#' * P(scanComment)))
-- Strings.
-local string = token(l.STRING, DQUOTE * P(scanString))
-
--- Regular expressions.
--- Slash delimited regular expressions are preceded by most operators or
--- the keywords 'print' and 'case', possibly on a preceding line. They
--- can contain unescaped slashes and brackets in brackets. Some escape
--- sequences like '\S', '\s' have special meanings with Gawk. Tokens that
--- contain them are displayed differently.
-local regex = token(l.REGEX, SLASH * P(scanRegex))
-local gawkRegex = token('gawkRegex', SLASH * P(scanGawkRegex))
+lex:add_rule('string', token(lexer.STRING, DQUOTE * P(scanString)))
--- no leading sign because it might be binary.
-local float = ((l.digit ^ 1 * ('.' * l.digit ^ 0) ^ -1) +
- ('.' * l.digit ^ 1)) * (S('eE') * S('+-') ^ -1 * l.digit ^ 1) ^ -1
--- Numbers.
-local number = token(l.NUMBER, float)
-local gawkNumber = token('gawkNumber', l.hex_num + l.oct_num)
-
--- Operators.
-local operator = token(l.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))
-local gawkOperator = token('gawkOperator', P("|&") + "@" + "**=" + "**")
+-- No leading sign because it might be binary.
+local float = ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + ('.' * lexer.digit^1)) *
+ (S('eE') * S('+-')^-1 * lexer.digit^1)^-1
-- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc.
-local field = token('field', P('$') * S('$+-') ^ 0 *
- (float + (l.word ^ 0 * '(' * P(scanFieldDelimiters)) +
- (l.word ^ 1 * ('[' * P(scanFieldDelimiters)) ^ -1) +
- ('"' * P(scanString)) + ('/' * P(eatRegex) * '/')))
-
--- Functions.
-local func = token(l.FUNCTION, l.word * #P('('))
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do',
- 'else', 'exit', 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if',
- 'in', 'index', 'int', 'length', 'log', 'match', 'next', 'nextfile', 'print',
- 'printf', 'rand', 'return', 'sin', 'split', 'sprintf', 'sqrt', 'srand', 'sub',
- 'substr', 'system', 'tolower', 'toupper', 'while'
-})
-
-local gawkKeyword = token('gawkKeyword', word_match{
- 'BEGINFILE', 'ENDFILE', 'adump', 'and', 'asort', 'asorti', 'bindtextdomain',
- 'case', 'compl', 'dcgettext', 'dcngettext', 'default', 'extension', 'func',
- 'gensub', 'include', 'isarray', 'load', 'lshift', 'mktime', 'or', 'patsplit',
- 'rshift', 'stopme', 'strftime', 'strtonum', 'switch', 'systime', 'xor'
-})
+lex:add_rule('field', token('field', '$' * S('$+-')^0 *
+ (float + lexer.word^0 * '(' * P(scanFieldDelimiters) + lexer.word^1 *
+ ('[' * P(scanFieldDelimiters))^-1 + '"' * P(scanString) + '/' * P(eatRegex) * '/')))
+lex:add_style('field', lexer.styles.label)
-local builtInVariable = token('builtInVariable', word_match{
- 'ARGC', 'ARGV', 'CONVFMT', 'ENVIRON', 'FILENAME', 'FNR', 'FS', 'NF', 'NR',
- 'OFMT', 'OFS', 'ORS', 'RLENGTH', 'RS', 'RSTART', 'SUBSEP'
-})
-
-local gawkBuiltInVariable = token('gawkBuiltInVariable', word_match {
- 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE',
- 'LINT', 'PREC', 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN'
-})
-
--- Within each group order matters, but the groups themselves (except the
--- last) can be in any order.
-M._rules = {
- {'whitespace', ws},
-
- {'comment', comment},
-
- {'string', string},
+-- Regular expressions.
+-- Slash delimited regular expressions are preceded by most operators or the keywords 'print'
+-- and 'case', possibly on a preceding line. They can contain unescaped slashes and brackets
+-- in brackets. Some escape sequences like '\S', '\s' have special meanings with Gawk. Tokens
+-- that contain them are displayed differently.
+lex:add_rule('gawkRegex', token('gawkRegex', SLASH * P(scanGawkRegex)))
+lex:add_style('gawkRegex', lexer.styles.preprocessor .. {underlined = true})
+lex:add_rule('regex', token(lexer.REGEX, SLASH * P(scanRegex)))
- {'field', field},
+-- Operators.
+lex:add_rule('gawkOperator', token('gawkOperator', P("|&") + "@" + "**=" + "**"))
+lex:add_style('gawkOperator', lexer.styles.operator .. {underlined = true})
+lex:add_rule('operator', token(lexer.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~')))
- {'gawkRegex', gawkRegex},
- {'regex', regex},
- {'gawkOperator', gawkOperator},
- {'operator', operator},
+-- Numbers.
+lex:add_rule('gawkNumber', token('gawkNumber', lexer.hex_num + lexer.oct_num))
+lex:add_style('gawkNumber', lexer.styles.number .. {underlined = true})
+lex:add_rule('number', token(lexer.NUMBER, float))
- {'gawkNumber', gawkNumber},
- {'number', number},
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+ 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do', 'else', 'exit',
+ 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if', 'in', 'index', 'int', 'length',
+ 'log', 'match', 'next', 'nextfile', 'print', 'printf', 'rand', 'return', 'sin', 'split',
+ 'sprintf', 'sqrt', 'srand', 'sub', 'substr', 'system', 'tolower', 'toupper', 'while'
+}))
+
+lex:add_rule('builtInVariable', token('builtInVariable', word_match(
+ 'ARGC ARGV CONVFMT ENVIRON FILENAME FNR FS NF NR OFMT OFS ORS RLENGTH RS RSTART SUBSEP')))
+lex:add_style('builtInVariable', lexer.styles.constant)
+
+lex:add_rule('gawkBuiltInVariable', token('gawkBuiltInVariable', word_match{
+ 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE', 'LINT', 'PREC',
+ 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN'
+}))
+lex:add_style('gawkBuiltInVariable', lexer.styles.constant .. {underlined = true})
- {'keyword', keyword},
- {'builtInVariable', builtInVariable},
- {'gawkKeyword', gawkKeyword},
- {'gawkBuiltInVariable', gawkBuiltInVariable},
- {'function', func},
- {'identifier', identifier},
-}
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word * #P('(')))
-M._tokenstyles = {
- builtInVariable = l.STYLE_CONSTANT,
- default = l.STYLE_ERROR,
- field = l.STYLE_LABEL,
- gawkBuiltInVariable = l.STYLE_CONSTANT..',underlined',
- gawkKeyword = l.STYLE_KEYWORD..',underlined',
- gawkNumber = l.STYLE_NUMBER..',underlined',
- gawkOperator = l.STYLE_OPERATOR..',underlined',
- gawkRegex = l.STYLE_PREPROCESSOR..',underlined',
- regex = l.STYLE_PREPROCESSOR
-}
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('#'))
-return M
+return lex