aboutsummaryrefslogtreecommitdiff
path: root/lexers/awk.lua
diff options
context:
space:
mode:
authorMarc André Tanner <mat@brain-dump.org>2016-12-07 16:49:29 +0100
committerMarc André Tanner <mat@brain-dump.org>2016-12-07 20:11:32 +0100
commit3570869c9ae2c4df14b15423789919e514322916 (patch)
tree6b990c9ec59fbdc7abce89c1307d22e66d0fd88a /lexers/awk.lua
parent098504f67aea8a862840d58c69e8f6360eef3073 (diff)
downloadvis-3570869c9ae2c4df14b15423789919e514322916.tar.gz
vis-3570869c9ae2c4df14b15423789919e514322916.tar.xz
Move all lua related files to lua/ subfolder
Also remove the lexers sub directory from the Lua search path. As a result we attempt to open fewer files during startup: $ strace -e open -o log ./vis +q config.h && wc -l log In order to avoid having to modifiy all lexers which `require('lexer')` we instead place a symlink in the top level directory. $ ./configure --disable-lua $ rm -rf lua Should result in a source tree with most lua specifc functionality removed.
Diffstat (limited to 'lexers/awk.lua')
-rw-r--r--lexers/awk.lua334
1 files changed, 0 insertions, 334 deletions
diff --git a/lexers/awk.lua b/lexers/awk.lua
deleted file mode 100644
index 944c698..0000000
--- a/lexers/awk.lua
+++ /dev/null
@@ -1,334 +0,0 @@
--- Copyright 2006-2016 Mitchell mitchell.att.foicica.com. See LICENSE.
--- AWK LPeg lexer.
--- Modified by Wolfgang Seeberg 2012, 2013.
-
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-
-local M = {_NAME = 'awk'}
-
-local LEFTBRACKET = '['
-local RIGHTBRACKET = ']'
-local SLASH = '/'
-local BACKSLASH = '\\'
-local CARET = '^'
-local CR = '\r'
-local LF = '\n'
-local CRLF = CR .. LF
-local DQUOTE = '"'
-local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'}
-local COMPANION = {['('] = '[', ['['] = '('}
-local CC = {
- alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1,
- print = 1, punct = 1, space = 1, upper = 1, xdigit = 1
-}
-local LastRegexEnd = 0
-local BackslashAtCommentEnd = 0
-local KW_BEFORE_RX = {
- case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1,
- ['return'] = 1
-}
-
-local function findKeyword(input, e)
- local i = e
- while i > 0 and input:find("^[%l]", i) do i = i - 1 end
- local w = input:sub(i + 1, e)
- if i == 0 then
- return KW_BEFORE_RX[w] == 1
- elseif input:find("^[%u%d_]", i) then
- return false
- else
- return KW_BEFORE_RX[w] == 1
- end
-end
-
-local function isRegex(input, i)
- while i >= 1 and input:find('^[ \t]', i) do i = i - 1 end
- if i < 1 then return true end
- if input:find("^[-!%%&(*+,:;<=>?[^{|}~\f]", i) or findKeyword(input, i) then
- return true
- elseif input:sub(i, i) == SLASH then
- return i ~= LastRegexEnd -- deals with /xx/ / /yy/.
- elseif input:find('^[]%w)."]', i) then
- return false
- elseif input:sub(i, i) == LF then
- if i == 1 then return true end
- i = i - 1
- if input:sub(i, i) == CR then
- if i == 1 then return true end
- i = i - 1
- end
- elseif input:sub(i, i) == CR then
- if i == 1 then return true end
- i = i - 1
- else
- return false
- end
- if input:sub(i, i) == BACKSLASH and i ~= BackslashAtCommentEnd then
- return isRegex(input, i - 1)
- else
- return true
- end
-end
-
-local function eatCharacterClass(input, s, e)
- local i = s
- while i <= e do
- if input:find('^[\r\n]', i) then
- return false
- elseif input:sub(i, i + 1) == ':]' then
- local str = input:sub(s, i - 1)
- return CC[str] == 1 and i + 1
- end
- i = i + 1
- end
- return false
-end
-
-local function eatBrackets(input, i, e)
- if input:sub(i, i) == CARET then i = i + 1 end
- if input:sub(i, i) == RIGHTBRACKET then i = i + 1 end
- while i <= e do
- if input:find('^[\r\n]', i) then
- return false
- elseif input:sub(i, i) == RIGHTBRACKET then
- return i
- elseif input:sub(i, i + 1) == '[:' then
- i = eatCharacterClass(input, i + 2, e)
- if not i then return false end
- elseif input:sub(i, i) == BACKSLASH then
- i = i + 1
- if input:sub(i, i + 1) == CRLF then i = i + 1 end
- end
- i = i + 1
- end
- return false
-end
-
-local function eatRegex(input, i)
- local e = #input
- while i <= e do
- if input:find('^[\r\n]', i) then
- return false
- elseif input:sub(i, i) == SLASH then
- LastRegexEnd = i
- return i
- elseif input:sub(i, i) == LEFTBRACKET then
- i = eatBrackets(input, i + 1, e)
- if not i then return false end
- elseif input:sub(i, i) == BACKSLASH then
- i = i + 1
- if input:sub(i, i + 1) == CRLF then i = i + 1 end
- end
- i = i + 1
- end
- return false
-end
-
-local ScanRegexResult
-local function scanGawkRegex(input, index)
- if isRegex(input, index - 2) then
- local i = eatRegex(input, index)
- if not i then
- ScanRegexResult = false
- return false
- end
- local rx = input:sub(index - 1, i)
- for bs in rx:gmatch("[^\\](\\+)[BSsWwy<>`']") do
- -- /\S/ is special, but /\\S/ is not.
- if #bs % 2 == 1 then return i + 1 end
- end
- ScanRegexResult = i + 1
- else
- ScanRegexResult = false
- end
- return false
-end
--- Is only called immediately after scanGawkRegex().
-local function scanRegex()
- return ScanRegexResult
-end
-
-local function scanString(input, index)
- local i = index
- local e = #input
- while i <= e do
- if input:find('^[\r\n]', i) then
- return false
- elseif input:sub(i, i) == DQUOTE then
- return i + 1
- elseif input:sub(i, i) == BACKSLASH then
- i = i + 1
- -- l.delimited_range() doesn't handle CRLF.
- if input:sub(i, i + 1) == CRLF then i = i + 1 end
- end
- i = i + 1
- end
- return false
-end
-
--- purpose: prevent isRegex() from entering a comment line that ends with a
--- backslash.
-local function scanComment(input, index)
- local _, i = input:find('[^\r\n]*', index)
- if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end
- return i + 1
-end
-
-local function scanFieldDelimiters(input, index)
- local i = index
- local e = #input
- local left = input:sub(i - 1, i - 1)
- local count = 1
- local right = DELIMITER_MATCHES[left]
- local left2 = COMPANION[left]
- local count2 = 0
- local right2 = DELIMITER_MATCHES[left2]
- while i <= e do
- if input:find('^[#\r\n]', i) then
- return false
- elseif input:sub(i, i) == right then
- count = count - 1
- if count == 0 then return count2 == 0 and i + 1 end
- elseif input:sub(i, i) == left then
- count = count + 1
- elseif input:sub(i, i) == right2 then
- count2 = count2 - 1
- if count2 < 0 then return false end
- elseif input:sub(i, i) == left2 then
- count2 = count2 + 1
- elseif input:sub(i, i) == DQUOTE then
- i = scanString(input, i + 1)
- if not i then return false end
- i = i - 1
- elseif input:sub(i, i) == SLASH then
- if isRegex(input, i - 1) then
- i = eatRegex(input, i + 1)
- if not i then return false end
- end
- elseif input:sub(i, i) == BACKSLASH then
- if input:sub(i + 1, i + 2) == CRLF then
- i = i + 2
- elseif input:find('^[\r\n]', i + 1) then
- i = i + 1
- end
- end
- i = i + 1
- end
- return false
-end
-
--- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '#' * P(scanComment))
-
--- Strings.
-local string = token(l.STRING, DQUOTE * P(scanString))
-
--- Regular expressions.
--- Slash delimited regular expressions are preceded by most operators or
--- the keywords 'print' and 'case', possibly on a preceding line. They
--- can contain unescaped slashes and brackets in brackets. Some escape
--- sequences like '\S', '\s' have special meanings with Gawk. Tokens that
--- contain them are displayed differently.
-local regex = token(l.REGEX, SLASH * P(scanRegex))
-local gawkRegex = token('gawkRegex', SLASH * P(scanGawkRegex))
-
--- no leading sign because it might be binary.
-local float = ((l.digit ^ 1 * ('.' * l.digit ^ 0) ^ -1) +
- ('.' * l.digit ^ 1)) * (S('eE') * S('+-') ^ -1 * l.digit ^ 1) ^ -1
--- Numbers.
-local number = token(l.NUMBER, float)
-local gawkNumber = token('gawkNumber', l.hex_num + l.oct_num)
-
--- Operators.
-local operator = token(l.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))
-local gawkOperator = token('gawkOperator', P("|&") + "@" + "**=" + "**")
-
--- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc.
-local field = token('field', P('$') * S('$+-') ^ 0 *
- (float + (l.word ^ 0 * '(' * P(scanFieldDelimiters)) +
- (l.word ^ 1 * ('[' * P(scanFieldDelimiters)) ^ -1) +
- ('"' * P(scanString)) + ('/' * P(eatRegex) * '/')))
-
--- Functions.
-local func = token(l.FUNCTION, l.word * #P('('))
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match{
- 'BEGIN', 'END', 'atan2', 'break', 'close', 'continue', 'cos', 'delete', 'do',
- 'else', 'exit', 'exp', 'fflush', 'for', 'function', 'getline', 'gsub', 'if',
- 'in', 'index', 'int', 'length', 'log', 'match', 'next', 'nextfile', 'print',
- 'printf', 'rand', 'return', 'sin', 'split', 'sprintf', 'sqrt', 'srand', 'sub',
- 'substr', 'system', 'tolower', 'toupper', 'while'
-})
-
-local gawkKeyword = token('gawkKeyword', word_match{
- 'BEGINFILE', 'ENDFILE', 'adump', 'and', 'asort', 'asorti', 'bindtextdomain',
- 'case', 'compl', 'dcgettext', 'dcngettext', 'default', 'extension', 'func',
- 'gensub', 'include', 'isarray', 'load', 'lshift', 'mktime', 'or', 'patsplit',
- 'rshift', 'stopme', 'strftime', 'strtonum', 'switch', 'systime', 'xor'
-})
-
-local builtInVariable = token('builtInVariable', word_match{
- 'ARGC', 'ARGV', 'CONVFMT', 'ENVIRON', 'FILENAME', 'FNR', 'FS', 'NF', 'NR',
- 'OFMT', 'OFS', 'ORS', 'RLENGTH', 'RS', 'RSTART', 'SUBSEP'
-})
-
-local gawkBuiltInVariable = token('gawkBuiltInVariable', word_match {
- 'ARGIND', 'BINMODE', 'ERRNO', 'FIELDWIDTHS', 'FPAT', 'FUNCTAB', 'IGNORECASE',
- 'LINT', 'PREC', 'PROCINFO', 'ROUNDMODE', 'RT', 'SYMTAB', 'TEXTDOMAIN'
-})
-
--- Within each group order matters, but the groups themselves (except the
--- last) can be in any order.
-M._rules = {
- {'whitespace', ws},
-
- {'comment', comment},
-
- {'string', string},
-
- {'field', field},
-
- {'gawkRegex', gawkRegex},
- {'regex', regex},
- {'gawkOperator', gawkOperator},
- {'operator', operator},
-
- {'gawkNumber', gawkNumber},
- {'number', number},
-
- {'keyword', keyword},
- {'builtInVariable', builtInVariable},
- {'gawkKeyword', gawkKeyword},
- {'gawkBuiltInVariable', gawkBuiltInVariable},
- {'function', func},
- {'identifier', identifier},
-}
-
-M._tokenstyles = {
- builtInVariable = l.STYLE_CONSTANT,
- default = l.STYLE_ERROR,
- field = l.STYLE_LABEL,
- gawkBuiltInVariable = l.STYLE_CONSTANT..',underlined',
- gawkKeyword = l.STYLE_KEYWORD..',underlined',
- gawkNumber = l.STYLE_NUMBER..',underlined',
- gawkOperator = l.STYLE_OPERATOR..',underlined',
- gawkRegex = l.STYLE_PREPROCESSOR..',underlined',
- regex = l.STYLE_PREPROCESSOR
-}
-
-M._foldsymbols = {
- _patterns = {'[{}]', '#'},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['#'] = l.fold_line_comments('#')}
-}
-
-return M