diff options
| author | Matěj Cepl <mcepl@cepl.eu> | 2023-08-11 01:27:32 +0200 |
|---|---|---|
| committer | Randy Palamar <randy@rnpnr.xyz> | 2024-03-27 06:04:21 -0600 |
| commit | 4c4392d29df777ff702dfe99b4f3c23142976e05 (patch) | |
| tree | 5355324abe18952f7d19d6cfc5dbeb5d6cb72b84 /lua/lexers/yaml.lua | |
| parent | 95bf9f59f8a9a37148bdc0787db378d62c7cd032 (diff) | |
| download | vis-4c4392d29df777ff702dfe99b4f3c23142976e05.tar.gz vis-4c4392d29df777ff702dfe99b4f3c23142976e05.tar.xz | |
update lexers to orbitalquark/scintillua@b789dde
Rather than cherry-picking individual patches made after 6.2, we will just grab
everything as is.
Diffstat (limited to 'lua/lexers/yaml.lua')
| -rw-r--r-- | lua/lexers/yaml.lua | 124 |
1 files changed, 73 insertions, 51 deletions
diff --git a/lua/lexers/yaml.lua b/lua/lexers/yaml.lua index ebf90cf..75705f4 100644 --- a/lua/lexers/yaml.lua +++ b/lua/lexers/yaml.lua @@ -1,34 +1,60 @@ --- Copyright 2006-2022 Mitchell. See LICENSE. +-- Copyright 2006-2024 Mitchell. See LICENSE. -- YAML LPeg lexer. -- It does not keep track of indentation perfectly. -local lexer = require('lexer') -local token, word_match = lexer.token, lexer.word_match +local lexer = lexer +local word_match = lexer.word_match local P, S, B = lpeg.P, lpeg.S, lpeg.B -local lex = lexer.new('yaml', {fold_by_indentation = true}) +local lex = lexer.new(..., {fold_by_indentation = true}) --- Whitespace. -local indent = #lexer.starts_line(S(' \t')) * - (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1 -lex:add_rule('indent', indent) -lex:add_style('indent_error', {back = lexer.colors.red}) -lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1)) +-- Distinguish between horizontal and vertical space so indenting tabs can be marked as errors. +local tab_indent = lex:tag(lexer.ERROR .. '.indent', lexer.starts_line('\t', true)) +lex:modify_rule('whitespace', tab_indent + lex:tag(lexer.WHITESPACE, S(' \r\n')^1 + P('\t')^1)) + +-- Document boundaries. +lex:add_rule('doc_bounds', lex:tag(lexer.OPERATOR, lexer.starts_line(P('---') + '...'))) -- Keys. -local word = (lexer.alpha + '-' * -lexer.space) * (lexer.alnum + '-')^0 -lex:add_rule('key', token(lexer.KEYWORD, word * (S(' \t_')^1 * word^-1)^0) * #(':' * lexer.space)) +local word = (lexer.alnum + '-')^1 +lex:add_rule('key', -P('- ') * lex:tag(lexer.STRING, word * (S(' \t_')^1 * word^-1)^0) * + #P(':' * lexer.space)) + +-- Collections. +lex:add_rule('collection', lex:tag(lexer.OPERATOR, + lexer.after_set('?-:\n', S('?-') * #P(' '), ' \t') + ':' * #P(lexer.space) + S('[]{}') + ',' * + #P(' '))) + +-- Alias indicators. 
+local anchor = lex:tag(lexer.OPERATOR, '&') * lex:tag(lexer.LABEL, word) +local alias = lex:tag(lexer.OPERATOR, '*') * lex:tag(lexer.LABEL, word) +lex:add_rule('alias', anchor + alias) + +-- Tags. +local explicit_tag = '!!' * word_match{ + 'map', 'omap', 'pairs', 'set', 'seq', -- collection + 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp', 'value', 'yaml' -- scalar +} +local verbatim_tag = '!' * lexer.range('<', '>', true) +local short_tag = '!' * word * ('!' * (1 - lexer.space)^1)^-1 +lex:add_rule('tag', lex:tag(lexer.TYPE, explicit_tag + verbatim_tag + short_tag)) + +-- Comments. +lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#'))) + +-- Reserved. +lex:add_rule('reserved', + B(S(':,') * ' ') * lex:tag(lexer.ERROR, S('@`') + lexer.starts_line(S('@`')))) -- Constants. -lex:add_rule('constant', B(lexer.space) * token(lexer.CONSTANT, word_match('null true false', true))) +local scalar_end = #(S(' \t')^0 * lexer.newline + S(',]}') + -1) +lex:add_rule('constant', + lex:tag(lexer.CONSTANT_BUILTIN, word_match('null true false', true)) * scalar_end) -- Strings. local sq_str = lexer.range("'") local dq_str = lexer.range('"') -lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) - --- Comments. -lex:add_rule('comment', B(lexer.space) * token(lexer.COMMENT, lexer.to_eol('#'))) +lex:add_rule('string', lex:tag(lexer.STRING, sq_str + dq_str) * (scalar_end + #P(':' * lexer.space))) -- Timestamps. local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit @@ -40,45 +66,41 @@ local minutes = lexer.digit * lexer.digit local seconds = lexer.digit * lexer.digit local fraction = '.' 
* lexer.digit^0 local time = hours * ':' * minutes * ':' * seconds * fraction^-1 -local T = S(' \t')^1 + S('tT') -local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1 -lex:add_rule('timestamp', token('timestamp', date * (T * time * zone^-1)^-1)) -lex:add_style('timestamp', lexer.styles.number) +local zone = 'Z' + S(' \t')^-1 * S('-+') * hours * (':' * minutes)^-1 +lex:add_rule('timestamp', lex:tag(lexer.NUMBER .. '.timestamp', + date * (S('tT \t') * time * zone^-1)^-1) * scalar_end) -- Numbers. -local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 -local hex = '0' * S('xX') * ('_' * lexer.xdigit^1)^1 -local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0 -local integer = S('+-')^-1 * (hex + bin + dec) -local float = S('+-')^-1 * - ((dec^-1 * '.' * dec + dec * '.' * dec^-1 * -P('.')) * (S('eE') * S('+-')^-1 * dec)^-1 + - (dec * S('eE') * S('+-')^-1 * dec)) local special_num = S('+-')^-1 * '.' * word_match('inf nan', true) -lex:add_rule('number', B(lexer.space) * token(lexer.NUMBER, special_num + float + integer)) +local number = lexer.number + special_num +lex:add_rule('number', (B(lexer.alnum) * lex:tag(lexer.DEFAULT, number) + + lex:tag(lexer.NUMBER, number)) * scalar_end) --- Types. -lex:add_rule('type', token(lexer.TYPE, '!!' * word_match({ - -- Collection types. - 'map', 'omap', 'pairs', 'set', 'seq', - -- Scalar types. - 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp', 'value', 'yaml' -}, true) + '!' * lexer.range('<', '>', true))) - --- Document boundaries. -lex:add_rule('doc_bounds', token('document', lexer.starts_line(P('---') + '...'))) -lex:add_style('document', lexer.styles.constant) +-- Scalars. 
+local block_indicator = S('|>') * (S('-+') * lexer.digit^-1 + lexer.digit * S('-+')^-1)^-1 +local block = lpeg.Cmt(lpeg.C(block_indicator * lexer.newline), function(input, index, indicator) + local indent = lexer.indent_amount[lexer.line_from_position(index - #indicator)] + for s, i, j in input:gmatch('()\n()[ \t]*()[^ \t\r\n]', index) do -- ignore blank lines + if s >= index then -- compatibility for Lua < 5.4, which doesn't have init for string.gmatch() + if j - i <= indent then return s end + end + end + return #input + 1 +end) +local seq = B('- ') * lexer.nonnewline^1 +local csv = B(', ') * (lexer.nonnewline - S(',]}'))^1 +local stop_chars, LF = {[string.byte('{')] = true, [string.byte('\n')] = true}, string.byte('\n') +local map = B(': ') * lexer.nonnewline * P(function(input, index) + local pos = index + while pos > 1 and not stop_chars[input:byte(pos)] do pos = pos - 1 end + local s = input:find(input:byte(pos) ~= LF and '[\n,}]' or '\n', index) + return s or #input + 1 +end) +lex:add_rule('scalar', lex:tag(lexer.DEFAULT, block + seq + csv + map)) -- Directives -lex:add_rule('directive', token('directive', lexer.starts_line(lexer.to_eol('%')))) -lex:add_style('directive', lexer.styles.preprocessor) - --- Indicators. -local anchor = B(lexer.space) * token(lexer.LABEL, '&' * word) -local alias = token(lexer.VARIABLE, '*' * word) -local tag = token('tag', '!' * word * P('!')^-1) -local reserved = token(lexer.ERROR, S('@`') * word) -local indicator_chars = token(lexer.OPERATOR, S('-?:,>|[]{}!')) -lex:add_rule('indicator', tag + indicator_chars + alias + anchor + reserved) -lex:add_style('tag', lexer.styles.class) +lex:add_rule('directive', lex:tag(lexer.PREPROCESSOR, lexer.starts_line(lexer.to_eol('%')))) + +lexer.property['scintillua.comment'] = '#' return lex |
