Diffstat (limited to 'lua/lexers/xml.lua')
| -rw-r--r-- | lua/lexers/xml.lua | 83 |
1 file changed, 40 insertions, 43 deletions
diff --git a/lua/lexers/xml.lua b/lua/lexers/xml.lua
index 640e924..0e33f6a 100644
--- a/lua/lexers/xml.lua
+++ b/lua/lexers/xml.lua
@@ -1,78 +1,75 @@
--- Copyright 2006-2022 Mitchell. See LICENSE.
+-- Copyright 2006-2024 Mitchell. See LICENSE.
 -- XML LPeg lexer.
 
-local lexer = require('lexer')
-local token, word_match = lexer.token, lexer.word_match
+local lexer = lexer
 local P, S = lpeg.P, lpeg.S
 
-local lex = lexer.new('xml')
-
--- Whitespace.
-local ws = token(lexer.WHITESPACE, lexer.space^1)
-lex:add_rule('whitespace', ws)
+local lex = lexer.new(...)
 
 -- Comments and CDATA.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
-lex:add_rule('cdata', token('cdata', lexer.range('<![CDATA[', ']]>')))
-lex:add_style('cdata', lexer.styles.comment)
-
--- Doctypes and other markup tags.
-local alpha = lpeg.R('az', 'AZ', '\127\255')
-local word_char = lexer.alnum + S('_-:.??')
-local identifier = (alpha + S('_-:.?')) * word_char^0
-local doctype = token('doctype', '<!DOCTYPE') * ws * token('doctype', identifier) *
-	(ws * identifier)^-1 * (1 - P('>'))^0 * token('doctype', '>')
+lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
+lex:add_rule('cdata', lex:tag('cdata', lexer.range('<![CDATA[', ']]>')))
+
+-- Doctype.
+local ws = lex:get_rule('whitespace')
+local identifier = (lexer.alpha + S('_-')) * (lexer.alnum + S('_-'))^0
+local doctype = lex:tag(lexer.TAG .. '.doctype', '<!DOCTYPE') * ws *
+	lex:tag(lexer.TAG .. '.doctype', identifier) * (ws * identifier)^-1 * (1 - P('>'))^0 *
+	lex:tag(lexer.TAG .. '.doctype', '>')
 lex:add_rule('doctype', doctype)
-lex:add_style('doctype', lexer.styles.comment)
 
 -- Processing instructions.
-lex:add_rule('proc_insn', token('proc_insn', '<?' * (1 - P('?>'))^0 * P('?>')^-1))
-lex:add_style('proc_insn', lexer.styles.comment)
+lex:add_rule('proc_insn', lex:tag(lexer.TAG .. '.pi', '<?' * identifier + '?>'))
 
--- Elements.
-local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier)
-lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) * namespace^-1)
-lex:add_style('element', lexer.styles.keyword)
-lex:add_style('namespace', lexer.styles.class)
+-- Tags.
+local namespace = lex:tag(lexer.OPERATOR, ':') * lex:tag(lexer.LABEL, identifier)
+lex:add_rule('element', lex:tag(lexer.TAG, '<' * P('/')^-1 * identifier) * namespace^-1)
 
 -- Closing tags.
-lex:add_rule('close_tag', token('element', P('/')^-1 * '>'))
-
--- Attributes.
-lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 * #(lexer.space^0 * '='))
-lex:add_style('attribute', lexer.styles.type)
+lex:add_rule('close_tag', lex:tag(lexer.TAG, P('/')^-1 * '>'))
 
 -- Equals.
 -- TODO: performance is terrible on large files.
 local in_tag = P(function(input, index)
 	local before = input:sub(1, index - 1)
 	local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
-	if s and e then return s > e and index or nil end
-	if s then return index end
-	return input:find('^[^<]->', index) and index or nil
+	if s and e then return s > e end
+	if s then return true end
+	return input:find('^[^<]->', index) ~= nil
 end)
--- lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag
+local equals = lex:tag(lexer.OPERATOR, '=') -- * in_tag
+-- lex:add_rule('equal', equals)
+
+-- Attributes.
+local attribute_eq = lex:tag(lexer.ATTRIBUTE, identifier) * namespace^-1 * ws^-1 * equals
+lex:add_rule('attribute', attribute_eq)
 
 -- Strings.
 local sq_str = lexer.range("'", false, false)
 local dq_str = lexer.range('"', false, false)
-lex:add_rule('string',
-	#S('\'"') * lexer.last_char_includes('=') * token(lexer.STRING, sq_str + dq_str))
+lex:add_rule('string', lex:tag(lexer.STRING, lexer.after_set('=', sq_str + dq_str)))
 
 -- Numbers.
-local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1)
-lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number) -- *in_tag)
+local number = lex:tag(lexer.NUMBER, lexer.dec_num * P('%')^-1)
+lex:add_rule('number', lexer.after_set('=', number)) -- *in_tag)
 
 -- Entities.
-lex:add_rule('entity', token('entity', '&' * word_match('lt gt amp apos quot') * ';'))
-lex:add_style('entity', lexer.styles.operator)
+local predefined = lex:tag(lexer.CONSTANT_BUILTIN .. '.entity',
+	'&' * lexer.word_match('lt gt amp apos quot') * ';')
+local general = lex:tag(lexer.CONSTANT .. '.entity', '&' * identifier * ';')
+lex:add_rule('entity', predefined + general)
 
 -- Fold Points.
 local function disambiguate_lt(text, pos, line, s) return not line:find('^</', s) and 1 or -1 end
-lex:add_fold_point('element', '<', disambiguate_lt)
-lex:add_fold_point('element', '/>', -1)
+lex:add_fold_point(lexer.TAG, '<', disambiguate_lt)
+lex:add_fold_point(lexer.TAG, '/>', -1)
 lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
 lex:add_fold_point('cdata', '<![CDATA[', ']]>')
 
+lexer.property['scintillua.comment'] = '<!--|-->'
+lexer.property['scintillua.angle.braces'] = '1'
+lexer.property['scintillua.word.chars'] =
+	'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'
+
 return lex
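For context, a minimal sketch of a lexer written against the same tag-based API this patch migrates to. The lexer name, the 'keyword' rule, and its word list are hypothetical; only constructs that appear in the diff above are used, and lexer.new(...) is assumed to install the default whitespace rule, as the lex:get_rule('whitespace') call in the patch implies.

-- A hypothetical miniature lexer in the new style: rules are built with
-- lex:tag() and standard tag names instead of token() plus lex:add_style().
local lexer = lexer
local P, S = lpeg.P, lpeg.S

local lex = lexer.new(...)

-- Keywords: a built-in tag name replaces a custom token/style pair.
lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lexer.word_match('if then else end')))

-- Comments and strings reuse the same range helper as the XML lexer above.
lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
lex:add_rule('string', lex:tag(lexer.STRING, lexer.range('"', false, false)))

return lex

Styling of the tags is then handled by the theme rather than by per-lexer add_style() calls, which is what allows the patch to drop every lex:add_style() line.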
