diff options
| author | Marc André Tanner <mat@brain-dump.org> | 2015-10-16 12:36:47 +0200 |
|---|---|---|
| committer | Marc André Tanner <mat@brain-dump.org> | 2015-11-08 13:35:36 +0100 |
| commit | 039042f2e323c1f982f1de61b702c88fb33d6ccb (patch) | |
| tree | 67dea69de9462e0c27ea2a743b4c5d1798eaa057 /lexers/xml.lua | |
| parent | b1ec60061623601ca6185a16d77c6c6c62135e95 (diff) | |
| download | vis-039042f2e323c1f982f1de61b702c88fb33d6ccb.tar.gz vis-039042f2e323c1f982f1de61b702c88fb33d6ccb.tar.xz | |
Import LPeg based lexers from Scintillua 3.6.1-1
These are Copyright (c) 2007-2015 Mitchell and released under the
MIT license.
Diffstat (limited to 'lexers/xml.lua')
| -rw-r--r-- | lexers/xml.lua | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/lexers/xml.lua b/lexers/xml.lua new file mode 100644 index 0000000..25e6e45 --- /dev/null +++ b/lexers/xml.lua @@ -0,0 +1,99 @@
-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
-- XML LPeg lexer.
--
-- Builds the token patterns for XML markup and exports them, together with
-- the token styles and fold symbols, in the module table returned at the end.

local l = require('lexer')
local token, word_match = l.token, l.word_match
-- `lpeg` is read as a global here; presumably the 'lexer' module (or the host)
-- makes it available -- TODO confirm.
local P, S = lpeg.P, lpeg.S

local M = {_NAME = 'xml'}

-- Whitespace.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments and CDATA.
-- The closing delimiter is optional (^-1), so an unterminated comment or CDATA
-- section is still tokenized through to the end of the input.
local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
local cdata = token('cdata', '<![CDATA[' * (l.any - ']]>')^0 * P(']]>')^-1)

-- Strings.
-- Only considered when the next character is a quote and
-- `l.last_char_includes('=')` succeeds (presumably: the previous
-- non-whitespace character is '=' -- confirm against the lexer module).
-- The local is deliberately not called `string`, so the standard `string`
-- library stays visible in the rest of this chunk; the rule below is still
-- registered under the name 'string'.
local sq_str = l.delimited_range("'", false, true)
local dq_str = l.delimited_range('"', false, true)
local quoted_string = #S('\'"') * l.last_char_includes('=') *
                      token(l.STRING, sq_str + dq_str)

-- Match-time check used to restrict some tokens to the inside of a tag.
-- Receives the whole subject and the current position; returns `index`
-- unchanged on success (so no input is consumed) or nil on failure.
local in_tag = P(function(input, index)
  local before = input:sub(1, index - 1)
  -- Position of a '<' with no '>' between it and the current position.
  local open_pos = before:find('<[^>]-$')
  -- Position of a '>' with no '<' between it and the current position.
  local close_pos = before:find('>[^<]-$')
  -- NOTE(review): both searches succeeding at once looks impossible (each
  -- requires the other's character to be absent from its tail); the branch is
  -- kept unchanged to preserve behaviour.
  if open_pos and close_pos then
    return open_pos > close_pos and index or nil
  end
  if open_pos then return index end
  -- No unclosed '<' behind us: succeed only if a '>' follows before any '<'.
  return input:find('^[^<]->', index) and index or nil
end)

-- Numbers.
-- Digits with an optional trailing '%', subject to the same
-- `last_char_includes('=')` condition as strings and to the in-tag check.
local number = #l.digit * l.last_char_includes('=') *
               token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag

-- Names.
-- Punctuation accepted in names. The original set literal listed '?' twice;
-- a character set ignores duplicates, so this matches exactly the same input.
local name_punct = S('_-:.?')
local word_char = l.alnum + name_punct
local identifier = (l.alpha + name_punct) * word_char^0
local namespace = token(l.OPERATOR, ':') * token('namespace', identifier)

-- Elements.
local element = token('element', '<' * P('/')^-1 * identifier) * namespace^-1

-- Attributes.
-- The lookahead requires an '=' (after optional whitespace) to follow.
local attribute = token('attribute', identifier) * namespace^-1 *
                  #(l.space^0 * '=')

-- Closing tags.
local close_tag = token('element', P('/')^-1 * '>')

-- Equals.
local equals = token(l.OPERATOR, '=') * in_tag

-- Entities.
local entity = token('entity', '&' * word_match{
  'lt', 'gt', 'amp', 'apos', 'quot'
} * ';')

-- Doctypes and other markup tags.
local doctype = token('doctype', P('<!DOCTYPE')) * ws *
                token('doctype', identifier) * (ws * identifier)^-1 *
                (1 - P('>'))^0 * token('doctype', '>')

-- Processing instructions.
-- As with comments, the closing '?>' is optional.
local proc_insn = token('proc_insn', P('<?') * (1 - P('?>'))^0 * P('?>')^-1)

-- Rules, listed as {name, pattern} pairs.
M._rules = {
  {'whitespace', ws},
  {'comment', comment},
  {'cdata', cdata},
  {'doctype', doctype},
  {'proc_insn', proc_insn},
  {'element', element},
  {'close_tag', close_tag},
  {'attribute', attribute},
  {'equals', equals},
  {'string', quoted_string},
  {'number', number},
  {'entity', entity},
}

-- Styles for the custom token names introduced above.
M._tokenstyles = {
  element = l.STYLE_KEYWORD,
  namespace = l.STYLE_CLASS,
  attribute = l.STYLE_TYPE,
  cdata = l.STYLE_COMMENT,
  entity = l.STYLE_OPERATOR,
  doctype = l.STYLE_COMMENT,
  proc_insn = l.STYLE_COMMENT,
  --markup = l.STYLE_COMMENT
}

-- Fold symbols: Lua patterns to look for, then per-token tables mapping each
-- matched text to +1 or -1.
M._foldsymbols = {
  _patterns = {'</?', '/>', '<!%-%-', '%-%->', '<!%[CDATA%[', '%]%]>'},
  element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
  [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
  cdata = {['<![CDATA['] = 1, [']]>'] = -1}
}

return M
