Diffstat (limited to 'lua/lexers/wsf.lua')
| -rw-r--r-- | lua/lexers/wsf.lua | 136 |
1 file changed, 62 insertions, 74 deletions
diff --git a/lua/lexers/wsf.lua b/lua/lexers/wsf.lua
index 37cb33e..6972cfe 100644
--- a/lua/lexers/wsf.lua
+++ b/lua/lexers/wsf.lua
@@ -1,102 +1,90 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- WSF LPeg lexer (based on XML).
 -- Contributed by Jeff Stone.
 
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S
 
-local M = {_NAME = 'wsf'}
+local lex = lexer.new('wsf')
 
 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
 
 -- Comments.
-local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)
-
--- Strings.
-local sq_str = l.delimited_range("'", false, true)
-local dq_str = l.delimited_range('"', false, true)
-local string = #S('\'"') * l.last_char_includes('=') *
-               token(l.STRING, sq_str + dq_str)
-
-local in_tag = #P((1 - S'><')^0 * '>')
-
--- Numbers.
-local number = #l.digit * l.last_char_includes('=') *
-               token(l.NUMBER, l.digit^1 * P('%')^-1) * in_tag
-
-local alpha = R('az', 'AZ', '\127\255')
-local word_char = l.alnum + S('_-:.??')
-local identifier = (l.alpha + S('_-:.??')) * word_char^0
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
 
 -- Elements.
+local alpha = lpeg.R('az', 'AZ', '\127\255')
+local word_char = lexer.alnum + S('_-:.?')
+local identifier = (alpha + S('_-:.?')) * word_char^0
 local element = token('element', '<' * P('/')^-1 * identifier)
-
--- Attributes.
-local attribute = token('attribute', identifier) * #(l.space^0 * '=')
+lex:add_rule('element', element)
+lex:add_style('element', lexer.styles.keyword)
 
 -- Closing tags.
 local tag_close = token('element', P('/')^-1 * '>')
+lex:add_rule('tag_close', tag_close)
+
+-- Attributes.
+local attribute = token('attribute', identifier) * #(lexer.space^0 * '=')
+lex:add_rule('attribute', attribute)
+lex:add_style('attribute', lexer.styles.type)
 
 -- Equals.
-local equals = token(l.OPERATOR, '=') * in_tag
+local in_tag = P(function(input, index)
+  local before = input:sub(1, index - 1)
+  local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
+  if s and e then return s > e and index or nil end
+  if s then return index end
+  return input:find('^[^<]->', index) and index or nil
+end)
+
+local equals = token(lexer.OPERATOR, '=') * in_tag
+lex:add_rule('equals', equals)
+
+-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
+local string = #S('\'"') * lexer.last_char_includes('=') * token(lexer.STRING, sq_str + dq_str)
+lex:add_rule('string', string)
+
+-- Numbers.
+local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1)
+lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number * in_tag)
 
 -- Entities.
-local entity = token('entity', '&' * word_match{
-  'lt', 'gt', 'amp', 'apos', 'quot'
-} * ';')
-
-M._rules = {
-  {'whitespace', ws},
-  {'comment', comment},
-  {'element', element},
-  {'tag_close', tag_close},
-  {'attribute', attribute},
-  {'equals', equals},
-  {'string', string},
-  {'number', number},
-  {'entity', entity}
-}
-
-M._tokenstyles = {
-  element = l.STYLE_KEYWORD,
-  attribute = l.STYLE_TYPE,
-  entity = l.STYLE_OPERATOR
-}
-
-M._foldsymbols = {
-  _patterns = {'</?', '/>', '<!%-%-', '%-%->'},
-  element = {['<'] = 1, ['/>'] = -1, ['</'] = -1},
-  [l.COMMENT] = {['<!--'] = 1, ['-->'] = -1},
-}
+lex:add_rule('entity', token('entity', '&' * word_match('lt gt amp apos quot') * ';'))
+lex:add_style('entity', lexer.styles.operator)
+
+-- Fold points.
+local function disambiguate_lt(text, pos, line, s) return not line:find('^</', s) and 1 or -1 end
+lex:add_fold_point('element', '<', disambiguate_lt)
+lex:add_fold_point('element', '/>', -1)
+lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
 
 -- Finally, add JavaScript and VBScript as embedded languages
 
 -- Tags that start embedded languages.
-M.embed_start_tag = element *
-                    (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 *
-                    ws^0 * tag_close
-M.embed_end_tag = element * tag_close
+local embed_start_tag = element * (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * ws^0 *
+  tag_close
+local embed_end_tag = element * tag_close
 
 -- Embedded JavaScript.
-local js = l.load('javascript')
+local js = lexer.load('javascript')
 local js_start_rule = #(P('<script') * (P(function(input, index)
-  if input:find('^%s+language%s*=%s*(["\'])[jJ][ava]*[sS]cript%1', index) then
-    return index
-  end
-end) + '>')) * M.embed_start_tag -- <script language="javascript">
-local js_end_rule = #('</script' * ws^0 * '>') * M.embed_end_tag -- </script>
-l.embed_lexer(M, js, js_start_rule, js_end_rule)
+  if input:find('^%s+language%s*=%s*(["\'])[jJ][ava]*[sS]cript%1', index) then return index end
+end) + '>')) * embed_start_tag -- <script language="javascript">
+local js_end_rule = #('</script' * ws^0 * '>') * embed_end_tag -- </script>
+lex:embed(js, js_start_rule, js_end_rule)
 
 -- Embedded VBScript.
-local vbs = l.load('vbscript')
+local vbs = lexer.load('vb', 'vbscript')
 local vbs_start_rule = #(P('<script') * (P(function(input, index)
-  if input:find('^%s+language%s*=%s*(["\'])[vV][bB][sS]cript%1', index) then
-    return index
-  end
-end) + '>')) * M.embed_start_tag -- <script language="vbscript">
-local vbs_end_rule = #('</script' * ws^0 * '>') * M.embed_end_tag -- </script>
-l.embed_lexer(M, vbs, vbs_start_rule, vbs_end_rule)
-
-return M
+  if input:find('^%s+language%s*=%s*(["\'])[vV][bB][sS]cript%1', index) then return index end
+end) + '>')) * embed_start_tag -- <script language="vbscript">
+local vbs_end_rule = #('</script' * ws^0 * '>') * embed_end_tag -- </script>
+lex:embed(vbs, vbs_start_rule, vbs_end_rule)
+
+return lex
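For orientation, the new side of this diff follows the post-`lexer.new` Scintillua API: rules, styles, and fold points are registered on a lexer object instead of being collected into the old M._rules, M._tokenstyles, and M._foldsymbols tables. Below is a minimal sketch of that skeleton using only calls that appear in the diff above; the 'example' name, keyword list, and element pattern are placeholders for illustration, not part of wsf.lua.

-- Minimal sketch of a new-style Scintillua lexer, assuming the same API the
-- updated wsf.lua uses (lexer.new, add_rule, add_style, add_fold_point).
local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match

local lex = lexer.new('example')

-- Each rule is registered by name (replaces the old M._rules list).
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
lex:add_rule('keyword', token(lexer.KEYWORD, word_match('foo bar baz')))
lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))

-- Custom token names get a style via add_style (replaces M._tokenstyles).
lex:add_rule('element', token('element', '<' * lpeg.P('/')^-1 * lexer.alnum^1))
lex:add_style('element', lexer.styles.keyword)

-- Fold points replace the old M._foldsymbols table.
lex:add_fold_point(lexer.COMMENT, '<!--', '-->')

return lex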
