From 4c4392d29df777ff702dfe99b4f3c23142976e05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?=
Date: Fri, 11 Aug 2023 01:27:32 +0200
Subject: update lexers to orbitalquark/scintillua@b789dde

Rather than cherry-pick patches from after 6.2, we will just grab
everything as is.
---
 lua/lexers/wsf.lua | 77 ++++++++++++++++++++++++++----------------------------
 1 file changed, 37 insertions(+), 40 deletions(-)

(limited to 'lua/lexers/wsf.lua')

diff --git a/lua/lexers/wsf.lua b/lua/lexers/wsf.lua
index 6972cfe..cbfa221 100644
--- a/lua/lexers/wsf.lua
+++ b/lua/lexers/wsf.lua
@@ -1,90 +1,87 @@
--- Copyright 2006-2022 Mitchell. See LICENSE.
+-- Copyright 2006-2024 Mitchell. See LICENSE.
 -- WSF LPeg lexer (based on XML).
 -- Contributed by Jeff Stone.
 
-local lexer = require('lexer')
-local token, word_match = lexer.token, lexer.word_match
+local lexer = lexer
 local P, S = lpeg.P, lpeg.S
 
-local lex = lexer.new('wsf')
-
--- Whitespace.
-local ws = token(lexer.WHITESPACE, lexer.space^1)
-lex:add_rule('whitespace', ws)
+local lex = lexer.new(...)
 
 -- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
+lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.range('<!--', '-->')))
 
 -- Elements.
-local alpha = lpeg.R('az', 'AZ', '\127\255')
-local word_char = lexer.alnum + S('_-:.?')
-local identifier = (alpha + S('_-:.?')) * word_char^0
-local element = token('element', '<' * P('/')^-1 * identifier)
-lex:add_rule('element', element)
-lex:add_style('element', lexer.styles.keyword)
+local identifier = (lexer.alpha + S('_-')) * (lexer.alnum + S('_-'))^0
+local tag = lex:tag(lexer.TAG, '<' * P('/')^-1 * identifier)
+lex:add_rule('tag', tag)
 
 -- Closing tags.
-local tag_close = token('element', P('/')^-1 * '>')
+local tag_close = lex:tag(lexer.TAG, P('/')^-1 * '>')
 lex:add_rule('tag_close', tag_close)
 
--- Attributes.
-local attribute = token('attribute', identifier) * #(lexer.space^0 * '=')
-lex:add_rule('attribute', attribute)
-lex:add_style('attribute', lexer.styles.type)
-
 -- Equals.
+-- TODO: performance is terrible on large files.
 local in_tag = P(function(input, index)
 	local before = input:sub(1, index - 1)
 	local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
-	if s and e then return s > e and index or nil end
-	if s then return index end
-	return input:find('^[^<]->', index) and index or nil
+	if s and e then return s > e end
+	if s then return true end
+	return input:find('^[^<]->', index) ~= nil
 end)
 
-local equals = token(lexer.OPERATOR, '=') * in_tag
-lex:add_rule('equals', equals)
+local equals = lex:tag(lexer.OPERATOR, '=') -- * in_tag
+-- lex:add_rule('equals', equals)
+
+-- Attributes.
+local ws = lex:get_rule('whitespace')
+local attribute_eq = lex:tag(lexer.ATTRIBUTE, identifier) * ws^-1 * equals
+lex:add_rule('attribute', attribute_eq)
 
 -- Strings.
 local sq_str = lexer.range("'", false, false)
 local dq_str = lexer.range('"', false, false)
-local string = #S('\'"') * lexer.last_char_includes('=') * token(lexer.STRING, sq_str + dq_str)
+local string = lex:tag(lexer.STRING, lexer.after_set('=', sq_str + dq_str))
 lex:add_rule('string', string)
 
 -- Numbers.
-local number = token(lexer.NUMBER, lexer.dec_num * P('%')^-1)
-lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * number * in_tag)
+local number = lex:tag(lexer.NUMBER, lexer.dec_num * P('%')^-1)
+lex:add_rule('number', lexer.after_set('=', number)) -- * in_tag)
 
 -- Entities.
-lex:add_rule('entity', token('entity', '&' * word_match('lt gt amp apos quot') * ';'))
-lex:add_style('entity', lexer.styles.operator)
+local predefined = lex:tag(lexer.CONSTANT_BUILTIN .. '.entity',
+	'&' * lexer.word_match('lt gt amp apos quot') * ';')
+local general = lex:tag(lexer.CONSTANT .. '.entity', '&' * identifier * ';')
+lex:add_rule('entity', predefined + general)
 
 -- Fold points.
 local function disambiguate_lt(text, pos, line, s) return not line:find('^</', s) and 1 or -1 end
-lex:add_fold_point('element', '<', disambiguate_lt)
-lex:add_fold_point('element', '/>', -1)
+lex:add_fold_point(lexer.TAG, '<', disambiguate_lt)
+lex:add_fold_point(lexer.TAG, '/>', -1)
 lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
 
 -- Finally, add JavaScript and VBScript as embedded languages
 
 -- Tags that start embedded languages.
-local embed_start_tag = element * (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * ws^0 *
-  tag_close
-local embed_end_tag = element * tag_close
+local embed_start_tag = tag * (ws * attribute_eq * ws^-1 * string)^0 * ws^-1 * tag_close
+local embed_end_tag = tag * tag_close
 
 -- Embedded JavaScript.
 local js = lexer.load('javascript')
 local js_start_rule = #(P('')) * embed_start_tag --
+local js_end_rule = #P('</script>') * embed_end_tag -- </script>
 lex:embed(js, js_start_rule, js_end_rule)
 
 -- Embedded VBScript.
 local vbs = lexer.load('vb', 'vbscript')
 local vbs_start_rule = #(P('')) * embed_start_tag --
+local vbs_end_rule = #P('</script>') * embed_end_tag -- </script>
 lex:embed(vbs, vbs_start_rule, vbs_end_rule)
 
+lexer.property['scintillua.comment'] = '<!--|-->'
+lexer.property['scintillua.angle.braces'] = '1'
+
 return lex
-- 
cgit v1.2.3