Diffstat (limited to 'lua')
| -rw-r--r-- | lua/lexers/lexer.lua | 830 |
| -rw-r--r-- | lua/lexers/lisp.lua | 61 |
| -rw-r--r-- | lua/lexers/markdown.lua | 74 |
| -rw-r--r-- | lua/lexers/output.lua | 17 |
| -rw-r--r-- | lua/lexers/rest.lua | 151 |
| -rw-r--r-- | lua/lexers/rpmspec.lua | 44 |
| -rw-r--r-- | lua/lexers/systemd.lua | 75 |
7 files changed, 720 insertions, 532 deletions
diff --git a/lua/lexers/lexer.lua b/lua/lexers/lexer.lua index 7a8c9e0..509a5e2 100644 --- a/lua/lexers/lexer.lua +++ b/lua/lexers/lexer.lua @@ -2,13 +2,31 @@ --- Lexes Scintilla documents and source code with Lua and LPeg. -- +-- ### Contents +-- +-- 1. [Writing Lua Lexers](#writing-lua-lexers) +-- 2. [Lexer Basics](#lexer-basics) +-- - [New Lexer Template](#new-lexer-template) +-- - [Tags](#tags) +-- - [Rules](#rules) +-- - [Summary](#summary) +-- 3. [Advanced Techniques](#advanced-techniques) +-- - [Line Lexers](#line-lexers) +-- - [Embedded Lexers](#embedded-lexers) +-- - [Lexers with Complex State](#lexers-with-complex-state) +-- 4. [Code Folding](#code-folding) +-- 5. [Using Lexers](#using-lexers) +-- 6. [Migrating Legacy Lexers](#migrating-legacy-lexers) +-- 7. [Considerations](#considerations) +-- 8. [API Documentation](#lexer.add_fold_point) +-- -- ### Writing Lua Lexers -- -- Lexers recognize and tag elements of source code for syntax highlighting. Scintilla (the -- editing component behind [Textadept][] and [SciTE][]) traditionally uses static, compiled C++ --- lexers which are notoriously difficult to create and/or extend. On the other hand, Lua makes --- it easy to to rapidly create new lexers, extend existing ones, and embed lexers within one --- another. Lua lexers tend to be more readable than C++ lexers too. +-- lexers which are difficult to create and/or extend. On the other hand, Lua makes it easy +-- to rapidly create new lexers, extend existing ones, and embed lexers within one another. Lua +-- lexers tend to be more readable than C++ lexers too. -- -- While lexers can be written in plain Lua, Scintillua prefers using Parsing Expression -- Grammars, or PEGs, composed with the Lua [LPeg library][]. As a result, this document is @@ -18,17 +36,17 @@ -- -- Operator | Description -- -|- --- `lpeg.P(string)` | Matches `string` literally. --- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ number of characters. --- `lpeg.S(string)` | Matches any character in set `string`. --- `lpeg.R("`_`xy`_`")`| Matches any character between range `x` and `y`. --- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`. --- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`. --- `patt1 * patt2` | Matches `patt1` followed by `patt2`. --- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice). --- `patt1 - patt2` | Matches `patt1` if `patt2` does not also match. --- `-patt` | Matches if `patt` does not match, consuming no input. --- `#patt` | Matches `patt` but consumes no input. +-- `lpeg.P`(*string*) | Matches string *string* literally. +-- `lpeg.P`(*n*) | Matches exactly *n* characters. +-- `lpeg.S`(*string*) | Matches any character in string set *string*. +-- `lpeg.R`("*xy*") | Matches any character between range *x* and *y*. +-- *patt*`^`*n* | Matches at least *n* repetitions of *patt*. +-- *patt*`^`-*n* | Matches at most *n* repetitions of *patt*. +-- *patt1* `*` *patt2* | Matches *patt1* followed by *patt2*. +-- *patt1* `+` *patt2* | Matches *patt1* or *patt2* (ordered choice). +-- *patt1* `-` *patt2* | Matches *patt1* if *patt2* does not also match. +-- `-`*patt* | Matches if *patt* does not match, consuming no input. +-- `#`*patt* | Matches *patt* but consumes no input. -- -- The first part of this document deals with rapidly constructing a simple lexer. The next part -- deals with more advanced techniques, such as embedding lexers within one another. Following @@ -56,22 +74,24 @@ -- free to use it, replacing the '?' with the name of your lexer.
Consider this snippet from -- the template: -- --- -- ? LPeg lexer. +-- ```lua +-- -- ? LPeg lexer. -- --- local lexer = lexer --- local P, S = lpeg.P, lpeg.S +-- local lexer = lexer +-- local P, S = lpeg.P, lpeg.S -- --- local lex = lexer.new(...) +-- local lex = lexer.new(...) -- --- [... lexer rules ...] +-- --[[... lexer rules ...]] -- --- -- Identifier. --- local identifier = lex:tag(lexer.IDENTIFIER, lexer.word) --- lex:add_rule('identifier', identifier) +-- -- Identifier. +-- local identifier = lex:tag(lexer.IDENTIFIER, lexer.word) +-- lex:add_rule('identifier', identifier) -- --- [... more lexer rules ...] +-- --[[... more lexer rules ...]] -- --- return lex +-- return lex +-- ``` -- -- The first line of code is a Lua convention to store a global variable into a local variable -- for quick access. The second line simply defines often-used convenience variables. The third @@ -103,12 +123,16 @@ -- tag name using the `lexer.tag()` function. Let us examine the "identifier" tag used in -- the template shown earlier: -- --- local identifier = lex:tag(lexer.IDENTIFIER, lexer.word) +-- ```lua +-- local identifier = lex:tag(lexer.IDENTIFIER, lexer.word) +-- ``` -- -- At first glance, the first argument does not appear to be a string name and the second -- argument does not appear to be an LPeg pattern. Perhaps you expected something like: -- --- lex:tag('identifier', (lpeg.R('AZ', 'az') + '_') * (lpeg.R('AZ', 'az', '09') + '_')^0) +-- ```lua +-- lex:tag('identifier', (lpeg.R('AZ', 'az') + '_') * (lpeg.R('AZ', 'az', '09') + '_')^0) +-- ``` -- -- The `lexer` module actually provides a convenient list of common tag names and common LPeg -- patterns for you to use. Tag names for programming languages include (but are not limited @@ -122,14 +146,13 @@ -- `lexer.LIST`. Patterns include `lexer.any`, `lexer.alpha`, `lexer.digit`, `lexer.alnum`, -- `lexer.lower`, `lexer.upper`, `lexer.xdigit`, `lexer.graph`, `lexer.punct`, `lexer.space`, -- `lexer.newline`, `lexer.nonnewline`, `lexer.dec_num`, `lexer.hex_num`, `lexer.oct_num`, --- `lexer.bin_num`, `lexer.integer`, `lexer.float`, `lexer.number`, and `lexer.word`. You may --- use your own tag names if none of the above fit your language, but an advantage to using --- predefined tag names is that the language elements your lexer recognizes will inherit any --- universal syntax highlighting color theme that your editor uses. You can also "subclass" --- existing tag names by appending a '.*subclass*' string to them. For example, the HTML lexer --- tags unknown tags as `lexer.TAG .. '.unknown'`. This gives editors the opportunity to style --- those subclassed tags in a different way than normal tags, or fall back to styling them as --- normal tags. +-- `lexer.bin_num`, `lexer.integer`, `lexer.float`, `lexer.number`, and `lexer.word`. You may use +-- your own tag names if none of the above fit your language, but an advantage to using predefined +-- tag names is that the language elements your lexer recognizes will inherit any universal syntax +-- highlighting color theme that your editor uses. You can also "subclass" existing tag names by +-- appending a '.*subclass*' string to them. For example, the HTML lexer tags unknown tags as +-- `lexer.TAG .. '.unknown'`. This gives editors the opportunity to highlight those subclassed +-- tags in a different way than normal tags, or fall back to highlighting them as normal tags.
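+--
+-- As an illustrative sketch (the '.builtin' subclass name below is an assumption for this
+-- example, not part of the API), a lexer might subclass `lexer.FUNCTION` to distinguish
+-- built-in functions:
+--
+-- ```lua
+-- -- Tag a few built-in names as "function.builtin"; editors may highlight this subclass
+-- -- specially, or fall back to highlighting it as a plain "function".
+-- local builtin = lex:tag(lexer.FUNCTION .. '.builtin', lexer.word_match('print pairs type'))
+-- lex:add_rule('builtin_function', builtin)
+-- ```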
-- -- ##### Example Tags -- @@ -138,29 +161,31 @@ -- -- **Keywords** -- --- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered choices, use one +-- Instead of matching *n* keywords with *n* `P('keyword_n')` ordered choices, use one -- of the following methods: -- -- 1. Use the convenience function `lexer.word_match()` optionally coupled with --- `lexer.set_word_list()`. It is much easier and more efficient to write word matches like: --- --- local keyword = lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)) --- [...] --- lex:set_word_list(lexer.KEYWORD, { --- 'keyword_1', 'keyword_2', ..., 'keyword_n' --- }) --- --- local case_insensitive_word = lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD, true)) --- [...] --- lex:set_word_list(lexer.KEYWORD, { --- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n' --- }) --- --- local hyphenated_keyword = lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)) --- [...] --- lex:set_word_list(lexer.KEYWORD, { --- 'keyword-1', 'keyword-2', ..., 'keyword-n' --- }) +-- `lexer.set_word_list()`. It is much easier and more efficient to write word matches like: +-- +-- ```lua +-- local keyword = lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)) +-- --[[...]] +-- lex:set_word_list(lexer.KEYWORD, { +-- 'keyword_1', 'keyword_2', ..., 'keyword_n' +-- }) +-- +-- local case_insensitive_word = lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD, true)) +-- --[[...]] +-- lex:set_word_list(lexer.KEYWORD, { +-- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n' +-- }) +-- +-- local hyphenated_keyword = lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)) +-- --[[...]] +-- lex:set_word_list(lexer.KEYWORD, { +-- 'keyword-1', 'keyword-2', ..., 'keyword-n' +-- }) +-- ``` -- -- The benefit of using this method is that other lexers that inherit from, embed, or embed -- themselves into your lexer can set, replace, or extend these word lists. For example, @@ -178,21 +203,25 @@ -- -- 2. Use the lexer-agnostic form of `lexer.word_match()`: -- --- local keyword = lex:tag(lexer.KEYWORD, lexer.word_match{ --- 'keyword_1', 'keyword_2', ..., 'keyword_n' --- }) +-- ```lua +-- local keyword = lex:tag(lexer.KEYWORD, lexer.word_match{ +-- 'keyword_1', 'keyword_2', ..., 'keyword_n' +-- }) -- --- local case_insensitive_keyword = lex:tag(lexer.KEYWORD, lexer.word_match({ --- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n' --- }, true)) +-- local case_insensitive_keyword = lex:tag(lexer.KEYWORD, lexer.word_match({ +-- 'KEYWORD_1', 'keyword_2', ..., 'KEYword_n' +-- }, true)) -- --- local hyphened_keyword = lex:tag(lexer.KEYWORD, lexer.word_match{ --- 'keyword-1', 'keyword-2', ..., 'keyword-n' --- }) +-- local hyphened_keyword = lex:tag(lexer.KEYWORD, lexer.word_match{ +-- 'keyword-1', 'keyword-2', ..., 'keyword-n' +-- }) +-- ``` -- -- For short keyword lists, you can use a single string of words. For example: -- --- local keyword = lex:tag(lexer.KEYWORD, lexer.word_match('key_1 key_2 ... key_n')) +-- ```lua +-- local keyword = lex:tag(lexer.KEYWORD, lexer.word_match('key_1 key_2 ... key_n')) +-- ``` -- -- You can use this method for static word lists that do not change, or where it does not -- make sense to allow applications or other lexers to extend or replace a word list.
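+--
+-- As a sketch of how the first method pays off downstream (the 'mylang' lexer name here is
+-- hypothetical), an application or inheriting lexer could later extend a keyword list:
+--
+-- ```lua
+-- local lex = lexer.load('mylang') -- assumes mylang references lex:word_match(lexer.KEYWORD)
+-- -- Append project-specific keywords to the existing list rather than replacing it.
+-- lex:set_word_list(lexer.KEYWORD, {'keyword_n1', 'keyword_n2'}, true)
+-- ```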
@@ -201,16 +230,20 @@ -- -- Line-style comments with a prefix character(s) are easy to express: -- --- local shell_comment = lex:tag(lexer.COMMENT, lexer.to_eol('#')) --- local c_line_comment = lex:tag(lexer.COMMENT, lexer.to_eol('//', true)) +-- ```lua +-- local shell_comment = lex:tag(lexer.COMMENT, lexer.to_eol('#')) +-- local c_line_comment = lex:tag(lexer.COMMENT, lexer.to_eol('//', true)) +-- ``` -- -- The comments above start with a '#' or "//" and go to the end of the line (EOL). The second --- comment recognizes the next line also as a comment if the current line ends with a '\' +-- comment recognizes the next line also as a comment if the current line ends with a '\\' -- escape character. -- -- C-style "block" comments with a start and end delimiter are also easy to express: -- --- local c_comment = lex:tag(lexer.COMMENT, lexer.range('/*', '*/')) +-- ```lua +-- local c_comment = lex:tag(lexer.COMMENT, lexer.range('/*', '*/')) +-- ``` -- -- This comment starts with a "/\*" sequence and contains anything up to and including an ending -- "\*/" sequence. The ending "\*/" is optional so the lexer can recognize unfinished comments @@ -222,27 +255,35 @@ -- "\\"" in a double-quoted string indicates that the '"' is not the end of the -- string. `lexer.range()` handles escapes inherently. -- --- local dq_str = lexer.range('"') --- local sq_str = lexer.range("'") --- local string = lex:tag(lexer.STRING, dq_str + sq_str) +-- ```lua +-- local dq_str = lexer.range('"') +-- local sq_str = lexer.range("'") +-- local string = lex:tag(lexer.STRING, dq_str + sq_str) +-- ``` -- --- In this case, the lexer treats '\' as an escape character in a string sequence. +-- In this case, the lexer treats '\\' as an escape character in a string sequence. -- -- **Numbers** -- -- Most programming languages have the same format for integers and floats, so it might be as -- simple as using a predefined LPeg pattern: -- --- local number = lex:tag(lexer.NUMBER, lexer.number) +-- ```lua +-- local number = lex:tag(lexer.NUMBER, lexer.number) +-- ``` -- --- However, some languages allow postfix characters on integers. +-- However, some languages allow postfix characters on integers: -- --- local integer = P('-')^-1 * (lexer.dec_num * S('lL')^-1) --- local number = lex:tag(lexer.NUMBER, lexer.float + lexer.hex_num + integer) +-- ```lua +-- local integer = P('-')^-1 * (lexer.dec_num * S('lL')^-1) +-- local number = lex:tag(lexer.NUMBER, lexer.float + lexer.hex_num + integer) +-- ``` -- --- Other languages allow separaters within numbers for better readability. +-- Other languages allow separators within numbers for better readability: -- --- local number = lex:tag(lexer.NUMBER, lexer.number_('_')) -- recognize 1_000_000 +-- ```lua +-- local number = lex:tag(lexer.NUMBER, lexer.number_('_')) -- recognize 1_000_000 +-- ``` -- -- Your language may need other tweaks, but it is up to you how fine-grained you want your -- highlighting to be. After all, you are not writing a compiler or interpreter! @@ -255,7 +296,9 @@ -- are tagged. Recall from the lexer template the `lexer.add_rule()` call, which adds a rule -- to the lexer's grammar: -- --- lex:add_rule('identifier', identifier) +-- ```lua +-- lex:add_rule('identifier', identifier) +-- ``` -- -- Each rule has an associated name, but rule names are completely arbitrary and serve only to -- identify and distinguish between different rules.
Rule order is important: if text does not @@ -264,13 +307,15 @@ -- -- To illustrate the importance of rule order, here is an example of a simplified Lua lexer: -- --- lex:add_rule('keyword', lex:tag(lexer.KEYWORD, ...)) --- lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, ...)) --- lex:add_rule('string', lex:tag(lexer.STRING, ...)) --- lex:add_rule('comment', lex:tag(lexer.COMMENT, ...)) --- lex:add_rule('number', lex:tag(lexer.NUMBER, ...)) --- lex:add_rule('label', lex:tag(lexer.LABEL, ...)) --- lex:add_rule('operator', lex:tag(lexer.OPERATOR, ...)) +-- ```lua +-- lex:add_rule('keyword', lex:tag(lexer.KEYWORD, ...)) +-- lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, ...)) +-- lex:add_rule('string', lex:tag(lexer.STRING, ...)) +-- lex:add_rule('comment', lex:tag(lexer.COMMENT, ...)) +-- lex:add_rule('number', lex:tag(lexer.NUMBER, ...)) +-- lex:add_rule('label', lex:tag(lexer.LABEL, ...)) +-- lex:add_rule('operator', lex:tag(lexer.OPERATOR, ...)) +-- ``` -- -- Notice how identifiers come _after_ keywords. In Lua, as with most programming languages, -- the characters allowed in keywords and identifiers are in the same set (alphanumerics plus @@ -284,9 +329,11 @@ -- meaningless outside a string or comment. Normally the lexer skips over such text. If instead -- you want to highlight these "syntax errors", add a final rule: -- --- lex:add_rule('keyword', keyword) --- ... --- lex:add_rule('error', lex:tag(lexer.ERROR, lexer.any)) +-- ```lua +-- lex:add_rule('keyword', keyword) +-- --[[...]] +-- lex:add_rule('error', lex:tag(lexer.ERROR, lexer.any)) +-- ``` -- -- This identifies and tags any character not matched by an existing rule as a `lexer.ERROR`. -- @@ -296,8 +343,10 @@ -- allows the lexer to produce all tags separately, but in a single, convenient rule. That rule -- might look something like this: -- --- local ws = lex:get_rule('whitespace') -- predefined rule for all lexers --- lex:add_rule('tag', tag_start * (ws * attributes)^0 * tag_end^-1) +-- ```lua +-- local ws = lex:get_rule('whitespace') -- predefined rule for all lexers +-- lex:add_rule('tag', tag_start * (ws * attributes)^0 * tag_end^-1) +-- ``` -- -- Note however that lexers with complex rules like these are more prone to lose track of their -- state, especially if they span multiple lines. @@ -317,13 +366,15 @@ -- #### Line Lexers -- -- By default, lexers match the arbitrary chunks of text passed to them by Scintilla. These --- chunks may be a full document, only the visible part of a document, or even just portions --- of lines. Some lexers need to match whole lines. For example, a lexer for the output of a --- file "diff" needs to know if the line started with a '+' or '-' and then style the entire +-- chunks may be a full document, only the visible part of a document, or even just portions of +-- lines. Some lexers need to match whole lines. For example, a lexer for the output of a file +-- "diff" needs to know if the line started with a '+' or '-' and then highlight the entire -- line accordingly. To indicate that your lexer matches by line, create the lexer with an -- extra parameter: -- --- local lex = lexer.new(..., {lex_by_line = true}) +-- ```lua +-- local lex = lexer.new(..., {lex_by_line = true}) +-- ``` -- -- Now the input text for the lexer is a single line at a time. Keep in mind that line lexers -- do not have the ability to look ahead to subsequent lines. @@ -333,7 +384,7 @@ -- Scintillua lexers embed within one another very easily, requiring minimal effort. 
In the -- following sections, the lexer being embedded is called the "child" lexer and the lexer a child -- is being embedded in is called the "parent". For example, consider an HTML lexer and a CSS --- lexer. Either lexer stands alone for styling their respective HTML and CSS files. However, CSS +-- lexer. Either lexer stands alone for tagging their respective HTML and CSS files. However, CSS -- can be embedded inside HTML. In this specific case, the CSS lexer is the "child" lexer with -- the HTML lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This sounds -- a lot like the case with CSS, but there is a subtle difference: PHP _embeds itself into_ @@ -347,7 +398,9 @@ -- lexer. This is done with the `lexer.load()` function. For example, loading the CSS lexer -- within the HTML lexer looks like: -- --- local css = lexer.load('css') +-- ```lua +-- local css = lexer.load('css') +-- ``` -- -- The next part of the embedding process is telling the parent lexer when to switch over -- to the child lexer and when to switch back. The lexer refers to these indications as the @@ -355,28 +408,36 @@ -- HTML/CSS example, the transition from HTML to CSS is when the lexer encounters a "style" -- tag with a "type" attribute whose value is "text/css": -- --- local css_tag = P('<style') * P(function(input, index) --- if input:find('^[^>]+type="text/css"', index) then return true end --- end) +-- ```lua +-- local css_tag = P('<style') * P(function(input, index) +-- if input:find('^[^>]+type="text/css"', index) then return true end +-- end) +-- ``` -- -- This pattern looks for the beginning of a "style" tag and searches its attribute list for -- the text "`type="text/css"`". (In this simplified example, the Lua pattern does not consider -- whitespace between the '=' nor does it consider that using single quotes is valid.) If there --- is a match, the functional pattern returns `true`. However, we ultimately want to style the +-- is a match, the functional pattern returns `true`. However, we ultimately want to tag the -- "style" tag as an HTML tag, so the actual start rule looks like this: -- --- local css_start_rule = #css_tag * tag +-- ```lua +-- local css_start_rule = #css_tag * tag +-- ``` -- -- Now that the parent knows when to switch to the child, it needs to know when to switch -- back. In the case of HTML/CSS, the switch back occurs when the lexer encounters an ending --- "style" tag, though the lexer should still style the tag as an HTML tag: +-- "style" tag, though the lexer should still tag that tag as an HTML tag: -- --- local css_end_rule = #P('</style>') * tag +-- ```lua +-- local css_end_rule = #P('</style>') * tag +-- ``` -- -- Once the parent loads the child lexer and defines the child's start and end rules, it embeds -- the child with the `lexer.embed()` function: -- --- lex:embed(css, css_start_rule, css_end_rule) +-- ```lua +-- lex:embed(css, css_start_rule, css_end_rule) +-- ``` -- -- ##### Child Lexer -- @@ -385,10 +446,12 @@ -- `lexer.load()` function and then create start and end rules for the child lexer. However, -- in this case, call `lexer.embed()` with switched arguments. 
For example, in the PHP lexer: -- --- local html = lexer.load('html') --- local php_start_rule = lex:tag('php_tag', '<?php' * lexer.space) --- local php_end_rule = lex:tag('php_tag', '?>') --- html:embed(lex, php_start_rule, php_end_rule) +-- ```lua +-- local html = lexer.load('html') +-- local php_start_rule = lex:tag('php_tag', '<?php' * lexer.space) +-- local php_end_rule = lex:tag('php_tag', '?>') +-- html:embed(lex, php_start_rule, php_end_rule) +-- ``` -- -- Note that the use of a 'php_tag' tag will require the editor using the lexer to specify how -- to highlight text with that tag. In order to avoid this, you could use the `lexer.PREPROCESSOR` @@ -410,10 +473,10 @@ -- -- When reading source code, it is occasionally helpful to temporarily hide blocks of code like -- functions, classes, comments, etc. This is the concept of "folding". In the Textadept and --- SciTE editors for example, little indicators in the editor margins appear next to code that --- can be folded at places called "fold points". When the user clicks an indicator, the editor --- hides the code associated with the indicator until the user clicks the indicator again. The --- lexer specifies these fold points and what code exactly to fold. +-- SciTE editors for example, little markers in the editor margins appear next to code that +-- can be folded at places called "fold points". When the user clicks on one of those markers, +-- the editor hides the code associated with the marker until the user clicks on the marker +-- again. The lexer specifies these fold points and what code exactly to fold. -- -- The fold points for most languages occur on keywords or character sequences. Examples of -- fold keywords are "if" and "end" in Lua and examples of fold character sequences are '{', @@ -422,8 +485,10 @@ -- keywords that appear within strings or comments. The `lexer.add_fold_point()` function allows -- you to conveniently define fold points with such granularity. For example, consider C: -- --- lex:add_fold_point(lexer.OPERATOR, '{', '}') --- lex:add_fold_point(lexer.COMMENT, '/*', '*/') +-- ```lua +-- lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- lex:add_fold_point(lexer.COMMENT, '/*', '*/') +-- ``` -- -- The first assignment states that any '{' or '}' that the lexer tagged as an `lexer.OPERATOR` -- is a fold point. Likewise, the second assignment states that any "/\*" or "\*/" that the @@ -431,10 +496,12 @@ -- occurrences of these characters outside their tagged elements (such as in a string) as fold -- points. How do you specify fold keywords? Here is an example for Lua: -- --- lex:add_fold_point(lexer.KEYWORD, 'if', 'end') --- lex:add_fold_point(lexer.KEYWORD, 'do', 'end') --- lex:add_fold_point(lexer.KEYWORD, 'function', 'end') --- lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until') +-- ```lua +-- lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +-- lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +-- lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +-- lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until') +-- ``` -- -- If your lexer has case-insensitive keywords as fold points, simply add a -- `case_insensitive_fold_points = true` option to `lexer.new()`, and specify keywords in @@ -446,16 +513,18 @@ -- indicates the element is an ending fold point. A return value of `0` indicates the element -- is not a fold point. For example: -- --- local function fold_strange_element(text, pos, line, s, symbol) --- if ... then --- return 1 -- beginning fold point --- elseif ... 
then --- return -1 -- ending fold point --- end --- return 0 --- end +-- ```lua +-- local function fold_strange_element(text, pos, line, s, symbol) +-- if ... then +-- return 1 -- beginning fold point +-- elseif ... then +-- return -1 -- ending fold point +-- end +-- return 0 +-- end -- --- lex:add_fold_point('strange_element', '|', fold_strange_element) +-- lex:add_fold_point('strange_element', '|', fold_strange_element) +-- ``` -- -- Any time the lexer encounters a '|' that is tagged as a "strange_element", it calls the -- `fold_strange_element` function to determine if '|' is a fold point. The lexer calls these @@ -469,7 +538,14 @@ -- your lexer falls into this category and you would like to mark fold points based on changes -- in indentation, create the lexer with a `fold_by_indentation = true` option: -- --- local lex = lexer.new(..., {fold_by_indentation = true}) +-- ```lua +-- local lex = lexer.new(..., {fold_by_indentation = true}) +-- ``` +-- +-- #### Custom Folding +-- +-- Lexers with complex folding needs can implement their own folders by defining their own +-- [`lex:fold()`](#lexer.fold) method. Writing custom folders is beyond the scope of this document. -- -- ### Using Lexers -- @@ -486,12 +562,12 @@ -- Create a *.properties* file for your lexer and `import` it in either your *SciTEUser.properties* -- or *SciTEGlobal.properties*. The contents of the *.properties* file should contain: -- --- file.patterns.[lexer_name]=[file_patterns] --- lexer.$(file.patterns.[lexer_name])=scintillua.[lexer_name] --- keywords.$(file.patterns.[lexer_name])=scintillua --- keywords2.$(file.patterns.[lexer_name])=scintillua --- ... --- keywords9.$(file.patterns.[lexer_name])=scintillua +-- file.patterns.[lexer_name]=[file_patterns] +-- lexer.$(file.patterns.[lexer_name])=scintillua.[lexer_name] +-- keywords.$(file.patterns.[lexer_name])=scintillua +-- keywords2.$(file.patterns.[lexer_name])=scintillua +-- ... +-- keywords9.$(file.patterns.[lexer_name])=scintillua -- -- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension) and -- `[file_patterns]` is a set of file extensions to use your lexer for. The `keyword` settings are @@ -506,36 +582,38 @@ -- tag names used for a given language changes, your *.properties* file should specify styles -- for tag names instead of style numbers. For example: -- --- scintillua.styles.my_tag=$(scintillua.styles.keyword),bold +-- scintillua.styles.my_tag=$(scintillua.styles.keyword),bold -- -- ### Migrating Legacy Lexers -- -- Legacy lexers are of the form: -- --- local lexer = require('lexer') --- local token, word_match = lexer.token, lexer.word_match --- local P, S = lpeg.P, lpeg.S +-- ```lua +-- local lexer = require('lexer') +-- local token, word_match = lexer.token, lexer.word_match +-- local P, S = lpeg.P, lpeg.S -- --- local lex = lexer.new('?') +-- local lex = lexer.new('?') -- --- -- Whitespace. --- lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) +-- -- Whitespace. +-- lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- --- -- Keywords. --- lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ --- [...] --- })) +-- -- Keywords. +-- lex:add_rule('keyword', token(lexer.KEYWORD, word_match{ +-- --[[...]] +-- })) -- --- [... other rule definitions ...] +-- --[[... other rule definitions ...]] -- --- -- Custom. --- lex:add_rule('custom_rule', token('custom_token', ...)) --- lex:add_style('custom_token', lexer.styles.keyword .. {bold = true}) +-- -- Custom. 
+-- lex:add_rule('custom_rule', token('custom_token', ...)) +-- lex:add_style('custom_token', lexer.styles.keyword .. {bold = true}) -- --- -- Fold points. --- lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- -- Fold points. +-- lex:add_fold_point(lexer.OPERATOR, '{', '}') -- --- return lex +-- return lex +-- ``` -- -- While Scintillua will mostly handle such legacy lexers just fine without any changes, it is -- recommended that you migrate yours. The migration process is fairly straightforward: @@ -563,46 +641,50 @@ -- -- As an example, consider the following sample legacy lexer: -- --- local lexer = require('lexer') --- local token, word_match = lexer.token, lexer.word_match --- local P, S = lpeg.P, lpeg.S +-- ```lua +-- local lexer = require('lexer') +-- local token, word_match = lexer.token, lexer.word_match +-- local P, S = lpeg.P, lpeg.S -- --- local lex = lexer.new('legacy') +-- local lex = lexer.new('legacy') -- --- lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) --- lex:add_rule('keyword', token(lexer.KEYWORD, word_match('foo bar baz'))) --- lex:add_rule('custom', token('custom', 'quux')) --- lex:add_style('custom', lexer.styles.keyword .. {bold = true}) --- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) --- lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) --- lex:add_rule('number', token(lexer.NUMBER, lexer.number)) --- lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}'))) +-- lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) +-- lex:add_rule('keyword', token(lexer.KEYWORD, word_match('foo bar baz'))) +-- lex:add_rule('custom', token('custom', 'quux')) +-- lex:add_style('custom', lexer.styles.keyword .. {bold = true}) +-- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) +-- lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) +-- lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) +-- lex:add_rule('number', token(lexer.NUMBER, lexer.number)) +-- lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}'))) -- --- lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- lex:add_fold_point(lexer.OPERATOR, '{', '}') -- --- return lex +-- return lex +-- ``` -- -- Following the migration steps would yield: -- --- local lexer = lexer --- local P, S = lpeg.P, lpeg.S +-- ```lua +-- local lexer = lexer +-- local P, S = lpeg.P, lpeg.S -- --- local lex = lexer.new(...) +-- local lex = lexer.new(...) 
-- --- lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD))) --- lex:add_rule('custom', lex:tag('custom', 'quux')) --- lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, lexer.word)) --- lex:add_rule('string', lex:tag(lexer.STRING, lexer.range('"'))) --- lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#'))) --- lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.number)) --- lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}'))) +-- lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD))) +-- lex:add_rule('custom', lex:tag('custom', 'quux')) +-- lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, lexer.word)) +-- lex:add_rule('string', lex:tag(lexer.STRING, lexer.range('"'))) +-- lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#'))) +-- lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.number)) +-- lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}'))) -- --- lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- lex:add_fold_point(lexer.OPERATOR, '{', '}') -- --- lex:set_word_list(lexer.KEYWORD, {'foo', 'bar', 'baz'}) +-- lex:set_word_list(lexer.KEYWORD, {'foo', 'bar', 'baz'}) -- --- return lex +-- return lex +-- ``` -- -- Any editors using this lexer would have to add a style for the 'custom' tag. -- @@ -629,17 +711,21 @@ -- languages because the parent's tagged patterns do not support start and end rules. This -- mostly goes unnoticed, but code like -- +-- ```php -- <div id="<?php echo $id; ?>"> +-- ``` -- --- will not style correctly. Also, these types of languages cannot currently embed themselves +-- will not be tagged correctly. Also, these types of languages cannot currently embed themselves -- into their parent's child languages either. -- -- A language cannot embed itself into something like an interpolated string because it is -- possible that if lexing starts within the embedded entity, it will not be detected as such, -- so a child to parent transition cannot happen. For example, the following Ruby code will --- not style correctly: +-- not be tagged correctly: -- +-- ```ruby -- sum = "1 + 2 = #{1 + 2}" +-- ``` -- -- Also, there is the potential for recursion for languages embedding themselves within themselves. -- @@ -765,10 +851,10 @@ local M = {} --- The initial (root) fold level. -- @field FOLD_BASE ---- Flag indicating that the line is blank. +--- Bit-flag indicating that the line is blank. -- @field FOLD_BLANK ---- Flag indicating the line is fold point. +--- Bit-flag indicating that the line is a fold point. -- @field FOLD_HEADER -- This comment is needed for LDoc to process the previous field. @@ -796,13 +882,12 @@ local predefined = { } for _, name in ipairs(predefined) do M[name:upper():gsub('%.', '_')] = name end ---- Creates and returns a pattern that tags pattern *patt* with name *name* in lexer *lexer*. --- If *name* is not a predefined tag name, its Scintilla style will likely need to be defined --- by the editor or theme using this lexer. --- @param lexer The lexer to tag the given pattern in. --- @param name The name to use. --- @param patt The LPeg pattern to tag. --- @return pattern +--- Returns a tagged pattern. +-- @param lexer Lexer to tag the pattern in. +-- @param name String name to use for the tag. If it is not a predefined tag name +-- (`lexer.[A-Z_]+`), its Scintilla style will likely need to be defined by the editor or +-- theme using this lexer. +-- @param patt LPeg pattern to tag.
-- @usage local number = lex:tag(lexer.NUMBER, lexer.number) -- @usage local addition = lex:tag('addition', '+' * lexer.word) function M.tag(lexer, name, patt) @@ -827,24 +912,21 @@ function M.tag(lexer, name, patt) return Cc(name) * (P(patt) / 0) * Cp() end ---- Returns a unique grammar rule name for the given lexer's i-th word list. +--- Returns a unique grammar rule name for one of the word lists in a lexer. +-- @param lexer Lexer to use. +-- @param i *i*th word list to get. local function word_list_id(lexer, i) return lexer._name .. '_wordlist' .. i end ---- Either returns a pattern for lexer *lexer* (if given) that matches one word in the word list --- identified by string *word_list*, ignoring case if *case_sensitive* is `true`, or, if *lexer* --- is not given, creates and returns a pattern that matches any single word in list or string --- *word_list*, ignoring case if *case_insensitive* is `true`. +--- Returns a pattern that matches a word in a word list. -- This is a convenience function for simplifying a set of ordered choice word patterns and -- potentially allowing downstream users to configure word lists. --- If there is ultimately no word list set via `set_word_list()`, no error will be raised, --- but the returned pattern will not match anything. --- @param[opt] lexer Optional lexer to match a word in a wordlist for. This parameter may be --- omitted for lexer-agnostic matching. --- @param word_list Either a string name of the word list to match from if *lexer* is given, --- or, if *lexer* is omitted, a list of words or a string list of words separated by spaces. --- @param[opt=false] case_insensitive Optional boolean flag indicating whether or not the word --- match is case-insensitive. --- @return pattern +-- @param[opt] lexer Lexer to match a word in a word list for. This parameter may be omitted +-- for lexer-agnostic matching. +-- @param word_list Either a string name of the word list to match from if *lexer* is given, or, +-- if *lexer* is omitted, a table of words or a string list of words separated by spaces. If a +-- word list name was given and there is ultimately no word list set via `lex:set_word_list()`, +-- no error will be raised, but the returned pattern will not match anything. +-- @param[opt=false] case_insensitive Match the word case-insensitively. -- @usage lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD))) -- @usage local keyword = lex:tag(lexer.KEYWORD, lexer.word_match{'foo', 'bar', 'baz'}) -- @usage local keyword = lex:tag(lexer.KEYWORD, lexer.word_match({'foo-bar', 'foo-baz', @@ -899,15 +981,13 @@ function M.word_match(lexer, word_list, case_insensitive) end) end ---- Sets in lexer *lexer* the word list identified by string or number *name* to string or --- list *word_list*, appending to any existing word list if *append* is `true`. --- This only has an effect if *lexer* uses `word_match()` to reference the given list. --- Case-insensitivity is specified by `word_match()`. --- @param lexer The lexer to add the given word list to. --- @param name The string name or number of the word list to set. --- @param word_list A list of words or a string list of words separated by spaces. --- @param append Whether or not to append *word_list* to the existing word list (if any). The --- default value is `false`. +--- Sets the words in a lexer's word list. +-- This only has an effect if the lexer uses `lexer.word_match()` to reference the given list. +-- @param lexer Lexer to add a word list to. 
+-- @param name String name or number of the word list to set. +-- @param word_list Table of words or a string list of words separated by +-- spaces. Case-insensitivity is specified by a `lexer.word_match()` reference to this list. +-- @param[opt=false] append Append *word_list* to an existing word list (if any). function M.set_word_list(lexer, name, word_list, append) if word_list == 'scintillua' then return end -- for SciTE if lexer._lexer then @@ -938,12 +1018,11 @@ lexer._grammar_table = nil -- invalidate end ---- Adds pattern *rule* identified by string *id* to the ordered list of rules for lexer *lexer*. --- @param lexer The lexer to add the given rule to. --- @param id The id associated with this rule. It does not have to be the same as the name --- passed to `tag()`. --- @param rule The LPeg pattern of the rule. --- @see modify_rule +--- Adds a rule to a lexer. +-- @param lexer Lexer to add *rule* to. +-- @param id String id associated with this rule. It does not have to be the same as the name +-- passed to `lex:tag()`. +-- @param rule LPeg pattern of the rule to add. function M.add_rule(lexer, id, rule) if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent if not lexer._rules then lexer._rules = {} end @@ -955,10 +1034,10 @@ lexer._grammar_table = nil -- invalidate end ---- Replaces in lexer *lexer* the existing rule identified by string *id* with pattern *rule*. --- @param lexer The lexer to modify. --- @param id The id associated with this rule. --- @param rule The LPeg pattern of the rule. +--- Replaces a lexer's existing rule. +-- @param lexer Lexer to modify. +-- @param id String id of the rule to replace. +-- @param rule LPeg pattern of the new rule. function M.modify_rule(lexer, id, rule) if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent assert(lexer._rules[id], 'rule does not exist') @@ -966,25 +1045,23 @@ lexer._grammar_table = nil -- invalidate end ---- Returns a unique grammar rule name for the given lexer's rule name. +--- Returns a unique grammar rule name for one of the rule names in a lexer. local function rule_id(lexer, name) return lexer._name .. '.' .. name end ---- Returns the rule identified by string *id*. --- @param lexer The lexer to fetch a rule from. --- @param id The id of the rule to fetch. --- @return pattern +--- Returns a lexer's rule. +-- @param lexer Lexer to fetch a rule from. +-- @param id String id of the rule to fetch. function M.get_rule(lexer, id) if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent if id == 'whitespace' then return V(rule_id(lexer, id)) end -- special case return assert(lexer._rules[id], 'rule does not exist') end ---- Embeds child lexer *child* in parent lexer *lexer* using patterns *start_rule* and *end_rule*, --- which signal the beginning and end of the embedded lexer, respectively. --- @param lexer The parent lexer. --- @param child The child lexer. --- @param start_rule The pattern that signals the beginning of the embedded lexer. --- @param end_rule The pattern that signals the end of the embedded lexer. +--- Embeds a child lexer into a parent lexer. +-- @param lexer Parent lexer. +-- @param child Child lexer. +-- @param start_rule LPeg pattern that matches the beginning of the child lexer. +-- @param end_rule LPeg pattern that matches the end of the child lexer.
-- @usage html:embed(css, css_start_rule, css_end_rule) -- @usage html:embed(lex, php_start_rule, php_end_rule) -- from php lexer function M.embed(lexer, child, start_rule, end_rule) @@ -1024,24 +1101,18 @@ function M.embed(lexer, child, start_rule, end_rule) child._lexer = lexer -- use parent's rules if child is embedding itself end ---- Adds to lexer *lexer* a fold point whose beginning and end points are tagged with string --- *tag_name* tags and have string content *start_symbol* and *end_symbol*, respectively. --- In the event that *start_symbol* may or may not be a fold point depending on context, and that --- additional processing is required, *end_symbol* may be a function that ultimately returns --- `1` (indicating a beginning fold point), `-1` (indicating an ending fold point), or `0` --- (indicating no fold point). That function is passed the following arguments: --- +--- Adds a fold point to a lexer. +-- @param lexer Lexer to add a fold point to. +-- @param tag_name String tag name of fold point text. +-- @param start_symbol String fold point start text. +-- @param end_symbol Either string fold point end text, or a function that returns whether or +-- not *start_symbol* is a beginning fold point (1), an ending fold point (-1), or not a fold +-- point at all (0). If it is a function, it is passed the following arguments: -- - `text`: The text being processed for fold points. -- - `pos`: The position in *text* of the beginning of the line currently being processed. -- - `line`: The text of the line currently being processed. -- - `s`: The position of *start_symbol* in *line*. -- - `symbol`: *start_symbol* itself. --- @param lexer The lexer to add a fold point to. --- @param tag_name The tag name for text that indicates a fold point. --- @param start_symbol The text that indicates the beginning of a fold point. --- @param end_symbol Either the text that indicates the end of a fold point, or a function that --- returns whether or not *start_symbol* is a beginning fold point (1), an ending fold point --- (-1), or not a fold point at all (0). -- @usage lex:add_fold_point(lexer.OPERATOR, '{', '}') -- @usage lex:add_fold_point(lexer.KEYWORD, 'if', 'end') -- @usage lex:add_fold_point('custom', function(text, pos, line, s, symbol) ... end) @@ -1071,9 +1142,9 @@ function M.add_fold_point(lexer, tag_name, start_symbol, end_symbol) if lexer._lexer then lexer._lexer:add_fold_point(tag_name, start_symbol, end_symbol) end end ---- Recursively adds the rules for the given lexer and its children to the given grammar. --- @param g The grammar to add rules to. --- @param lexer The lexer whose rules to add. +--- Recursively adds the rules for a lexer and its children to a grammar. +-- @param g Grammar to add rules to. +-- @param lexer Lexer whose rules to add. local function add_lexer(g, lexer) local rule = P(false) @@ -1141,9 +1212,9 @@ local function add_lexer(g, lexer) end end ---- Returns a grammar for the given lexer and initial rule, (re)constructing it if necessary. --- @param lexer The lexer to build a grammar for. --- @param init_style The current style. Multiple-language lexers use this to determine which +--- Returns the grammar for a lexer and its initial rule, (re)constructing it if necessary. +-- @param lexer Lexer to build a grammar for. +-- @param init_style Current style number. Multiple-language lexers use this to determine which -- language to start lexing in. 
local function build_grammar(lexer, init_style) if not lexer._rules then return end @@ -1217,13 +1288,13 @@ local function build_grammar(lexer, init_style) return lexer._grammar end ---- Lexes a chunk of text *text* (that has an initial style number of *init_style*) using lexer --- *lexer*, returning a list of tag names and positions. --- @param lexer The lexer to lex text with. --- @param text The text in the buffer to lex. --- @param init_style The current style. Multiple-language lexers use this to determine which --- language to start lexing in. --- @return list of tag names and positions. +--- Lexes a chunk of text. +-- @param lexer Lexer to lex text with. +-- @param text String text to lex, which may be a partial chunk, single line, or full text. +-- @param init_style Number of the text's current style. Multiple-language lexers use this to +-- determine which language to start lexing in. +-- @return table of tag names and positions. +-- @usage lex:lex(...) --> {'keyword', 2, 'whitespace.lua', 3, 'identifier', 7} function M.lex(lexer, text, init_style) local grammar = build_grammar(lexer, init_style) if not grammar then return {M.DEFAULT, #text + 1} end @@ -1255,16 +1326,16 @@ function M.lex(lexer, text, init_style) return grammar:match(text) end ---- Determines fold points in a chunk of text *text* using lexer *lexer*, returning a table of --- fold levels associated with line numbers. --- *text* starts on line number *start_line* with a beginning fold level of *start_level* --- in the buffer. --- @param lexer The lexer to fold text with. --- @param text The text in the buffer to fold. --- @param start_line The line number *text* starts on, counting from 1. --- @param start_level The fold level *text* starts on. --- @return table of fold levels associated with line numbers. +--- Determines fold points in a chunk of text. +-- @param lexer Lexer to fold text with. +-- @param text String text to fold, which may be a partial chunk, single line, or full text. +-- @param start_line Line number *text* starts on, counting from 1. +-- @param start_level Fold level *text* starts with. It cannot be lower than `lexer.FOLD_BASE` +-- (1024). +-- @return table of line numbers mapped to fold levels +-- @usage lex:fold(...) --> {[1] = 1024, [2] = 9216, [3] = 1025, [4] = 1025, [5] = 1024} function M.fold(lexer, text, start_line, start_level) + if rawget(lexer, 'fold') then return rawget(lexer, 'fold')(lexer, text, start_line, start_level) end local folds = {} if text == '' then return folds end local fold = M.property_int['fold'] > 0 @@ -1404,21 +1475,20 @@ function M.fold(lexer, text, start_line, start_level) return folds end ---- Creates a returns a new lexer with the given name. --- @param name The lexer's name. --- @param opts Table of lexer options. Options currently supported: --- - `lex_by_line`: Whether or not the lexer only processes whole lines of text (instead of --- arbitrary chunks of text) at a time. Line lexers cannot look ahead to subsequent lines. --- The default value is `false`. --- - `fold_by_indentation`: Whether or not the lexer does not define any fold points and that --- fold points should be calculated based on changes in line indentation. The default value --- is `false`. --- - `case_insensitive_fold_points`: Whether or not fold points added via --- `lexer.add_fold_point()` ignore case. The default value is `false`. --- - `no_user_word_lists`: Does not automatically allocate word lists that can be set by +--- Creates a new lexer. +-- @param name String lexer name. 
Use `...` to inherit from the file's name. +-- @param[opt] opts Table of lexer options. Options currently supported: +-- - `lex_by_line`: Only processes whole lines of text at a time (instead of arbitrary chunks +-- of text). Line lexers cannot look ahead to subsequent lines. The default value is `false`. +-- - `fold_by_indentation`: Calculate fold points based on changes in line indentation. The +-- default value is `false`. +-- - `case_insensitive_fold_points`: Fold points added via `lexer.add_fold_point()` should +-- ignore case. The default value is `false`. +-- - `no_user_word_lists`: Do not automatically allocate word lists that can be set by -- users. This should really only be set by non-programming languages like markup languages. -- - `inherit`: Lexer to inherit from. The default value is `nil`. --- @usage lexer.new('rhtml', {inherit = lexer.load('html')}) +-- @return lexer object +-- @usage lexer.new(..., {inherit = lexer.load('html')}) -- name is 'rhtml' in rhtml.lua file function M.new(name, opts) local lexer = setmetatable({ _name = assert(name, 'lexer name expected'), _lex_by_line = opts and opts['lex_by_line'], @@ -1457,6 +1527,24 @@ local function initialize_standalone_library() return line - 1 -- should not get to here end + M.text_range = function(pos, length) return M._text:sub(pos, pos + length - 1) end + + --- Returns a line number's start and end positions. + -- @param line Line number (1-based) to get the start and end positions of. + local function get_line_range(line) + local current_line = 1 + for s, e in M._text:gmatch('()[^\n]*()') do + if current_line == line then return s, e end + current_line = current_line + 1 + end + return 1, 1 -- should not get to here + end + + M.line_start = setmetatable({}, {__index = function(_, line) return get_line_range(line) end}) + M.line_end = setmetatable({}, { + __index = function(_, line) return select(2, get_line_range(line)) end + }) + M.indent_amount = setmetatable({}, { __index = function(_, line) local current_line = 1 @@ -1474,9 +1562,12 @@ local function initialize_standalone_library() M._standalone = true end ---- Searches for the given *name* in the given *path*. +--- Searches for a lexer to load. -- This is a safe implementation of Lua 5.2's `package.searchpath()` function that does not -- require the package module to be loaded. +-- @param name String lexer name to search for. +-- @param path String list of ';'-separated paths to search for lexers in. +-- @return path to a lexer or `nil` plus an error message local function searchpath(name, path) local tried = {} for part in path:gmatch('[^;]+') do @@ -1488,12 +1579,12 @@ local function searchpath(name, path) return nil, table.concat(tried, '\n') end ---- Initializes or loads and then returns the lexer of string name *name*. +--- Initializes or loads a lexer. -- Scintilla calls this function in order to load a lexer. Parent lexers also call this function -- in order to load child lexers and vice-versa. The user calls this function in order to load -- a lexer when using Scintillua as a Lua library. --- @param name The name of the lexing language. --- @param[opt] alt_name Optional alternate name of the lexing language. This is useful for +-- @param name String name of the lexing language. +-- @param[opt] alt_name String alternate name of the lexing language. This is useful for -- embedding the same child lexer with multiple sets of start and end tags. 
-- @return lexer object function M.load(name, alt_name) @@ -1535,12 +1626,11 @@ return lexer end ---- Returns a list of all known lexer names. +--- Returns a table of all known lexer names. -- This function is not available to lexers and requires the LuaFileSystem (`lfs`) module to -- be available. --- @param[opt] path Optional ';'-delimited list of directories to search for lexers in. The +-- @param[opt] path String list of ';'-separated directories to search for lexers in. The -- default value is Scintillua's configured lexer path. --- @return lexer name list function M.names(path) local lfs = require('lfs') if not path then path = M.property and M.property['scintillua.lexers'] end @@ -1569,25 +1659,20 @@ end --- Map of file extensions, without the '.' prefix, to their associated lexer names. --- This map has precedence over Scintillua's built-in map. --- @see detect +-- @usage lexer.detect_extensions.luadoc = 'lua' M.detect_extensions = {} ---- Map of line patterns to their associated lexer names. +--- Map of first-line patterns to their associated lexer names. -- These are Lua string patterns, not LPeg patterns. --- This map has precedence over Scintillua's built-in map. --- @see detect +-- @usage lexer.detect_patterns['^#!.+/zsh'] = 'bash' M.detect_patterns = {} ---- Returns the name of the lexer often associated with filename *filename* and/or content --- line *line*. --- @param[opt] filename Optional string filename. The default value is read from the --- 'lexer.scintillua.filename' property. --- @param[optchain] line Optional string first content line, such as a shebang line. The default --- value is read from the 'lexer.scintillua.line' property. --- @return string lexer name to pass to `load()`, or `nil` if none was detected --- @see detect_extensions --- @see detect_patterns +--- Returns the name of the lexer often associated with a particular filename and/or file content. +-- @param[opt] filename String filename to inspect. The default value is read from the +-- "lexer.scintillua.filename" property. +-- @param[optchain] line String first content line, such as a shebang line. The default value +-- is read from the "lexer.scintillua.line" property. +-- @return string lexer name to pass to `lexer.load()`, or `nil` if none was detected function M.detect(filename, line) if not filename then filename = M.property and M.property['lexer.scintillua.filename'] or '' end if not line then line = M.property and M.property['lexer.scintillua.line'] or '' end @@ -1673,7 +1758,7 @@ lua = 'lua', -- GNUmakefile = 'makefile', iface = 'makefile', mak = 'makefile', makefile = 'makefile', Makefile = 'makefile', -- - md = 'markdown', -- + md = 'markdown', markdown = 'markdown', -- ['meson.build'] = 'meson', -- moon = 'moonscript', -- myr = 'myrddin', -- @@ -1790,36 +1875,46 @@ M.punct = R('!/', ':@', '[\'', '{~') --- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n', '\r', space). M.space = S('\t\v\f\n\r ') ---- A pattern that matches a sequence of end of line characters. +--- A pattern that matches an end of line, either CR+LF or LF. M.newline = P('\r')^-1 * '\n' --- A pattern that matches any single, non-newline character. M.nonnewline = 1 - M.newline ---- Returns a pattern that matches a decimal number, whose digits may be separated by character --- *c*. +--- Returns a pattern that matches a decimal number, whose digits may be separated by a particular -- character.
+-- @param c Digit separator character. function M.dec_num_(c) return M.digit * (P(c)^-1 * M.digit)^0 end --- Returns a pattern that matches a hexadecimal number, whose digits may be separated by --- character *c*. +-- a particular character. +-- @param c Digit separator character. function M.hex_num_(c) return '0' * S('xX') * (P(c)^-1 * M.xdigit)^1 end ---- Returns a pattern that matches an octal number, whose digits may be separated by character *c*. +--- Returns a pattern that matches an octal number, whose digits may be separated by a particular +-- character. +-- @param c Digit separator character. function M.oct_num_(c) return '0' * (P(c)^-1 * R('07'))^1 * -M.xdigit end ---- Returns a pattern that matches a binary number, whose digits may be separated by character *c*. +--- Returns a pattern that matches a binary number, whose digits may be separated by a particular +-- character. +-- @param c Digit separator character. function M.bin_num_(c) return '0' * S('bB') * (P(c)^-1 * S('01'))^1 * -M.xdigit end --- Returns a pattern that matches either a decimal, hexadecimal, octal, or binary number, --- whose digits may be separated by character *c*. +-- whose digits may be separated by a particular character. +-- @param c Digit separator character. function M.integer_(c) return S('+-')^-1 * (M.hex_num_(c) + M.bin_num_(c) + M.oct_num_(c) + M.dec_num_(c)) end local function exp_(c) return S('eE') * S('+-')^-1 * M.digit * (P(c)^-1 * M.digit)^0 end ---- Returns a pattern that matches a floating point number, whose digits may be separated by --- character *c*. +--- Returns a pattern that matches a floating point number, whose digits may be separated by a +-- particular character. +-- @param c Digit separator character. function M.float_(c) return S('+-')^-1 * ((M.dec_num_(c)^-1 * '.' * M.dec_num_(c) + M.dec_num_(c) * '.' * M.dec_num_(c)^-1 * -P('.')) * exp_(c)^-1 + (M.dec_num_(c) * exp_(c))) end --- Returns a pattern that matches a typical number, either a floating point, decimal, hexadecimal, --- octal, or binary number, and whose digits may be separated by character *c*. +-- octal, or binary number, and whose digits may be separated by a particular character. +-- @param c Digit separator character. +-- @usage lexer.number_('_') -- matches 1_000_000 function M.number_(c) return M.float_(c) + M.integer_(c) end --- A pattern that matches a decimal number. @@ -1842,14 +1937,10 @@ M.number = M.number_(false) -- of alphanumeric and underscore characters. M.word = (M.alpha + '_') * (M.alnum + '_')^0 ---- Creates and returns a pattern that matches from string or pattern *prefix* until the end of --- the line. --- *escape* indicates whether the end of the line can be escaped with a '\' character. --- @param[opt] prefix Optional string or pattern prefix to start matching at. The default value --- is any non-newline character. --- @param[optchain=false] escape Optional flag indicating whether or not newlines can be escaped --- by a '\' character. --- @return pattern +--- Returns a pattern that matches a prefix until the end of its line. +-- @param[opt] prefix String or pattern prefix to start matching at. The default value is any +-- non-newline character. +-- @param[optchain=false] escape Allow newline escapes using a '\\' character. 
---- Creates and returns a pattern that matches from string or pattern *prefix* until the end of
--- the line.
--- *escape* indicates whether the end of the line can be escaped with a '\' character.
--- @param[opt] prefix Optional string or pattern prefix to start matching at. The default value
--- is any non-newline character.
--- @param[optchain=false] escape Optional flag indicating whether or not newlines can be escaped
--- by a '\' character.
--- @return pattern
+--- Returns a pattern that matches a prefix until the end of its line.
+-- @param[opt] prefix String or pattern prefix to start matching at. The default value is any
+--   non-newline character.
+-- @param[optchain=false] escape Allow newline escapes using a '\\' character.
 -- @usage local line_comment = lexer.to_eol('//')
 -- @usage local line_comment = lexer.to_eol(S('#;'))
 function M.to_eol(prefix, escape)
@@ -1857,24 +1948,17 @@ function M.to_eol(prefix, escape)
 		(not escape and M.nonnewline or 1 - (M.newline + '\\') + '\\' * M.any)^0
 end
 
---- Creates and returns a pattern that matches a range of text bounded by strings or patterns *s*
--- and *e*.
+--- Returns a pattern that matches a bounded range of text.
 -- This is a convenience function for matching more complicated ranges like strings with escape
--- characters, balanced parentheses, and block comments (nested or not). *e* is optional and
--- defaults to *s*. *single_line* indicates whether or not the range must be on a single line;
--- *escapes* indicates whether or not to allow '\' as an escape character; and *balanced*
--- indicates whether or not to handle balanced ranges like parentheses, and requires *s* and *e*
--- to be different.
--- @param s String or pattern start of a range.
--- @param[opt=s] e Optional string or pattern end of a range. The default value is *s*.
--- @param[optchain=false] single_line Optional flag indicating whether or not the range must
--- be on a single line.
--- @param[optchain] escapes Optional flag indicating whether or not the range end may be
--- escaped by a '\' character. The default value is `false` unless *s* and *e* are identical,
--- single-character strings. In that case, the default value is `true`.
--- @param[optchain=false] balanced Optional flag indicating whether or not to match a balanced
--- range, like the "%b" Lua pattern. This flag only applies if *s* and *e* are different.
--- @return pattern
+-- characters, balanced parentheses, and block comments (nested or not).
+-- @param s String or LPeg pattern start of the range.
+-- @param[opt=s] e String or LPeg pattern end of the range. The default value is *s*.
+-- @param[optchain=false] single_line Restrict the range to a single line.
+-- @param[optchain] escapes Allow the range end to be escaped by a '\\' character. The default
+--   value is `false` unless *s* and *e* are identical, single-character strings. In that case,
+--   the default value is `true`.
+-- @param[optchain=false] balanced Match a balanced range, like the "%b" Lua pattern. This flag
+--   only applies if *s* and *e* are different.
 -- @usage local dq_str_escapes = lexer.range('"')
 -- @usage local dq_str_noescapes = lexer.range('"', false, false)
 -- @usage local unbalanced_parens = lexer.range('(', ')')
@@ -1893,13 +1977,14 @@ function M.range(s, e, single_line, escapes, balanced)
 	return s * any^0 * P(e)^-1
 end
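For example, the balanced flag is what nested ranges need (a sketch using the documented defaults):

	local block_comment = lexer.range('/*', '*/') -- unbalanced; stops at the first '*/'
	local nested_comment = lexer.range('#|', '|#', false, false, true) -- balanced, like "%b"
	local sq_str = lexer.range("'") -- identical single-char delimiters, so escapes default to true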
---- Creates and returns a pattern that matches pattern *patt* only when it comes after one of
--- the characters in string *set* (or when there are no characters behind *patt*), skipping
--- over any characters in string *skip*, which is whitespace by default.
+--- Returns a pattern that only matches when it comes after certain characters (or when there
+-- are no characters behind it).
 -- @param set String character set like one passed to `lpeg.S()`.
--- @param patt The LPeg pattern to match after a set character.
--- @param skip String character set to skip over. The default value is ' \t\r\n\v\f' (whitespace).
+-- @param patt LPeg pattern to match after a character in *set*.
+-- @param skip String character set to skip over when looking backwards from *patt*. The default
+--   value is " \t\r\n\v\f" (whitespace).
 -- @usage local regex = lexer.after_set('+-*!%^&|=,([{', lexer.range('/'))
+--   -- matches "var re = /foo/;", but not "var x = 1 / 2 / 3;"
 function M.after_set(set, patt, skip)
 	if not skip then skip = ' \t\r\n\v\f' end
 	local set_chars, skip_chars = {}, {}
@@ -1914,12 +1999,9 @@ function M.after_set(set, patt, skip)
 	end)
 end
 
---- Creates and returns a pattern that matches pattern *patt* only at the beginning of a line,
--- or after any line indentation if *allow_indent* is `true`.
--- @param patt The LPeg pattern to match on the beginning of a line.
--- @param allow_indent Whether or not to consider line indentation as the start of a line. The
--- default value is `false`.
--- @return pattern
+--- Returns a pattern that matches only at the beginning of a line.
+-- @param patt LPeg pattern to match at the beginning of a line.
+-- @param[opt=false] allow_indent Allow *patt* to match after line indentation.
 -- @usage local preproc = lex:tag(lexer.PREPROCESSOR, lexer.starts_line(lexer.to_eol('#')))
 function M.starts_line(patt, allow_indent)
 	return M.after_set('\r\n\v\f', patt, allow_indent and ' \t' or '')
@@ -1937,7 +2019,6 @@ M.property_expanded = setmetatable({}, {__index = function() return '' end})
 --
 -- Use `tag()` instead.
 -- @param name The name of token.
 -- @param patt The LPeg pattern associated with the token.
--- @return pattern
 -- @usage local number = token(lexer.NUMBER, lexer.number)
 -- @usage local addition = token('addition', '+' * lexer.word)
 function M.token(name, patt) return Cc(name) * (P(patt) / 0) * Cp() end
@@ -1945,7 +2026,6 @@ function M.token(name, patt) return Cc(name) * (P(patt) / 0) * Cp() end
 
 -- Legacy function that creates and returns a pattern that verifies the first non-whitespace
 -- character behind the current match position is in string set *s*.
 -- @param s String character set like one passed to `lpeg.S()`.
--- @return pattern
 -- @usage local regex = #P('/') * lexer.last_char_includes('+-*!%^&|=,([{') * lexer.range('/')
 function M.last_char_includes(s) return M.after_set(s, true) end
 
@@ -1953,39 +2033,55 @@ function M.fold_consecutive_lines() end -- legacy
 
 -- The functions and fields below were defined in C.
 
---- Table of fold level bit-masks for line numbers starting from 1. (Read-only)
+--- Map of line numbers (starting from 1) to their fold level bit-masks. (Read-only)
 -- Fold level masks are composed of an integer level combined with any of the following bits:
 --
 -- - `lexer.FOLD_BASE`
---   The initial fold level.
+--   The initial fold level (1024).
 -- - `lexer.FOLD_BLANK`
 --   The line is blank.
 -- - `lexer.FOLD_HEADER`
 --   The line is a header, or fold point.
 -- @table fold_level
 
---- Table of indentation amounts in character columns, for line numbers starting from
--- 1. (Read-only)
+--- Map of line numbers (starting from 1) to their indentation amounts, measured in character
+-- columns. (Read-only)
 -- @table indent_amount
 
---- Table of integer line states for line numbers starting from 1.
--- Line states can be used by lexers for keeping track of persistent states. For example,
--- the output lexer uses this to mark lines that have warnings or errors.
+--- Map of line numbers (starting from 1) to their 32-bit integer line states.
+-- Line states can be used by lexers for keeping track of persistent states (up to 32 states
+-- with 1 state per bit). For example, the output lexer uses this to mark lines that have
+-- warnings or errors.
 -- @table line_state
 
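A sketch of reading these masks from a custom fold function (assumes the flags behave as bit masks, as described above, and Lua 5.3+ bitwise operators):

	local level = lexer.fold_level[line_num]
	local is_header = level & lexer.FOLD_HEADER > 0
	local depth = (level & ~(lexer.FOLD_HEADER | lexer.FOLD_BLANK)) - lexer.FOLD_BASE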
 --- Map of key-value string pairs.
+-- The contents of this map are application-dependent.
 -- @table property
 
---- Map of key-value pairs with values interpreted as numbers, or `0` if not found. (Read-only)
+--- Alias of `lexer.property`, but with values interpreted as numbers, or `0` if not
+-- found. (Read-only)
 -- @table property_int
 
---- Table of style names at positions in the buffer starting from 1. (Read-only)
+--- Map of buffer positions (starting from 1) to their string style names. (Read-only)
 -- @table style_at
 
---- Returns the line number (starting from 1) of the line that contains position *pos*, which
--- starts from 1.
--- @param pos The position to get the line number of.
--- @return number
+--- Returns a position's line number (starting from 1).
+-- @param pos Position (starting from 1) to get the line number of.
 -- @function line_from_position
 
+--- Map of line numbers (starting from 1) to their start positions. (Read-only)
+-- @table line_start
+
+--- Map of line numbers (starting from 1) to their end positions. (Read-only)
+-- @table line_end
+
+--- Returns a range of buffer text.
+-- The current text being lexed or folded may be a subset of buffer text. This function can
+-- return any text in the buffer.
+-- @param pos Position (starting from 1) of the text range to get. It needs to be an absolute
+--   position. Use a combination of `lexer.line_from_position()` and `lexer.line_start`
+--   to get one.
+-- @param length Length of the text range to get.
+-- @function text_range
+
 return M
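These new C-side helpers are meant to be combined; the markdown lexer below does exactly this to fetch whole lines (a representative sketch):

	local line = lexer.line_from_position(pos) -- `pos` is an absolute position
	local s, e = lexer.line_start[line], lexer.line_end[line]
	local line_text = lexer.text_range(s, e - s + 1)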
diff --git a/lua/lexers/lisp.lua b/lua/lexers/lisp.lua
index fa4c034..d628ef5 100644
--- a/lua/lexers/lisp.lua
+++ b/lua/lexers/lisp.lua
@@ -1,52 +1,35 @@
 -- Copyright 2006-2025 Mitchell. See LICENSE.
 -- Lisp LPeg lexer.
 
-local lexer = require('lexer')
-local token, word_match = lexer.token, lexer.word_match
+local lexer = lexer
 local P, S = lpeg.P, lpeg.S
 
-local lex = lexer.new('lisp')
-
--- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local lex = lexer.new(...)
 
 -- Keywords.
-lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-	'defclass', 'defconstant', 'defgeneric', 'define-compiler-macro', 'define-condition',
-	'define-method-combination', 'define-modify-macro', 'define-setf-expander', 'define-symbol-macro',
-	'defmacro', 'defmethod', 'defpackage', 'defparameter', 'defsetf', 'defstruct', 'deftype', 'defun',
-	'defvar', --
-	'abort', 'assert', 'block', 'break', 'case', 'catch', 'ccase', 'cerror', 'cond', 'ctypecase',
-	'declaim', 'declare', 'do', 'do*', 'do-all-symbols', 'do-external-symbols', 'do-symbols',
-	'dolist', 'dotimes', 'ecase', 'error', 'etypecase', 'eval-when', 'flet', 'handler-bind',
-	'handler-case', 'if', 'ignore-errors', 'in-package', 'labels', 'lambda', 'let', 'let*', 'locally',
-	'loop', 'macrolet', 'multiple-value-bind', 'proclaim', 'prog', 'prog*', 'prog1', 'prog2', 'progn',
-	'progv', 'provide', 'require', 'restart-bind', 'restart-case', 'restart-name', 'return',
-	'return-from', 'signal', 'symbol-macrolet', 'tagbody', 'the', 'throw', 'typecase', 'unless',
-	'unwind-protect', 'when', 'with-accessors', 'with-compilation-unit', 'with-condition-restarts',
-	'with-hash-table-iterator', 'with-input-from-string', 'with-open-file', 'with-open-stream',
-	'with-output-to-string', 'with-package-iterator', 'with-simple-restart', 'with-slots',
-	'with-standard-io-syntax', --
-	't', 'nil'
-}))
+lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))
 
 -- Identifiers.
 local word = lexer.alpha * (lexer.alnum + S('_-'))^0
-lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
+lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, word))
 
 -- Strings.
-lex:add_rule('string', token(lexer.STRING, "'" * word + lexer.range('"') + '#\\' * lexer.any))
+local character = lexer.word_match{
+	'backspace', 'linefeed', 'newline', 'page', 'return', 'rubout', 'space', 'tab'
+} + 'x' * lexer.xdigit^1 + lexer.any
+lex:add_rule('string', lex:tag(lexer.STRING, "'" * word + lexer.range('"') + '#\\' * character))
 
 -- Comments.
 local line_comment = lexer.to_eol(';')
 local block_comment = lexer.range('#|', '|#')
-lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+lex:add_rule('comment', lex:tag(lexer.COMMENT, line_comment + block_comment))
 
 -- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1))
+lex:add_rule('number',
+	lex:tag(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * (S('./') * lexer.digit^1)^-1))
 
 -- Operators.
-lex:add_rule('operator', token(lexer.OPERATOR, S('<>=*/+-`@%()')))
+lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('<>=*/+-`@%()')))
 
 -- Fold points.
 lex:add_fold_point(lexer.OPERATOR, '(', ')')
@@ -54,6 +37,26 @@ lex:add_fold_point(lexer.OPERATOR, '[', ']')
 lex:add_fold_point(lexer.OPERATOR, '{', '}')
 lex:add_fold_point(lexer.COMMENT, '#|', '|#')
 
+-- Word lists
+lex:set_word_list(lexer.KEYWORD, {
+	'defclass', 'defconstant', 'defgeneric', 'define-compiler-macro', 'define-condition',
+	'define-method-combination', 'define-modify-macro', 'define-setf-expander', 'define-symbol-macro',
+	'defmacro', 'defmethod', 'defpackage', 'defparameter', 'defsetf', 'defstruct', 'deftype', 'defun',
+	'defvar', --
+	'abort', 'assert', 'block', 'break', 'case', 'catch', 'ccase', 'cerror', 'cond', 'ctypecase',
+	'declaim', 'declare', 'do', 'do*', 'do-all-symbols', 'do-external-symbols', 'do-symbols',
+	'dolist', 'dotimes', 'ecase', 'error', 'etypecase', 'eval-when', 'flet', 'handler-bind',
+	'handler-case', 'if', 'ignore-errors', 'in-package', 'labels', 'lambda', 'let', 'let*', 'locally',
+	'loop', 'macrolet', 'multiple-value-bind', 'proclaim', 'prog', 'prog*', 'prog1', 'prog2', 'progn',
+	'progv', 'provide', 'require', 'restart-bind', 'restart-case', 'restart-name', 'return',
+	'return-from', 'signal', 'symbol-macrolet', 'tagbody', 'the', 'throw', 'typecase', 'unless',
+	'unwind-protect', 'when', 'with-accessors', 'with-compilation-unit', 'with-condition-restarts',
+	'with-hash-table-iterator', 'with-input-from-string', 'with-open-file', 'with-open-stream',
+	'with-output-to-string', 'with-package-iterator', 'with-simple-restart', 'with-slots',
+	'with-standard-io-syntax', --
+	't', 'nil'
+})
+
 lexer.property['scintillua.comment'] = ';'
 
 return lex
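The lisp.lua rewrite above is the standard legacy-to-modern migration: `require('lexer')` becomes the global `lexer`, `token()` becomes `lex:tag()`, and inline keyword tables move to `lex:set_word_list()` so hosts can override them. A minimal sketch of the resulting shape (hypothetical word list):

	local lexer = lexer
	local lex = lexer.new(...)
	lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))
	lex:set_word_list(lexer.KEYWORD, {'begin', 'end'})
	return lex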
diff --git a/lua/lexers/markdown.lua b/lua/lexers/markdown.lua
index b6be9f7..d5a18b8 100644
--- a/lua/lexers/markdown.lua
+++ b/lua/lexers/markdown.lua
@@ -29,22 +29,54 @@ lex:add_rule('list', lex:tag(lexer.LIST,
 local hspace = lexer.space - '\n'
 local blank_line = '\n' * hspace^0 * ('\n' + P(-1))
 
-local code_line = lexer.starts_line((B(' ') + B('\t')) * lexer.to_eol(), true)
-local code_block =
-	lexer.range(lexer.starts_line('```', true), '\n```' * hspace^0 * ('\n' + P(-1))) +
-	lexer.range(lexer.starts_line('~~~', true), '\n~~~' * hspace^0 * ('\n' + P(-1)))
+local code_line = lexer.starts_line((B(' ') + B('\t')) * lpeg.P(function(input, index)
+	-- Backtrack to the start of the current paragraph, which is either after a blank line,
+	-- at the start of a higher level of indentation, or at the start of the buffer.
+	local line, blank_line = lexer.line_from_position(index), false
+	while line > 0 do
+		local s, e = lexer.line_start[line], lexer.line_end[line]
+		blank_line = s == e or lexer.text_range(s, e - s + 1):find('^%s+$')
+		if blank_line then break end
+		local indent_amount = lexer.indent_amount[line]
+		line = line - 1
+		if line > 0 and lexer.indent_amount[line] > indent_amount then break end
+	end
+
+	-- If the start of the paragraph does not begin with a ' ' or '\t', then this line
+	-- is a continuation of the current paragraph, not a code block.
+	local text = lexer.text_range(lexer.line_start[line + 1], 4)
+	if not text:find('^\t') and text ~= '    ' then return false end
+
+	-- If the current paragraph is a code block, then so is this line.
+	if line <= 1 then return true end
+
+	-- Backtrack to see if this line is in a list item. If so, it is not a code block.
+	while line > 1 do
+		line = line - 1
+		local s, e = lexer.line_start[line], lexer.line_end[line]
+		local blank = s == e or lexer.text_range(s, e - s + 1):find('^%s+$')
+		if not blank and lexer.indent_amount[line] == 0 then break end
+	end
+	text = lexer.text_range(lexer.line_start[line], 8) -- note: only 2 is needed for unordered lists
+	if text:find('^[*+-][ \t]') then return false end
+	if text:find('^%d+%.[ \t]') then return false end
+
+	return true -- if all else fails, it is probably a code block
+end) * lexer.to_eol(), true)
+
+local code_block = lexer.range(lexer.starts_line('```', true),
+	'\n' * hspace^0 * '```' * hspace^0 * ('\n' + P(-1))) +
+	lexer.range(lexer.starts_line('~~~', true), '\n' * hspace^0 * '~~~' * hspace^0 * ('\n' + P(-1)))
+
 local code_inline = lpeg.Cmt(lpeg.C(P('`')^1), function(input, index, bt)
 	-- `foo`, ``foo``, ``foo`bar``, `foo``bar` are all allowed.
 	local _, e = input:find('[^`]' .. bt .. '%f[^`]', index)
 	return (e or #input) + 1
 end)
+
 lex:add_rule('block_code', lex:tag(lexer.CODE, code_line + code_block + code_inline))
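The match-time predicate in `code_line` exists to tell indented code apart from wrapped paragraphs and list continuations; roughly (an illustrative summary, not normative):

	-- '    foo' in a paragraph of its own           -> true (tagged lexer.CODE)
	-- '    bar' continuing '- list item' above      -> false (left to other rules)
	-- '    baz' continuing an unindented paragraph  -> false (paragraph continuation)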
-lex:add_rule('blockquote',
-	lex:tag(lexer.STRING, lpeg.Cmt(lexer.starts_line('>', true), function(input, index)
-		local _, e = input:find('\n[ \t]*\r?\n', index) -- the next blank line (possibly with indentation)
-		return (e or #input) + 1
-	end)))
+lex:add_rule('blockquote', lex:tag(lexer.STRING, lexer.starts_line('>', true)))
 
 -- Span elements.
 lex:add_rule('escape', lex:tag(lexer.DEFAULT, P('\\') * 1))
@@ -92,4 +124,28 @@ local start_rule = lexer.starts_line(P(' ')^-3) * #P('<') * html:get_rule('tag')
 local end_rule = #blank_line * ws
 lex:embed(html, start_rule, end_rule)
 
+local FOLD_HEADER, FOLD_BASE = lexer.FOLD_HEADER, lexer.FOLD_BASE
+-- Fold '#' headers.
+function lex:fold(text, start_line, start_level)
+	local levels = {}
+	local line_num = start_line
+	if start_level > FOLD_HEADER then start_level = start_level - FOLD_HEADER end
+	for line in (text .. '\n'):gmatch('(.-)\r?\n') do
+		local header = line:match('^%s*(#*)')
+		-- If the previous line was a header, this line's level has been pre-defined.
+		-- Otherwise, use the previous line's level, or if starting to fold, use the start level.
+		local level = levels[line_num] or levels[line_num - 1] or start_level
+		if level > FOLD_HEADER then level = level - FOLD_HEADER end
+		-- If this line is a header, set its level to be one less than the header level
+		-- (so it can be a fold point) and mark it as a fold point.
+		if #header > 0 then
+			level = FOLD_BASE + #header - 1 + FOLD_HEADER
+			levels[line_num + 1] = FOLD_BASE + #header
+		end
+		levels[line_num] = level
+		line_num = line_num + 1
+	end
+	return levels
+end
+
 return lex
diff --git a/lua/lexers/output.lua b/lua/lexers/output.lua
index 797ee97..0dd3f38 100644
--- a/lua/lexers/output.lua
+++ b/lua/lexers/output.lua
@@ -7,7 +7,7 @@ local lexer = lexer
 local starts_line = lexer.starts_line
-local P, S = lpeg.P, lpeg.S
+local P, S, R = lpeg.P, lpeg.S, lpeg.R
 
 local lex = lexer.new(..., {lex_by_line = true})
 
@@ -94,6 +94,19 @@ lex:add_rule('perl', starts_line(message((lexer.nonnewline - ' at ')^1)) * text(
 lex:add_rule('cmake', starts_line(text('CMake Error at ')) * c_filename * colon * line * colon *
 	mark_error)
 
-lex:add_rule('any_line', lex:tag(lexer.DEFAULT, lexer.to_eol()))
+-- CSI sequences, including colors.
+local csi = P('\x1B[')
+local non_csi_seq = text((lexer.nonnewline - csi)^1)
+local colors = {'black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white'}
+local csi_color = P(false)
+for i, name in ipairs(colors) do
+	local bold, color = '1;', tostring(30 + i - 1)
+	csi_color = csi_color +
+		(lex:tag('csi', csi * bold * color * 'm') * lex:tag('csi.' .. name .. '.bright', non_csi_seq)) +
+		(lex:tag('csi', csi * color * 'm') * lex:tag('csi.' .. name, non_csi_seq))
+end
+local csi_seq = #csi * (csi_color + lex:tag('csi', csi * (lexer.nonnewline - R('@~'))^0 * R('@~')))
+
+lex:add_rule('any_line', (non_csi_seq + csi_seq)^1)
 
 return lex
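A convenient way to eyeball the new CSI handling is to drive the lexer by hand; `lex:lex()` returns a flat list of alternating tag names and positions (a sketch; the exact tag boundaries are illustrative):

	local lex = lexer.load('output')
	local tags = lex:lex('\27[31mError\27[0m\n')
	for i = 1, #tags, 2 do print(tags[i], tags[i + 1]) end -- e.g. 'csi', 'csi.red', 'csi', ...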
diff --git a/lua/lexers/rest.lua b/lua/lexers/rest.lua
index 74337e7..46742a4 100644
--- a/lua/lexers/rest.lua
+++ b/lua/lexers/rest.lua
@@ -1,11 +1,11 @@
 -- Copyright 2006-2025 Mitchell. See LICENSE.
 -- reStructuredText LPeg lexer.
 
-local lexer = require('lexer')
-local token, word_match, starts_line = lexer.token, lexer.word_match, lexer.starts_line
+local lexer = lexer
+local starts_line = lexer.starts_line
 local P, S = lpeg.P, lpeg.S
 
-local lex = lexer.new('rest')
+local lex = lexer.new(...)
 
 -- Literal block.
 local block = '::' * (lexer.newline + -1) * function(input, index)
@@ -19,8 +19,7 @@ local block = '::' * (lexer.newline + -1) * function(input, index)
 	end
 	return #input + 1
 end
-lex:add_rule('literal_block', token('literal_block', block))
-lex:add_style('literal_block', lexer.styles.embedded .. {eolfilled = true})
+lex:add_rule('literal_block', lex:tag(lexer.LABEL .. '.literal', block))
 
 -- Lists.
 local option_word = lexer.alnum * (lexer.alnum + '-')^0
@@ -30,9 +29,9 @@ local option_list = option * (',' * lexer.space^1 * option)^-1
 local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
 local enum_list = P('(')^-1 * (lexer.digit^1 + S('ivxlcmIVXLCM')^1 + lexer.alnum + '#') * S('.)')
 local field_list = ':' * (lexer.any - ':')^1 * P(':')^-1
-lex:add_rule('list', #(lexer.space^0 * (S('*+-:/') + enum_list)) *
-	starts_line(token(lexer.LIST,
-		lexer.space^0 * (option_list + bullet_list + enum_list + field_list) * lexer.space)))
+lex:add_rule('list',
+	#(lexer.space^0 * (S('*+-:/') + enum_list)) * starts_line(lex:tag(lexer.LIST, lexer.space^0 *
+		(option_list + bullet_list + enum_list + field_list) * lexer.space)))
 
 local any_indent = S(' \t')^0
 local word = lexer.alpha * (lexer.alnum + S('-.+'))^0
 local prefix = any_indent * '.. '
 
 -- Explicit markup blocks.
 local footnote_label = '[' * (lexer.digit^1 + '#' * word^-1 + '*') * ']'
-local footnote = token('footnote_block', prefix * footnote_label * lexer.space)
+local footnote = lex:tag(lexer.LABEL .. '.footnote', prefix * footnote_label * lexer.space)
 local citation_label = '[' * word * ']'
-local citation = token('citation_block', prefix * citation_label * lexer.space)
-local link = token('link_block', prefix * '_' *
+local citation = lex:tag(lexer.LABEL .. '.citation', prefix * citation_label * lexer.space)
+local link = lex:tag(lexer.LABEL .. '.link', prefix * '_' *
 	(lexer.range('`') + (P('\\') * 1 + lexer.nonnewline - ':')^1) * ':' * lexer.space)
 lex:add_rule('markup_block', #prefix * starts_line(footnote + citation + link))
-lex:add_style('footnote_block', lexer.styles.label)
-lex:add_style('citation_block', lexer.styles.label)
-lex:add_style('link_block', lexer.styles.label)
 
 -- Sphinx code block.
 local indented_block = function(input, index)
@@ -61,60 +57,27 @@ local indented_block = function(input, index)
 end
 local code_block = prefix * 'code-block::' * S(' \t')^1 * lexer.nonnewline^0 * (lexer.newline + -1) *
 	indented_block
-lex:add_rule('code_block', #prefix * token('code_block', starts_line(code_block)))
-lex:add_style('code_block', lexer.styles.embedded .. {eolfilled = true})
+lex:add_rule('code_block', #prefix * lex:tag(lexer.LABEL .. '.code', starts_line(code_block)))
 
 -- Directives.
-local known_directive = token('directive', prefix * word_match{
-	-- Admonitions
-	'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning',
-	'admonition',
-	-- Images
-	'image', 'figure',
-	-- Body elements
-	'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric', 'epigraph',
-	'highlights', 'pull-quote', 'compound', 'container',
-	-- Table
-	'table', 'csv-table', 'list-table',
-	-- Document parts
-	'contents', 'sectnum', 'section-autonumbering', 'header', 'footer',
-	-- References
-	'target-notes', 'footnotes', 'citations',
-	-- HTML-specific
-	'meta',
-	-- Directives for substitution definitions
-	'replace', 'unicode', 'date',
-	-- Miscellaneous
-	'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive'
-} * '::' * lexer.space)
-local sphinx_directive = token('sphinx_directive', prefix * word_match{
-	-- The TOC tree.
-	'toctree',
-	-- Paragraph-level markup.
-	'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso', 'rubric',
-	'centered', 'hlist', 'glossary', 'productionlist',
-	-- Showing code examples.
-	'highlight', 'literalinclude',
-	-- Miscellaneous
-	'sectionauthor', 'index', 'only', 'tabularcolumns'
-} * '::' * lexer.space)
-local unknown_directive = token('unknown_directive', prefix * word * '::' * lexer.space)
+local known_directive = lex:tag(lexer.KEYWORD,
+	prefix * lex:word_match(lexer.KEYWORD) * '::' * lexer.space)
+local sphinx_directive = lex:tag(lexer.KEYWORD .. '.sphinx', prefix *
+	lex:word_match(lexer.KEYWORD .. '.sphinx') * '::' * lexer.space)
+local unknown_directive = lex:tag(lexer.KEYWORD .. '.unknown', prefix * word * '::' * lexer.space)
 lex:add_rule('directive',
 	#prefix * starts_line(known_directive + sphinx_directive + unknown_directive))
-lex:add_style('directive', lexer.styles.keyword)
-lex:add_style('sphinx_directive', lexer.styles.keyword .. {bold = true})
-lex:add_style('unknown_directive', lexer.styles.keyword .. {italics = true})
 
 -- Substitution definitions.
-lex:add_rule('substitution', #prefix * token('substitution', starts_line(prefix * lexer.range('|') *
-	lexer.space^1 * word * '::' * lexer.space)))
-lex:add_style('substitution', lexer.styles.variable)
+lex:add_rule('substitution',
+	#prefix * lex:tag(lexer.FUNCTION, starts_line(prefix * lexer.range('|') * lexer.space^1 * word *
+		'::' * lexer.space)))
 
 -- Comments.
 local line_comment = lexer.to_eol(prefix)
 local bprefix = any_indent * '..'
 local block_comment = bprefix * lexer.newline * indented_block
-lex:add_rule('comment', #bprefix * token(lexer.COMMENT, starts_line(line_comment + block_comment)))
+lex:add_rule('comment', #bprefix * lex:tag(lexer.COMMENT, starts_line(line_comment + block_comment)))
 
 -- Section titles (2 or more characters).
 local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
@@ -135,38 +98,35 @@ local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
 	return pos and index - #adm + pos - 1 or nil
 end)
 -- Token needs to be a predefined one in order for folder to work.
-lex:add_rule('title', token(lexer.HEADING, overline + underline))
+lex:add_rule('title', lex:tag(lexer.HEADING, overline + underline))
 
 -- Line block.
-lex:add_rule('line_block_char', token(lexer.OPERATOR, starts_line(any_indent * '|')))
+lex:add_rule('line_block_char', lex:tag(lexer.OPERATOR, starts_line(any_indent * '|')))
 
 -- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
+lex:add_rule('whitespace', lex:tag(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1))
 
 -- Inline markup.
-local strong = token(lexer.BOLD, lexer.range('**'))
-local em = token(lexer.ITALIC, lexer.range('*'))
-local inline_literal = token('inline_literal', lexer.range('``'))
+local strong = lex:tag(lexer.BOLD, lexer.range('**'))
+local em = lex:tag(lexer.ITALIC, lexer.range('*'))
+local inline_literal = lex:tag(lexer.CODE .. '.inline', lexer.range('``'))
 local postfix_link = (word + lexer.range('`')) * '_' * P('_')^-1
 local prefix_link = '_' * lexer.range('`')
-local link_ref = token(lexer.LINK, postfix_link + prefix_link)
-local role = token('role', ':' * word * ':' * (word * ':')^-1)
-local interpreted = role^-1 * token('interpreted', lexer.range('`')) * role^-1
-local footnote_ref = token(lexer.REFERENCE, footnote_label * '_')
-local citation_ref = token(lexer.REFERENCE, citation_label * '_')
-local substitution_ref = token('substitution', lexer.range('|', true) * ('_' * P('_')^-1)^-1)
-local link = token(lexer.LINK,
+local link_ref = lex:tag(lexer.LINK, postfix_link + prefix_link)
+local role = lex:tag(lexer.FUNCTION_BUILTIN, ':' * word * ':' * (word * ':')^-1)
+local interpreted = role^-1 * lex:tag(lexer.EMBEDDED, lexer.range('`')) * role^-1
+local footnote_ref = lex:tag(lexer.REFERENCE, footnote_label * '_')
+local citation_ref = lex:tag(lexer.REFERENCE, citation_label * '_')
+local substitution_ref = lex:tag(lexer.FUNCTION, lexer.range('|', true) * ('_' * P('_')^-1)^-1)
+local link = lex:tag(lexer.LINK,
 	lexer.alpha * (lexer.alnum + S('-.'))^1 * ':' * (lexer.alnum + S('/.+-%@'))^1)
 lex:add_rule('inline_markup', (strong + em + inline_literal + link_ref + interpreted + footnote_ref +
 	citation_ref + substitution_ref + link) * -lexer.alnum)
-lex:add_style('inline_literal', lexer.styles.embedded)
-lex:add_style('role', lexer.styles.class)
-lex:add_style('interpreted', lexer.styles.string)
 
 -- Other.
-lex:add_rule('non_space', token(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
-lex:add_rule('escape', token(lexer.DEFAULT, '\\' * lexer.any))
+lex:add_rule('non_space', lex:tag(lexer.DEFAULT, lexer.alnum * (lexer.any - lexer.space)^0))
+lex:add_rule('escape', lex:tag(lexer.DEFAULT, '\\' * lexer.any))
 
 -- Section-based folding.
 local sphinx_levels = {
@@ -198,18 +158,53 @@ function lex:fold(text, start_line, start_level)
 	return folds
 end
 
--- lexer.property['fold.by.sphinx.convention'] = '0'
-
 --[[ Embedded languages.
 local bash = lexer.load('bash')
 local bash_indent_level
 local start_rule = #(prefix * 'code-block' * '::' * lexer.space^1 * 'bash' * (lexer.newline + -1)) *
-	sphinx_directive * token('bash_begin', P(function(input, index)
+	sphinx_directive * lex:tag(lexer.EMBEDDED, P(function(input, index)
 		bash_indent_level = #input:match('^([ \t]*)', index)
 		return index
 	end))]]
 
+-- Word lists
+lex:set_word_list(lexer.KEYWORD, {
+	-- Admonitions
+	'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', 'warning',
+	'admonition',
+	-- Images
+	'image', 'figure',
+	-- Body elements
+	'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric', 'epigraph',
+	'highlights', 'pull-quote', 'compound', 'container',
+	-- Table
+	'table', 'csv-table', 'list-table',
+	-- Document parts
+	'contents', 'sectnum', 'section-autonumbering', 'header', 'footer',
+	-- References
+	'target-notes', 'footnotes', 'citations',
+	-- HTML-specific
+	'meta',
+	-- Directives for substitution definitions
+	'replace', 'unicode', 'date',
+	-- Miscellaneous
+	'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive'
+})
+
+lex:set_word_list(lexer.KEYWORD .. '.sphinx', {
+	-- The TOC tree.
+	'toctree',
+	-- Paragraph-level markup.
+	'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso', 'rubric',
+	'centered', 'hlist', 'glossary', 'productionlist',
+	-- Showing code examples.
+	'highlight', 'literalinclude',
+	-- Miscellaneous
+	'sectionauthor', 'index', 'only', 'tabularcolumns'
+})
+
+-- lexer.property['fold.by.sphinx.convention'] = '0'
 lexer.property['scintillua.comment'] = '.. '
 
 return lex
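With the directive vocabularies in word lists, hosts can extend them at load time instead of patching the lexer (a sketch; the extra directive name is hypothetical, and the trailing `true` assumes `set_word_list()`'s append flag):

	local lex = lexer.load('rest')
	lex:set_word_list(lexer.KEYWORD .. '.sphinx', 'autosummary', true)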
diff --git a/lua/lexers/rpmspec.lua b/lua/lexers/rpmspec.lua
index 4eec9c8..7b390e4 100644
--- a/lua/lexers/rpmspec.lua
+++ b/lua/lexers/rpmspec.lua
@@ -1,32 +1,48 @@
 -- Copyright 2022-2025 Matej Cepl mcepl.att.cepl.eu. See LICENSE.
 
-local lexer = require('lexer')
-local token, word_match = lexer.token, lexer.word_match
-local P, S = lpeg.P, lpeg.S
+local lexer = lexer
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
 
-local lex = lexer.new('rpmspec')
-
--- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local lex = lexer.new(...)
 
 -- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('#')))
+
+-- Numbers (including versions)
+lex:add_rule('number', lex:tag(lexer.NUMBER,
+	lpeg.B(lexer.space) * (lexer.number * (lexer.number + S('.+~'))^0)))
+
+-- Operators
+lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('&<>=|')))
 
 -- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
+lex:add_rule('string', lex:tag(lexer.STRING, lexer.range('"')))
 
 -- Keywords.
-lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+lex:add_rule('keyword', lex:tag(lexer.KEYWORD, (lex:word_match(lexer.KEYWORD) +
+	(P('Patch') + 'Source') * R('09')^0) * ':'))
+
+-- Macros
+lex:add_rule('command', lex:tag(lexer.FUNCTION, lexer.range(S('%$')^1 * S('{')^0 *
+	((lexer.alnum + S('_?'))^0), S('}')^0)))
+
+-- Constants
+lex:add_rule('constant', lex:tag(lexer.CONSTANT, lex:word_match(lexer.CONSTANT)))
+
+-- Word lists
+lex:set_word_list(lexer.CONSTANT, {
+	'rhel', 'fedora', 'suse_version', 'sle_version', 'x86_64', 'aarch64', 'ppc64le', 'riscv64',
+	's390x'
+})
+
+lex:set_word_list(lexer.KEYWORD, {
 	'Prereq', 'Summary', 'Name', 'Version', 'Packager', 'Requires', 'Recommends', 'Suggests',
 	'Supplements', 'Enhances', 'Icon', 'URL', 'Source', 'Patch', 'Prefix', 'Packager', 'Group',
 	'License', 'Release', 'BuildRoot', 'Distribution', 'Vendor', 'Provides', 'ExclusiveArch',
 	'ExcludeArch', 'ExclusiveOS', 'Obsoletes', 'BuildArch', 'BuildArchitectures', 'BuildRequires',
 	'BuildConflicts', 'BuildPreReq', 'Conflicts', 'AutoRequires', 'AutoReq', 'AutoReqProv',
 	'AutoProv', 'Epoch'
-}))
-
--- Macros
-lex:add_rule('command', token(lexer.FUNCTION, '%' * lexer.word))
+})
 
 lexer.property['scintillua.comment'] = '#'
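A few strings the new rpmspec rules are built to tag (an illustrative summary, not exhaustive):

	-- 'Source0:' or 'Patch12:' -> KEYWORD, via (P('Patch') + 'Source') * R('09')^0 * ':'
	-- '%{?suse_version}'       -> FUNCTION (macro); bare 'suse_version' is in the CONSTANT list
	-- '1.2.3+git~pre'          -> NUMBER, via the version-friendly number rule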
diff --git a/lua/lexers/systemd.lua b/lua/lexers/systemd.lua
index 9145cd3..780830d 100644
--- a/lua/lexers/systemd.lua
+++ b/lua/lexers/systemd.lua
@@ -1,17 +1,41 @@
 -- Copyright 2016-2025 Christian Hesse. See LICENSE.
 -- systemd unit file LPeg lexer.
 
-local lexer = require('lexer')
-local token, word_match = lexer.token, lexer.word_match
+local lexer = lexer
 local P, S = lpeg.P, lpeg.S
 
-local lex = lexer.new('systemd', {lex_by_line = true})
-
--- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local lex = lexer.new(..., {lex_by_line = true})
 
 -- Keywords.
-lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))
+
+-- Options.
+lex:add_rule('option', lex:tag(lexer.PREPROCESSOR, lex:word_match(lexer.PREPROCESSOR)))
+
+-- Identifiers.
+lex:add_rule('identifier',
+	lex:tag(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0))
+
+-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', lex:tag(lexer.STRING, sq_str + dq_str))
+
+-- Sections.
+lex:add_rule('section', lex:tag(lexer.LABEL, '[' * lex:word_match(lexer.LABEL) * ']'))
+
+-- Comments.
+lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.starts_line(lexer.to_eol(S(';#')))))
+
+-- Numbers.
+local integer = S('+-')^-1 * (lexer.hex_num + lexer.oct_num_('_') + lexer.dec_num_('_'))
+lex:add_rule('number', lex:tag(lexer.NUMBER, lexer.float + integer))
+
+-- Operators.
+lex:add_rule('operator', lex:tag(lexer.OPERATOR, '='))
+
+-- Word lists
+lex:set_word_list(lexer.KEYWORD, {
 	-- Boolean values.
 	'true', 'false', 'on', 'off', 'yes', 'no',
 	-- Service types.
@@ -38,10 +62,9 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
 	'PATH', 'LANG', 'USER', 'LOGNAME', 'HOME', 'SHELL', 'XDG_RUNTIME_DIR', 'XDG_SESSION_ID',
 	'XDG_SEAT', 'XDG_VTNR', 'MAINPID', 'MANAGERPID', 'LISTEN_FDS', 'LISTEN_PID', 'LISTEN_FDNAMES',
 	'NOTIFY_SOCKET', 'WATCHDOG_PID', 'WATCHDOG_USEC', 'TERM'
-}))
+})
 
--- Options.
-lex:add_rule('option', token(lexer.PREPROCESSOR, word_match{
+lex:set_word_list(lexer.PREPROCESSOR, {
 	-- Unit section.
 	'Description', 'Documentation', 'Requires', 'Requisite', 'Wants', 'BindsTo', 'PartOf',
 	'Conflicts', 'Before', 'After', 'OnFailure', 'PropagatesReloadTo', 'ReloadPropagatedFrom',
@@ -103,30 +126,16 @@
 	'UtmpIdentifier', 'UtmpMode', 'SELinuxContext', 'AppArmorProfile', 'SmackProcessLabel',
 	'IgnoreSIGPIPE', 'NoNewPrivileges', 'SystemCallFilter', 'SystemCallErrorNumber',
 	'SystemCallArchitectures', 'RestrictAddressFamilies', 'Personality', 'RuntimeDirectory',
-	'RuntimeDirectoryMode'
-}))
-
--- Identifiers.
-lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * (lexer.alnum + S('_.'))^0))
-
--- Strings.
-local sq_str = lexer.range("'")
-local dq_str = lexer.range('"')
-lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+	'RuntimeDirectoryMode',
+	-- Container files.
+	'AddCapability', 'AutoUpdate', 'ContainerName', 'Exec', 'HostName', 'Image', 'Network',
+	'PodmanArgs', 'PublishPort', 'UserNS', 'Volume'
+})
 
--- Sections.
-lex:add_rule('section', token(lexer.LABEL, '[' *
-	word_match('Automount BusName Install Mount Path Service Service Socket Timer Unit') * ']'))
-
--- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(lexer.to_eol(S(';#')))))
-
--- Numbers.
-local integer = S('+-')^-1 * (lexer.hex_num + lexer.oct_num_('_') + lexer.dec_num_('_'))
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-
--- Operators.
-lex:add_rule('operator', token(lexer.OPERATOR, '='))
+lex:set_word_list(lexer.LABEL, {
+	'Automount', 'BusName', 'Install', 'Mount', 'Path', 'Service', 'Socket', 'Timer', 'Unit'
+})
 
 lexer.property['scintillua.comment'] = '#'
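As a final smoke test, the reworked systemd lexer can be driven directly (a sketch; assumes 'ExecStart' is among the Service options in the context elided from the hunk above):

	local lex = lexer.load('systemd')
	local tags = lex:lex('[Service]\nExecStart=/usr/bin/foo\n')
	-- expect LABEL for '[Service]', PREPROCESSOR for 'ExecStart', OPERATOR for '='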
