aboutsummaryrefslogtreecommitdiff
path: root/lua/lexers/troff.lua
blob: ab17d08db15b7505b6e3bb036252f3be78116f21 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
-- Copyright 2023-2024 Mitchell. See LICENSE.
-- troff/man LPeg lexer.
-- Based on original Man lexer by David B. Lamkins and modified by Eolien55.

local lexer = lexer
local P, R, S = lpeg.P, lpeg.R, lpeg.S

local lex = lexer.new(...)

-- Registers and groff's structured programming.
lex:add_rule('keywords', lex:tag(lexer.KEYWORD, (lexer.starts_line('.') * (lexer.space - '\n')^0 *
	(P('while') + 'break' + 'continue' + 'nr' + 'rr' + 'rnn' + 'aln' + '\\}')) + '\\{'))

-- Markup.
lex:add_rule('escape_sequences', lex:tag(lexer.VARIABLE,
	'\\' * (('s' * S('+-')^-1) + S('*fgmnYV'))^-1 * (P('(') * 2 + lexer.range('[', ']') + 1)))

lex:add_rule('headings', lex:tag(lexer.NUMBER,
	lexer.starts_line('.') * (lexer.space - '\n')^0 * (S('STN') * 'H') * (lexer.space - '\n') *
		lexer.nonnewline^0))
lex:add_rule('man_alignment', lex:tag(lexer.KEYWORD,
	lexer.starts_line('.') * (lexer.space - '\n')^0 * (P('br') + 'DS' + 'RS' + 'RE' + 'PD' + 'PP') *
		lexer.space))
lex:add_rule('font', lex:tag(lexer.VARIABLE,
	lexer.starts_line('.') * (lexer.space - '\n')^0 * ('B' * P('R')^-1 + 'I' * S('PR')^-1) *
		lexer.space))

-- Lowercase troff macros are plain macros (like .so or .nr).
lex:add_rule('troff_plain_macros', lex:tag(lexer.VARIABLE, lexer.starts_line('.') *
	(lexer.space - '\n')^0 * lexer.lower^1))
lex:add_rule('any_macro', lex:tag(lexer.PREPROCESSOR,
	lexer.starts_line('.') * (lexer.space - '\n')^0 * (lexer.any - lexer.space)^0))
lex:add_rule('comment', lex:tag(lexer.COMMENT,
	(lexer.starts_line('.\\"') + '\\"' + '\\#') * lexer.nonnewline^0))
lex:add_rule('string', lex:tag(lexer.STRING, lexer.range('"', true)))

-- Usually used by eqn, and mandoc in some way.
lex:add_rule('in_dollars', lex:tag(lexer.EMBEDDED, lexer.range('$', false, false)))

-- TODO: a lexer for each preprocessor?

return lex