diff options
Diffstat (limited to 'lua/lexers/gleam.lua')
| -rw-r--r-- | lua/lexers/gleam.lua | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/lua/lexers/gleam.lua b/lua/lexers/gleam.lua new file mode 100644 index 0000000..6aee725 --- /dev/null +++ b/lua/lexers/gleam.lua @@ -0,0 +1,119 @@ +-- Copyright 2021-2022 Mitchell. See LICENSE. +-- Gleam LPeg lexer +-- https://gleam.run/ +-- Contributed by Tynan Beatty + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S + +local KEY, OP = lexer.KEYWORD, lexer.OPERATOR + +local lex = lexer.new('gleam') + +-- Whitespace. +local gleam_ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', gleam_ws) + +-- Types. +local typ_tok = token(lexer.TYPE, lexer.upper * lexer.alnum^0) +lex:add_rule('type', typ_tok) + +-- Modules. +local name = (lexer.lower + '_') * (lexer.lower + lexer.digit + '_')^0 +local fn_name = token(lexer.FUNCTION, name) +local mod_name = token('module', name) +local typ_or_fn = typ_tok + fn_name +local function mod_tok(ws) + return token(KEY, 'import') * ws^1 * mod_name * (ws^0 * token(OP, '/') * ws^0 * mod_name)^0 * + (ws^1 * token(KEY, 'as') * ws^1 * mod_name)^-1 * + (ws^0 * token(OP, '.') * ws^0 * token(OP, '{') * ws^0 * typ_or_fn * + (ws^0 * token(OP, ',') * ws^0 * typ_or_fn)^0 * ws^0 * token(OP, '}'))^-1 +end +lex:add_rule('module', mod_tok(gleam_ws)) +lex:add_style('module', lexer.styles.constant) + +-- Keywords. +local key_tok = token(KEY, word_match( + 'as assert case const external fn if import let opaque pub todo try tuple type')) +lex:add_rule('keyword', key_tok) + +-- Functions. +local function fn_tok(ws) + local mod_name_op = mod_name * ws^0 * token(OP, '.') + local fn_def_call = mod_name_op^-1 * ws^0 * fn_name * ws^0 * #P('(') + local fn_pipe = token(OP, '|>') * ws^0 * (token(KEY, 'fn') + mod_name_op^-1 * fn_name) + return fn_def_call + fn_pipe +end +lex:add_rule('function', fn_tok(gleam_ws)) + +-- Labels. +local id = token(lexer.IDENTIFIER, name) +local function lab_tok(ws) + return token(OP, S('(,')) * ws^0 * token(lexer.LABEL, name) * #(ws^1 * id) +end +lex:add_rule('label', lab_tok(gleam_ws)) + +-- Identifiers. +local discard_id = token('discard', '_' * name) +local id_tok = discard_id + id +lex:add_rule('identifier', id_tok) +lex:add_style('discard', lexer.styles.comment) + +-- Strings. +local str_tok = token(lexer.STRING, lexer.range('"')) +lex:add_rule('string', str_tok) + +-- Comments. +local com_tok = token(lexer.COMMENT, lexer.to_eol('//')) +lex:add_rule('comment', com_tok) + +-- Numbers. +local function can_neg(patt) return (lpeg.B(lexer.space + S('+-/*%<>=&|:,.')) * '-')^-1 * patt end +local function can_sep(patt) return (P('_')^-1 * patt^1)^1 end +local dec = lexer.digit * can_sep(lexer.digit)^0 +local float = dec * '.' * dec^0 +local bin = '0' * S('bB') * can_sep(S('01')) * -lexer.xdigit +local oct = '0' * S('oO') * can_sep(lpeg.R('07')) +local hex = '0' * S('xX') * can_sep(lexer.xdigit) +local num_tok = token(lexer.NUMBER, can_neg(float) + bin + oct + hex + can_neg(dec)) +lex:add_rule('number', num_tok) + +-- Operators. +local op_tok = token(OP, S('+-*/%#!=<>&|.,:;{}[]()')) +lex:add_rule('operator', op_tok) + +-- Errors. +local err_tok = token(lexer.ERROR, lexer.any) +lex:add_rule('error', err_tok) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//')) +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '(', ')') + +-- Embedded Bit Strings. +-- Mimic lexer.load() by creating a bitstring-specific whitespace style. +local bitstring = lexer.new(lex._NAME .. '_bitstring') +local bitstring_ws = token(bitstring._NAME .. '_whitespace', lexer.space^1) +bitstring:add_rule('whitespace', bitstring_ws) +bitstring:add_style(bitstring._NAME .. '_whitespace', lexer.styles.whitespace) +bitstring:add_rule('type', typ_tok) +bitstring:add_rule('module', mod_tok(bitstring_ws)) +bitstring:add_rule('keyword', key_tok + token(KEY, word_match{ + 'binary', 'bytes', 'int', 'float', 'bit_string', 'bits', 'utf8', 'utf16', 'utf32', + 'utf8_codepoint', 'utf16_codepoint', 'utf32_codepoint', 'signed', 'unsigned', 'big', 'little', + 'native', 'unit', 'size' +})) +bitstring:add_rule('function', fn_tok(bitstring_ws)) +bitstring:add_rule('label', lab_tok(bitstring_ws)) +bitstring:add_rule('identifier', id_tok) +bitstring:add_rule('string', str_tok) +bitstring:add_rule('comment', com_tok) +bitstring:add_rule('number', num_tok) +bitstring:add_rule('operator', op_tok) +bitstring:add_rule('error', err_tok) +lex:embed(bitstring, token(OP, '<<'), token(OP, '>>')) + +return lex |
