Diffstat (limited to 'lua/lexers/ansi_c.lua')
| -rw-r--r-- | lua/lexers/ansi_c.lua | 213 |
1 file changed, 78 insertions, 135 deletions
diff --git a/lua/lexers/ansi_c.lua b/lua/lexers/ansi_c.lua
index 0235e46..68aba5d 100644
--- a/lua/lexers/ansi_c.lua
+++ b/lua/lexers/ansi_c.lua
@@ -1,154 +1,97 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- C LPeg lexer.

-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S

-local M = {_NAME = 'ansi_c'}
+local lex = lexer.new('ansi_c')

 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
--- local preproc_ifzero = l.starts_line('#if') * S(' \t')^0 * '0' * l.space *
--- (l.starts_line('#endif'))
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local float_suffix = P('f')^-1
-local integer_suffix = (S('uU')^-1 * word_match{ 'l', 'L', 'll', 'LL' }^-1) +
-  (word_match{ 'l', 'L', 'll', 'LL' }^-1 * S('uU')^-1)
-local number = token(l.NUMBER, (l.float * float_suffix) +
-  (l.integer * integer_suffix))
-
--- Preprocessor.
-local preproc_word = word_match{
-  'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'line',
-  'pragma', 'undef', 'warning'
-}
-
-local preproc = #l.starts_line('#') *
-  (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) +
-  token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
-  (token(l.WHITESPACE, S('\t ')^0) *
-  token(l.STRING, l.delimited_range('<>', true, true)))^-1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)

 -- Keywords.
-local storage_class = word_match{
-  -- C11 6.7.1
-  'typedef', 'extern', 'static', '_Thread_local', 'auto', 'register',
-}
-
-local type_qualifier = word_match{
-  -- C11 6.7.3
-  'const', 'restrict', 'volatile', '_Atomic',
-}
-
-local function_specifier = word_match{
-  -- C11 6.7.4
-  'inline', '_Noreturn',
-}
-
-local extra_keywords = word_match{
-  'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__',
-  '__attribute__', '__declspec'
-}
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+  'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for',
+  'goto', 'if', 'inline', 'register', 'restrict', 'return', 'sizeof', 'static', 'switch', 'typedef',
+  'volatile', 'while',
+  -- C99.
+  'false', 'true',
+  -- C11.
+  '_Alignas', '_Alignof', '_Atomic', '_Generic', '_Noreturn', '_Static_assert', '_Thread_local',
+  -- Compiler.
+  'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__', '__attribute__', '__declspec'
+}))

-local keyword = token(l.KEYWORD, word_match{
-  'break', 'case', 'continue', 'default', 'do', 'else', 'enum', 'for', 'goto',
-  'if', 'return', 'switch', 'while',
-  '_Alignas', '_Generic', '_Static_assert',
-} + storage_class + type_qualifier + function_specifier + extra_keywords)
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+  'bool', 'char', 'double', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union',
+  'unsigned', 'void', '_Bool', '_Complex', '_Imaginary',
+  -- Stdlib types.
+  'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'
+} + P('u')^-1 * 'int' * (P('_least') + '_fast')^-1 * lexer.digit^1 * '_t'))

 -- Constants.
-local errno = word_match{
-  -- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
-  'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT',
-  'EAGAIN', 'EALREADY', 'EBADF', 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD',
-  'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET', 'EDEADLK', 'EDESTADDRREQ',
-  'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM',
-  'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR',
-  'ELOOP', 'EMFILE', 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG',
-  'ENETDOWN', 'ENETRESET', 'ENETUNREACH', 'ENFILE', 'ENOBUFS', 'ENODATA',
-  'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM',
-  'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS',
-  'ENOTCONN', 'ENOTDIR', 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK',
-  'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP', 'EOVERFLOW', 'EOWNERDEAD',
-  'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE',
-  'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY',
-  'EWOULDBLOCK', 'EXDEV',
-}
+lex:add_rule('constants', token(lexer.CONSTANT, word_match{
+  'NULL',
+  -- Preprocessor.
+  '__DATE__', '__FILE__', '__LINE__', '__TIME__', '__func__',
+  -- errno.h.
+  'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT', 'EAGAIN', 'EALREADY', 'EBADF',
+  'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD', 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET',
+  'EDEADLK', 'EDESTADDRREQ', 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM',
+  'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR', 'ELOOP', 'EMFILE',
+  'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG', 'ENETDOWN', 'ENETRESET', 'ENETUNREACH',
+  'ENFILE', 'ENOBUFS', 'ENODATA', 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM',
+  'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS', 'ENOTCONN', 'ENOTDIR',
+  'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK', 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP',
+  'EOVERFLOW', 'EOWNERDEAD', 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE',
+  'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY', 'EWOULDBLOCK', 'EXDEV',
+  -- stdint.h.
+  'PTRDIFF_MIN', 'PTRDIFF_MAX', 'SIZE_MAX', 'SIG_ATOMIC_MIN', 'SIG_ATOMIC_MAX', 'WINT_MIN',
+  'WINT_MAX', 'WCHAR_MIN', 'WCHAR_MAX'
+} + P('U')^-1 * 'INT' * ((P('_LEAST') + '_FAST')^-1 * lexer.digit^1 + 'PTR' + 'MAX') *
+  (P('_MIN') + '_MAX')))

-local preproc_macros = word_match{
-  -- C11 6.10.8.1 Mandatory macros
-  '__DATE__', '__FILE__', '__LINE__', '__TIME__',
-  -- C11 6.4.2.2 Predefined identifiers
-  '__func__',
-}
+-- Labels.
+lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(lexer.word * ':')))

-local constant = token(l.CONSTANT, word_match{
-  'true', 'false',
-  'NULL', 'CHAR_BIT', 'SIZE_MAX', } +
-  ((P('WINT') + P('WCHAR') + P('SIG_ATOMIC') + P('PTRDIFF')) * (P('_MIN') + P('_MAX'))) +
-  ( P('INT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * (P('_MIN') + P('_MAX'))) +
-  (P('UINT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * P('_MAX')) +
-  errno + preproc_macros
-)
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))

--- Types.
-local type = token(l.TYPE, word_match{
-  'bool', 'char', 'double', 'float', 'int', 'long', 'short',
-  'signed', 'struct', 'union', 'unsigned', 'void', '_Bool', '_Complex',
-  '_Imaginary', 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t',
-  'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'} +
-  (P('u')^-1 * P('int') * (P('_least') + P('_fast'))^-1 * l.dec_num^1 * P('_t')) +
-  (S('usif') * l.dec_num^1 * P('_t')) +
-  (P('__')^-1 * S('usif') * l.dec_num^1)
-)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

--- Labels.
--- FIXME: Accept whitespace before label.
-local label = token(l.LABEL, l.starts_line(l.word * ':'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/') +
+  lexer.range('#if' * S(' \t')^0 * '0' * lexer.space, '#endif')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))

--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+local integer = lexer.integer * word_match('u l ll ul ull lu llu', true)^-1
+local float = lexer.float * P('f')^-1
+lex:add_rule('number', token(lexer.NUMBER, float + integer))

--- Operators.
-local operator = token(l.OPERATOR,
-  S('+-/*%<>~!=^&|?~:;,.()[]{}') +
-  word_match{ 'sizeof', '_Alignof' }
-)
+-- Preprocessor.
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
+  (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1
+local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
+  word_match('define elif else endif if ifdef ifndef line pragma undef'))
+lex:add_rule('preprocessor', include + preproc)

-M._rules = {
-  {'whitespace', ws},
-  {'comment', comment},
-  {'keyword', keyword},
-  {'type', type},
-  {'constant', constant},
-  {'operator', operator},
-  {'label', label},
-  {'identifier', identifier},
-  {'string', string},
-  {'number', number},
-  {'preproc', preproc},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}')))

-M._foldsymbols = {
-  _patterns = {'#?%l+', '[{}]', '/%*', '%*/', '//'},
-  [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1},
-  [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
-  [l.COMMENT] = {
-    ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//'),
-    ['#if'] = 1, ['#endif'] = -1
-  }
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, '#if', '#endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '#ifdef', '#endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '#ifndef', '#endif')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))

-return M
+return lex
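
For orientation: the change above ports the lexer from the legacy Scintillua module-table style (a table M with M._rules and M._foldsymbols, patterns built from l.delimited_range and l.nonnewline_esc) to the object-style API (lexer.new, lex:add_rule, lex:add_fold_point, lexer.range, lexer.to_eol). The Lua fragment below is a minimal sketch of that new style, using only calls that appear in the diff; the lexer name 'example' and the token patterns are placeholders for illustration, not part of this commit.

-- Illustrative sketch only (not part of this commit): skeleton of a
-- new-style Scintillua lexer, as used by ansi_c.lua after this change.
local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local S = lpeg.S

local lex = lexer.new('example') -- placeholder name; replaces M = {_NAME = '...'}

-- Rules are registered in order instead of being collected in M._rules.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
lex:add_rule('keyword', token(lexer.KEYWORD, word_match('if else for while return')))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//', true)))
lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%=<>(){};,')))

-- Fold points replace the old M._foldsymbols table.
lex:add_fold_point(lexer.OPERATOR, '{', '}')
lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))

return lex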
