Diffstat (limited to 'lua/lexers/ansi_c.lua')
| -rw-r--r-- | lua/lexers/ansi_c.lua | 213 |
1 file changed, 78 insertions, 135 deletions
diff --git a/lua/lexers/ansi_c.lua b/lua/lexers/ansi_c.lua
index 0235e46..68aba5d 100644
--- a/lua/lexers/ansi_c.lua
+++ b/lua/lexers/ansi_c.lua
@@ -1,154 +1,97 @@
--- Copyright 2006-2017 Mitchell mitchell.att.foicica.com. See LICENSE.
+-- Copyright 2006-2022 Mitchell. See LICENSE.
 -- C LPeg lexer.

-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, S = lpeg.P, lpeg.S

-local M = {_NAME = 'ansi_c'}
+local lex = lexer.new('ansi_c')

 -- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local line_comment = '//' * l.nonnewline_esc^0
-local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
--- local preproc_ifzero = l.starts_line('#if') * S(' \t')^0 * '0' * l.space *
--- (l.starts_line('#endif'))
-local comment = token(l.COMMENT, line_comment + block_comment)
-
--- Strings.
-local sq_str = P('L')^-1 * l.delimited_range("'", true)
-local dq_str = P('L')^-1 * l.delimited_range('"', true)
-local string = token(l.STRING, sq_str + dq_str)
-
--- Numbers.
-local float_suffix = P('f')^-1
-local integer_suffix = (S('uU')^-1 * word_match{ 'l', 'L', 'll', 'LL' }^-1) +
-  (word_match{ 'l', 'L', 'll', 'LL' }^-1 * S('uU')^-1)
-local number = token(l.NUMBER, (l.float * float_suffix) +
-  (l.integer * integer_suffix))
-
--- Preprocessor.
-local preproc_word = word_match{
-  'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', 'line',
-  'pragma', 'undef', 'warning'
-}
-
-local preproc = #l.starts_line('#') *
-  (token(l.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) +
-  token(l.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
-  (token(l.WHITESPACE, S('\t ')^0) *
-  token(l.STRING, l.delimited_range('<>', true, true)))^-1)
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)

 -- Keywords.
-local storage_class = word_match{
-  -- C11 6.7.1
-  'typedef', 'extern', 'static', '_Thread_local', 'auto', 'register',
-}
-
-local type_qualifier = word_match{
-  -- C11 6.7.3
-  'const', 'restrict', 'volatile', '_Atomic',
-}
-
-local function_specifier = word_match{
-  -- C11 6.7.4
-  'inline', '_Noreturn',
-}
-
-local extra_keywords = word_match{
-  'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__',
-  '__attribute__', '__declspec'
-}
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
+  'auto', 'break', 'case', 'const', 'continue', 'default', 'do', 'else', 'enum', 'extern', 'for',
+  'goto', 'if', 'inline', 'register', 'restrict', 'return', 'sizeof', 'static', 'switch', 'typedef',
+  'volatile', 'while',
+  -- C99.
+  'false', 'true',
+  -- C11.
+  '_Alignas', '_Alignof', '_Atomic', '_Generic', '_Noreturn', '_Static_assert', '_Thread_local',
+  -- Compiler.
+  'asm', '__asm', '__asm__', '__restrict__', '__inline', '__inline__', '__attribute__', '__declspec'
+}))

-local keyword = token(l.KEYWORD, word_match{
-  'break', 'case', 'continue', 'default', 'do', 'else', 'enum', 'for', 'goto',
-  'if', 'return', 'switch', 'while',
-  '_Alignas', '_Generic', '_Static_assert',
-} + storage_class + type_qualifier + function_specifier + extra_keywords)
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match{
+  'bool', 'char', 'double', 'float', 'int', 'long', 'short', 'signed', 'struct', 'union',
+  'unsigned', 'void', '_Bool', '_Complex', '_Imaginary',
+  -- Stdlib types.
+  'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'
+} + P('u')^-1 * 'int' * (P('_least') + '_fast')^-1 * lexer.digit^1 * '_t'))

 -- Constants.
-local errno = word_match{
-  -- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
-  'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT',
-  'EAGAIN', 'EALREADY', 'EBADF', 'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD',
-  'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET', 'EDEADLK', 'EDESTADDRREQ',
-  'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM',
-  'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR',
-  'ELOOP', 'EMFILE', 'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG',
-  'ENETDOWN', 'ENETRESET', 'ENETUNREACH', 'ENFILE', 'ENOBUFS', 'ENODATA',
-  'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM',
-  'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS',
-  'ENOTCONN', 'ENOTDIR', 'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK',
-  'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP', 'EOVERFLOW', 'EOWNERDEAD',
-  'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE',
-  'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY',
-  'EWOULDBLOCK', 'EXDEV',
-}
+lex:add_rule('constants', token(lexer.CONSTANT, word_match{
+  'NULL',
+  -- Preprocessor.
+  '__DATE__', '__FILE__', '__LINE__', '__TIME__', '__func__',
+  -- errno.h.
+  'E2BIG', 'EACCES', 'EADDRINUSE', 'EADDRNOTAVAIL', 'EAFNOSUPPORT', 'EAGAIN', 'EALREADY', 'EBADF',
+  'EBADMSG', 'EBUSY', 'ECANCELED', 'ECHILD', 'ECONNABORTED', 'ECONNREFUSED', 'ECONNRESET',
+  'EDEADLK', 'EDESTADDRREQ', 'EDOM', 'EDQUOT', 'EEXIST', 'EFAULT', 'EFBIG', 'EHOSTUNREACH', 'EIDRM',
+  'EILSEQ', 'EINPROGRESS', 'EINTR', 'EINVAL', 'EIO', 'EISCONN', 'EISDIR', 'ELOOP', 'EMFILE',
+  'EMLINK', 'EMSGSIZE', 'EMULTIHOP', 'ENAMETOOLONG', 'ENETDOWN', 'ENETRESET', 'ENETUNREACH',
+  'ENFILE', 'ENOBUFS', 'ENODATA', 'ENODEV', 'ENOENT', 'ENOEXEC', 'ENOLCK', 'ENOLINK', 'ENOMEM',
+  'ENOMSG', 'ENOPROTOOPT', 'ENOSPC', 'ENOSR', 'ENOSTR', 'ENOSYS', 'ENOTCONN', 'ENOTDIR',
+  'ENOTEMPTY', 'ENOTRECOVERABLE', 'ENOTSOCK', 'ENOTSUP', 'ENOTTY', 'ENXIO', 'EOPNOTSUPP',
+  'EOVERFLOW', 'EOWNERDEAD', 'EPERM', 'EPIPE', 'EPROTO', 'EPROTONOSUPPORT', 'EPROTOTYPE', 'ERANGE',
+  'EROFS', 'ESPIPE', 'ESRCH', 'ESTALE', 'ETIME', 'ETIMEDOUT', 'ETXTBSY', 'EWOULDBLOCK', 'EXDEV',
+  -- stdint.h.
+  'PTRDIFF_MIN', 'PTRDIFF_MAX', 'SIZE_MAX', 'SIG_ATOMIC_MIN', 'SIG_ATOMIC_MAX', 'WINT_MIN',
+  'WINT_MAX', 'WCHAR_MIN', 'WCHAR_MAX'
+} + P('U')^-1 * 'INT' * ((P('_LEAST') + '_FAST')^-1 * lexer.digit^1 + 'PTR' + 'MAX') *
+  (P('_MIN') + '_MAX')))

-local preproc_macros = word_match{
-  -- C11 6.10.8.1 Mandatory macros
-  '__DATE__', '__FILE__', '__LINE__', '__TIME__',
-  -- C11 6.4.2.2 Predefined identifiers
-  '__func__',
-}
+-- Labels.
+lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(lexer.word * ':')))

-local constant = token(l.CONSTANT, word_match{
-  'true', 'false',
-  'NULL', 'CHAR_BIT', 'SIZE_MAX', } +
-  ((P('WINT') + P('WCHAR') + P('SIG_ATOMIC') + P('PTRDIFF')) * (P('_MIN') + P('_MAX'))) +
-  ( P('INT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * (P('_MIN') + P('_MAX'))) +
-  (P('UINT') * (((P('_LEAST') + P('_FAST'))^-1 * l.dec_num^1) + P('MAX') + P('PTR')) * P('_MAX')) +
-  errno + preproc_macros
-)
+-- Strings.
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))

--- Types.
-local type = token(l.TYPE, word_match{
-  'bool', 'char', 'double', 'float', 'int', 'long', 'short',
-  'signed', 'struct', 'union', 'unsigned', 'void', '_Bool', '_Complex',
-  '_Imaginary', 'ptrdiff_t', 'size_t', 'max_align_t', 'wchar_t',
-  'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'} +
-  (P('u')^-1 * P('int') * (P('_least') + P('_fast'))^-1 * l.dec_num^1 * P('_t')) +
-  (S('usif') * l.dec_num^1 * P('_t')) +
-  (P('__')^-1 * S('usif') * l.dec_num^1)
-)
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

--- Labels.
--- FIXME: Accept whitespace before label.
-local label = token(l.LABEL, l.starts_line(l.word * ':'))
+-- Comments.
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/') +
+  lexer.range('#if' * S(' \t')^0 * '0' * lexer.space, '#endif')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))

--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.word)
+-- Numbers.
+local integer = lexer.integer * word_match('u l ll ul ull lu llu', true)^-1
+local float = lexer.float * P('f')^-1
+lex:add_rule('number', token(lexer.NUMBER, float + integer))

--- Operators.
-local operator = token(l.OPERATOR,
-  S('+-/*%<>~!=^&|?~:;,.()[]{}') +
-  word_match{ 'sizeof', '_Alignof' }
-)
+-- Preprocessor.
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
+  (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1
+local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
+  word_match('define elif else endif if ifdef ifndef line pragma undef'))
+lex:add_rule('preprocessor', include + preproc)

-M._rules = {
-  {'whitespace', ws},
-  {'comment', comment},
-  {'keyword', keyword},
-  {'type', type},
-  {'constant', constant},
-  {'operator', operator},
-  {'label', label},
-  {'identifier', identifier},
-  {'string', string},
-  {'number', number},
-  {'preproc', preproc},
-}
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}')))

-M._foldsymbols = {
-  _patterns = {'#?%l+', '[{}]', '/%*', '%*/', '//'},
-  [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1},
-  [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
-  [l.COMMENT] = {
-    ['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//'),
-    ['#if'] = 1, ['#endif'] = -1
-  }
-}
+-- Fold points.
+lex:add_fold_point(lexer.PREPROCESSOR, '#if', '#endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '#ifdef', '#endif')
+lex:add_fold_point(lexer.PREPROCESSOR, '#ifndef', '#endif')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))

-return M
+return lex
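
For orientation: the change above ports the lexer from the legacy Scintillua module-table style (a table M with M._rules and M._foldsymbols, patterns built from l.delimited_range and l.nonnewline_esc) to the object-style API (lexer.new, lex:add_rule, lex:add_fold_point, lexer.range, lexer.to_eol). The Lua fragment below is a minimal sketch of that new style, using only calls that appear in the diff; the lexer name 'example' and the token patterns are placeholders for illustration, not part of this commit.

-- Illustrative sketch only (not part of this commit): skeleton of a
-- new-style Scintillua lexer, as used by ansi_c.lua after this change.
local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local S = lpeg.S

local lex = lexer.new('example') -- placeholder name; replaces M = {_NAME = '...'}

-- Rules are registered in order instead of being collected in M._rules.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
lex:add_rule('keyword', token(lexer.KEYWORD, word_match('if else for while return')))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//', true)))
lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%=<>(){};,')))

-- Fold points replace the old M._foldsymbols table.
lex:add_fold_point(lexer.OPERATOR, '{', '}')
lex:add_fold_point(lexer.COMMENT, lexer.fold_consecutive_lines('//'))

return lex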
