diff options
| author | Marc André Tanner <mat@brain-dump.org> | 2016-12-12 12:02:20 +0100 |
|---|---|---|
| committer | Marc André Tanner <mat@brain-dump.org> | 2017-01-19 21:48:59 +0100 |
| commit | 53f84f7cbafcb177406f8f7bcc890e626e72ca63 (patch) | |
| tree | 37d5cbd337eb3fed871e04c20e904165dbef8d87 | |
| parent | 242f22f2ef7aeb14c36e54d7d44d3fd4e51a5d06 (diff) | |
| download | vis-53f84f7cbafcb177406f8f7bcc890e626e72ca63.tar.gz vis-53f84f7cbafcb177406f8f7bcc890e626e72ca63.tar.xz | |
text-regex: add regex backend based on libtre
While memory consumption should be improved, backward searches
will still be slow, because they are implemented in terms of
repeated forward searches. It needs to be investigated whether
the underlying automaton can have its transitions reversed and
essentially run backwards, as is the case in sam.
| -rw-r--r-- | Makefile | 12 | ||||
| -rw-r--r-- | README.md | 1 | ||||
| -rwxr-xr-x | configure | 52 | ||||
| -rw-r--r-- | text-regex-tre.c | 112 | ||||
| -rw-r--r-- | text-regex.h | 5 | ||||
| -rw-r--r-- | vis-cmds.c | 1 |
6 files changed, 179 insertions, 4 deletions
@@ -1,9 +1,11 @@ -include config.mk +REGEX_SRC ?= text-regex.c + SRC = array.c buffer.c libutf.c main.c map.c register.c ring-buffer.c \ - sam.c text.c text-motions.c text-objects.c text-regex.c text-util.c \ + sam.c text.c text-motions.c text-objects.c text-util.c \ ui-curses.c view.c vis.c vis-lua.c vis-modes.c vis-motions.c \ - vis-operators.c vis-prompt.c vis-text-objects.c + vis-operators.c vis-prompt.c vis-text-objects.c $(REGEX_SRC) EXECUTABLES = vis vis-clipboard vis-complete vis-menu vis-open @@ -19,6 +21,7 @@ VERSION = $(shell git describe --always --dirty 2>/dev/null || echo "0.2-git") CONFIG_LUA ?= 1 CONFIG_LPEG ?= 0 +CONFIG_TRE ?= 0 CONFIG_ACL ?= 0 CONFIG_SELINUX ?= 0 @@ -27,16 +30,17 @@ CFLAGS_STD += -DVERSION=\"${VERSION}\" LDFLAGS_STD ?= -lc CFLAGS_VIS = $(CFLAGS_AUTO) $(CFLAGS_TERMKEY) $(CFLAGS_CURSES) $(CFLAGS_ACL) \ - $(CFLAGS_SELINUX) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD) + $(CFLAGS_SELINUX) $(CFLAGS_TRE) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD) CFLAGS_VIS += -DVIS_PATH=\"${SHAREPREFIX}/vis\" CFLAGS_VIS += -DCONFIG_LUA=${CONFIG_LUA} CFLAGS_VIS += -DCONFIG_LPEG=${CONFIG_LPEG} +CFLAGS_VIS += -DCONFIG_TRE=${CONFIG_TRE} CFLAGS_VIS += -DCONFIG_SELINUX=${CONFIG_SELINUX} CFLAGS_VIS += -DCONFIG_ACL=${CONFIG_ACL} LDFLAGS_VIS = $(LDFLAGS_AUTO) $(LDFLAGS_TERMKEY) $(LDFLAGS_CURSES) $(LDFLAGS_ACL) \ - $(LDFLAGS_SELINUX) $(LDFLAGS_LUA) $(LDFLAGS_LPEG) $(LDFLAGS_STD) + $(LDFLAGS_SELINUX) $(LDFLAGS_TRE) $(LDFLAGS_LUA) $(LDFLAGS_LPEG) $(LDFLAGS_STD) STRIP?=strip @@ -60,6 +60,7 @@ compatible environment as well as: * [Lua](http://www.lua.org/) >= 5.2 (optional) * [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/) >= 0.12 (optional runtime dependency required for syntax highlighting) + * [TRE](http://laurikari.net/tre/) (optional for more memory efficient regex search) Assuming these dependencies are met, execute: @@ -25,6 +25,7 @@ Fine tuning of the installation directories: Optional features: --enable-lua build with Lua support [auto] --enable-lpeg build 
with support for statically linking to LPeg [auto] + --enable-tre build with TRE regex support [auto] --enable-selinux build with SELinux support [auto] --enable-acl build with POSIX ACL support [auto] @@ -115,6 +116,7 @@ MANDIR='$(PREFIX)/share/man' lua=auto lpeg=auto +tre=auto selinux=auto acl=auto @@ -133,6 +135,8 @@ case "$arg" in --disable-lua|--enable-lua=no) lua=no ;; --enable-lpeg|--enable-lpeg=yes) lpeg=yes ;; --disable-lpeg|--enable-lpeg=no) lpeg=no ;; +--enable-tre|--enable-tre=yes) tre=yes ;; +--disable-tre|--enable-tre=no) tre=no ;; --enable-selinux|--enable-selinux=yes) selinux=yes ;; --disable-selinux|--enable-selinux=no) selinux=no ;; --enable-acl|--enable-acl=yes) acl=yes ;; @@ -353,6 +357,50 @@ else fail "$0: cannot find libtermkey" fi +CONFIG_TRE=0 +REGEX_SRC=text-regex.c + +if test "$tre" != "no" ; then + + printf "checking for libtre... " + +cat > "$tmpc" <<EOF +#include <tre/tre.h> + +int main() { + regex_t preg; + tre_str_source *source = NULL; + regmatch_t pmatch[1]; + tre_regcomp(&preg, "\0", REG_EXTENDED); + tre_reguexec(&preg, source, 1, pmatch, 0); + tre_regfree(&preg); + return 0; +} +EOF + + if test "$have_pkgconfig" = "yes" ; then + CFLAGS_TRE=$(pkg-config --cflags tre 2>/dev/null) + LDFLAGS_TRE=$(pkg-config --libs tre 2>/dev/null) + fi + + if test -z "$LDFLAGS_TRE"; then + CFLAGS_TRE="" + LDFLAGS_TRE="-ltre" + fi + + if $CC $CFLAGS $CFLAGS_TRE "$tmpc" \ + $LDFLAGS $LDFLAGS_TRE -o "$tmpo" >/dev/null 2>&1; then + CONFIG_TRE=1 + REGEX_SRC=text-regex-tre.c + printf "%s\n" "yes" + else + printf "%s\n" "no" + CFLAGS_TRE="" + LDFLAGS_TRE="" + test "$tre" = "yes" && fail "$0: cannot find libtre" + fi +fi + CONFIG_LUA=0 # enabling builtin lpeg requires lua support @@ -537,6 +585,10 @@ CFLAGS_CURSES = $CFLAGS_CURSES LDFLAGS_CURSES = $LDFLAGS_CURSES CFLAGS_TERMKEY = $CFLAGS_TERMKEY LDFLAGS_TERMKEY = $LDFLAGS_TERMKEY +REGEX_SRC = $REGEX_SRC +CONFIG_TRE = $CONFIG_TRE +CFLAGS_TRE = $CFLAGS_TRE +LDFLAGS_TRE = $LDFLAGS_TRE CONFIG_LUA = $CONFIG_LUA 
CFLAGS_LUA = $CFLAGS_LUA LDFLAGS_LUA = $LDFLAGS_LUA diff --git a/text-regex-tre.c b/text-regex-tre.c new file mode 100644 index 0000000..d45252a --- /dev/null +++ b/text-regex-tre.c @@ -0,0 +1,112 @@ +#include <stdlib.h> +#include <string.h> + +#include "text-regex.h" +#include "text-motions.h" + +struct Regex { + regex_t regex; + tre_str_source str_source; + Text *text; + Iterator it; + size_t end; +}; + +size_t text_regex_nsub(Regex *r) { + if (!r) + return 0; + return r->regex.re_nsub; +} + +static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) { + Regex *r = context; + text_iterator_byte_get(&r->it, (char*)c); + return r->it.pos < r->end && text_iterator_byte_next(&r->it, NULL) ? 0 : 1; +} + +static void str_rewind(size_t pos, void *context) { + Regex *r = context; + r->it = text_iterator_get(r->text, pos); +} + +static int str_compare(size_t pos1, size_t pos2, size_t len, void *context) { + Regex *r = context; + int ret = 1; + void *buf1 = malloc(len), *buf2 = malloc(len); + if (!buf1 || !buf2) + goto err; + text_bytes_get(r->text, pos1, len, buf1); + text_bytes_get(r->text, pos2, len, buf2); + ret = memcmp(buf1, buf2, len); +err: + free(buf1); + free(buf2); + return ret; +} + +Regex *text_regex_new(void) { + Regex *r = calloc(1, sizeof(*r)); + if (!r) + return NULL; + r->str_source = (tre_str_source) { + .get_next_char = str_next_char, + .rewind = str_rewind, + .compare = str_compare, + .context = r, + }; + return r; +} + +void text_regex_free(Regex *r) { + if (!r) + return; + tre_regfree(&r->regex); + free(r); +} + +int text_regex_compile(Regex *regex, const char *string, int cflags) { + int r = tre_regcomp(&regex->regex, string, cflags); + if (r) + tre_regcomp(&regex->regex, "\0\0", 0); + return r; +} + +int text_regex_match(Regex *r, const char *data, int eflags) { + return tre_regexec(&r->regex, data, 0, NULL, eflags); +} + +int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int 
eflags) { + r->text = txt; + r->it = text_iterator_get(txt, pos); + r->end = pos+len; + + regmatch_t match[nmatch]; + int ret = tre_reguexec(&r->regex, &r->str_source, nmatch, match, eflags); + if (!ret) { + for (size_t i = 0; i < nmatch; i++) { + pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so; + pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo; + } + } + return ret; +} + +int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) { + int ret = REG_NOMATCH; + size_t end = pos + len; + + while (pos < end && !text_search_range_forward(txt, pos, len, r, nmatch, pmatch, eflags)) { + ret = 0; + // FIXME: assumes nmatch >= 1 + size_t next = pmatch[0].end; + if (next == pos) { + next = text_line_next(txt, pos); + if (next == pos) + break; + } + pos = next; + len = end - pos; + } + + return ret; +} diff --git a/text-regex.h b/text-regex.h index 1b2c382..45054c8 100644 --- a/text-regex.h +++ b/text-regex.h @@ -1,7 +1,12 @@ #ifndef TEXT_REGEX_H #define TEXT_REGEX_H +/* make the REG_* constants available */ +#if CONFIG_TRE +#include <tre/tre.h> +#else #include <regex.h> +#endif #include "text.h" typedef struct Regex Regex; @@ -741,6 +741,7 @@ static bool cmd_help(Vis *vis, Win *win, Command *cmd, const char *argv[], Curso } configs[] = { { "Lua support: ", CONFIG_LUA }, { "Lua LPeg statically built-in: ", CONFIG_LPEG }, + { "TRE based regex support: ", CONFIG_TRE }, { "POSIX ACL support: ", CONFIG_ACL }, { "SELinux support: ", CONFIG_SELINUX }, }; |
