about summary refs log tree commit diff
diff options
context:
space:
mode:
author Marc André Tanner <mat@brain-dump.org> 2016-12-12 12:02:20 +0100
committer Marc André Tanner <mat@brain-dump.org> 2017-01-19 21:48:59 +0100
commit 53f84f7cbafcb177406f8f7bcc890e626e72ca63 (patch)
tree 37d5cbd337eb3fed871e04c20e904165dbef8d87
parent 242f22f2ef7aeb14c36e54d7d44d3fd4e51a5d06 (diff)
download vis-53f84f7cbafcb177406f8f7bcc890e626e72ca63.tar.gz
vis-53f84f7cbafcb177406f8f7bcc890e626e72ca63.tar.xz
text-regex: add regex backend based on libtre
While this should reduce memory consumption, backward searches will still be slow, because they are implemented in terms of repeated forward searches. It remains to be investigated whether the underlying automaton can have its transitions reversed and essentially be run backwards, as is the case in sam.
-rw-r--r-- Makefile 12
-rw-r--r-- README.md 1
-rwxr-xr-x configure 52
-rw-r--r-- text-regex-tre.c 112
-rw-r--r-- text-regex.h 5
-rw-r--r-- vis-cmds.c 1
6 files changed, 179 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index 18bf93d..164acae 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,11 @@
-include config.mk
+REGEX_SRC ?= text-regex.c
+
SRC = array.c buffer.c libutf.c main.c map.c register.c ring-buffer.c \
- sam.c text.c text-motions.c text-objects.c text-regex.c text-util.c \
+ sam.c text.c text-motions.c text-objects.c text-util.c \
ui-curses.c view.c vis.c vis-lua.c vis-modes.c vis-motions.c \
- vis-operators.c vis-prompt.c vis-text-objects.c
+ vis-operators.c vis-prompt.c vis-text-objects.c $(REGEX_SRC)
EXECUTABLES = vis vis-clipboard vis-complete vis-menu vis-open
@@ -19,6 +21,7 @@ VERSION = $(shell git describe --always --dirty 2>/dev/null || echo "0.2-git")
CONFIG_LUA ?= 1
CONFIG_LPEG ?= 0
+CONFIG_TRE ?= 0
CONFIG_ACL ?= 0
CONFIG_SELINUX ?= 0
@@ -27,16 +30,17 @@ CFLAGS_STD += -DVERSION=\"${VERSION}\"
LDFLAGS_STD ?= -lc
CFLAGS_VIS = $(CFLAGS_AUTO) $(CFLAGS_TERMKEY) $(CFLAGS_CURSES) $(CFLAGS_ACL) \
- $(CFLAGS_SELINUX) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD)
+ $(CFLAGS_SELINUX) $(CFLAGS_TRE) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD)
CFLAGS_VIS += -DVIS_PATH=\"${SHAREPREFIX}/vis\"
CFLAGS_VIS += -DCONFIG_LUA=${CONFIG_LUA}
CFLAGS_VIS += -DCONFIG_LPEG=${CONFIG_LPEG}
+CFLAGS_VIS += -DCONFIG_TRE=${CONFIG_TRE}
CFLAGS_VIS += -DCONFIG_SELINUX=${CONFIG_SELINUX}
CFLAGS_VIS += -DCONFIG_ACL=${CONFIG_ACL}
LDFLAGS_VIS = $(LDFLAGS_AUTO) $(LDFLAGS_TERMKEY) $(LDFLAGS_CURSES) $(LDFLAGS_ACL) \
- $(LDFLAGS_SELINUX) $(LDFLAGS_LUA) $(LDFLAGS_LPEG) $(LDFLAGS_STD)
+ $(LDFLAGS_SELINUX) $(LDFLAGS_TRE) $(LDFLAGS_LUA) $(LDFLAGS_LPEG) $(LDFLAGS_STD)
STRIP?=strip
diff --git a/README.md b/README.md
index 7e9bfbc..ca8206a 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,7 @@ compatible environment as well as:
* [Lua](http://www.lua.org/) >= 5.2 (optional)
* [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/) >= 0.12
(optional runtime dependency required for syntax highlighting)
+ * [TRE](http://laurikari.net/tre/) (optional, for more memory-efficient regex search)
Assuming these dependencies are met, execute:
diff --git a/configure b/configure
index dc8f278..fe85ccf 100755
--- a/configure
+++ b/configure
@@ -25,6 +25,7 @@ Fine tuning of the installation directories:
Optional features:
--enable-lua build with Lua support [auto]
--enable-lpeg build with support for statically linking to LPeg [auto]
+ --enable-tre build with TRE regex support [auto]
--enable-selinux build with SELinux support [auto]
--enable-acl build with POSIX ACL support [auto]
@@ -115,6 +116,7 @@ MANDIR='$(PREFIX)/share/man'
lua=auto
lpeg=auto
+tre=auto
selinux=auto
acl=auto
@@ -133,6 +135,8 @@ case "$arg" in
--disable-lua|--enable-lua=no) lua=no ;;
--enable-lpeg|--enable-lpeg=yes) lpeg=yes ;;
--disable-lpeg|--enable-lpeg=no) lpeg=no ;;
+--enable-tre|--enable-tre=yes) tre=yes ;;
+--disable-tre|--enable-tre=no) tre=no ;;
--enable-selinux|--enable-selinux=yes) selinux=yes ;;
--disable-selinux|--enable-selinux=no) selinux=no ;;
--enable-acl|--enable-acl=yes) acl=yes ;;
@@ -353,6 +357,50 @@ else
fail "$0: cannot find libtermkey"
fi
+CONFIG_TRE=0
+REGEX_SRC=text-regex.c
+
+if test "$tre" != "no" ; then
+
+ printf "checking for libtre... "
+
+cat > "$tmpc" <<EOF
+#include <tre/tre.h>
+
+int main() {
+ regex_t preg;
+ tre_str_source *source = NULL;
+ regmatch_t pmatch[1];
+ tre_regcomp(&preg, "\0", REG_EXTENDED);
+ tre_reguexec(&preg, source, 1, pmatch, 0);
+ tre_regfree(&preg);
+ return 0;
+}
+EOF
+
+ if test "$have_pkgconfig" = "yes" ; then
+ CFLAGS_TRE=$(pkg-config --cflags tre 2>/dev/null)
+ LDFLAGS_TRE=$(pkg-config --libs tre 2>/dev/null)
+ fi
+
+ if test -z "$LDFLAGS_TRE"; then
+ CFLAGS_TRE=""
+ LDFLAGS_TRE="-ltre"
+ fi
+
+ if $CC $CFLAGS $CFLAGS_TRE "$tmpc" \
+ $LDFLAGS $LDFLAGS_TRE -o "$tmpo" >/dev/null 2>&1; then
+ CONFIG_TRE=1
+ REGEX_SRC=text-regex-tre.c
+ printf "%s\n" "yes"
+ else
+ printf "%s\n" "no"
+ CFLAGS_TRE=""
+ LDFLAGS_TRE=""
+ test "$tre" = "yes" && fail "$0: cannot find libtre"
+ fi
+fi
+
CONFIG_LUA=0
# enabling builtin lpeg requires lua support
@@ -537,6 +585,10 @@ CFLAGS_CURSES = $CFLAGS_CURSES
LDFLAGS_CURSES = $LDFLAGS_CURSES
CFLAGS_TERMKEY = $CFLAGS_TERMKEY
LDFLAGS_TERMKEY = $LDFLAGS_TERMKEY
+REGEX_SRC = $REGEX_SRC
+CONFIG_TRE = $CONFIG_TRE
+CFLAGS_TRE = $CFLAGS_TRE
+LDFLAGS_TRE = $LDFLAGS_TRE
CONFIG_LUA = $CONFIG_LUA
CFLAGS_LUA = $CFLAGS_LUA
LDFLAGS_LUA = $LDFLAGS_LUA
diff --git a/text-regex-tre.c b/text-regex-tre.c
new file mode 100644
index 0000000..d45252a
--- /dev/null
+++ b/text-regex-tre.c
@@ -0,0 +1,112 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "text-regex.h"
+#include "text-motions.h"
+
+struct Regex {
+ regex_t regex;
+ tre_str_source str_source;
+ Text *text;
+ Iterator it;
+ size_t end;
+};
+
+size_t text_regex_nsub(Regex *r) {
+ if (!r)
+ return 0;
+ return r->regex.re_nsub;
+}
+
+static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) {
+ Regex *r = context;
+ text_iterator_byte_get(&r->it, (char*)c);
+ return r->it.pos < r->end && text_iterator_byte_next(&r->it, NULL) ? 0 : 1;
+}
+
+static void str_rewind(size_t pos, void *context) {
+ Regex *r = context;
+ r->it = text_iterator_get(r->text, pos);
+}
+
+static int str_compare(size_t pos1, size_t pos2, size_t len, void *context) {
+ Regex *r = context;
+ int ret = 1;
+ void *buf1 = malloc(len), *buf2 = malloc(len);
+ if (!buf1 || !buf2)
+ goto err;
+ text_bytes_get(r->text, pos1, len, buf1);
+ text_bytes_get(r->text, pos2, len, buf2);
+ ret = memcmp(buf1, buf2, len);
+err:
+ free(buf1);
+ free(buf2);
+ return ret;
+}
+
+Regex *text_regex_new(void) {
+ Regex *r = calloc(1, sizeof(*r));
+ if (!r)
+ return NULL;
+ r->str_source = (tre_str_source) {
+ .get_next_char = str_next_char,
+ .rewind = str_rewind,
+ .compare = str_compare,
+ .context = r,
+ };
+ return r;
+}
+
+void text_regex_free(Regex *r) {
+ if (!r)
+ return;
+ tre_regfree(&r->regex);
+ free(r);
+}
+
+int text_regex_compile(Regex *regex, const char *string, int cflags) {
+ int r = tre_regcomp(&regex->regex, string, cflags);
+ if (r)
+ tre_regcomp(&regex->regex, "\0\0", 0);
+ return r;
+}
+
+int text_regex_match(Regex *r, const char *data, int eflags) {
+ return tre_regexec(&r->regex, data, 0, NULL, eflags);
+}
+
+int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
+ r->text = txt;
+ r->it = text_iterator_get(txt, pos);
+ r->end = pos+len;
+
+ regmatch_t match[nmatch];
+ int ret = tre_reguexec(&r->regex, &r->str_source, nmatch, match, eflags);
+ if (!ret) {
+ for (size_t i = 0; i < nmatch; i++) {
+ pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
+ pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
+ }
+ }
+ return ret;
+}
+
+int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
+ int ret = REG_NOMATCH;
+ size_t end = pos + len;
+
+ while (pos < end && !text_search_range_forward(txt, pos, len, r, nmatch, pmatch, eflags)) {
+ ret = 0;
+ // FIXME: assumes nmatch >= 1
+ size_t next = pmatch[0].end;
+ if (next == pos) {
+ next = text_line_next(txt, pos);
+ if (next == pos)
+ break;
+ }
+ pos = next;
+ len = end - pos;
+ }
+
+ return ret;
+}
diff --git a/text-regex.h b/text-regex.h
index 1b2c382..45054c8 100644
--- a/text-regex.h
+++ b/text-regex.h
@@ -1,7 +1,12 @@
#ifndef TEXT_REGEX_H
#define TEXT_REGEX_H
+/* make the REG_* constants available */
+#if CONFIG_TRE
+#include <tre/tre.h>
+#else
#include <regex.h>
+#endif
#include "text.h"
typedef struct Regex Regex;
diff --git a/vis-cmds.c b/vis-cmds.c
index 60e5f91..879eadd 100644
--- a/vis-cmds.c
+++ b/vis-cmds.c
@@ -741,6 +741,7 @@ static bool cmd_help(Vis *vis, Win *win, Command *cmd, const char *argv[], Curso
} configs[] = {
{ "Lua support: ", CONFIG_LUA },
{ "Lua LPeg statically built-in: ", CONFIG_LPEG },
+ { "TRE based regex support: ", CONFIG_TRE },
{ "POSIX ACL support: ", CONFIG_ACL },
{ "SELinux support: ", CONFIG_SELINUX },
};