aboutsummaryrefslogtreecommitdiff
path: root/text-regex-tre.c
diff options
context:
space:
mode:
author: Marc André Tanner <mat@brain-dump.org> 2016-12-12 12:02:20 +0100
committer: Marc André Tanner <mat@brain-dump.org> 2017-01-19 21:48:59 +0100
commit: 53f84f7cbafcb177406f8f7bcc890e626e72ca63 (patch)
tree: 37d5cbd337eb3fed871e04c20e904165dbef8d87 /text-regex-tre.c
parent: 242f22f2ef7aeb14c36e54d7d44d3fd4e51a5d06 (diff)
downloadvis-53f84f7cbafcb177406f8f7bcc890e626e72ca63.tar.gz
vis-53f84f7cbafcb177406f8f7bcc890e626e72ca63.tar.xz
text-regex: add regex backend based on libtre
While memory consumption should be improved, backward searches will still be slow, because they are implemented in terms of repeated forward searches. It needs to be investigated whether the underlying automaton can have its transitions reversed and essentially run backwards, as is the case in sam.
Diffstat (limited to 'text-regex-tre.c')
-rw-r--r-- text-regex-tre.c 112
1 files changed, 112 insertions, 0 deletions
diff --git a/text-regex-tre.c b/text-regex-tre.c
new file mode 100644
index 0000000..d45252a
--- /dev/null
+++ b/text-regex-tre.c
@@ -0,0 +1,112 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "text-regex.h"
+#include "text-motions.h"
+
+/*
+ * Regular expression object of the libtre backend.
+ *
+ * Besides the compiled pattern it carries the state the tre_str_source
+ * callbacks need to feed bytes of a Text to the matcher.
+ */
+struct Regex {
+	regex_t regex;              /* compiled pattern */
+	tre_str_source str_source;  /* callbacks passed to tre_reguexec() */
+	Text *text;                 /* text being searched */
+	Iterator it;                /* read cursor used by str_next_char() */
+	size_t start;               /* absolute offset at which the search range begins */
+	size_t end;                 /* absolute offset one past the search range */
+};
+
+/*
+ * Number of sub-expressions of the compiled pattern,
+ * or 0 if no regex object was given.
+ */
+size_t text_regex_nsub(Regex *r) {
+	if (!r)
+		return 0;
+	return r->regex.re_nsub;
+}
+
+/*
+ * tre_str_source callback: deliver the next byte of the search range.
+ *
+ * Stores the byte (zero extended) in *c, reports an advance of one
+ * position through *pos_add and returns 0. Once the cursor has reached
+ * the end of the range a NUL character is stored and a non-zero value is
+ * returned, so that nothing beyond the range is exposed to the matcher.
+ */
+static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) {
+	Regex *r = context;
+	char byte = '\0';
+
+	/* every call consumes exactly one byte */
+	*pos_add = 1;
+
+	if (r->it.pos >= r->end) {
+		*c = '\0';
+		return 1;
+	}
+
+	/* go through a local char: tre_char_t may be wider than one byte,
+	 * writing through a (char*) cast would leave the rest indeterminate */
+	text_iterator_byte_get(&r->it, &byte);
+	*c = (tre_char_t)(unsigned char)byte;
+
+	/* NOTE(review): as before, a cursor which can not be advanced is
+	 * reported as end of input together with the byte just read */
+	return text_iterator_byte_next(&r->it, NULL) ? 0 : 1;
+}
+
+/*
+ * tre_str_source callback: reposition the read cursor.
+ *
+ * `pos` is counted from the beginning of the search range (the matcher
+ * knows nothing about absolute text offsets), hence the range start has
+ * to be added before looking up the iterator.
+ */
+static void str_rewind(size_t pos, void *context) {
+	Regex *r = context;
+	r->it = text_iterator_get(r->text, r->start + pos);
+}
+
+/*
+ * tre_str_source callback: compare two substrings of length `len`
+ * located at `pos1` and `pos2`, both relative to the start of the
+ * search range.
+ *
+ * Returns 0 if they are equal and non-zero if they differ or if the
+ * temporary buffers could not be allocated.
+ */
+static int str_compare(size_t pos1, size_t pos2, size_t len, void *context) {
+	Regex *r = context;
+	int ret = 1;
+
+	/* empty substrings are always equal; also avoids malloc(0), which
+	 * may legitimately return NULL and would be mistaken for an error */
+	if (len == 0)
+		return 0;
+
+	void *buf1 = malloc(len), *buf2 = malloc(len);
+	if (!buf1 || !buf2)
+		goto err;
+	text_bytes_get(r->text, r->start + pos1, len, buf1);
+	text_bytes_get(r->text, r->start + pos2, len, buf2);
+	ret = memcmp(buf1, buf2, len);
+err:
+	free(buf1);
+	free(buf2);
+	return ret;
+}
+
+/*
+ * Allocate a zero initialized regex object and wire up the string source
+ * callbacks with the object itself as their context.
+ *
+ * Returns NULL if the allocation fails. Release with text_regex_free().
+ */
+Regex *text_regex_new(void) {
+	Regex *r = calloc(1, sizeof(*r));
+	if (!r)
+		return NULL;
+	r->str_source = (tre_str_source) {
+		.get_next_char = str_next_char,
+		.rewind = str_rewind,
+		.compare = str_compare,
+		.context = r,
+	};
+	return r;
+}
+
+/* Release the compiled pattern and the regex object; NULL is ignored. */
+void text_regex_free(Regex *r) {
+	if (!r)
+		return;
+	tre_regfree(&r->regex);
+	free(r);
+}
+
+/*
+ * Compile `string` with the given `cflags`.
+ *
+ * Returns the result of tre_regcomp(). On failure an empty pattern is
+ * compiled in its place, presumably to leave the object in a state which
+ * can still be passed to the other functions, including text_regex_free().
+ */
+int text_regex_compile(Regex *regex, const char *string, int cflags) {
+	int r = tre_regcomp(&regex->regex, string, cflags);
+	if (r)
+		tre_regcomp(&regex->regex, "\0\0", 0);
+	return r;
+}
+
+/*
+ * Match the NUL terminated string `data` against the compiled pattern.
+ * No sub-match information is requested; returns the result of
+ * tre_regexec().
+ */
+int text_regex_match(Regex *r, const char *data, int eflags) {
+	return tre_regexec(&r->regex, data, 0, NULL, eflags);
+}
+
+/*
+ * Search the range [pos, pos+len) of `txt` for the first match.
+ *
+ * Returns the result of tre_reguexec(). On success (zero) the first
+ * `nmatch` entries of `pmatch` hold the absolute start and end offsets
+ * of the match and its sub-expressions, EPOS marks unused entries.
+ * On failure `pmatch` is left untouched.
+ */
+int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
+	r->text = txt;
+	r->it = text_iterator_get(txt, pos);
+	/* remembered for the callbacks, which only get relative offsets */
+	r->start = pos;
+	r->end = pos+len;
+
+	/* a variable length array must not have zero elements */
+	regmatch_t match[nmatch ? nmatch : 1];
+	int ret = tre_reguexec(&r->regex, &r->str_source, nmatch, match, eflags);
+	if (!ret) {
+		for (size_t i = 0; i < nmatch; i++) {
+			/* offsets are relative to the range start, translate them */
+			pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
+			pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Find the last match within the range [pos, pos+len) of `txt`.
+ *
+ * Implemented as repeated forward searches, each one resuming at the end
+ * of the previous match, until no further match is found. The result of
+ * the last successful search is what remains in `pmatch`.
+ *
+ * Returns 0 if at least one match was found, REG_NOMATCH otherwise.
+ * `nmatch` may be zero, in which case `pmatch` is not accessed.
+ */
+int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) {
+	int ret = REG_NOMATCH;
+	size_t end = pos + len;
+
+	/* the end of the whole match is needed to advance the search, hence
+	 * always request at least one entry, using a local buffer if the
+	 * caller did not provide any */
+	RegexMatch whole;
+	RegexMatch *found = nmatch ? pmatch : &whole;
+	size_t count = nmatch ? nmatch : 1;
+
+	while (pos < end && !text_search_range_forward(txt, pos, len, r, count, found, eflags)) {
+		ret = 0;
+		size_t next = found[0].end;
+		if (next == pos) {
+			/* match did not consume anything, skip to the next
+			 * line to guarantee progress */
+			next = text_line_next(txt, pos);
+			if (next == pos)
+				break;
+		}
+		pos = next;
+		len = end - pos;
+	}
+
+	return ret;
+}