From 8129933ca51caf788e0cd7c5fdbcb43fdc64601d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Andr=C3=A9=20Tanner?= Date: Sun, 19 Jul 2015 13:55:50 +0200 Subject: text: move regex related functions to separate file Eventually this should probably be rewritten to use an iternal regex engine, currently it has unacceptable memory usage, it copies the whole text. --- text-regex.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 text-regex.c (limited to 'text-regex.c') diff --git a/text-regex.c b/text-regex.c new file mode 100644 index 0000000..cff8587 --- /dev/null +++ b/text-regex.c @@ -0,0 +1,71 @@ +#include +#include + +#include "text-regex.h" + +struct Regex { + const char *string; + regex_t regex; +}; + +Regex *text_regex_new(void) { + Regex *r = calloc(1, sizeof(Regex)); + if (!r) + return NULL; + regcomp(&r->regex, "\0\0", 0); /* this should not match anything */ + return r; +} + +int text_regex_compile(Regex *regex, const char *string, int cflags) { + regex->string = string; + int r = regcomp(®ex->regex, string, cflags); + if (r) + regcomp(®ex->regex, "\0\0", 0); + return r; +} + +void text_regex_free(Regex *r) { + if (!r) + return; + regfree(&r->regex); + free(r); +} + +int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) { + char *buf = malloc(len + 1); + if (!buf) + return REG_NOMATCH; + len = text_bytes_get(txt, pos, len, buf); + buf[len] = '\0'; + regmatch_t match[nmatch]; + int ret = regexec(&r->regex, buf, nmatch, match, eflags); + if (!ret) { + for (size_t i = 0; i < nmatch; i++) { + pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so; + pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo; + } + } + free(buf); + return ret; +} + +int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size_t nmatch, RegexMatch pmatch[], int eflags) { + char *buf = malloc(len + 1); + if (!buf) + return REG_NOMATCH; + len = text_bytes_get(txt, pos, len, buf); + buf[len] = '\0'; + regmatch_t match[nmatch]; + char *cur = buf; + int ret = REG_NOMATCH; + while (!regexec(&r->regex, cur, nmatch, match, eflags)) { + ret = 0; + for (size_t i = 0; i < nmatch; i++) { + pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + (size_t)(cur - buf) + match[i].rm_so; + pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + (size_t)(cur - buf) + match[i].rm_eo; + } + cur += match[0].rm_eo; + } + free(buf); + return ret; +} -- cgit v1.2.3