aboutsummaryrefslogtreecommitdiff
path: root/text-regex.c
diff options
context:
space:
mode:
author: Marc André Tanner <mat@brain-dump.org> 2016-08-07 22:27:23 +0200
committer: Marc André Tanner <mat@brain-dump.org> 2016-08-07 22:27:23 +0200
commit: 2b4a550d432e5e570bb888ce06440a825c2e2e7c (patch)
tree: 20ffcd1ebc90f2c7d07cad7b10581c27b69ee038 /text-regex.c
parent: bec4efe8bbc2956b026438611f9340349d980388 (diff)
download: vis-2b4a550d432e5e570bb888ce06440a825c2e2e7c.tar.gz
vis-2b4a550d432e5e570bb888ce06440a825c2e2e7c.tar.xz
text-regex: improve searching in binary data
The regex(3) API we currently use matches on NUL-terminated strings. Therefore, it does not work for binary data. This commit adds loops to manually skip over NUL bytes. While it does not work for patterns which would match strings containing NUL bytes, it should improve the most basic cases. Binary file handling will need further improvements in the future. Fixes #359.
Diffstat (limited to 'text-regex.c')
-rw-r--r-- text-regex.c 63
1 files changed, 43 insertions, 20 deletions
diff --git a/text-regex.c b/text-regex.c
index 640ead5..dd541d7 100644
--- a/text-regex.c
+++ b/text-regex.c
@@ -38,13 +38,25 @@ int text_search_range_forward(Text *txt, size_t pos, size_t len, Regex *r, size_
char *buf = text_bytes_alloc0(txt, pos, len);
if (!buf)
return REG_NOMATCH;
+ char *cur = buf, *end = buf + len;
+ int ret = REG_NOMATCH;
regmatch_t match[nmatch];
- int ret = regexec(&r->regex, buf, nmatch, match, eflags);
- if (!ret) {
- for (size_t i = 0; i < nmatch; i++) {
- pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
- pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
+ for (size_t junk = len; len > 0; len -= junk, pos += junk) {
+ ret = regexec(&r->regex, cur, nmatch, match, eflags);
+ if (!ret) {
+ for (size_t i = 0; i < nmatch; i++) {
+ pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
+ pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
+ }
+ break;
}
+ char *next = memchr(cur, 0, len);
+ if (!next)
+ break;
+ while (!*next && next != end)
+ next++;
+ junk = next - cur;
+ cur = next;
}
free(buf);
return ret;
@@ -54,25 +66,36 @@ int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size
char *buf = text_bytes_alloc0(txt, pos, len);
if (!buf)
return REG_NOMATCH;
- regmatch_t match[nmatch];
- char *cur = buf;
+ char *cur = buf, *end = buf + len;
int ret = REG_NOMATCH;
- while (!regexec(&r->regex, cur, nmatch, match, eflags)) {
- ret = 0;
- for (size_t i = 0; i < nmatch; i++) {
- pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + (size_t)(cur - buf) + match[i].rm_so;
- pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + (size_t)(cur - buf) + match[i].rm_eo;
- }
- if (match[0].rm_so == 0 && match[0].rm_eo == 0) {
- /* empty match at the beginning of cur, advance to next line */
- if ((cur = strchr(cur, '\n')))
- cur++;
- else
- break;
+ regmatch_t match[nmatch];
+ for (size_t junk = len; len > 0; len -= junk, pos += junk) {
+ char *next;
+ if (!regexec(&r->regex, cur, nmatch, match, eflags)) {
+ ret = 0;
+ for (size_t i = 0; i < nmatch; i++) {
+ pmatch[i].start = match[i].rm_so == -1 ? EPOS : pos + match[i].rm_so;
+ pmatch[i].end = match[i].rm_eo == -1 ? EPOS : pos + match[i].rm_eo;
+ }
+ if (match[0].rm_so == 0 && match[0].rm_eo == 0) {
+ /* empty match at the beginning of cur, advance to next line */
+ next = strchr(cur, '\n');
+ if (!next)
+ break;
+ next++;
+ } else {
+ next = cur + match[0].rm_eo;
+ }
} else {
- cur += match[0].rm_eo;
+ next = memchr(cur, 0, len);
+ if (!next)
+ break;
+ while (!*next && next != end)
+ next++;
}
+ junk = next - cur;
+ cur = next;
}
free(buf);
return ret;