From 886a5ed603df862a26f0d6d78d6c8dd161623738 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Andr=C3=A9=20Tanner?= Date: Thu, 23 Jan 2020 14:01:41 +0100 Subject: text: fix spurious regex anchor matches The regex anchors ^ and $ must not match at the start/end of the search range unless the range is preceded/followed by a newline. This is implemented at the text-motion layer by passing the appropriate REG_NOT{B,E}OL flags to the search backend, meaning the caller can influence the anchor behavior depending on the context. This is important because, for example, in the command language the anchors apply to existing selections, not line boundaries. --- text-motions.c | 15 +++++++++++---- text-regex-tre.c | 6 ++++++ text-regex.c | 4 ++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/text-motions.c b/text-motions.c index 533d896..97a7e1e 100644 --- a/text-motions.c +++ b/text-motions.c @@ -618,11 +618,15 @@ size_t text_search_forward(Text *txt, size_t pos, Regex *regex) { size_t start = pos + 1; size_t end = text_size(txt); RegexMatch match[1]; - bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, 0); + char c; + int flags = text_byte_get(txt, pos, &c) && c == '\n' ? 0 : REG_NOTBOL; + bool found = start < end && !text_search_range_forward(txt, start, end - start, regex, 1, match, flags); if (!found) { start = 0; - found = !text_search_range_forward(txt, start, end - start, regex, 1, match, 0); + end = pos; + flags = text_byte_get(txt, end, &c) && c == '\n' ? 0 : REG_NOTEOL; + found = !text_search_range_forward(txt, start, end - start, regex, 1, match, flags); } return found ? match[0].start : pos; @@ -632,12 +636,15 @@ size_t text_search_backward(Text *txt, size_t pos, Regex *regex) { size_t start = 0; size_t end = pos; RegexMatch match[1]; - bool found = !text_search_range_backward(txt, start, end, regex, 1, match, 0); + char c; + int flags = text_byte_get(txt, end, &c) && c == '\n' ? 
0 : REG_NOTEOL; + bool found = !text_search_range_backward(txt, start, end, regex, 1, match, flags); if (!found) { start = pos + 1; end = text_size(txt); - found = start < end && !text_search_range_backward(txt, start, end - start, regex, 1, match, 0); + flags = text_byte_get(txt, pos, &c) && c == '\n' ? 0 : REG_NOTBOL; + found = start < end && !text_search_range_backward(txt, start, end - start, regex, 1, match, flags); } return found ? match[0].start : pos; diff --git a/text-regex-tre.c b/text-regex-tre.c index cbfc4ee..da377b4 100644 --- a/text-regex-tre.c +++ b/text-regex-tre.c @@ -162,6 +162,12 @@ int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size } pos = next; len = end - pos; + + char c; + if (text_byte_get(txt, pos-1, &c) && c == '\n') + eflags &= ~REG_NOTBOL; + else + eflags |= REG_NOTBOL; } return ret; diff --git a/text-regex.c b/text-regex.c index b0549b6..56ecafc 100644 --- a/text-regex.c +++ b/text-regex.c @@ -101,6 +101,10 @@ int text_search_range_backward(Text *txt, size_t pos, size_t len, Regex *r, size } junk = next - cur; cur = next; + if (cur[-1] == '\n') + eflags &= ~REG_NOTBOL; + else + eflags |= REG_NOTBOL; } free(buf); return ret; -- cgit v1.2.3