From 1a158268c7693b00bf43c7e81034816d8d00358c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Andr=C3=A9=20Tanner?= Date: Wed, 13 Apr 2016 17:20:18 +0200 Subject: sam: improve ^ matching start of line There are some nasty differences between the meaning of ^ in Plan 9's regexp library and POSIX when using REG_NEWLINE. The former only matches at the beginning of a line whereas the latter matches the zero-length string immediately after a newline character \n. As a result this also matches after the very last newline at the end of the file. This is undesired behavior for a command like :x/^/c/#/ Hence we try to filter out this last match. Close #264 --- sam.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sam.c b/sam.c index be0b14a..3331567 100644 --- a/sam.c +++ b/sam.c @@ -833,7 +833,8 @@ static bool cmd_extract(Vis *vis, Win *win, Command *cmd, const char *argv[], Cu RegexMatch match[1]; while (start < end) { bool found = text_search_range_forward(txt, start, - end - start, cmd->regex, 1, match, 0) == 0; + end - start, cmd->regex, 1, match, + start > range->start ? REG_NOTBOL : 0) == 0; Filerange r = text_range_empty(); if (found) { if (argv[0][0] == 'x') @@ -845,10 +846,15 @@ static bool cmd_extract(Vis *vis, Win *win, Command *cmd, const char *argv[], Cu start++; continue; } - start = match[0].end+1; - } else { - start = match[0].end; + /* in Plan 9's regexp library ^ matches the beginning + * of a line, however in POSIX with REG_NEWLINE ^ + * matches the zero-length string immediately after a + * newline. Try filtering out the last such match at EOF. + */ + if (end == match[0].start && start > range->start) + break; } + start = match[0].end; } else { if (argv[0][0] == 'y') r = text_range_new(start, end); -- cgit v1.2.3