From d2004b15f1e90efafedc367335c07ad4636d291d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Andr=C3=A9=20Tanner?= Date: Fri, 7 Apr 2017 16:04:40 +0200 Subject: text: add mem{r,}chr(3) based byte search functions These are generally implemented efficiently in libc. While memrchr(3) is non-standard, it is a common extension. If it is not available, we use a simple C implementation from musl. --- Makefile | 5 ++++- configure | 20 ++++++++++++++++++++ text.c | 29 +++++++++++++++++++++++++++++ text.h | 2 ++ util.h | 11 +++++++++++ 5 files changed, 66 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8e114e9..ba79108 100644 --- a/Makefile +++ b/Makefile @@ -32,8 +32,11 @@ CFLAGS_STD ?= -std=c99 -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE=700 -DNDEBUG CFLAGS_STD += -DVERSION=\"${VERSION}\" LDFLAGS_STD ?= -lc +CFLAGS_LIBC ?= -DHAVE_MEMRCHR=0 + CFLAGS_VIS = $(CFLAGS_AUTO) $(CFLAGS_TERMKEY) $(CFLAGS_CURSES) $(CFLAGS_ACL) \ - $(CFLAGS_SELINUX) $(CFLAGS_TRE) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD) + $(CFLAGS_SELINUX) $(CFLAGS_TRE) $(CFLAGS_LUA) $(CFLAGS_LPEG) $(CFLAGS_STD) \ + $(CFLAGS_LIBC) CFLAGS_VIS += -DVIS_PATH=\"${SHAREPREFIX}/vis\" CFLAGS_VIS += -DCONFIG_HELP=${CONFIG_HELP} diff --git a/configure b/configure index f12b96b..8cfd512 100755 --- a/configure +++ b/configure @@ -594,6 +594,25 @@ EOF fi fi +printf "checking for memrchr... " + +cat > "$tmpc" <<EOF +#define _GNU_SOURCE +#include <string.h> + +int main(int argc, char *argv[]) { + return !memrchr("\n", '\n', 1); +} +EOF + +if $CC $CFLAGS "$tmpc" $LDFLAGS -o "$tmpo" >/dev/null 2>&1; then + HAVE_MEMRCHR=1 + printf "%s\n" "yes" +else + HAVE_MEMRCHR=0 + printf "%s\n" "no" +fi + printf "completing config.mk... 
" exec 3>&1 1>>config.mk @@ -621,6 +640,7 @@ LDFLAGS_ACL = $LDFLAGS_ACL CONFIG_SELINUX = $CONFIG_SELINUX CFLAGS_SELINUX = $CFLAGS_SELINUX LDFLAGS_SELINUX = $LDFLAGS_SELINUX +CFLAGS_LIBC = -DHAVE_MEMRCHR=$HAVE_MEMRCHR EOF exec 1>&3 3>&- diff --git a/text.c b/text.c index f789339..1bf1940 100644 --- a/text.c +++ b/text.c @@ -1,3 +1,4 @@ +#define _GNU_SOURCE // memrchr(3) is non-standard #include #include #include @@ -1455,6 +1456,34 @@ bool text_iterator_byte_prev(Iterator *it, char *b) { return true; } +bool text_iterator_byte_find_prev(Iterator *it, char b) { + while (it->text) { + const char *match = memrchr(it->start, b, it->text - it->start); + if (match) { + it->pos -= it->text - match; + it->text = match; + return true; + } + text_iterator_prev(it); + } + text_iterator_next(it); + return false; +} + +bool text_iterator_byte_find_next(Iterator *it, char b) { + while (it->text) { + const char *match = memchr(it->text, b, it->end - it->text); + if (match) { + it->pos += match - it->text; + it->text = match; + return true; + } + text_iterator_next(it); + } + text_iterator_prev(it); + return false; +} + bool text_iterator_codepoint_next(Iterator *it, char *c) { while (text_iterator_byte_next(it, NULL)) { if (ISUTF8(*it->text)) { diff --git a/text.h b/text.h index 0e372c1..c2a5812 100644 --- a/text.h +++ b/text.h @@ -97,6 +97,8 @@ bool text_iterator_byte_prev(Iterator*, char *b); /* if the new position is at EOF a NUL byte (which is not actually * part of the file) is read. 
*/ bool text_iterator_byte_next(Iterator*, char *b); +bool text_iterator_byte_find_prev(Iterator*, char b); +bool text_iterator_byte_find_next(Iterator*, char b); /* move to the next/previous UTF-8 encoded Unicode codepoint * and set c (if it is non NULL) to the first byte */ bool text_iterator_codepoint_next(Iterator *it, char *c); diff --git a/util.h b/util.h index a53deda..9048c82 100644 --- a/util.h +++ b/util.h @@ -23,4 +23,15 @@ static inline bool addu(size_t a, size_t b, size_t *c) { } #endif +#if !HAVE_MEMRCHR +/* MIT licensed implementation from musl libc */ +static void *memrchr(const void *m, int c, size_t n) +{ + const unsigned char *s = m; + c = (unsigned char)c; + while (n--) if (s[n]==c) return (void *)(s+n); + return 0; +} #endif + +#endif /* UTIL_H */ -- cgit v1.2.3