diff options
| author | Marc André Tanner <mat@brain-dump.org> | 2017-07-03 14:33:04 +0200 |
|---|---|---|
| committer | Marc André Tanner <mat@brain-dump.org> | 2017-07-03 14:33:04 +0200 |
| commit | 7f3d570cc23cf80163514013af2fc224eeff80c3 (patch) | |
| tree | dd1e39ee091df00637e0f7134f8402ea85cf3d70 /text-regex-tre.c | |
| parent | 48aad577d0525755d2e57c304475ee35f63ca12d (diff) | |
| download | vis-7f3d570cc23cf80163514013af2fc224eeff80c3.tar.gz vis-7f3d570cc23cf80163514013af2fc224eeff80c3.tar.xz | |
Fix Unicode regex searches for libtre builds
Fix #566
Diffstat (limited to 'text-regex-tre.c')
| -rw-r--r-- | text-regex-tre.c | 61 |
1 file changed, 55 insertions, 6 deletions
```diff
diff --git a/text-regex-tre.c b/text-regex-tre.c
index 0ac8b38..3f02dcd 100644
--- a/text-regex-tre.c
+++ b/text-regex-tre.c
@@ -1,5 +1,7 @@
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
+#include <errno.h>
 
 #include "text-regex.h"
 #include "text-motions.h"
@@ -20,13 +22,60 @@ size_t text_regex_nsub(Regex *r) {
 
 static int str_next_char(tre_char_t *c, unsigned int *pos_add, void *context) {
 	Regex *r = context;
-	*pos_add = 1;
-	if (r->it.pos < r->end && text_iterator_byte_get(&r->it, (char*)c)) {
-		text_iterator_byte_next(&r->it, NULL);
-		return 0;
+	Iterator *it = &r->it;
+	if (TRE_WCHAR) {
+		mbstate_t ps = { 0 };
+		bool eof = false;
+		size_t start = it->pos;
+		for (;;) {
+			if (it->pos >= r->end) {
+				eof = true;
+				break;
+			}
+			size_t rem = r->end - it->pos;
+			size_t plen = it->end - it->text;
+			size_t len = rem < plen ? rem : plen;
+			size_t wclen = mbrtowc(c, it->text, len, &ps);
+			if (wclen == (size_t)-1 && errno == EILSEQ) {
+				*c = L'\0';
+				text_iterator_codepoint_next(it, NULL);
+				break;
+			} else if (wclen == (size_t)-2) {
+				if (!text_iterator_next(it)) {
+					eof = true;
+					break;
+				}
+			} else if (wclen == 0) {
+				text_iterator_byte_next(it, NULL);
+				break;
+			} else {
+				if (wclen < plen) {
+					it->text += wclen;
+					it->pos += wclen;
+				} else {
+					text_iterator_next(it);
+				}
+				break;
+			}
+		}
+
+		if (eof) {
+			*c = L'\0';
+			*pos_add = 1;
+			return 1;
+		} else {
+			*pos_add = it->pos - start;
+			return 0;
+		}
 	} else {
-		*c = '\0';
-		return 1;
+		*pos_add = 1;
+		if (it->pos < r->end && text_iterator_byte_get(it, (char*)c)) {
+			text_iterator_byte_next(it, NULL);
+			return 0;
+		} else {
+			*c = '\0';
+			return 1;
+		}
 	}
 }
 
```
