diff options
| author | Marc André Tanner <mat@brain-dump.org> | 2015-11-23 11:10:38 +0100 |
|---|---|---|
| committer | Marc André Tanner <mat@brain-dump.org> | 2015-11-23 11:39:37 +0100 |
| commit | 51e92f0c8e7b50c684287bea1a55edbde128053f (patch) | |
| tree | 02eaed9cd71db186569698928c7afeca457e2bd7 /text.c | |
| parent | 0667089d47dd0cee5bda83965ff6bbdc2e4fd288 (diff) | |
| download | vis-51e92f0c8e7b50c684287bea1a55edbde128053f.tar.gz vis-51e92f0c8e7b50c684287bea1a55edbde128053f.tar.xz | |
text: introduce functions to iterate over graphemes
They currently consider any character for which wcwidth(3)
returns 0 as a combining character.
Diffstat (limited to 'text.c')
| -rw-r--r-- | text.c | 57 |
1 files changed, 55 insertions, 2 deletions
@@ -20,6 +20,7 @@ #include <time.h> #include <fcntl.h> #include <errno.h> +#include <wchar.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> @@ -1333,7 +1334,7 @@ bool text_iterator_byte_prev(Iterator *it, char *b) { return true; } -bool text_iterator_char_next(Iterator *it, char *c) { +bool text_iterator_codepoint_next(Iterator *it, char *c) { while (text_iterator_byte_next(it, NULL)) { if (ISUTF8(*it->text)) { if (c) @@ -1344,7 +1345,7 @@ bool text_iterator_char_next(Iterator *it, char *c) { return false; } -bool text_iterator_char_prev(Iterator *it, char *c) { +bool text_iterator_codepoint_prev(Iterator *it, char *c) { while (text_iterator_byte_prev(it, NULL)) { if (ISUTF8(*it->text)) { if (c) @@ -1355,6 +1356,58 @@ bool text_iterator_char_prev(Iterator *it, char *c) { return false; } +bool text_iterator_char_next(Iterator *it, char *c) { + if (!text_iterator_codepoint_next(it, c)) + return false; + mbstate_t ps = { 0 }; + for (;;) { + char buf[MB_CUR_MAX]; + size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf); + wchar_t wc; + size_t wclen = mbrtowc(&wc, buf, len, &ps); + if (wclen == (size_t)-1 && errno == EILSEQ) { + return true; + } else if (wclen == (size_t)-2) { + return false; + } else if (wclen == 0) { + return true; + } else { + int width = wcwidth(wc); + if (width != 0) + return true; + if (!text_iterator_codepoint_next(it, c)) + return false; + } + } + return true; +} + +bool text_iterator_char_prev(Iterator *it, char *c) { + if (!text_iterator_codepoint_prev(it, c)) + return false; + for (;;) { + char buf[MB_CUR_MAX]; + size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf); + wchar_t wc; + mbstate_t ps = { 0 }; + size_t wclen = mbrtowc(&wc, buf, len, &ps); + if (wclen == (size_t)-1 && errno == EILSEQ) { + return true; + } else if (wclen == (size_t)-2) { + return false; + } else if (wclen == 0) { + return true; + } else { + int width = wcwidth(wc); + if (width != 0) + return true; + if 
(!text_iterator_codepoint_prev(it, c)) + return false; + } + } + return true; +} + bool text_byte_get(Text *txt, size_t pos, char *buf) { return text_bytes_get(txt, pos, 1, buf); } |
