diff options
| -rw-r--r-- | text-motions.h | 8 | ||||
| -rw-r--r-- | text.c | 57 | ||||
| -rw-r--r-- | text.h | 7 |
3 files changed, 65 insertions, 7 deletions
diff --git a/text-motions.h b/text-motions.h index d65bdf2..57aa09e 100644 --- a/text-motions.h +++ b/text-motions.h @@ -12,7 +12,7 @@ size_t text_begin(Text*, size_t pos); size_t text_end(Text*, size_t pos); -/* move to start of next / previous UTF-8 character */ +/* char refers to a grapheme (might skip over multiple Unicode codepoints) */ size_t text_char_next(Text*, size_t pos); size_t text_char_prev(Text*, size_t pos); @@ -39,11 +39,11 @@ size_t text_line_lastchar(Text*, size_t pos); size_t text_line_end(Text*, size_t pos); size_t text_line_next(Text*, size_t pos); size_t text_line_offset(Text*, size_t pos, size_t off); -/* get character count of the line upto `pos' */ +/* get grapheme count of the line upto `pos' */ int text_line_char_get(Text*, size_t pos); -/* get position of the `count' character in the line containing `pos' */ +/* get position of the `count' grapheme in the line containing `pos' */ size_t text_line_char_set(Text*, size_t pos, int count); -/* move to the next/previous character on the same line */ +/* move to the next/previous grapheme on the same line */ size_t text_line_char_next(Text*, size_t pos); size_t text_line_char_prev(Text*, size_t pos); /* move to the next/previous empty line */ @@ -20,6 +20,7 @@ #include <time.h> #include <fcntl.h> #include <errno.h> +#include <wchar.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> @@ -1333,7 +1334,7 @@ bool text_iterator_byte_prev(Iterator *it, char *b) { return true; } -bool text_iterator_char_next(Iterator *it, char *c) { +bool text_iterator_codepoint_next(Iterator *it, char *c) { while (text_iterator_byte_next(it, NULL)) { if (ISUTF8(*it->text)) { if (c) @@ -1344,7 +1345,7 @@ bool text_iterator_char_next(Iterator *it, char *c) { return false; } -bool text_iterator_char_prev(Iterator *it, char *c) { +bool text_iterator_codepoint_prev(Iterator *it, char *c) { while (text_iterator_byte_prev(it, NULL)) { if (ISUTF8(*it->text)) { if (c) @@ -1355,6 +1356,58 @@ bool text_iterator_char_prev(Iterator *it, char *c) { return false; } +bool text_iterator_char_next(Iterator *it, char *c) { + if (!text_iterator_codepoint_next(it, c)) + return false; + mbstate_t ps = { 0 }; + for (;;) { + char buf[MB_CUR_MAX]; + size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf); + wchar_t wc; + size_t wclen = mbrtowc(&wc, buf, len, &ps); + if (wclen == (size_t)-1 && errno == EILSEQ) { + return true; + } else if (wclen == (size_t)-2) { + return false; + } else if (wclen == 0) { + return true; + } else { + int width = wcwidth(wc); + if (width != 0) + return true; + if (!text_iterator_codepoint_next(it, c)) + return false; + } + } + return true; +} + +bool text_iterator_char_prev(Iterator *it, char *c) { + if (!text_iterator_codepoint_prev(it, c)) + return false; + for (;;) { + char buf[MB_CUR_MAX]; + size_t len = text_bytes_get(it->piece->text, it->pos, sizeof buf, buf); + wchar_t wc; + mbstate_t ps = { 0 }; + size_t wclen = mbrtowc(&wc, buf, len, &ps); + if (wclen == (size_t)-1 && errno == EILSEQ) { + return true; + } else if (wclen == (size_t)-2) { + return false; + } else if (wclen == 0) { + return true; + } else { + int width = wcwidth(wc); + if (width != 0) + return true; + if (!text_iterator_codepoint_prev(it, c)) + return false; + } + } + return true; +} + bool text_byte_get(Text *txt, size_t pos, char *buf) { return text_bytes_get(txt, pos, 1, buf); } @@ -84,7 +84,12 @@ bool text_iterator_byte_prev(Iterator*, char *b); /* if the new position is at EOF a NUL byte (which is not actually * part of the file) is read. */ bool text_iterator_byte_next(Iterator*, char *b); - +/* move to the next/previous UTF-8 encoded Unicode codepoint + * and set c (if it is non NULL) to the first byte */ +bool text_iterator_codepoint_next(Iterator *it, char *c); +bool text_iterator_codepoint_prev(Iterator *it, char *c); +/* move to next/previous grapheme i.e. might skip over multiple + * Unicode codepoints (e.g. for combining characters) */ bool text_iterator_char_next(Iterator*, char *c); bool text_iterator_char_prev(Iterator*, char *c); |
