diff options
| author | Randy Palamar <randy@rnpnr.xyz> | 2025-12-05 22:36:10 -0700 |
|---|---|---|
| committer | Randy Palamar <randy@rnpnr.xyz> | 2026-01-06 16:52:56 -0700 |
| commit | 0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8 (patch) | |
| tree | 25596fd04e3623571a155e9c2b1e2503aa9dd4f6 | |
| parent | 6ced61ef5f366001877823ed8aff978035fa53c8 (diff) | |
| download | vis-0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8.tar.gz vis-0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8.tar.xz | |
replace oversized libutf with smaller version
This is taken from one of my other projects. There was no reason
for there to be 2x the code.
Tests checking for surrogate characters and non-characters were
removed. I see no reason why the user shouldn't be allowed to
insert those characters in text (they exist in the standard).
Also, in the case of non-characters, only the first two were being
checked and not the other 64.
| -rw-r--r-- | LICENSE | 3 | ||||
| -rw-r--r-- | libutf.c | 54 | ||||
| -rw-r--r-- | libutf.h | 34 | ||||
| -rw-r--r-- | main.c | 18 | ||||
| -rw-r--r-- | test/vis/insert-mode/verbatim.in | 2 | ||||
| -rw-r--r-- | test/vis/insert-mode/verbatim.keys | 4 | ||||
| -rw-r--r-- | test/vis/insert-mode/verbatim.ref | 2 | ||||
| -rw-r--r-- | util.c | 28 | ||||
| -rw-r--r-- | vis.c | 19 | ||||
| -rw-r--r-- | vis.h | 3 |
10 files changed, 49 insertions, 118 deletions
@@ -23,9 +23,6 @@ under terms compatible with the above ISC license: - map.[ch] originate from the Comprehensive C Archive Network strmap module and are public domain / CC0 licensed - - libutf.[ch] originate from libutf a port of Plan 9's Unicode library - to Unix and are MIT licensed - - sam.[ch] is heavily inspired (and partially based upon) the X11 version of Rob Pike's sam text editor originally written for Plan 9 and distributed under an ISC-like license diff --git a/libutf.c b/libutf.c deleted file mode 100644 index 108595e..0000000 --- a/libutf.c +++ /dev/null @@ -1,54 +0,0 @@ -/* libutf8 © 2012-2015 Connor Lane Smith <cls@lubutu.com> */ -#include "util.h" - -#include "libutf.h" - -int -runelen(Rune r) -{ - if(r <= 0x7F) - return 1; - else if(r <= 0x07FF) - return 2; - else if(r <= 0xD7FF) - return 3; - else if(r <= 0xDFFF) - return 0; /* surrogate character */ - else if(r <= 0xFFFD) - return 3; - else if(r <= 0xFFFF) - return 0; /* illegal character */ - else if(r <= Runemax) - return 4; - else - return 0; /* rune too large */ -} - -int -runetochar(char *s, const Rune *p) -{ - Rune r = *p; - - switch(runelen(r)) { - case 1: /* 0aaaaaaa */ - s[0] = r; - return 1; - case 2: /* 00000aaa aabbbbbb */ - s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */ - s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */ - return 2; - case 3: /* aaaabbbb bbcccccc */ - s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */ - s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */ - s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */ - return 3; - case 4: /* 000aaabb bbbbcccc ccdddddd */ - s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */ - s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */ - s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */ - s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */ - return 4; - default: - return 0; /* error */ - } -} diff --git a/libutf.h b/libutf.h deleted file mode 100644 index 30255cc..0000000 --- a/libutf.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef LIBUTF_H -#define 
LIBUTF_H - -/* libutf8 © 2012-2015 Connor Lane Smith <cls@lubutu.com> */ -#include <stddef.h> -#include <stdint.h> - -#if __STDC_VERSION__ >= 201112L -#include <uchar.h> -#ifdef __STDC_UTF_32__ -#define RUNE_C INT32_C -typedef char32_t Rune; -#endif -#endif - -#ifndef RUNE_C -#ifdef INT32_C -#define RUNE_C INT32_C -typedef uint_least32_t Rune; -#else -#define RUNE_C(x) x##L -typedef unsigned long Rune; -#endif -#endif - -#define UTFmax 4 /* maximum bytes per rune */ - -#define Runeself 0x80 /* rune and utf are equal (<) */ -#define Runemax RUNE_C(0x10FFFF) /* maximum rune value */ - -VIS_INTERNAL int runelen(Rune r); -VIS_INTERNAL int runetochar(char *s, const Rune *p); - -#endif @@ -844,7 +844,7 @@ static KEY_ACTION_FN(ka_replace) if (!next) return NULL; - char replacement[UTFmax+1]; + char replacement[4+1]; if (!vis_keys_utf8(vis, keys, replacement)) return next; @@ -897,7 +897,7 @@ static KEY_ACTION_FN(ka_movement_key) const char *next = vis_keys_next(vis, keys); if (!next) return NULL; - char utf8[UTFmax+1]; + char utf8[4+1]; if (vis_keys_utf8(vis, keys, utf8)) vis_motion(vis, arg->i, utf8); return next; @@ -1030,8 +1030,8 @@ static KEY_ACTION_FN(ka_prompt_show) static KEY_ACTION_FN(ka_insert_verbatim) { - Rune rune = 0; - char buf[4], type = keys[0]; + uint32_t rune = 0; + unsigned char buf[4], type = keys[0]; const char *data = NULL; int len = 0, count = 0, base = 0; switch (type) { @@ -1084,22 +1084,22 @@ static KEY_ACTION_FN(ka_insert_verbatim) if (count > 0) return NULL; if (type == 'u' || type == 'U') { - len = runetochar(buf, &rune); + len = utf8_encode(buf, rune); } else { buf[0] = rune; len = 1; } - data = buf; + data = (char *)buf; } else { const char *next = vis_keys_next(vis, keys); if (!next) return NULL; - if ((rune = vis_keys_codepoint(vis, keys)) != (Rune)-1) { - len = runetochar(buf, &rune); + if ((rune = vis_keys_codepoint(vis, keys)) != -1) { + len = utf8_encode(buf, rune); if (buf[0] == '\n') buf[0] = '\r'; - data = buf; + data = (char 
*)buf; } else { vis_info_show(vis, "Unknown key"); } diff --git a/test/vis/insert-mode/verbatim.in b/test/vis/insert-mode/verbatim.in index 6fcb11a..da2d1b4 100644 --- a/test/vis/insert-mode/verbatim.in +++ b/test/vis/insert-mode/verbatim.in @@ -13,9 +13,7 @@ O100 = 64 = U+07FF = U+D7FF = -U+DFFF = /* not really correct */ U+FFFD = -U+FFFF = /* not really correct */ U+10FFFF = U+11000 = /* invalid */ <Escape> = diff --git a/test/vis/insert-mode/verbatim.keys b/test/vis/insert-mode/verbatim.keys index ba71c21..0441e3b 100644 --- a/test/vis/insert-mode/verbatim.keys +++ b/test/vis/insert-mode/verbatim.keys @@ -21,12 +21,8 @@ a<Space><C-v>u07FF<Escape> n a<Space><C-v>uD7FF<Escape> n -a<Space><C-v>uDFFF<Escape> -n a<Space><C-v>uFFFD<Escape> n -a<Space><C-v>uFFFF<Escape> -n a<Space><C-v>U0010FFFF<Escape> n a<Space><C-v>U00110000<Escape> diff --git a/test/vis/insert-mode/verbatim.ref b/test/vis/insert-mode/verbatim.ref index 28a5c8c..98494b0 100644 --- a/test/vis/insert-mode/verbatim.ref +++ b/test/vis/insert-mode/verbatim.ref @@ -13,9 +13,7 @@ O100 = @ 64 = @ U+07FF = ߿ U+D7FF = -U+DFFF = /* not really correct */ U+FFFD = � -U+FFFF = /* not really correct */ U+10FFFF = U+11000 = /* invalid */ <Escape> = @@ -0,0 +1,28 @@ +static uint32_t +utf8_encode(uint8_t out[4], uint32_t cp) +{ + uint32_t result; + if (cp <= 0x7F) { + out[0] = cp & 0x7F; + result = 1; + } else if (cp <= 0x7FF) { + result = 2; + out[0] = ((cp >> 6) & 0x1F) | 0xC0; + out[1] = ((cp >> 0) & 0x3F) | 0x80; + } else if (cp <= 0xFFFF) { + result = 3; + out[0] = ((cp >> 12) & 0x0F) | 0xE0; + out[1] = ((cp >> 6) & 0x3F) | 0x80; + out[2] = ((cp >> 0) & 0x3F) | 0x80; + } else if (cp <= 0x10FFFF) { + result = 4; + out[0] = ((cp >> 18) & 0x07) | 0xF0; + out[1] = ((cp >> 12) & 0x3F) | 0x80; + out[2] = ((cp >> 6) & 0x3F) | 0x80; + out[3] = ((cp >> 0) & 0x3F) | 0x80; + } else { + //out[0] = '?'; + result = 0; + } + return result; +} @@ -11,10 +11,11 @@ #include "ui.h" #include "vis-subprocess.h" +#include "util.c" + 
#include "array.c" #include "buffer.c" #include "event-basic.c" -#include "libutf.c" #include "map.c" #include "sam.c" #include "text.c" @@ -1017,13 +1018,15 @@ long vis_keys_codepoint(Vis *vis, const char *keys) { return -1; } -bool vis_keys_utf8(Vis *vis, const char *keys, char utf8[static UTFmax+1]) { - Rune rune = vis_keys_codepoint(vis, keys); - if (rune == (Rune)-1) - return false; - size_t len = runetochar(utf8, &rune); - utf8[len] = '\0'; - return true; +bool vis_keys_utf8(Vis *vis, const char *keys, char utf8[4+1]) +{ + uint32_t cp = vis_keys_codepoint(vis, keys); + bool result = cp != -1; + if (result) { + size_t len = utf8_encode((unsigned char *)utf8, cp); + utf8[len] = 0; + } + return result; } typedef struct { @@ -16,7 +16,6 @@ typedef struct Win Win; #include "ui.h" #include "view.h" #include "text-regex.h" -#include "libutf.h" #include "array.h" #include "buffer.h" @@ -1326,7 +1325,7 @@ VIS_EXPORT long vis_keys_codepoint(Vis *vis, const char *keys); * .. note:: Guarantees that ``utf8`` is NUL terminated on success. * @endrst */ -VIS_EXPORT bool vis_keys_utf8(Vis *vis, const char *keys, char utf8[static UTFmax+1]); +VIS_EXPORT bool vis_keys_utf8(Vis *vis, const char *keys, char utf8[4+1]); /** * Process symbolic keys as if they were user originated input. * @param vis The editor instance. |
