aboutsummaryrefslogtreecommitdiff
path: root/util.c
diff options
context:
space:
mode:
authorRandy Palamar <randy@rnpnr.xyz>2025-12-05 22:36:10 -0700
committerRandy Palamar <randy@rnpnr.xyz>2026-01-06 16:52:56 -0700
commit0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8 (patch)
tree25596fd04e3623571a155e9c2b1e2503aa9dd4f6 /util.c
parent6ced61ef5f366001877823ed8aff978035fa53c8 (diff)
downloadvis-0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8.tar.gz
vis-0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8.tar.xz
replace oversized libutf with smaller version
This is taken from one of my other projects. There was no reason for there to be 2x the code. Tests checking for surrogate characters and non-characters were removed; I see no reason why the user shouldn't be allowed to insert those characters in text (they exist in the standard). Also, in the case of non-characters, only the first two were being checked and not the other 64.
Diffstat (limited to 'util.c')
-rw-r--r--util.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..202c750
--- /dev/null
+++ b/util.c
@@ -0,0 +1,28 @@
+/* Encode the code point `cp` as UTF-8 into `out`.
+ *
+ * Returns the number of bytes stored in `out` (1 to 4). When `cp` is
+ * greater than 0x10FFFF nothing is written and 0 is returned.
+ *
+ * Every value up to 0x10FFFF is encoded; surrogates and non-characters
+ * are not filtered out here.
+ */
+static uint32_t
+utf8_encode(uint8_t out[4], uint32_t cp)
+{
+	/* lead byte tag, indexed by total sequence length */
+	static const uint8_t lead_tag[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};
+
+	if (cp > 0x10FFFF) {
+		return 0;
+	}
+
+	if (cp < 0x80) {
+		out[0] = (uint8_t)cp;
+		return 1;
+	}
+
+	uint32_t length = 2;
+	if (cp > 0x7FF) {
+		length++;
+	}
+	if (cp > 0xFFFF) {
+		length++;
+	}
+
+	/* fill continuation bytes back to front, 6 payload bits each */
+	uint32_t bits = cp;
+	for (uint32_t i = length - 1; i > 0; i--) {
+		out[i] = (uint8_t)(0x80 | (bits & 0x3F));
+		bits >>= 6;
+	}
+
+	/* whatever is left fits in the payload bits of the lead byte */
+	out[0] = (uint8_t)(lead_tag[length] | bits);
+	return length;
+}