From 0d9bbb74c6de959ab7c6b93b7a97f9f2e643e8e8 Mon Sep 17 00:00:00 2001
From: Randy Palamar <randy@rnpnr.xyz>
Date: Fri, 5 Dec 2025 22:36:10 -0700
Subject: replace oversized libutf with smaller version

This is taken from one of my other projects. There was no reason
for there to be 2x the code.

Tests checking for surrogate characters and non-characters were
removed. I see no reason why the user shouldn't be allowed to
insert those characters in text (they exist in the standard).
Also, in the case of non-characters only the first two were being
checked and not the other 64.
---
 util.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 util.c

(limited to 'util.c')

diff --git a/util.c b/util.c
new file mode 100644
index 0000000..202c750
--- /dev/null
+++ b/util.c
@@ -0,0 +1,28 @@
+static uint32_t // returns bytes written to out (1-4), or 0 when cp > 0x10FFFF
+utf8_encode(uint8_t out[4], uint32_t cp) // writes the UTF-8 encoding of code point cp into out
+{ // note: no surrogate or non-character check; every cp <= 0x10FFFF is encoded
+	uint32_t result; // byte count; assigned on every branch below
+	if (cp <= 0x7F) { // 7 bits -> 1 byte: 0xxxxxxx
+		out[0] = cp & 0x7F;
+		result = 1;
+	} else if (cp <= 0x7FF) { // 11 bits -> 2 bytes: 110xxxxx 10xxxxxx
+		result = 2;
+		out[0] = ((cp >>  6) & 0x1F) | 0xC0; // top 5 bits + 110 lead marker
+		out[1] = ((cp >>  0) & 0x3F) | 0x80; // low 6 bits + 10 continuation marker
+	} else if (cp <= 0xFFFF) { // 16 bits -> 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+		result = 3;
+		out[0] = ((cp >> 12) & 0x0F) | 0xE0; // top 4 bits + 1110 lead marker
+		out[1] = ((cp >>  6) & 0x3F) | 0x80; // middle 6 bits
+		out[2] = ((cp >>  0) & 0x3F) | 0x80; // low 6 bits
+	} else if (cp <= 0x10FFFF) { // 21 bits -> 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		result = 4;
+		out[0] = ((cp >> 18) & 0x07) | 0xF0; // top 3 bits + 11110 lead marker
+		out[1] = ((cp >> 12) & 0x3F) | 0x80; // bits 17..12
+		out[2] = ((cp >>  6) & 0x3F) | 0x80; // bits 11..6
+		out[3] = ((cp >>  0) & 0x3F) | 0x80; // bits 5..0
+	} else { // cp is above 0x10FFFF: nothing is written to out
+		//out[0] = '?';
+		result = 0; // 0 = nothing encoded
+	}
+	return result;
+}
-- 
cgit v1.2.3