/*
 * Encode the Unicode code point `cp` as UTF-8 into `out`.
 *
 * Returns the number of bytes written (1 to 4). When `cp` is greater than
 * 0x10FFFF nothing is written, `out` is left untouched and 0 is returned.
 *
 * Surrogates (0xD800-0xDFFF) and noncharacters are deliberately not
 * rejected: they are encoded like any other code point in their range.
 */
static uint32_t
utf8_encode(uint8_t out[4], uint32_t cp)
{
	uint32_t result;
	if (cp <= 0x7F) {
		/* 0xxxxxxx */
		result = 1;
		out[0] = (uint8_t)(cp & 0x7F);
	} else if (cp <= 0x7FF) {
		/* 110xxxxx 10xxxxxx */
		result = 2;
		out[0] = (uint8_t)(((cp >>  6) & 0x1F) | 0xC0);
		out[1] = (uint8_t)(((cp >>  0) & 0x3F) | 0x80);
	} else if (cp <= 0xFFFF) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		result = 3;
		out[0] = (uint8_t)(((cp >> 12) & 0x0F) | 0xE0);
		out[1] = (uint8_t)(((cp >>  6) & 0x3F) | 0x80);
		out[2] = (uint8_t)(((cp >>  0) & 0x3F) | 0x80);
	} else if (cp <= 0x10FFFF) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		result = 4;
		out[0] = (uint8_t)(((cp >> 18) & 0x07) | 0xF0);
		out[1] = (uint8_t)(((cp >> 12) & 0x3F) | 0x80);
		out[2] = (uint8_t)(((cp >>  6) & 0x3F) | 0x80);
		out[3] = (uint8_t)(((cp >>  0) & 0x3F) | 0x80);
	} else {
		/* beyond the Unicode code space: report failure, write nothing */
		result = 0;
	}
	return result;
}