|
|
ac3a84 |
From c48d12fc0abb8f113ed386c47bf02451ea8f853d Mon Sep 17 00:00:00 2001
|
|
|
ac3a84 |
From: Jan Janssen <medhefgo@web.de>
|
|
|
ac3a84 |
Date: Tue, 22 Nov 2022 15:55:07 +0100
|
|
|
ac3a84 |
Subject: [PATCH] boot: Add xstrn8_to_16
|
|
|
ac3a84 |
|
|
|
ac3a84 |
(cherry picked from commit 8ad7deffa95d33b5849ad6589dd52ab12e645edc)
|
|
|
ac3a84 |
|
|
|
ac3a84 |
Related: #2138081
|
|
|
ac3a84 |
---
|
|
|
ac3a84 |
src/boot/efi/efi-string.c | 78 +++++++++++++++++++++++++++++++++-
|
|
|
ac3a84 |
src/boot/efi/efi-string.h | 5 +++
|
|
|
ac3a84 |
src/boot/efi/test-efi-string.c | 27 ++++++++++++
|
|
|
ac3a84 |
3 files changed, 109 insertions(+), 1 deletion(-)
|
|
|
ac3a84 |
|
|
|
ac3a84 |
diff --git a/src/boot/efi/efi-string.c b/src/boot/efi/efi-string.c
|
|
|
ac3a84 |
index b877c6f224..2ba15673c9 100644
|
|
|
ac3a84 |
--- a/src/boot/efi/efi-string.c
|
|
|
ac3a84 |
+++ b/src/boot/efi/efi-string.c
|
|
|
ac3a84 |
@@ -9,7 +9,8 @@
|
|
|
ac3a84 |
# include "util.h"
|
|
|
ac3a84 |
#else
|
|
|
ac3a84 |
# include <stdlib.h>
|
|
|
ac3a84 |
-# include "macro.h"
|
|
|
ac3a84 |
+# include "alloc-util.h"
|
|
|
ac3a84 |
+# define xnew(t, n) ASSERT_SE_PTR(new(t, n))
|
|
|
ac3a84 |
# define xmalloc(n) ASSERT_SE_PTR(malloc(n))
|
|
|
ac3a84 |
#endif
|
|
|
ac3a84 |
|
|
|
ac3a84 |
@@ -138,6 +139,81 @@ DEFINE_STRCHR(char16_t, strchr16);
|
|
|
ac3a84 |
DEFINE_STRNDUP(char, xstrndup8, strnlen8);
|
|
|
ac3a84 |
DEFINE_STRNDUP(char16_t, xstrndup16, strnlen16);
|
|
|
ac3a84 |
|
|
|
ac3a84 |
+static unsigned utf8_to_unichar(const char *utf8, size_t n, char32_t *c) {
|
|
|
ac3a84 |
+ char32_t unichar;
|
|
|
ac3a84 |
+ unsigned len;
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert(utf8);
|
|
|
ac3a84 |
+ assert(c);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ if (!(utf8[0] & 0x80)) {
|
|
|
ac3a84 |
+ *c = utf8[0];
|
|
|
ac3a84 |
+ return 1;
|
|
|
ac3a84 |
+ } else if ((utf8[0] & 0xe0) == 0xc0) {
|
|
|
ac3a84 |
+ len = 2;
|
|
|
ac3a84 |
+ unichar = utf8[0] & 0x1f;
|
|
|
ac3a84 |
+ } else if ((utf8[0] & 0xf0) == 0xe0) {
|
|
|
ac3a84 |
+ len = 3;
|
|
|
ac3a84 |
+ unichar = utf8[0] & 0x0f;
|
|
|
ac3a84 |
+ } else if ((utf8[0] & 0xf8) == 0xf0) {
|
|
|
ac3a84 |
+ len = 4;
|
|
|
ac3a84 |
+ unichar = utf8[0] & 0x07;
|
|
|
ac3a84 |
+ } else if ((utf8[0] & 0xfc) == 0xf8) {
|
|
|
ac3a84 |
+ len = 5;
|
|
|
ac3a84 |
+ unichar = utf8[0] & 0x03;
|
|
|
ac3a84 |
+ } else if ((utf8[0] & 0xfe) == 0xfc) {
|
|
|
ac3a84 |
+ len = 6;
|
|
|
ac3a84 |
+ unichar = utf8[0] & 0x01;
|
|
|
ac3a84 |
+ } else {
|
|
|
ac3a84 |
+ *c = UINT32_MAX;
|
|
|
ac3a84 |
+ return 1;
|
|
|
ac3a84 |
+ }
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ if (len > n) {
|
|
|
ac3a84 |
+ *c = UINT32_MAX;
|
|
|
ac3a84 |
+ return len;
|
|
|
ac3a84 |
+ }
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ for (unsigned i = 1; i < len; i++) {
|
|
|
ac3a84 |
+ if ((utf8[i] & 0xc0) != 0x80) {
|
|
|
ac3a84 |
+ *c = UINT32_MAX;
|
|
|
ac3a84 |
+ return len;
|
|
|
ac3a84 |
+ }
|
|
|
ac3a84 |
+ unichar <<= 6;
|
|
|
ac3a84 |
+ unichar |= utf8[i] & 0x3f;
|
|
|
ac3a84 |
+ }
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ *c = unichar;
|
|
|
ac3a84 |
+ return len;
|
|
|
ac3a84 |
+}
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+/* Convert UTF-8 to UCS-2, skipping invalid or short byte sequences and code points UCS-2 cannot hold (surrogates, non-BMP). */
|
|
|
ac3a84 |
+char16_t *xstrn8_to_16(const char *str8, size_t n) {
|
|
|
ac3a84 |
+ if (!str8 || n == 0)
|
|
|
ac3a84 |
+ return NULL;
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ size_t i = 0;
|
|
|
ac3a84 |
+ char16_t *str16 = xnew(char16_t, n + 1);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ while (n > 0 && *str8 != '\0') {
|
|
|
ac3a84 |
+ char32_t unichar;
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ size_t utf8len = utf8_to_unichar(str8, n, &unichar);
|
|
|
ac3a84 |
+ str8 += utf8len;
|
|
|
ac3a84 |
+ n = LESS_BY(n, utf8len);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ switch (unichar) {
|
|
|
ac3a84 |
+ case 0 ... 0xd7ffU:
|
|
|
ac3a84 |
+ case 0xe000U ... 0xffffU:
|
|
|
ac3a84 |
+ str16[i++] = unichar;
|
|
|
ac3a84 |
+ break;
|
|
|
ac3a84 |
+ }
|
|
|
ac3a84 |
+ }
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ str16[i] = '\0';
|
|
|
ac3a84 |
+ return str16;
|
|
|
ac3a84 |
+}
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
static bool efi_fnmatch_prefix(const char16_t *p, const char16_t *h, const char16_t **ret_p, const char16_t **ret_h) {
|
|
|
ac3a84 |
assert(p);
|
|
|
ac3a84 |
assert(h);
|
|
|
ac3a84 |
diff --git a/src/boot/efi/efi-string.h b/src/boot/efi/efi-string.h
|
|
|
ac3a84 |
index 1ebd5fd6b7..9b2a9ad1c5 100644
|
|
|
ac3a84 |
--- a/src/boot/efi/efi-string.h
|
|
|
ac3a84 |
+++ b/src/boot/efi/efi-string.h
|
|
|
ac3a84 |
@@ -99,6 +99,11 @@ static inline char16_t *xstrdup16(const char16_t *s) {
|
|
|
ac3a84 |
return xstrndup16(s, SIZE_MAX);
|
|
|
ac3a84 |
}
|
|
|
ac3a84 |
|
|
|
ac3a84 |
+char16_t *xstrn8_to_16(const char *str8, size_t n);
|
|
|
ac3a84 |
+static inline char16_t *xstr8_to_16(const char *str8) {
|
|
|
ac3a84 |
+ return xstrn8_to_16(str8, strlen8(str8));
|
|
|
ac3a84 |
+}
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
bool efi_fnmatch(const char16_t *pattern, const char16_t *haystack);
|
|
|
ac3a84 |
|
|
|
ac3a84 |
bool parse_number8(const char *s, uint64_t *ret_u, const char **ret_tail);
|
|
|
ac3a84 |
diff --git a/src/boot/efi/test-efi-string.c b/src/boot/efi/test-efi-string.c
|
|
|
ac3a84 |
index 2b2359fe5c..7b43e1d629 100644
|
|
|
ac3a84 |
--- a/src/boot/efi/test-efi-string.c
|
|
|
ac3a84 |
+++ b/src/boot/efi/test-efi-string.c
|
|
|
ac3a84 |
@@ -324,6 +324,33 @@ TEST(xstrdup16) {
|
|
|
ac3a84 |
free(s);
|
|
|
ac3a84 |
}
|
|
|
ac3a84 |
|
|
|
ac3a84 |
+TEST(xstrn8_to_16) {
|
|
|
ac3a84 |
+ char16_t *s = NULL;
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert_se(xstrn8_to_16(NULL, 1) == NULL);
|
|
|
ac3a84 |
+ assert_se(xstrn8_to_16("a", 0) == NULL);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert_se(s = xstrn8_to_16("", 1));
|
|
|
ac3a84 |
+ assert_se(streq16(s, u""));
|
|
|
ac3a84 |
+ free(s);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert_se(s = xstrn8_to_16("1", 1));
|
|
|
ac3a84 |
+ assert_se(streq16(s, u"1"));
|
|
|
ac3a84 |
+ free(s);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert_se(s = xstr8_to_16("abcxyzABCXYZ09 .,-_#*!\"§$%&/()=?`~"));
|
|
|
ac3a84 |
+ assert_se(streq16(s, u"abcxyzABCXYZ09 .,-_#*!\"§$%&/()=?`~"));
|
|
|
ac3a84 |
+ free(s);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert_se(s = xstr8_to_16("ÿⱿ𝇉 😺"));
|
|
|
ac3a84 |
+ assert_se(streq16(s, u"ÿⱿ "));
|
|
|
ac3a84 |
+ free(s);
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
+ assert_se(s = xstrn8_to_16("¶¶", 3));
|
|
|
ac3a84 |
+ assert_se(streq16(s, u"¶"));
|
|
|
ac3a84 |
+ free(s);
|
|
|
ac3a84 |
+}
|
|
|
ac3a84 |
+
|
|
|
ac3a84 |
#define TEST_FNMATCH_ONE(pattern, haystack, expect) \
|
|
|
ac3a84 |
({ \
|
|
|
ac3a84 |
assert_se(fnmatch(pattern, haystack, 0) == (expect ? 0 : FNM_NOMATCH)); \
|