|
|
a4b143 |
From 1bd14ab0b60de49c8546f22d90996b95423b9eeb Mon Sep 17 00:00:00 2001
|
|
|
a4b143 |
From: Dave Reisner <dreisner@archlinux.org>
|
|
|
a4b143 |
Date: Tue, 17 Sep 2013 15:39:09 -0400
|
|
|
a4b143 |
Subject: [PATCH] move utf8 functions from libudev-private.h to utf8.h
|
|
|
a4b143 |
|
|
|
a4b143 |
There's now some more obvious overlap between the two UTF-8 validation
|
|
|
a4b143 |
functions, but no more than there was before this change.
|
|
|
a4b143 |
|
|
|
a4b143 |
This also adds some basic tests for anyone who wants to do more
|
|
|
a4b143 |
merging of these two in the future.
|
|
|
a4b143 |
---
|
|
|
a4b143 |
.gitignore | 2 +
|
|
|
a4b143 |
Makefile.am | 10 +++
|
|
|
a4b143 |
src/libudev/libudev-util.c | 171 +--------------------------------------------
|
|
|
a4b143 |
src/shared/utf8.c | 151 +++++++++++++++++++++++++++++++++++++++
|
|
|
a4b143 |
src/shared/utf8.h | 4 ++
|
|
|
a4b143 |
src/test/test-utf8.c | 59 ++++++++++++++++
|
|
|
a4b143 |
6 files changed, 229 insertions(+), 168 deletions(-)
|
|
|
a4b143 |
create mode 100644 src/test/test-utf8.c
|
|
|
a4b143 |
|
|
|
a4b143 |
diff --git a/.gitignore b/.gitignore
|
|
|
a4b143 |
index 61bc2a3..deeee53 100644
|
|
|
a4b143 |
--- a/.gitignore
|
|
|
a4b143 |
+++ b/.gitignore
|
|
|
a4b143 |
@@ -124,6 +124,7 @@
|
|
|
a4b143 |
/test-list
|
|
|
a4b143 |
/test-log
|
|
|
a4b143 |
/test-login
|
|
|
a4b143 |
+/test-login-shared
|
|
|
a4b143 |
/test-loopback
|
|
|
a4b143 |
/test-mmap-cache
|
|
|
a4b143 |
/test-ns
|
|
|
a4b143 |
@@ -143,6 +144,7 @@
|
|
|
a4b143 |
/test-udev
|
|
|
a4b143 |
/test-unit-file
|
|
|
a4b143 |
/test-unit-name
|
|
|
a4b143 |
+/test-utf8
|
|
|
a4b143 |
/test-util
|
|
|
a4b143 |
/test-watchdog
|
|
|
a4b143 |
/timedatectl
|
|
|
a4b143 |
diff --git a/Makefile.am b/Makefile.am
|
|
|
a4b143 |
index 25bfd91..6dd33ad 100644
|
|
|
a4b143 |
--- a/Makefile.am
|
|
|
a4b143 |
+++ b/Makefile.am
|
|
|
a4b143 |
@@ -1121,6 +1121,7 @@ tests += \
|
|
|
a4b143 |
test-strxcpyx \
|
|
|
a4b143 |
test-unit-name \
|
|
|
a4b143 |
test-unit-file \
|
|
|
a4b143 |
+ test-utf8 \
|
|
|
a4b143 |
test-util \
|
|
|
a4b143 |
test-date \
|
|
|
a4b143 |
test-sleep \
|
|
|
a4b143 |
@@ -1223,6 +1224,15 @@ test_unit_file_CFLAGS = \
|
|
|
a4b143 |
test_unit_file_LDADD = \
|
|
|
a4b143 |
libsystemd-core.la
|
|
|
a4b143 |
|
|
|
a4b143 |
+test_utf8_SOURCES = \
|
|
|
a4b143 |
+ src/test/test-utf8.c
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+test_utf8_CFLAGS = \
|
|
|
a4b143 |
+ $(AM_CFLAGS)
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+test_utf8_LDADD = \
|
|
|
a4b143 |
+ libsystemd-shared.la
|
|
|
a4b143 |
+
|
|
|
a4b143 |
test_util_SOURCES = \
|
|
|
a4b143 |
src/test/test-util.c
|
|
|
a4b143 |
|
|
|
a4b143 |
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
|
|
|
a4b143 |
index 714dc50..d54430c 100644
|
|
|
a4b143 |
--- a/src/libudev/libudev-util.c
|
|
|
a4b143 |
+++ b/src/libudev/libudev-util.c
|
|
|
a4b143 |
@@ -34,6 +34,7 @@
|
|
|
a4b143 |
|
|
|
a4b143 |
#include "libudev.h"
|
|
|
a4b143 |
#include "libudev-private.h"
|
|
|
a4b143 |
+#include "utf8.h"
|
|
|
a4b143 |
|
|
|
a4b143 |
/**
|
|
|
a4b143 |
* SECTION:libudev-util
|
|
|
a4b143 |
@@ -306,129 +307,6 @@ void util_remove_trailing_chars(char *path, char c)
|
|
|
a4b143 |
path[--len] = '\0';
|
|
|
a4b143 |
}
|
|
|
a4b143 |
|
|
|
a4b143 |
-/* count of characters used to encode one unicode char */
|
|
|
a4b143 |
-static int utf8_encoded_expected_len(const char *str)
|
|
|
a4b143 |
-{
|
|
|
a4b143 |
- unsigned char c = (unsigned char)str[0];
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- if (c < 0x80)
|
|
|
a4b143 |
- return 1;
|
|
|
a4b143 |
- if ((c & 0xe0) == 0xc0)
|
|
|
a4b143 |
- return 2;
|
|
|
a4b143 |
- if ((c & 0xf0) == 0xe0)
|
|
|
a4b143 |
- return 3;
|
|
|
a4b143 |
- if ((c & 0xf8) == 0xf0)
|
|
|
a4b143 |
- return 4;
|
|
|
a4b143 |
- if ((c & 0xfc) == 0xf8)
|
|
|
a4b143 |
- return 5;
|
|
|
a4b143 |
- if ((c & 0xfe) == 0xfc)
|
|
|
a4b143 |
- return 6;
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
-}
|
|
|
a4b143 |
-
|
|
|
a4b143 |
-/* decode one unicode char */
|
|
|
a4b143 |
-static int utf8_encoded_to_unichar(const char *str)
|
|
|
a4b143 |
-{
|
|
|
a4b143 |
- int unichar;
|
|
|
a4b143 |
- int len;
|
|
|
a4b143 |
- int i;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- len = utf8_encoded_expected_len(str);
|
|
|
a4b143 |
- switch (len) {
|
|
|
a4b143 |
- case 1:
|
|
|
a4b143 |
- return (int)str[0];
|
|
|
a4b143 |
- case 2:
|
|
|
a4b143 |
- unichar = str[0] & 0x1f;
|
|
|
a4b143 |
- break;
|
|
|
a4b143 |
- case 3:
|
|
|
a4b143 |
- unichar = (int)str[0] & 0x0f;
|
|
|
a4b143 |
- break;
|
|
|
a4b143 |
- case 4:
|
|
|
a4b143 |
- unichar = (int)str[0] & 0x07;
|
|
|
a4b143 |
- break;
|
|
|
a4b143 |
- case 5:
|
|
|
a4b143 |
- unichar = (int)str[0] & 0x03;
|
|
|
a4b143 |
- break;
|
|
|
a4b143 |
- case 6:
|
|
|
a4b143 |
- unichar = (int)str[0] & 0x01;
|
|
|
a4b143 |
- break;
|
|
|
a4b143 |
- default:
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
- }
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- for (i = 1; i < len; i++) {
|
|
|
a4b143 |
- if (((int)str[i] & 0xc0) != 0x80)
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
- unichar <<= 6;
|
|
|
a4b143 |
- unichar |= (int)str[i] & 0x3f;
|
|
|
a4b143 |
- }
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- return unichar;
|
|
|
a4b143 |
-}
|
|
|
a4b143 |
-
|
|
|
a4b143 |
-/* expected size used to encode one unicode char */
|
|
|
a4b143 |
-static int utf8_unichar_to_encoded_len(int unichar)
|
|
|
a4b143 |
-{
|
|
|
a4b143 |
- if (unichar < 0x80)
|
|
|
a4b143 |
- return 1;
|
|
|
a4b143 |
- if (unichar < 0x800)
|
|
|
a4b143 |
- return 2;
|
|
|
a4b143 |
- if (unichar < 0x10000)
|
|
|
a4b143 |
- return 3;
|
|
|
a4b143 |
- if (unichar < 0x200000)
|
|
|
a4b143 |
- return 4;
|
|
|
a4b143 |
- if (unichar < 0x4000000)
|
|
|
a4b143 |
- return 5;
|
|
|
a4b143 |
- return 6;
|
|
|
a4b143 |
-}
|
|
|
a4b143 |
-
|
|
|
a4b143 |
-/* check if unicode char has a valid numeric range */
|
|
|
a4b143 |
-static int utf8_unichar_valid_range(int unichar)
|
|
|
a4b143 |
-{
|
|
|
a4b143 |
- if (unichar > 0x10ffff)
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
- if ((unichar & 0xfffff800) == 0xd800)
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
- if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
- if ((unichar & 0xffff) == 0xffff)
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
- return 1;
|
|
|
a4b143 |
-}
|
|
|
a4b143 |
-
|
|
|
a4b143 |
-/* validate one encoded unicode char and return its length */
|
|
|
a4b143 |
-static int utf8_encoded_valid_unichar(const char *str)
|
|
|
a4b143 |
-{
|
|
|
a4b143 |
- int len;
|
|
|
a4b143 |
- int unichar;
|
|
|
a4b143 |
- int i;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- len = utf8_encoded_expected_len(str);
|
|
|
a4b143 |
- if (len == 0)
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- /* ascii is valid */
|
|
|
a4b143 |
- if (len == 1)
|
|
|
a4b143 |
- return 1;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- /* check if expected encoded chars are available */
|
|
|
a4b143 |
- for (i = 0; i < len; i++)
|
|
|
a4b143 |
- if ((str[i] & 0x80) != 0x80)
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- unichar = utf8_encoded_to_unichar(str);
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- /* check if encoded length matches encoded value */
|
|
|
a4b143 |
- if (utf8_unichar_to_encoded_len(unichar) != len)
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- /* check if value has valid range */
|
|
|
a4b143 |
- if (!utf8_unichar_valid_range(unichar))
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- return len;
|
|
|
a4b143 |
-}
|
|
|
a4b143 |
-
|
|
|
a4b143 |
int util_replace_whitespace(const char *str, char *to, size_t len)
|
|
|
a4b143 |
{
|
|
|
a4b143 |
size_t i, j;
|
|
|
a4b143 |
@@ -457,17 +335,6 @@ int util_replace_whitespace(const char *str, char *to, size_t len)
|
|
|
a4b143 |
return 0;
|
|
|
a4b143 |
}
|
|
|
a4b143 |
|
|
|
a4b143 |
-static int is_whitelisted(char c, const char *white)
|
|
|
a4b143 |
-{
|
|
|
a4b143 |
- if ((c >= '0' && c <= '9') ||
|
|
|
a4b143 |
- (c >= 'A' && c <= 'Z') ||
|
|
|
a4b143 |
- (c >= 'a' && c <= 'z') ||
|
|
|
a4b143 |
- strchr("#+-.:=@_", c) != NULL ||
|
|
|
a4b143 |
- (white != NULL && strchr(white, c) != NULL))
|
|
|
a4b143 |
- return 1;
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
-}
|
|
|
a4b143 |
-
|
|
|
a4b143 |
/* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */
|
|
|
a4b143 |
int util_replace_chars(char *str, const char *white)
|
|
|
a4b143 |
{
|
|
|
a4b143 |
@@ -477,7 +344,7 @@ int util_replace_chars(char *str, const char *white)
|
|
|
a4b143 |
while (str[i] != '\0') {
|
|
|
a4b143 |
int len;
|
|
|
a4b143 |
|
|
|
a4b143 |
- if (is_whitelisted(str[i], white)) {
|
|
|
a4b143 |
+ if (is_utf8_encoding_whitelisted(str[i], white)) {
|
|
|
a4b143 |
i++;
|
|
|
a4b143 |
continue;
|
|
|
a4b143 |
}
|
|
|
a4b143 |
@@ -525,39 +392,7 @@ int util_replace_chars(char *str, const char *white)
|
|
|
a4b143 |
**/
|
|
|
a4b143 |
_public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len)
|
|
|
a4b143 |
{
|
|
|
a4b143 |
- size_t i, j;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- if (str == NULL || str_enc == NULL)
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- for (i = 0, j = 0; str[i] != '\0'; i++) {
|
|
|
a4b143 |
- int seqlen;
|
|
|
a4b143 |
-
|
|
|
a4b143 |
- seqlen = utf8_encoded_valid_unichar(&str[i]);
|
|
|
a4b143 |
- if (seqlen > 1) {
|
|
|
a4b143 |
- if (len-j < (size_t)seqlen)
|
|
|
a4b143 |
- goto err;
|
|
|
a4b143 |
- memcpy(&str_enc[j], &str[i], seqlen);
|
|
|
a4b143 |
- j += seqlen;
|
|
|
a4b143 |
- i += (seqlen-1);
|
|
|
a4b143 |
- } else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) {
|
|
|
a4b143 |
- if (len-j < 4)
|
|
|
a4b143 |
- goto err;
|
|
|
a4b143 |
- sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
|
|
|
a4b143 |
- j += 4;
|
|
|
a4b143 |
- } else {
|
|
|
a4b143 |
- if (len-j < 1)
|
|
|
a4b143 |
- goto err;
|
|
|
a4b143 |
- str_enc[j] = str[i];
|
|
|
a4b143 |
- j++;
|
|
|
a4b143 |
- }
|
|
|
a4b143 |
- }
|
|
|
a4b143 |
- if (len-j < 1)
|
|
|
a4b143 |
- goto err;
|
|
|
a4b143 |
- str_enc[j] = '\0';
|
|
|
a4b143 |
- return 0;
|
|
|
a4b143 |
-err:
|
|
|
a4b143 |
- return -1;
|
|
|
a4b143 |
+ return udev_encode_string(str, str_enc, len);
|
|
|
a4b143 |
}
|
|
|
a4b143 |
|
|
|
a4b143 |
/*
|
|
|
a4b143 |
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
|
|
|
a4b143 |
index 655cc77..1a68394 100644
|
|
|
a4b143 |
--- a/src/shared/utf8.c
|
|
|
a4b143 |
+++ b/src/shared/utf8.c
|
|
|
a4b143 |
@@ -317,3 +317,154 @@ char *utf16_to_utf8(const void *s, size_t length) {
|
|
|
a4b143 |
|
|
|
a4b143 |
return r;
|
|
|
a4b143 |
}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+/* number of bytes used to encode one unicode char, judged from the lead byte (0 if invalid) */
|
|
|
a4b143 |
+static int utf8_encoded_expected_len(const char *str) {
|
|
|
a4b143 |
+ unsigned char c = (unsigned char)str[0];
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ if (c < 0x80)
|
|
|
a4b143 |
+ return 1;
|
|
|
a4b143 |
+ if ((c & 0xe0) == 0xc0)
|
|
|
a4b143 |
+ return 2;
|
|
|
a4b143 |
+ if ((c & 0xf0) == 0xe0)
|
|
|
a4b143 |
+ return 3;
|
|
|
a4b143 |
+ if ((c & 0xf8) == 0xf0)
|
|
|
a4b143 |
+ return 4;
|
|
|
a4b143 |
+ if ((c & 0xfc) == 0xf8)
|
|
|
a4b143 |
+ return 5;
|
|
|
a4b143 |
+ if ((c & 0xfe) == 0xfc)
|
|
|
a4b143 |
+ return 6;
|
|
|
a4b143 |
+ return 0;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+/* decode one utf8-encoded unicode char; returns -1 on a malformed sequence */
|
|
|
a4b143 |
+static int utf8_encoded_to_unichar(const char *str) {
|
|
|
a4b143 |
+ int unichar;
|
|
|
a4b143 |
+ int len;
|
|
|
a4b143 |
+ int i;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ len = utf8_encoded_expected_len(str);
|
|
|
a4b143 |
+ switch (len) {
|
|
|
a4b143 |
+ case 1:
|
|
|
a4b143 |
+ return (int)str[0];
|
|
|
a4b143 |
+ case 2:
|
|
|
a4b143 |
+ unichar = str[0] & 0x1f;
|
|
|
a4b143 |
+ break;
|
|
|
a4b143 |
+ case 3:
|
|
|
a4b143 |
+ unichar = (int)str[0] & 0x0f;
|
|
|
a4b143 |
+ break;
|
|
|
a4b143 |
+ case 4:
|
|
|
a4b143 |
+ unichar = (int)str[0] & 0x07;
|
|
|
a4b143 |
+ break;
|
|
|
a4b143 |
+ case 5:
|
|
|
a4b143 |
+ unichar = (int)str[0] & 0x03;
|
|
|
a4b143 |
+ break;
|
|
|
a4b143 |
+ case 6:
|
|
|
a4b143 |
+ unichar = (int)str[0] & 0x01;
|
|
|
a4b143 |
+ break;
|
|
|
a4b143 |
+ default:
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+ }
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ for (i = 1; i < len; i++) {
|
|
|
a4b143 |
+ if (((int)str[i] & 0xc0) != 0x80)
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+ unichar <<= 6;
|
|
|
a4b143 |
+ unichar |= (int)str[i] & 0x3f;
|
|
|
a4b143 |
+ }
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ return unichar;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+/* expected number of bytes needed to encode one unicode char */
|
|
|
a4b143 |
+static int utf8_unichar_to_encoded_len(int unichar) {
|
|
|
a4b143 |
+ if (unichar < 0x80)
|
|
|
a4b143 |
+ return 1;
|
|
|
a4b143 |
+ if (unichar < 0x800)
|
|
|
a4b143 |
+ return 2;
|
|
|
a4b143 |
+ if (unichar < 0x10000)
|
|
|
a4b143 |
+ return 3;
|
|
|
a4b143 |
+ if (unichar < 0x200000)
|
|
|
a4b143 |
+ return 4;
|
|
|
a4b143 |
+ if (unichar < 0x4000000)
|
|
|
a4b143 |
+ return 5;
|
|
|
a4b143 |
+ return 6;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+/* validate one encoded unicode char; return its length in bytes, or -1 if invalid */
|
|
|
a4b143 |
+int utf8_encoded_valid_unichar(const char *str) {
|
|
|
a4b143 |
+ int len;
|
|
|
a4b143 |
+ int unichar;
|
|
|
a4b143 |
+ int i;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ len = utf8_encoded_expected_len(str);
|
|
|
a4b143 |
+ if (len == 0)
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ /* ascii is valid */
|
|
|
a4b143 |
+ if (len == 1)
|
|
|
a4b143 |
+ return 1;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ /* check that every expected byte has its high bit set; a terminating NUL fails this */
|
|
|
a4b143 |
+ for (i = 0; i < len; i++)
|
|
|
a4b143 |
+ if ((str[i] & 0x80) != 0x80)
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ unichar = utf8_encoded_to_unichar(str);
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ /* check if encoded length matches encoded value */
|
|
|
a4b143 |
+ if (utf8_unichar_to_encoded_len(unichar) != len)
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ /* check if value has valid range */
|
|
|
a4b143 |
+ if (!is_unicode_valid(unichar))
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ return len;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+int is_utf8_encoding_whitelisted(char c, const char *white) {
|
|
|
a4b143 |
+ if ((c >= '0' && c <= '9') ||
|
|
|
a4b143 |
+ (c >= 'A' && c <= 'Z') ||
|
|
|
a4b143 |
+ (c >= 'a' && c <= 'z') ||
|
|
|
a4b143 |
+ strchr("#+-.:=@_", c) != NULL ||
|
|
|
a4b143 |
+ (white != NULL && strchr(white, c) != NULL))
|
|
|
a4b143 |
+ return 1;
|
|
|
a4b143 |
+ return 0;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+int udev_encode_string(const char *str, char *str_enc, size_t len) {
|
|
|
a4b143 |
+ size_t i, j;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ if (str == NULL || str_enc == NULL)
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ for (i = 0, j = 0; str[i] != '\0'; i++) {
|
|
|
a4b143 |
+ int seqlen;
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ seqlen = utf8_encoded_valid_unichar(&str[i]);
|
|
|
a4b143 |
+ if (seqlen > 1) {
|
|
|
a4b143 |
+ if (len-j < (size_t)seqlen)
|
|
|
a4b143 |
+ goto err;
|
|
|
a4b143 |
+ memcpy(&str_enc[j], &str[i], seqlen);
|
|
|
a4b143 |
+ j += seqlen;
|
|
|
a4b143 |
+ i += (seqlen-1);
|
|
|
a4b143 |
+ } else if (str[i] == '\\' || !is_utf8_encoding_whitelisted(str[i], NULL)) {
|
|
|
a4b143 |
+ if (len-j < 4)
|
|
|
a4b143 |
+ goto err;
|
|
|
a4b143 |
+ sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
|
|
|
a4b143 |
+ j += 4;
|
|
|
a4b143 |
+ } else {
|
|
|
a4b143 |
+ if (len-j < 1)
|
|
|
a4b143 |
+ goto err;
|
|
|
a4b143 |
+ str_enc[j] = str[i];
|
|
|
a4b143 |
+ j++;
|
|
|
a4b143 |
+ }
|
|
|
a4b143 |
+ }
|
|
|
a4b143 |
+ if (len-j < 1)
|
|
|
a4b143 |
+ goto err;
|
|
|
a4b143 |
+ str_enc[j] = '\0';
|
|
|
a4b143 |
+ return 0;
|
|
|
a4b143 |
+err:
|
|
|
a4b143 |
+ return -1;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
|
|
|
a4b143 |
index f805ea6..7a5608c 100644
|
|
|
a4b143 |
--- a/src/shared/utf8.h
|
|
|
a4b143 |
+++ b/src/shared/utf8.h
|
|
|
a4b143 |
@@ -34,3 +34,7 @@ char *utf8_filter(const char *s);
|
|
|
a4b143 |
char *ascii_filter(const char *s);
|
|
|
a4b143 |
|
|
|
a4b143 |
char *utf16_to_utf8(const void *s, size_t length);
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+int utf8_encoded_valid_unichar(const char *str);
|
|
|
a4b143 |
+int is_utf8_encoding_whitelisted(char c, const char *white);
|
|
|
a4b143 |
+int udev_encode_string(const char *str, char *str_enc, size_t len);
|
|
|
a4b143 |
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
|
|
|
a4b143 |
new file mode 100644
|
|
|
a4b143 |
index 0000000..d2b9771
|
|
|
a4b143 |
--- /dev/null
|
|
|
a4b143 |
+++ b/src/test/test-utf8.c
|
|
|
a4b143 |
@@ -0,0 +1,59 @@
|
|
|
a4b143 |
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+/***
|
|
|
a4b143 |
+ This file is part of systemd.
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ Copyright 2013 Dave Reisner
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ systemd is free software; you can redistribute it and/or modify it
|
|
|
a4b143 |
+ under the terms of the GNU Lesser General Public License as published by
|
|
|
a4b143 |
+ the Free Software Foundation; either version 2.1 of the License, or
|
|
|
a4b143 |
+ (at your option) any later version.
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ systemd is distributed in the hope that it will be useful, but
|
|
|
a4b143 |
+ WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
a4b143 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
a4b143 |
+ Lesser General Public License for more details.
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ You should have received a copy of the GNU Lesser General Public License
|
|
|
a4b143 |
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
a4b143 |
+***/
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+#include "utf8.h"
|
|
|
a4b143 |
+#include "util.h"
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+/* helpers for test_udev_encode_string */
|
|
|
a4b143 |
+static char *do_encode_string(const char *in) {
|
|
|
a4b143 |
+ size_t out_len = strlen(in) * 4;
|
|
|
a4b143 |
+ char *out = malloc(out_len);
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ assert_se(out);
|
|
|
a4b143 |
+ assert_se(udev_encode_string(in, out, out_len) >= 0);
|
|
|
a4b143 |
+ puts(out);
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+ return out;
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+static bool expect_encoded_as(const char *in, const char *expected) {
|
|
|
a4b143 |
+ _cleanup_free_ char *encoded = do_encode_string(in);
|
|
|
a4b143 |
+ return streq(encoded, expected);
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+static void test_udev_encode_string(void) {
|
|
|
a4b143 |
+ assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
|
|
|
a4b143 |
+ assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
|
|
|
a4b143 |
+ assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));
|
|
|
a4b143 |
+ assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+static void test_utf8_is_valid(void) {
|
|
|
a4b143 |
+ assert_se(utf8_is_valid("ascii is valid unicode"));
|
|
|
a4b143 |
+ assert_se(utf8_is_valid("\341\204\242"));
|
|
|
a4b143 |
+ assert_se(!utf8_is_valid("\341\204"));
|
|
|
a4b143 |
+}
|
|
|
a4b143 |
+
|
|
|
a4b143 |
+int main(int argc, char *argv[]) {
|
|
|
a4b143 |
+ test_utf8_is_valid();
|
|
|
a4b143 |
+ test_udev_encode_string();
|
|
|
a4b143 |
+}
|