|
|
afa004 |
commit e8f285b522a907603501329e5b4212755f525fdf
|
|
|
afa004 |
Author: Tomas Korbar <tkorbar@redhat.com>
|
|
|
afa004 |
Date: Thu Mar 3 12:04:09 2022 +0100
|
|
|
afa004 |
|
|
|
afa004 |
CVE-2022-25235
|
|
|
afa004 |
|
|
|
afa004 |
diff --git a/lib/xmltok.c b/lib/xmltok.c
|
|
|
afa004 |
index 6b415d8..b55732a 100644
|
|
|
afa004 |
--- a/lib/xmltok.c
|
|
|
afa004 |
+++ b/lib/xmltok.c
|
|
|
afa004 |
@@ -103,13 +103,6 @@
|
|
|
afa004 |
+ ((((byte)[2]) >> 5) & 1)] \
|
|
|
afa004 |
& (1u << (((byte)[2]) & 0x1F)))
|
|
|
afa004 |
|
|
|
afa004 |
-#define UTF8_GET_NAMING(pages, p, n) \
|
|
|
afa004 |
- ((n) == 2 \
|
|
|
afa004 |
- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
|
|
|
afa004 |
- : ((n) == 3 \
|
|
|
afa004 |
- ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
|
|
|
afa004 |
- : 0))
|
|
|
afa004 |
-
|
|
|
afa004 |
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
|
|
|
afa004 |
of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
|
|
|
afa004 |
with the additional restriction of not allowing the Unicode
|
|
|
afa004 |
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
|
|
|
afa004 |
index 0403dd3..56d7a40 100644
|
|
|
afa004 |
--- a/lib/xmltok_impl.c
|
|
|
afa004 |
+++ b/lib/xmltok_impl.c
|
|
|
afa004 |
@@ -61,7 +61,7 @@
|
|
|
afa004 |
case BT_LEAD ## n: \
|
|
|
afa004 |
if (end - ptr < n) \
|
|
|
afa004 |
return XML_TOK_PARTIAL_CHAR; \
|
|
|
afa004 |
- if (!IS_NAME_CHAR(enc, ptr, n)) { \
|
|
|
afa004 |
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
|
|
|
afa004 |
*nextTokPtr = ptr; \
|
|
|
afa004 |
return XML_TOK_INVALID; \
|
|
|
afa004 |
} \
|
|
|
afa004 |
@@ -89,7 +89,7 @@
|
|
|
afa004 |
case BT_LEAD ## n: \
|
|
|
afa004 |
if (end - ptr < n) \
|
|
|
afa004 |
return XML_TOK_PARTIAL_CHAR; \
|
|
|
afa004 |
- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
|
afa004 |
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
|
afa004 |
*nextTokPtr = ptr; \
|
|
|
afa004 |
return XML_TOK_INVALID; \
|
|
|
afa004 |
} \
|
|
|
afa004 |
@@ -1117,6 +1117,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
|
afa004 |
case BT_LEAD ## n: \
|
|
|
afa004 |
if (end - ptr < n) \
|
|
|
afa004 |
return XML_TOK_PARTIAL_CHAR; \
|
|
|
afa004 |
+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
|
|
|
afa004 |
+ *nextTokPtr = ptr; \
|
|
|
afa004 |
+ return XML_TOK_INVALID; \
|
|
|
afa004 |
+ } \
|
|
|
afa004 |
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
|
afa004 |
ptr += n; \
|
|
|
afa004 |
tok = XML_TOK_NAME; \
|
|
|
afa004 |
diff --git a/tests/runtests.c b/tests/runtests.c
|
|
|
afa004 |
index 278bfa1..0f3afde 100644
|
|
|
afa004 |
--- a/tests/runtests.c
|
|
|
afa004 |
+++ b/tests/runtests.c
|
|
|
afa004 |
@@ -6540,6 +6540,106 @@ START_TEST(test_utf8_in_cdata_section_2)
|
|
|
afa004 |
}
|
|
|
afa004 |
END_TEST
|
|
|
afa004 |
|
|
|
afa004 |
+START_TEST(test_utf8_in_start_tags) {
|
|
|
afa004 |
+ struct test_case {
|
|
|
afa004 |
+ bool goodName;
|
|
|
afa004 |
+ bool goodNameStart;
|
|
|
afa004 |
+ const char *tagName;
|
|
|
afa004 |
+ };
|
|
|
afa004 |
+
|
|
|
afa004 |
+ // The idea with the tests below is this:
|
|
|
afa004 |
+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
|
|
|
afa004 |
+ // go to isNever and are hence not a concern.
|
|
|
afa004 |
+ //
|
|
|
afa004 |
+ // We start with a character that is a valid name character
|
|
|
afa004 |
+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
|
|
|
afa004 |
+ // single bits at places where (1) the result leaves the UTF-8 encoding space
|
|
|
afa004 |
+ // and (2) we stay in the same n-byte sequence family.
|
|
|
afa004 |
+ //
|
|
|
afa004 |
+ // The flipped bits are highlighted in angle brackets in comments,
|
|
|
afa004 |
+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
|
|
|
afa004 |
+ // the most significant bit to 1 to leave UTF-8 encoding space.
|
|
|
afa004 |
+ struct test_case cases[] = {
|
|
|
afa004 |
+ // 1-byte UTF-8: [0xxx xxxx]
|
|
|
afa004 |
+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
|
|
|
afa004 |
+ {false, false, "\xBA"}, // [<1>011 1010]
|
|
|
afa004 |
+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
|
|
|
afa004 |
+ {false, false, "\xB9"}, // [<1>011 1001]
|
|
|
afa004 |
+
|
|
|
afa004 |
+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
|
|
|
afa004 |
+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
|
|
|
afa004 |
+ // Arabic small waw U+06E5
|
|
|
afa004 |
+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
|
|
|
afa004 |
+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
|
|
|
afa004 |
+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
|
|
|
afa004 |
+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
|
|
|
afa004 |
+ // combining char U+0301
|
|
|
afa004 |
+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
|
|
|
afa004 |
+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
|
|
|
afa004 |
+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
|
|
|
afa004 |
+
|
|
|
afa004 |
+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
|
|
|
afa004 |
+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
|
|
|
afa004 |
+ // Devanagari Letter A U+0905
|
|
|
afa004 |
+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
|
|
|
afa004 |
+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
|
|
|
afa004 |
+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
|
|
|
afa004 |
+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
|
|
|
afa004 |
+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
|
|
|
afa004 |
+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
|
|
|
afa004 |
+ // combining char U+0901
|
|
|
afa004 |
+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
|
|
|
afa004 |
+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
|
|
|
afa004 |
+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
|
|
|
afa004 |
+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
|
|
|
afa004 |
+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
|
|
|
afa004 |
+ };
|
|
|
afa004 |
+ const bool atNameStart[] = {true, false};
|
|
|
afa004 |
+
|
|
|
afa004 |
+ size_t i = 0;
|
|
|
afa004 |
+ char doc[1024];
|
|
|
afa004 |
+ size_t failCount = 0;
|
|
|
afa004 |
+
|
|
|
afa004 |
+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
|
|
afa004 |
+ size_t j = 0;
|
|
|
afa004 |
+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
|
|
|
afa004 |
+ const bool expectedSuccess
|
|
|
afa004 |
+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
|
|
|
afa004 |
+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
|
|
|
afa004 |
+ XML_Parser parser = XML_ParserCreate(NULL);
|
|
|
afa004 |
+
|
|
|
afa004 |
+ const enum XML_Status status
|
|
|
afa004 |
+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
|
|
|
afa004 |
+
|
|
|
afa004 |
+ bool success = true;
|
|
|
afa004 |
+ if ((status == XML_STATUS_OK) != expectedSuccess) {
|
|
|
afa004 |
+ success = false;
|
|
|
afa004 |
+ }
|
|
|
afa004 |
+ if ((status == XML_STATUS_ERROR)
|
|
|
afa004 |
+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
|
|
|
afa004 |
+ success = false;
|
|
|
afa004 |
+ }
|
|
|
afa004 |
+
|
|
|
afa004 |
+ if (! success) {
|
|
|
afa004 |
+ fprintf(
|
|
|
afa004 |
+ stderr,
|
|
|
afa004 |
+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
|
|
|
afa004 |
+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
|
|
|
afa004 |
+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
|
|
|
afa004 |
+ failCount++;
|
|
|
afa004 |
+ }
|
|
|
afa004 |
+
|
|
|
afa004 |
+ XML_ParserFree(parser);
|
|
|
afa004 |
+ }
|
|
|
afa004 |
+ }
|
|
|
afa004 |
+
|
|
|
afa004 |
+ if (failCount > 0) {
|
|
|
afa004 |
+ fail("UTF-8 regression detected");
|
|
|
afa004 |
+ }
|
|
|
afa004 |
+}
|
|
|
afa004 |
+END_TEST
|
|
|
afa004 |
+
|
|
|
afa004 |
+
|
|
|
afa004 |
/* Test trailing spaces in elements are accepted */
|
|
|
afa004 |
static void XMLCALL
|
|
|
afa004 |
record_element_end_handler(void *userData,
|
|
|
afa004 |
@@ -6734,6 +6834,15 @@ START_TEST(test_bad_doctype)
|
|
|
afa004 |
}
|
|
|
afa004 |
END_TEST
|
|
|
afa004 |
|
|
|
afa004 |
+START_TEST(test_bad_doctype_utf8) {
|
|
|
afa004 |
+ const char *text = "<!DOCTYPE \xDB\x25"
|
|
|
afa004 |
+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
|
|
|
afa004 |
+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
|
|
|
afa004 |
+ "Invalid UTF-8 in DOCTYPE not faulted");
|
|
|
afa004 |
+}
|
|
|
afa004 |
+END_TEST
|
|
|
afa004 |
+
|
|
|
afa004 |
+
|
|
|
afa004 |
START_TEST(test_bad_doctype_utf16)
|
|
|
afa004 |
{
|
|
|
afa004 |
const char text[] =
|
|
|
afa004 |
@@ -12256,6 +12365,7 @@ make_suite(void)
|
|
|
afa004 |
tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_utf8_in_cdata_section);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
|
|
|
afa004 |
+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_utf16_attribute);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_utf16_second_attr);
|
|
|
afa004 |
@@ -12264,6 +12374,7 @@ make_suite(void)
|
|
|
afa004 |
tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_bad_doctype);
|
|
|
afa004 |
+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_bad_doctype_utf16);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_bad_doctype_plus);
|
|
|
afa004 |
tcase_add_test(tc_basic, test_bad_doctype_star);
|