|
|
4cc7ea |
commit c8abeb6dbc14761866da2d3cf359d795f126b6d8
|
|
|
4cc7ea |
Author: Tomas Korbar <tkorbar@redhat.com>
|
|
|
4cc7ea |
Date: Mon Mar 21 12:48:53 2022 +0100
|
|
|
4cc7ea |
|
|
|
4cc7ea |
Add missing validation of encoding
|
|
|
4cc7ea |
|
|
|
4cc7ea |
diff --git a/lib/xmltok.c b/lib/xmltok.c
|
|
|
4cc7ea |
index cb98ce1..a080f59 100644
|
|
|
4cc7ea |
--- a/lib/xmltok.c
|
|
|
4cc7ea |
+++ b/lib/xmltok.c
|
|
|
4cc7ea |
@@ -71,13 +71,6 @@
|
|
|
4cc7ea |
+ ((((byte)[2]) >> 5) & 1)] \
|
|
|
4cc7ea |
& (1 << (((byte)[2]) & 0x1F)))
|
|
|
4cc7ea |
|
|
|
4cc7ea |
-#define UTF8_GET_NAMING(pages, p, n) \
|
|
|
4cc7ea |
- ((n) == 2 \
|
|
|
4cc7ea |
- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
|
|
|
4cc7ea |
- : ((n) == 3 \
|
|
|
4cc7ea |
- ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
|
|
|
4cc7ea |
- : 0))
|
|
|
4cc7ea |
-
|
|
|
4cc7ea |
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
|
|
|
4cc7ea |
of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
|
|
|
4cc7ea |
with the additional restriction of not allowing the Unicode
|
|
|
4cc7ea |
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
|
|
|
4cc7ea |
index c93e2ac..a135514 100644
|
|
|
4cc7ea |
--- a/lib/xmltok_impl.c
|
|
|
4cc7ea |
+++ b/lib/xmltok_impl.c
|
|
|
4cc7ea |
@@ -34,7 +34,7 @@
|
|
|
4cc7ea |
case BT_LEAD ## n: \
|
|
|
4cc7ea |
if (end - ptr < n) \
|
|
|
4cc7ea |
return XML_TOK_PARTIAL_CHAR; \
|
|
|
4cc7ea |
- if (!IS_NAME_CHAR(enc, ptr, n)) { \
|
|
|
4cc7ea |
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
|
|
|
4cc7ea |
*nextTokPtr = ptr; \
|
|
|
4cc7ea |
return XML_TOK_INVALID; \
|
|
|
4cc7ea |
} \
|
|
|
4cc7ea |
@@ -62,7 +62,7 @@
|
|
|
4cc7ea |
case BT_LEAD ## n: \
|
|
|
4cc7ea |
if (end - ptr < n) \
|
|
|
4cc7ea |
return XML_TOK_PARTIAL_CHAR; \
|
|
|
4cc7ea |
- if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
|
4cc7ea |
+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
|
4cc7ea |
*nextTokPtr = ptr; \
|
|
|
4cc7ea |
return XML_TOK_INVALID; \
|
|
|
4cc7ea |
} \
|
|
|
4cc7ea |
@@ -1097,6 +1097,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
|
4cc7ea |
case BT_LEAD ## n: \
|
|
|
4cc7ea |
if (end - ptr < n) \
|
|
|
4cc7ea |
return XML_TOK_PARTIAL_CHAR; \
|
|
|
4cc7ea |
+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
|
|
|
4cc7ea |
+ *nextTokPtr = ptr; \
|
|
|
4cc7ea |
+ return XML_TOK_INVALID; \
|
|
|
4cc7ea |
+ } \
|
|
|
4cc7ea |
if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
|
4cc7ea |
ptr += n; \
|
|
|
4cc7ea |
tok = XML_TOK_NAME; \
|
|
|
4cc7ea |
diff --git a/tests/runtests.c b/tests/runtests.c
|
|
|
4cc7ea |
index 86f8b18..c01f096 100644
|
|
|
4cc7ea |
--- a/tests/runtests.c
|
|
|
4cc7ea |
+++ b/tests/runtests.c
|
|
|
4cc7ea |
@@ -14,6 +14,7 @@
|
|
|
4cc7ea |
#include <string.h>
|
|
|
4cc7ea |
#include <stdint.h>
|
|
|
4cc7ea |
#include <limits.h>
|
|
|
4cc7ea |
+#include <stdbool.h>
|
|
|
4cc7ea |
|
|
|
4cc7ea |
#include "expat.h"
|
|
|
4cc7ea |
#include "chardata.h"
|
|
|
4cc7ea |
@@ -82,7 +83,7 @@ _xml_failure(XML_Parser parser, const char *file, int line)
|
|
|
4cc7ea |
|
|
|
4cc7ea |
static void
|
|
|
4cc7ea |
_expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
|
|
|
4cc7ea |
- char *file, int lineno)
|
|
|
4cc7ea |
+ const char *file, int lineno)
|
|
|
4cc7ea |
{
|
|
|
4cc7ea |
if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
|
|
|
4cc7ea |
/* Hackish use of _fail_unless() macro, but let's us report
|
|
|
4cc7ea |
@@ -1541,6 +1542,13 @@ START_TEST(test_ns_separator_in_uri) {
|
|
|
4cc7ea |
}
|
|
|
4cc7ea |
END_TEST
|
|
|
4cc7ea |
|
|
|
4cc7ea |
+START_TEST(test_bad_doctype_utf8) {
|
|
|
4cc7ea |
+ char *text = "<!DOCTYPE \xDB\x25"
|
|
|
4cc7ea |
+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
|
|
|
4cc7ea |
+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
|
|
|
4cc7ea |
+ "Invalid UTF-8 in DOCTYPE not faulted");
|
|
|
4cc7ea |
+}
|
|
|
4cc7ea |
+END_TEST
|
|
|
4cc7ea |
|
|
|
4cc7ea |
START_TEST(test_utf8_in_start_tags) {
|
|
|
4cc7ea |
struct test_case {
|
|
|
4cc7ea |
@@ -1695,6 +1703,8 @@ make_suite(void)
|
|
|
4cc7ea |
tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
|
|
|
4cc7ea |
tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
|
|
|
4cc7ea |
tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
|
|
|
4cc7ea |
+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
|
|
|
4cc7ea |
+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
|
|
|
4cc7ea |
|
|
|
4cc7ea |
suite_add_tcase(s, tc_namespace);
|
|
|
4cc7ea |
tcase_add_checked_fixture(tc_namespace,
|