From 76b4a34e04f09596c92fa0326001f18891695e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Mat=C4=9Bj?= Date: Fri, 24 Jul 2020 12:53:47 +0200 Subject: [PATCH 1/2] Add a test for parsing huge snippet (RhBug:1859689) https://bugzilla.redhat.com/show_bug.cgi?id=1859689 --- tests/python/tests/test_xml_parser.py | 36 +++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/python/tests/test_xml_parser.py b/tests/python/tests/test_xml_parser.py index e2c48f0..8856096 100644 --- a/tests/python/tests/test_xml_parser.py +++ b/tests/python/tests/test_xml_parser.py @@ -476,6 +476,42 @@ class TestCaseXmlParserFilelists(unittest.TestCase): self.assertEqual(userdata["pkgcb_calls"], 2) self.assertEqual(userdata["warnings"], []) + def test_xml_parser_filelists_snippet_huge(self): + + userdata = { + "pkgs": [], + "pkgcb_calls": 0, + "warnings": [] + } + + def newpkgcb(pkgId, name, arch): + pkg = cr.Package() + userdata["pkgs"].append(pkg) + return pkg + + def pkgcb(pkg): + userdata["pkgcb_calls"] += 1 + + def warningcb(warn_type, msg): + userdata["warnings"].append((warn_type, msg)) + + # generete huge filelists snippet + content = """ + + + """ + for i in range(145951): + content += "/usr/share/icons/Flat-Remix-Yellow/status/symbolic/user-available-symbolic.svg" + content += "" + + cr.xml_parse_filelists_snippet(content, newpkgcb, pkgcb, warningcb) + + self.assertEqual([pkg.name for pkg in userdata["pkgs"]], + ['flat-remix-icon-theme']) + self.assertEqual(userdata["pkgcb_calls"], 1) + self.assertEqual(userdata["warnings"], []) + + def test_xml_parser_filelists_repo02_only_pkgcb(self): -- 2.28.0.rc1 From 6f08a56c36e6c60d216fff964b31a2c58ce12fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ale=C5=A1=20Mat=C4=9Bj?= Date: Fri, 24 Jul 2020 12:41:50 +0200 Subject: [PATCH 2/2] Parse xml snippet in smaller parts (RhBug:1859689) If the string passed to xmlParseChunk (function from libxml2) is too big the function errors out, so it's safer to parse the snippet in smaller 
parts. https://bugzilla.redhat.com/show_bug.cgi?id=1859689 --- src/xml_parser.c | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/xml_parser.c b/src/xml_parser.c index 4727daa..9375107 100644 --- a/src/xml_parser.c +++ b/src/xml_parser.c @@ -243,29 +243,45 @@ cr_xml_parser_generic_from_string(xmlParserCtxtPtr parser, /* Note: This function uses .err members of cr_ParserData! */ int ret = CRE_OK; + int block_size = XML_BUFFER_SIZE; + const char *next_data = xml_string; + const char *end_of_string = xml_string + strlen(xml_string); + int finished = 0; assert(parser); assert(pd); assert(xml_string); assert(!err || *err == NULL); - if (xmlParseChunk(parser, xml_string, strlen(xml_string), 1)) { - ret = CRE_XMLPARSER; - xmlErrorPtr xml_err = xmlCtxtGetLastError(parser); - g_critical("%s: parsing error '%s': %s", - __func__, - xml_string, - xml_err->message); - g_set_error(err, ERR_DOMAIN, CRE_XMLPARSER, - "Parse error '%s' at line: %d (%s)", - xml_string, - (int) xml_err->line, - (char *) xml_err->message); - } + const char *data; + while (!finished) { + data = next_data; + + // Check if we are in the last loop + next_data = data + block_size; + if (next_data > end_of_string) { + block_size = strlen(data); + finished = 1; + } + + if (xmlParseChunk(parser, data, block_size, finished)) { + ret = CRE_XMLPARSER; + xmlErrorPtr xml_err = xmlCtxtGetLastError(parser); + g_critical("%s: parsing error '%s': %s", + __func__, + data, + xml_err->message); + g_set_error(err, ERR_DOMAIN, CRE_XMLPARSER, + "Parse error '%s' at line: %d (%s)", + data, + (int) xml_err->line, + (char *) xml_err->message); + } - if (pd->err) { - ret = pd->err->code; - g_propagate_error(err, pd->err); + if (pd->err) { + ret = pd->err->code; + g_propagate_error(err, pd->err); + } } return ret; -- 2.28.0.rc1