Blame SOURCES/00394-cve-2022-45061-cpu-denial-of-service-via-inefficient-idna-decoder.patch

6bc0c3
From c09dba57cfbbf74273ce44b1f48f71b46806605c Mon Sep 17 00:00:00 2001
6bc0c3
From: "Miss Islington (bot)"
6bc0c3
 <31488909+miss-islington@users.noreply.github.com>
6bc0c3
Date: Thu, 10 Nov 2022 07:57:41 -0800
6bc0c3
Subject: [PATCH] [3.9] gh-98433: Fix quadratic time idna decoding. (GH-99092)
6bc0c3
 (GH-99222) (#99230)
6bc0c3
6bc0c3
There was an unnecessary quadratic loop in idna decoding. This restores
6bc0c3
the behavior to linear.
6bc0c3
6bc0c3
(cherry picked from commit d315722564927c7202dd6e111dc79eaf14240b0d)
6bc0c3
(cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15)
6bc0c3
6bc0c3
Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
6bc0c3
Co-authored-by: Gregory P. Smith <greg@krypto.org>
6bc0c3
---
6bc0c3
 Lib/encodings/idna.py                         | 32 +++++++++----------
6bc0c3
 Lib/test/test_codecs.py                       |  6 ++++
6bc0c3
 ...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst |  6 ++++
6bc0c3
 3 files changed, 27 insertions(+), 17 deletions(-)
6bc0c3
 create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
6bc0c3
6bc0c3
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
6bc0c3
index ea4058512fe3..bf98f513366b 100644
6bc0c3
--- a/Lib/encodings/idna.py
6bc0c3
+++ b/Lib/encodings/idna.py
6bc0c3
@@ -39,23 +39,21 @@ def nameprep(label):
6bc0c3
 
6bc0c3
     # Check bidi
6bc0c3
     RandAL = [stringprep.in_table_d1(x) for x in label]
6bc0c3
-    for c in RandAL:
6bc0c3
-        if c:
6bc0c3
-            # There is a RandAL char in the string. Must perform further
6bc0c3
-            # tests:
6bc0c3
-            # 1) The characters in section 5.8 MUST be prohibited.
6bc0c3
-            # This is table C.8, which was already checked
6bc0c3
-            # 2) If a string contains any RandALCat character, the string
6bc0c3
-            # MUST NOT contain any LCat character.
6bc0c3
-            if any(stringprep.in_table_d2(x) for x in label):
6bc0c3
-                raise UnicodeError("Violation of BIDI requirement 2")
6bc0c3
-
6bc0c3
-            # 3) If a string contains any RandALCat character, a
6bc0c3
-            # RandALCat character MUST be the first character of the
6bc0c3
-            # string, and a RandALCat character MUST be the last
6bc0c3
-            # character of the string.
6bc0c3
-            if not RandAL[0] or not RandAL[-1]:
6bc0c3
-                raise UnicodeError("Violation of BIDI requirement 3")
6bc0c3
+    if any(RandAL):
6bc0c3
+        # There is a RandAL char in the string. Must perform further
6bc0c3
+        # tests:
6bc0c3
+        # 1) The characters in section 5.8 MUST be prohibited.
6bc0c3
+        # This is table C.8, which was already checked
6bc0c3
+        # 2) If a string contains any RandALCat character, the string
6bc0c3
+        # MUST NOT contain any LCat character.
6bc0c3
+        if any(stringprep.in_table_d2(x) for x in label):
6bc0c3
+            raise UnicodeError("Violation of BIDI requirement 2")
6bc0c3
+        # 3) If a string contains any RandALCat character, a
6bc0c3
+        # RandALCat character MUST be the first character of the
6bc0c3
+        # string, and a RandALCat character MUST be the last
6bc0c3
+        # character of the string.
6bc0c3
+        if not RandAL[0] or not RandAL[-1]:
6bc0c3
+            raise UnicodeError("Violation of BIDI requirement 3")
6bc0c3
 
6bc0c3
     return label
6bc0c3
 
6bc0c3
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
6bc0c3
index fc50e70df24b..3520cc00a1a4 100644
6bc0c3
--- a/Lib/test/test_codecs.py
6bc0c3
+++ b/Lib/test/test_codecs.py
6bc0c3
@@ -1532,6 +1532,12 @@ def test_builtin_encode(self):
6bc0c3
         self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
6bc0c3
         self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
6bc0c3
 
6bc0c3
+    def test_builtin_decode_length_limit(self):
6bc0c3
+        with self.assertRaisesRegex(UnicodeError, "too long"):
6bc0c3
+            (b"xn--016c"+b"a"*1100).decode("idna")
6bc0c3
+        with self.assertRaisesRegex(UnicodeError, "too long"):
6bc0c3
+            (b"xn--016c"+b"a"*70).decode("idna")
6bc0c3
+
6bc0c3
     def test_stream(self):
6bc0c3
         r = codecs.getreader("idna")(io.BytesIO(b"abc"))
6bc0c3
         r.read(3)
6bc0c3
diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
6bc0c3
new file mode 100644
6bc0c3
index 000000000000..5185fac2e29d
6bc0c3
--- /dev/null
6bc0c3
+++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
6bc0c3
@@ -0,0 +1,6 @@
6bc0c3
+The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
6bc0c3
+related name resolution functions no longer involves a quadratic algorithm.
6bc0c3
+This prevents a potential CPU denial of service if an out-of-spec excessive
6bc0c3
+length hostname involving bidirectional characters were decoded. Some protocols
6bc0c3
+such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker
6bc0c3
+to supply such a name.