|
|
6bc0c3 |
From c09dba57cfbbf74273ce44b1f48f71b46806605c Mon Sep 17 00:00:00 2001
|
|
|
6bc0c3 |
From: "Miss Islington (bot)"
|
|
|
6bc0c3 |
<31488909+miss-islington@users.noreply.github.com>
|
|
|
6bc0c3 |
Date: Thu, 10 Nov 2022 07:57:41 -0800
|
|
|
6bc0c3 |
Subject: [PATCH] [3.9] gh-98433: Fix quadratic time idna decoding. (GH-99092)
|
|
|
6bc0c3 |
(GH-99222) (#99230)
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
There was an unnecessary quadratic loop in idna decoding. This restores
|
|
|
6bc0c3 |
the behavior to linear.
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
(cherry picked from commit d315722564927c7202dd6e111dc79eaf14240b0d)
|
|
|
6bc0c3 |
(cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15)
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
|
|
|
6bc0c3 |
Co-authored-by: Gregory P. Smith <greg@krypto.org>
|
|
|
6bc0c3 |
---
|
|
|
6bc0c3 |
Lib/encodings/idna.py | 32 +++++++++----------
|
|
|
6bc0c3 |
Lib/test/test_codecs.py | 6 ++++
|
|
|
6bc0c3 |
...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst | 6 ++++
|
|
|
6bc0c3 |
3 files changed, 27 insertions(+), 17 deletions(-)
|
|
|
6bc0c3 |
create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
|
|
|
6bc0c3 |
index ea4058512fe3..bf98f513366b 100644
|
|
|
6bc0c3 |
--- a/Lib/encodings/idna.py
|
|
|
6bc0c3 |
+++ b/Lib/encodings/idna.py
|
|
|
6bc0c3 |
@@ -39,23 +39,21 @@ def nameprep(label):
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
# Check bidi
|
|
|
6bc0c3 |
RandAL = [stringprep.in_table_d1(x) for x in label]
|
|
|
6bc0c3 |
- for c in RandAL:
|
|
|
6bc0c3 |
- if c:
|
|
|
6bc0c3 |
- # There is a RandAL char in the string. Must perform further
|
|
|
6bc0c3 |
- # tests:
|
|
|
6bc0c3 |
- # 1) The characters in section 5.8 MUST be prohibited.
|
|
|
6bc0c3 |
- # This is table C.8, which was already checked
|
|
|
6bc0c3 |
- # 2) If a string contains any RandALCat character, the string
|
|
|
6bc0c3 |
- # MUST NOT contain any LCat character.
|
|
|
6bc0c3 |
- if any(stringprep.in_table_d2(x) for x in label):
|
|
|
6bc0c3 |
- raise UnicodeError("Violation of BIDI requirement 2")
|
|
|
6bc0c3 |
-
|
|
|
6bc0c3 |
- # 3) If a string contains any RandALCat character, a
|
|
|
6bc0c3 |
- # RandALCat character MUST be the first character of the
|
|
|
6bc0c3 |
- # string, and a RandALCat character MUST be the last
|
|
|
6bc0c3 |
- # character of the string.
|
|
|
6bc0c3 |
- if not RandAL[0] or not RandAL[-1]:
|
|
|
6bc0c3 |
- raise UnicodeError("Violation of BIDI requirement 3")
|
|
|
6bc0c3 |
+ if any(RandAL):
|
|
|
6bc0c3 |
+ # There is a RandAL char in the string. Must perform further
|
|
|
6bc0c3 |
+ # tests:
|
|
|
6bc0c3 |
+ # 1) The characters in section 5.8 MUST be prohibited.
|
|
|
6bc0c3 |
+ # This is table C.8, which was already checked
|
|
|
6bc0c3 |
+ # 2) If a string contains any RandALCat character, the string
|
|
|
6bc0c3 |
+ # MUST NOT contain any LCat character.
|
|
|
6bc0c3 |
+ if any(stringprep.in_table_d2(x) for x in label):
|
|
|
6bc0c3 |
+ raise UnicodeError("Violation of BIDI requirement 2")
|
|
|
6bc0c3 |
+ # 3) If a string contains any RandALCat character, a
|
|
|
6bc0c3 |
+ # RandALCat character MUST be the first character of the
|
|
|
6bc0c3 |
+ # string, and a RandALCat character MUST be the last
|
|
|
6bc0c3 |
+ # character of the string.
|
|
|
6bc0c3 |
+ if not RandAL[0] or not RandAL[-1]:
|
|
|
6bc0c3 |
+ raise UnicodeError("Violation of BIDI requirement 3")
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
return label
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
|
|
|
6bc0c3 |
index fc50e70df24b..3520cc00a1a4 100644
|
|
|
6bc0c3 |
--- a/Lib/test/test_codecs.py
|
|
|
6bc0c3 |
+++ b/Lib/test/test_codecs.py
|
|
|
6bc0c3 |
@@ -1532,6 +1532,12 @@ def test_builtin_encode(self):
|
|
|
6bc0c3 |
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
|
|
|
6bc0c3 |
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
|
|
|
6bc0c3 |
|
|
|
6bc0c3 |
+ def test_builtin_decode_length_limit(self):
|
|
|
6bc0c3 |
+ with self.assertRaisesRegex(UnicodeError, "too long"):
|
|
|
6bc0c3 |
+ (b"xn--016c"+b"a"*1100).decode("idna")
|
|
|
6bc0c3 |
+ with self.assertRaisesRegex(UnicodeError, "too long"):
|
|
|
6bc0c3 |
+ (b"xn--016c"+b"a"*70).decode("idna")
|
|
|
6bc0c3 |
+
|
|
|
6bc0c3 |
def test_stream(self):
|
|
|
6bc0c3 |
r = codecs.getreader("idna")(io.BytesIO(b"abc"))
|
|
|
6bc0c3 |
r.read(3)
|
|
|
6bc0c3 |
diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
|
|
|
6bc0c3 |
new file mode 100644
|
|
|
6bc0c3 |
index 000000000000..5185fac2e29d
|
|
|
6bc0c3 |
--- /dev/null
|
|
|
6bc0c3 |
+++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst
|
|
|
6bc0c3 |
@@ -0,0 +1,6 @@
|
|
|
6bc0c3 |
+The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
|
|
|
6bc0c3 |
+related name resolution functions no longer involves a quadratic algorithm.
|
|
|
6bc0c3 |
+This prevents a potential CPU denial of service if an out-of-spec excessive
|
|
|
6bc0c3 |
+length hostname involving bidirectional characters were decoded. Some protocols
|
|
|
6bc0c3 |
+such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker
|
|
|
6bc0c3 |
+to supply such a name.
|