diff --git a/SOURCES/00378-support-expat-2-4-5.patch b/SOURCES/00378-support-expat-2-4-5.patch new file mode 100644 index 0000000..83b20fb --- /dev/null +++ b/SOURCES/00378-support-expat-2-4-5.patch @@ -0,0 +1,101 @@ +From 3950e203a4c625b7bc53e67e96d5d5239758f4fa Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Mon, 21 Feb 2022 08:16:23 -0800 +Subject: [PATCH] bpo-46811: Make test suite support Expat >=2.4.5 (GH-31453) + (GH-31469) + +Curly brackets were never allowed in namespace URIs +according to RFC 3986, and so-called namespace-validating +XML parsers have the right to reject them as invalid URIs. + +libexpat >=2.4.5 has become stricter in that regard due to +related security issues; with ET.XML instantiating a +namespace-aware parser under the hood, this test has no +future in CPython. + +References: +- https://datatracker.ietf.org/doc/html/rfc3986 +- https://www.w3.org/TR/xml-names/ + +Also, test_minidom.py: Support Expat >=2.4.5 +(cherry picked from commit 2cae93832f46b245847bdc252456ddf7742ef45e) + +Co-authored-by: Sebastian Pipping + +Co-authored-by: Sebastian Pipping +--- + Lib/test/test_minidom.py | 12 +++++++++--- + Lib/test/test_xml_etree.py | 6 ------ + .../Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst | 1 + + 3 files changed, 10 insertions(+), 9 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst + +diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py +index 1663b1f..5f52ed1 100644 +--- a/Lib/test/test_minidom.py ++++ b/Lib/test/test_minidom.py +@@ -6,10 +6,12 @@ import io + from test import support + import unittest + ++import pyexpat + import xml.dom.minidom + + from xml.dom.minidom import parse, Node, Document, parseString + from xml.dom.minidom import getDOMImplementation ++from xml.parsers.expat import ExpatError + + + tstfile = support.findfile("test.xml", subdir="xmltestdata") +@@ -1147,8 +1149,10 @@ class 
MinidomTest(unittest.TestCase): + + # Verify that character decoding errors raise exceptions instead + # of crashing +- self.assertRaises(UnicodeDecodeError, parseString, +- b'Comment \xe7a va ? Tr\xe8s bien ?') ++ self.assertRaises(ExpatError, parseString, ++ b'') ++ self.assertRaises(ExpatError, parseString, ++ b'Comment \xe7a va ? Tr\xe8s bien ?') + + doc.unlink() + +@@ -1609,7 +1613,9 @@ class MinidomTest(unittest.TestCase): + self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE) + + def testExceptionOnSpacesInXMLNSValue(self): +- with self.assertRaisesRegex(ValueError, 'Unsupported syntax'): ++ context = self.assertRaisesRegex(ExpatError, 'syntax error') ++ ++ with context: + parseString('') + + def testDocRemoveChild(self): +diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py +index 23c4cd5..142ce2c 100644 +--- a/Lib/test/test_xml_etree.py ++++ b/Lib/test/test_xml_etree.py +@@ -2159,12 +2159,6 @@ class BugsTest(unittest.TestCase): + b"\n" + b'tãg') + +- def test_issue3151(self): +- e = ET.XML('') +- self.assertEqual(e.tag, '{${stuff}}localname') +- t = ET.ElementTree(e) +- self.assertEqual(ET.tostring(e), b'') +- + def test_issue6565(self): + elem = ET.XML("") + self.assertEqual(summarize_list(elem), ['tag']) +diff --git a/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst +new file mode 100644 +index 0000000..6969bd1 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst +@@ -0,0 +1 @@ ++Make test suite support Expat >=2.4.5 +-- +2.37.3 + diff --git a/SOURCES/00387-cve-2020-10735-prevent-dos-by-very-large-int.patch b/SOURCES/00387-cve-2020-10735-prevent-dos-by-very-large-int.patch new file mode 100644 index 0000000..5c4933b --- /dev/null +++ b/SOURCES/00387-cve-2020-10735-prevent-dos-by-very-large-int.patch @@ -0,0 +1,1499 @@ +From 2b6758f55e3b0a4141a54f3c35a0d0cd377ce3cf Mon Sep 17 00:00:00 2001 +From: "Gregory P. 
Smith" +Date: Mon, 5 Sep 2022 02:21:03 -0700 +Subject: [PATCH] gh-95778: CVE-2020-10735: Prevent DoS by very large int() + (#96502) + +* Correctly pre-check for int-to-str conversion (#96537) + +Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) + +The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. + +The justification for the current check. The C code check is: +```c +max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 +``` + +In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: +$$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ + +From this it follows that +$$\frac{M}{3L} < \frac{s-1}{10}$$ +hence that +$$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ +So +$$2^{L(s-1)} > 10^M.$$ +But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. + + +* Issue: gh-95778 + + +Co-authored-by: Gregory P. 
Smith [Google LLC] +Co-authored-by: Christian Heimes +Co-authored-by: Mark Dickinson +--- + Doc/data/python3.9.abi | 5 +- + Doc/library/functions.rst | 8 + + Doc/library/json.rst | 11 + + Doc/library/stdtypes.rst | 159 ++++++++++++++ + Doc/library/sys.rst | 59 ++++-- + Doc/library/test.rst | 10 + + Doc/using/cmdline.rst | 13 ++ + Doc/whatsnew/3.9.rst | 14 ++ + Include/internal/pycore_initconfig.h | 2 + + Include/internal/pycore_interp.h | 2 + + Include/internal/pycore_long.h | 49 +++++ + Lib/test/support/__init__.py | 11 + + Lib/test/test_ast.py | 8 + + Lib/test/test_cmd_line.py | 33 +++ + Lib/test/test_compile.py | 13 ++ + Lib/test/test_decimal.py | 18 ++ + Lib/test/test_int.py | 196 ++++++++++++++++++ + Lib/test/test_json/test_decode.py | 9 + + Lib/test/test_sys.py | 11 +- + Lib/test/test_xmlrpc.py | 10 + + Makefile.pre.in | 1 + + ...08-07-16-53-38.gh-issue-95778.ch010gps.rst | 14 ++ + Objects/longobject.c | 65 +++++- + Parser/pegen/pegen.c | 18 ++ + Python/clinic/sysmodule.c.h | 60 +++++- + Python/initconfig.c | 60 ++++++ + Python/sysmodule.c | 46 +++- + 27 files changed, 886 insertions(+), 19 deletions(-) + create mode 100644 Include/internal/pycore_long.h + create mode 100644 Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst + +diff --git a/Doc/data/python3.9.abi b/Doc/data/python3.9.abi +index e203743..cca9779 100644 +--- a/Doc/data/python3.9.abi ++++ b/Doc/data/python3.9.abi +@@ -5653,7 +5653,7 @@ + + + +- ++ + + + +@@ -5774,6 +5774,9 @@ + + + ++ ++ ++ + + + +diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst +index 8df557e..820b313 100644 +--- a/Doc/library/functions.rst ++++ b/Doc/library/functions.rst +@@ -844,6 +844,14 @@ are always available. They are listed here in alphabetical order. + .. versionchanged:: 3.8 + Falls back to :meth:`__index__` if :meth:`__int__` is not defined. + ++ .. 
versionchanged:: 3.9.14 ++ :class:`int` string inputs and string representations can be limited to ++ help avoid denial of service attacks. A :exc:`ValueError` is raised when ++ the limit is exceeded while converting a string *x* to an :class:`int` or ++ when converting an :class:`int` into a string would exceed the limit. ++ See the :ref:`integer string conversion length limitation ++ ` documentation. ++ + + .. function:: isinstance(object, classinfo) + +diff --git a/Doc/library/json.rst b/Doc/library/json.rst +index 1810e04..6b715b5 100644 +--- a/Doc/library/json.rst ++++ b/Doc/library/json.rst +@@ -18,6 +18,11 @@ is a lightweight data interchange format inspired by + `JavaScript `_ object literal syntax + (although it is not a strict subset of JavaScript [#rfc-errata]_ ). + ++.. warning:: ++ Be cautious when parsing JSON data from untrusted sources. A malicious ++ JSON string may cause the decoder to consume considerable CPU and memory ++ resources. Limiting the size of data to be parsed is recommended. ++ + :mod:`json` exposes an API familiar to users of the standard library + :mod:`marshal` and :mod:`pickle` modules. + +@@ -255,6 +260,12 @@ Basic Usage + be used to use another datatype or parser for JSON integers + (e.g. :class:`float`). + ++ .. versionchanged:: 3.9.14 ++ The default *parse_int* of :func:`int` now limits the maximum length of ++ the integer string via the interpreter's :ref:`integer string ++ conversion length limitation ` to help avoid denial ++ of service attacks. ++ + *parse_constant*, if specified, will be called with one of the following + strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. + This can be used to raise an exception if invalid JSON numbers +diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst +index bfa0e74..f88e81f 100644 +--- a/Doc/library/stdtypes.rst ++++ b/Doc/library/stdtypes.rst +@@ -5206,6 +5206,165 @@ types, where they are relevant. Some of these are not reported by the + [] + + ++.. 
_int_max_str_digits: ++ ++Integer string conversion length limitation ++=========================================== ++ ++CPython has a global limit for converting between :class:`int` and :class:`str` ++to mitigate denial of service attacks. This limit *only* applies to decimal or ++other non-power-of-two number bases. Hexadecimal, octal, and binary conversions ++are unlimited. The limit can be configured. ++ ++The :class:`int` type in CPython is an arbitrary length number stored in binary ++form (commonly known as a "bignum"). There exists no algorithm that can convert ++a string to a binary integer or a binary integer to a string in linear time, ++*unless* the base is a power of 2. Even the best known algorithms for base 10 ++have sub-quadratic complexity. Converting a large value such as ``int('1' * ++500_000)`` can take over a second on a fast CPU. ++ ++Limiting conversion size offers a practical way to avoid `CVE-2020-10735 ++`_. ++ ++The limit is applied to the number of digit characters in the input or output ++string when a non-linear conversion algorithm would be involved. Underscores ++and the sign are not counted towards the limit. ++ ++When an operation would exceed the limit, a :exc:`ValueError` is raised: ++ ++.. doctest:: ++ ++ >>> import sys ++ >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. ++ >>> _ = int('2' * 5432) ++ Traceback (most recent call last): ++ ... ++ ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits. ++ >>> i = int('2' * 4300) ++ >>> len(str(i)) ++ 4300 ++ >>> i_squared = i*i ++ >>> len(str(i_squared)) ++ Traceback (most recent call last): ++ ... ++ ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. ++ >>> len(hex(i_squared)) ++ 7144 ++ >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. ++ ++The default limit is 4300 digits as provided in ++:data:`sys.int_info.default_max_str_digits `. 
++The lowest limit that can be configured is 640 digits as provided in ++:data:`sys.int_info.str_digits_check_threshold `. ++ ++Verification: ++ ++.. doctest:: ++ ++ >>> import sys ++ >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info ++ >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info ++ >>> msg = int('578966293710682886880994035146873798396722250538762761564' ++ ... '9252925514383915483333812743580549779436104706260696366600' ++ ... '571186405732').to_bytes(53, 'big') ++ ... ++ ++.. versionadded:: 3.9.14 ++ ++Affected APIs ++------------- ++ ++The limitation only applies to potentially slow conversions between :class:`int` ++and :class:`str` or :class:`bytes`: ++ ++* ``int(string)`` with default base 10. ++* ``int(string, base)`` for all bases that are not a power of 2. ++* ``str(integer)``. ++* ``repr(integer)`` ++* any other string conversion to base 10, for example ``f"{integer}"``, ++ ``"{}".format(integer)``, or ``b"%d" % integer``. ++ ++The limitations do not apply to functions with a linear algorithm: ++ ++* ``int(string, base)`` with base 2, 4, 8, 16, or 32. ++* :func:`int.from_bytes` and :func:`int.to_bytes`. ++* :func:`hex`, :func:`oct`, :func:`bin`. ++* :ref:`formatspec` for hex, octal, and binary numbers. ++* :class:`str` to :class:`float`. ++* :class:`str` to :class:`decimal.Decimal`. ++ ++Configuring the limit ++--------------------- ++ ++Before Python starts up you can use an environment variable or an interpreter ++command line flag to configure the limit: ++ ++* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. ++ ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to 640 or ++ ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. ++* :option:`-X int_max_str_digits <-X>`, e.g. ++ ``python3 -X int_max_str_digits=640`` ++* :data:`sys.flags.int_max_str_digits` contains the value of ++ :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`. 
If both the env var and the ``-X`` option are set, the ``-X`` option takes ++ precedence. A value of *-1* indicates that both were unset, thus a value of ++ :data:`sys.int_info.default_max_str_digits` was used during initialization. ++ ++From code, you can inspect the current limit and set a new one using these ++:mod:`sys` APIs: ++ ++* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are ++ a getter and setter for the interpreter-wide limit. Subinterpreters have ++ their own limit. ++ ++Information about the default and minimum can be found in :attr:`sys.int_info`: ++ ++* :data:`sys.int_info.default_max_str_digits ` is the compiled-in ++ default limit. ++* :data:`sys.int_info.str_digits_check_threshold ` is the lowest ++ accepted value for the limit (other than 0 which disables it). ++ ++.. versionadded:: 3.9.14 ++ ++.. caution:: ++ ++ Setting a low limit *can* lead to problems. While rare, code exists that ++ contains integer constants in decimal in their source that exceed the ++ minimum threshold. A consequence of setting the limit is that Python source ++ code containing decimal integer literals longer than the limit will ++ encounter an error during parsing, usually at startup time or import time or ++ even at installation time - anytime an up to date ``.pyc`` does not already ++ exist for the code. A workaround for source that contains such large ++ constants is to convert them to ``0x`` hexadecimal form as it has no limit. ++ ++ Test your application thoroughly if you use a low limit. Ensure your tests ++ run with the limit set early via the environment or flag so that it applies ++ during startup and even during any installation step that may invoke Python ++ to precompile ``.py`` sources to ``.pyc`` files. ++ ++Recommended configuration ++------------------------- ++ ++The default :data:`sys.int_info.default_max_str_digits` is expected to be ++reasonable for most applications. 
If your application requires a different ++limit, set it from your main entry point using Python version agnostic code as ++these APIs were added in security patch releases in versions before 3.11. ++ ++Example:: ++ ++ >>> import sys ++ >>> if hasattr(sys, "set_int_max_str_digits"): ++ ... upper_bound = 68000 ++ ... lower_bound = 4004 ++ ... current_limit = sys.get_int_max_str_digits() ++ ... if current_limit == 0 or current_limit > upper_bound: ++ ... sys.set_int_max_str_digits(upper_bound) ++ ... elif current_limit < lower_bound: ++ ... sys.set_int_max_str_digits(lower_bound) ++ ++If you need to disable it entirely, set it to ``0``. ++ ++ + .. rubric:: Footnotes + + .. [1] Additional information on these special methods may be found in the Python +diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst +index 9e18282..9b98bc5 100644 +--- a/Doc/library/sys.rst ++++ b/Doc/library/sys.rst +@@ -445,9 +445,9 @@ always available. + The :term:`named tuple` *flags* exposes the status of command line + flags. The attributes are read only. + +- ============================= ================================================================ ++ ============================= ============================================================================================================== + attribute flag +- ============================= ================================================================ ++ ============================= ============================================================================================================== + :const:`debug` :option:`-d` + :const:`inspect` :option:`-i` + :const:`interactive` :option:`-i` +@@ -463,7 +463,8 @@ always available. 
+ :const:`hash_randomization` :option:`-R` + :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) + :const:`utf8_mode` :option:`-X utf8 <-X>` +- ============================= ================================================================ ++ :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) ++ ============================= ============================================================================================================== + + .. versionchanged:: 3.2 + Added ``quiet`` attribute for the new :option:`-q` flag. +@@ -482,6 +483,9 @@ always available. + Mode ` and the ``utf8_mode`` attribute for the new :option:`-X` + ``utf8`` flag. + ++ .. versionchanged:: 3.9.14 ++ Added the ``int_max_str_digits`` attribute. ++ + + .. data:: float_info + +@@ -660,6 +664,15 @@ always available. + + .. versionadded:: 3.6 + ++ ++.. function:: get_int_max_str_digits() ++ ++ Returns the current value for the :ref:`integer string conversion length ++ limitation `. See also :func:`set_int_max_str_digits`. ++ ++ .. versionadded:: 3.9.14 ++ ++ + .. function:: getrefcount(object) + + Return the reference count of the *object*. The count returned is generally one +@@ -933,19 +946,31 @@ always available. + + .. tabularcolumns:: |l|L| + +- +-------------------------+----------------------------------------------+ +- | Attribute | Explanation | +- +=========================+==============================================+ +- | :const:`bits_per_digit` | number of bits held in each digit. 
Python | +- | | integers are stored internally in base | +- | | ``2**int_info.bits_per_digit`` | +- +-------------------------+----------------------------------------------+ +- | :const:`sizeof_digit` | size in bytes of the C type used to | +- | | represent a digit | +- +-------------------------+----------------------------------------------+ ++ +----------------------------------------+-----------------------------------------------+ ++ | Attribute | Explanation | ++ +========================================+===============================================+ ++ | :const:`bits_per_digit` | number of bits held in each digit. Python | ++ | | integers are stored internally in base | ++ | | ``2**int_info.bits_per_digit`` | ++ +----------------------------------------+-----------------------------------------------+ ++ | :const:`sizeof_digit` | size in bytes of the C type used to | ++ | | represent a digit | ++ +----------------------------------------+-----------------------------------------------+ ++ | :const:`default_max_str_digits` | default value for | ++ | | :func:`sys.get_int_max_str_digits` when it | ++ | | is not otherwise explicitly configured. | ++ +----------------------------------------+-----------------------------------------------+ ++ | :const:`str_digits_check_threshold` | minimum non-zero value for | ++ | | :func:`sys.set_int_max_str_digits`, | ++ | | :envvar:`PYTHONINTMAXSTRDIGITS`, or | ++ | | :option:`-X int_max_str_digits <-X>`. | ++ +----------------------------------------+-----------------------------------------------+ + + .. versionadded:: 3.1 + ++ .. versionchanged:: 3.9.14 ++ Added ``default_max_str_digits`` and ``str_digits_check_threshold``. ++ + + .. data:: __interactivehook__ + +@@ -1223,6 +1248,14 @@ always available. + + .. availability:: Unix. + ++.. function:: set_int_max_str_digits(n) ++ ++ Set the :ref:`integer string conversion length limitation ++ ` used by this interpreter. See also ++ :func:`get_int_max_str_digits`. ++ ++ .. 
versionadded:: 3.9.14 ++ + .. function:: setprofile(profilefunc) + + .. index:: +diff --git a/Doc/library/test.rst b/Doc/library/test.rst +index 16f908c..563197f 100644 +--- a/Doc/library/test.rst ++++ b/Doc/library/test.rst +@@ -1302,6 +1302,16 @@ The :mod:`test.support` module defines the following functions: + .. versionadded:: 3.6 + + ++.. function:: adjust_int_max_str_digits(max_digits) ++ ++ This function returns a context manager that will change the global ++ :func:`sys.set_int_max_str_digits` setting for the duration of the ++ context to allow execution of test code that needs a different limit ++ on the number of digits when converting between an integer and string. ++ ++ .. versionadded:: 3.9.14 ++ ++ + The :mod:`test.support` module defines the following classes: + + .. class:: TransientResource(exc, **kwargs) +diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst +index 5739388..66d8d57 100644 +--- a/Doc/using/cmdline.rst ++++ b/Doc/using/cmdline.rst +@@ -436,6 +436,9 @@ Miscellaneous options + stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start + tracing with a traceback limit of *NFRAME* frames. See the + :func:`tracemalloc.start` for more information. ++ * ``-X int_max_str_digits`` configures the :ref:`integer string conversion ++ length limitation `. See also ++ :envvar:`PYTHONINTMAXSTRDIGITS`. + * ``-X importtime`` to show how long each import takes. It shows module + name, cumulative time (including nested imports) and self time (excluding + nested imports). Note that its output may be broken in multi-threaded +@@ -480,6 +483,9 @@ Miscellaneous options + + The ``-X showalloccount`` option has been removed. + ++ .. versionadded:: 3.9.14 ++ The ``-X int_max_str_digits`` option. ++ + .. deprecated-removed:: 3.9 3.10 + The ``-X oldparser`` option. + +@@ -659,6 +665,13 @@ conflict. + + .. versionadded:: 3.2.3 + ++.. 
envvar:: PYTHONINTMAXSTRDIGITS ++ ++ If this variable is set to an integer, it is used to configure the ++ interpreter's global :ref:`integer string conversion length limitation ++ `. ++ ++ .. versionadded:: 3.9.14 + + .. envvar:: PYTHONIOENCODING + +diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst +index 0662adb..2270d3e 100644 +--- a/Doc/whatsnew/3.9.rst ++++ b/Doc/whatsnew/3.9.rst +@@ -1570,3 +1570,17 @@ URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal + characters are controlled by a new module level variable + ``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) + ++Notable security feature in 3.9.14 ++================================== ++ ++Converting between :class:`int` and :class:`str` in bases other than 2 ++(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) ++now raises a :exc:`ValueError` if the number of digits in string form is ++above a limit to avoid potential denial of service attacks due to the ++algorithmic complexity. This is a mitigation for `CVE-2020-10735 ++`_. ++This limit can be configured or disabled by environment variable, command ++line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion ++length limitation ` documentation. The default limit ++is 4300 digits in string form. 
++ +diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h +index 457a005..ad1b7e5 100644 +--- a/Include/internal/pycore_initconfig.h ++++ b/Include/internal/pycore_initconfig.h +@@ -156,6 +156,8 @@ extern PyStatus _PyConfig_SetPyArgv( + PyConfig *config, + const _PyArgv *args); + ++extern int _Py_global_config_int_max_str_digits; ++ + + /* --- Function used for testing ---------------------------------- */ + +diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h +index 551ad83..304d704 100644 +--- a/Include/internal/pycore_interp.h ++++ b/Include/internal/pycore_interp.h +@@ -154,6 +154,8 @@ struct _is { + */ + PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; + #endif ++ ++ int int_max_str_digits; + }; + + /* Used by _PyImport_Cleanup() */ +diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h +new file mode 100644 +index 0000000..ae04332 +--- /dev/null ++++ b/Include/internal/pycore_long.h +@@ -0,0 +1,49 @@ ++#ifndef Py_INTERNAL_LONG_H ++#define Py_INTERNAL_LONG_H ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#ifndef Py_BUILD_CORE ++# error "this header requires Py_BUILD_CORE define" ++#endif ++ ++/* ++ * Default int base conversion size limitation: Denial of Service prevention. ++ * ++ * Chosen such that this isn't wildly slow on modern hardware and so that ++ * everyone's existing deployed numpy test suite passes before ++ * https://github.com/numpy/numpy/issues/22098 is widely available. ++ * ++ * $ python -m timeit -s 's = "1"*4300' 'int(s)' ++ * 2000 loops, best of 5: 125 usec per loop ++ * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' ++ * 1000 loops, best of 5: 311 usec per loop ++ * (zen2 cloud VM) ++ * ++ * 4300 decimal digits fits a ~14284 bit number. ++ */ ++#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300 ++/* ++ * Threshold for max digits check. 
For performance reasons int() and ++ * int.__str__() don't check values that are smaller than this ++ * threshold. Acts as a guaranteed minimum size limit for bignums that ++ * applications can expect from CPython. ++ * ++ * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))' ++ * 20000 loops, best of 5: 12 usec per loop ++ * ++ * "640 digits should be enough for anyone." - gps ++ * fits a ~2126 bit decimal number. ++ */ ++#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640 ++ ++#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ ++ (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) ++# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold." ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++#endif /* !Py_INTERNAL_LONG_H */ +diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py +index 11818ac..e5d6e2b 100644 +--- a/Lib/test/support/__init__.py ++++ b/Lib/test/support/__init__.py +@@ -3251,6 +3251,17 @@ def clear_ignored_deprecations(*tokens: object) -> None: + warnings._filters_mutated() + + ++@contextlib.contextmanager ++def adjust_int_max_str_digits(max_digits): ++ """Temporarily change the integer string conversion length limit.""" ++ current = sys.get_int_max_str_digits() ++ try: ++ sys.set_int_max_str_digits(max_digits) ++ yield ++ finally: ++ sys.set_int_max_str_digits(current) ++ ++ + def fails_in_fips_mode(expected_error): + import _hashlib + if _hashlib.get_fips_mode(): +diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py +index c3e3be6..a048d38 100644 +--- a/Lib/test/test_ast.py ++++ b/Lib/test/test_ast.py +@@ -978,6 +978,14 @@ Module( + self.assertRaises(ValueError, ast.literal_eval, '+True') + self.assertRaises(ValueError, ast.literal_eval, '2+3') + ++ def test_literal_eval_str_int_limit(self): ++ with support.adjust_int_max_str_digits(4000): ++ ast.literal_eval('3'*4000) # no error ++ with self.assertRaises(SyntaxError) as err_ctx: ++ ast.literal_eval('3'*4001) ++ self.assertIn('Exceeds the limit ', 
str(err_ctx.exception)) ++ self.assertIn(' Consider hexadecimal ', str(err_ctx.exception)) ++ + def test_literal_eval_complex(self): + # Issue #4907 + self.assertEqual(ast.literal_eval('6j'), 6j) +diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py +index 712d861..7ad9263 100644 +--- a/Lib/test/test_cmd_line.py ++++ b/Lib/test/test_cmd_line.py +@@ -804,6 +804,39 @@ class CmdLineTest(unittest.TestCase): + self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) + self.assertNotEqual(proc.returncode, 0) + ++ def test_int_max_str_digits(self): ++ code = "import sys; print(sys.flags.int_max_str_digits, sys.get_int_max_str_digits())" ++ ++ assert_python_failure('-X', 'int_max_str_digits', '-c', code) ++ assert_python_failure('-X', 'int_max_str_digits=foo', '-c', code) ++ assert_python_failure('-X', 'int_max_str_digits=100', '-c', code) ++ ++ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo') ++ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100') ++ ++ def res2int(res): ++ out = res.out.strip().decode("utf-8") ++ return tuple(int(i) for i in out.split()) ++ ++ res = assert_python_ok('-c', code) ++ self.assertEqual(res2int(res), (-1, sys.get_int_max_str_digits())) ++ res = assert_python_ok('-X', 'int_max_str_digits=0', '-c', code) ++ self.assertEqual(res2int(res), (0, 0)) ++ res = assert_python_ok('-X', 'int_max_str_digits=4000', '-c', code) ++ self.assertEqual(res2int(res), (4000, 4000)) ++ res = assert_python_ok('-X', 'int_max_str_digits=100000', '-c', code) ++ self.assertEqual(res2int(res), (100000, 100000)) ++ ++ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0') ++ self.assertEqual(res2int(res), (0, 0)) ++ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000') ++ self.assertEqual(res2int(res), (4000, 4000)) ++ res = assert_python_ok( ++ '-X', 'int_max_str_digits=6000', '-c', code, ++ PYTHONINTMAXSTRDIGITS='4000' ++ ) ++ self.assertEqual(res2int(res), (6000, 6000)) ++ + + 
@unittest.skipIf(interpreter_requires_environment(), + 'Cannot run -I tests when PYTHON env vars are required.') +diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py +index 55716fd..ec776b9 100644 +--- a/Lib/test/test_compile.py ++++ b/Lib/test/test_compile.py +@@ -189,6 +189,19 @@ if 1: + self.assertEqual(eval("0o777"), 511) + self.assertEqual(eval("-0o0000010"), -8) + ++ def test_int_literals_too_long(self): ++ n = 3000 ++ source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4" ++ with support.adjust_int_max_str_digits(n): ++ compile(source, "", "exec") # no errors. ++ with support.adjust_int_max_str_digits(n-1): ++ with self.assertRaises(SyntaxError) as err_ctx: ++ compile(source, "", "exec") ++ exc = err_ctx.exception ++ self.assertEqual(exc.lineno, 3) ++ self.assertIn('Exceeds the limit ', str(exc)) ++ self.assertIn(' Consider hexadecimal ', str(exc)) ++ + def test_unary_minus(self): + # Verify treatment of unary minus on negative numbers SF bug #660455 + if sys.maxsize == 2147483647: +diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py +index 3f30a93..c992815 100644 +--- a/Lib/test/test_decimal.py ++++ b/Lib/test/test_decimal.py +@@ -2462,6 +2462,15 @@ class CUsabilityTest(UsabilityTest): + class PyUsabilityTest(UsabilityTest): + decimal = P + ++ def setUp(self): ++ super().setUp() ++ self._previous_int_limit = sys.get_int_max_str_digits() ++ sys.set_int_max_str_digits(7000) ++ ++ def tearDown(self): ++ sys.set_int_max_str_digits(self._previous_int_limit) ++ super().tearDown() ++ + class PythonAPItests(unittest.TestCase): + + def test_abc(self): +@@ -4519,6 +4528,15 @@ class CCoverage(Coverage): + class PyCoverage(Coverage): + decimal = P + ++ def setUp(self): ++ super().setUp() ++ self._previous_int_limit = sys.get_int_max_str_digits() ++ sys.set_int_max_str_digits(7000) ++ ++ def tearDown(self): ++ sys.set_int_max_str_digits(self._previous_int_limit) ++ super().tearDown() ++ + class PyFunctionality(unittest.TestCase): + """Extra functionality 
in decimal.py""" + +diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py +index 6fdf52e..cbbddf5 100644 +--- a/Lib/test/test_int.py ++++ b/Lib/test/test_int.py +@@ -1,4 +1,5 @@ + import sys ++import time + + import unittest + from test import support +@@ -571,5 +572,200 @@ class IntTestCases(unittest.TestCase): + self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) + + ++class IntStrDigitLimitsTests(unittest.TestCase): ++ ++ int_class = int # Override this in subclasses to reuse the suite. ++ ++ def setUp(self): ++ super().setUp() ++ self._previous_limit = sys.get_int_max_str_digits() ++ sys.set_int_max_str_digits(2048) ++ ++ def tearDown(self): ++ sys.set_int_max_str_digits(self._previous_limit) ++ super().tearDown() ++ ++ def test_disabled_limit(self): ++ self.assertGreater(sys.get_int_max_str_digits(), 0) ++ self.assertLess(sys.get_int_max_str_digits(), 20_000) ++ with support.adjust_int_max_str_digits(0): ++ self.assertEqual(sys.get_int_max_str_digits(), 0) ++ i = self.int_class('1' * 20_000) ++ str(i) ++ self.assertGreater(sys.get_int_max_str_digits(), 0) ++ ++ def test_max_str_digits_edge_cases(self): ++ """Ignore the +/- sign and space padding.""" ++ int_class = self.int_class ++ maxdigits = sys.get_int_max_str_digits() ++ ++ int_class('1' * maxdigits) ++ int_class(' ' + '1' * maxdigits) ++ int_class('1' * maxdigits + ' ') ++ int_class('+' + '1' * maxdigits) ++ int_class('-' + '1' * maxdigits) ++ self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) ++ ++ def check(self, i, base=None): ++ with self.assertRaises(ValueError): ++ if base is None: ++ self.int_class(i) ++ else: ++ self.int_class(i, base) ++ ++ def test_max_str_digits(self): ++ maxdigits = sys.get_int_max_str_digits() ++ ++ self.check('1' * (maxdigits + 1)) ++ self.check(' ' + '1' * (maxdigits + 1)) ++ self.check('1' * (maxdigits + 1) + ' ') ++ self.check('+' + '1' * (maxdigits + 1)) ++ self.check('-' + '1' * (maxdigits + 1)) ++ self.check('1' * (maxdigits + 1)) ++ ++ i = 10 ** 
maxdigits ++ with self.assertRaises(ValueError): ++ str(i) ++ ++ def test_denial_of_service_prevented_int_to_str(self): ++ """Regression test: ensure we fail before performing O(N**2) work.""" ++ maxdigits = sys.get_int_max_str_digits() ++ assert maxdigits < 50_000, maxdigits # A test prerequisite. ++ get_time = time.process_time ++ if get_time() <= 0: # some platforms like WASM lack process_time() ++ get_time = time.monotonic ++ ++ huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits. ++ digits = 78_268 ++ with support.adjust_int_max_str_digits(digits): ++ start = get_time() ++ huge_decimal = str(huge_int) ++ seconds_to_convert = get_time() - start ++ self.assertEqual(len(huge_decimal), digits) ++ # Ensuring that we chose a slow enough conversion to measure. ++ # It takes 0.1 seconds on a Zen based cloud VM in an opt build. ++ if seconds_to_convert < 0.005: ++ raise unittest.SkipTest('"slow" conversion took only ' ++ f'{seconds_to_convert} seconds.') ++ ++ # We test with the limit almost at the size needed to check performance. ++ # The performant limit check is slightly fuzzy, give it a some room. ++ with support.adjust_int_max_str_digits(int(.995 * digits)): ++ with self.assertRaises(ValueError) as err: ++ start = get_time() ++ str(huge_int) ++ seconds_to_fail_huge = get_time() - start ++ self.assertIn('conversion', str(err.exception)) ++ self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) ++ ++ # Now we test that a conversion that would take 30x as long also fails ++ # in a similarly fast fashion. ++ extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits. ++ with self.assertRaises(ValueError) as err: ++ start = get_time() ++ # If not limited, 8 seconds said Zen based cloud VM. 
++ str(extra_huge_int) ++ seconds_to_fail_extra_huge = get_time() - start ++ self.assertIn('conversion', str(err.exception)) ++ self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) ++ ++ def test_denial_of_service_prevented_str_to_int(self): ++ """Regression test: ensure we fail before performing O(N**2) work.""" ++ maxdigits = sys.get_int_max_str_digits() ++ assert maxdigits < 100_000, maxdigits # A test prerequisite. ++ get_time = time.process_time ++ if get_time() <= 0: # some platforms like WASM lack process_time() ++ get_time = time.monotonic ++ ++ digits = 133700 ++ huge = '8'*digits ++ with support.adjust_int_max_str_digits(digits): ++ start = get_time() ++ int(huge) ++ seconds_to_convert = get_time() - start ++ # Ensuring that we chose a slow enough conversion to measure. ++ # It takes 0.1 seconds on a Zen based cloud VM in an opt build. ++ if seconds_to_convert < 0.005: ++ raise unittest.SkipTest('"slow" conversion took only ' ++ f'{seconds_to_convert} seconds.') ++ ++ with support.adjust_int_max_str_digits(digits - 1): ++ with self.assertRaises(ValueError) as err: ++ start = get_time() ++ int(huge) ++ seconds_to_fail_huge = get_time() - start ++ self.assertIn('conversion', str(err.exception)) ++ self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) ++ ++ # Now we test that a conversion that would take 30x as long also fails ++ # in a similarly fast fashion. ++ extra_huge = '7'*1_200_000 ++ with self.assertRaises(ValueError) as err: ++ start = get_time() ++ # If not limited, 8 seconds in the Zen based cloud VM. 
++ int(extra_huge) ++ seconds_to_fail_extra_huge = get_time() - start ++ self.assertIn('conversion', str(err.exception)) ++ self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) ++ ++ def test_power_of_two_bases_unlimited(self): ++ """The limit does not apply to power of 2 bases.""" ++ maxdigits = sys.get_int_max_str_digits() ++ ++ for base in (2, 4, 8, 16, 32): ++ with self.subTest(base=base): ++ self.int_class('1' * (maxdigits + 1), base) ++ assert maxdigits < 100_000 ++ self.int_class('1' * 100_000, base) ++ ++ def test_underscores_ignored(self): ++ maxdigits = sys.get_int_max_str_digits() ++ ++ triples = maxdigits // 3 ++ s = '111' * triples ++ s_ = '1_11' * triples ++ self.int_class(s) # succeeds ++ self.int_class(s_) # succeeds ++ self.check(f'{s}111') ++ self.check(f'{s_}_111') ++ ++ def test_sign_not_counted(self): ++ int_class = self.int_class ++ max_digits = sys.get_int_max_str_digits() ++ s = '5' * max_digits ++ i = int_class(s) ++ pos_i = int_class(f'+{s}') ++ assert i == pos_i ++ neg_i = int_class(f'-{s}') ++ assert -pos_i == neg_i ++ str(pos_i) ++ str(neg_i) ++ ++ def _other_base_helper(self, base): ++ int_class = self.int_class ++ max_digits = sys.get_int_max_str_digits() ++ s = '2' * max_digits ++ i = int_class(s, base) ++ if base > 10: ++ with self.assertRaises(ValueError): ++ str(i) ++ elif base < 10: ++ str(i) ++ with self.assertRaises(ValueError) as err: ++ int_class(f'{s}1', base) ++ ++ def test_int_from_other_bases(self): ++ base = 3 ++ with self.subTest(base=base): ++ self._other_base_helper(base) ++ base = 36 ++ with self.subTest(base=base): ++ self._other_base_helper(base) ++ ++ ++class IntSubclassStrDigitLimitsTests(IntStrDigitLimitsTests): ++ int_class = IntSubclass ++ ++ + if __name__ == "__main__": + unittest.main() +diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py +index fdb9e62..124045b 100644 +--- a/Lib/test/test_json/test_decode.py ++++ b/Lib/test/test_json/test_decode.py +@@ -2,6 +2,7 
@@ import decimal + from io import StringIO + from collections import OrderedDict + from test.test_json import PyTest, CTest ++from test import support + + + class TestDecode: +@@ -95,5 +96,13 @@ class TestDecode: + d = self.json.JSONDecoder() + self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) + ++ def test_limit_int(self): ++ maxdigits = 5000 ++ with support.adjust_int_max_str_digits(maxdigits): ++ self.loads('1' * maxdigits) ++ with self.assertRaises(ValueError): ++ self.loads('1' * (maxdigits + 1)) ++ ++ + class TestPyDecode(TestDecode, PyTest): pass + class TestCDecode(TestDecode, CTest): pass +diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py +index 2f1e5e9..b11f1ff 100644 +--- a/Lib/test/test_sys.py ++++ b/Lib/test/test_sys.py +@@ -409,11 +409,17 @@ class SysModuleTest(unittest.TestCase): + self.assertIsInstance(sys.executable, str) + self.assertEqual(len(sys.float_info), 11) + self.assertEqual(sys.float_info.radix, 2) +- self.assertEqual(len(sys.int_info), 2) ++ self.assertEqual(len(sys.int_info), 4) + self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) + self.assertTrue(sys.int_info.sizeof_digit >= 1) ++ self.assertGreaterEqual(sys.int_info.default_max_str_digits, 500) ++ self.assertGreaterEqual(sys.int_info.str_digits_check_threshold, 100) ++ self.assertGreater(sys.int_info.default_max_str_digits, ++ sys.int_info.str_digits_check_threshold) + self.assertEqual(type(sys.int_info.bits_per_digit), int) + self.assertEqual(type(sys.int_info.sizeof_digit), int) ++ self.assertIsInstance(sys.int_info.default_max_str_digits, int) ++ self.assertIsInstance(sys.int_info.str_digits_check_threshold, int) + self.assertIsInstance(sys.hexversion, int) + + self.assertEqual(len(sys.hash_info), 9) +@@ -517,7 +523,8 @@ class SysModuleTest(unittest.TestCase): + "inspect", "interactive", "optimize", + "dont_write_bytecode", "no_user_site", "no_site", + "ignore_environment", "verbose", "bytes_warning", "quiet", +- "hash_randomization", "isolated", "dev_mode", 
"utf8_mode") ++ "hash_randomization", "isolated", "dev_mode", "utf8_mode", ++ "int_max_str_digits") + for attr in attrs: + self.assertTrue(hasattr(sys.flags, attr), attr) + attr_type = bool if attr == "dev_mode" else int +diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py +index f714b77..d12da2f 100644 +--- a/Lib/test/test_xmlrpc.py ++++ b/Lib/test/test_xmlrpc.py +@@ -285,6 +285,16 @@ class XMLRPCTestCase(unittest.TestCase): + check('9876543210.0123456789', + decimal.Decimal('9876543210.0123456789')) + ++ def test_limit_int(self): ++ check = self.check_loads ++ maxdigits = 5000 ++ with support.adjust_int_max_str_digits(maxdigits): ++ s = '1' * (maxdigits + 1) ++ with self.assertRaises(ValueError): ++ check(f'{s}', None) ++ with self.assertRaises(ValueError): ++ check(f'{s}', None) ++ + def test_get_host_info(self): + # see bug #3613, this raised a TypeError + transp = xmlrpc.client.Transport() +diff --git a/Makefile.pre.in b/Makefile.pre.in +index c1cf158..b64837c 100644 +--- a/Makefile.pre.in ++++ b/Makefile.pre.in +@@ -1153,6 +1153,7 @@ PYTHON_HEADERS= \ + $(srcdir)/Include/internal/pycore_import.h \ + $(srcdir)/Include/internal/pycore_initconfig.h \ + $(srcdir)/Include/internal/pycore_interp.h \ ++ $(srcdir)/Include/internal/pycore_long.h \ + $(srcdir)/Include/internal/pycore_object.h \ + $(srcdir)/Include/internal/pycore_pathconfig.h \ + $(srcdir)/Include/internal/pycore_pyerrors.h \ +diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +new file mode 100644 +index 0000000..8eb8a34 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +@@ -0,0 +1,14 @@ ++Converting between :class:`int` and :class:`str` in bases other than 2 ++(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now ++raises a :exc:`ValueError` if the number of digits in string form is above a ++limit to avoid 
potential denial of service attacks due to the algorithmic ++complexity. This is a mitigation for `CVE-2020-10735 ++`_. ++ ++This new limit can be configured or disabled by environment variable, command ++line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length ++limitation ` documentation. The default limit is 4300 ++digits in string form. ++ ++Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback ++from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson. +diff --git a/Objects/longobject.c b/Objects/longobject.c +index cf13b2c..ec18ec3 100644 +--- a/Objects/longobject.c ++++ b/Objects/longobject.c +@@ -3,7 +3,9 @@ + /* XXX The functional organization of this file is terrible */ + + #include "Python.h" ++#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits + #include "pycore_interp.h" // _PY_NSMALLPOSINTS ++#include "pycore_long.h" + #include "pycore_pystate.h" // _Py_IsMainInterpreter() + #include "longintrepr.h" + +@@ -36,6 +38,9 @@ PyObject *_PyLong_One = NULL; + #define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS) + #define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS) + ++#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" ++#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" ++ + static PyObject * + get_small_int(sdigit ival) + { +@@ -1718,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa, + size_a = Py_ABS(Py_SIZE(a)); + negative = Py_SIZE(a) < 0; + ++ /* quick and dirty pre-check for overflowing the decimal digit limit, ++ based on the inequality 10/3 >= log2(10) ++ ++ explanation in https://github.com/python/cpython/pull/96537 ++ */ ++ if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD ++ / (3 * PyLong_SHIFT) + 2) { ++ PyInterpreterState *interp = _PyInterpreterState_GET(); ++ int max_str_digits = interp->int_max_str_digits; 
++ if ((max_str_digits > 0) && ++ (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { ++ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, ++ max_str_digits); ++ return -1; ++ } ++ } ++ + /* quick and dirty upper bound for the number of digits + required to express a in base _PyLong_DECIMAL_BASE: + +@@ -1777,6 +1799,17 @@ long_to_decimal_string_internal(PyObject *aa, + tenpow *= 10; + strlen++; + } ++ if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { ++ PyInterpreterState *interp = _PyInterpreterState_GET(); ++ int max_str_digits = interp->int_max_str_digits; ++ Py_ssize_t strlen_nosign = strlen - negative; ++ if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { ++ Py_DECREF(scratch); ++ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, ++ max_str_digits); ++ return -1; ++ } ++ } + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { + Py_DECREF(scratch); +@@ -2290,6 +2323,7 @@ PyLong_FromString(const char *str, char **pend, int base) + + start = str; + if ((base & (base - 1)) == 0) { ++ /* binary bases are not limited by int_max_str_digits */ + int res = long_from_binary_base(&str, base, &z); + if (res < 0) { + /* Syntax error. */ +@@ -2441,6 +2475,17 @@ digit beyond the first. + goto onError; + } + ++ /* Limit the size to avoid excessive computation attacks. */ ++ if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { ++ PyInterpreterState *interp = _PyInterpreterState_GET(); ++ int max_str_digits = interp->int_max_str_digits; ++ if ((max_str_digits > 0) && (digits > max_str_digits)) { ++ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, ++ max_str_digits, digits); ++ return NULL; ++ } ++ } ++ + /* Create an int object that can contain the largest possible + * integer with this base and length. 
Note that there's no + * need to initialize z->ob_digit -- no slot is read up before +@@ -5071,6 +5116,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) + } + return PyLong_FromLong(0L); + } ++ /* default base and limit, forward to standard implementation */ + if (obase == NULL) + return PyNumber_Long(x); + +@@ -5723,6 +5769,8 @@ internal representation of integers. The attributes are read only."); + static PyStructSequence_Field int_info_fields[] = { + {"bits_per_digit", "size of a digit in bits"}, + {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, ++ {"default_max_str_digits", "maximum string conversion digits limitation"}, ++ {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, + {NULL, NULL} + }; + +@@ -5730,7 +5778,7 @@ static PyStructSequence_Desc int_info_desc = { + "sys.int_info", /* name */ + int_info__doc__, /* doc */ + int_info_fields, /* fields */ +- 2 /* number of fields */ ++ 4 /* number of fields */ + }; + + PyObject * +@@ -5745,6 +5793,17 @@ PyLong_GetInfo(void) + PyLong_FromLong(PyLong_SHIFT)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(sizeof(digit))); ++ /* ++ * The following two fields were added after investigating uses of ++ * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was ++ * numba using sys.int_info.bits_per_digit as attribute access rather than ++ * sequence unpacking. Cython and sympy also refer to sys.int_info but only ++ * as info for debugging. No concern about adding these in a backport. 
++ */ ++ PyStructSequence_SET_ITEM(int_info, field++, ++ PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); ++ PyStructSequence_SET_ITEM(int_info, field++, ++ PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + if (PyErr_Occurred()) { + Py_CLEAR(int_info); + return NULL; +@@ -5790,6 +5849,10 @@ _PyLong_Init(PyThreadState *tstate) + } + } + } ++ tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits; ++ if (tstate->interp->int_max_str_digits == -1) { ++ tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; ++ } + + return 1; + } +diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c +index cdfbc12..15b06ce 100644 +--- a/Parser/pegen/pegen.c ++++ b/Parser/pegen/pegen.c +@@ -967,6 +967,24 @@ _PyPegen_number_token(Parser *p) + + if (c == NULL) { + p->error_indicator = 1; ++ PyObject *exc_type, *exc_value, *exc_tb; ++ PyErr_Fetch(&exc_type, &exc_value, &exc_tb); ++ // The only way a ValueError should happen in _this_ code is via ++ // PyLong_FromString hitting a length limit. ++ if (exc_type == PyExc_ValueError && exc_value != NULL) { ++ // The Fetch acted as PyErr_Clear(), we're replacing the exception. 
++ Py_XDECREF(exc_tb); ++ Py_DECREF(exc_type); ++ RAISE_ERROR_KNOWN_LOCATION( ++ p, PyExc_SyntaxError, ++ t->lineno, 0 /* col_offset */, ++ "%S - Consider hexadecimal for huge integer literals " ++ "to avoid decimal conversion limits.", ++ exc_value); ++ Py_DECREF(exc_value); ++ } else { ++ PyErr_Restore(exc_type, exc_value, exc_tb); ++ } + return NULL; + } + +diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h +index 4615eba..4144408 100644 +--- a/Python/clinic/sysmodule.c.h ++++ b/Python/clinic/sysmodule.c.h +@@ -667,6 +667,64 @@ exit: + + #endif /* defined(USE_MALLOPT) */ + ++PyDoc_STRVAR(sys_get_int_max_str_digits__doc__, ++"get_int_max_str_digits($module, /)\n" ++"--\n" ++"\n" ++"Set the maximum string digits limit for non-binary int<->str conversions."); ++ ++#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \ ++ {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__}, ++ ++static PyObject * ++sys_get_int_max_str_digits_impl(PyObject *module); ++ ++static PyObject * ++sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) ++{ ++ return sys_get_int_max_str_digits_impl(module); ++} ++ ++PyDoc_STRVAR(sys_set_int_max_str_digits__doc__, ++"set_int_max_str_digits($module, /, maxdigits)\n" ++"--\n" ++"\n" ++"Set the maximum string digits limit for non-binary int<->str conversions."); ++ ++#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \ ++ {"set_int_max_str_digits", (PyCFunction)(void(*)(void))sys_set_int_max_str_digits, METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__}, ++ ++static PyObject * ++sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits); ++ ++static PyObject * ++sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) ++{ ++ PyObject *return_value = NULL; ++ static const char * const _keywords[] = {"maxdigits", NULL}; ++ static _PyArg_Parser _parser = {NULL, _keywords, 
"set_int_max_str_digits", 0}; ++ PyObject *argsbuf[1]; ++ int maxdigits; ++ ++ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); ++ if (!args) { ++ goto exit; ++ } ++ if (PyFloat_Check(args[0])) { ++ PyErr_SetString(PyExc_TypeError, ++ "integer argument expected, got float" ); ++ goto exit; ++ } ++ maxdigits = _PyLong_AsInt(args[0]); ++ if (maxdigits == -1 && PyErr_Occurred()) { ++ goto exit; ++ } ++ return_value = sys_set_int_max_str_digits_impl(module, maxdigits); ++ ++exit: ++ return return_value; ++} ++ + PyDoc_STRVAR(sys_getrefcount__doc__, + "getrefcount($module, object, /)\n" + "--\n" +@@ -970,4 +1028,4 @@ sys_getandroidapilevel(PyObject *module, PyObject *Py_UNUSED(ignored)) + #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF + #define SYS_GETANDROIDAPILEVEL_METHODDEF + #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ +-/*[clinic end generated code: output=39eb34a01fb9a919 input=a9049054013a1b77]*/ ++/*[clinic end generated code: output=401254a595859ac6 input=a9049054013a1b77]*/ +diff --git a/Python/initconfig.c b/Python/initconfig.c +index 116ee33..a2c435f 100644 +--- a/Python/initconfig.c ++++ b/Python/initconfig.c +@@ -3,6 +3,7 @@ + #include "pycore_getopt.h" // _PyOS_GetOpt() + #include "pycore_initconfig.h" // _PyStatus_OK() + #include "pycore_interp.h" // _PyInterpreterState.runtime ++#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD + #include "pycore_pathconfig.h" // _Py_path_config + #include "pycore_pyerrors.h" // _PyErr_Fetch() + #include "pycore_pylifecycle.h" // _Py_PreInitializeFromConfig() +@@ -99,6 +100,9 @@ static const char usage_3[] = "\ + otherwise activate automatically)\n\ + -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ + given directory instead of to the code tree\n\ ++ -X int_max_str_digits=number: limit the size of int<->str conversions.\n\ ++ This helps avoid denial of service attacks when parsing untrusted data.\n\ ++ The default is 
sys.int_info.default_max_str_digits. 0 disables.\n\ + \n\ + --check-hash-based-pycs always|default|never:\n\ + control how Python invalidates hash-based .pyc files\n\ +@@ -125,6 +129,10 @@ static const char usage_6[] = + " to seed the hashes of str and bytes objects. It can also be set to an\n" + " integer in the range [0,4294967295] to get hash values with a\n" + " predictable seed.\n" ++"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n" ++" when converting from a string and when converting an int back to a str.\n" ++" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n" ++" 16, and 32 are never limited.\n" + "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" + " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" + " hooks.\n" +@@ -646,6 +654,10 @@ _PyConfig_InitCompatConfig(PyConfig *config) + config->_use_peg_parser = 1; + } + ++/* Excluded from public struct PyConfig for backporting reasons. 
*/ ++/* default to unconfigured, _PyLong_Init() does the rest */ ++int _Py_global_config_int_max_str_digits = -1; ++ + + static void + config_init_defaults(PyConfig *config) +@@ -1410,6 +1422,48 @@ config_init_tracemalloc(PyConfig *config) + return _PyStatus_OK(); + } + ++static PyStatus ++config_init_int_max_str_digits(PyConfig *config) ++{ ++ int maxdigits; ++ int valid = 0; ++ ++ const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS"); ++ if (env) { ++ if (!_Py_str_to_int(env, &maxdigits)) { ++ valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); ++ } ++ if (!valid) { ++#define STRINGIFY(VAL) _STRINGIFY(VAL) ++#define _STRINGIFY(VAL) #VAL ++ return _PyStatus_ERR( ++ "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " ++ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) ++ " or 0 for unlimited."); ++ } ++ _Py_global_config_int_max_str_digits = maxdigits; ++ } ++ ++ const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); ++ if (xoption) { ++ const wchar_t *sep = wcschr(xoption, L'='); ++ if (sep) { ++ if (!config_wstr_to_int(sep + 1, &maxdigits)) { ++ valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); ++ } ++ } ++ if (!valid) { ++ return _PyStatus_ERR( ++ "-X int_max_str_digits: invalid limit; must be >= " ++ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) ++ " or 0 for unlimited."); ++#undef _STRINGIFY ++#undef STRINGIFY ++ } ++ _Py_global_config_int_max_str_digits = maxdigits; ++ } ++ return _PyStatus_OK(); ++} + + static PyStatus + config_init_pycache_prefix(PyConfig *config) +@@ -1466,6 +1520,12 @@ config_read_complex_options(PyConfig *config) + return status; + } + } ++ if (_Py_global_config_int_max_str_digits < 0) { ++ status = config_init_int_max_str_digits(config); ++ if (_PyStatus_EXCEPTION(status)) { ++ return status; ++ } ++ } + + if (config->pycache_prefix == NULL) { + status = config_init_pycache_prefix(config); +diff --git a/Python/sysmodule.c b/Python/sysmodule.c +index 
a52b299..8efa850 100644 +--- a/Python/sysmodule.c ++++ b/Python/sysmodule.c +@@ -19,6 +19,7 @@ Data members: + #include "frameobject.h" // PyFrame_GetBack() + #include "pycore_ceval.h" + #include "pycore_initconfig.h" ++#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD + #include "pycore_object.h" + #include "pycore_pathconfig.h" + #include "pycore_pyerrors.h" +@@ -1636,6 +1637,45 @@ sys_mdebug_impl(PyObject *module, int flag) + } + #endif /* USE_MALLOPT */ + ++ ++/*[clinic input] ++sys.get_int_max_str_digits ++ ++Set the maximum string digits limit for non-binary int<->str conversions. ++[clinic start generated code]*/ ++ ++static PyObject * ++sys_get_int_max_str_digits_impl(PyObject *module) ++/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/ ++{ ++ PyInterpreterState *interp = _PyInterpreterState_GET(); ++ return PyLong_FromSsize_t(interp->int_max_str_digits); ++} ++ ++/*[clinic input] ++sys.set_int_max_str_digits ++ ++ maxdigits: int ++ ++Set the maximum string digits limit for non-binary int<->str conversions. 
++[clinic start generated code]*/ ++ ++static PyObject * ++sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits) ++/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/ ++{ ++ PyThreadState *tstate = _PyThreadState_GET(); ++ if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { ++ tstate->interp->int_max_str_digits = maxdigits; ++ Py_RETURN_NONE; ++ } else { ++ PyErr_Format( ++ PyExc_ValueError, "maxdigits must be 0 or larger than %d", ++ _PY_LONG_MAX_STR_DIGITS_THRESHOLD); ++ return NULL; ++ } ++} ++ + size_t + _PySys_GetSizeOf(PyObject *o) + { +@@ -1980,6 +2020,8 @@ static PyMethodDef sys_methods[] = { + SYS_GET_ASYNCGEN_HOOKS_METHODDEF + SYS_GETANDROIDAPILEVEL_METHODDEF + SYS_UNRAISABLEHOOK_METHODDEF ++ SYS_GET_INT_MAX_STR_DIGITS_METHODDEF ++ SYS_SET_INT_MAX_STR_DIGITS_METHODDEF + {NULL, NULL} /* sentinel */ + }; + +@@ -2440,6 +2482,7 @@ static PyStructSequence_Field flags_fields[] = { + {"isolated", "-I"}, + {"dev_mode", "-X dev"}, + {"utf8_mode", "-X utf8"}, ++ {"int_max_str_digits", "-X int_max_str_digits"}, + {0} + }; + +@@ -2447,7 +2490,7 @@ static PyStructSequence_Desc flags_desc = { + "sys.flags", /* name */ + flags__doc__, /* doc */ + flags_fields, /* fields */ +- 15 ++ 16 + }; + + static PyObject* +@@ -2483,6 +2526,7 @@ make_flags(PyThreadState *tstate) + SetFlag(config->isolated); + PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode)); + SetFlag(preconfig->utf8_mode); ++ SetFlag(_Py_global_config_int_max_str_digits); + #undef SetFlag + + if (_PyErr_Occurred(tstate)) { +-- +2.37.3 + diff --git a/SPECS/python3.9.spec b/SPECS/python3.9.spec index dea8a1d..e3c7e4b 100644 --- a/SPECS/python3.9.spec +++ b/SPECS/python3.9.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... 
%global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 2%{?dist} +Release: 3%{?dist} License: Python @@ -399,6 +399,52 @@ Patch329: 00329-fips.patch # a nightmare because it's basically a binary file. Patch353: 00353-architecture-names-upstream-downstream.patch +# 00378 # +# Support expat 2.4.5 +# +# Curly brackets were never allowed in namespace URIs +# according to RFC 3986, and so-called namespace-validating +# XML parsers have the right to reject them as invalid URIs. +# +# libexpat >=2.4.5 has become stricter in that regard due to +# related security issues; with ET.XML instantiating a +# namespace-aware parser under the hood, this test has no +# future in CPython. +# +# References: +# - https://datatracker.ietf.org/doc/html/rfc3986 +# - https://www.w3.org/TR/xml-names/ +# +# Also, test_minidom.py: Support Expat >=2.4.5 +# +# The patch has diverged from upstream as the python test +# suite was relying on checking the expat version, whereas +# in RHEL fixes get backported instead of rebasing packages. +# +# Upstream: https://bugs.python.org/issue46811 +Patch378: 00378-support-expat-2-4-5.patch + +# 00387 # 87d28f3f0f0c9165c67b2a156134c614c6f6dcf5 +# CVE-2020-10735: Prevent DoS by very large int() +# +# gh-95778: CVE-2020-10735: Prevent DoS by very large int() (GH-96504) +# +# Converting between `int` and `str` in bases other than 2 +# (binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now +# raises a `ValueError` if the number of digits in string form is above a +# limit to avoid potential denial of service attacks due to the algorithmic +# complexity. This is a mitigation for CVE-2020-10735 +# (https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735). +# +# This new limit can be configured or disabled by environment variable, command +# line flag, or :mod:`sys` APIs. See the `Integer String Conversion Length +# Limitation` documentation.
The default limit is 4300 +# digits in string form. +# +# Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback +# from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson. +Patch387: 00387-cve-2020-10735-prevent-dos-by-very-large-int.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -1800,6 +1846,11 @@ CheckPython optimized # ====================================================== %changelog +* Fri Sep 23 2022 Charalampos Stratakis - 3.9.10-3 +- Security fix for CVE-2020-10735 +- Fix the test suite support for Expat >= 2.4.5 +Resolves: rhbz#1834423 + * Wed Feb 09 2022 Charalampos Stratakis - 3.9.10-2 - Fix undefined behavior in Modules/_hashopenssl.c Resolves: rhbz#1942527