Merge pull request #10109 from BKPepe/python3

python3: backport three security patches
5 years ago · 33c0a613f8
--- a/lang/python/python3/Makefile
+++ b/lang/python/python3/Makefile
@ -14,7 +14,7 @@ PYTHON_VERSION:=$(PYTHON3_VERSION)
 PYTHON_VERSION_MICRO:=$(PYTHON3_VERSION_MICRO)
 PKG_NAME:=python3
-PKG_RELEASE:=3
+PKG_RELEASE:=4
 PKG_VERSION:=$(PYTHON_VERSION).$(PYTHON_VERSION_MICRO)
 PKG_SOURCE:=Python-$(PKG_VERSION).tar.xz
--- a/lang/python/python3/patches/025-bpo-37461-Fix-infinite-loop-in-parsing-of-specially-.patch
+++ b/lang/python/python3/patches/025-bpo-37461-Fix-infinite-loop-in-parsing-of-specially-.patch
@ -0,0 +1,56 @@
 From 391511ccaaf0050970dfbe95bf2df1bcf6c33440 Mon Sep 17 00:00:00 2001
 From: "Miss Islington (bot)"
 <31488909+miss-islington@users.noreply.github.com>
 Date: Wed, 17 Jul 2019 10:02:05 -0700
 Subject: [PATCH] bpo-37461: Fix infinite loop in parsing of specially crafted
 email headers (GH-14794)
 * bpo-37461: Fix infinite loop in parsing of specially crafted email headers.
 Some crafted email header would cause the get_parameter method to run in an
 infinite loop causing a DoS attack surface when parsing those headers. This
 patch fixes that by making sure the DQUOTE character is handled to prevent
 going into an infinite loop.
 (cherry picked from commit a4a994bd3e619cbaff97610a1cee8ffa87c672f5)
 Co-authored-by: Abhilash Raj <maxking@users.noreply.github.com>
 ---
 Lib/email/_header_value_parser.py                          | 3 +++
 Lib/test/test_email/test__header_value_parser.py           | 7 +++++++
 .../next/Security/2019-07-16-08-11-00.bpo-37461.1Ahz7O.rst | 2 ++
 3 files changed, 12 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Security/2019-07-16-08-11-00.bpo-37461.1Ahz7O.rst
 --- a/Lib/email/_header_value_parser.py
 +++ b/Lib/email/_header_value_parser.py
@@ -2387,6 +2387,9 @@ def get_parameter(value):
         while value:
             if value[0] in WSP:
                 token, value = get_fws(value)
 +            elif value[0] == '"':
 +                token = ValueTerminal('"', 'DQUOTE')
 +                value = value[1:]
             else:
                 token, value = get_qcontent(value)
             v.append(token)
 --- a/Lib/test/test_email/test__header_value_parser.py
 +++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2621,6 +2621,13 @@ class Test_parse_mime_parameters(TestPar
             # Defects are apparent missing *0*, and two 'out of sequence'.
             [errors.InvalidHeaderDefect]*3),
 +        # bpo-37461: Check that we don't go into an infinite loop.
 +        'extra_dquote': (
 +            'r*="\'a\'\\"',
 +            ' r="\\""',
 +            'r*=\'a\'"',
 +            [('r', '"')],
 +            [errors.InvalidHeaderDefect]*2),
     }
 @parameterize
 --- /dev/null
 +++ b/Misc/NEWS.d/next/Security/2019-07-16-08-11-00.bpo-37461.1Ahz7O.rst
@@ -0,0 +1,2 @@
 +Fix an infinite loop when parsing specially crafted email headers. Patch by
 +Abhilash Raj.
--- a/lang/python/python3/patches/026-3.7-bpo-37764-Fix-infinite-loop-when-parsing-unstruc.patch
+++ b/lang/python/python3/patches/026-3.7-bpo-37764-Fix-infinite-loop-when-parsing-unstruc.patch
@ -0,0 +1,167 @@
 From ea21389dda401457198fb214aa2c981a45ed9528 Mon Sep 17 00:00:00 2001
 From: Ashwin Ramaswami <aramaswamis@gmail.com>
 Date: Tue, 3 Sep 2019 09:42:53 -0700
 Subject: [PATCH] [3.7] bpo-37764: Fix infinite loop when parsing unstructured
 email headers. (GH-15239) (GH-15654)
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 …aders. (GH-15239)
 Fixes a case in which email._header_value_parser.get_unstructured hangs the system for some invalid headers. This covers the cases in which the header contains either:
 - a case without trailing whitespace
 - an invalid encoded word
 https://bugs.python.org/issue37764
 This fix should also be backported to 3.7 and 3.8
 https://bugs.python.org/issue37764
 (cherry picked from commit c5b242f87f31286ad38991bc3868cf4cfbf2b681)
 Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com>
 https://bugs.python.org/issue37764
 ---
 Lib/email/_header_value_parser.py             | 19 ++++++++++++++---
 .../test_email/test__header_value_parser.py   | 16 ++++++++++++++
 Lib/test/test_email/test_email.py             | 21 +++++++++++++++++++
 Misc/ACKS                                     |  1 +
 .../2019-08-27-01-13-05.bpo-37764.qv67PQ.rst  |  1 +
 5 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
 --- a/Lib/email/_header_value_parser.py
 +++ b/Lib/email/_header_value_parser.py
@@ -931,6 +931,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTer
         return ''
 +class _InvalidEwError(errors.HeaderParseError):
 +    """Invalid encoded word found while parsing headers."""
 +
 +
 # XXX these need to become classes and used as instances so
 # that a program can't change them in a parse tree and screw
 # up other parse trees.  Maybe should have  tests for that, too.
@@ -1035,7 +1039,10 @@ def get_encoded_word(value):
         raise errors.HeaderParseError(
             "expected encoded word but found {}".format(value))
     remstr = ''.join(remainder)
 -    if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
 +    if (len(remstr) > 1 and
 +        remstr[0] in hexdigits and
 +        remstr[1] in hexdigits and
 +        tok.count('?') < 2):
         # The ? after the CTE was followed by an encoded word escape (=XX).
         rest, *remainder = remstr.split('?=', 1)
         tok = tok + '?=' + rest
@@ -1047,7 +1054,7 @@ def get_encoded_word(value):
     try:
         text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
     except ValueError:
 -        raise errors.HeaderParseError(
 +        raise _InvalidEwError(
             "encoded word format invalid: '{}'".format(ew.cte))
     ew.charset = charset
     ew.lang = lang
@@ -1097,9 +1104,12 @@ def get_unstructured(value):
             token, value = get_fws(value)
             unstructured.append(token)
             continue
 +        valid_ew = True
         if value.startswith('=?'):
             try:
                 token, value = get_encoded_word(value)
 +            except _InvalidEwError:
 +                valid_ew = False
             except errors.HeaderParseError:
                 # XXX: Need to figure out how to register defects when
                 # appropriate here.
@@ -1121,7 +1131,10 @@ def get_unstructured(value):
         # Split in the middle of an atom if there is a rfc2047 encoded word
         # which does not have WSP on both sides. The defect will be registered
         # the next time through the loop.
 -        if rfc2047_matcher.search(tok):
 +        # This needs to only be performed when the encoded word is valid;
 +        # otherwise, performing it on an invalid encoded word can cause
 +        # the parser to go in an infinite loop.
 +        if valid_ew and rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
         vtext = ValueTerminal(tok, 'vtext')
         _validate_xtext(vtext)
 --- a/Lib/test/test_email/test__header_value_parser.py
 +++ b/Lib/test/test_email/test__header_value_parser.py
@@ -383,6 +383,22 @@ class TestParser(TestParserMixin, TestEm
             [errors.InvalidHeaderDefect],
             '')
 +    def test_get_unstructured_without_trailing_whitespace_hang_case(self):
 +        self._test_get_x(self._get_unst,
 +            '=?utf-8?q?somevalue?=aa',
 +            'somevalueaa',
 +            'somevalueaa',
 +            [errors.InvalidHeaderDefect],
 +            '')
 +
 +    def test_get_unstructured_invalid_ew(self):
 +        self._test_get_x(self._get_unst,
 +            '=?utf-8?q?=somevalue?=',
 +            '=?utf-8?q?=somevalue?=',
 +            '=?utf-8?q?=somevalue?=',
 +            [],
 +            '')
 +
     # get_qp_ctext
     def test_get_qp_ctext_only(self):
 --- a/Lib/test/test_email/test_email.py
 +++ b/Lib/test/test_email/test_email.py
@@ -5367,6 +5367,27 @@ Content-Type: application/x-foo;
         eq(language, 'en-us')
         eq(s, 'My Document For You')
 +    def test_should_not_hang_on_invalid_ew_messages(self):
 +        messages = ["""From: user@host.com
 +To: user@host.com
 +Bad-Header:
 + =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
 + =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
 + =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
 +
 +Hello!
 +""", """From: ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ <xxx@xxx>
 +To: "xxx" <xxx@xxx>
 +Subject:   ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½
 +MIME-Version: 1.0
 +Content-Type: text/plain; charset="windows-1251";
 +Content-Transfer-Encoding: 8bit
 +
 +ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½
 +"""]
 +        for m in messages:
 +            with self.subTest(m=m):
 +                msg = email.message_from_string(m)
 # Tests to ensure that signed parts of an email are completely preserved, as
 --- a/Misc/ACKS
 +++ b/Misc/ACKS
@@ -1305,6 +1305,7 @@ Burton Radons
 Abhilash Raj
 Shorya Raj
 Dhushyanth Ramasamy
 +Ashwin Ramaswami
 Jeff Ramnani
 Bayard Randel
 Varpu Rantala
 --- /dev/null
 +++ b/Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
@@ -0,0 +1 @@
 +Fixes email._header_value_parser.get_unstructured going into an infinite loop for a specific case in which the email header does not have trailing whitespace, and the case in which it contains an invalid encoded word. Patch by Ashwin Ramaswami.
 \ No newline at end of file
--- a/lang/python/python3/patches/027-bpo-38243-xmlrpc.server-Escape-the-server_title-GH-1.patch
+++ b/lang/python/python3/patches/027-bpo-38243-xmlrpc.server-Escape-the-server_title-GH-1.patch
@ -0,0 +1,74 @@
 From 39a0c7555530e31c6941a78da19b6a5b61170687 Mon Sep 17 00:00:00 2001
 From: "Miss Islington (bot)"
 <31488909+miss-islington@users.noreply.github.com>
 Date: Fri, 27 Sep 2019 13:18:14 -0700
 Subject: [PATCH] bpo-38243, xmlrpc.server: Escape the server_title (GH-16373)
 Escape the server title of xmlrpc.server.DocXMLRPCServer
 when rendering the document page as HTML.
 (cherry picked from commit e8650a4f8c7fb76f570d4ca9c1fbe44e91c8dfaa)
 Co-authored-by: Dong-hee Na <donghee.na92@gmail.com>
 ---
 Lib/test/test_docxmlrpc.py                       | 16 ++++++++++++++++
 Lib/xmlrpc/server.py                             |  3 ++-
 .../2019-09-25-13-21-09.bpo-38243.1pfz24.rst     |  3 +++
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Security/2019-09-25-13-21-09.bpo-38243.1pfz24.rst
 --- a/Lib/test/test_docxmlrpc.py
 +++ b/Lib/test/test_docxmlrpc.py
@@ -1,5 +1,6 @@
 from xmlrpc.server import DocXMLRPCServer
 import http.client
 +import re
 import sys
 import threading
 from test import support
@@ -193,6 +194,21 @@ class DocXMLRPCHTTPGETServer(unittest.Te
              b'method_annotation</strong></a>(x: bytes)</dt></dl>'),
             response.read())
 +    def test_server_title_escape(self):
 +        # bpo-38243: Ensure that the server title and documentation
 +        # are escaped for HTML.
 +        self.serv.set_server_title('test_title<script>')
 +        self.serv.set_server_documentation('test_documentation<script>')
 +        self.assertEqual('test_title<script>', self.serv.server_title)
 +        self.assertEqual('test_documentation<script>',
 +                self.serv.server_documentation)
 +
 +        generated = self.serv.generate_html_documentation()
 +        title = re.search(r'<title>(.+?)</title>', generated).group()
 +        documentation = re.search(r'<p><tt>(.+?)</tt></p>', generated).group()
 +        self.assertEqual('<title>Python: test_title&lt;script&gt;</title>', title)
 +        self.assertEqual('<p><tt>test_documentation&lt;script&gt;</tt></p>', documentation)
 +
 if __name__ == '__main__':
     unittest.main()
 --- a/Lib/xmlrpc/server.py
 +++ b/Lib/xmlrpc/server.py
@@ -108,6 +108,7 @@ from xmlrpc.client import Fault, dumps,
 from http.server import BaseHTTPRequestHandler
 from functools import partial
 from inspect import signature
 +import html
 import http.server
 import socketserver
 import sys
@@ -894,7 +895,7 @@ class XMLRPCDocGenerator:
                                 methods
                             )
 -        return documenter.page(self.server_title, documentation)
 +        return documenter.page(html.escape(self.server_title), documentation)
 class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
     """XML-RPC and documentation request handler class.
 --- /dev/null
 +++ b/Misc/NEWS.d/next/Security/2019-09-25-13-21-09.bpo-38243.1pfz24.rst
@@ -0,0 +1,3 @@
 +Escape the server title of :class:`xmlrpc.server.DocXMLRPCServer`
 +when rendering the document page as HTML.
 +(Contributed by Dong-hee Na in :issue:`38243`.)