You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

167 lines
6.6 KiB

  1. From ea21389dda401457198fb214aa2c981a45ed9528 Mon Sep 17 00:00:00 2001
  2. From: Ashwin Ramaswami <aramaswamis@gmail.com>
  3. Date: Tue, 3 Sep 2019 09:42:53 -0700
  4. Subject: [PATCH] [3.7] bpo-37764: Fix infinite loop when parsing unstructured
  5. email headers. (GH-15239) (GH-15654)
  6. MIME-Version: 1.0
  7. Content-Type: text/plain; charset=UTF-8
  8. Content-Transfer-Encoding: 8bit
  9. …aders. (GH-15239)
  10. Fixes a case in which email._header_value_parser.get_unstructured hangs the system for some invalid headers. This covers the cases in which the header contains either:
  11. - an encoded word without trailing whitespace
  12. - an invalid encoded word
  13. https://bugs.python.org/issue37764
  14. This fix should also be backported to 3.7 and 3.8
  15. https://bugs.python.org/issue37764
  16. (cherry picked from commit c5b242f87f31286ad38991bc3868cf4cfbf2b681)
  17. Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com>
  18. https://bugs.python.org/issue37764
  19. ---
  20. Lib/email/_header_value_parser.py | 19 ++++++++++++++---
  21. .../test_email/test__header_value_parser.py | 16 ++++++++++++++
  22. Lib/test/test_email/test_email.py | 21 +++++++++++++++++++
  23. Misc/ACKS | 1 +
  24. .../2019-08-27-01-13-05.bpo-37764.qv67PQ.rst | 1 +
  25. 5 files changed, 55 insertions(+), 3 deletions(-)
  26. create mode 100644 Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
  27. --- a/Lib/email/_header_value_parser.py
  28. +++ b/Lib/email/_header_value_parser.py
  29. @@ -931,6 +931,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTer
  30. return ''
  31. +class _InvalidEwError(errors.HeaderParseError):
  32. + """Invalid encoded word found while parsing headers."""
  33. +
  34. +
  35. # XXX these need to become classes and used as instances so
  36. # that a program can't change them in a parse tree and screw
  37. # up other parse trees. Maybe should have tests for that, too.
  38. @@ -1035,7 +1039,10 @@ def get_encoded_word(value):
  39. raise errors.HeaderParseError(
  40. "expected encoded word but found {}".format(value))
  41. remstr = ''.join(remainder)
  42. - if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
  43. + if (len(remstr) > 1 and
  44. + remstr[0] in hexdigits and
  45. + remstr[1] in hexdigits and
  46. + tok.count('?') < 2):
  47. # The ? after the CTE was followed by an encoded word escape (=XX).
  48. rest, *remainder = remstr.split('?=', 1)
  49. tok = tok + '?=' + rest
  50. @@ -1047,7 +1054,7 @@ def get_encoded_word(value):
  51. try:
  52. text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
  53. except ValueError:
  54. - raise errors.HeaderParseError(
  55. + raise _InvalidEwError(
  56. "encoded word format invalid: '{}'".format(ew.cte))
  57. ew.charset = charset
  58. ew.lang = lang
  59. @@ -1097,9 +1104,12 @@ def get_unstructured(value):
  60. token, value = get_fws(value)
  61. unstructured.append(token)
  62. continue
  63. + valid_ew = True
  64. if value.startswith('=?'):
  65. try:
  66. token, value = get_encoded_word(value)
  67. + except _InvalidEwError:
  68. + valid_ew = False
  69. except errors.HeaderParseError:
  70. # XXX: Need to figure out how to register defects when
  71. # appropriate here.
  72. @@ -1121,7 +1131,10 @@ def get_unstructured(value):
  73. # Split in the middle of an atom if there is a rfc2047 encoded word
  74. # which does not have WSP on both sides. The defect will be registered
  75. # the next time through the loop.
  76. - if rfc2047_matcher.search(tok):
  77. + # This needs to only be performed when the encoded word is valid;
  78. + # otherwise, performing it on an invalid encoded word can cause
  79. + # the parser to go in an infinite loop.
  80. + if valid_ew and rfc2047_matcher.search(tok):
  81. tok, *remainder = value.partition('=?')
  82. vtext = ValueTerminal(tok, 'vtext')
  83. _validate_xtext(vtext)
  84. --- a/Lib/test/test_email/test__header_value_parser.py
  85. +++ b/Lib/test/test_email/test__header_value_parser.py
  86. @@ -383,6 +383,22 @@ class TestParser(TestParserMixin, TestEm
  87. [errors.InvalidHeaderDefect],
  88. '')
  89. + def test_get_unstructured_without_trailing_whitespace_hang_case(self):
  90. + self._test_get_x(self._get_unst,
  91. + '=?utf-8?q?somevalue?=aa',
  92. + 'somevalueaa',
  93. + 'somevalueaa',
  94. + [errors.InvalidHeaderDefect],
  95. + '')
  96. +
  97. + def test_get_unstructured_invalid_ew(self):
  98. + self._test_get_x(self._get_unst,
  99. + '=?utf-8?q?=somevalue?=',
  100. + '=?utf-8?q?=somevalue?=',
  101. + '=?utf-8?q?=somevalue?=',
  102. + [],
  103. + '')
  104. +
  105. # get_qp_ctext
  106. def test_get_qp_ctext_only(self):
  107. --- a/Lib/test/test_email/test_email.py
  108. +++ b/Lib/test/test_email/test_email.py
  109. @@ -5367,6 +5367,27 @@ Content-Type: application/x-foo;
  110. eq(language, 'en-us')
  111. eq(s, 'My Document For You')
  112. + def test_should_not_hang_on_invalid_ew_messages(self):
  113. + messages = ["""From: user@host.com
  114. +To: user@host.com
  115. +Bad-Header:
  116. + =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
  117. + =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
  118. + =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
  119. +
  120. +Hello!
  121. +""", """From: ����� �������� <xxx@xxx>
  122. +To: "xxx" <xxx@xxx>
  123. +Subject: ��� ���������� ����� ����� � ��������� �� ����
  124. +MIME-Version: 1.0
  125. +Content-Type: text/plain; charset="windows-1251";
  126. +Content-Transfer-Encoding: 8bit
  127. +
  128. +�� ����� � ���� ������ ��� ��������
  129. +"""]
  130. + for m in messages:
  131. + with self.subTest(m=m):
  132. + msg = email.message_from_string(m)
  133. # Tests to ensure that signed parts of an email are completely preserved, as
  134. --- a/Misc/ACKS
  135. +++ b/Misc/ACKS
  136. @@ -1305,6 +1305,7 @@ Burton Radons
  137. Abhilash Raj
  138. Shorya Raj
  139. Dhushyanth Ramasamy
  140. +Ashwin Ramaswami
  141. Jeff Ramnani
  142. Bayard Randel
  143. Varpu Rantala
  144. --- /dev/null
  145. +++ b/Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
  146. @@ -0,0 +1 @@
  147. +Fixes email._header_value_parser.get_unstructured going into an infinite loop for a specific case in which the email header does not have trailing whitespace, and the case in which it contains an invalid encoded word. Patch by Ashwin Ramaswami.
  148. \ No newline at end of file