granicus.if.org Git - python/commitdiff
#16983: Apply Postel's law to encoded words inside quoted strings.
author: R David Murray <rdmurray@bitdance.com>
Sat, 8 Feb 2014 18:12:00 +0000 (13:12 -0500)
committer: R David Murray <rdmurray@bitdance.com>
Sat, 8 Feb 2014 18:12:00 +0000 (13:12 -0500)
This applies only to the new parser.  The old parser decodes encoded words
inside quoted strings already, although it gets the whitespace wrong
when it does so.

This version of the patch only handles the most common case (a single encoded
word surrounded by quotes), but I haven't seen any other variations of this in
the wild yet, so it's good enough for now.

Lib/email/_header_value_parser.py
Lib/test/test_email/test__header_value_parser.py
Lib/test/test_email/test_headerregistry.py
Misc/NEWS

index 291437c5867c168cc507d3f59861c4c5c0f02274..0369e015477a3387021e96dac47319258e063825 100644 (file)
@@ -1559,6 +1559,13 @@ def get_bare_quoted_string(value):
     while value and value[0] != '"':
         if value[0] in WSP:
             token, value = get_fws(value)
+        elif value[:2] == '=?':
+            try:
+                token, value = get_encoded_word(value)
+                bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+                    "encoded word inside quoted string"))
+            except errors.HeaderParseError:
+                token, value = get_qcontent(value)
         else:
             token, value = get_qcontent(value)
         bare_quoted_string.append(token)
index 646082b4a40a24cce4d6e99c986b1abdd4663814..32996ca4c8a1e4c0f70aa09719a1b4ab4131d2de 100644 (file)
@@ -540,6 +540,15 @@ class TestParser(TestParserMixin, TestEmailBase):
         self._test_get_x(parser.get_bare_quoted_string,
             '""', '""', '', [], '')
 
+    # Issue 16983: apply postel's law to some bad encoding.
+    def test_encoded_word_inside_quotes(self):
+        self._test_get_x(parser.get_bare_quoted_string,
+            '"=?utf-8?Q?not_really_valid?="',
+            '"not really valid"',
+            'not really valid',
+            [errors.InvalidHeaderDefect],
+            '')
+
     # get_comment
 
     def test_get_comment_only(self):
index f829f83e320b0ce11f40b10e942c8f15e0e22bf1..adaf3e8fe457bdd7cc769ca16e697aaff712ed30 100644 (file)
@@ -1143,6 +1143,16 @@ class TestAddressHeader(TestHeaderBase):
             'example.com',
             None),
 
+        'rfc2047_atom_in_quoted_string_is_decoded':
+            ('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
+            [errors.InvalidHeaderDefect],
+            'Éric <foo@example.com>',
+            'Éric',
+            'foo@example.com',
+            'foo',
+            'example.com',
+            None),
+
         }
 
         # XXX: Need many more examples, and in particular some with names in
index cedd4e52ac574644b058ffbf2d18f7337ac71c5c..3ee074392b77a602fcb8ce1d9a9bebfd309a4e96 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -48,6 +48,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #16983: the new email header parsing code will now decode encoded words
+  that are (incorrectly) surrounded by quotes, and register a defect.
+
 - Issue #19772: email.generator no longer mutates the message object when
   doing a down-transform from 8bit to 7bit CTEs.