granicus.if.org Git - python/commitdiff
#14291: if a header has non-ascii unicode, default to CTE using utf-8
author: R David Murray <rdmurray@bitdance.com>
Wed, 14 Mar 2012 06:59:51 +0000 (02:59 -0400)
committer: R David Murray <rdmurray@bitdance.com>
Wed, 14 Mar 2012 06:59:51 +0000 (02:59 -0400)
In Python2, if a unicode string was assigned as the value of a header,
email would automatically CTE encode it using the UTF8 charset.
This capability was lost in the Python3 translation, and this patch
restores it.

Patch by Ali Ikinci, assisted by R. David Murray.

I also fixed the mailbox test, which depended on non-ASCII input causing
message_from_string to raise an error (the test's own comment noted that
relying on this was a bad idea).  It now uses support.patch to induce an
error during message serialization.

Lib/email/header.py
Lib/email/test/test_email.py
Lib/test/test_mailbox.py
Misc/ACKS
Misc/NEWS

index 2e687b7a6f10742e25e9ea33d2ddb06a5e616c7d..3250d367eddac554e2f9915f7309f4124d000ec9 100644 (file)
@@ -283,7 +283,12 @@ class Header:
         # character set, otherwise an early error is thrown.
         output_charset = charset.output_codec or 'us-ascii'
         if output_charset != _charset.UNKNOWN8BIT:
-            s.encode(output_charset, errors)
+            try:
+                s.encode(output_charset, errors)
+            except UnicodeEncodeError:
+                if output_charset!='us-ascii':
+                    raise
+                charset = UTF8
         self._chunks.append((s, charset))
 
     def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
index 102e15b9ff053a3242eb6807ec727f6270bd0a57..f43bb38aa8d52a042b375e3fd1f79b89a1303b7d 100644 (file)
@@ -619,6 +619,19 @@ class TestMessageAPI(TestEmailBase):
         msg['Dummy'] = 'dummy\nX-Injected-Header: test'
         self.assertRaises(errors.HeaderParseError, msg.as_string)
 
+    def test_unicode_header_defaults_to_utf8_encoding(self):
+        # Issue 14291
+        m = MIMEText('abc\n')
+        m['Subject'] = 'É test'
+        self.assertEqual(str(m),textwrap.dedent("""\
+            Content-Type: text/plain; charset="us-ascii"
+            MIME-Version: 1.0
+            Content-Transfer-Encoding: 7bit
+            Subject: =?utf-8?q?=C3=89_test?=
+
+            abc
+            """))
+
 # Test the email.encoders module
 class TestEncoders(unittest.TestCase):
 
@@ -1060,9 +1073,13 @@ Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-W
                          'f\xfcr Offshore-Windkraftprojekte '
                          '<a-very-long-address@example.com>')
         msg['Reply-To'] = header_string
-        self.assertRaises(UnicodeEncodeError, msg.as_string)
+        eq(msg.as_string(maxheaderlen=78), """\
+Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
+ =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
+
+""")
         msg = Message()
-        msg['Reply-To'] = Header(header_string, 'utf-8',
+        msg['Reply-To'] = Header(header_string,
                                  header_name='Reply-To')
         eq(msg.as_string(maxheaderlen=78), """\
 Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
index e09aea41d165282cbf98da8dc8f18f0a2e5fda50..8f76e18c73b04446c63ba052967d9a998bdc53c1 100644 (file)
@@ -111,10 +111,10 @@ class TestMailbox(TestBase):
         self.assertMailboxEmpty()
 
     def test_add_that_raises_leaves_mailbox_empty(self):
-        # XXX This test will start failing when Message learns to handle
-        # non-ASCII string headers, and a different internal failure will
-        # need to be found or manufactured.
-        with self.assertRaises(ValueError):
+        def raiser(*args, **kw):
+            raise Exception("a fake error")
+        support.patch(self, email.generator.BytesGenerator, 'flatten', raiser)
+        with self.assertRaises(Exception):
             self._box.add(email.message_from_string("From: Alphöso"))
         self.assertEqual(len(self._box), 0)
         self._box.close()
index a7d89d39c42e1e8094ef0f70d52cba086a4b1405..2b3dad5424fc4acfff9e69d14de885f9091c6bcc 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -430,6 +430,7 @@ Jeremy Hylton
 Gerhard Häring
 Fredrik Håård
 Mihai Ibanescu
+Ali Ikinci
 Lars Immisch
 Bobby Impollonia
 Meador Inge
index eea3a17e7d8bbb07da477093f27895b903203863..b4dcf82608220a7354c785ee6975d063df6b2bc9 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -22,6 +22,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
+  instead of raising an error.  This fixes a regression relative to 2.7.
+
 - Issue #5219: Prevent event handler cascade in IDLE.
 
 - Issue #14184: Increase the default stack size for secondary threads on