granicus.if.org Git - python/commitdiff
#11605: don't use set/get_payload in feedparser; they do conversions.
author: R David Murray <rdmurray@bitdance.com>
Wed, 6 Apr 2011 12:13:02 +0000 (08:13 -0400)
committer: R David Murray <rdmurray@bitdance.com>
Wed, 6 Apr 2011 12:13:02 +0000 (08:13 -0400)
Really the whole API needs to be gone over to restore the
separation of concerns; but that's what email6 is about.

Lib/email/feedparser.py
Lib/email/test/test_email.py
Misc/NEWS

index de8750dc1621af06ddf9b3b18cb7a526755e90eb..60a83255c0d30820e26f0dd08dd7585eff20bf04 100644 (file)
@@ -368,12 +368,12 @@ class FeedParser:
                                 end = len(mo.group(0))
                                 self._last.epilogue = epilogue[:-end]
                     else:
-                        payload = self._last.get_payload()
+                        payload = self._last._payload
                         if isinstance(payload, str):
                             mo = NLCRE_eol.search(payload)
                             if mo:
                                 payload = payload[:-len(mo.group(0))]
-                                self._last.set_payload(payload)
+                                self._last._payload = payload
                     self._input.pop_eof_matcher()
                     self._pop_message()
                     # Set the multipart up for newline cleansing, which will
index 245f65999b55ded199588d65630cf7815042f834..b4dc575ae673e77945bef6ba2e0a90b789a3c5a8 100644 (file)
@@ -3168,6 +3168,53 @@ class Test8BitBytesHandling(unittest.TestCase):
         g = email.generator.BytesGenerator(s)
         g.flatten(msg, linesep='\r\n')
         self.assertEqual(s.getvalue(), text)
+
+    def test_8bit_multipart(self):
+        # Issue 11605
+        source = textwrap.dedent("""\
+            Date: Fri, 18 Mar 2011 17:15:43 +0100
+            To: foo@example.com
+            From: foodwatch-Newsletter <bar@example.com>
+            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
+            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
+            MIME-Version: 1.0
+            Content-Type: multipart/alternative;
+                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
+
+            --b1_76a486bee62b0d200f33dc2ca08220ad
+            Content-Type: text/plain; charset="utf-8"
+            Content-Transfer-Encoding: 8bit
+
+            Guten Tag, ,
+
+            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
+            Nachrichten aus Japan.
+
+
+            --b1_76a486bee62b0d200f33dc2ca08220ad
+            Content-Type: text/html; charset="utf-8"
+            Content-Transfer-Encoding: 8bit
+
+            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+                "http://www.w3.org/TR/html4/loose.dtd">
+            <html lang="de">
+            <head>
+                    <title>foodwatch - Newsletter</title>
+            </head>
+            <body>
+              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
+                 die Nachrichten aus Japan.</p>
+            </body>
+            </html>
+            --b1_76a486bee62b0d200f33dc2ca08220ad--
+
+            """).encode('utf-8')
+        msg = email.message_from_bytes(source)
+        s = BytesIO()
+        g = email.generator.BytesGenerator(s)
+        g.flatten(msg)
+        self.assertEqual(s.getvalue(), source)
+
     maxDiff = None
 
 
index 7ab3a5d2be016170137f6a9643a45f9f862bef7e..2ae7eaf657ab9265ce6c169985a03b2e8e6e1d71 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
+  subparts with an 8bit CTE into unicode instead of preserving the bytes.
+
 - Issue #10963: Ensure that subprocess.communicate() never raises EPIPE.
 
 - Issue #11746: Fix SSLContext.load_cert_chain() to accept elliptic curve