From: R David Murray <rdmurray@bitdance.com>
Date: Wed, 14 Mar 2012 18:24:22 +0000 (-0400)
Subject: #14062: fix BytesGenerator handling of Header objects
X-Git-Tag: v3.3.0a2~203
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8d8f11049265f4e53b2d97f5caa73c4ca0ee7875;p=python

#14062: fix BytesGenerator handling of Header objects

This is a different fix than the 3.2 fix, but the new tests are the same.

This also affected smtplib.SMTP.send_message, which calls BytesGenerator.
---

8d8f11049265f4e53b2d97f5caa73c4ca0ee7875
diff --cc Lib/email/generator.py
index d8b8fa960b,430ee73ea1..edba13f8fd
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@@ -372,22 -359,21 +372,25 @@@ class BytesGenerator(Generator)
          # strings with 8bit bytes.
          for h, v in msg._headers:
              self.write('%s: ' % h)
 -            if isinstance(v, Header):
 -                self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
 -            elif _has_surrogates(v):
 -                # If we have raw 8bit data in a byte string, we have no idea
 -                # what the encoding is.  There is no safe way to split this
 -                # string.  If it's ascii-subset, then we could do a normal
 -                # ascii split, but if it's multibyte then we could break the
 -                # string.  There's no way to know so the least harm seems to
 -                # be to not split the string and risk it being too long.
 -                self.write(v+NL)
 +            if isinstance(v, str):
 +                if _has_surrogates(v):
 +                    if not self.policy.must_be_7bit:
 +                        # If we have raw 8bit data in a byte string, we have no idea
 +                        # what the encoding is.  There is no safe way to split this
 +                        # string.  If it's ascii-subset, then we could do a normal
 +                        # ascii split, but if it's multibyte then we could break the
 +                        # string.  There's no way to know so the least harm seems to
 +                        # be to not split the string and risk it being too long.
 +                        self.write(v+NL)
 +                        continue
 +                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
 +                else:
 +                    h = Header(v, header_name=h)
+             else:
 -                # Header's got lots of smarts and this string is safe...
 -                header = Header(v, maxlinelen=self._maxheaderlen,
 -                                header_name=h)
 -                self.write(header.encode(linesep=self._NL)+self._NL)
++                # Assume it is a Header-like object.
++                h = v
 +            self.write(h.encode(linesep=self._NL,
 +                                maxlinelen=self._maxheaderlen)+self._NL)
          # A blank line always separates headers from body
          self.write(self._NL)
  
diff --cc Lib/test/test_email/test_email.py
index a4d39ab779,5655938021..1f354c2b66
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@@ -3601,44 -3440,30 +3601,68 @@@ class Test8BitBytesHandling(unittest.Te
          g.flatten(msg)
          self.assertEqual(s.getvalue(), source)
  
+     def test_bytes_generator_b_encoding_linesep(self):
+         # Issue 14062: b encoding was tacking on an extra \n.
+         m = Message()
+         # This has enough non-ascii that it should always end up b encoded.
+         m['Subject'] = Header('žluťoučký kůň')
+         s = BytesIO()
+         g = email.generator.BytesGenerator(s)
+         g.flatten(m, linesep='\r\n')
+         self.assertEqual(
+             s.getvalue(),
+             b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+ 
+     def test_generator_b_encoding_linesep(self):
+         # Since this broke in BytesGenerator, test Generator for completeness.
+         m = Message()
+         # This has enough non-ascii that it should always end up b encoded.
+         m['Subject'] = Header('žluťoučký kůň')
+         s = StringIO()
+         g = email.generator.Generator(s)
+         g.flatten(m, linesep='\r\n')
+         self.assertEqual(
+             s.getvalue(),
+             'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+ 
 +    def test_crlf_control_via_policy(self):
 +        # msg_26 is crlf terminated
 +        with openfile('msg_26.txt', 'rb') as fp:
 +            text = fp.read()
 +        msg = email.message_from_bytes(text)
 +        s = BytesIO()
 +        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
 +        g.flatten(msg)
 +        self.assertEqual(s.getvalue(), text)
 +
 +    def test_flatten_linesep_overrides_policy(self):
 +        # msg_27 is lf separated
 +        with openfile('msg_27.txt', 'rb') as fp:
 +            text = fp.read()
 +        msg = email.message_from_bytes(text)
 +        s = BytesIO()
 +        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
 +        g.flatten(msg, linesep='\n')
 +        self.assertEqual(s.getvalue(), text)
 +
 +    def test_must_be_7bit_handles_unknown_8bit(self):
 +        msg = email.message_from_bytes(self.non_latin_bin_msg)
 +        out = BytesIO()
 +        g = email.generator.BytesGenerator(out,
 +                        policy=email.policy.default.clone(must_be_7bit=True))
 +        g.flatten(msg)
 +        self.assertEqual(out.getvalue(),
 +            self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))
 +
 +    def test_must_be_7bit_transforms_8bit_cte(self):
 +        msg = email.message_from_bytes(self.latin_bin_msg)
 +        out = BytesIO()
 +        g = email.generator.BytesGenerator(out,
 +                        policy=email.policy.default.clone(must_be_7bit=True))
 +        g.flatten(msg)
 +        self.assertEqual(out.getvalue(),
 +                        self.latin_bin_msg_as7bit.encode('ascii'))
 +
      maxDiff = None
  
  
diff --cc Misc/NEWS
index 3652d618bc,6d6268c4c8..45fb9672f0
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@@ -24,9 -22,8 +24,12 @@@ Core and Builtin
  Library
  -------
  
 -- Issue #14062: Header objects now correctly respect the 'linesep' setting
 -  when processed by BytesParser (which smtplib.SMTP.send_message uses).
++- Issue #14062: BytesGenerator now correctly folds Header objects,
++  including using linesep when folding.
++
 +- Issue #13839: When invoked on the command-line, the pstats module now
 +  accepts several filenames of profile stat files and merges them all.
 +  Patch by Matt Joiner.
  
  - Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
    instead of raising an error.  This fixes a regression relative to 2.7.