#14062: fix BytesGenerator handling of Header objects

author R David Murray <rdmurray@bitdance.com>

Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)

committer R David Murray <rdmurray@bitdance.com>

Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
author R David Murray <rdmurray@bitdance.com>
Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
committer R David Murray <rdmurray@bitdance.com>
Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
diff --cc Lib/email/generator.py

index d8b8fa960b04ecafbce278e81f8a94d0877bea41,430ee73ea10c9168a6ce57cec111e2227b21e6ec..edba13f8fda1e6ab29c58f47620d5c86104d4e49
--- 1/Lib/email/generator.py
--- 2/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@@ -372,22 -359,21 +372,25 @@@ class BytesGenerator(Generator)
           # strings with 8bit bytes.
           for h, v in msg._headers:
               self.write('%s: ' % h)
- -            if isinstance(v, Header):
- -                self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
- -            elif _has_surrogates(v):
- -                # If we have raw 8bit data in a byte string, we have no idea
- -                # what the encoding is.  There is no safe way to split this
- -                # string.  If it's ascii-subset, then we could do a normal
- -                # ascii split, but if it's multibyte then we could break the
- -                # string.  There's no way to know so the least harm seems to
- -                # be to not split the string and risk it being too long.
- -                self.write(v+NL)
+ +            if isinstance(v, str):
+ +                if _has_surrogates(v):
+ +                    if not self.policy.must_be_7bit:
+ +                        # If we have raw 8bit data in a byte string, we have no idea
+ +                        # what the encoding is.  There is no safe way to split this
+ +                        # string.  If it's ascii-subset, then we could do a normal
+ +                        # ascii split, but if it's multibyte then we could break the
+ +                        # string.  There's no way to know so the least harm seems to
+ +                        # be to not split the string and risk it being too long.
+ +                        self.write(v+NL)
+ +                        continue
+ +                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
+ +                else:
+ +                    h = Header(v, header_name=h)
+             else:
- -                # Header's got lots of smarts and this string is safe...
- -                header = Header(v, maxlinelen=self._maxheaderlen,
- -                                header_name=h)
- -                self.write(header.encode(linesep=self._NL)+self._NL)
++                # Assume it is a Header-like object.
++                h = v
+ +            self.write(h.encode(linesep=self._NL,
+ +                                maxlinelen=self._maxheaderlen)+self._NL)
           # A blank line always separates headers from body
           self.write(self._NL)
   
diff --cc Lib/test/test_email/test_email.py

index a4d39ab7793d6074bfc49f3bc06339d5fcbced23,5655938021c6e0b7b7574323f3fceab8aa1342c3..1f354c2b66028728e5137f3bc22c69698c988647
--- 1/Lib/test/test_email/test_email.py
--- 2/Lib/email/test/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@@ -3601,44 -3440,30 +3601,68 @@@ class Test8BitBytesHandling(unittest.Te
           g.flatten(msg)
           self.assertEqual(s.getvalue(), source)
   
+     def test_bytes_generator_b_encoding_linesep(self):
+         # Issue 14062: b encoding was tacking on an extra \n.
+         m = Message()
+         # This has enough non-ascii that it should always end up b encoded.
+         m['Subject'] = Header('žluťoučký kůň')
+         s = BytesIO()
+         g = email.generator.BytesGenerator(s)
+         g.flatten(m, linesep='\r\n')
+         self.assertEqual(
+             s.getvalue(),
+             b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+ 
+     def test_generator_b_encoding_linesep(self):
+         # Since this broke in BytesGenerator, test Generator for completeness.
+         m = Message()
+         # This has enough non-ascii that it should always end up b encoded.
+         m['Subject'] = Header('žluťoučký kůň')
+         s = StringIO()
+         g = email.generator.Generator(s)
+         g.flatten(m, linesep='\r\n')
+         self.assertEqual(
+             s.getvalue(),
+             'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+ 
+ +    def test_crlf_control_via_policy(self):
+ +        # msg_26 is crlf terminated
+ +        with openfile('msg_26.txt', 'rb') as fp:
+ +            text = fp.read()
+ +        msg = email.message_from_bytes(text)
+ +        s = BytesIO()
+ +        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
+ +        g.flatten(msg)
+ +        self.assertEqual(s.getvalue(), text)
+ +
+ +    def test_flatten_linesep_overrides_policy(self):
+ +        # msg_27 is lf separated
+ +        with openfile('msg_27.txt', 'rb') as fp:
+ +            text = fp.read()
+ +        msg = email.message_from_bytes(text)
+ +        s = BytesIO()
+ +        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
+ +        g.flatten(msg, linesep='\n')
+ +        self.assertEqual(s.getvalue(), text)
+ +
+ +    def test_must_be_7bit_handles_unknown_8bit(self):
+ +        msg = email.message_from_bytes(self.non_latin_bin_msg)
+ +        out = BytesIO()
+ +        g = email.generator.BytesGenerator(out,
+ +                        policy=email.policy.default.clone(must_be_7bit=True))
+ +        g.flatten(msg)
+ +        self.assertEqual(out.getvalue(),
+ +            self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))
+ +
+ +    def test_must_be_7bit_transforms_8bit_cte(self):
+ +        msg = email.message_from_bytes(self.latin_bin_msg)
+ +        out = BytesIO()
+ +        g = email.generator.BytesGenerator(out,
+ +                        policy=email.policy.default.clone(must_be_7bit=True))
+ +        g.flatten(msg)
+ +        self.assertEqual(out.getvalue(),
+ +                        self.latin_bin_msg_as7bit.encode('ascii'))
+ +
       maxDiff = None
   
   
diff --cc Misc/NEWS

index 3652d618bc0d51ab532454a139b9939d757ee398,6d6268c4c8c1bba4e34a7f905cc3faf4024bfb8b..45fb9672f0d36399e22b1155d79c02d0f1ec17f3
--- 1/Misc/NEWS
--- 2/Misc/NEWS
+++ b/Misc/NEWS
@@@ -24,9 -22,8 +24,12 @@@ Core and Builtin
   Library
   -------
   
- -- Issue #14062: Header objects now correctly respect the 'linesep' setting
- -  when processed by BytesParser (which smtplib.SMTP.send_message uses).
++- Issue #14062: BytesGenerator now correctly folds Header objects,
++  including using linesep when folding.
++
+ +- Issue #13839: When invoked on the command-line, the pstats module now
+ +  accepts several filenames of profile stat files and merges them all.
+ +  Patch by Matt Joiner.
   
   - Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
     instead of raising an error.  This fixes a regression relative to 2.7.
author	R David Murray <rdmurray@bitdance.com>
	Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
committer	R David Murray <rdmurray@bitdance.com>
	Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
		1	2
Lib/email/generator.py	patch \|	diff1 \|	diff2 \|	blob \| history
Lib/test/test_email/test_email.py	patch \|	diff1 \|	diff2 \|	blob \| history
Misc/NEWS	patch \|	diff1 \|	diff2 \|	blob \| history