granicus.if.org Git - python/commitdiff
#14062: fix BytesParser handling of Header objects
author: R David Murray <rdmurray@bitdance.com>
Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
committer: R David Murray <rdmurray@bitdance.com>
Wed, 14 Mar 2012 18:24:22 +0000 (14:24 -0400)
This is a different fix than the 3.2 fix, but the new tests are the same.

This also affected smtplib.SMTP.send_message, which calls BytesParser.

1  2 
Lib/email/generator.py
Lib/test/test_email/test_email.py
Misc/NEWS

index d8b8fa960b04ecafbce278e81f8a94d0877bea41,430ee73ea10c9168a6ce57cec111e2227b21e6ec..edba13f8fda1e6ab29c58f47620d5c86104d4e49
@@@ -372,22 -359,21 +372,25 @@@ class BytesGenerator(Generator)
          # strings with 8bit bytes.
          for h, v in msg._headers:
              self.write('%s: ' % h)
 -            if isinstance(v, Header):
 -                self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
 -            elif _has_surrogates(v):
 -                # If we have raw 8bit data in a byte string, we have no idea
 -                # what the encoding is.  There is no safe way to split this
 -                # string.  If it's ascii-subset, then we could do a normal
 -                # ascii split, but if it's multibyte then we could break the
 -                # string.  There's no way to know so the least harm seems to
 -                # be to not split the string and risk it being too long.
 -                self.write(v+NL)
 +            if isinstance(v, str):
 +                if _has_surrogates(v):
 +                    if not self.policy.must_be_7bit:
 +                        # If we have raw 8bit data in a byte string, we have no idea
 +                        # what the encoding is.  There is no safe way to split this
 +                        # string.  If it's ascii-subset, then we could do a normal
 +                        # ascii split, but if it's multibyte then we could break the
 +                        # string.  There's no way to know so the least harm seems to
 +                        # be to not split the string and risk it being too long.
 +                        self.write(v+NL)
 +                        continue
 +                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
 +                else:
 +                    h = Header(v, header_name=h)
+             else:
 -                # Header's got lots of smarts and this string is safe...
 -                header = Header(v, maxlinelen=self._maxheaderlen,
 -                                header_name=h)
 -                self.write(header.encode(linesep=self._NL)+self._NL)
++                # Assume it is a Header-like object.
++                h = v
 +            self.write(h.encode(linesep=self._NL,
 +                                maxlinelen=self._maxheaderlen)+self._NL)
          # A blank line always separates headers from body
          self.write(self._NL)
  
index a4d39ab7793d6074bfc49f3bc06339d5fcbced23,5655938021c6e0b7b7574323f3fceab8aa1342c3..1f354c2b66028728e5137f3bc22c69698c988647
@@@ -3601,44 -3440,30 +3601,68 @@@ class Test8BitBytesHandling(unittest.Te
          g.flatten(msg)
          self.assertEqual(s.getvalue(), source)
  
+     def test_bytes_generator_b_encoding_linesep(self):
+         # Issue 14062: b encoding was tacking on an extra \n.
+         m = Message()
+         # This has enough non-ascii that it should always end up b encoded.
+         m['Subject'] = Header('žluťoučký kůň')
+         s = BytesIO()
+         g = email.generator.BytesGenerator(s)
+         g.flatten(m, linesep='\r\n')
+         self.assertEqual(
+             s.getvalue(),
+             b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
+     def test_generator_b_encoding_linesep(self):
+         # Since this broke in ByteGenerator, test Generator for completeness.
+         m = Message()
+         # This has enough non-ascii that it should always end up b encoded.
+         m['Subject'] = Header('žluťoučký kůň')
+         s = StringIO()
+         g = email.generator.Generator(s)
+         g.flatten(m, linesep='\r\n')
+         self.assertEqual(
+             s.getvalue(),
+             'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
 +    def test_crlf_control_via_policy(self):
 +        # msg_26 is crlf terminated
 +        with openfile('msg_26.txt', 'rb') as fp:
 +            text = fp.read()
 +        msg = email.message_from_bytes(text)
 +        s = BytesIO()
 +        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
 +        g.flatten(msg)
 +        self.assertEqual(s.getvalue(), text)
 +
 +    def test_flatten_linesep_overrides_policy(self):
 +        # msg_27 is lf separated
 +        with openfile('msg_27.txt', 'rb') as fp:
 +            text = fp.read()
 +        msg = email.message_from_bytes(text)
 +        s = BytesIO()
 +        g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
 +        g.flatten(msg, linesep='\n')
 +        self.assertEqual(s.getvalue(), text)
 +
 +    def test_must_be_7bit_handles_unknown_8bit(self):
 +        msg = email.message_from_bytes(self.non_latin_bin_msg)
 +        out = BytesIO()
 +        g = email.generator.BytesGenerator(out,
 +                        policy=email.policy.default.clone(must_be_7bit=True))
 +        g.flatten(msg)
 +        self.assertEqual(out.getvalue(),
 +            self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))
 +
 +    def test_must_be_7bit_transforms_8bit_cte(self):
 +        msg = email.message_from_bytes(self.latin_bin_msg)
 +        out = BytesIO()
 +        g = email.generator.BytesGenerator(out,
 +                        policy=email.policy.default.clone(must_be_7bit=True))
 +        g.flatten(msg)
 +        self.assertEqual(out.getvalue(),
 +                        self.latin_bin_msg_as7bit.encode('ascii'))
 +
      maxDiff = None
  
  
diff --cc Misc/NEWS
index 3652d618bc0d51ab532454a139b9939d757ee398,6d6268c4c8c1bba4e34a7f905cc3faf4024bfb8b..45fb9672f0d36399e22b1155d79c02d0f1ec17f3
+++ b/Misc/NEWS
@@@ -24,9 -22,8 +24,12 @@@ Core and Builtin
  Library
  -------
  
 -- Issue #14062: Header objects now correctly respect the 'linesep' setting
 -  when processed by BytesParser (which smtplib.SMTP.send_message uses).
++- Issue #14062: BytesGenerator now correctly folds Header objects,
++  including using linesep when folding.
++
 +- Issue #13839: When invoked on the command-line, the pstats module now
 +  accepts several filenames of profile stat files and merges them all.
 +  Patch by Matt Joiner.
  
  - Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
    instead of raising an error.  This fixes a regression relative to 2.7.