From: R David Murray Date: Wed, 14 Mar 2012 18:24:22 +0000 (-0400) Subject: #14062: fix BytesParser handling of Header objects X-Git-Tag: v3.3.0a2~203 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8d8f11049265f4e53b2d97f5caa73c4ca0ee7875;p=python #14062: fix BytesParser handling of Header objects This is a different fix than the 3.2 fix, but the new tests are the same. This also affected smtplib.SMTP.send_message, which calls BytesParser. --- 8d8f11049265f4e53b2d97f5caa73c4ca0ee7875 diff --cc Lib/email/generator.py index d8b8fa960b,430ee73ea1..edba13f8fd --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@@ -372,22 -359,21 +372,25 @@@ class BytesGenerator(Generator) # strings with 8bit bytes. for h, v in msg._headers: self.write('%s: ' % h) - if isinstance(v, Header): - self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL) - elif _has_surrogates(v): - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - self.write(v+NL) + if isinstance(v, str): + if _has_surrogates(v): + if not self.policy.must_be_7bit: + # If we have raw 8bit data in a byte string, we have no idea + # what the encoding is. There is no safe way to split this + # string. If it's ascii-subset, then we could do a normal + # ascii split, but if it's multibyte then we could break the + # string. There's no way to know so the least harm seems to + # be to not split the string and risk it being too long. + self.write(v+NL) + continue + h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h) + else: + h = Header(v, header_name=h) + else: - # Header's got lots of smarts and this string is safe... - header = Header(v, maxlinelen=self._maxheaderlen, - header_name=h) - self.write(header.encode(linesep=self._NL)+self._NL) ++ # Assume it is a Header-like object. ++ h = v + self.write(h.encode(linesep=self._NL, + maxlinelen=self._maxheaderlen)+self._NL) # A blank line always separates headers from body self.write(self._NL) diff --cc Lib/test/test_email/test_email.py index a4d39ab779,5655938021..1f354c2b66 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@@ -3601,44 -3440,30 +3601,68 @@@ class Test8BitBytesHandling(unittest.Te g.flatten(msg) self.assertEqual(s.getvalue(), source) + def test_bytes_generator_b_encoding_linesep(self): + # Issue 14062: b encoding was tacking on an extra \n. + m = Message() + # This has enough non-ascii that it should always end up b encoded. + m['Subject'] = Header('žluťoučký kůň') + s = BytesIO() + g = email.generator.BytesGenerator(s) + g.flatten(m, linesep='\r\n') + self.assertEqual( + s.getvalue(), + b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') + + def test_generator_b_encoding_linesep(self): + # Since this broke in ByteGenerator, test Generator for completeness. + m = Message() + # This has enough non-ascii that it should always end up b encoded. + m['Subject'] = Header('žluťoučký kůň') + s = StringIO() + g = email.generator.Generator(s) + g.flatten(m, linesep='\r\n') + self.assertEqual( + s.getvalue(), + 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') + + def test_crlf_control_via_policy(self): + # msg_26 is crlf terminated + with openfile('msg_26.txt', 'rb') as fp: + text = fp.read() + msg = email.message_from_bytes(text) + s = BytesIO() + g = email.generator.BytesGenerator(s, policy=email.policy.SMTP) + g.flatten(msg) + self.assertEqual(s.getvalue(), text) + + def test_flatten_linesep_overrides_policy(self): + # msg_27 is lf separated + with openfile('msg_27.txt', 'rb') as fp: + text = fp.read() + msg = email.message_from_bytes(text) + s = BytesIO() + g = email.generator.BytesGenerator(s, policy=email.policy.SMTP) + g.flatten(msg, linesep='\n') + self.assertEqual(s.getvalue(), text) + + def test_must_be_7bit_handles_unknown_8bit(self): + msg = email.message_from_bytes(self.non_latin_bin_msg) + out = BytesIO() + g = email.generator.BytesGenerator(out, + policy=email.policy.default.clone(must_be_7bit=True)) + g.flatten(msg) + self.assertEqual(out.getvalue(), + self.non_latin_bin_msg_as7bit_wrapped.encode('ascii')) + + def test_must_be_7bit_transforms_8bit_cte(self): + msg = email.message_from_bytes(self.latin_bin_msg) + out = BytesIO() + g = email.generator.BytesGenerator(out, + policy=email.policy.default.clone(must_be_7bit=True)) + g.flatten(msg) + self.assertEqual(out.getvalue(), + self.latin_bin_msg_as7bit.encode('ascii')) + maxDiff = None diff --cc Misc/NEWS index 3652d618bc,6d6268c4c8..45fb9672f0 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -24,9 -22,8 +24,12 @@@ Core and Builtin Library ------- -- Issue #14062: Header objects now correctly respect the 'linesep' setting - when processed by BytesParser (which smtplib.SMTP.send_message uses). ++- Issue #14062: BytesGenerator now correctly folds Header objects, ++ including using linesep when folding. ++ +- Issue #13839: When invoked on the command-line, the pstats module now + accepts several filenames of profile stat files and merges them all. + Patch by Matt Joiner. - Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers instead of raising an error. This fixes a regression relative to 2.7.