granicus.if.org Git - python/commitdiff
Fix formatting of values with embedded newlines when rfc2047 encoding
author R. David Murray <rdmurray@bitdance.com>
Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
committer R. David Murray <rdmurray@bitdance.com>
Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
Before this patch, if a value being encoded had an embedded newline,
the output line following the newline would have no leading whitespace,
because the whitespace the input line did have was encoded into the
word.  Now the existing whitespace gets turned into a blank, the way it
does in other header reformatting, and the _continuation_ws gets added
at the beginning of the encoded line.

Lib/email/header.py
Lib/email/test/test_email.py
Misc/NEWS

index 94eb1a94aabc58f926209699ccd44c60ae00bda5..d462bf008f5a6a759c2a2a0b655ed9d0d9f82bee 100644 (file)
@@ -305,10 +305,15 @@ class Header:
                                     self._continuation_ws, splitchars)
         for string, charset in self._chunks:
             lines = string.splitlines()
-            for line in lines:
+            formatter.feed(lines[0], charset)
+            for line in lines[1:]:
+                formatter.newline()
+                if charset.header_encoding is not None:
+                    formatter.feed(self._continuation_ws, USASCII)
+                    line = ' ' + line.lstrip()
                 formatter.feed(line, charset)
-                if len(lines) > 1:
-                    formatter.newline()
+            if len(lines) > 1:
+                formatter.newline()
             formatter.add_transition()
         return formatter._str(linesep)
 
index e7fcee3b6d5f09e064f8480c78bd34317499ddbd..4855371d1b1753f17f8b57909b360bc02bcede05 100644 (file)
@@ -968,6 +968,19 @@ List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassi
 
 """)
 
+    def test_long_rfc2047_header_with_embedded_fws(self):
+        h = Header(textwrap.dedent("""\
+            We're going to pretend this header is in a non-ascii character set
+            \tto see if line wrapping with encoded words and embedded
+               folding white space works"""),
+                   charset='utf-8',
+                   header_name='Test')
+        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
+            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
+             =?utf-8?q?cter_set?=
+             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
+             =?utf-8?q?_folding_white_space_works?=""")+'\n')
+
 
 
 # Test mangling of "From " lines in the body of a message
index 1865aa84ab87f5a014987ab9e7c5f1de8d33c1cd..536157487384f302c2078a42e2b00e99cb6a6d74 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,11 @@ Core and Builtins
 Library
 -------
 
+- email.header.Header was incorrectly encoding folding white space when
+  rfc2047-encoding header values with embedded newlines, leaving them
+  without folding whitespace.  It now uses the continuation_ws, as it
+  does for continuation lines that it creates itself.
+
 - Issue #10827: Changed the rules for 2-digit years.  The time.asctime
   function will now format any year when ``time.accept2dyear`` is
   false and will accept years >= 1000 otherwise.  The year range
@@ -47,7 +52,6 @@ Library
   dependent, but ``time.mktime`` will now accept full range supported
   by the OS.  Conversion of 2-digit years to 4-digit is deprecated.
 
-
 - Issue #7858: Raise an error properly when os.utime() fails under Windows
   on an existing file.