Fix formatting of values with embedded newlines when rfc2047 encoding

author R. David Murray <rdmurray@bitdance.com>

Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)

committer R. David Murray <rdmurray@bitdance.com>

Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
author R. David Murray <rdmurray@bitdance.com>
Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
committer R. David Murray <rdmurray@bitdance.com>
Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
diff --git a/Lib/email/header.py b/Lib/email/header.py

index 94eb1a94aabc58f926209699ccd44c60ae00bda5..d462bf008f5a6a759c2a2a0b655ed9d0d9f82bee 100644 (file)
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -305,10 +305,15 @@ class Header:
                                      self._continuation_ws, splitchars)
          for string, charset in self._chunks:
              lines = string.splitlines()
-            for line in lines:
+            formatter.feed(lines[0], charset)
+            for line in lines[1:]:
+                formatter.newline()
+                if charset.header_encoding is not None:
+                    formatter.feed(self._continuation_ws, USASCII)
+                    line = ' ' + line.lstrip()
                  formatter.feed(line, charset)
-                if len(lines) > 1:
-                    formatter.newline()
+            if len(lines) > 1:
+                formatter.newline()
              formatter.add_transition()
          return formatter._str(linesep)
  
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py

index e7fcee3b6d5f09e064f8480c78bd34317499ddbd..4855371d1b1753f17f8b57909b360bc02bcede05 100644 (file)
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -968,6 +968,19 @@ List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassi
  
  """)
  
+    def test_long_rfc2047_header_with_embedded_fws(self):
+        h = Header(textwrap.dedent("""\
+            We're going to pretend this header is in a non-ascii character set
+            \tto see if line wrapping with encoded words and embedded
+               folding white space works"""),
+                   charset='utf-8',
+                   header_name='Test')
+        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
+            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
+             =?utf-8?q?cter_set?=
+             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
+             =?utf-8?q?_folding_white_space_works?=""")+'\n')
+
  
  
  # Test mangling of "From " lines in the body of a message
diff --git a/Misc/NEWS b/Misc/NEWS

index 1865aa84ab87f5a014987ab9e7c5f1de8d33c1cd..536157487384f302c2078a42e2b00e99cb6a6d74 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,11 @@ Core and Builtins
  Library
  -------
  
+- email.header.Header was incorrectly encoding folding whitespace when
+  rfc2047-encoding header values with embedded newlines, leaving the
+  continuation lines without folding whitespace.  It now uses the
+  continuation_ws, as it does for continuation lines that it creates itself.
+
  - Issue #10827: Changed the rules for 2-digit years.  The time.asctime
    function will now format any year when ``time.accept2dyear`` is
    false and will accept years >= 1000 otherwise.  The year range
@@ -47,7 +52,6 @@ Library
    dependent, but ``time.mktime`` will now accept full range supported
    by the OS.  Conversion of 2-digit years to 4-digit is deprecated.
  
-
  - Issue #7858: Raise an error properly when os.utime() fails under Windows
    on an existing file.
author	R. David Murray <rdmurray@bitdance.com>
	Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
committer	R. David Murray <rdmurray@bitdance.com>
	Fri, 7 Jan 2011 21:57:25 +0000 (21:57 +0000)
Lib/email/header.py		patch | blob | history
Lib/email/test/test_email.py		patch | blob | history
Misc/NEWS		patch | blob | history