granicus.if.org Git - python/commitdiff
#17369: Improve handling of broken RFC2231 values in get_filename.
author: R David Murray <rdmurray@bitdance.com>
Fri, 7 Feb 2014 20:02:19 +0000 (15:02 -0500)
committer: R David Murray <rdmurray@bitdance.com>
Fri, 7 Feb 2014 20:02:19 +0000 (15:02 -0500)
This fixes a regression relative to python2.

Lib/email/utils.py
Lib/test/test_email/test_email.py
Misc/NEWS

index 93a625c8b447d9742ff8a69258cbeec9e3ca56fa..f76c21eb1b6770f4d1942e2d8109df0609711ce0 100644 (file)
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -337,6 +337,10 @@ def collapse_rfc2231_value(value, errors='replace',
     # object.  We do not want bytes() normal utf-8 decoder, we want a straight
     # interpretation of the string as character bytes.
     charset, language, text = value
+    if charset is None:
+        # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
+        # the value, so use the fallback_charset.
+        charset = fallback_charset
     rawbytes = bytes(text, 'raw-unicode-escape')
     try:
         return str(rawbytes, charset, errors)
index c787695e1236553d39a1e90ee74a7d6e07880061..4157a067a2a6e0f8b33c2855a8225e6f6769547e 100644 (file)
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5018,6 +5018,26 @@ Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
         self.assertNotIsInstance(param, tuple)
         self.assertEqual(param, "Frank's Document")
 
+    def test_rfc2231_missing_tick(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0*="'This%20is%20broken";
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(
+            msg.get_filename(),
+            "'This is broken")
+
+    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0*="'This%20is%E2broken";
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(
+            msg.get_filename(),
+            "'This is\ufffdbroken")
+
     # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
     def test_rfc2231_tick_attack_extended(self):
         eq = self.assertEqual
index bf7781ff397036cda40b6d8acb7cb983cfeb8696..e663bfea9b2f02ae4188df948f78fab03782571a 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -45,6 +45,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #17369: get_filename was raising an exception if the filename
+  parameter's RFC2231 encoding was broken in certain ways.  This was
+  a regression relative to python2.
+
 - Issue #20013: Some imap servers disconnect if the current mailbox is
   deleted, and imaplib did not handle that case gracefully.  Now it
   handles the 'bye' correctly.