granicus.if.org Git - python/commitdiff
Forward port some fixes that were in email 2.5 but for some reason didn't make
author Barry Warsaw <barry@python.org>
Wed, 26 Jul 2006 05:54:46 +0000 (05:54 +0000)
committer Barry Warsaw <barry@python.org>
Wed, 26 Jul 2006 05:54:46 +0000 (05:54 +0000)
it into email 4.0.  Specifically, in Message.get_content_charset(), handle RFC
2231 headers that contain an encoding not known to Python, or a character in
the data that isn't in the charset encoding.  Also forward port the
appropriate unit tests.

Lib/email/message.py
Lib/email/test/test_email.py
Lib/email/test/test_email_renamed.py

index 50d90b4560a5783b5e2dce6e7c1ed90f283ed37f..79c5c4c6f3fd875494775ba1571021bb32367a72 100644 (file)
@@ -747,7 +747,18 @@ class Message:
         if isinstance(charset, tuple):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
             pcharset = charset[0] or 'us-ascii'
-            charset = unicode(charset[2], pcharset).encode('us-ascii')
+            try:
+                # LookupError will be raised if the charset isn't known to
+                # Python.  UnicodeError will be raised if the encoded text
+                # contains a character not in the charset.
+                charset = unicode(charset[2], pcharset).encode('us-ascii')
+            except (LookupError, UnicodeError):
+                charset = charset[2]
+        # charset character must be in us-ascii range
+        try:
+            charset = unicode(charset, 'us-ascii').encode('us-ascii')
+        except UnicodeError:
+            return failobj
         # RFC 2046, $4.1.2 says charsets are not case sensitive
         return charset.lower()
 
index db0c2bebd4a3b1c77447e268d94c366bcb98bd7d..13801dce12bdeaaa66e38d8579dfb18827d73d10 100644 (file)
@@ -3086,6 +3086,50 @@ Content-Type: text/plain;
         self.assertEqual(msg.get_content_charset(),
                          'this is even more ***fun*** is it not.pdf')
 
+    def test_rfc2231_bad_encoding_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         'This is even more ***fun*** is it not.pdf')
+
+    def test_rfc2231_bad_encoding_in_charset(self):
+        m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+
+"""
+        msg = email.message_from_string(m)
+        # This should return None because non-ascii characters in the charset
+        # are not allowed.
+        self.assertEqual(msg.get_content_charset(), None)
+
+    def test_rfc2231_bad_character_in_charset(self):
+        m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+
+"""
+        msg = email.message_from_string(m)
+        # This should return None because non-ascii characters in the charset
+        # are not allowed.
+        self.assertEqual(msg.get_content_charset(), None)
+
+    def test_rfc2231_bad_character_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2*="is it not.pdf%E2"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         u'This is even more ***fun*** is it not.pdf\ufffd')
+
     def test_rfc2231_unknown_encoding(self):
         m = """\
 Content-Transfer-Encoding: 8bit
index 680a725a7135f996f8108785f5e30c2d1b932f99..30f39b905d13509a0d25c455e1cbd0ff92696c7a 100644 (file)
@@ -3092,6 +3092,50 @@ Content-Type: text/plain;
         self.assertEqual(msg.get_content_charset(),
                          'this is even more ***fun*** is it not.pdf')
 
+    def test_rfc2231_bad_encoding_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2="is it not.pdf"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         'This is even more ***fun*** is it not.pdf')
+
+    def test_rfc2231_bad_encoding_in_charset(self):
+        m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+
+"""
+        msg = email.message_from_string(m)
+        # This should return None because non-ascii characters in the charset
+        # are not allowed.
+        self.assertEqual(msg.get_content_charset(), None)
+
+    def test_rfc2231_bad_character_in_charset(self):
+        m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+
+"""
+        msg = email.message_from_string(m)
+        # This should return None because non-ascii characters in the charset
+        # are not allowed.
+        self.assertEqual(msg.get_content_charset(), None)
+
+    def test_rfc2231_bad_character_in_filename(self):
+        m = '''\
+Content-Disposition: inline;
+\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
+\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+\tfilename*2*="is it not.pdf%E2"
+
+'''
+        msg = email.message_from_string(m)
+        self.assertEqual(msg.get_filename(),
+                         u'This is even more ***fun*** is it not.pdf\ufffd')
+
     def test_rfc2231_unknown_encoding(self):
         m = """\
 Content-Transfer-Encoding: 8bit