granicus.if.org Git - python/commitdiff
Python 2 can encode/decode surrogates to utf-8. Add a test for this.
author Ezio Melotti <ezio.melotti@gmail.com>
Mon, 28 Feb 2011 01:42:29 +0000 (01:42 +0000)
committer Ezio Melotti <ezio.melotti@gmail.com>
Mon, 28 Feb 2011 01:42:29 +0000 (01:42 +0000)
Lib/test/test_unicode.py

index 0452f477a0500e1b9e33be2a4b0ccf35faca5b03..ae4c355f48d9ce90c18dcbe6605ee5ae4d5340ae 100644 (file)
@@ -667,11 +667,17 @@ class UnicodeTest(
         # see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
         # (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
         #for cb in map(chr, range(0xA0, 0xC0)):
-            #sys.__stdout__.write('\\xED\\x%02x\\x80\n' % ord(cb))
             #self.assertRaises(UnicodeDecodeError,
                               #('\xED'+cb+'\x80').decode, 'utf-8')
             #self.assertRaises(UnicodeDecodeError,
                               #('\xED'+cb+'\xBF').decode, 'utf-8')
+        # but since they are valid on Python 2 add a test for that:
+        for cb, surrogate in zip(map(chr, range(0xA0, 0xC0)),
+                                 map(unichr, range(0xd800, 0xe000, 64))):
+            encoded = '\xED'+cb+'\x80'
+            self.assertEqual(encoded.decode('utf-8'), surrogate)
+            self.assertEqual(surrogate.encode('utf-8'), encoded)
+
         for cb in map(chr, range(0x80, 0x90)):
             self.assertRaises(UnicodeDecodeError,
                               ('\xF0'+cb+'\x80\x80').decode, 'utf-8')