From 370d85cee417e24447e28f00b36716c7479f343c Mon Sep 17 00:00:00 2001
From: Ezio Melotti
Date: Mon, 28 Feb 2011 01:42:29 +0000
Subject: [PATCH] Python 2 can encode/decode surrogates to utf-8. Add a test for this.

---
 Lib/test/test_unicode.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 0452f477a0..ae4c355f48 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -667,11 +667,17 @@ class UnicodeTest(
         # see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
         # (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
         #for cb in map(chr, range(0xA0, 0xC0)):
-            #sys.__stdout__.write('\\xED\\x%02x\\x80\n' % ord(cb))
             #self.assertRaises(UnicodeDecodeError,
                               #('\xED'+cb+'\x80').decode, 'utf-8')
             #self.assertRaises(UnicodeDecodeError,
                               #('\xED'+cb+'\xBF').decode, 'utf-8')
+        # but since they are valid on Python 2 add a test for that:
+        for cb, surrogate in zip(map(chr, range(0xA0, 0xC0)),
+                                 map(unichr, range(0xd800, 0xe000, 64))):
+            encoded = '\xED'+cb+'\x80'
+            self.assertEqual(encoded.decode('utf-8'), surrogate)
+            self.assertEqual(surrogate.encode('utf-8'), encoded)
+
         for cb in map(chr, range(0x80, 0x90)):
             self.assertRaises(UnicodeDecodeError,
                               ('\xF0'+cb+'\x80\x80').decode, 'utf-8')
-- 
2.50.0