From: Philip Jenvey Date: Sat, 27 Oct 2012 00:05:09 +0000 (-0700) Subject: merge with 3.2 X-Git-Tag: v3.3.1rc1~742 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5f9459fbedae091bff5b87189014a996f1352b1c;p=python merge with 3.2 --- 5f9459fbedae091bff5b87189014a996f1352b1c diff --cc Lib/test/test_codecs.py index 4e808ec6ac,42d0da3e70..93660f7a19 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@@ -661,113 -644,10 +661,115 @@@ class UTF8Test(ReadTest) b"abc\xed\xa0\x80def") self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"), "abc\ud800def") + self.assertEqual("\U00010fff\uD800".encode("utf-8", "surrogatepass"), + b"\xf0\x90\xbf\xbf\xed\xa0\x80") + self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("utf-8", "surrogatepass"), + "\U00010fff\uD800") self.assertTrue(codecs.lookup_error("surrogatepass")) + with self.assertRaises(UnicodeDecodeError): + b"abc\xed\xa0".decode("utf-8", "surrogatepass") +@unittest.skipUnless(sys.platform == 'win32', + 'cp65001 is a Windows-only codec') +class CP65001Test(ReadTest): + encoding = "cp65001" + + def test_encode(self): + tests = [ + ('abc', 'strict', b'abc'), + ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'), + ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'), + ] + if VISTA_OR_LATER: + tests.extend(( + ('\udc80', 'strict', None), + ('\udc80', 'ignore', b''), + ('\udc80', 'replace', b'?'), + ('\udc80', 'backslashreplace', b'\\udc80'), + ('\udc80', 'surrogatepass', b'\xed\xb2\x80'), + )) + else: + tests.append(('\udc80', 'strict', b'\xed\xb2\x80')) + for text, errors, expected in tests: + if expected is not None: + try: + encoded = text.encode('cp65001', errors) + except UnicodeEncodeError as err: + self.fail('Unable to encode %a to cp65001 with ' + 'errors=%r: %s' % (text, errors, err)) + self.assertEqual(encoded, expected, + '%a.encode("cp65001", %r)=%a != %a' + % (text, errors, encoded, expected)) + else: + self.assertRaises(UnicodeEncodeError, + text.encode, "cp65001", errors) + + def test_decode(self): + tests = [ + (b'abc', 'strict', 'abc'), + (b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'), + (b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'), + (b'\xef\xbf\xbd', 'strict', '\ufffd'), + (b'[\xc3\xa9]', 'strict', '[\xe9]'), + # invalid bytes + (b'[\xff]', 'strict', None), + (b'[\xff]', 'ignore', '[]'), + (b'[\xff]', 'replace', '[\ufffd]'), + (b'[\xff]', 'surrogateescape', '[\udcff]'), + ] + if VISTA_OR_LATER: + tests.extend(( + (b'[\xed\xb2\x80]', 'strict', None), + (b'[\xed\xb2\x80]', 'ignore', '[]'), + (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'), + )) + else: + tests.extend(( + (b'[\xed\xb2\x80]', 'strict', '[\udc80]'), + )) + for raw, errors, expected in tests: + if expected is not None: + try: + decoded = raw.decode('cp65001', errors) + except UnicodeDecodeError as err: + self.fail('Unable to decode %a from cp65001 with ' + 'errors=%r: %s' % (raw, errors, err)) + self.assertEqual(decoded, expected, + '%a.decode("cp65001", %r)=%a != %a' + % (raw, errors, decoded, expected)) + else: + self.assertRaises(UnicodeDecodeError, + raw.decode, 'cp65001', errors) + + @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later') + def test_lone_surrogates(self): + self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001") + self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001") + self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"), + b'[\\udc80]') + self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"), + b'[�]') + self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"), + b'[\x80]') + self.assertEqual("[\uDC80]".encode("cp65001", "ignore"), + b'[]') + self.assertEqual("[\uDC80]".encode("cp65001", "replace"), + b'[?]') + + @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later') + def test_surrogatepass_handler(self): + self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"), + b"abc\xed\xa0\x80def") + self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"), + "abc\ud800def") + self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"), + b"\xf0\x90\xbf\xbf\xed\xa0\x80") + self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"), + "\U00010fff\uD800") + self.assertTrue(codecs.lookup_error("surrogatepass")) + + + class UTF7Test(ReadTest): encoding = "utf-7"