granicus.if.org Git - python/commitdiff
merge with 3.2
author Philip Jenvey <pjenvey@underboss.org>
Sat, 27 Oct 2012 00:05:09 +0000 (17:05 -0700)
committer Philip Jenvey <pjenvey@underboss.org>
Sat, 27 Oct 2012 00:05:09 +0000 (17:05 -0700)
1  2 
Lib/test/test_codecs.py
Python/codecs.c

diff --cc Lib/test/test_codecs.py
index 4e808ec6acd0288d4cf8dded53b815906bba1c94,42d0da3e703bba9db6f69db0eceee986329a1675..93660f7a19811f8c47ca3130e9fabd201ec407cc
@@@ -661,113 -644,10 +661,115 @@@ class UTF8Test(ReadTest)
                           b"abc\xed\xa0\x80def")
          self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
                           "abc\ud800def")
 +        self.assertEqual("\U00010fff\uD800".encode("utf-8", "surrogatepass"),
 +                         b"\xf0\x90\xbf\xbf\xed\xa0\x80")
 +        self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("utf-8", "surrogatepass"),
 +                         "\U00010fff\uD800")
          self.assertTrue(codecs.lookup_error("surrogatepass"))
+         with self.assertRaises(UnicodeDecodeError):
+             b"abc\xed\xa0".decode("utf-8", "surrogatepass")
  
 +@unittest.skipUnless(sys.platform == 'win32',
 +                     'cp65001 is a Windows-only codec')
 +class CP65001Test(ReadTest):
 +    encoding = "cp65001"
 +
 +    def test_encode(self):
 +        tests = [
 +            ('abc', 'strict', b'abc'),
 +            ('\xe9\u20ac', 'strict',  b'\xc3\xa9\xe2\x82\xac'),
 +            ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
 +        ]
 +        if VISTA_OR_LATER:
 +            tests.extend((
 +                ('\udc80', 'strict', None),
 +                ('\udc80', 'ignore', b''),
 +                ('\udc80', 'replace', b'?'),
 +                ('\udc80', 'backslashreplace', b'\\udc80'),
 +                ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
 +            ))
 +        else:
 +            tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
 +        for text, errors, expected in tests:
 +            if expected is not None:
 +                try:
 +                    encoded = text.encode('cp65001', errors)
 +                except UnicodeEncodeError as err:
 +                    self.fail('Unable to encode %a to cp65001 with '
 +                              'errors=%r: %s' % (text, errors, err))
 +                self.assertEqual(encoded, expected,
 +                    '%a.encode("cp65001", %r)=%a != %a'
 +                    % (text, errors, encoded, expected))
 +            else:
 +                self.assertRaises(UnicodeEncodeError,
 +                    text.encode, "cp65001", errors)
 +
 +    def test_decode(self):
 +        tests = [
 +            (b'abc', 'strict', 'abc'),
 +            (b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
 +            (b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
 +            (b'\xef\xbf\xbd', 'strict', '\ufffd'),
 +            (b'[\xc3\xa9]', 'strict', '[\xe9]'),
 +            # invalid bytes
 +            (b'[\xff]', 'strict', None),
 +            (b'[\xff]', 'ignore', '[]'),
 +            (b'[\xff]', 'replace', '[\ufffd]'),
 +            (b'[\xff]', 'surrogateescape', '[\udcff]'),
 +        ]
 +        if VISTA_OR_LATER:
 +            tests.extend((
 +                (b'[\xed\xb2\x80]', 'strict', None),
 +                (b'[\xed\xb2\x80]', 'ignore', '[]'),
 +                (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
 +            ))
 +        else:
 +            tests.extend((
 +                (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
 +            ))
 +        for raw, errors, expected in tests:
 +            if expected is not None:
 +                try:
 +                    decoded = raw.decode('cp65001', errors)
 +                except UnicodeDecodeError as err:
 +                    self.fail('Unable to decode %a from cp65001 with '
 +                              'errors=%r: %s' % (raw, errors, err))
 +                self.assertEqual(decoded, expected,
 +                    '%a.decode("cp65001", %r)=%a != %a'
 +                    % (raw, errors, decoded, expected))
 +            else:
 +                self.assertRaises(UnicodeDecodeError,
 +                    raw.decode, 'cp65001', errors)
 +
 +    @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
 +    def test_lone_surrogates(self):
 +        self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
 +        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
 +        self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),
 +                         b'[\\udc80]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),
 +                         b'[&#56448;]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"),
 +                         b'[\x80]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "ignore"),
 +                         b'[]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
 +                         b'[?]')
 +
 +    @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
 +    def test_surrogatepass_handler(self):
 +        self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
 +                         b"abc\xed\xa0\x80def")
 +        self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"),
 +                         "abc\ud800def")
 +        self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"),
 +                         b"\xf0\x90\xbf\xbf\xed\xa0\x80")
 +        self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"),
 +                         "\U00010fff\uD800")
 +        self.assertTrue(codecs.lookup_error("surrogatepass"))
 +
 +
 +
  class UTF7Test(ReadTest):
      encoding = "utf-7"
  
diff --cc Python/codecs.c
Simple merge