granicus.if.org Git - python/commitdiff
#16336: merge with 3.2.
author Ezio Melotti <ezio.melotti@gmail.com>
Sat, 3 Nov 2012 21:04:41 +0000 (23:04 +0200)
committer Ezio Melotti <ezio.melotti@gmail.com>
Sat, 3 Nov 2012 21:04:41 +0000 (23:04 +0200)
1  2 
Lib/test/test_codecs.py
Misc/NEWS
Python/codecs.c

index 93660f7a19811f8c47ca3130e9fabd201ec407cc,0f7c23efccb90e111f8de4535bfd4a1d1c2c6aaa..f2a1ae3f790f2ef54bd7902fb5f87a7fdfeb5bfb
@@@ -668,108 -647,9 +668,110 @@@ class UTF8Test(ReadTest)
          self.assertTrue(codecs.lookup_error("surrogatepass"))
          with self.assertRaises(UnicodeDecodeError):
              b"abc\xed\xa0".decode("utf-8", "surrogatepass")
+         with self.assertRaises(UnicodeDecodeError):
+             b"abc\xed\xa0z".decode("utf-8", "surrogatepass")
  
 +@unittest.skipUnless(sys.platform == 'win32',
 +                     'cp65001 is a Windows-only codec')
 +class CP65001Test(ReadTest):
 +    encoding = "cp65001"
 +
 +    def test_encode(self):
 +        tests = [
 +            ('abc', 'strict', b'abc'),
 +            ('\xe9\u20ac', 'strict',  b'\xc3\xa9\xe2\x82\xac'),
 +            ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
 +        ]
 +        if VISTA_OR_LATER:
 +            tests.extend((
 +                ('\udc80', 'strict', None),
 +                ('\udc80', 'ignore', b''),
 +                ('\udc80', 'replace', b'?'),
 +                ('\udc80', 'backslashreplace', b'\\udc80'),
 +                ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
 +            ))
 +        else:
 +            tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
 +        for text, errors, expected in tests:
 +            if expected is not None:
 +                try:
 +                    encoded = text.encode('cp65001', errors)
 +                except UnicodeEncodeError as err:
 +                    self.fail('Unable to encode %a to cp65001 with '
 +                              'errors=%r: %s' % (text, errors, err))
 +                self.assertEqual(encoded, expected,
 +                    '%a.encode("cp65001", %r)=%a != %a'
 +                    % (text, errors, encoded, expected))
 +            else:
 +                self.assertRaises(UnicodeEncodeError,
 +                    text.encode, "cp65001", errors)
 +
 +    def test_decode(self):
 +        tests = [
 +            (b'abc', 'strict', 'abc'),
 +            (b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
 +            (b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
 +            (b'\xef\xbf\xbd', 'strict', '\ufffd'),
 +            (b'[\xc3\xa9]', 'strict', '[\xe9]'),
 +            # invalid bytes
 +            (b'[\xff]', 'strict', None),
 +            (b'[\xff]', 'ignore', '[]'),
 +            (b'[\xff]', 'replace', '[\ufffd]'),
 +            (b'[\xff]', 'surrogateescape', '[\udcff]'),
 +        ]
 +        if VISTA_OR_LATER:
 +            tests.extend((
 +                (b'[\xed\xb2\x80]', 'strict', None),
 +                (b'[\xed\xb2\x80]', 'ignore', '[]'),
 +                (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
 +            ))
 +        else:
 +            tests.extend((
 +                (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
 +            ))
 +        for raw, errors, expected in tests:
 +            if expected is not None:
 +                try:
 +                    decoded = raw.decode('cp65001', errors)
 +                except UnicodeDecodeError as err:
 +                    self.fail('Unable to decode %a from cp65001 with '
 +                              'errors=%r: %s' % (raw, errors, err))
 +                self.assertEqual(decoded, expected,
 +                    '%a.decode("cp65001", %r)=%a != %a'
 +                    % (raw, errors, decoded, expected))
 +            else:
 +                self.assertRaises(UnicodeDecodeError,
 +                    raw.decode, 'cp65001', errors)
 +
 +    @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
 +    def test_lone_surrogates(self):
 +        self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
 +        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
 +        self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),
 +                         b'[\\udc80]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),
 +                         b'[&#56448;]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"),
 +                         b'[\x80]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "ignore"),
 +                         b'[]')
 +        self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
 +                         b'[?]')
 +
 +    @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
 +    def test_surrogatepass_handler(self):
 +        self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
 +                         b"abc\xed\xa0\x80def")
 +        self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"),
 +                         "abc\ud800def")
 +        self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"),
 +                         b"\xf0\x90\xbf\xbf\xed\xa0\x80")
 +        self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"),
 +                         "\U00010fff\uD800")
 +        self.assertTrue(codecs.lookup_error("surrogatepass"))
 +
 +
 +
  class UTF7Test(ReadTest):
      encoding = "utf-7"
  
diff --cc Misc/NEWS
Simple merge
diff --cc Python/codecs.c
Simple merge