From: Serhiy Storchaka Date: Sun, 15 Mar 2015 21:43:34 +0000 (+0200) Subject: Increased coverage of standard codec error handlers. X-Git-Tag: v3.5.0a3~172 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=93f4d4c1d614be8a043af35a13b0ff50d551bc7a;p=python Increased coverage of standard codec error handlers. --- 93f4d4c1d614be8a043af35a13b0ff50d551bc7a diff --cc Lib/test/test_codeccallbacks.py index e29ac53039,54277de003..b52e1f6d1c --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@@ -570,142 -549,151 +570,207 @@@ class CodecCallbackTest(unittest.TestCa codecs.backslashreplace_errors, UnicodeError("ouch") ) - # "backslashreplace" can only be used for encoding - self.assertRaises( - TypeError, - codecs.backslashreplace_errors, - UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") - ) - self.assertRaises( - TypeError, - codecs.backslashreplace_errors, - UnicodeTranslateError("\u3042", 0, 1, "ouch") - ) # Use the correct exception - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), - ("\\u3042", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), - ("\\x00", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), - ("\\xff", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), - ("\\u0100", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), - ("\\uffff", 1) - ) - if SIZEOF_WCHAR_T > 0: - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\U00010000", - 0, 1, "ouch")), - ("\\U00010000", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\U0010ffff", - 0, 1, "ouch")), - ("\\U0010ffff", 1) - ) - # Lone surrogates (regardless of unicode width) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), - ("\\ud800", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), - ("\\udfff", 1) - ) + tests = [ + ("\u3042", "\\u3042"), + ("\n", "\\x0a"), + ("a", "\\x61"), + ("\x00", "\\x00"), + ("\xff", "\\xff"), + ("\u0100", "\\u0100"), + ("\uffff", "\\uffff"), + ("\U00010000", "\\U00010000"), + ("\U0010ffff", "\\U0010ffff"), + # Lone surrogates + ("\ud800", "\\ud800"), + ("\udfff", "\\udfff"), + ("\ud800\udfff", "\\ud800\\udfff"), + ] + for s, r in tests: + with self.subTest(str=s): + self.assertEqual( + codecs.backslashreplace_errors( + UnicodeEncodeError("ascii", s, 0, len(s), "ouch")), + (r, len(s)) + ) ++ self.assertEqual( ++ codecs.backslashreplace_errors( ++ UnicodeTranslateError(s, 0, len(s), "ouch")), ++ (r, len(s)) ++ ) ++ tests = [ ++ (b"a", "\\x61"), ++ (b"\n", "\\x0a"), ++ (b"\x00", "\\x00"), ++ (b"\xff", "\\xff"), ++ ] ++ for b, r in tests: ++ with self.subTest(bytes=b): ++ self.assertEqual( ++ codecs.backslashreplace_errors( ++ UnicodeDecodeError("ascii", bytearray(b), 0, 1, "ouch")), ++ (r, 1) ++ ) + + def test_badandgoodnamereplaceexceptions(self): + # "namereplace" complains about a non-exception passed in + self.assertRaises( + TypeError, + codecs.namereplace_errors, + 42 + ) + # "namereplace" complains about the wrong exception types + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeError("ouch") + ) + # "namereplace" can only be used for encoding + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") + ) + self.assertRaises( + TypeError, + codecs.namereplace_errors, + UnicodeTranslateError("\u3042", 0, 1, "ouch") + ) + # Use the correct exception - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), - ("\\N{HIRAGANA LETTER A}", 1) ++ tests = [ ++ ("\u3042", "\\N{HIRAGANA LETTER A}"), ++ ("\x00", "\\x00"), ++ ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH " ++ "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"), ++ ("\U000e007f", "\\N{CANCEL TAG}"), ++ ("\U0010ffff", "\\U0010ffff"), ++ # Lone surrogates ++ ("\ud800", "\\ud800"), ++ ("\udfff", "\\udfff"), ++ ("\ud800\udfff", "\\ud800\\udfff"), ++ ] ++ for s, r in tests: ++ with self.subTest(str=s): ++ self.assertEqual( ++ codecs.namereplace_errors( ++ UnicodeEncodeError("ascii", s, 0, len(s), "ouch")), ++ (r, len(s)) ++ ) + + def test_badandgoodsurrogateescapeexceptions(self): + surrogateescape_errors = codecs.lookup_error('surrogateescape') + # "surrogateescape" complains about a non-exception passed in + self.assertRaises( + TypeError, + surrogateescape_errors, + 42 ) - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), - ("\\x00", 1) + # "surrogateescape" complains about the wrong exception types + self.assertRaises( + TypeError, + surrogateescape_errors, + UnicodeError("ouch") ) - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), - ("\\N{LATIN SMALL LETTER Y WITH DIAERESIS}", 1) + # "surrogateescape" can not be used for translating + self.assertRaises( + TypeError, + surrogateescape_errors, + UnicodeTranslateError("\udc80", 0, 1, "ouch") ) + # Use the correct exception + for s in ("a", "\udc7f", "\udd00"): + with self.subTest(str=s): + self.assertRaises( + UnicodeEncodeError, + surrogateescape_errors, + UnicodeEncodeError("ascii", s, 0, 1, "ouch") + ) self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), - ("\\N{LATIN CAPITAL LETTER A WITH MACRON}", 1) + surrogateescape_errors( + UnicodeEncodeError("ascii", "\udc80", 0, 1, "ouch")), + (b"\x80", 1) ) - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), - ("\\uffff", 1) - ) - if SIZEOF_WCHAR_T > 0: - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\U00010000", - 0, 1, "ouch")), - ("\\N{LINEAR B SYLLABLE B008 A}", 1) - ) - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\U0010ffff", - 0, 1, "ouch")), - ("\\U0010ffff", 1) - ) - # Lone surrogates (regardless of unicode width) - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), - ("\\ud800", 1) - ) - self.assertEqual( - codecs.namereplace_errors( - UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), - ("\\udfff", 1) - ) - self.assertEqual( - codecs.backslashreplace_errors( - UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")), - ("\\xff", 1) + self.assertRaises( + UnicodeDecodeError, + surrogateescape_errors, + UnicodeDecodeError("ascii", bytearray(b"a"), 0, 1, "ouch") ) self.assertEqual( - codecs.backslashreplace_errors( - UnicodeTranslateError("\u3042", 0, 1, "ouch")), - ("\\u3042", 1) + surrogateescape_errors( + UnicodeDecodeError("ascii", bytearray(b"\x80"), 0, 1, "ouch")), + ("\udc80", 1) ) + def test_badandgoodsurrogatepassexceptions(self): + surrogatepass_errors = codecs.lookup_error('surrogatepass') + # "surrogatepass" complains about a non-exception passed in + self.assertRaises( + TypeError, + surrogatepass_errors, + 42 + ) + # "surrogatepass" complains about the wrong exception types + self.assertRaises( + TypeError, + surrogatepass_errors, + UnicodeError("ouch") + ) + # "surrogatepass" can not be used for translating + self.assertRaises( + TypeError, + surrogatepass_errors, + UnicodeTranslateError("\ud800", 0, 1, "ouch") + ) + # Use the correct exception + for enc in ("utf-8", "utf-16le", "utf-16be", "utf-32le", "utf-32be"): + with self.subTest(encoding=enc): + self.assertRaises( + UnicodeEncodeError, + surrogatepass_errors, + UnicodeEncodeError(enc, "a", 0, 1, "ouch") + ) + self.assertRaises( + UnicodeDecodeError, + surrogatepass_errors, + UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch") + ) ++ for s in ("\ud800", "\udfff", "\ud800\udfff"): ++ with self.subTest(str=s): ++ self.assertRaises( ++ UnicodeEncodeError, ++ surrogatepass_errors, ++ UnicodeEncodeError("ascii", s, 0, len(s), "ouch") ++ ) + tests = [ - ("ascii", "\ud800", b'\xed\xa0\x80', 3), + ("utf-8", "\ud800", b'\xed\xa0\x80', 3), + ("utf-16le", "\ud800", b'\x00\xd8', 2), + ("utf-16be", "\ud800", b'\xd8\x00', 2), + ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4), + ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4), - ("ascii", "\udfff", b'\xed\xbf\xbf', 3), + ("utf-8", "\udfff", b'\xed\xbf\xbf', 3), + ("utf-16le", "\udfff", b'\xff\xdf', 2), + ("utf-16be", "\udfff", b'\xdf\xff', 2), + ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4), + ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4), - ("ascii", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), + ("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), + ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2), + ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2), + ("utf-32le", "\ud800\udfff", b'\x00\xd8\x00\x00\xff\xdf\x00\x00', 4), + ("utf-32be", "\ud800\udfff", b'\x00\x00\xd8\x00\x00\x00\xdf\xff', 4), + ] + for enc, s, b, n in tests: + with self.subTest(encoding=enc, str=s, bytes=b): + self.assertEqual( + surrogatepass_errors( + UnicodeEncodeError(enc, s, 0, len(s), "ouch")), + (b, len(s)) + ) + self.assertEqual( + surrogatepass_errors( + UnicodeDecodeError(enc, bytearray(b[:n]), 0, n, "ouch")), + (s[:1], n) + ) + def test_badhandlerresults(self): results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")