From 1ce4ae3268e616414ad63a0cacdffa0b5830d0b5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Martin=20v=2E=20L=C3=B6wis?=
Date: Sat, 14 Sep 2002 09:19:53 +0000
Subject: [PATCH] Don't test whether surrogate sequences round-trip in UTF-8. 2.2.2 candidate.

---
 Lib/test/test_unicode.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index a57d6f4372..89e28b5b40 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -695,7 +695,10 @@ for encoding in ('utf-8',
     verify(unicode(u.encode(encoding),encoding) == u)
 
 # UTF-8 must be roundtrip safe for all UCS-2 code points
-u = u''.join(map(unichr, range(0x10000)))
+# This excludes surrogates: in the full range, there would be
+# a surrogate pair (\udbff\udc00), which gets converted back
+# to a non-BMP character (\U0010fc00)
+u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
 for encoding in ('utf-8',):
     verify(unicode(u.encode(encoding),encoding) == u)
 
-- 
2.40.0