From 0c7b2c9c191a97b4aede17c15e2b5013e7edcfb5 Mon Sep 17 00:00:00 2001
From: Georg Brandl
Date: Tue, 31 Mar 2009 22:11:53 +0000
Subject: [PATCH] #1651995: fix _convert_ref for non-ASCII characters.

---
 Lib/sgmllib.py           | 2 +-
 Lib/test/test_sgmllib.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index e5fbd32fcc..104b25f2a0 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -396,7 +396,7 @@ class SGMLParser(markupbase.ParserBase):
             n = int(name)
         except ValueError:
             return
-        if not 0 <= n <= 255:
+        if not 0 <= n <= 127:
             return
         return self.convert_codepoint(n)
 
diff --git a/Lib/test/test_sgmllib.py b/Lib/test/test_sgmllib.py
index 34fd7f00ba..081e0e17fc 100644
--- a/Lib/test/test_sgmllib.py
+++ b/Lib/test/test_sgmllib.py
@@ -373,6 +373,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
             if len(data) != CHUNK:
                 break
 
+    def test_only_decode_ascii(self):
+        # SF bug #1651995, make sure non-ascii character references are not decoded
+        s = '<signs exclamation="&#33" copyright="&#169" quoteleft="&#8216;">'
+        self.check_events(s, [
+            ('starttag', 'signs',
+             [('exclamation', '!'), ('copyright', '&#169'),
+              ('quoteleft', '&#8216;')]),
+            ])
+
     # XXX These tests have been disabled by prefixing their names with
     # an underscore.  The first two exercise outstanding bugs in the
     # sgmllib module, and the third exhibits questionable behavior
-- 
2.50.1