granicus.if.org Git - python/commitdiff
#1651995: fix _convert_ref for non-ASCII characters.
author Georg Brandl <georg@python.org>
Tue, 31 Mar 2009 22:11:53 +0000 (22:11 +0000)
committer Georg Brandl <georg@python.org>
Tue, 31 Mar 2009 22:11:53 +0000 (22:11 +0000)
Lib/sgmllib.py
Lib/test/test_sgmllib.py

index e5fbd32fcc8cfddf7a10b6e0aef9261a40be173b..104b25f2a07b0e716c40328d1878f56e774d8169 100644 (file)
@@ -396,7 +396,7 @@ class SGMLParser(markupbase.ParserBase):
             n = int(name)
         except ValueError:
             return
-        if not 0 <= n <= 255:
+        if not 0 <= n <= 127:
             return
         return self.convert_codepoint(n)
 
index 34fd7f00ba07a5d4f426d1af1069b11388052750..081e0e17fc95e4d3a27f3140fd537e51a9381f4d 100644 (file)
@@ -373,6 +373,15 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
             if len(data) != CHUNK:
                 break
 
+    def test_only_decode_ascii(self):
+        # SF bug #1651995, make sure non-ascii character references are not decoded
+        s = '<signs exclamation="&#33" copyright="&#169" quoteleft="&#8216;">'
+        self.check_events(s, [
+            ('starttag', 'signs',
+             [('exclamation', '!'), ('copyright', '&#169'),
+              ('quoteleft', '&#8216;')]),
+            ])
+
     # XXX These tests have been disabled by prefixing their names with
     # an underscore.  The first two exercise outstanding bugs in the
     # sgmllib module, and the third exhibits questionable behavior