granicus.if.org Git - python/commitdiff
Issue #6662: Fix parsing of malformatted charref (&#bad;)
author Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:33:24 +0000 (21:33 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:33:24 +0000 (21:33 +0000)
Lib/HTMLParser.py
Lib/test/test_htmlparser.py
Misc/NEWS

index 2cbc2ecbc7326c1cdeeebd6596ac97eb67ff6601..7cee47a7c5d7c6a44b712d28154580af5f05daf6 100644 (file)
@@ -175,6 +175,9 @@ class HTMLParser(markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
+                    if ";" in rawdata[i:]: #bail by consuming &#
+                        self.handle_data(rawdata[0:2])
+                        i = self.updatepos(i, 2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
index 810af6c8cbc86073677f4c9165126f408ea35229..c45cf00ecea3cf068097a7f737d0f02a4d87ddf4 100644 (file)
@@ -313,6 +313,13 @@ DOCTYPE html [
                 ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
                 ])
 
+    def test_malformatted_charref(self):  # added by this commit (Issue #6662)
+        self._run_check("<p>&#bad;</p>", [  # input: "&#" followed by non-numeric text and ";"
+            ("starttag", "p", []),
+            ("data", "&#bad;"),  # the malformed reference is expected as plain data
+            ("endtag", "p"),
+        ])
+
 
 def test_main():
     test_support.run_unittest(HTMLParserTestCase)
index b0941268897d28eca04ca9081254e102457b7f59..e2f0f0ccf8e5b5a6fcda33b7ec86bf65ab9871fd 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -29,6 +29,8 @@ C-API
 Library
 -------
 
+- Issue #6662: Fix parsing of malformatted charref (&#bad;)
+
 - Issue #8016: Add the CP858 codec.
 
 - Issue #3924: Ignore cookies with invalid "version" field in cookielib.