From: Ezio Melotti <ezio.melotti@gmail.com>
Date: Sat, 1 Feb 2014 19:20:22 +0000 (+0200)
Subject: #20288: fix handling of invalid numeric charrefs in HTMLParser.
X-Git-Tag: v2.7.8~65
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5a88853bdc1074e62441c7558502bd989c39f056;p=python

#20288: fix handling of invalid numeric charrefs in HTMLParser.
---

diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 5a55e264af..3f97830a9a 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -195,9 +195,9 @@ class HTMLParser(markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
-                    if ";" in rawdata[i:]: #bail by consuming &#
-                        self.handle_data(rawdata[0:2])
-                        i = self.updatepos(i, 2)
+                    if ";" in rawdata[i:]:  # bail by consuming '&#'
+                        self.handle_data(rawdata[i:i+2])
+                        i = self.updatepos(i, i+2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 6a0e461829..cde2bd23b7 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -394,6 +394,12 @@ text
             ("data", "&#bad;"),
             ("endtag", "p"),
         ])
+        # wrap the source in [] as a workaround to avoid buffering (see #20288)
+        self._run_check(["<div>&#bad;</div>"], [
+            ("starttag", "div", []),
+            ("data", "&#bad;"),
+            ("endtag", "div"),
+        ])
 
     def test_unescape_function(self):
         parser = HTMLParser.HTMLParser()
diff --git a/Misc/NEWS b/Misc/NEWS
index 67423709d9..ec12fbef9a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #20288: fix handling of invalid numeric charrefs in HTMLParser.
+
 - Issue #19456: ntpath.join() now joins relative paths correctly when a drive
   is present.