From: Victor Stinner Date: Mon, 24 May 2010 21:42:59 +0000 (+0000) Subject: Merged revisions 81500-81501 via svnmerge from X-Git-Tag: v2.6.6rc1~245 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b0c42877de222bf137d9ee4aac67c1ecb1f4ad93;p=python Merged revisions 81500-81501 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines Issue #6662: Fix parsing of malformatted charref (&#bad;) ........ r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines Add the author of the last fix (Issue #6662) ........ --- diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 2cbc2ecbc7..7cee47a7c5 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -175,6 +175,9 @@ class HTMLParser(markupbase.ParserBase): i = self.updatepos(i, k) continue else: + if ";" in rawdata[i:]: #bail by consuming &# + self.handle_data(rawdata[0:2]) + i = self.updatepos(i, 2) break elif startswith('&', i): match = entityref.match(rawdata, i) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 810af6c8cb..c45cf00ece 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -313,6 +313,13 @@ DOCTYPE html [ ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")]) ]) + def test_malformatted_charref(self): + self._run_check("

<p>&#bad;</p>

", [ + ("starttag", "p", []), + ("data", "&#bad;"), + ("endtag", "p"), + ]) + def test_main(): test_support.run_unittest(HTMLParserTestCase) diff --git a/Misc/ACKS b/Misc/ACKS index efaa20ffe3..94a22a860e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -191,7 +191,7 @@ Luke Dunstan Andy Dustman Gary Duzan Eugene Dvurechenski -Josip Dzolonga +Josip Dzolonga Maxim Dzumanenko Walter Dörwald Hans Eckardt @@ -812,3 +812,4 @@ Uwe Zessin Tarek ZiadŽ Peter Åstrand Jesse Noller +Fredrik Håård diff --git a/Misc/NEWS b/Misc/NEWS index badd19af6e..eba95a01f2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -55,6 +55,9 @@ C-API Library ------- +- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by + Fredrik Håård + - Issue #1628205: Socket file objects returned by socket.socket.makefile() now properly handles EINTR within the read, readline, write & flush methods. The socket.sendall() method now properly handles interrupted system calls.