From: Ezio Melotti Date: Wed, 15 Feb 2012 10:44:23 +0000 (+0200) Subject: #13987: HTMLParser is now able to handle EOFs in the middle of a construct. X-Git-Tag: v2.7.3rc1~57 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d2307cb48ab09baa846947c5c2c4001dce9b6e52;p=python #13987: HTMLParser is now able to handle EOFs in the middle of a construct. --- diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index f230c5f163..d2268d02cd 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -170,9 +170,16 @@ class HTMLParser(markupbase.ParserBase): else: break if k < 0: - if end: - self.error("EOF in middle of construct") - break + if not end: + break + k = rawdata.find('>', i + 1) + if k < 0: + k = rawdata.find('<', i + 1) + if k < 0: + k = i + 1 + else: + k += 1 + self.handle_data(rawdata[i:k]) i = self.updatepos(i, k) elif startswith("&#", i): match = charref.match(rawdata, i) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 6667512785..ba775abdac 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -204,16 +204,16 @@ text def test_starttag_junk_chars(self): self._run_check("", []) self._run_check("", [('comment', '$')]) - self._parse_error("") self._run_check("", [('endtag', 'a'") - self._parse_error("'", [('data', "