granicus.if.org Git - python/commitdiff
Merged revisions 81500-81501 via svnmerge from
author Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:42:59 +0000 (21:42 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:42:59 +0000 (21:42 +0000)
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines

  Issue #6662: Fix parsing of malformatted charref (&#bad;)
........
  r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines

  Add the author of the last fix (Issue #6662)
........

Lib/HTMLParser.py
Lib/test/test_htmlparser.py
Misc/ACKS
Misc/NEWS

index 2cbc2ecbc7326c1cdeeebd6596ac97eb67ff6601..7cee47a7c5d7c6a44b712d28154580af5f05daf6 100644 (file)
@@ -175,6 +175,9 @@ class HTMLParser(markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
+                    if ";" in rawdata[i:]: #bail by consuming &#
+                        self.handle_data(rawdata[0:2])
+                        i = self.updatepos(i, 2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
index 810af6c8cbc86073677f4c9165126f408ea35229..c45cf00ecea3cf068097a7f737d0f02a4d87ddf4 100755 (executable)
@@ -313,6 +313,13 @@ DOCTYPE html [
                 ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
                 ])
 
+    def test_malformatted_charref(self):
+        self._run_check("<p>&#bad;</p>", [
+            ("starttag", "p", []),
+            ("data", "&#bad;"),
+            ("endtag", "p"),
+        ])
+
 
 def test_main():
     test_support.run_unittest(HTMLParserTestCase)
index efaa20ffe34b56de62f456e639661aa544791203..94a22a860ecbf21a42a2ff7b0b81e1eff5e9dc1b 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -191,7 +191,7 @@ Luke Dunstan
 Andy Dustman
 Gary Duzan
 Eugene Dvurechenski
-Josip Dzolonga 
+Josip Dzolonga
 Maxim Dzumanenko
 Walter Dörwald
 Hans Eckardt
@@ -812,3 +812,4 @@ Uwe Zessin
 Tarek Ziadé
 Peter Åstrand
 Jesse Noller
+Fredrik Håård
index badd19af6e9d124847d611d11e451e111d475f60..eba95a01f28ab7d790fb77d54f07b8a67b60777f 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -55,6 +55,9 @@ C-API
 Library
 -------
 
+- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by
+  Fredrik Håård
+
 - Issue #1628205: Socket file objects returned by socket.socket.makefile() now
   properly handles EINTR within the read, readline, write & flush methods.
   The socket.sendall() method now properly handles interrupted system calls.