granicus.if.org Git - python/commitdiff
Merged revisions 81504 via svnmerge from
author: Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:48:07 +0000 (21:48 +0000)
committer: Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:48:07 +0000 (21:48 +0000)
svn+ssh://pythondev@svn.python.org/python/branches/py3k

................
  r81504 | victor.stinner | 2010-05-24 23:46:25 +0200 (lun., 24 mai 2010) | 13 lines

  Recorded merge of revisions 81500-81501 via svnmerge from
  svn+ssh://pythondev@svn.python.org/python/trunk

  ........
    r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines

    Issue #6662: Fix parsing of malformatted charref (&#bad;)
  ........
    r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines

    Add the author of the last fix (Issue #6662)
  ........
................

Lib/html/parser.py
Lib/test/test_htmlparser.py
Misc/ACKS
Misc/NEWS

index 83a58258c7a53786fbf2f910db52e9a5b35985d5..c2c7f6bf5da5fcab99caf17e19a7a7daafed0c83 100644 (file)
@@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
+                    if ";" in rawdata[i:]: #bail by consuming &#
+                        self.handle_data(rawdata[0:2])
+                        i = self.updatepos(i, 2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
index dd74aac09b225529cb7c08c732fe5b0bc202f772..e982218dba11fe3e1f2ac12faf7840c4bbed2e04 100755 (executable)
@@ -136,6 +136,13 @@ text
     ("data", "\n"),
     ])
 
+    def test_malformatted_charref(self):
+        self._run_check("<p>&#bad;</p>", [
+            ("starttag", "p", []),
+            ("data", "&#bad;"),
+            ("endtag", "p"),
+        ])
+
     def test_unclosed_entityref(self):
         self._run_check("&entityref foo", [
             ("entityref", "entityref"),
index 838f6f073f158033423fd3b089579c050f8dd90f..459e21640dfa4d363e105b99ee429b24014dc1f1 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -846,3 +846,4 @@ Siebren van der Zee
 Uwe Zessin
 Tarek Ziadé
 Peter Åstrand
+Fredrik Håård
index 27d16aa42eb616640d7740e62c82a2bed9524bd7..2b5b7919a7812e871eda51eddae5b4a47a29c2b3 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -54,6 +54,9 @@ C-API
 Library
 -------
 
+- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by
+  Fredrik Håård
+
 - Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM
   twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and
   StreamWriter classes.