]> granicus.if.org Git - python/commitdiff
Recorded merge of revisions 81500-81501 via svnmerge from
author: Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:46:25 +0000 (21:46 +0000)
committer: Victor Stinner <victor.stinner@haypocalc.com>
Mon, 24 May 2010 21:46:25 +0000 (21:46 +0000)
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines

  Issue #6662: Fix parsing of malformatted charref (&#bad;)
........
  r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines

  Add the author of the last fix (Issue #6662)
........

Lib/html/parser.py
Lib/test/test_htmlparser.py
Misc/ACKS
Misc/NEWS

index 83a58258c7a53786fbf2f910db52e9a5b35985d5..c2c7f6bf5da5fcab99caf17e19a7a7daafed0c83 100644 (file)
@@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase):
                     i = self.updatepos(i, k)
                     continue
                 else:
+                    if ";" in rawdata[i:]: #bail by consuming &#
+                        self.handle_data(rawdata[0:2])
+                        i = self.updatepos(i, 2)
                     break
             elif startswith('&', i):
                 match = entityref.match(rawdata, i)
index dd74aac09b225529cb7c08c732fe5b0bc202f772..e982218dba11fe3e1f2ac12faf7840c4bbed2e04 100644 (file)
@@ -136,6 +136,13 @@ text
     ("data", "\n"),
     ])
 
+    def test_malformatted_charref(self):
+        self._run_check("<p>&#bad;</p>", [
+            ("starttag", "p", []),
+            ("data", "&#bad;"),
+            ("endtag", "p"),
+        ])
+
     def test_unclosed_entityref(self):
         self._run_check("&entityref foo", [
             ("entityref", "entityref"),
index b35acaf555d76459325b43ec92257e9e8dba753a..ee625420e7cbf7729566fc10c47bf693604af6c4 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -871,3 +871,4 @@ Siebren van der Zee
 Uwe Zessin
 Tarek Ziadé
 Peter Åstrand
+Fredrik Håård
index 36f374bdc955932b18f83f47fa6e0bc68be45b63..df363451ac443b2cafc0c6b9f2e7b69577273668 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -392,6 +392,9 @@ C-API
 Library
 -------
 
+- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by
+  Fredrik Håård
+
 - Issue #8540: Decimal module: rename the Context._clamp attribute to
   Context.clamp and make it public.  This is useful in creating
   contexts that correspond to the decimal interchange formats