#12888: Fix a bug in HTMLParser.unescape that prevented it from unescaping more than 128...

author Ezio Melotti <ezio.melotti@gmail.com>

Mon, 5 Sep 2011 14:11:06 +0000 (17:11 +0300)

committer Ezio Melotti <ezio.melotti@gmail.com>

Mon, 5 Sep 2011 14:11:06 +0000 (17:11 +0300)
author Ezio Melotti <ezio.melotti@gmail.com>
Mon, 5 Sep 2011 14:11:06 +0000 (17:11 +0300)
committer Ezio Melotti <ezio.melotti@gmail.com>
Mon, 5 Sep 2011 14:11:06 +0000 (17:11 +0300)
diff --git a/Lib/html/parser.py b/Lib/html/parser.py

index 941228072a3ce777e14b4861e3385f10703c39e1..a6d5be94fa335866831ffb408696b8855d197082 100644 (file)
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -458,4 +458,4 @@ class HTMLParser(_markupbase.ParserBase):
                      return '&'+s+';'
  
          return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
-                      replaceEntities, s, re.ASCII)
+                      replaceEntities, s, flags=re.ASCII)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py

index 637ab01f126223db1274547ba79b1969ccfa2478..d45e45327fe82fe2421ed9834b23e2c42fa82742 100644 (file)
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -377,7 +377,8 @@ class HTMLParserTolerantTestCase(TestCaseBase):
          p = html.parser.HTMLParser()
          self.assertEqual(p.unescape('&#bad;'),'&#bad;')
          self.assertEqual(p.unescape('&#0038;'),'&')
-
+        # see #12888
+        self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050)
  
  def test_main():
      support.run_unittest(HTMLParserTestCase, HTMLParserTolerantTestCase)
diff --git a/Misc/ACKS b/Misc/ACKS

index 7096d728d2849297e9c25fdf7a2f7bd21844f53e..45ab6a44fa7c0600b16e178926efeb15eb431d49 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -661,6 +661,7 @@ Douglas Orr
  Michele Orrù
  Oleg Oshmyan
  Denis S. Otkidach
+Peter Otten
  Michael Otteneder
  R. M. Oudkerk
  Russel Owen
diff --git a/Misc/NEWS b/Misc/NEWS

index c12bda23a987a54fbea46efefd9ca833a2e5ca20..ff1a0add421870bd3d04e9a75a17c1464db78868 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -25,6 +25,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #12888: Fix a bug in HTMLParser.unescape that prevented it from
+  unescaping more than 128 entities.  Patch by Peter Otten.
+
  - Issue #12878: Expose a __dict__ attribute on io.IOBase and its subclasses.
  
  - Issue #12636: IDLE reads the coding cookie when executing a Python script.
author	Ezio Melotti <ezio.melotti@gmail.com>
	Mon, 5 Sep 2011 14:11:06 +0000 (17:11 +0300)
committer	Ezio Melotti <ezio.melotti@gmail.com>
	Mon, 5 Sep 2011 14:11:06 +0000 (17:11 +0300)
Lib/html/parser.py		patch \| blob \| history
Lib/test/test_htmlparser.py		patch \| blob \| history
Misc/ACKS		patch \| blob \| history
Misc/NEWS		patch \| blob \| history