From: Victor Stinner Date: Mon, 25 Sep 2017 08:43:56 +0000 (-0700) Subject: bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) (#3745) X-Git-Tag: v2.7.15rc1~192 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5f5da728aec9c4f74cc771fbf30037b64a447514;p=python bpo-31170: Write unit test for Expat 2.2.4 UTF-8 bug (#3570) (#3745) Non-regression tests for the Expat 2.2.3 UTF-8 decoder bug. (cherry picked from commit e6d9fcbb8d0c325e57df08ae8781aafedb71eca2) --- diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 55d7010123..e466867b7c 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -30,6 +30,7 @@ ET = None SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") +UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") SAMPLE_XML = """\ @@ -1494,6 +1495,36 @@ class BugsTest(unittest.TestCase): ET.register_namespace('test10777', 'http://myuri/') ET.register_namespace('test10777', 'http://myuri/') + def check_expat224_utf8_bug(self, text): + xml = b'<a b="%s"/>' % text + root = ET.XML(xml) + self.assertEqual(root.get('b'), text.decode('utf-8')) + + def test_expat224_utf8_bug(self): + # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. + # Check that Expat 2.2.4 fixed the bug. + # + # Test buffer bounds at odd and even positions. 
+ + text = b'\xc3\xa0' * 1024 + self.check_expat224_utf8_bug(text) + + text = b'x' + b'\xc3\xa0' * 1024 + self.check_expat224_utf8_bug(text) + + def test_expat224_utf8_bug_file(self): + with open(UTF8_BUG_XMLFILE, 'rb') as fp: + raw = fp.read() + root = ET.fromstring(raw) + xmlattr = root.get('b') + + # "Parse" manually the XML file to extract the value of the 'b' + # attribute of the <a b='xxx' /> XML element + text = raw.decode('utf-8').strip() + text = text.replace('\r\n', ' ') + text = text[6:-4] + self.assertEqual(root.get('b'), text) + # -------------------------------------------------------------------- diff --git a/Lib/test/xmltestdata/expat224_utf8_bug.xml b/Lib/test/xmltestdata/expat224_utf8_bug.xml new file mode 100644 index 0000000000..d66a8e6b50 --- /dev/null +++ b/Lib/test/xmltestdata/expat224_utf8_bug.xml @@ -0,0 +1,2 @@ +