From: Ezio Melotti
Date: Sun, 20 Oct 2013 23:10:55 +0000 (+0300)
Subject: #18958: Improve error message for json.load(s) while passing a string that starts...
X-Git-Tag: v3.4.0b1~571^2~37
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=566a2be95c3aba65302d3d5b8108b27f810e1eaf;p=python

#18958: Improve error message for json.load(s) while passing a string that starts with a UTF-8 BOM.
---

diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 6cedb6e31f..a459f77a7b 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -313,6 +313,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
     if not isinstance(s, str):
         raise TypeError('the JSON object must be str, not {!r}'.format(
                             s.__class__.__name__))
+    if s.startswith(u'\ufeff'):
+        raise ValueError("Unexpected UTF-8 BOM (decode using utf-8-sig)")
     if (cls is None and object_hook is None and
             parse_int is None and parse_float is None and
             parse_constant is None and object_pairs_hook is None and not kw):
diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index 05d07b7219..35c02de88c 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -77,5 +77,19 @@ class TestDecode:
         with self.assertRaisesRegex(TypeError, msg):
             self.json.load(BytesIO(b'[1,2,3]'))
 
+    def test_string_with_utf8_bom(self):
+        # see #18958
+        bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
+        with self.assertRaises(ValueError) as cm:
+            self.loads(bom_json)
+        self.assertIn('BOM', str(cm.exception))
+        with self.assertRaises(ValueError) as cm:
+            self.json.load(StringIO(bom_json))
+        self.assertIn('BOM', str(cm.exception))
+        # make sure that the BOM is not detected in the middle of a string
+        bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8'))
+        self.assertEqual(self.loads(bom_in_str), '\ufeff')
+        self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff')
+
 class TestPyDecode(TestDecode, PyTest): pass
 class TestCDecode(TestDecode, CTest): pass
diff --git a/Misc/NEWS b/Misc/NEWS
index bff7097a60..af66063d69 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -62,6 +62,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #18958: Improve error message for json.load(s) while passing a string
+  that starts with a UTF-8 BOM.
+
 - Issue #19307: Improve error message for json.load(s) while passing objects
   of the wrong type.
 