.. function:: loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
- Deserialize *s* (a :class:`str` instance containing a JSON document) to a
- Python object using this :ref:`conversion table <json-to-py-table>`.
+ Deserialize *s* (a :class:`str`, :class:`bytes` or :class:`bytearray`
+ instance containing a JSON document) to a Python object using this
+ :ref:`conversion table <json-to-py-table>`.
The other arguments have the same meaning as in :func:`load`, except
*encoding* which is ignored and deprecated.
:term:`path-like object`.
+json
+----
+
+:func:`json.load` and :func:`json.loads` now support binary input. Encoded
+JSON should be represented using either UTF-8, UTF-16, or UTF-32.
+(Contributed by Serhiy Storchaka in :issue:`17909`.)
+
+
os
--
from .decoder import JSONDecoder, JSONDecodeError
from .encoder import JSONEncoder
+import codecs
_default_encoder = JSONEncoder(
skipkeys=False,
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
+def detect_encoding(b):  # pick the codec name for JSON bytes *b* from its BOM or leading zero bytes
+ bstartswith = b.startswith
+ if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):  # tested before UTF-16: BOM_UTF32_LE begins with the BOM_UTF16_LE bytes
+ return 'utf-32'
+ if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+ return 'utf-16'
+ if bstartswith(codecs.BOM_UTF8):
+ return 'utf-8-sig'
+
+ if len(b) >= 4:
+ if not b[0]:
+ # No BOM: classify by which of the first bytes are zero.
+ # 00 00 -- -- - utf-32-be; 00 XX -- -- - utf-16-be
+ return 'utf-16-be' if b[1] else 'utf-32-be'
+ if not b[1]:
+ # XX 00 00 00 - utf-32-le
+ # XX 00 XX -- / XX 00 00 XX - utf-16-le (third or fourth byte non-zero)
+ return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+ elif len(b) == 2:
+ if not b[0]:
+ # 00 XX - utf-16-be (two bytes are too short to be UTF-32)
+ return 'utf-16-be'
+ if not b[1]:
+ # XX 00 - utf-16-le
+ return 'utf-16-le'
+ # default: nothing above matched (this also covers 0-, 1- and 3-byte input)
+ return 'utf-8'
+
+
def load(fp, *, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
- """Deserialize ``s`` (a ``str`` instance containing a JSON
- document) to a Python object.
+ """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
+ containing a JSON document) to a Python object.
``object_hook`` is an optional function that will be called with the
result of any object literal decode (a ``dict``). The return value of
The ``encoding`` argument is ignored and deprecated.
"""
- if not isinstance(s, str):
- raise TypeError('the JSON object must be str, not {!r}'.format(
- s.__class__.__name__))
- if s.startswith(u'\ufeff'):
- raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
- s, 0)
+ if isinstance(s, str):
+ if s.startswith('\ufeff'):
+ raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",
+ s, 0)
+ else:
+ if not isinstance(s, (bytes, bytearray)):
+ raise TypeError('the JSON object must be str, bytes or bytearray, '
+ 'not {!r}'.format(s.__class__.__name__))
+ s = s.decode(detect_encoding(s), 'surrogatepass')
+
if (cls is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw):
def test_invalid_input_type(self):
msg = 'the JSON object must be str'
- for value in [1, 3.14, b'bytes', b'\xff\x00', [], {}, None]:
+ for value in [1, 3.14, [], {}, None]:
self.assertRaisesRegex(TypeError, msg, self.loads, value)
- with self.assertRaisesRegex(TypeError, msg):
- self.json.load(BytesIO(b'[1,2,3]'))
def test_string_with_utf8_bom(self):
# see #18958
+import codecs
from collections import OrderedDict
from test.test_json import PyTest, CTest
self.assertRaises(TypeError, self.dumps, [b"hi"])
def test_bytes_decode(self):
- self.assertRaises(TypeError, self.loads, b'"hi"')
- self.assertRaises(TypeError, self.loads, b'["hi"]')
-
+ for encoding, bom in [
+ ('utf-8', codecs.BOM_UTF8),
+ ('utf-16be', codecs.BOM_UTF16_BE),
+ ('utf-16le', codecs.BOM_UTF16_LE),
+ ('utf-32be', codecs.BOM_UTF32_BE),
+ ('utf-32le', codecs.BOM_UTF32_LE),
+ ]:
+ data = ["a\xb5\u20ac\U0001d120"]
+ encoded = self.dumps(data).encode(encoding)
+ self.assertEqual(self.loads(bom + encoded), data)
+ self.assertEqual(self.loads(encoded), data)
+ self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
def test_object_pairs_hook_with_unicode(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
Library
-------
+- Issue #17909: ``json.load`` and ``json.loads`` now support binary input
+ encoded as UTF-8, UTF-16 or UTF-32. Patch by Serhiy Storchaka.
+
- Issue #27137: the pure Python fallback implementation of ``functools.partial``
now matches the behaviour of its accelerated C counterpart for subclassing,
pickling and text representation purposes. Patch by Emanuel Barry and