return 'utf-16-be' if b[1] else 'utf-32-be'
if not b[1]:
# XX 00 00 00 - utf-32-le
- # XX 00 XX XX - utf-16-le
+ # XX 00 00 XX - utf-16-le
+ # XX 00 XX -- - utf-16-le
return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
elif len(b) == 2:
if not b[0]:
self.assertEqual(self.loads(bom + encoded), data)
self.assertEqual(self.loads(encoded), data)
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
+ # RFC-7159 and ECMA-404 extend JSON to allow documents that
+ # consist of only a string, which can present a special case
+ # not covered by the encoding detection patterns specified in
+ # RFC-4627 for utf-16-le (XX 00 XX 00).
+ self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
+ '\u2600')
+ # Encoding detection for bytes objects shorter than 4 bytes
+ # is implemented as a special case. RFC-7159 and ECMA-404
+ # allow single-codepoint JSON documents, which are only two
+ # bytes long in utf-16 encodings without a BOM.
+ self.assertEqual(self.loads(b'5\x00'), 5)
+ self.assertEqual(self.loads(b'\x007'), 7)
+ self.assertEqual(self.loads(b'57'), 57)
def test_object_pairs_hook_with_unicode(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'