Issue #28541: Improve test coverage for encoding detection in json library.

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 30 Oct 2016 21:00:01 +0000 (23:00 +0200)

committer Serhiy Storchaka <storchaka@gmail.com>

Sun, 30 Oct 2016 21:00:01 +0000 (23:00 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 30 Oct 2016 21:00:01 +0000 (23:00 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 30 Oct 2016 21:00:01 +0000 (23:00 +0200)
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py

index 8dcc6786e27d4ce722535f5ab3287a28d61794dc..94397aa4e946ade78b9b99a6c71e76ebc17b3539 100644 (file)
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -257,7 +257,8 @@ def detect_encoding(b):
              return 'utf-16-be' if b[1] else 'utf-32-be'
          if not b[1]:
              # XX 00 00 00 - utf-32-le
-            # XX 00 XX XX - utf-16-le
+            # XX 00 00 XX - utf-16-le
+            # XX 00 XX -- - utf-16-le
              return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
      elif len(b) == 2:
          if not b[0]:
diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py

index eda177aa68c1bc4ae17edb6cf2a5cd1407919d44..2e8bba2775256ae5f61b2c0b923db76f4a5a3505 100644 (file)
--- a/Lib/test/test_json/test_unicode.py
+++ b/Lib/test/test_json/test_unicode.py
@@ -65,6 +65,19 @@ class TestUnicode:
              self.assertEqual(self.loads(bom + encoded), data)
              self.assertEqual(self.loads(encoded), data)
          self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
+        # RFC-7159 and ECMA-404 extend JSON to allow documents that
+        # consist of only a string, which can present a special case
+        # not covered by the encoding detection patterns specified in
+        # RFC-4627 for utf-16-le (XX 00 XX 00).
+        self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
+                         '\u2600')
+        # Encoding detection for small (<4) bytes objects
+        # is implemented as a special case. RFC-7159 and ECMA-404
+        # allow single codepoint JSON documents which are only two
+        # bytes in utf-16 encodings w/o BOM.
+        self.assertEqual(self.loads(b'5\x00'), 5)
+        self.assertEqual(self.loads(b'\x007'), 7)
+        self.assertEqual(self.loads(b'57'), 57)
  
      def test_object_pairs_hook_with_unicode(self):
          s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 30 Oct 2016 21:00:01 +0000 (23:00 +0200)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 30 Oct 2016 21:00:01 +0000 (23:00 +0200)
Lib/json/__init__.py		patch | blob | history
Lib/test/test_json/test_unicode.py		patch | blob | history