bpo-37587: optimize json.loads (GH-15134)
author Inada Naoki <songofacandy@gmail.com>
Thu, 8 Aug 2019 08:57:10 +0000 (17:57 +0900)
committer GitHub <noreply@github.com>
Thu, 8 Aug 2019 08:57:10 +0000 (17:57 +0900)
Use a more tightly scoped temporary variable to help register allocation.
1% speedup for large strings.

Use PyDict_SetDefault() for memoizing keys.
Up to 4% speedup when the cache hit ratio is low.

Modules/_json.c

index 76da1d345e9df42a55dee480a6d522e8c5113063..112903ea577a2632c96dc001330b3367e4dad41c 100644 (file)
@@ -433,16 +433,21 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
     }
     while (1) {
         /* Find the end of the string or the next escape */
-        Py_UCS4 c = 0;
-        for (next = end; next < len; next++) {
-            c = PyUnicode_READ(kind, buf, next);
-            if (c == '"' || c == '\\') {
-                break;
-            }
-            else if (c <= 0x1f && strict) {
-                raise_errmsg("Invalid control character at", pystr, next);
-                goto bail;
+        Py_UCS4 c;
+        {
+            // Use tight scope variable to help register allocation.
+            Py_UCS4 d = 0;
+            for (next = end; next < len; next++) {
+                d = PyUnicode_READ(kind, buf, next);
+                if (d == '"' || d == '\\') {
+                    break;
+                }
+                if (d <= 0x1f && strict) {
+                    raise_errmsg("Invalid control character at", pystr, next);
+                    goto bail;
+                }
             }
+            c = d;
         }
         if (!(c == '"' || c == '\\')) {
             raise_errmsg("Unterminated string starting at", pystr, begin);
@@ -749,19 +754,13 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
             key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
             if (key == NULL)
                 goto bail;
-            memokey = PyDict_GetItemWithError(s->memo, key);
-            if (memokey != NULL) {
-                Py_INCREF(memokey);
-                Py_DECREF(key);
-                key = memokey;
-            }
-            else if (PyErr_Occurred()) {
+            memokey = PyDict_SetDefault(s->memo, key, key);
+            if (memokey == NULL) {
                 goto bail;
             }
-            else {
-                if (PyDict_SetItem(s->memo, key, key) < 0)
-                    goto bail;
-            }
+            Py_INCREF(memokey);
+            Py_DECREF(key);
+            key = memokey;
             idx = next_idx;
 
             /* skip whitespace between key and : delimiter, read :, skip whitespace */