bpo-8425: Fast path for set inplace difference when the second set is large (GH-15590)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Thu, 29 Aug 2019 16:02:58 +0000 (09:02 -0700)
committer GitHub <noreply@github.com>
Thu, 29 Aug 2019 16:02:58 +0000 (09:02 -0700)
Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst [new file with mode: 0644]
Objects/setobject.c

diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst
new file mode 100644 (file)
index 0000000..8e5ec0b
--- /dev/null
@@ -0,0 +1,3 @@
+Optimize set difference_update for the case when the other set is much
+larger than the base set.  (Suggested by Evgeny Kapun with code contributed
+by Michele Orrù).
index 56858dbccfe1589a7309051c4db4a625e41fe35b..fafc2fa9e46dfa7bdaa900a86932e66534e3893c 100644 (file)
@@ -1463,9 +1463,25 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
         setentry *entry;
         Py_ssize_t pos = 0;
 
+        /* Optimization:  When the other set is more than 8 times
+           larger than the base set, replace the other set with
+           the intersection of the two sets.
+        */
+        if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
+            other = set_intersection(so, other);
+            if (other == NULL)
+                return -1;
+        } else {
+            Py_INCREF(other);
+        }
+
         while (set_next((PySetObject *)other, &pos, &entry))
-            if (set_discard_entry(so, entry->key, entry->hash) < 0)
+            if (set_discard_entry(so, entry->key, entry->hash) < 0) {
+                Py_DECREF(other);
                 return -1;
+            }
+
+        Py_DECREF(other);
     } else {
         PyObject *key, *it;
         it = PyObject_GetIter(other);