From: Antoine Pitrou Date: Tue, 30 Nov 2010 22:23:20 +0000 (+0000) Subject: Issue #8685: Speed up set difference `a - b` when source set `a` is X-Git-Tag: v3.2b1~152 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=715f3cd10d0481a2dd8c1969c4044d068b06cfd5;p=python Issue #8685: Speed up set difference `a - b` when source set `a` is much larger than operand `b`. Patch by Andrew Bennetts. --- diff --git a/Misc/NEWS b/Misc/NEWS index 470fcff19e..6d8371c863 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.2 Beta 1? Core and Builtins ----------------- +- Issue #8685: Speed up set difference ``a - b`` when source set ``a`` is + much larger than operand ``b``. Patch by Andrew Bennetts. + - Issue #10518: Bring back the callable() builtin. - Issue #8879. Add os.link support for Windows. diff --git a/Objects/setobject.c b/Objects/setobject.c index dfafefd057..478b5cb651 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1524,6 +1524,20 @@ set_difference_update(PySetObject *so, PyObject *args) PyDoc_STRVAR(difference_update_doc, "Remove all elements of another set from this set."); +static PyObject * +set_copy_and_difference(PySetObject *so, PyObject *other) +{ + PyObject *result; + + result = set_copy(so); + if (result == NULL) + return NULL; + if (set_difference_update_internal((PySetObject *) result, other) != -1) + return result; + Py_DECREF(result); + return NULL; +} + static PyObject * set_difference(PySetObject *so, PyObject *other) { @@ -1532,13 +1546,13 @@ set_difference(PySetObject *so, PyObject *other) Py_ssize_t pos = 0; if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) { - result = set_copy(so); - if (result == NULL) - return NULL; - if (set_difference_update_internal((PySetObject *)result, other) != -1) - return result; - Py_DECREF(result); - return NULL; + return set_copy_and_difference(so, other); + } + + /* If len(so) much more than len(other), it's more efficient to simply copy + * so and then iterate other looking for common elements. */ + if ((PySet_GET_SIZE(so) >> 2) > PyObject_Size(other)) { + return set_copy_and_difference(so, other); } result = make_new_set_basetype(Py_TYPE(so), NULL); @@ -1560,6 +1574,7 @@ set_difference(PySetObject *so, PyObject *other) return result; } + /* Iterate over so, checking for common elements in other. */ while (set_next(so, &pos, &entry)) { int rv = set_contains_entry((PySetObject *)other, entry); if (rv == -1) {