]> granicus.if.org Git - python/commitdiff
bpo-37358: Use vectorcall for functools.partial (GH-14284)
authorJeroen Demeyer <J.Demeyer@UGent.be>
Sat, 13 Jul 2019 14:39:18 +0000 (16:39 +0200)
committerMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sat, 13 Jul 2019 14:39:18 +0000 (07:39 -0700)
https://bugs.python.org/issue37358

Misc/NEWS.d/next/Library/2019-06-21-14-54-02.bpo-37358.RsASpn.rst [new file with mode: 0644]
Modules/_functoolsmodule.c

diff --git a/Misc/NEWS.d/next/Library/2019-06-21-14-54-02.bpo-37358.RsASpn.rst b/Misc/NEWS.d/next/Library/2019-06-21-14-54-02.bpo-37358.RsASpn.rst
new file mode 100644 (file)
index 0000000..bc37631
--- /dev/null
@@ -0,0 +1 @@
+Optimized ``functools.partial`` by using vectorcall.
index a101363bf02a07dedfa3184a52fc42c46bbb660c..17d49ac5b342da79bc8062b32bea3f2b7a14ec65 100644 (file)
@@ -18,13 +18,15 @@ typedef struct {
     PyObject *fn;
     PyObject *args;
     PyObject *kw;
-    PyObject *dict;
+    PyObject *dict;        /* __dict__ */
     PyObject *weakreflist; /* List of weak references */
-    int use_fastcall;
+    vectorcallfunc vectorcall;
 } partialobject;
 
 static PyTypeObject partial_type;
 
+static void partial_setvectorcall(partialobject *pto);
+
 static PyObject *
 partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 {
@@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
         return NULL;
     }
 
-    pto->use_fastcall = (_PyVectorcall_Function(func) != NULL);
-
+    partial_setvectorcall(pto);
     return (PyObject *)pto;
 }
 
@@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
     Py_TYPE(pto)->tp_free(pto);
 }
 
+
+/* Merging keyword arguments using the vectorcall convention is messy, so
+ * if we would need to do that, we stop using vectorcall and fall back
+ * to using partial_call() instead. */
+_Py_NO_INLINE static PyObject *
+partial_vectorcall_fallback(partialobject *pto, PyObject *const *args,
+                            size_t nargsf, PyObject *kwnames)
+{
+    pto->vectorcall = NULL;
+    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+    return _PyObject_MakeTpCall((PyObject *)pto, args, nargs, kwnames);
+}
+
 static PyObject *
-partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs,
-                 PyObject *kwargs)
+partial_vectorcall(partialobject *pto, PyObject *const *args,
+                   size_t nargsf, PyObject *kwnames)
 {
-    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
-    PyObject *ret;
-    PyObject **stack, **stack_buf = NULL;
-    Py_ssize_t nargs2, pto_nargs;
+    /* pto->kw is mutable, so need to check every time */
+    if (PyDict_GET_SIZE(pto->kw)) {
+        return partial_vectorcall_fallback(pto, args, nargsf, kwnames);
+    }
+
+    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+    Py_ssize_t nargs_total = nargs;
+    if (kwnames != NULL) {
+        nargs_total += PyTuple_GET_SIZE(kwnames);
+    }
+
+    PyObject **pto_args = _PyTuple_ITEMS(pto->args);
+    Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
 
-    pto_nargs = PyTuple_GET_SIZE(pto->args);
-    nargs2 = pto_nargs + nargs;
+    /* Fast path if we're called without arguments */
+    if (nargs_total == 0) {
+        return _PyObject_Vectorcall(pto->fn, pto_args, pto_nargs, NULL);
+    }
 
-    if (pto_nargs == 0) {
-        stack = args;
+    /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
+     * positional argument */
+    if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
+        PyObject **newargs = (PyObject **)args - 1;
+        PyObject *tmp = newargs[0];
+        newargs[0] = pto_args[0];
+        PyObject *ret = _PyObject_Vectorcall(pto->fn, newargs, nargs + 1, kwnames);
+        newargs[0] = tmp;
+        return ret;
     }
-    else if (nargs == 0) {
-        stack = _PyTuple_ITEMS(pto->args);
+
+    Py_ssize_t newnargs_total = pto_nargs + nargs_total;
+
+    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
+    PyObject *ret;
+    PyObject **stack;
+
+    if (newnargs_total <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
+        stack = small_stack;
     }
     else {
-        if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
-            stack = small_stack;
-        }
-        else {
-            stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *));
-            if (stack_buf == NULL) {
-                PyErr_NoMemory();
-                return NULL;
-            }
-            stack = stack_buf;
+        stack = PyMem_Malloc(newnargs_total * sizeof(PyObject *));
+        if (stack == NULL) {
+            PyErr_NoMemory();
+            return NULL;
         }
-
-        /* use borrowed references */
-        memcpy(stack,
-               _PyTuple_ITEMS(pto->args),
-               pto_nargs * sizeof(PyObject*));
-        memcpy(&stack[pto_nargs],
-               args,
-               nargs * sizeof(PyObject*));
     }
 
-    ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs);
-    PyMem_Free(stack_buf);
+    /* Copy to new stack, using borrowed references */
+    memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
+    memcpy(stack + pto_nargs, args, nargs_total * sizeof(PyObject*));
+
+    ret = _PyObject_Vectorcall(pto->fn, stack, pto_nargs + nargs, kwnames);
+    if (stack != small_stack) {
+        PyMem_Free(stack);
+    }
     return ret;
 }
 
-static PyObject *
-partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs)
+/* Set pto->vectorcall depending on the parameters of the partial object */
+static void
+partial_setvectorcall(partialobject *pto)
 {
-    PyObject *ret, *args2;
-
-    /* Note: tupleconcat() is optimized for empty tuples */
-    args2 = PySequence_Concat(pto->args, args);
-    if (args2 == NULL) {
-        return NULL;
+    if (_PyVectorcall_Function(pto->fn) == NULL) {
+        /* Don't use vectorcall if the underlying function doesn't support it */
+        pto->vectorcall = NULL;
+    }
+    /* We could have a special case if there are no arguments,
+     * but that is unlikely (why use partial without arguments?),
+     * so we don't optimize that */
+    else {
+        pto->vectorcall = (vectorcallfunc)partial_vectorcall;
     }
-    assert(PyTuple_Check(args2));
-
-    ret = PyObject_Call(pto->fn, args2, kwargs);
-    Py_DECREF(args2);
-    return ret;
 }
 
+
 static PyObject *
 partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
 {
-    PyObject *kwargs2, *res;
-
-    assert (PyCallable_Check(pto->fn));
-    assert (PyTuple_Check(pto->args));
-    assert (PyDict_Check(pto->kw));
+    assert(PyCallable_Check(pto->fn));
+    assert(PyTuple_Check(pto->args));
+    assert(PyDict_Check(pto->kw));
 
+    /* Merge keywords */
+    PyObject *kwargs2;
     if (PyDict_GET_SIZE(pto->kw) == 0) {
         /* kwargs can be NULL */
         kwargs2 = kwargs;
@@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
         }
     }
 
-
-    if (pto->use_fastcall) {
-        res = partial_fastcall(pto,
-                               _PyTuple_ITEMS(args),
-                               PyTuple_GET_SIZE(args),
-                               kwargs2);
-    }
-    else {
-        res = partial_call_impl(pto, args, kwargs2);
+    /* Merge positional arguments */
+    /* Note: tupleconcat() is optimized for empty tuples */
+    PyObject *args2 = PySequence_Concat(pto->args, args);
+    if (args2 == NULL) {
+        Py_XDECREF(kwargs2);
+        return NULL;
     }
+
+    PyObject *res = PyObject_Call(pto->fn, args2, kwargs2);
+    Py_DECREF(args2);
     Py_XDECREF(kwargs2);
     return res;
 }
@@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
         Py_INCREF(dict);
 
     Py_INCREF(fn);
-    pto->use_fastcall = (_PyVectorcall_Function(fn) != NULL);
     Py_SETREF(pto->fn, fn);
     Py_SETREF(pto->args, fnargs);
     Py_SETREF(pto->kw, kw);
     Py_XSETREF(pto->dict, dict);
+    partial_setvectorcall(pto);
     Py_RETURN_NONE;
 }
 
@@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
     0,                                  /* tp_itemsize */
     /* methods */
     (destructor)partial_dealloc,        /* tp_dealloc */
-    0,                                  /* tp_vectorcall_offset */
+    offsetof(partialobject, vectorcall),/* tp_vectorcall_offset */
     0,                                  /* tp_getattr */
     0,                                  /* tp_setattr */
     0,                                  /* tp_as_async */
@@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
     PyObject_GenericSetAttr,            /* tp_setattro */
     0,                                  /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
-        Py_TPFLAGS_BASETYPE,            /* tp_flags */
+        Py_TPFLAGS_BASETYPE |
+        _Py_TPFLAGS_HAVE_VECTORCALL,    /* tp_flags */
     partial_doc,                        /* tp_doc */
     (traverseproc)partial_traverse,     /* tp_traverse */
     0,                                  /* tp_clear */