From e974571d36009e327a97bb83389cf05c2b858288 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sun, 31 Oct 2010 15:26:04 +0000 Subject: [PATCH] Issue #10160: Speed up operator.attrgetter. Patch by Christos Georgiou. --- Doc/library/operator.rst | 2 + Lib/test/test_operator.py | 9 ++- Misc/NEWS | 2 + Modules/operator.c | 133 +++++++++++++++++++++++++++++--------- 4 files changed, 114 insertions(+), 32 deletions(-) diff --git a/Doc/library/operator.rst b/Doc/library/operator.rst index 2d030615cc..efefaa690e 100644 --- a/Doc/library/operator.rst +++ b/Doc/library/operator.rst @@ -336,6 +336,8 @@ expect a function argument. b.date)``. Equivalent to:: def attrgetter(*items): + if any(not isinstance(item, str) for item in items): + raise TypeError('attribute name must be a string') if len(items) == 1: attr = items[0] def g(obj): diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py index aa3219f499..f291e52522 100644 --- a/Lib/test/test_operator.py +++ b/Lib/test/test_operator.py @@ -275,8 +275,7 @@ class OperatorTestCase(unittest.TestCase): self.assertEqual(f(a), 'arthur') f = operator.attrgetter('rank') self.assertRaises(AttributeError, f, a) - f = operator.attrgetter(2) - self.assertRaises(TypeError, f, a) + self.assertRaises(TypeError, operator.attrgetter, 2) self.assertRaises(TypeError, operator.attrgetter) # multiple gets @@ -285,7 +284,7 @@ class OperatorTestCase(unittest.TestCase): record.y = 'Y' record.z = 'Z' self.assertEqual(operator.attrgetter('x','z','y')(record), ('X', 'Z', 'Y')) - self.assertRaises(TypeError, operator.attrgetter('x', (), 'y'), record) + self.assertRaises(TypeError, operator.attrgetter, ('x', (), 'y')) class C(object): def __getattr__(self, name): @@ -304,6 +303,10 @@ class OperatorTestCase(unittest.TestCase): self.assertEqual(f(a), ('arthur', 'thomas')) f = operator.attrgetter('name', 'child.name', 'child.child.name') self.assertRaises(AttributeError, f, a) + f = operator.attrgetter('child.') + self.assertRaises(AttributeError, f, a) + f = operator.attrgetter('.child') + self.assertRaises(AttributeError, f, a) a.child.child = A() a.child.child.name = 'johnson' diff --git a/Misc/NEWS b/Misc/NEWS index fc8c648204..ca66be0b7b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -59,6 +59,8 @@ Core and Builtins Library ------- +- Issue #10160: Speed up operator.attrgetter. Patch by Christos Georgiou. + - logging: Added style option to basicConfig() to allow %, {} or $-formatting. - Issue #5729: json.dumps() now supports using a string such as '\t' diff --git a/Modules/operator.c b/Modules/operator.c index 8ca4d2d06c..48bdae07d3 100644 --- a/Modules/operator.c +++ b/Modules/operator.c @@ -383,7 +383,7 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { attrgetterobject *ag; PyObject *attr; - Py_ssize_t nattrs; + Py_ssize_t nattrs, idx, char_idx; if (!_PyArg_NoKeywords("attrgetter()", kwds)) return NULL; @@ -392,15 +392,92 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (nattrs <= 1) { if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &attr)) return NULL; - } else - attr = args; + } + + attr = PyTuple_New(nattrs); + if (attr == NULL) + return NULL; + + /* prepare attr while checking args */ + for (idx = 0; idx < nattrs; ++idx) { + PyObject *item = PyTuple_GET_ITEM(args, idx); + Py_ssize_t item_len; + Py_UNICODE *item_buffer; + int dot_count; + + if (!PyUnicode_Check(item)) { + PyErr_SetString(PyExc_TypeError, + "attribute name must be a string"); + Py_DECREF(attr); + return NULL; + } + item_len = PyUnicode_GET_SIZE(item); + item_buffer = PyUnicode_AS_UNICODE(item); + + /* check whethere the string is dotted */ + dot_count = 0; + for (char_idx = 0; char_idx < item_len; ++char_idx) { + if (item_buffer[char_idx] == (Py_UNICODE)'.') + ++dot_count; + } + + if (dot_count == 0) { + Py_INCREF(item); + PyUnicode_InternInPlace(&item); + PyTuple_SET_ITEM(attr, idx, item); + } else { /* make it a tuple of non-dotted attrnames */ + PyObject *attr_chain = PyTuple_New(dot_count + 1); + PyObject *attr_chain_item; + + if (attr_chain == NULL) { + Py_DECREF(attr); + return NULL; + } + + Py_ssize_t unibuff_from = 0; + Py_ssize_t unibuff_till = 0; + Py_ssize_t attr_chain_idx = 0; + for (; dot_count > 0; --dot_count) { + while (item_buffer[unibuff_till] != (Py_UNICODE)'.') { + ++unibuff_till; + } + attr_chain_item = PyUnicode_FromUnicode( + item_buffer + unibuff_from, + unibuff_till - unibuff_from); + if (attr_chain_item == NULL) { + Py_DECREF(attr_chain); + Py_DECREF(attr); + return NULL; + } + PyUnicode_InternInPlace(&attr_chain_item); + PyTuple_SET_ITEM(attr_chain, attr_chain_idx, attr_chain_item); + ++attr_chain_idx; + unibuff_till = unibuff_from = unibuff_till + 1; + } + + /* now add the last dotless name */ + attr_chain_item = PyUnicode_FromUnicode( + item_buffer + unibuff_from, + item_len - unibuff_from); + if (attr_chain_item == NULL) { + Py_DECREF(attr_chain); + Py_DECREF(attr); + return NULL; + } + PyUnicode_InternInPlace(&attr_chain_item); + PyTuple_SET_ITEM(attr_chain, attr_chain_idx, attr_chain_item); + + PyTuple_SET_ITEM(attr, idx, attr_chain); + } + } /* create attrgetterobject structure */ ag = PyObject_GC_New(attrgetterobject, &attrgetter_type); - if (ag == NULL) + if (ag == NULL) { + Py_DECREF(attr); return NULL; + } - Py_INCREF(attr); ag->attr = attr; ag->nattrs = nattrs; @@ -426,33 +503,31 @@ attrgetter_traverse(attrgetterobject *ag, visitproc visit, void *arg) static PyObject * dotted_getattr(PyObject *obj, PyObject *attr) { - char *s, *p; - - if (!PyUnicode_Check(attr)) { - PyErr_SetString(PyExc_TypeError, - "attribute name must be a string"); - return NULL; - } - - s = _PyUnicode_AsString(attr); - Py_INCREF(obj); - for (;;) { - PyObject *newobj, *str; - p = strchr(s, '.'); - str = p ? PyUnicode_FromStringAndSize(s, (p-s)) : - PyUnicode_FromString(s); - if (str == NULL) { + PyObject *newobj; + + /* attr is either a tuple or instance of str. + Ensured by the setup code of attrgetter_new */ + if (PyTuple_CheckExact(attr)) { /* chained getattr */ + Py_ssize_t name_idx = 0, name_count; + PyObject *attr_name; + + name_count = PyTuple_GET_SIZE(attr); + Py_INCREF(obj); + for (name_idx = 0; name_idx < name_count; ++name_idx) { + attr_name = PyTuple_GET_ITEM(attr, name_idx); + newobj = PyObject_GetAttr(obj, attr_name); Py_DECREF(obj); - return NULL; + if (newobj == NULL) { + return NULL; + } + /* here */ + obj = newobj; } - newobj = PyObject_GetAttr(obj, str); - Py_DECREF(str); - Py_DECREF(obj); + } else { /* single getattr */ + newobj = PyObject_GetAttr(obj, attr); if (newobj == NULL) return NULL; obj = newobj; - if (p == NULL) break; - s = p+1; } return obj; @@ -466,8 +541,8 @@ attrgetter_call(attrgetterobject *ag, PyObject *args, PyObject *kw) if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &obj)) return NULL; - if (ag->nattrs == 1) - return dotted_getattr(obj, ag->attr); + if (ag->nattrs == 1) /* ag->attr is always a tuple */ + return dotted_getattr(obj, PyTuple_GET_ITEM(ag->attr, 0)); assert(PyTuple_Check(ag->attr)); assert(PyTuple_GET_SIZE(ag->attr) == nattrs); -- 2.40.0