]> granicus.if.org Git - python/commitdiff
Extend work on revision 52962: Eliminate redundant calls to PyObject_Hash().
author Raymond Hettinger <python@rcn.com>
Mon, 19 Feb 2007 03:04:45 +0000 (03:04 +0000)
committer Raymond Hettinger <python@rcn.com>
Mon, 19 Feb 2007 03:04:45 +0000 (03:04 +0000)
Include/dictobject.h
Lib/test/test_set.py
Objects/dictobject.c
Objects/setobject.c

index fd3d1fc3529404cad201174b8b1a0fd6169325cd..44b0838e04210ef77b33ef3f7361a60b8c628d31 100644 (file)
@@ -100,12 +100,15 @@ PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key);
 PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
 PyAPI_FUNC(int) PyDict_Next(
        PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
+PyAPI_FUNC(int) _PyDict_Next(
+       PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, long *hash);
 PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp);
 PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp);
 PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp);
 PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp);
 PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp);
 PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key);
+PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash);
 
 /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */
 PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other);
index 49bdec344547944c0ebdb32a158906bb8558ce9e..45f61b2e8d709d25eefc4a654aebf8212022cd4b 100644 (file)
@@ -26,6 +26,14 @@ class ReprWrapper:
     def __repr__(self):
         return repr(self.value)
 
+class HashCountingInt(int):  # test helper: an int subclass instrumented to observe hashing
+    'int-like object that counts the number of times __hash__ is called'
+    def __init__(self, *args):  # *args is accepted and ignored here
+        self.hash_count = 0  # incremented by every __hash__ call on this instance
+    def __hash__(self):
+        self.hash_count += 1
+        return int.__hash__(self)  # delegate to int so the hash value itself is unchanged
+
 class TestJointOps(unittest.TestCase):
     # Tests common to both set and frozenset
 
@@ -270,6 +278,18 @@ class TestJointOps(unittest.TestCase):
             fo.close()
             os.remove(test_support.TESTFN)
 
+    def test_do_not_rehash_dict_keys(self):  # set operations taking a dict must not call __hash__ on its keys again
+        n = 10
+        d = dict.fromkeys(map(HashCountingInt, xrange(n)))
+        self.assertEqual(sum(elem.hash_count for elem in d), n)  # baseline: n hash calls in total after building the dict
+        s = self.thetype(d)  # construct the set type under test directly from the dict
+        self.assertEqual(sum(elem.hash_count for elem in d), n)  # total must still be n after construction
+        s.difference(d)
+        self.assertEqual(sum(elem.hash_count for elem in d), n)  # total must still be n after difference()
+        if hasattr(s, 'symmetric_difference_update'):  # only exercised when the type provides the in-place method
+            s.symmetric_difference_update(d)
+        self.assertEqual(sum(elem.hash_count for elem in d), n)  # final check runs whether or not the update was called
+
 class TestSet(TestJointOps):
     thetype = set
 
index 901e33383f222bb4c99eb1bc315d626d4b3691f0..1cb3ee6ad862db9187a43130ec67c32f148dceaf 100644 (file)
@@ -803,6 +803,34 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
        return 1;
 }
 
+/* Internal version of PyDict_Next that also stores the entry's hash (me_hash) in *phash; returns 1 when an entry is produced, else 0. */
+int
+_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash)
+{
+       register Py_ssize_t i;
+       register Py_ssize_t mask;
+       register dictentry *ep;
+
+       if (!PyDict_Check(op))  /* non-dict argument: report "no entry" */
+               return 0;
+       i = *ppos;
+       if (i < 0)      /* negative position: report "no entry" */
+               return 0;
+       ep = ((dictobject *)op)->ma_table;
+       mask = ((dictobject *)op)->ma_mask;
+       while (i <= mask && ep[i].me_value == NULL)     /* skip slots whose value is NULL */
+               i++;
+       *ppos = i+1;    /* next call resumes at the slot after this one */
+       if (i > mask)   /* ran past the last slot: iteration finished */
+               return 0;
+        *phash = (long)(ep[i].me_hash);        /* written unconditionally: unlike pkey/pvalue, phash is not NULL-checked */
+       if (pkey)
+               *pkey = ep[i].me_key;   /* pointer copied as-is; no INCREF is done here */
+       if (pvalue)
+               *pvalue = ep[i].me_value;       /* pointer copied as-is; no INCREF is done here */
+       return 1;
+}
+
 /* Methods */
 
 static void
@@ -1987,6 +2015,17 @@ PyDict_Contains(PyObject *op, PyObject *key)
        return ep == NULL ? -1 : (ep->me_value != NULL);
 }
 
+/* Internal version of PyDict_Contains used when the hash value is already known; returns 1 if key has a value, 0 if not, -1 if the lookup returns NULL. */
+int
+_PyDict_Contains(PyObject *op, PyObject *key, long hash)
+{
+       dictobject *mp = (dictobject *)op;      /* cast without a type check: this function does not verify that op is a dict */
+       dictentry *ep;
+
+       ep = (mp->ma_lookup)(mp, key, hash);    /* the caller-supplied hash is passed straight to the lookup function */
+       return ep == NULL ? -1 : (ep->me_value != NULL);        /* callers must test for -1 separately: it is also "true" in C */
+}
+
 /* Hack to implement "key in dict" */
 static PySequenceMethods dict_as_sequence = {
        0,                      /* sq_length */
index fc9d8234f43a879c348eb596f838b254805ea238..1f06cee5a11bfe03834007e43a65dfe5d4fe387e 100644 (file)
@@ -918,8 +918,14 @@ set_update_internal(PySetObject *so, PyObject *other)
        if (PyDict_CheckExact(other)) {
                PyObject *value;
                Py_ssize_t pos = 0;
-               while (PyDict_Next(other, &pos, &key, &value)) {
-                       if (set_add_key(so, key) == -1)
+               long hash;
+
+               while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
+                       setentry an_entry;
+
+                       an_entry.hash = hash;
+                       an_entry.key = key;
+                       if (set_add_entry(so, &an_entry) == -1)
                                return -1;
                }
                return 0;
@@ -1382,7 +1388,7 @@ set_difference(PySetObject *so, PyObject *other)
                        setentry entrycopy;
                        entrycopy.hash = entry->hash;
                        entrycopy.key = entry->key;
-                       if (!PyDict_Contains(other, entry->key)) {
+                       if (!_PyDict_Contains(other, entry->key, entry->hash)) {
                                if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
                                        Py_DECREF(result);
                                        return NULL;
@@ -1453,12 +1459,10 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
        if (PyDict_CheckExact(other)) {
                PyObject *value;
                int rv;
-               while (PyDict_Next(other, &pos, &key, &value)) {
+               long hash;
+               while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
                        setentry an_entry;
-                       long hash = PyObject_Hash(key);
 
-                       if (hash == -1)
-                               return NULL;
                        an_entry.hash = hash;
                        an_entry.key = key;
                        rv = set_discard_entry(so, &an_entry);