Issue #13521: dict.setdefault() now does only one lookup for the given key, making it "atomic" for many purposes.

author Antoine Pitrou <solipsis@pitrou.net>

Sun, 26 Feb 2012 23:45:12 +0000 (00:45 +0100)

committer Antoine Pitrou <solipsis@pitrou.net>

Sun, 26 Feb 2012 23:45:12 +0000 (00:45 +0100)
author Antoine Pitrou <solipsis@pitrou.net>
Sun, 26 Feb 2012 23:45:12 +0000 (00:45 +0100)
committer Antoine Pitrou <solipsis@pitrou.net>
Sun, 26 Feb 2012 23:45:12 +0000 (00:45 +0100)
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py

index 29167d0e38589345cf0c2e7832fc25585a27e9f3..18f7ce67cee6d0b8bed70a63283e3230bb5420b5 100644 (file)
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -299,6 +299,26 @@ class DictTest(unittest.TestCase):
          x.fail = True
          self.assertRaises(Exc, d.setdefault, x, [])
  
+    def test_setdefault_atomic(self):
+        # Issue #13521: setdefault() calls __hash__ and __eq__ only once.
+        class Hashed(object):
+            def __init__(self):
+                self.hash_count = 0
+                self.eq_count = 0
+            def __hash__(self):
+                self.hash_count += 1
+                return 42
+            def __eq__(self, other):
+                self.eq_count += 1
+                return id(self) == id(other)
+        hashed1 = Hashed()
+        y = {hashed1: 5}
+        hashed2 = Hashed()
+        y.setdefault(hashed2, [])
+        self.assertEqual(hashed1.hash_count, 1)
+        self.assertEqual(hashed2.hash_count, 1)
+        self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
+
      def test_popitem(self):
          # dict.popitem()
          for copymode in -1, +1:
diff --git a/Misc/NEWS b/Misc/NEWS

index 7be7777dda70b5770818a04309f194c73aba68ae..8460cdda718f63c99e342108622239f83f62b5d8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@ What's New in Python 2.7.3 release candidate 1?
  Core and Builtins
  -----------------
  
+- Issue #13521: dict.setdefault() now does only one lookup for the given key,
+  making it "atomic" for many purposes.  Patch by Filip Gruszczyński.
+
  - Issue #13020: Fix a reference leak when allocating a structsequence object
    fails.  Patch by Suman Saha.
  
diff --git a/Objects/dictobject.c b/Objects/dictobject.c

index ac99cfb18e7b292cbbac76300db1db6bf31175fd..6c2b788b48d06dec3df9a036bfd053ed56bceae7 100644 (file)
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -502,27 +502,16 @@ _PyDict_MaybeUntrack(PyObject *op)
      _PyObject_GC_UNTRACK(op);
  }
  
-
  /*
-Internal routine to insert a new item into the table.
-Used both by the internal resize routine and by the public insert routine.
-Eats a reference to key and one to value.
-Returns -1 if an error occurred, or 0 on success.
+Internal routine to insert a new item into the table when you already have the entry object.
+Used by insertdict.
  */
  static int
-insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
+insertdict_by_entry(register PyDictObject *mp, PyObject *key, long hash,
+                    PyDictEntry *ep, PyObject *value)
  {
      PyObject *old_value;
-    register PyDictEntry *ep;
-    typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, long);
  
-    assert(mp->ma_lookup != NULL);
-    ep = mp->ma_lookup(mp, key, hash);
-    if (ep == NULL) {
-        Py_DECREF(key);
-        Py_DECREF(value);
-        return -1;
-    }
      MAINTAIN_TRACKING(mp, key, value);
      if (ep->me_value != NULL) {
          old_value = ep->me_value;
@@ -545,6 +534,28 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
      return 0;
  }
  
+
+/*
+Internal routine to insert a new item into the table.
+Used both by the internal resize routine and by the public insert routine.
+Eats a reference to key and one to value.
+Returns -1 if an error occurred, or 0 on success.
+*/
+static int
+insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
+{
+    register PyDictEntry *ep;
+
+    assert(mp->ma_lookup != NULL);
+    ep = mp->ma_lookup(mp, key, hash);
+    if (ep == NULL) {
+        Py_DECREF(key);
+        Py_DECREF(value);
+        return -1;
+    }
+    return insertdict_by_entry(mp, key, hash, ep, value);
+}
+
  /*
  Internal routine used by dictresize() to insert an item which is
  known to be absent from the dict.  This routine also assumes that
@@ -738,6 +749,45 @@ PyDict_GetItem(PyObject *op, PyObject *key)
      return ep->me_value;
  }
  
+static int
+dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key,
+                               long hash, PyDictEntry *ep, PyObject *value)
+{
+    register PyDictObject *mp;
+    register Py_ssize_t n_used;
+
+    mp = (PyDictObject *)op;
+    assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
+    n_used = mp->ma_used;
+    Py_INCREF(value);
+    Py_INCREF(key);
+    if (ep == NULL) {
+        if (insertdict(mp, key, hash, value) != 0)
+            return -1;
+    }
+    else {
+        if (insertdict_by_entry(mp, key, hash, ep, value) != 0)
+            return -1;
+    }
+    /* If we added a key, we can safely resize.  Otherwise just return!
+     * If fill >= 2/3 size, adjust size.  Normally, this doubles or
+     * quadruples the size, but it's also possible for the dict to shrink
+     * (if ma_fill is much larger than ma_used, meaning a lot of dict
+     * keys have been deleted).
+     *
+     * Quadrupling the size improves average dictionary sparseness
+     * (reducing collisions) at the cost of some memory and iteration
+     * speed (which loops over every possible entry).  It also halves
+     * the number of expensive resize operations in a growing dictionary.
+     *
+     * Very large dictionaries (over 50K items) use doubling instead.
+     * This may help applications with severe memory constraints.
+     */
+    if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
+        return 0;
+    return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
+}
+
  /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
   * dictionary if it's merely replacing the value for an existing key.
   * This means that it's safe to loop over a dictionary with PyDict_Next()
@@ -747,9 +797,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
  int
  PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
  {
-    register PyDictObject *mp;
      register long hash;
-    register Py_ssize_t n_used;
  
      if (!PyDict_Check(op)) {
          PyErr_BadInternalCall();
@@ -757,7 +805,6 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
      }
      assert(key);
      assert(value);
-    mp = (PyDictObject *)op;
      if (PyString_CheckExact(key)) {
          hash = ((PyStringObject *)key)->ob_shash;
          if (hash == -1)
@@ -768,29 +815,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
          if (hash == -1)
              return -1;
      }
-    assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
-    n_used = mp->ma_used;
-    Py_INCREF(value);
-    Py_INCREF(key);
-    if (insertdict(mp, key, hash, value) != 0)
-        return -1;
-    /* If we added a key, we can safely resize.  Otherwise just return!
-     * If fill >= 2/3 size, adjust size.  Normally, this doubles or
-     * quaduples the size, but it's also possible for the dict to shrink
-     * (if ma_fill is much larger than ma_used, meaning a lot of dict
-     * keys have been * deleted).
-     *
-     * Quadrupling the size improves average dictionary sparseness
-     * (reducing collisions) at the cost of some memory and iteration
-     * speed (which loops over every possible entry).  It also halves
-     * the number of expensive resize operations in a growing dictionary.
-     *
-     * Very large dictionaries (over 50K items) use doubling instead.
-     * This may help applications with severe memory constraints.
-     */
-    if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
-        return 0;
-    return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
+    return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value);
  }
  
  int
@@ -1957,9 +1982,9 @@ dict_setdefault(register PyDictObject *mp, PyObject *args)
          return NULL;
      val = ep->me_value;
      if (val == NULL) {
-        val = failobj;
-        if (PyDict_SetItem((PyObject*)mp, key, failobj))
-            val = NULL;
+        if (dict_set_item_by_hash_or_entry((PyObject*)mp, key, hash, ep,
+                                           failobj) == 0)
+            val = failobj;
      }
      Py_XINCREF(val);
      return val;
author	Antoine Pitrou <solipsis@pitrou.net>
	Sun, 26 Feb 2012 23:45:12 +0000 (00:45 +0100)
committer	Antoine Pitrou <solipsis@pitrou.net>
	Sun, 26 Feb 2012 23:45:12 +0000 (00:45 +0100)
Lib/test/test_dict.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/dictobject.c		patch \| blob \| history