The initial probe index is computed as hash mod the table size. Subsequent
probe indices are computed as explained in Objects/dictobject.c.
-To improve cache locality, each probe is done in pairs.
-After the probe is examined, an adjacent entry is then examined as well.
-The likelihood is that an adjacent entry is in the same cache line and
-can be examined more cheaply than another probe elsewhere in memory.
+To improve cache locality, each probe inspects a few nearby entries before
+moving on to probes elsewhere in memory. Depending on alignment and the
+size of a cache line, the nearby entries are cheaper to inspect than
+scattered probes, so this strategy reduces the cost of hash collisions.
All arithmetic on hash should ignore overflow.
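As a concrete illustration of the paragraphs above, here is a standalone demo
(illustrative values only, not part of the patch) that prints the first few
probe indices produced by the recurrence used in the hunks below. The table
size is assumed to be a power of two, so hash mod the table size reduces to a
mask, and the size_t arithmetic simply wraps on overflow instead of trapping.

#include <stdio.h>
#include <stddef.h>

#define PERTURB_SHIFT 5

/* Illustrative demo, not the setobject.c code: print the first few probe
   indices for one hash value.  All variables are size_t, so any overflow
   in i * 5 + perturb + 1 wraps silently, which is the "ignore overflow"
   behaviour requested above. */
int main(void)
{
    size_t table_size = 32;              /* hypothetical table size */
    size_t mask = table_size - 1;
    size_t hash = (size_t)123456789u;    /* hypothetical hash value */
    size_t perturb = hash;
    size_t i = hash & mask;              /* initial probe index */
    size_t j = i;

    for (int n = 0; n < 5; n++) {
        printf("probe %d: j = %zu (neighbours j^1 = %zu, j^2 = %zu)\n",
               n, j, j ^ 1, j ^ 2);
        i = i * 5 + perturb + 1;         /* same recurrence as the hunks below */
        j = i & mask;
        perturb >>= PERTURB_SHIFT;
    }
    return 0;
}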
if (entry->key == dummy && freeslot == NULL)
    freeslot = entry;
+ entry = &table[j ^ 2];
+ if (entry->key == NULL)
+     break;
+ if (entry->key == key)
+     return entry;
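+ /* If the hashes match but the keys are different objects, fall back to
+    a rich comparison.  The comparison can execute arbitrary code and
+    mutate the set, so the key is kept alive across the call and the
+    table and entry are re-checked; if they changed, the lookup restarts. */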
+ if (entry->hash == hash && entry->key != dummy) {
+     PyObject *startkey = entry->key;
+     Py_INCREF(startkey);
+     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+     Py_DECREF(startkey);
+     if (cmp < 0)
+         return NULL;
+     if (table != so->table || entry->key != startkey)
+         return set_lookkey(so, key, hash);
+     if (cmp > 0)
+         return entry;
+ }
+ if (entry->key == dummy && freeslot == NULL)
+     freeslot = entry;
+
i = i * 5 + perturb + 1;
j = i & mask;
perturb >>= PERTURB_SHIFT;
if (entry->key == dummy && freeslot == NULL)
    freeslot = entry;
+ entry = &table[j ^ 2];
+ if (entry->key == NULL)
+     break;
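+ /* unicode_eq() compares the strings directly and cannot execute
+    arbitrary code, so no reentrancy check is needed on this path. */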
+ if (entry->key == key
+     || (entry->hash == hash
+         && entry->key != dummy
+         && unicode_eq(entry->key, key)))
+     return entry;
+ if (entry->key == dummy && freeslot == NULL)
+     freeslot = entry;
+
i = i * 5 + perturb + 1;
j = i & mask;
perturb >>= PERTURB_SHIFT;
if (entry->key == NULL)
    break;
entry = &table[j ^ 1];
+ if (entry->key == NULL)
+     break;
+ entry = &table[j ^ 2];
if (entry->key == NULL)
    break;
i = i * 5 + perturb + 1;
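To show the overall shape of the lookup that results from this change, here is
a simplified, self-contained sketch. toy_entry and toy_set_lookup are invented
names, keys are compared only by pointer and hash, and the dummy entries,
freeslot tracking, and rich-comparison reentrancy handling from the hunks
above are omitted. The table is assumed to always contain at least one empty
slot, which CPython guarantees by resizing before the table fills up.

#include <stddef.h>

#define PERTURB_SHIFT 5

typedef struct {
    size_t hash;
    const void *key;      /* NULL marks an empty slot */
} toy_entry;

/* Probe slot j, then the cache-adjacent slots j ^ 1 and j ^ 2, then jump
   to another region of the table with the i*5 + perturb + 1 recurrence.
   Returns either the matching entry or the empty slot where the key
   would be inserted. */
toy_entry *
toy_set_lookup(toy_entry *table, size_t mask, const void *key, size_t hash)
{
    size_t perturb = hash;
    size_t i = hash & mask;
    size_t j = i;
    toy_entry *entry;

    while (1) {
        entry = &table[j];
        if (entry->key == NULL || (entry->hash == hash && entry->key == key))
            return entry;

        entry = &table[j ^ 1];           /* usually in the same cache line */
        if (entry->key == NULL || (entry->hash == hash && entry->key == key))
            return entry;

        entry = &table[j ^ 2];           /* the extra probe added by this patch */
        if (entry->key == NULL || (entry->hash == hash && entry->key == key))
            return entry;

        i = i * 5 + perturb + 1;         /* unsigned overflow is intentional */
        j = i & mask;
        perturb >>= PERTURB_SHIFT;
    }
}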