]> granicus.if.org Git - postgresql/commitdiff
Prevent growth of simplehash tables when they're "too empty".
authorAndres Freund <andres@anarazel.de>
Mon, 29 Jan 2018 19:02:09 +0000 (11:02 -0800)
committerAndres Freund <andres@anarazel.de>
Mon, 29 Jan 2018 19:24:57 +0000 (11:24 -0800)
In cases where simplehash tables where filled with either a lot of
conflicting hash-values, or values that hash to consecutive
values (i.e. build "chains") the growth heuristics in
d4c62a6b623d6eef88218158e9fa3cf974c6c7e5 could trigger rather
explosively.

To fix that, address some of the reasons (see previous commit) of why
the growth heuristics where needed, and only allow growth when the
table isn't too empty. While that means there's a few cases of bad
input that can be slower, that seems a lot better than running very
quickly out of memory.

Author: Tomas Vondra and Andres Freund, with additional input by
    Thomas Munro, Tom Lane Todd A. Cook
Reported-By: Todd A. Cook, Tomas Vondra, Thomas Munro
Discussion: https://postgr.es/m/20171127185700.1470.20362@wrigleys.postgresql.org
Backpatch: 10, where simplehash was introduced

src/include/lib/simplehash.h

index c5af5b96a73f2bbfa0af640c39f00098d0a0aaae..5273d494600ca2ffa3baae5987649995e0f4f604 100644 (file)
@@ -174,6 +174,10 @@ SH_SCOPE void SH_STAT(SH_TYPE * tb);
 #ifndef SH_GROW_MAX_MOVE
 #define SH_GROW_MAX_MOVE 150
 #endif
+#ifndef SH_GROW_MIN_FILLFACTOR
+/* but do not grow due to SH_GROW_MAX_* if below */
+#define SH_GROW_MIN_FILLFACTOR 0.1
+#endif
 
 #ifdef SH_STORE_HASH
 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
@@ -574,9 +578,12 @@ restart:
                                 * hashtables, grow the hashtable if collisions would require
                                 * us to move a lot of entries.  The most likely cause of such
                                 * imbalance is filling a (currently) small table, from a
-                                * currently big one, in hash-table order.
+                                * currently big one, in hash-table order.  Don't grow if the
+                                * hashtable would be too empty, to prevent quick space
+                                * explosion for some weird edge cases.
                                 */
-                               if (++emptydist > SH_GROW_MAX_MOVE)
+                               if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
+                                       ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
                                {
                                        tb->grow_threshold = 0;
                                        goto restart;
@@ -621,9 +628,12 @@ restart:
                 * To avoid negative consequences from overly imbalanced hashtables,
                 * grow the hashtable if collisions lead to large runs. The most
                 * likely cause of such imbalance is filling a (currently) small
-                * table, from a currently big one, in hash-table order.
+                * table, from a currently big one, in hash-table order.  Don't grow
+                * if the hashtable would be too empty, to prevent quick space
+                * explosion for some weird edge cases.
                 */
-               if (insertdist > SH_GROW_MAX_DIB)
+               if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
+                       ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
                {
                        tb->grow_threshold = 0;
                        goto restart;
@@ -923,6 +933,7 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_MAX_FILLFACTOR
 #undef SH_GROW_MAX_DIB
 #undef SH_GROW_MAX_MOVE
+#undef SH_GROW_MIN_FILLFACTOR
 #undef SH_MAX_SIZE
 
 /* types */