GISTENTRY entry,
identry[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
+ int keep_current_best;
Assert(!GistPageIsLeaf(p));
*/
best_penalty[0] = -1;
+ /*
+ * If we find a tuple that's exactly as good as the currently best one, we
+ * could use either one. When inserting a lot of tuples with the same or
+ * similar keys, it's preferable to descend down the same path when
+ * possible, as that's more cache-friendly. On the other hand, if all
+ * inserts land on the same leaf page after a split, we're never going to
+ * insert anything to the other half of the split, and will end up using
+ * only 50% of the available space. Distributing the inserts evenly would
+ * lead to better space usage, but that hurts cache-locality during
+ * insertion. To get the best of both worlds, when we find a tuple that's
+ * exactly as good as the previous best, choose randomly whether to stick
+ * to the old best, or use the new one. Once we decide to stick to the
+ * old best, we keep sticking to it for any subsequent equally good tuples
+ * we might find. This favors tuples with low offsets, but still allows
+ * some inserts to go to other equally-good subtrees.
+ *
+ * keep_current_best is -1 if we haven't yet had to make a random choice
+ * whether to keep the current best tuple. If we have done so, and
+ * decided to keep it, keep_current_best is 1; if we've decided to
+ * replace, keep_current_best is 0. (This state will be reset to -1 as
+ * soon as we've made the replacement, but sometimes we make the choice in
+ * advance of actually finding a replacement best tuple.)
+ */
+ keep_current_best = -1;
+
/*
* Loop over tuples on page.
*/
if (j < r->rd_att->natts - 1)
best_penalty[j + 1] = -1;
+
+ /* we have new best, so reset keep-it decision */
+ keep_current_best = -1;
}
else if (best_penalty[j] == usize)
{
}
/*
- * If we find a tuple with zero penalty for all columns, there's no
- * need to examine remaining tuples; just break out of the loop and
- * return it.
+ * If we looped past the last column, and did not update "result",
+ * then this tuple is exactly as good as the prior best tuple.
+ */
+ if (j == r->rd_att->natts && result != i)
+ {
+ if (keep_current_best == -1)
+ {
+ /* we didn't make the random choice yet for this old best */
+ keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
+ }
+ if (keep_current_best == 0)
+ {
+ /* we choose to use the new tuple */
+ result = i;
+ /* choose again if there are even more exactly-as-good ones */
+ keep_current_best = -1;
+ }
+ }
+
+ /*
+ * If we find a tuple with zero penalty for all columns, and we've
+ * decided we don't want to search for another tuple with equal
+ * penalty, there's no need to examine remaining tuples; just break
+ * out of the loop and return it.
*/
if (zero_penalty)
- break;
+ {
+ if (keep_current_best == -1)
+ {
+ /* we didn't make the random choice yet for this old best */
+ keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
+ }
+ if (keep_current_best == 1)
+ break;
+ }
}
return result;