Back-patch recent fixes for gistchoose and gistRelocateBuildBuffersOnSplit.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Aug 2012 03:48:01 +0000 (23:48 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Aug 2012 03:48:01 +0000 (23:48 -0400)
This back-ports commits c8ba697a4bdb934f0c51424c654e8db6133ea255 and
e5db11c5582b469c04a11f217a0f32c827da5dd7, which fix one definite and one
speculative bug in gistchoose, and make the code a lot more intelligible as
well.  In 9.2 only, this also affects the largely-copied-and-pasted logic
in gistRelocateBuildBuffersOnSplit.

The impact of the bugs was that the functions might make poor decisions
as to which index tree branch to push a new entry down into, resulting in
GiST index bloat and poor performance.  The fixes rectify these decisions
for future insertions, but a REINDEX would be needed to clean up any
existing index bloat.

Alexander Korotkov, Robert Haas, Tom Lane

src/backend/access/gist/gistutil.c

index 48c44157be07b4d50b8f064fd403f0343e3b4ab3..5ab1e9c7152136759be93ee6f96221ebe98e45cc 100644 (file)
@@ -372,36 +372,55 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
 }
 
 /*
- * find entry with lowest penalty
+ * Search an upper index page for the entry with lowest penalty for insertion
+ * of the new index key contained in "it".
+ *
+ * Returns the index of the page entry to insert into.
  */
 OffsetNumber
 gistchoose(Relation r, Page p, IndexTuple it,  /* it has compressed entry */
                   GISTSTATE *giststate)
 {
+       OffsetNumber result;
        OffsetNumber maxoff;
        OffsetNumber i;
-       OffsetNumber which;
-       float           sum_grow,
-                               which_grow[INDEX_MAX_KEYS];
+       float           best_penalty[INDEX_MAX_KEYS];
        GISTENTRY       entry,
                                identry[INDEX_MAX_KEYS];
        bool            isnull[INDEX_MAX_KEYS];
 
-       maxoff = PageGetMaxOffsetNumber(p);
-       *which_grow = -1.0;
-       which = InvalidOffsetNumber;
-       sum_grow = 1;
+       Assert(!GistPageIsLeaf(p));
+
        gistDeCompressAtt(giststate, r,
                                          it, NULL, (OffsetNumber) 0,
                                          identry, isnull);
 
+       /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
+       result = FirstOffsetNumber;
+
+       /*
+        * The index may have multiple columns, and there's a penalty value for
+        * each column.  The penalty associated with a column that appears earlier
+        * in the index definition is strictly more important than the penalty of
+        * a column that appears later in the index definition.
+        *
+        * best_penalty[j] is the best penalty we have seen so far for column j,
+        * or -1 when we haven't yet examined column j.  Array entries to the
+        * right of the first -1 are undefined.
+        */
+       best_penalty[0] = -1;
+
+       /*
+        * Loop over tuples on page.
+        */
+       maxoff = PageGetMaxOffsetNumber(p);
        Assert(maxoff >= FirstOffsetNumber);
-       Assert(!GistPageIsLeaf(p));
 
-       for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
        {
-               int                     j;
                IndexTuple      itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
+               bool            zero_penalty;
+               int                     j;
 
                if (!GistPageIsLeaf(p) && GistTupleIsInvalid(itup))
                {
@@ -411,41 +430,70 @@ gistchoose(Relation r, Page p, IndexTuple it,     /* it has compressed entry */
                        continue;
                }
 
-               sum_grow = 0;
+               zero_penalty = true;
+
+               /* Loop over index attributes. */
                for (j = 0; j < r->rd_att->natts; j++)
                {
                        Datum           datum;
                        float           usize;
                        bool            IsNull;
 
+                       /* Compute penalty for this column. */
                        datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
                        gistdentryinit(giststate, j, &entry, datum, r, p, i,
                                                   FALSE, IsNull);
                        usize = gistpenalty(giststate, j, &entry, IsNull,
                                                                &identry[j], isnull[j]);
+                       if (usize > 0)
+                               zero_penalty = false;
 
-                       if (which_grow[j] < 0 || usize < which_grow[j])
+                       if (best_penalty[j] < 0 || usize < best_penalty[j])
+                       {
+                               /*
+                                * New best penalty for column.  Tentatively select this tuple
+                                * as the target, and record the best penalty.  Then reset the
+                                * next column's penalty to "unknown" (and indirectly, the
+                                * same for all the ones to its right).  This will force us to
+                                * adopt this tuple's penalty values as the best for all the
+                                * remaining columns during subsequent loop iterations.
+                                */
+                               result = i;
+                               best_penalty[j] = usize;
+
+                               if (j < r->rd_att->natts - 1)
+                                       best_penalty[j + 1] = -1;
+                       }
+                       else if (best_penalty[j] == usize)
                        {
-                               which = i;
-                               which_grow[j] = usize;
-                               if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
-                                       which_grow[j + 1] = -1;
-                               sum_grow += which_grow[j];
+                               /*
+                                * The current tuple is exactly as good for this column as the
+                                * best tuple seen so far.      The next iteration of this loop
+                                * will compare the next column.
+                                */
                        }
-                       else if (which_grow[j] == usize)
-                               sum_grow += usize;
                        else
                        {
-                               sum_grow = 1;
+                               /*
+                                * The current tuple is worse for this column than the best
+                                * tuple seen so far.  Skip the remaining columns and move on
+                                * to the next tuple, if any.
+                                */
+                               zero_penalty = false;   /* so outer loop won't exit */
                                break;
                        }
                }
-       }
 
-       if (which == InvalidOffsetNumber)
-               which = FirstOffsetNumber;
+               /*
+                * If we find a tuple with zero penalty for all columns, there's no
+                * need to examine remaining tuples; just break out of the loop and
+                * return it.
+                */
+               if (zero_penalty)
+                       break;
+       }
 
-       return which;
+       return result;
 }
 
 /*