Fix logic bug in gistchoose and gistRelocateBuildBuffersOnSplit.

author Robert Haas <rhaas@postgresql.org>

Thu, 30 Aug 2012 17:05:45 +0000 (13:05 -0400)

committer Robert Haas <rhaas@postgresql.org>

Thu, 30 Aug 2012 17:09:07 +0000 (13:09 -0400)
author Robert Haas <rhaas@postgresql.org>
Thu, 30 Aug 2012 17:05:45 +0000 (13:05 -0400)
committer Robert Haas <rhaas@postgresql.org>
Thu, 30 Aug 2012 17:09:07 +0000 (13:09 -0400)
diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c

index 39aec856f9270a4751ef3e97ee469e5c14efbf5f..d1e246adad3ecac4adf4ebf4584f06cf0c09601b 100644 (file)
--- a/src/backend/access/gist/gistbuildbuffers.c
+++ b/src/backend/access/gist/gistbuildbuffers.c
@@ -625,8 +625,13 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
         }
  
         /*
-        * Loop through all index tuples on the buffer on the splitted page,
-        * moving them to buffers on the new pages.
+        * Loop through all index tuples on the buffer on the page being split,
+        * moving them to buffers on the new pages.  We try to move each tuple
+        * the page that will result in the lowest penalty for the leading column
+        * or, in the case of a tie, the lowest penalty for the earliest column
+        * that is not tied.
+        *
+        * The guts of this loop are very similar to gistchoose().
          */
         while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
         {
@@ -637,14 +642,18 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
                 IndexTuple      newtup;
                 RelocationBufferInfo *targetBufferInfo;
  
-               /*
-                * Choose which page this tuple should go to.
-                */
                 gistDeCompressAtt(giststate, r,
                                                   itup, NULL, (OffsetNumber) 0, entry, isnull);
  
                 which = -1;
                 *which_grow = -1.0f;
+
+               /*
+                * Loop over possible target pages.  We'll exit early if we find an index key that
+                * can accommodate the new key with no penalty on any column.  sum_grow is used to
+                * track this condition.  It doesn't need to be exactly accurate, just >0 whenever
+                * we want the loop to continue and equal to 0 when we want it to terminate.
+                */
                 sum_grow = 1.0f;
  
                 for (i = 0; i < splitPagesCount && sum_grow; i++)
@@ -653,6 +662,8 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
                         RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
  
                         sum_grow = 0.0f;
+
+                       /* Loop over index attributes. */
                         for (j = 0; j < r->rd_att->natts; j++)
                         {
                                 float           usize;
@@ -664,16 +675,36 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
  
                                 if (which_grow[j] < 0 || usize < which_grow[j])
                                 {
+                                       /*
+                                        * We get here in two cases.  First, we may have just discovered that the
+                                        * current tuple is the best one we've seen so far; that is, for the first
+                                        * column for which the penalty is not equal to the best tuple seen so far,
+                                        * this one has a lower penalty than the previously-seen one.  But, when
+                                        * a new best tuple is found, we must record the best penalty value for
+                                        * all the remaining columns.  We'll end up here for each remaining index
+                                        * column in that case, too.
+                                        */
                                         which = i;
                                         which_grow[j] = usize;
-                                       if (j < r->rd_att->natts - 1 && i == 0)
+                                       if (j < r->rd_att->natts - 1)
                                                 which_grow[j + 1] = -1;
                                         sum_grow += which_grow[j];
                                 }
                                 else if (which_grow[j] == usize)
+                               {
+                                       /*
+                                        * The current tuple is exactly as good for this column as the best tuple
+                                        * seen so far.  The next iteration of this loop will compare the next
+                                        * column.
+                                        */
                                         sum_grow += usize;
+                               }
                                 else
                                 {
+                                       /*
+                                        * The current tuple is worse for this column than the best tuple seen so
+                                        * far.  Skip the remaining columns and move on to the next tuple, if any.
+                                        */
                                         sum_grow = 1;
                                         break;
                                 }
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c

index df1e2e396f268e4b09e898c0b614a1d0cb3e7dfa..7c6ce8495caac1e9d7c99afdb513689de93beea5 100644 (file)
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -363,7 +363,12 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
  }
  
  /*
- * find entry with lowest penalty
+ * Search a page for the entry with lowest penalty.
+ *
+ * The index may have multiple columns, and there's a penalty value for each column.
+ * The penalty associated with a column which appears earlier in the index definition is
+ * strictly more important than the penalty of column which appears later in the index
+ * definition.
   */
  OffsetNumber
  gistchoose(Relation r, Page p, IndexTuple it,  /* it has compressed entry */
@@ -389,12 +394,28 @@ gistchoose(Relation r, Page p, IndexTuple it,     /* it has compressed entry */
         Assert(maxoff >= FirstOffsetNumber);
         Assert(!GistPageIsLeaf(p));
  
+       /*
+        * Loop over tuples on page.
+        *
+        * We'll exit early if we find an index key that can accommodate the new key with no
+        * penalty on any column.  sum_grow is used to track this condition.  Normally, it is the
+        * sum of the penalties we've seen for this column so far, which is not a very useful
+        * quantity in general because the penalties for each column are only considered
+        * independently, but all we really care about is whether or not it's greater than zero.
+        * Since penalties can't be negative, the sum of the penalties will be greater than
+        * zero if and only if at least one penalty was greater than zero.  To make things just
+        * a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force
+        * the at least one more iteration of this outer loop.  Any non-zero value would serve
+        * just as well.
+        */
         for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
         {
                 int                     j;
                 IndexTuple      itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
  
                 sum_grow = 0;
+
+               /* Loop over indexed attribtues. */
                 for (j = 0; j < r->rd_att->natts; j++)
                 {
                         Datum           datum;
@@ -409,16 +430,36 @@ gistchoose(Relation r, Page p, IndexTuple it,     /* it has compressed entry */
  
                         if (which_grow[j] < 0 || usize < which_grow[j])
                         {
+                               /*
+                                * We get here in two cases.  First, we may have just discovered that the
+                                * current tuple is the best one we've seen so far; that is, for the first
+                                * column for which the penalty is not equal to the best tuple seen so far,
+                                * this one has a lower penalty than the previously-seen one.  But, when
+                                * a new best tuple is found, we must record the best penalty value for
+                                * all the remaining columns.  We'll end up here for each remaining index
+                                * column in that case, too.
+                                */
                                 which = i;
                                 which_grow[j] = usize;
-                               if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
+                               if (j < r->rd_att->natts - 1)
                                         which_grow[j + 1] = -1;
                                 sum_grow += which_grow[j];
                         }
                         else if (which_grow[j] == usize)
+                       {
+                               /*
+                                * The current tuple is exactly as good for this column as the best tuple
+                                * seen so far.  The next iteration of this loop will compare the next
+                                * column.
+                                */
                                 sum_grow += usize;
+                       }
                         else
                         {
+                               /*
+                                * The current tuple is worse for this column than the best tuple seen so
+                                * far.  Skip the remaining columns and move on to the next tuple, if any.
+                                */
                                 sum_grow = 1;
                                 break;
                         }
author	Robert Haas <rhaas@postgresql.org>
	Thu, 30 Aug 2012 17:05:45 +0000 (13:05 -0400)
committer	Robert Haas <rhaas@postgresql.org>
	Thu, 30 Aug 2012 17:09:07 +0000 (13:09 -0400)
src/backend/access/gist/gistbuildbuffers.c		patch \| blob \| history
src/backend/access/gist/gistutil.c		patch \| blob \| history