Improve coding of gistchoose and gistRelocateBuildBuffersOnSplit.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 31 Aug 2012 02:52:43 +0000 (22:52 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 31 Aug 2012 02:53:17 +0000 (22:53 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Aug 2012 02:52:43 +0000 (22:52 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Aug 2012 02:53:17 +0000 (22:53 -0400)
diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c

index d1e246adad3ecac4adf4ebf4584f06cf0c09601b..fc999767fdd038fb46cef382cf0b011f855f413c 100644 (file)
--- a/src/backend/access/gist/gistbuildbuffers.c
+++ b/src/backend/access/gist/gistbuildbuffers.c
@@ -625,18 +625,17 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
         }
  
         /*
-        * Loop through all index tuples on the buffer on the page being split,
-        * moving them to buffers on the new pages.  We try to move each tuple
+        * Loop through all index tuples in the buffer of the page being split,
+        * moving them to buffers for the new pages.  We try to move each tuple to
          * the page that will result in the lowest penalty for the leading column
          * or, in the case of a tie, the lowest penalty for the earliest column
          * that is not tied.
          *
-        * The guts of this loop are very similar to gistchoose().
+        * The page searching logic is very similar to gistchoose().
          */
         while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
         {
-               float           sum_grow,
-                                       which_grow[INDEX_MAX_KEYS];
+               float           best_penalty[INDEX_MAX_KEYS];
                 int                     i,
                                         which;
                 IndexTuple      newtup;
@@ -645,71 +644,88 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
                 gistDeCompressAtt(giststate, r,
                                                   itup, NULL, (OffsetNumber) 0, entry, isnull);
  
-               which = -1;
-               *which_grow = -1.0f;
+               /* default to using first page (shouldn't matter) */
+               which = 0;
  
                 /*
-                * Loop over possible target pages.  We'll exit early if we find an index key that
-                * can accommodate the new key with no penalty on any column.  sum_grow is used to
-                * track this condition.  It doesn't need to be exactly accurate, just >0 whenever
-                * we want the loop to continue and equal to 0 when we want it to terminate.
+                * best_penalty[j] is the best penalty we have seen so far for column
+                * j, or -1 when we haven't yet examined column j.  Array entries to
+                * the right of the first -1 are undefined.
                  */
-               sum_grow = 1.0f;
+               best_penalty[0] = -1;
  
-               for (i = 0; i < splitPagesCount && sum_grow; i++)
+               /*
+                * Loop over possible target pages, looking for one to move this tuple
+                * to.
+                */
+               for (i = 0; i < splitPagesCount; i++)
                 {
-                       int                     j;
                         RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
+                       bool            zero_penalty;
+                       int                     j;
  
-                       sum_grow = 0.0f;
+                       zero_penalty = true;
  
                         /* Loop over index attributes. */
                         for (j = 0; j < r->rd_att->natts; j++)
                         {
                                 float           usize;
  
+                               /* Compute penalty for this column. */
                                 usize = gistpenalty(giststate, j,
                                                                         &splitPageInfo->entry[j],
                                                                         splitPageInfo->isnull[j],
                                                                         &entry[j], isnull[j]);
+                               if (usize > 0)
+                                       zero_penalty = false;
  
-                               if (which_grow[j] < 0 || usize < which_grow[j])
+                               if (best_penalty[j] < 0 || usize < best_penalty[j])
                                 {
                                         /*
-                                        * We get here in two cases.  First, we may have just discovered that the
-                                        * current tuple is the best one we've seen so far; that is, for the first
-                                        * column for which the penalty is not equal to the best tuple seen so far,
-                                        * this one has a lower penalty than the previously-seen one.  But, when
-                                        * a new best tuple is found, we must record the best penalty value for
-                                        * all the remaining columns.  We'll end up here for each remaining index
-                                        * column in that case, too.
+                                        * New best penalty for column.  Tentatively select this
+                                        * page as the target, and record the best penalty.  Then
+                                        * reset the next column's penalty to "unknown" (and
+                                        * indirectly, the same for all the ones to its right).
+                                        * This will force us to adopt this page's penalty values
+                                        * as the best for all the remaining columns during
+                                        * subsequent loop iterations.
                                          */
                                         which = i;
-                                       which_grow[j] = usize;
+                                       best_penalty[j] = usize;
+
                                         if (j < r->rd_att->natts - 1)
-                                               which_grow[j + 1] = -1;
-                                       sum_grow += which_grow[j];
+                                               best_penalty[j + 1] = -1;
                                 }
-                               else if (which_grow[j] == usize)
+                               else if (best_penalty[j] == usize)
                                 {
                                         /*
-                                        * The current tuple is exactly as good for this column as the best tuple
-                                        * seen so far.  The next iteration of this loop will compare the next
-                                        * column.
+                                        * The current page is exactly as good for this column as
+                                        * the best page seen so far.  The next iteration of this
+                                        * loop will compare the next column.
                                          */
-                                       sum_grow += usize;
                                 }
                                 else
                                 {
                                         /*
-                                        * The current tuple is worse for this column than the best tuple seen so
-                                        * far.  Skip the remaining columns and move on to the next tuple, if any.
+                                        * The current page is worse for this column than the best
+                                        * page seen so far.  Skip the remaining columns and move
+                                        * on to the next page, if any.
                                          */
-                                       sum_grow = 1;
+                                       zero_penalty = false;           /* so outer loop won't exit */
                                         break;
                                 }
                         }
+
+                       /*
+                        * If we find a page with zero penalty for all columns, there's no
+                        * need to examine remaining pages; just break out of the loop and
+                        * return it.
+                        */
+                       if (zero_penalty)
+                               break;
                 }
+
+               /* OK, "which" is the page index to push the tuple to */
                 targetBufferInfo = &relocationBuffersInfos[which];
  
                 /* Push item to selected node buffer */
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c

index 7c6ce8495caac1e9d7c99afdb513689de93beea5..efb650ec5c33b805f57bf4865c5b27969ad20907 100644 (file)
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -363,113 +363,120 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
  }
  
  /*
- * Search a page for the entry with lowest penalty.
+ * Search an upper index page for the entry with lowest penalty for insertion
+ * of the new index key contained in "it".
   *
- * The index may have multiple columns, and there's a penalty value for each column.
- * The penalty associated with a column which appears earlier in the index definition is
- * strictly more important than the penalty of column which appears later in the index
- * definition.
+ * Returns the index of the page entry to insert into.
   */
  OffsetNumber
  gistchoose(Relation r, Page p, IndexTuple it,  /* it has compressed entry */
                    GISTSTATE *giststate)
  {
+       OffsetNumber result;
         OffsetNumber maxoff;
         OffsetNumber i;
-       OffsetNumber which;
-       float           sum_grow,
-                               which_grow[INDEX_MAX_KEYS];
+       float           best_penalty[INDEX_MAX_KEYS];
         GISTENTRY       entry,
                                 identry[INDEX_MAX_KEYS];
         bool            isnull[INDEX_MAX_KEYS];
  
-       maxoff = PageGetMaxOffsetNumber(p);
-       *which_grow = -1.0;
-       which = InvalidOffsetNumber;
-       sum_grow = 1;
+       Assert(!GistPageIsLeaf(p));
+
         gistDeCompressAtt(giststate, r,
                                           it, NULL, (OffsetNumber) 0,
                                           identry, isnull);
  
-       Assert(maxoff >= FirstOffsetNumber);
-       Assert(!GistPageIsLeaf(p));
+       /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
+       result = FirstOffsetNumber;
  
         /*
-        * Loop over tuples on page.
+        * The index may have multiple columns, and there's a penalty value for
+        * each column.  The penalty associated with a column that appears earlier
+        * in the index definition is strictly more important than the penalty of
+        * a column that appears later in the index definition.
          *
-        * We'll exit early if we find an index key that can accommodate the new key with no
-        * penalty on any column.  sum_grow is used to track this condition.  Normally, it is the
-        * sum of the penalties we've seen for this column so far, which is not a very useful
-        * quantity in general because the penalties for each column are only considered
-        * independently, but all we really care about is whether or not it's greater than zero.
-        * Since penalties can't be negative, the sum of the penalties will be greater than
-        * zero if and only if at least one penalty was greater than zero.  To make things just
-        * a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force
-        * the at least one more iteration of this outer loop.  Any non-zero value would serve
-        * just as well.
+        * best_penalty[j] is the best penalty we have seen so far for column j,
+        * or -1 when we haven't yet examined column j.  Array entries to the
+        * right of the first -1 are undefined.
          */
-       for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
+       best_penalty[0] = -1;
+
+       /*
+        * Loop over tuples on page.
+        */
+       maxoff = PageGetMaxOffsetNumber(p);
+       Assert(maxoff >= FirstOffsetNumber);
+
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
         {
-               int                     j;
                 IndexTuple      itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
+               bool            zero_penalty;
+               int                     j;
  
-               sum_grow = 0;
+               zero_penalty = true;
  
-               /* Loop over indexed attribtues. */
+               /* Loop over index attributes. */
                 for (j = 0; j < r->rd_att->natts; j++)
                 {
                         Datum           datum;
                         float           usize;
                         bool            IsNull;
  
+                       /* Compute penalty for this column. */
                         datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
                         gistdentryinit(giststate, j, &entry, datum, r, p, i,
                                                    FALSE, IsNull);
                         usize = gistpenalty(giststate, j, &entry, IsNull,
                                                                 &identry[j], isnull[j]);
+                       if (usize > 0)
+                               zero_penalty = false;
  
-                       if (which_grow[j] < 0 || usize < which_grow[j])
+                       if (best_penalty[j] < 0 || usize < best_penalty[j])
                         {
                                 /*
-                                * We get here in two cases.  First, we may have just discovered that the
-                                * current tuple is the best one we've seen so far; that is, for the first
-                                * column for which the penalty is not equal to the best tuple seen so far,
-                                * this one has a lower penalty than the previously-seen one.  But, when
-                                * a new best tuple is found, we must record the best penalty value for
-                                * all the remaining columns.  We'll end up here for each remaining index
-                                * column in that case, too.
+                                * New best penalty for column.  Tentatively select this tuple
+                                * as the target, and record the best penalty.  Then reset the
+                                * next column's penalty to "unknown" (and indirectly, the
+                                * same for all the ones to its right).  This will force us to
+                                * adopt this tuple's penalty values as the best for all the
+                                * remaining columns during subsequent loop iterations.
                                  */
-                               which = i;
-                               which_grow[j] = usize;
+                               result = i;
+                               best_penalty[j] = usize;
+
                                 if (j < r->rd_att->natts - 1)
-                                       which_grow[j + 1] = -1;
-                               sum_grow += which_grow[j];
+                                       best_penalty[j + 1] = -1;
                         }
-                       else if (which_grow[j] == usize)
+                       else if (best_penalty[j] == usize)
                         {
                                 /*
-                                * The current tuple is exactly as good for this column as the best tuple
-                                * seen so far.  The next iteration of this loop will compare the next
-                                * column.
+                                * The current tuple is exactly as good for this column as the
+                                * best tuple seen so far.      The next iteration of this loop
+                                * will compare the next column.
                                  */
-                               sum_grow += usize;
                         }
                         else
                         {
                                 /*
-                                * The current tuple is worse for this column than the best tuple seen so
-                                * far.  Skip the remaining columns and move on to the next tuple, if any.
+                                * The current tuple is worse for this column than the best
+                                * tuple seen so far.  Skip the remaining columns and move on
+                                * to the next tuple, if any.
                                  */
-                               sum_grow = 1;
+                               zero_penalty = false;   /* so outer loop won't exit */
                                 break;
                         }
                 }
-       }
  
-       if (which == InvalidOffsetNumber)
-               which = FirstOffsetNumber;
+               /*
+                * If we find a tuple with zero penalty for all columns, there's no
+                * need to examine remaining tuples; just break out of the loop and
+                * return it.
+                */
+               if (zero_penalty)
+                       break;
+       }
  
-       return which;
+       return result;
  }
  
  /*
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 31 Aug 2012 02:52:43 +0000 (22:52 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 31 Aug 2012 02:53:17 +0000 (22:53 -0400)
src/backend/access/gist/gistbuildbuffers.c		patch \| blob \| history
src/backend/access/gist/gistutil.c		patch \| blob \| history