From: Robert Haas Date: Thu, 30 Aug 2012 17:05:45 +0000 (-0400) Subject: Fix logic bug in gistchoose and gistRelocateBuildBuffersOnSplit. X-Git-Tag: REL9_3_BETA1~991 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c8ba697a4bdb934f0c51424c654e8db6133ea255;p=postgresql Fix logic bug in gistchoose and gistRelocateBuildBuffersOnSplit. Every time the best-tuple-found-so-far changes, we need to reset all the penalty values in which_grow[] to the penalties for the new best tuple. The old code failed to do this, resulting in inferior index quality. The original patch from Alexander Korotkov was just two lines; I took the liberty of fleshing that out by adding a bunch of comments that I hope will make this logic easier for others to understand than it was for me. --- diff --git a/src/backend/access/gist/gistbuildbuffers.c b/src/backend/access/gist/gistbuildbuffers.c index 39aec856f9..d1e246adad 100644 --- a/src/backend/access/gist/gistbuildbuffers.c +++ b/src/backend/access/gist/gistbuildbuffers.c @@ -625,8 +625,13 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, } /* - * Loop through all index tuples on the buffer on the splitted page, - * moving them to buffers on the new pages. + * Loop through all index tuples on the buffer on the page being split, + * moving them to buffers on the new pages. We try to move each tuple + * the page that will result in the lowest penalty for the leading column + * or, in the case of a tie, the lowest penalty for the earliest column + * that is not tied. + * + * The guts of this loop are very similar to gistchoose(). */ while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup)) { @@ -637,14 +642,18 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, IndexTuple newtup; RelocationBufferInfo *targetBufferInfo; - /* - * Choose which page this tuple should go to. - */ gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, entry, isnull); which = -1; *which_grow = -1.0f; + + /* + * Loop over possible target pages. We'll exit early if we find an index key that + * can accommodate the new key with no penalty on any column. sum_grow is used to + * track this condition. It doesn't need to be exactly accurate, just >0 whenever + * we want the loop to continue and equal to 0 when we want it to terminate. + */ sum_grow = 1.0f; for (i = 0; i < splitPagesCount && sum_grow; i++) @@ -653,6 +662,8 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i]; sum_grow = 0.0f; + + /* Loop over index attributes. */ for (j = 0; j < r->rd_att->natts; j++) { float usize; @@ -664,16 +675,36 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, if (which_grow[j] < 0 || usize < which_grow[j]) { + /* + * We get here in two cases. First, we may have just discovered that the + * current tuple is the best one we've seen so far; that is, for the first + * column for which the penalty is not equal to the best tuple seen so far, + * this one has a lower penalty than the previously-seen one. But, when + * a new best tuple is found, we must record the best penalty value for + * all the remaining columns. We'll end up here for each remaining index + * column in that case, too. + */ which = i; which_grow[j] = usize; - if (j < r->rd_att->natts - 1 && i == 0) + if (j < r->rd_att->natts - 1) which_grow[j + 1] = -1; sum_grow += which_grow[j]; } else if (which_grow[j] == usize) + { + /* + * The current tuple is exactly as good for this column as the best tuple + * seen so far. The next iteration of this loop will compare the next + * column. + */ sum_grow += usize; + } else { + /* + * The current tuple is worse for this column than the best tuple seen so + * far. Skip the remaining columns and move on to the next tuple, if any. + */ sum_grow = 1; break; } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index df1e2e396f..7c6ce8495c 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -363,7 +363,12 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis } /* - * find entry with lowest penalty + * Search a page for the entry with lowest penalty. + * + * The index may have multiple columns, and there's a penalty value for each column. + * The penalty associated with a column which appears earlier in the index definition is + * strictly more important than the penalty of column which appears later in the index + * definition. */ OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ @@ -389,12 +394,28 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ Assert(maxoff >= FirstOffsetNumber); Assert(!GistPageIsLeaf(p)); + /* + * Loop over tuples on page. + * + * We'll exit early if we find an index key that can accommodate the new key with no + * penalty on any column. sum_grow is used to track this condition. Normally, it is the + * sum of the penalties we've seen for this column so far, which is not a very useful + * quantity in general because the penalties for each column are only considered + * independently, but all we really care about is whether or not it's greater than zero. + * Since penalties can't be negative, the sum of the penalties will be greater than + * zero if and only if at least one penalty was greater than zero. To make things just + * a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force + * the at least one more iteration of this outer loop. Any non-zero value would serve + * just as well. + */ for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i)) { int j; IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); sum_grow = 0; + + /* Loop over indexed attribtues. */ for (j = 0; j < r->rd_att->natts; j++) { Datum datum; @@ -409,16 +430,36 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ if (which_grow[j] < 0 || usize < which_grow[j]) { + /* + * We get here in two cases. First, we may have just discovered that the + * current tuple is the best one we've seen so far; that is, for the first + * column for which the penalty is not equal to the best tuple seen so far, + * this one has a lower penalty than the previously-seen one. But, when + * a new best tuple is found, we must record the best penalty value for + * all the remaining columns. We'll end up here for each remaining index + * column in that case, too. + */ which = i; which_grow[j] = usize; - if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber) + if (j < r->rd_att->natts - 1) which_grow[j + 1] = -1; sum_grow += which_grow[j]; } else if (which_grow[j] == usize) + { + /* + * The current tuple is exactly as good for this column as the best tuple + * seen so far. The next iteration of this loop will compare the next + * column. + */ sum_grow += usize; + } else { + /* + * The current tuple is worse for this column than the best tuple seen so + * far. Skip the remaining columns and move on to the next tuple, if any. + */ sum_grow = 1; break; }