Repair bugs in GiST page splitting code for multi-column indexes.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Feb 2013 22:44:21 +0000 (17:44 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Feb 2013 22:44:21 +0000 (17:44 -0500)
When considering a non-last column in a multi-column GiST index,
gistsplit.c tries to improve on the split chosen by the opclass-specific
pickSplit function by considering penalties for the next column.  However,
there were two bugs in this code: it failed to recompute the union keys for
the leftmost index columns, even though these might well change after
reassigning tuples; and it included the old union keys in the recomputation
for the columns it did recompute, so that those keys couldn't get smaller
even if they should.  The first problem could result in an invalid index
in which searches wouldn't find index entries that are in fact present;
the second would make the index less efficient to search.

Both of these errors were caused by misuse of gistMakeUnionItVec, whose
API was designed in a way that just begged such errors to be made.  There
is no situation in which it's safe or useful to compute the union keys for
a subset of the index columns, and there is no caller that wants any
previous union keys to be included in the computation; so the undocumented
choice to treat the union keys as in/out rather than pure output parameters
is a waste of code as well as being dangerous.

Hence, rather than just making a minimal patch, I've changed the API of
gistMakeUnionItVec to remove the "startkey" parameter (it now always
processes all index columns) and treat the attr/isnull arrays as purely
output parameters.

In passing, also get rid of a couple of unnecessary and dangerous uses
of static variables in gistutil.c.  It's remarkable that the one in
gistMakeUnionKey hasn't given us portability troubles before now, because
in addition to posing a re-entrancy hazard, it was unsafely assuming that
a static char[] array would have at least Datum alignment.

Per investigation of a trouble report from Tomas Vondra.  (There are also
some bugs in contrib/btree_gist to be fixed, but that seems like material
for a separate patch.)  Back-patch to all supported branches.

src/backend/access/gist/gistsplit.c
src/backend/access/gist/gistutil.c
src/include/access/gist_private.h

index 5700e530fe49733751c8c4bdd2f750e7f858f509..5d177b7a6b8caa7d2721d78b2998581c866d4682 100644 (file)
 
 typedef struct
 {
-       Datum      *attr;
-       int                     len;
        OffsetNumber *entries;
+       int                     len;
+       Datum      *attr;
        bool       *isnull;
        bool       *equiv;
 } GistSplitUnion;
 
 
 /*
- * Forms unions of subkeys after page split, but
- * uses only tuples aren't in groups of equalent tuples
+ * Form unions of subkeys after a page split, ignoring any tuples
+ * that are marked in gsvp->equiv[]
  */
 static void
 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
-                                  GistSplitUnion *gsvp, int startkey)
+                                  GistSplitUnion *gsvp)
 {
        IndexTuple *cleanedItVec;
        int                     i,
@@ -49,35 +49,37 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
                cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
        }
 
-       gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
-                                          gsvp->attr, gsvp->isnull);
+       if (!gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
+                                                       gsvp->attr, gsvp->isnull))
+               elog(ERROR, "invalid GiST tuple not expected");
 
        pfree(cleanedItVec);
 }
 
 /*
- * unions subkeys for after user picksplit over attno-1 column
+ * Recompute unions of subkeys after a page split, ignoring any tuples
+ * that are marked in spl->spl_equiv[]
  */
 static void
-gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, int attno)
+gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
 {
        GistSplitUnion gsvp;
 
        gsvp.equiv = spl->spl_equiv;
 
-       gsvp.attr = spl->spl_lattr;
-       gsvp.len = spl->splitVector.spl_nleft;
        gsvp.entries = spl->splitVector.spl_left;
+       gsvp.len = spl->splitVector.spl_nleft;
+       gsvp.attr = spl->spl_lattr;
        gsvp.isnull = spl->spl_lisnull;
 
-       gistunionsubkeyvec(giststate, itvec, &gsvp, attno);
+       gistunionsubkeyvec(giststate, itvec, &gsvp);
 
-       gsvp.attr = spl->spl_rattr;
-       gsvp.len = spl->splitVector.spl_nright;
        gsvp.entries = spl->splitVector.spl_right;
+       gsvp.len = spl->splitVector.spl_nright;
+       gsvp.attr = spl->spl_rattr;
        gsvp.isnull = spl->spl_risnull;
 
-       gistunionsubkeyvec(giststate, itvec, &gsvp, attno);
+       gistunionsubkeyvec(giststate, itvec, &gsvp);
 }
 
 /*
@@ -440,14 +442,14 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
                         */
                        if (LenEquiv == 0)
                        {
-                               gistunionsubkey(giststate, itup, v, attno + 1);
+                               gistunionsubkey(giststate, itup, v);
                        }
                        else
                        {
                                cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
                                cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
 
-                               gistunionsubkey(giststate, itup, v, attno + 1);
+                               gistunionsubkey(giststate, itup, v);
                                if (LenEquiv == 1)
                                {
                                        /*
@@ -468,7 +470,7 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
                                         * performance, because it's very rarely
                                         */
                                        v->spl_equiv = NULL;
-                                       gistunionsubkey(giststate, itup, v, attno + 1);
+                                       gistunionsubkey(giststate, itup, v);
 
                                        return false;
                                }
@@ -547,7 +549,7 @@ gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, i
                gsvp.entries = v->splitVector.spl_left;
                gsvp.isnull = v->spl_lisnull;
 
-               gistunionsubkeyvec(giststate, itup, &gsvp, 0);
+               gistunionsubkeyvec(giststate, itup, &gsvp);
        }
 }
 
@@ -619,7 +621,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
                                v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
 
                v->spl_equiv = NULL;
-               gistunionsubkey(giststate, itup, v, attno);
+               gistunionsubkey(giststate, itup, v);
        }
        else
        {
@@ -675,7 +677,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
 
                                v->splitVector = backupSplit;
                                /* reunion left and right datums */
-                               gistunionsubkey(giststate, itup, v, attno);
+                               gistunionsubkey(giststate, itup, v);
                        }
                }
        }
index 5ab1e9c7152136759be93ee6f96221ebe98e45cc..073f1a9ad24f94947545a1239ddd3ea58de53ecf 100644 (file)
 #include "storage/bufmgr.h"
 #include "utils/rel.h"
 
-/*
- * static *S used for temrorary storage (saves stack and palloc() call)
- */
-
-static Datum attrS[INDEX_MAX_KEYS];
-static bool isnullS[INDEX_MAX_KEYS];
 
 /*
  * Write itup vector to page, has no control of free space.
@@ -150,12 +144,13 @@ gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
 }
 
 /*
- * Make unions of keys in IndexTuple vector, return FALSE if itvec contains
- * invalid tuple. Resulting Datums aren't compressed.
+ * Make unions of keys in IndexTuple vector (one union datum per index column).
+ * Union Datums are returned into the attr/isnull arrays.
+ * Resulting Datums aren't compressed.
+ * Fails and returns FALSE if itvec contains any invalid tuples.
  */
-
 bool
-gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
+gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
                                   Datum *attr, bool *isnull)
 {
        int                     i;
@@ -164,19 +159,12 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke
 
        evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
 
-       for (i = startkey; i < giststate->tupdesc->natts; i++)
+       for (i = 0; i < giststate->tupdesc->natts; i++)
        {
                int                     j;
 
+               /* Collect non-null datums for this column */
                evec->n = 0;
-               if (!isnull[i])
-               {
-                       gistentryinit(evec->vector[evec->n], attr[i],
-                                                 NULL, NULL, (OffsetNumber) 0,
-                                                 FALSE);
-                       evec->n++;
-               }
-
                for (j = 0; j < len; j++)
                {
                        Datum           datum;
@@ -198,7 +186,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke
                        evec->n++;
                }
 
-               /* If this tuple vector was all NULLs, the union is NULL */
+               /* If this column was all NULLs, the union is NULL */
                if (evec->n == 0)
                {
                        attr[i] = (Datum) 0;
@@ -208,6 +196,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke
                {
                        if (evec->n == 1)
                        {
+                               /* unionFn may expect at least two inputs */
                                evec->n = 2;
                                evec->vector[1] = evec->vector[0];
                        }
@@ -231,12 +220,13 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startke
 IndexTuple
 gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
 {
-       memset(isnullS, TRUE, sizeof(bool) * giststate->tupdesc->natts);
+       Datum           attr[INDEX_MAX_KEYS];
+       bool            isnull[INDEX_MAX_KEYS];
 
-       if (!gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS))
+       if (!gistMakeUnionItVec(giststate, itvec, len, attr, isnull))
                return gist_form_invalid_tuple(InvalidBlockNumber);
 
-       return gistFormTuple(giststate, r, attrS, isnullS, false);
+       return gistFormTuple(giststate, r, attr, isnull, false);
 }
 
 /*
@@ -248,12 +238,15 @@ gistMakeUnionKey(GISTSTATE *giststate, int attno,
                                 GISTENTRY *entry2, bool isnull2,
                                 Datum *dst, bool *dstisnull)
 {
-
+       /* we need a GistEntryVector with room for exactly 2 elements */
+       union
+       {
+               GistEntryVector gev;
+               char            padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
+       }                       storage;
+       GistEntryVector *evec = &storage.gev;
        int                     dstsize;
 
-       static char storage[2 * sizeof(GISTENTRY) + GEVHDRSZ];
-       GistEntryVector *evec = (GistEntryVector *) storage;
-
        evec->n = 2;
 
        if (isnull1 && isnull2)
@@ -327,6 +320,8 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
                                addentries[INDEX_MAX_KEYS];
        bool            oldisnull[INDEX_MAX_KEYS],
                                addisnull[INDEX_MAX_KEYS];
+       Datum           attr[INDEX_MAX_KEYS];
+       bool            isnull[INDEX_MAX_KEYS];
        IndexTuple      newtup = NULL;
        int                     i;
 
@@ -344,19 +339,20 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
                gistMakeUnionKey(giststate, i,
                                                 oldentries + i, oldisnull[i],
                                                 addentries + i, addisnull[i],
-                                                attrS + i, isnullS + i);
+                                                attr + i, isnull + i);
 
                if (neednew)
                        /* we already need new key, so we can skip check */
                        continue;
 
-               if (isnullS[i])
+               if (isnull[i])
                        /* union of key may be NULL if and only if both keys are NULL */
                        continue;
 
                if (!addisnull[i])
                {
-                       if (oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i]) == false)
+                       if (oldisnull[i] ||
+                               !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
                                neednew = true;
                }
        }
@@ -364,7 +360,7 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
        if (neednew)
        {
                /* need to update key */
-               newtup = gistFormTuple(giststate, r, attrS, isnullS, false);
+               newtup = gistFormTuple(giststate, r, attr, isnull, false);
                newtup->t_tid = oldtup->t_tid;
        }
 
index 4df5fed116b39b76c90a255bf223dd92602ba17f..e7027e607f58eb6eafe727e67947f9bea0a43ebe 100644 (file)
@@ -314,7 +314,7 @@ extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
 extern float gistpenalty(GISTSTATE *giststate, int attno,
                        GISTENTRY *key1, bool isNull1,
                        GISTENTRY *key2, bool isNull2);
-extern bool gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
+extern bool gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
                                   Datum *attr, bool *isnull);
 extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b);
 extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,