granicus.if.org Git - postgresql/commitdiff
Fix a serious bug introduced into GIN in 8.4: now that MergeItemPointers()
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 6 Jun 2009 02:39:40 +0000 (02:39 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 6 Jun 2009 02:39:40 +0000 (02:39 +0000)
is supposed to remove duplicate heap TIDs, we have to be sure to reduce the
tuple size and posting-item count accordingly in addItemPointersToTuple().
Failing to do so resulted in the effective injection of garbage TIDs into the
index contents, ie, whatever happened to be in the memory palloc'd for the
new tuple.  I'm not sure that this fully explains the index corruption
reported by Tatsuo Ishii, but the test case I'm using no longer fails.

src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/gininsert.c
src/include/access/gin.h

index a872d44880c7f04ffafbff40a98ad95e9a4a71ff..22199102ddcc0554587bd3122e292af18a06ac20 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.14 2009/03/24 20:17:10 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.15 2009/06/06 02:39:40 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -32,10 +32,14 @@ compareItemPointers(ItemPointer a, ItemPointer b)
 }
 
 /*
- * Merge two ordered array of itempointer
+ * Merge two ordered arrays of itempointers, eliminating any duplicates.
+ * Returns the number of items in the result.
+ * Caller is responsible that there is enough space at *dst.
  */
-void
-MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
+uint32
+MergeItemPointers(ItemPointerData *dst,
+                                 ItemPointerData *a, uint32 na,
+                                 ItemPointerData *b, uint32 nb)
 {
        ItemPointerData *dptr = dst;
        ItemPointerData *aptr = a,
@@ -62,6 +66,8 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint
 
        while (bptr - b < nb)
                *dptr++ = *bptr++;
+
+       return dptr - dst;
 }
 
 /*
index e16575bc8130e3efb6d35043e8fa176c14b5d3e9..f35994db953eba5962503695221dd8b166169e69 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.19 2009/01/01 17:23:34 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.20 2009/06/06 02:39:40 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
 #include "utils/rel.h"
 
 /*
- * forms tuple for entry tree. On leaf page, Index tuple has
- * non-traditional layout. Tuple may contain posting list or
- * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
+ * Form a tuple for entry tree.
+ *
+ * On leaf pages, Index tuple has non-traditional layout. Tuple may contain
+ * posting list or root blocknumber of posting tree.
+ * Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno)
  * 1) Posting list
- *             - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
+ *             - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual
  *             - ItemPointerGetBlockNumber(&itup->t_tid) contains original
  *               size of tuple (without posting list).
- *               Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
+ *               Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
  *             - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
- *               of elements in posting list (number of heap itempointer)
- *               Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
- *             - After usual part of tuple there is a posting list
+ *               of elements in posting list (number of heap itempointers)
+ *               Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n)
+ *             - After standard part of tuple there is a posting list, ie, array
+ *               of heap itempointers
  *               Macros: GinGetPosting(itup)
  * 2) Posting tree
  *             - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
  *             - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
  *               root of posting tree
- *             - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
+ *             - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number
+ *               GIN_TREE_POSTING, which distinguishes this from posting-list case
  *
- * Storage of attributes of tuple are different for single and multicolumn index.
- * For single-column index tuple stores only value to be indexed and for
- * multicolumn variant it stores two attributes: column number of value and value. 
+ * Attributes of an index tuple are different for single and multicolumn index.
+ * For single-column case, index tuple stores only value to be indexed.
+ * For multicolumn case, it stores two attributes: column number of value
+ * and value. 
  */
 IndexTuple
 GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd)
@@ -89,6 +94,28 @@ GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData
        return itup;
 }
 
+/*
+ * Sometimes we reduce the number of posting list items in a tuple after
+ * having built it with GinFormTuple.  This function adjusts the size
+ * fields to match.
+ */
+void
+GinShortenTuple(IndexTuple itup, uint32 nipd)
+{
+       uint32          newsize;
+
+       Assert(nipd <= GinGetNPosting(itup));
+
+       newsize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd);
+
+       Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK));
+
+       itup->t_info &= ~INDEX_SIZE_MASK;
+       itup->t_info |= newsize;
+
+       GinSetNPosting(itup, nipd);
+}
+
 /*
  * Entry tree is a "static", ie tuple never deletes from it,
  * so we don't use right bound, we use rightest key instead.
index f6a348eb85d88f98ee5aa4f2508aa9c1f03f2d57..ef3d4bbb0320e2b881042981af6d96eaaa8f2709 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.20 2009/03/24 22:06:03 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.21 2009/06/06 02:39:40 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -102,17 +102,19 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
 {
        Datum                   key = gin_index_getattr(ginstate, old);
        OffsetNumber    attnum = gintuple_get_attrnum(ginstate, old);
-       IndexTuple              res = GinFormTuple(ginstate, attnum, key, NULL, nitem + GinGetNPosting(old));
+       IndexTuple              res = GinFormTuple(ginstate, attnum, key,
+                                                                          NULL, nitem + GinGetNPosting(old));
 
        if (res)
        {
                /* good, small enough */
-               MergeItemPointers(GinGetPosting(res),
-                                                 GinGetPosting(old), GinGetNPosting(old),
-                                                 items, nitem
-                       );
+               uint32 newnitem;
 
-               GinSetNPosting(res, nitem + GinGetNPosting(old));
+               newnitem = MergeItemPointers(GinGetPosting(res),
+                                                                        GinGetPosting(old), GinGetNPosting(old),
+                                                                        items, nitem);
+               /* merge might have eliminated some duplicate items */
+               GinShortenTuple(res, newnitem);
        }
        else
        {
index 49a14b6290644141e80b3ca88569720db0da2852..366fc370b0a0c51a15160f4afbe0c4420e6f65ab 100644 (file)
@@ -4,7 +4,7 @@
  *
  *     Copyright (c) 2006-2009, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.32 2009/06/05 18:50:47 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.33 2009/06/06 02:39:40 tgl Exp $
  *--------------------------------------------------------------------------
  */
 #ifndef GIN_H
@@ -435,6 +435,7 @@ extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBl
 /* ginentrypage.c */
 extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
                                                                                ItemPointerData *ipd, uint32 nipd);
+extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
 extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum,
                                                                Datum value, GinState *ginstate);
 extern void entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
@@ -442,7 +443,7 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf);
 
 /* gindatapage.c */
 extern int     compareItemPointers(ItemPointer a, ItemPointer b);
-extern void MergeItemPointers(ItemPointerData *dst,
+extern uint32 MergeItemPointers(ItemPointerData *dst,
                                  ItemPointerData *a, uint32 na,
                                  ItemPointerData *b, uint32 nb);