]> granicus.if.org Git - postgresql/commitdiff
Fix an additional set of problems in GIN's handling of lossy page pointers.
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 1 Aug 2010 19:16:47 +0000 (19:16 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 1 Aug 2010 19:16:47 +0000 (19:16 +0000)
Although the key-combining code claimed to work correctly if its input
contained both lossy and exact pointers for a single page in a single TID
stream, in fact this did not work, and could not work without pretty
fundamental redesign.  Modify keyGetItem so that it will not return such a
stream, by handling lossy-pointer cases a bit more explicitly than we did
before.

Per followup investigation of a gripe from Artur Dabrowski.
An example of a query that failed given his data set is
select count(*) from search_tab where
(to_tsvector('german', keywords ) @@ to_tsquery('german', 'ee:* | dd:*')) and
(to_tsvector('german', keywords ) @@ to_tsquery('german', 'aa:*'));

Back-patch to 8.4 where the lossy pointer code was introduced.

src/backend/access/gin/ginget.c
src/include/access/gin.h

index 1cb0aec3151b269b3bdc5e51166bcf759705f12c..a3af0e61c9972c147302fa976ce074e32881f664 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.30.4.1 2010/07/31 00:31:04 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.30.4.2 2010/08/01 19:16:47 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -43,13 +43,12 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
        int                     res;
 
        if (GinPageGetOpaque(page)->flags & GIN_DELETED)
-               /* page was deleted by concurrent  vacuum */
+               /* page was deleted by concurrent vacuum */
                return false;
 
        /*
         * scan page to find equal or first greater value
         */
-
        for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
        {
                res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
@@ -527,12 +526,40 @@ entryGetNextItem(Relation index, GinScanEntry entry)
        }
 }
 
+/* convenience function for invoking a key's consistentFn */
+static inline bool
+callConsistentFn(GinState *ginstate, GinScanKey key)
+{
+       /*
+        * Initialize recheckCurItem in case the consistentFn doesn't know it
+        * should set it.  The safe assumption in that case is to force recheck.
+        */
+       key->recheckCurItem = true;
+
+       return DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
+                                                                         PointerGetDatum(key->entryRes),
+                                                                         UInt16GetDatum(key->strategy),
+                                                                         key->query,
+                                                                         UInt32GetDatum(key->nentries),
+                                                                         PointerGetDatum(key->extra_data),
+                                                                         PointerGetDatum(&key->recheckCurItem)));
+}
+
 #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
 
 /*
  * Sets entry->curItem to next heap item pointer for one entry of one scan key,
  * or sets entry->isFinished to TRUE if there are no more.
+ *
+ * Item pointers must be returned in ascending order.
+ *
+ * Note: this can return a "lossy page" item pointer, indicating that the
+ * entry potentially matches all items on that heap page.  However, it is
+ * not allowed to return both a lossy page pointer and exact (regular)
+ * item pointers for the same page.  (Doing so would break the key-combination
+ * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
+ * current implementation this is guaranteed by the behavior of tidbitmaps.
  */
 static void
 entryGetItem(Relation index, GinScanEntry entry)
@@ -625,15 +652,20 @@ entryGetItem(Relation index, GinScanEntry entry)
  * item pointer (including the case where the item pointer is a lossy page
  * pointer).
  *
- * Note: lossy page could be returned after single items from the same page.
- * This is OK since the results will just be used to build a bitmap; we'll
- * set a bitmap entry more than once, but never actually return a row twice.
+ * Item pointers must be returned in ascending order.
+ *
+ * Note: this can return a "lossy page" item pointer, indicating that the
+ * key potentially matches all items on that heap page.  However, it is
+ * not allowed to return both a lossy page pointer and exact (regular)
+ * item pointers for the same page.  (Doing so would break the key-combination
+ * logic in scanGetItem.)
  */
 static void
 keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
                   GinScanKey key, ItemPointer advancePast)
 {
        ItemPointerData myAdvancePast = *advancePast;
+       ItemPointerData curPageLossy;
        uint32          i;
        uint32          lossyEntry;
        bool            haveLossyEntry;
@@ -691,87 +723,112 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
                myAdvancePast = key->curItem;
 
                /*
-                * Prepare entryRes array to be passed to consistentFn.
-                *
-                * If key->nentries == 1 then the consistentFn should always succeed,
-                * but we must call it anyway to find out the recheck status.
-                *
                 * Lossy-page entries pose a problem, since we don't know the correct
                 * entryRes state to pass to the consistentFn, and we also don't know
                 * what its combining logic will be (could be AND, OR, or even NOT).
-                * Our strategy for a single lossy-page entry is to try the
-                * consistentFn both ways and return a hit if it accepts either one
-                * (forcing the hit to be marked lossy so it will be rechecked).
+                * If the logic is OR then the consistentFn might succeed for all
+                * items in the lossy page even when none of the other entries match.
+                *
+                * If we have a single lossy-page entry then we check to see if the
+                * consistentFn will succeed with only that entry TRUE.  If so,
+                * we return a lossy-page pointer to indicate that the whole heap
+                * page must be checked.  (On the next call, we'll advance past all
+                * regular and lossy entries for this page before resuming search,
+                * thus ensuring that we never return both regular and lossy pointers
+                * for the same page.)
                 *
                 * This idea could be generalized to more than one lossy-page entry,
                 * but ideally lossy-page entries should be infrequent so it would
-                * seldom be the case that we have more than one.  If we do find more
-                * than one, we just punt and return the item as lossy.
+                * seldom be the case that we have more than one at once.  So it
+                * doesn't seem worth the extra complexity to optimize that case.
+                * If we do find more than one, we just punt and return a lossy-page
+                * pointer always.
                 *
                 * Note that only lossy-page entries pointing to the current item's
-                * page should trigger this processing.
+                * page should trigger this processing; we might have future lossy
+                * pages in the entry array, but they aren't relevant yet.
                 */
+               ItemPointerSetLossyPage(&curPageLossy,
+                                                               GinItemPointerGetBlockNumber(&key->curItem));
+
                lossyEntry = 0;
                haveLossyEntry = false;
                for (i = 0; i < key->nentries; i++)
                {
                        entry = key->scanEntry + i;
-
-                       if (entry->isFinished)
-                               key->entryRes[i] = FALSE;
-                       else if (ItemPointerIsLossyPage(&entry->curItem) &&
-                                        GinItemPointerGetBlockNumber(&entry->curItem) ==
-                                        GinItemPointerGetBlockNumber(&key->curItem))
+                       if (entry->isFinished == FALSE &&
+                               compareItemPointers(&entry->curItem, &curPageLossy) == 0)
                        {
                                if (haveLossyEntry)
                                {
-                                       /* Too many lossy entries, punt */
+                                       /* Multiple lossy entries, punt */
+                                       key->curItem = curPageLossy;
                                        key->recheckCurItem = true;
                                        return;
                                }
                                lossyEntry = i;
                                haveLossyEntry = true;
-                               /* initially assume TRUE */
-                               key->entryRes[i] = TRUE;
                        }
-                       else if (compareItemPointers(&entry->curItem, &key->curItem) == 0)
-                               key->entryRes[i] = TRUE;
-                       else
-                               key->entryRes[i] = FALSE;
                }
 
+               /* prepare for calling consistentFn in temp context */
                oldCtx = MemoryContextSwitchTo(tempCtx);
 
+               if (haveLossyEntry)
+               {
+                       /* Single lossy-page entry, so see if whole page matches */
+                       memset(key->entryRes, FALSE, key->nentries);
+                       key->entryRes[lossyEntry] = TRUE;
+
+                       if (callConsistentFn(ginstate, key))
+                       {
+                               /* Yes, so clean up ... */
+                               MemoryContextSwitchTo(oldCtx);
+                               MemoryContextReset(tempCtx);
+
+                               /* and return lossy pointer for whole page */
+                               key->curItem = curPageLossy;
+                               key->recheckCurItem = true;
+                               return;
+                       }
+               }
+
                /*
-                * Initialize recheckCurItem in case the consistentFn doesn't know it
-                * should set it.  The safe assumption in that case is to force
-                * recheck.
+                * At this point we know that we don't need to return a lossy
+                * whole-page pointer, but we might have matches for individual exact
+                * item pointers, possibly in combination with a lossy pointer.  Our
+                * strategy if there's a lossy pointer is to try the consistentFn both
+                * ways and return a hit if it accepts either one (forcing the hit to
+                * be marked lossy so it will be rechecked).
+                *
+                * Prepare entryRes array to be passed to consistentFn.
+                *
+                * (If key->nentries == 1 then the consistentFn should always succeed,
+                * but we must call it anyway to find out the recheck status.)
                 */
-               key->recheckCurItem = true;
+               for (i = 0; i < key->nentries; i++)
+               {
+                       entry = key->scanEntry + i;
+                       if (entry->isFinished == FALSE &&
+                               compareItemPointers(&entry->curItem, &key->curItem) == 0)
+                               key->entryRes[i] = TRUE;
+                       else
+                               key->entryRes[i] = FALSE;
+               }
+               if (haveLossyEntry)
+                       key->entryRes[lossyEntry] = TRUE;
 
-               res = DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
-                                                                                PointerGetDatum(key->entryRes),
-                                                                                UInt16GetDatum(key->strategy),
-                                                                                key->query,
-                                                                                UInt32GetDatum(key->nentries),
-                                                                                PointerGetDatum(key->extra_data),
-                                                                                PointerGetDatum(&key->recheckCurItem)));
+               res = callConsistentFn(ginstate, key);
 
                if (!res && haveLossyEntry)
                {
                        /* try the other way for the lossy item */
                        key->entryRes[lossyEntry] = FALSE;
-                       key->recheckCurItem = true;
 
-                       res = DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1],
-                                                                                        PointerGetDatum(key->entryRes),
-                                                                                        UInt16GetDatum(key->strategy),
-                                                                                        key->query,
-                                                                                        UInt32GetDatum(key->nentries),
-                                                                                        PointerGetDatum(key->extra_data),
-                                                                                        PointerGetDatum(&key->recheckCurItem)));
+                       res = callConsistentFn(ginstate, key);
                }
 
+               /* clean up after consistentFn calls */
                MemoryContextSwitchTo(oldCtx);
                MemoryContextReset(tempCtx);
 
@@ -1108,7 +1165,6 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
        GinScanOpaque so = (GinScanOpaque) scan->opaque;
        MemoryContext oldCtx;
        bool            recheck,
-                               keyrecheck,
                                match;
        int                     i;
        pendingPosition pos;
@@ -1152,8 +1208,8 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
                        continue;
 
                /*
-                * Matching of entries of one row is finished, so check row by
-                * consistent function.
+                * Matching of entries of one row is finished, so check row using
+                * consistent functions.
                 */
                oldCtx = MemoryContextSwitchTo(so->tempCtx);
                recheck = false;
@@ -1163,21 +1219,12 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
                {
                        GinScanKey      key = so->keys + i;
 
-                       keyrecheck = true;
-
-                       if (!DatumGetBool(FunctionCall6(&so->ginstate.consistentFn[key->attnum - 1],
-                                                                                       PointerGetDatum(key->entryRes),
-                                                                                       UInt16GetDatum(key->strategy),
-                                                                                       key->query,
-                                                                                       UInt32GetDatum(key->nentries),
-                                                                                       PointerGetDatum(key->extra_data),
-                                                                                       PointerGetDatum(&keyrecheck))))
+                       if (!callConsistentFn(&so->ginstate, key))
                        {
                                match = false;
                                break;
                        }
-
-                       recheck |= keyrecheck;
+                       recheck |= key->recheckCurItem;
                }
 
                MemoryContextSwitchTo(oldCtx);
@@ -1248,11 +1295,26 @@ scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
 
                Assert(!ItemPointerIsMax(item));
 
-               /*
+               /*----------
                 * Now *item contains first ItemPointer after previous result.
                 *
                 * The item is a valid hit only if all the keys returned either
                 * that exact TID, or a lossy reference to the same page.
+                *
+                * This logic works only if a keyGetItem stream can never contain both
+                * exact and lossy pointers for the same page.  Else we could have a
+                * case like
+                *
+                *              stream 1                stream 2
+                *              ...                             ...
+                *              42/6                    42/7
+                *              50/1                    42/0xffff
+                *              ...                             ...
+                *
+                * We would conclude that 42/6 is not a match and advance stream 1,
+                * thus never detecting the match to the lossy pointer in stream 2.
+                * (keyGetItem has a similar problem versus entryGetItem.)
+                *----------
                 */
                match = true;
                for (i = 0; i < so->nkeys; i++)
index 07eb3733cc09be735422f9cd821272b3ae3c094b..f87dc8ebae125fda970d1edf7b6981c3c92acd10 100644 (file)
@@ -4,7 +4,7 @@
  *
  *     Copyright (c) 2006-2010, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38.4.2 2010/08/01 02:12:51 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38.4.3 2010/08/01 19:16:47 tgl Exp $
  *--------------------------------------------------------------------------
  */
 #ifndef GIN_H
@@ -107,13 +107,22 @@ typedef struct GinMetaPageData
  * We use our own ItemPointerGet(BlockNumber|GetOffsetNumber)
  * to avoid Asserts, since sometimes the ip_posid isn't "valid"
  */
-
 #define GinItemPointerGetBlockNumber(pointer) \
        BlockIdGetBlockNumber(&(pointer)->ip_blkid)
 
 #define GinItemPointerGetOffsetNumber(pointer) \
        ((pointer)->ip_posid)
 
+/*
+ * Special-case item pointer values needed by the GIN search logic.
+ *     MIN: sorts less than any valid item pointer
+ *     MAX: sorts greater than any valid item pointer
+ *     LOSSY PAGE: indicates a whole heap page, sorts after normal item
+ *                             pointers for that page
+ * Note that these are all distinguishable from an "invalid" item pointer
+ * (which is InvalidBlockNumber/0) as well as from all normal item
+ * pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
+ */
 #define ItemPointerSetMin(p)  \
        ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
 #define ItemPointerIsMin(p)  \