1 /*-------------------------------------------------------------------------
4 * fetch tuples from a GIN scan.
7 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginget.c
12 *-------------------------------------------------------------------------
17 #include "access/gin_private.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
20 #include "utils/datum.h"
21 #include "utils/memutils.h"
24 typedef struct pendingPosition
27 OffsetNumber firstOffset;
28 OffsetNumber lastOffset;
35 * Convenience function for invoking a key's consistentFn
38 callConsistentFn(GinState *ginstate, GinScanKey key)
41 * If we're dealing with a dummy EVERYTHING key, we don't want to call the
42 * consistentFn; just claim it matches.
44 if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
46 key->recheckCurItem = false;
51 * Initialize recheckCurItem in case the consistentFn doesn't know it
52 * should set it. The safe assumption in that case is to force recheck.
54 key->recheckCurItem = true;
56 return DatumGetBool(FunctionCall8Coll(&ginstate->consistentFn[key->attnum - 1],
57 ginstate->supportCollation[key->attnum - 1],
58 PointerGetDatum(key->entryRes),
59 UInt16GetDatum(key->strategy),
61 UInt32GetDatum(key->nuserentries),
62 PointerGetDatum(key->extra_data),
63 PointerGetDatum(&key->recheckCurItem),
64 PointerGetDatum(key->queryValues),
65 PointerGetDatum(key->queryCategories)));
69 * Tries to refind previously taken ItemPointer on a posting page.
72 findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off)
74 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
77 if (GinPageGetOpaque(page)->flags & GIN_DELETED)
78 /* page was deleted by concurrent vacuum */
82 * scan page to find equal or first greater value
84 for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
86 res = ginCompareItemPointers(item, GinDataPageGetItemPointer(page, *off));
96 * Goes to the next page if current offset is outside of bounds
99 moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
101 Page page = BufferGetPage(stack->buffer);
103 if (stack->off > PageGetMaxOffsetNumber(page))
106 * We scanned the whole page, so we should take right page
108 if (GinPageRightMost(page))
109 return false; /* no more pages */
111 stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
112 stack->blkno = BufferGetBlockNumber(stack->buffer);
113 stack->off = FirstOffsetNumber;
120 * Scan all pages of a posting tree and save all its heap ItemPointers
121 * in scanEntry->matchBitmap
124 scanPostingTree(Relation index, GinScanEntry scanEntry,
125 BlockNumber rootPostingTree)
127 GinBtreeStack *stack;
131 /* Descend to the leftmost leaf page */
132 stack = ginScanBeginPostingTree(index, rootPostingTree);
133 buffer = stack->buffer;
134 IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
136 freeGinBtreeStack(stack);
139 * Loop iterates through all leaf pages of posting tree
143 page = BufferGetPage(buffer);
145 if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 &&
146 GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
148 tbm_add_tuples(scanEntry->matchBitmap,
149 GinDataPageGetItemPointer(page, FirstOffsetNumber),
150 GinPageGetOpaque(page)->maxoff, false);
151 scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
154 if (GinPageRightMost(page))
155 break; /* no more pages */
157 buffer = ginStepRight(buffer, index, GIN_SHARE);
160 UnlockReleaseBuffer(buffer);
164 * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
165 * match the search entry. This supports three different match modes:
167 * 1. Partial-match support: scan from current point until the
168 * comparePartialFn says we're done.
169 * 2. SEARCH_MODE_ALL: scan from current point (which should be first
170 * key for the current attnum) until we hit null items or end of attnum
171 * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
172 * key for the current attnum) until we hit end of attnum
174 * Returns true if done, false if it's necessary to restart scan from scratch
177 collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
178 GinScanEntry scanEntry)
181 Form_pg_attribute attr;
183 /* Initialize empty bitmap result */
184 scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
186 /* Null query cannot partial-match anything */
187 if (scanEntry->isPartialMatch &&
188 scanEntry->queryCategory != GIN_CAT_NORM_KEY)
191 /* Locate tupdesc entry for key column (for attbyval/attlen data) */
192 attnum = scanEntry->attnum;
193 attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
200 GinNullCategory icategory;
203 * stack->off points to the interested entry, buffer is already locked
205 if (moveRightIfItNeeded(btree, stack) == false)
208 page = BufferGetPage(stack->buffer);
209 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
212 * If tuple stores another attribute then stop scan
214 if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
217 /* Safe to fetch attribute value */
218 idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
221 * Check for appropriate scan stop conditions
223 if (scanEntry->isPartialMatch)
228 * In partial match, stop scan at any null (including
229 * placeholders); partial matches never match nulls
231 if (icategory != GIN_CAT_NORM_KEY)
235 * Check of partial match.
236 * case cmp == 0 => match
237 * case cmp > 0 => not match and finish scan
238 * case cmp < 0 => not match and continue scan
241 cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
242 btree->ginstate->supportCollation[attnum - 1],
245 UInt16GetDatum(scanEntry->strategy),
246 PointerGetDatum(scanEntry->extra_data)));
256 else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
259 * In ALL mode, we are not interested in null items, so we can
260 * stop if we get to a null-item placeholder (which will be the
261 * last entry for a given attnum). We do want to include NULL_KEY
262 * and EMPTY_ITEM entries, though.
264 if (icategory == GIN_CAT_NULL_ITEM)
269 * OK, we want to return the TIDs listed in this entry.
271 if (GinIsPostingTree(itup))
273 BlockNumber rootPostingTree = GinGetPostingTree(itup);
276 * We should unlock current page (but not unpin) during tree scan
277 * to prevent deadlock with vacuum processes.
279 * We save current entry value (idatum) to be able to re-find our
280 * tuple after re-locking
282 if (icategory == GIN_CAT_NORM_KEY)
283 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
285 LockBuffer(stack->buffer, GIN_UNLOCK);
287 /* Collect all the TIDs in this entry's posting tree */
288 scanPostingTree(btree->index, scanEntry, rootPostingTree);
291 * We lock again the entry page and while it was unlocked insert
292 * might have occurred, so we need to re-find our position.
294 LockBuffer(stack->buffer, GIN_SHARE);
295 page = BufferGetPage(stack->buffer);
296 if (!GinPageIsLeaf(page))
299 * Root page becomes non-leaf while we unlock it. We will
300 * start again, this situation doesn't occur often - root can
301 * became a non-leaf only once per life of index.
306 /* Search forward to re-find idatum */
310 GinNullCategory newCategory;
312 if (moveRightIfItNeeded(btree, stack) == false)
313 elog(ERROR, "lost saved point in index"); /* must not happen !!! */
315 page = BufferGetPage(stack->buffer);
316 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
318 if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
319 elog(ERROR, "lost saved point in index"); /* must not happen !!! */
320 newDatum = gintuple_get_key(btree->ginstate, itup,
323 if (ginCompareEntries(btree->ginstate, attnum,
324 newDatum, newCategory,
325 idatum, icategory) == 0)
331 if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
332 pfree(DatumGetPointer(idatum));
336 tbm_add_tuples(scanEntry->matchBitmap,
337 GinGetPosting(itup), GinGetNPosting(itup), false);
338 scanEntry->predictNumberResult += GinGetNPosting(itup);
342 * Done with this entry, go to the next
349 * Start* functions setup beginning state of searches: finds correct buffer and pins it.
352 startScanEntry(GinState *ginstate, GinScanEntry entry)
354 GinBtreeData btreeEntry;
355 GinBtreeStack *stackEntry;
360 entry->buffer = InvalidBuffer;
361 ItemPointerSetMin(&entry->curItem);
362 entry->offset = InvalidOffsetNumber;
365 entry->matchBitmap = NULL;
366 entry->matchResult = NULL;
367 entry->reduceResult = FALSE;
368 entry->predictNumberResult = 0;
371 * we should find entry, and begin scan of posting tree or just store
372 * posting list in memory
374 ginPrepareEntryScan(&btreeEntry, entry->attnum,
375 entry->queryKey, entry->queryCategory,
377 stackEntry = ginFindLeafPage(&btreeEntry, true);
378 page = BufferGetPage(stackEntry->buffer);
381 entry->isFinished = TRUE;
383 if (entry->isPartialMatch ||
384 entry->queryCategory == GIN_CAT_EMPTY_QUERY)
387 * btreeEntry.findItem locates the first item >= given search key.
388 * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
389 * because of the way the GIN_CAT_EMPTY_QUERY category code is
390 * assigned.) We scan forward from there and collect all TIDs needed
391 * for the entry type.
393 btreeEntry.findItem(&btreeEntry, stackEntry);
394 if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false)
397 * GIN tree was seriously restructured, so we will cleanup all
398 * found data and rescan. See comments near 'return false' in
399 * collectMatchBitmap()
401 if (entry->matchBitmap)
403 if (entry->matchIterator)
404 tbm_end_iterate(entry->matchIterator);
405 entry->matchIterator = NULL;
406 tbm_free(entry->matchBitmap);
407 entry->matchBitmap = NULL;
409 LockBuffer(stackEntry->buffer, GIN_UNLOCK);
410 freeGinBtreeStack(stackEntry);
411 goto restartScanEntry;
414 if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
416 entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
417 entry->isFinished = FALSE;
420 else if (btreeEntry.findItem(&btreeEntry, stackEntry))
422 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
424 if (GinIsPostingTree(itup))
426 BlockNumber rootPostingTree = GinGetPostingTree(itup);
427 GinBtreeStack *stack;
431 * We should unlock entry page before touching posting tree to
432 * prevent deadlocks with vacuum processes. Because entry is never
433 * deleted from page and posting tree is never reduced to the
434 * posting list, we can unlock page after getting BlockNumber of
435 * root of posting tree.
437 LockBuffer(stackEntry->buffer, GIN_UNLOCK);
440 stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
441 entry->buffer = stack->buffer;
444 * We keep buffer pinned because we need to prevent deletion of
445 * page during scan. See GIN's vacuum implementation. RefCount is
446 * increased to keep buffer pinned after freeGinBtreeStack() call.
448 IncrBufferRefCount(entry->buffer);
450 page = BufferGetPage(entry->buffer);
451 entry->predictNumberResult = stack->predictNumber * GinPageGetOpaque(page)->maxoff;
454 * Keep page content in memory to prevent durable page locking
456 entry->list = (ItemPointerData *) palloc(BLCKSZ);
457 entry->nlist = GinPageGetOpaque(page)->maxoff;
459 GinDataPageGetItemPointer(page, FirstOffsetNumber),
460 GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
462 LockBuffer(entry->buffer, GIN_UNLOCK);
463 freeGinBtreeStack(stack);
464 entry->isFinished = FALSE;
466 else if (GinGetNPosting(itup) > 0)
468 entry->nlist = GinGetNPosting(itup);
469 entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
470 memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
471 entry->isFinished = FALSE;
476 LockBuffer(stackEntry->buffer, GIN_UNLOCK);
477 freeGinBtreeStack(stackEntry);
481 startScanKey(GinState *ginstate, GinScanKey key)
483 ItemPointerSetMin(&key->curItem);
484 key->curItemMatches = false;
485 key->recheckCurItem = false;
486 key->isFinished = false;
490 startScan(IndexScanDesc scan)
492 GinScanOpaque so = (GinScanOpaque) scan->opaque;
493 GinState *ginstate = &so->ginstate;
496 for (i = 0; i < so->totalentries; i++)
497 startScanEntry(ginstate, so->entries[i]);
499 if (GinFuzzySearchLimit > 0)
502 * If all of keys more than threshold we will try to reduce result, we
503 * hope (and only hope, for intersection operation of array our
504 * supposition isn't true), that total result will not more than
505 * minimal predictNumberResult.
508 for (i = 0; i < so->totalentries; i++)
509 if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
512 for (i = 0; i < so->totalentries; i++)
513 if (so->entries[i]->predictNumberResult > so->totalentries * GinFuzzySearchLimit)
515 so->entries[i]->predictNumberResult /= so->totalentries;
516 so->entries[i]->reduceResult = TRUE;
520 for (i = 0; i < so->nkeys; i++)
521 startScanKey(ginstate, so->keys + i);
525 * Gets next ItemPointer from PostingTree. Note, that we copy
526 * page into GinScanEntry->list array and unlock page, but keep it pinned
527 * to prevent interference with vacuum
530 entryGetNextItem(GinState *ginstate, GinScanEntry entry)
536 if (entry->offset < entry->nlist)
538 entry->curItem = entry->list[entry->offset++];
542 LockBuffer(entry->buffer, GIN_SHARE);
543 page = BufferGetPage(entry->buffer);
547 * It's needed to go by right link. During that we should refind
548 * first ItemPointer greater that stored
550 if (GinPageRightMost(page))
552 UnlockReleaseBuffer(entry->buffer);
553 ItemPointerSetInvalid(&entry->curItem);
554 entry->buffer = InvalidBuffer;
555 entry->isFinished = TRUE;
559 entry->buffer = ginStepRight(entry->buffer,
562 page = BufferGetPage(entry->buffer);
564 entry->offset = InvalidOffsetNumber;
565 if (!ItemPointerIsValid(&entry->curItem) ||
566 findItemInPostingPage(page, &entry->curItem, &entry->offset))
569 * Found position equal to or greater than stored
571 entry->nlist = GinPageGetOpaque(page)->maxoff;
573 GinDataPageGetItemPointer(page, FirstOffsetNumber),
574 GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
576 LockBuffer(entry->buffer, GIN_UNLOCK);
578 if (!ItemPointerIsValid(&entry->curItem) ||
579 ginCompareItemPointers(&entry->curItem,
580 entry->list + entry->offset - 1) == 0)
583 * First pages are deleted or empty, or we found exact
584 * position, so break inner loop and continue outer one.
590 * Find greater than entry->curItem position, store it.
592 entry->curItem = entry->list[entry->offset - 1];
600 #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
601 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
604 * Sets entry->curItem to next heap item pointer for one entry of one scan key,
605 * or sets entry->isFinished to TRUE if there are no more.
607 * Item pointers must be returned in ascending order.
609 * Note: this can return a "lossy page" item pointer, indicating that the
610 * entry potentially matches all items on that heap page. However, it is
611 * not allowed to return both a lossy page pointer and exact (regular)
612 * item pointers for the same page. (Doing so would break the key-combination
613 * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
614 * current implementation this is guaranteed by the behavior of tidbitmaps.
617 entryGetItem(GinState *ginstate, GinScanEntry entry)
619 Assert(!entry->isFinished);
621 if (entry->matchBitmap)
625 if (entry->matchResult == NULL ||
626 entry->offset >= entry->matchResult->ntuples)
628 entry->matchResult = tbm_iterate(entry->matchIterator);
630 if (entry->matchResult == NULL)
632 ItemPointerSetInvalid(&entry->curItem);
633 tbm_end_iterate(entry->matchIterator);
634 entry->matchIterator = NULL;
635 entry->isFinished = TRUE;
640 * Reset counter to the beginning of entry->matchResult. Note:
641 * entry->offset is still greater than matchResult->ntuples if
642 * matchResult is lossy. So, on next call we will get next
643 * result from TIDBitmap.
648 if (entry->matchResult->ntuples < 0)
651 * lossy result, so we need to check the whole page
653 ItemPointerSetLossyPage(&entry->curItem,
654 entry->matchResult->blockno);
657 * We might as well fall out of the loop; we could not
658 * estimate number of results on this page to support correct
659 * reducing of result even if it's enabled
664 ItemPointerSet(&entry->curItem,
665 entry->matchResult->blockno,
666 entry->matchResult->offsets[entry->offset]);
668 } while (entry->reduceResult == TRUE && dropItem(entry));
670 else if (!BufferIsValid(entry->buffer))
673 if (entry->offset <= entry->nlist)
674 entry->curItem = entry->list[entry->offset - 1];
677 ItemPointerSetInvalid(&entry->curItem);
678 entry->isFinished = TRUE;
685 entryGetNextItem(ginstate, entry);
686 } while (entry->isFinished == FALSE &&
687 entry->reduceResult == TRUE &&
693 * Identify the "current" item among the input entry streams for this scan key,
694 * and test whether it passes the scan key qual condition.
696 * The current item is the smallest curItem among the inputs. key->curItem
697 * is set to that value. key->curItemMatches is set to indicate whether that
698 * TID passes the consistentFn test. If so, key->recheckCurItem is set true
699 * iff recheck is needed for this item pointer (including the case where the
700 * item pointer is a lossy page pointer).
702 * If all entry streams are exhausted, sets key->isFinished to TRUE.
704 * Item pointers must be returned in ascending order.
706 * Note: this can return a "lossy page" item pointer, indicating that the
707 * key potentially matches all items on that heap page. However, it is
708 * not allowed to return both a lossy page pointer and exact (regular)
709 * item pointers for the same page. (Doing so would break the key-combination
710 * logic in scanGetItem.)
713 keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
715 ItemPointerData minItem;
716 ItemPointerData curPageLossy;
722 MemoryContext oldCtx;
724 Assert(!key->isFinished);
727 * Find the minimum of the active entry curItems.
729 * Note: a lossy-page entry is encoded by a ItemPointer with max value for
730 * offset (0xffff), so that it will sort after any exact entries for the
731 * same page. So we'll prefer to return exact pointers not lossy
732 * pointers, which is good.
734 ItemPointerSetMax(&minItem);
736 for (i = 0; i < key->nentries; i++)
738 entry = key->scanEntry[i];
739 if (entry->isFinished == FALSE &&
740 ginCompareItemPointers(&entry->curItem, &minItem) < 0)
741 minItem = entry->curItem;
744 if (ItemPointerIsMax(&minItem))
746 /* all entries are finished */
747 key->isFinished = TRUE;
752 * We might have already tested this item; if so, no need to repeat work.
753 * (Note: the ">" case can happen, if minItem is exact but we previously
754 * had to set curItem to a lossy-page pointer.)
756 if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
760 * OK, advance key->curItem and perform consistentFn test.
762 key->curItem = minItem;
765 * Lossy-page entries pose a problem, since we don't know the correct
766 * entryRes state to pass to the consistentFn, and we also don't know what
767 * its combining logic will be (could be AND, OR, or even NOT). If the
768 * logic is OR then the consistentFn might succeed for all items in the
769 * lossy page even when none of the other entries match.
771 * If we have a single lossy-page entry then we check to see if the
772 * consistentFn will succeed with only that entry TRUE. If so, we return
773 * a lossy-page pointer to indicate that the whole heap page must be
774 * checked. (On subsequent calls, we'll do nothing until minItem is past
775 * the page altogether, thus ensuring that we never return both regular
776 * and lossy pointers for the same page.)
778 * This idea could be generalized to more than one lossy-page entry, but
779 * ideally lossy-page entries should be infrequent so it would seldom be
780 * the case that we have more than one at once. So it doesn't seem worth
781 * the extra complexity to optimize that case. If we do find more than
782 * one, we just punt and return a lossy-page pointer always.
784 * Note that only lossy-page entries pointing to the current item's page
785 * should trigger this processing; we might have future lossy pages in the
786 * entry array, but they aren't relevant yet.
788 ItemPointerSetLossyPage(&curPageLossy,
789 GinItemPointerGetBlockNumber(&key->curItem));
792 haveLossyEntry = false;
793 for (i = 0; i < key->nentries; i++)
795 entry = key->scanEntry[i];
796 if (entry->isFinished == FALSE &&
797 ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
801 /* Multiple lossy entries, punt */
802 key->curItem = curPageLossy;
803 key->curItemMatches = true;
804 key->recheckCurItem = true;
808 haveLossyEntry = true;
812 /* prepare for calling consistentFn in temp context */
813 oldCtx = MemoryContextSwitchTo(tempCtx);
817 /* Single lossy-page entry, so see if whole page matches */
818 memset(key->entryRes, FALSE, key->nentries);
819 key->entryRes[lossyEntry] = TRUE;
821 if (callConsistentFn(ginstate, key))
823 /* Yes, so clean up ... */
824 MemoryContextSwitchTo(oldCtx);
825 MemoryContextReset(tempCtx);
827 /* and return lossy pointer for whole page */
828 key->curItem = curPageLossy;
829 key->curItemMatches = true;
830 key->recheckCurItem = true;
836 * At this point we know that we don't need to return a lossy whole-page
837 * pointer, but we might have matches for individual exact item pointers,
838 * possibly in combination with a lossy pointer. Our strategy if there's
839 * a lossy pointer is to try the consistentFn both ways and return a hit
840 * if it accepts either one (forcing the hit to be marked lossy so it will
841 * be rechecked). An exception is that we don't need to try it both ways
842 * if the lossy pointer is in a "hidden" entry, because the consistentFn's
843 * result can't depend on that.
845 * Prepare entryRes array to be passed to consistentFn.
847 for (i = 0; i < key->nentries; i++)
849 entry = key->scanEntry[i];
850 if (entry->isFinished == FALSE &&
851 ginCompareItemPointers(&entry->curItem, &key->curItem) == 0)
852 key->entryRes[i] = TRUE;
854 key->entryRes[i] = FALSE;
857 key->entryRes[lossyEntry] = TRUE;
859 res = callConsistentFn(ginstate, key);
861 if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
863 /* try the other way for the lossy item */
864 key->entryRes[lossyEntry] = FALSE;
866 res = callConsistentFn(ginstate, key);
869 key->curItemMatches = res;
870 /* If we matched a lossy entry, force recheckCurItem = true */
872 key->recheckCurItem = true;
874 /* clean up after consistentFn calls */
875 MemoryContextSwitchTo(oldCtx);
876 MemoryContextReset(tempCtx);
880 * Get next heap item pointer (after advancePast) from scan.
881 * Returns true if anything found.
882 * On success, *item and *recheck are set.
884 * Note: this is very nearly the same logic as in keyGetItem(), except
885 * that we know the keys are to be combined with AND logic, whereas in
886 * keyGetItem() the combination logic is known only to the consistentFn.
889 scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
890 ItemPointerData *item, bool *recheck)
892 GinScanOpaque so = (GinScanOpaque) scan->opaque;
893 GinState *ginstate = &so->ginstate;
894 ItemPointerData myAdvancePast = *advancePast;
902 * Advance any entries that are <= myAdvancePast. In particular,
903 * since entry->curItem was initialized with ItemPointerSetMin, this
904 * ensures we fetch the first item for each entry on the first call.
908 for (i = 0; i < so->totalentries; i++)
910 GinScanEntry entry = so->entries[i];
912 while (entry->isFinished == FALSE &&
913 ginCompareItemPointers(&entry->curItem,
914 &myAdvancePast) <= 0)
915 entryGetItem(ginstate, entry);
917 if (entry->isFinished == FALSE)
923 /* all entries exhausted, so we're done */
928 * Perform the consistentFn test for each scan key. If any key
929 * reports isFinished, meaning its subset of the entries is exhausted,
930 * we can stop. Otherwise, set *item to the minimum of the key
933 ItemPointerSetMax(item);
935 for (i = 0; i < so->nkeys; i++)
937 GinScanKey key = so->keys + i;
939 keyGetItem(&so->ginstate, so->tempCtx, key);
942 return false; /* finished one of keys */
944 if (ginCompareItemPointers(&key->curItem, item) < 0)
945 *item = key->curItem;
948 Assert(!ItemPointerIsMax(item));
951 * Now *item contains first ItemPointer after previous result.
953 * The item is a valid hit only if all the keys succeeded for either
954 * that exact TID, or a lossy reference to the same page.
956 * This logic works only if a keyGetItem stream can never contain both
957 * exact and lossy pointers for the same page. Else we could have a
966 * We would conclude that 42/6 is not a match and advance stream 1,
967 * thus never detecting the match to the lossy pointer in stream 2.
968 * (keyGetItem has a similar problem versus entryGetItem.)
972 for (i = 0; i < so->nkeys; i++)
974 GinScanKey key = so->keys + i;
976 if (key->curItemMatches)
978 if (ginCompareItemPointers(item, &key->curItem) == 0)
980 if (ItemPointerIsLossyPage(&key->curItem) &&
981 GinItemPointerGetBlockNumber(&key->curItem) ==
982 GinItemPointerGetBlockNumber(item))
993 * No hit. Update myAdvancePast to this TID, so that on the next pass
994 * we'll move to the next possible entry.
996 myAdvancePast = *item;
1000 * We must return recheck = true if any of the keys are marked recheck.
1003 for (i = 0; i < so->nkeys; i++)
1005 GinScanKey key = so->keys + i;
1007 if (key->recheckCurItem)
1019 * Functions for scanning the pending list
1024 * Get ItemPointer of next heap row to be checked from pending list.
1025 * Returns false if there are no more. On pages with several heap rows
1026 * it returns each row separately, on page with part of heap row returns
1027 * per page data. pos->firstOffset and pos->lastOffset are set to identify
1028 * the range of pending-list tuples belonging to this heap row.
1030 * The pendingBuffer is presumed pinned and share-locked on entry, and is
1031 * pinned and share-locked on success exit. On failure exit it's released.
1034 scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1036 OffsetNumber maxoff;
1040 ItemPointerSetInvalid(&pos->item);
1043 page = BufferGetPage(pos->pendingBuffer);
1045 maxoff = PageGetMaxOffsetNumber(page);
1046 if (pos->firstOffset > maxoff)
1048 BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1050 if (blkno == InvalidBlockNumber)
1052 UnlockReleaseBuffer(pos->pendingBuffer);
1053 pos->pendingBuffer = InvalidBuffer;
1060 * Here we must prevent deletion of next page by insertcleanup
1061 * process, which may be trying to obtain exclusive lock on
1062 * current page. So, we lock next page before releasing the
1065 Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1067 LockBuffer(tmpbuf, GIN_SHARE);
1068 UnlockReleaseBuffer(pos->pendingBuffer);
1070 pos->pendingBuffer = tmpbuf;
1071 pos->firstOffset = FirstOffsetNumber;
1076 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1077 pos->item = itup->t_tid;
1078 if (GinPageHasFullRow(page))
1081 * find itempointer to the next row
1083 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1085 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1086 if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1093 * All itempointers are the same on this page
1095 pos->lastOffset = maxoff + 1;
1099 * Now pos->firstOffset points to the first tuple of current heap
1100 * row, pos->lastOffset points to the first tuple of next heap row
1101 * (or to the end of page)
1111 * Scan pending-list page from current tuple (off) up till the first of:
1112 * - match is found (then returns true)
1113 * - no later match is possible
1114 * - tuple's attribute number is not equal to entry's attrnum
1115 * - reach end of page
1117 * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1118 * of gintuple_get_key() on the current page.
1121 matchPartialInPendingList(GinState *ginstate, Page page,
1122 OffsetNumber off, OffsetNumber maxoff,
1124 Datum *datum, GinNullCategory *category,
1125 bool *datumExtracted)
1130 /* Partial match to a null is not possible */
1131 if (entry->queryCategory != GIN_CAT_NORM_KEY)
1134 while (off < maxoff)
1136 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1138 if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1141 if (datumExtracted[off - 1] == false)
1143 datum[off - 1] = gintuple_get_key(ginstate, itup,
1144 &category[off - 1]);
1145 datumExtracted[off - 1] = true;
1148 /* Once we hit nulls, no further match is possible */
1149 if (category[off - 1] != GIN_CAT_NORM_KEY)
1153 * Check partial match.
1154 * case cmp == 0 => match
1155 * case cmp > 0 => not match and end scan (no later match possible)
1156 * case cmp < 0 => not match and continue scan
1159 cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1160 ginstate->supportCollation[entry->attnum - 1],
1163 UInt16GetDatum(entry->strategy),
1164 PointerGetDatum(entry->extra_data)));
1177 * Set up the entryRes array for each key by looking at
1178 * every entry for current heap row in pending list.
1180 * Returns true if each scan key has at least one entryRes match.
1181 * This corresponds to the situations where the normal index search will
1182 * try to apply the key's consistentFn. (A tuple not meeting that requirement
1183 * cannot be returned by the normal search since no entry stream will
1186 * The pendingBuffer is presumed pinned and share-locked on entry.
1189 collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1191 GinScanOpaque so = (GinScanOpaque) scan->opaque;
1192 OffsetNumber attrnum;
1199 * Reset all entryRes and hasMatchKey flags
1201 for (i = 0; i < so->nkeys; i++)
1203 GinScanKey key = so->keys + i;
1205 memset(key->entryRes, FALSE, key->nentries);
1207 memset(pos->hasMatchKey, FALSE, so->nkeys);
1210 * Outer loop iterates over multiple pending-list pages when a single heap
1211 * row has entries spanning those pages.
1215 Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1216 GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1217 bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1219 Assert(pos->lastOffset > pos->firstOffset);
1220 memset(datumExtracted + pos->firstOffset - 1, 0,
1221 sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1223 page = BufferGetPage(pos->pendingBuffer);
1225 for (i = 0; i < so->nkeys; i++)
1227 GinScanKey key = so->keys + i;
1229 for (j = 0; j < key->nentries; j++)
1231 GinScanEntry entry = key->scanEntry[j];
1232 OffsetNumber StopLow = pos->firstOffset,
1233 StopHigh = pos->lastOffset,
1236 /* If already matched on earlier page, do no extra work */
1237 if (key->entryRes[j])
1241 * Interesting tuples are from pos->firstOffset to
1242 * pos->lastOffset and they are ordered by (attnum, Datum) as
1243 * it's done in entry tree. So we can use binary search to
1244 * avoid linear scanning.
1246 while (StopLow < StopHigh)
1250 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1252 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1254 attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1256 if (key->attnum < attrnum)
1258 StopHigh = StopMiddle;
1261 if (key->attnum > attrnum)
1263 StopLow = StopMiddle + 1;
1267 if (datumExtracted[StopMiddle - 1] == false)
1269 datum[StopMiddle - 1] =
1270 gintuple_get_key(&so->ginstate, itup,
1271 &category[StopMiddle - 1]);
1272 datumExtracted[StopMiddle - 1] = true;
1275 if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1277 /* special behavior depending on searchMode */
1278 if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1280 /* match anything except NULL_ITEM */
1281 if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1288 /* match everything */
1294 res = ginCompareEntries(&so->ginstate,
1297 entry->queryCategory,
1298 datum[StopMiddle - 1],
1299 category[StopMiddle - 1]);
1305 * Found exact match (there can be only one, except in
1306 * EMPTY_QUERY mode).
1308 * If doing partial match, scan forward from here to
1309 * end of page to check for matches.
1311 * See comment above about tuple's ordering.
1313 if (entry->isPartialMatch)
1315 matchPartialInPendingList(&so->ginstate,
1324 key->entryRes[j] = true;
1326 /* done with binary search */
1330 StopHigh = StopMiddle;
1332 StopLow = StopMiddle + 1;
1335 if (StopLow >= StopHigh && entry->isPartialMatch)
1338 * No exact match on this page. If doing partial match,
1339 * scan from the first tuple greater than target value to
1340 * end of page. Note that since we don't remember whether
1341 * the comparePartialFn told us to stop early on a
1342 * previous page, we will uselessly apply comparePartialFn
1343 * to the first tuple on each subsequent page.
1346 matchPartialInPendingList(&so->ginstate,
1356 pos->hasMatchKey[i] |= key->entryRes[j];
1360 /* Advance firstOffset over the scanned tuples */
1361 pos->firstOffset = pos->lastOffset;
1363 if (GinPageHasFullRow(page))
1366 * We have examined all pending entries for the current heap row.
1367 * Break out of loop over pages.
1374 * Advance to next page of pending entries for the current heap
1375 * row. Complain if there isn't one.
1377 ItemPointerData item = pos->item;
1379 if (scanGetCandidate(scan, pos) == false ||
1380 !ItemPointerEquals(&pos->item, &item))
1381 elog(ERROR, "could not find additional pending pages for same heap tuple");
1386 * Now return "true" if all scan keys have at least one matching datum
1388 for (i = 0; i < so->nkeys; i++)
1390 if (pos->hasMatchKey[i] == false)
1398 * Collect all matched rows from pending list into bitmap
1401 scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1403 GinScanOpaque so = (GinScanOpaque) scan->opaque;
1404 MemoryContext oldCtx;
1408 pendingPosition pos;
1409 Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1414 LockBuffer(metabuffer, GIN_SHARE);
1415 blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
1418 * fetch head of list before unlocking metapage. head page must be pinned
1419 * to prevent deletion by vacuum process
1421 if (blkno == InvalidBlockNumber)
1423 /* No pending list, so proceed with normal scan */
1424 UnlockReleaseBuffer(metabuffer);
1428 pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1429 LockBuffer(pos.pendingBuffer, GIN_SHARE);
1430 pos.firstOffset = FirstOffsetNumber;
1431 UnlockReleaseBuffer(metabuffer);
1432 pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1435 * loop for each heap row. scanGetCandidate returns full row or row's
1436 * tuples from first page.
1438 while (scanGetCandidate(scan, &pos))
1441 * Check entries in tuple and set up entryRes array.
1443 * If pending tuples belonging to the current heap row are spread
1444 * across several pages, collectMatchesForHeapRow will read all of
1447 if (!collectMatchesForHeapRow(scan, &pos))
1451 * Matching of entries of one row is finished, so check row using
1452 * consistent functions.
1454 oldCtx = MemoryContextSwitchTo(so->tempCtx);
1458 for (i = 0; i < so->nkeys; i++)
1460 GinScanKey key = so->keys + i;
1462 if (!callConsistentFn(&so->ginstate, key))
1467 recheck |= key->recheckCurItem;
1470 MemoryContextSwitchTo(oldCtx);
1471 MemoryContextReset(so->tempCtx);
1475 tbm_add_tuples(tbm, &pos.item, 1, recheck);
1480 pfree(pos.hasMatchKey);
1484 #define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL )
1485 #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1488 gingetbitmap(PG_FUNCTION_ARGS)
1490 IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
1491 TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
1493 ItemPointerData iptr;
1497 * Set up the scan keys, and check for unsatisfiable query.
1499 if (GinIsNewKey(scan))
1500 ginNewScanKey(scan);
1502 if (GinIsVoidRes(scan))
1508 * First, scan the pending list and collect any matching entries into the
1509 * bitmap. After we scan a pending item, some other backend could post it
1510 * into the main index, and so we might visit it a second time during the
1511 * main scan. This is okay because we'll just re-set the same bit in the
1512 * bitmap. (The possibility of duplicate visits is a major reason why GIN
1513 * can't support the amgettuple API, however.) Note that it would not do
1514 * to scan the main index before the pending list, since concurrent
1515 * cleanup could then make us miss entries entirely.
1517 scanPendingInsert(scan, tbm, &ntids);
1520 * Now scan the main index.
1524 ItemPointerSetMin(&iptr);
1528 CHECK_FOR_INTERRUPTS();
1530 if (!scanGetItem(scan, &iptr, &iptr, &recheck))
1533 if (ItemPointerIsLossyPage(&iptr))
1534 tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1536 tbm_add_tuples(tbm, &iptr, 1, recheck);
1540 PG_RETURN_INT64(ntids);