1 /*-------------------------------------------------------------------------
4 * fetch tuples from a GiST scan.
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gist/gistget.c
13 *-------------------------------------------------------------------------
17 #include "access/genam.h"
18 #include "access/gist_private.h"
19 #include "access/relscan.h"
20 #include "miscadmin.h"
21 #include "storage/lmgr.h"
22 #include "storage/predicate.h"
24 #include "lib/pairingheap.h"
25 #include "utils/float.h"
26 #include "utils/memutils.h"
27 #include "utils/rel.h"
30 * gistkillitems() -- set LP_DEAD state for items an indexscan caller has
31 * told us were killed.
33 * We re-read page here, so it's important to check page LSN. If the page
34 * has been modified since the last read (as determined by LSN), we cannot
35 * flag any entries because it is possible that the old entry was vacuumed
36 * away and the TID was re-used by a completely different heap tuple.
39 gistkillitems(IndexScanDesc scan)
41 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
47 bool killedsomething = false;
49 Assert(so->curBlkno != InvalidBlockNumber);
50 Assert(!XLogRecPtrIsInvalid(so->curPageLSN));
51 Assert(so->killedItems != NULL);
53 buffer = ReadBuffer(scan->indexRelation, so->curBlkno);
54 if (!BufferIsValid(buffer))
57 LockBuffer(buffer, GIST_SHARE);
58 gistcheckpage(scan->indexRelation, buffer);
59 page = BufferGetPage(buffer);
62 * If page LSN differs it means that the page was modified since the last
63 * read. killedItems could be not valid so LP_DEAD hints applying is not
66 if (BufferGetLSNAtomic(buffer) != so->curPageLSN)
68 UnlockReleaseBuffer(buffer);
69 so->numKilled = 0; /* reset counter */
73 Assert(GistPageIsLeaf(page));
76 * Mark all killedItems as dead. We need no additional recheck, because,
77 * if page was modified, curPageLSN must have changed.
79 for (i = 0; i < so->numKilled; i++)
81 offnum = so->killedItems[i];
82 iid = PageGetItemId(page, offnum);
84 killedsomething = true;
89 GistMarkPageHasGarbage(page);
90 MarkBufferDirtyHint(buffer, true);
93 UnlockReleaseBuffer(buffer);
96 * Always reset the scan state, so we don't look for same items on other
103 * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
105 * The index tuple might represent either a heap tuple or a lower index page,
106 * depending on whether the containing page is a leaf page or not.
108 * On success return for a heap tuple, *recheck_p is set to indicate whether
109 * the quals need to be rechecked. We recheck if any of the consistent()
110 * functions request it. recheck is not interesting when examining a non-leaf
111 * entry, since we must visit the lower index page if there's any doubt.
112 * Similarly, *recheck_distances_p is set to indicate whether the distances
113 * need to be rechecked, and it is also ignored for non-leaf entries.
115 * If we are doing an ordered scan, so->distances[] is filled with distance
116 * data from the distance() functions before returning success.
118 * We must decompress the key in the IndexTuple before passing it to the
119 * sk_funcs (which actually are the opclass Consistent or Distance methods).
121 * Note that this function is always invoked in a short-lived memory context,
122 * so we don't need to worry about cleaning up allocated memory, either here
123 * or in the implementation of any Consistent or Distance methods.
126 gistindex_keytest(IndexScanDesc scan,
131 bool *recheck_distances_p)
133 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
134 GISTSTATE *giststate = so->giststate;
135 ScanKey key = scan->keyData;
136 int keySize = scan->numberOfKeys;
137 IndexOrderByDistance *distance_p;
138 Relation r = scan->indexRelation;
141 *recheck_distances_p = false;
144 * If it's a leftover invalid tuple from pre-9.1, treat it as a match with
145 * minimum possible distances. This means we'll always follow it to the
148 if (GistTupleIsInvalid(tuple))
152 if (GistPageIsLeaf(page)) /* shouldn't happen */
153 elog(ERROR, "invalid GiST tuple found on leaf page");
154 for (i = 0; i < scan->numberOfOrderBys; i++)
156 so->distances[i].value = -get_float8_infinity();
157 so->distances[i].isnull = false;
162 /* Check whether it matches according to the Consistent functions */
168 datum = index_getattr(tuple,
170 giststate->leafTupdesc,
173 if (key->sk_flags & SK_ISNULL)
176 * On non-leaf page we can't conclude that child hasn't NULL
177 * values because of assumption in GiST: union (VAL, NULL) is VAL.
178 * But if on non-leaf page key IS NULL, then all children are
181 if (key->sk_flags & SK_SEARCHNULL)
183 if (GistPageIsLeaf(page) && !isNull)
188 Assert(key->sk_flags & SK_SEARCHNOTNULL);
203 gistdentryinit(giststate, key->sk_attno - 1, &de,
204 datum, r, page, offset,
208 * Call the Consistent function to evaluate the test. The
209 * arguments are the index datum (as a GISTENTRY*), the comparison
210 * datum, the comparison operator's strategy number and subtype
211 * from pg_amop, and the recheck flag.
213 * (Presently there's no need to pass the subtype since it'll
214 * always be zero, but might as well pass it for possible future
217 * We initialize the recheck flag to true (the safest assumption)
218 * in case the Consistent function forgets to set it.
222 test = FunctionCall5Coll(&key->sk_func,
224 PointerGetDatum(&de),
226 Int16GetDatum(key->sk_strategy),
227 ObjectIdGetDatum(key->sk_subtype),
228 PointerGetDatum(&recheck));
230 if (!DatumGetBool(test))
232 *recheck_p |= recheck;
239 /* OK, it passes --- now let's compute the distances */
240 key = scan->orderByData;
241 distance_p = so->distances;
242 keySize = scan->numberOfOrderBys;
248 datum = index_getattr(tuple,
250 giststate->leafTupdesc,
253 if ((key->sk_flags & SK_ISNULL) || isNull)
255 /* Assume distance computes as null */
256 distance_p->value = 0.0;
257 distance_p->isnull = true;
265 gistdentryinit(giststate, key->sk_attno - 1, &de,
266 datum, r, page, offset,
270 * Call the Distance function to evaluate the distance. The
271 * arguments are the index datum (as a GISTENTRY*), the comparison
272 * datum, the ordering operator's strategy number and subtype from
273 * pg_amop, and the recheck flag.
275 * (Presently there's no need to pass the subtype since it'll
276 * always be zero, but might as well pass it for possible future
279 * If the function sets the recheck flag, the returned distance is
280 * a lower bound on the true distance and needs to be rechecked.
281 * We initialize the flag to 'false'. This flag was added in
282 * version 9.5; distance functions written before that won't know
283 * about the flag, but are expected to never be lossy.
286 dist = FunctionCall5Coll(&key->sk_func,
288 PointerGetDatum(&de),
290 Int16GetDatum(key->sk_strategy),
291 ObjectIdGetDatum(key->sk_subtype),
292 PointerGetDatum(&recheck));
293 *recheck_distances_p |= recheck;
294 distance_p->value = DatumGetFloat8(dist);
295 distance_p->isnull = false;
307 * Scan all items on the GiST index page identified by *pageItem, and insert
308 * them into the queue (or directly to output areas)
310 * scan: index scan we are executing
311 * pageItem: search queue item identifying an index page to scan
312 * myDistances: distances array associated with pageItem, or NULL at the root
313 * tbm: if not NULL, gistgetbitmap's output bitmap
314 * ntids: if not NULL, gistgetbitmap's output tuple counter
316 * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
317 * tuples should be reported directly into the bitmap. If they are NULL,
318 * we're doing a plain or ordered indexscan. For a plain indexscan, heap
319 * tuple TIDs are returned into so->pageData[]. For an ordered indexscan,
320 * heap tuple TIDs are pushed into individual search queue items. In an
321 * index-only scan, reconstructed index tuples are returned along with the
324 * If we detect that the index page has split since we saw its downlink
325 * in the parent, we push its new right sibling onto the queue so the
326 * sibling will be processed next.
329 gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
330 IndexOrderByDistance *myDistances, TIDBitmap *tbm, int64 *ntids)
332 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
333 GISTSTATE *giststate = so->giststate;
334 Relation r = scan->indexRelation;
337 GISTPageOpaque opaque;
340 MemoryContext oldcxt;
342 Assert(!GISTSearchItemIsHeap(*pageItem));
344 buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
345 LockBuffer(buffer, GIST_SHARE);
346 PredicateLockPage(r, BufferGetBlockNumber(buffer), scan->xs_snapshot);
347 gistcheckpage(scan->indexRelation, buffer);
348 page = BufferGetPage(buffer);
349 TestForOldSnapshot(scan->xs_snapshot, r, page);
350 opaque = GistPageGetOpaque(page);
353 * Check if we need to follow the rightlink. We need to follow it if the
354 * page was concurrently split since we visited the parent (in which case
355 * parentlsn < nsn), or if the system crashed after a page split but
356 * before the downlink was inserted into the parent.
358 if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
359 (GistFollowRight(page) ||
360 pageItem->data.parentlsn < GistPageGetNSN(page)) &&
361 opaque->rightlink != InvalidBlockNumber /* sanity check */ )
363 /* There was a page split, follow right link to add pages */
364 GISTSearchItem *item;
366 /* This can't happen when starting at the root */
367 Assert(myDistances != NULL);
369 oldcxt = MemoryContextSwitchTo(so->queueCxt);
371 /* Create new GISTSearchItem for the right sibling index page */
372 item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
373 item->blkno = opaque->rightlink;
374 item->data.parentlsn = pageItem->data.parentlsn;
376 /* Insert it into the queue using same distances as for this page */
377 memcpy(item->distances, myDistances,
378 sizeof(item->distances[0]) * scan->numberOfOrderBys);
380 pairingheap_add(so->queue, &item->phNode);
382 MemoryContextSwitchTo(oldcxt);
386 * Check if the page was deleted after we saw the downlink. There's
387 * nothing of interest on a deleted page. Note that we must do this after
388 * checking the NSN for concurrent splits! It's possible that the page
389 * originally contained some tuples that are visible to us, but was split
390 * so that all the visible tuples were moved to another page, and then
391 * this page was deleted.
393 if (GistPageIsDeleted(page))
395 UnlockReleaseBuffer(buffer);
399 so->nPageData = so->curPageData = 0;
400 scan->xs_hitup = NULL; /* might point into pageDataCxt */
402 MemoryContextReset(so->pageDataCxt);
405 * We save the LSN of the page as we read it, so that we know whether it
406 * safe to apply LP_DEAD hints to the page later. This allows us to drop
407 * the pin for MVCC scans, which allows vacuum to avoid blocking.
409 so->curPageLSN = BufferGetLSNAtomic(buffer);
412 * check all tuples on page
414 maxoff = PageGetMaxOffsetNumber(page);
415 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
417 ItemId iid = PageGetItemId(page, i);
421 bool recheck_distances;
424 * If the scan specifies not to return killed tuples, then we treat a
425 * killed tuple as not passing the qual.
427 if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
430 it = (IndexTuple) PageGetItem(page, iid);
433 * Must call gistindex_keytest in tempCxt, and clean up any leftover
436 oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt);
438 match = gistindex_keytest(scan, it, page, i,
439 &recheck, &recheck_distances);
441 MemoryContextSwitchTo(oldcxt);
442 MemoryContextReset(so->giststate->tempCxt);
444 /* Ignore tuple if it doesn't match */
448 if (tbm && GistPageIsLeaf(page))
451 * getbitmap scan, so just push heap tuple TIDs into the bitmap
452 * without worrying about ordering
454 tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
457 else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
460 * Non-ordered scan, so report tuples in so->pageData[]
462 so->pageData[so->nPageData].heapPtr = it->t_tid;
463 so->pageData[so->nPageData].recheck = recheck;
464 so->pageData[so->nPageData].offnum = i;
467 * In an index-only scan, also fetch the data from the tuple. The
468 * reconstructed tuples are stored in pageDataCxt.
470 if (scan->xs_want_itup)
472 oldcxt = MemoryContextSwitchTo(so->pageDataCxt);
473 so->pageData[so->nPageData].recontup =
474 gistFetchTuple(giststate, r, it);
475 MemoryContextSwitchTo(oldcxt);
482 * Must push item into search queue. We get here for any lower
483 * index page, and also for heap tuples if doing an ordered
486 GISTSearchItem *item;
487 int nOrderBys = scan->numberOfOrderBys;
489 oldcxt = MemoryContextSwitchTo(so->queueCxt);
491 /* Create new GISTSearchItem for this item */
492 item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
494 if (GistPageIsLeaf(page))
496 /* Creating heap-tuple GISTSearchItem */
497 item->blkno = InvalidBlockNumber;
498 item->data.heap.heapPtr = it->t_tid;
499 item->data.heap.recheck = recheck;
500 item->data.heap.recheckDistances = recheck_distances;
503 * In an index-only scan, also fetch the data from the tuple.
505 if (scan->xs_want_itup)
506 item->data.heap.recontup = gistFetchTuple(giststate, r, it);
510 /* Creating index-page GISTSearchItem */
511 item->blkno = ItemPointerGetBlockNumber(&it->t_tid);
514 * LSN of current page is lsn of parent page for child. We
515 * only have a shared lock, so we need to get the LSN
518 item->data.parentlsn = BufferGetLSNAtomic(buffer);
521 /* Insert it into the queue using new distance data */
522 memcpy(item->distances, so->distances,
523 sizeof(item->distances[0]) * nOrderBys);
525 pairingheap_add(so->queue, &item->phNode);
527 MemoryContextSwitchTo(oldcxt);
531 UnlockReleaseBuffer(buffer);
535 * Extract next item (in order) from search queue
537 * Returns a GISTSearchItem or NULL. Caller must pfree item when done with it.
539 static GISTSearchItem *
540 getNextGISTSearchItem(GISTScanOpaque so)
542 GISTSearchItem *item;
544 if (!pairingheap_is_empty(so->queue))
546 item = (GISTSearchItem *) pairingheap_remove_first(so->queue);
550 /* Done when both heaps are empty */
554 /* Return item; caller is responsible to pfree it */
559 * Fetch next heap tuple in an ordered search
562 getNextNearest(IndexScanDesc scan)
564 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
569 /* free previously returned tuple */
570 pfree(scan->xs_hitup);
571 scan->xs_hitup = NULL;
576 GISTSearchItem *item = getNextGISTSearchItem(so);
581 if (GISTSearchItemIsHeap(*item))
583 /* found a heap item at currently minimal distance */
584 scan->xs_heaptid = item->data.heap.heapPtr;
585 scan->xs_recheck = item->data.heap.recheck;
587 index_store_float8_orderby_distances(scan, so->orderByTypes,
589 item->data.heap.recheckDistances);
591 /* in an index-only scan, also return the reconstructed tuple. */
592 if (scan->xs_want_itup)
593 scan->xs_hitup = item->data.heap.recontup;
598 /* visit an index page, extract its items into queue */
599 CHECK_FOR_INTERRUPTS();
601 gistScanPage(scan, item, item->distances, NULL, NULL);
611 * gistgettuple() -- Get the next tuple in the scan
614 gistgettuple(IndexScanDesc scan, ScanDirection dir)
616 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
618 if (dir != ForwardScanDirection)
619 elog(ERROR, "GiST only supports forward scan direction");
626 /* Begin the scan by processing the root page */
627 GISTSearchItem fakeItem;
629 pgstat_count_index_scan(scan->indexRelation);
631 so->firstCall = false;
632 so->curPageData = so->nPageData = 0;
633 scan->xs_hitup = NULL;
635 MemoryContextReset(so->pageDataCxt);
637 fakeItem.blkno = GIST_ROOT_BLKNO;
638 memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
639 gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
642 if (scan->numberOfOrderBys > 0)
644 /* Must fetch tuples in strict distance order */
645 return getNextNearest(scan);
649 /* Fetch tuples index-page-at-a-time */
652 if (so->curPageData < so->nPageData)
654 if (scan->kill_prior_tuple && so->curPageData > 0)
657 if (so->killedItems == NULL)
659 MemoryContext oldCxt =
660 MemoryContextSwitchTo(so->giststate->scanCxt);
663 (OffsetNumber *) palloc(MaxIndexTuplesPerPage
664 * sizeof(OffsetNumber));
666 MemoryContextSwitchTo(oldCxt);
668 if (so->numKilled < MaxIndexTuplesPerPage)
669 so->killedItems[so->numKilled++] =
670 so->pageData[so->curPageData - 1].offnum;
672 /* continuing to return tuples from a leaf page */
673 scan->xs_heaptid = so->pageData[so->curPageData].heapPtr;
674 scan->xs_recheck = so->pageData[so->curPageData].recheck;
676 /* in an index-only scan, also return the reconstructed tuple */
677 if (scan->xs_want_itup)
678 scan->xs_hitup = so->pageData[so->curPageData].recontup;
686 * Check the last returned tuple and add it to killedItems if
689 if (scan->kill_prior_tuple
690 && so->curPageData > 0
691 && so->curPageData == so->nPageData)
694 if (so->killedItems == NULL)
696 MemoryContext oldCxt =
697 MemoryContextSwitchTo(so->giststate->scanCxt);
700 (OffsetNumber *) palloc(MaxIndexTuplesPerPage
701 * sizeof(OffsetNumber));
703 MemoryContextSwitchTo(oldCxt);
705 if (so->numKilled < MaxIndexTuplesPerPage)
706 so->killedItems[so->numKilled++] =
707 so->pageData[so->curPageData - 1].offnum;
709 /* find and process the next index page */
712 GISTSearchItem *item;
714 if ((so->curBlkno != InvalidBlockNumber) && (so->numKilled > 0))
717 item = getNextGISTSearchItem(so);
722 CHECK_FOR_INTERRUPTS();
724 /* save current item BlockNumber for next gistkillitems() call */
725 so->curBlkno = item->blkno;
728 * While scanning a leaf page, ItemPointers of matching heap
729 * tuples are stored in so->pageData. If there are any on
730 * this page, we fall out of the inner "do" and loop around to
733 gistScanPage(scan, item, item->distances, NULL, NULL);
736 } while (so->nPageData == 0);
742 * gistgetbitmap() -- Get a bitmap of all heap tuple locations
745 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
747 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
749 GISTSearchItem fakeItem;
754 pgstat_count_index_scan(scan->indexRelation);
756 /* Begin the scan by processing the root page */
757 so->curPageData = so->nPageData = 0;
758 scan->xs_hitup = NULL;
760 MemoryContextReset(so->pageDataCxt);
762 fakeItem.blkno = GIST_ROOT_BLKNO;
763 memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
764 gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
767 * While scanning a leaf page, ItemPointers of matching heap tuples will
768 * be stored directly into tbm, so we don't need to deal with them here.
772 GISTSearchItem *item = getNextGISTSearchItem(so);
777 CHECK_FOR_INTERRUPTS();
779 gistScanPage(scan, item, item->distances, tbm, &ntids);
788 * Can we do index-only scans on the given index column?
790 * Opclasses that implement a fetch function support index-only scans.
791 * Opclasses without compression functions also support index-only scans.
792 * Included attributes always can be fetched for index-only scans.
795 gistcanreturn(Relation index, int attno)
797 if (attno > IndexRelationGetNumberOfKeyAttributes(index) ||
798 OidIsValid(index_getprocid(index, attno, GIST_FETCH_PROC)) ||
799 !OidIsValid(index_getprocid(index, attno, GIST_COMPRESS_PROC)))