1 /*-------------------------------------------------------------------------
4 * fetch tuples from a GiST scan.
7 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gist/gistget.c
13 *-------------------------------------------------------------------------
17 #include "access/gist_private.h"
18 #include "access/relscan.h"
19 #include "catalog/pg_type.h"
20 #include "miscadmin.h"
22 #include "lib/pairingheap.h"
23 #include "utils/builtins.h"
24 #include "utils/memutils.h"
25 #include "utils/rel.h"
28 * gistkillitems() -- set LP_DEAD state for items an indexscan caller has
29 * told us were killed.
31 * We re-read page here, so it's important to check page LSN. If the page
32 * has been modified since the last read (as determined by LSN), we cannot
33 * flag any entries because it is possible that the old entry was vacuumed
34 * away and the TID was re-used by a completely different heap tuple.
37 gistkillitems(IndexScanDesc scan)
39 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
45 bool killedsomething = false;
47 Assert(so->curBlkno != InvalidBlockNumber);
48 Assert(!XLogRecPtrIsInvalid(so->curPageLSN));
49 Assert(so->killedItems != NULL);
51 buffer = ReadBuffer(scan->indexRelation, so->curBlkno);
52 if (!BufferIsValid(buffer))
55 LockBuffer(buffer, GIST_SHARE);
56 gistcheckpage(scan->indexRelation, buffer);
57 page = BufferGetPage(buffer);
60 * If page LSN differs it means that the page was modified since the last read.
61 * killedItems could be not valid so LP_DEAD hints applying is not safe.
63 if(PageGetLSN(page) != so->curPageLSN)
65 UnlockReleaseBuffer(buffer);
66 so->numKilled = 0; /* reset counter */
70 Assert(GistPageIsLeaf(page));
73 * Mark all killedItems as dead. We need no additional recheck,
74 * because, if page was modified, pageLSN must have changed.
76 for (i = 0; i < so->numKilled; i++)
78 offnum = so->killedItems[i];
79 iid = PageGetItemId(page, offnum);
81 killedsomething = true;
86 GistMarkPageHasGarbage(page);
87 MarkBufferDirtyHint(buffer, true);
90 UnlockReleaseBuffer(buffer);
93 * Always reset the scan state, so we don't look for same items on other
100 * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
102 * The index tuple might represent either a heap tuple or a lower index page,
103 * depending on whether the containing page is a leaf page or not.
105 * On success return for a heap tuple, *recheck_p is set to indicate whether
106 * the quals need to be rechecked. We recheck if any of the consistent()
107 * functions request it. recheck is not interesting when examining a non-leaf
108 * entry, since we must visit the lower index page if there's any doubt.
109 * Similarly, *recheck_distances_p is set to indicate whether the distances
110 * need to be rechecked, and it is also ignored for non-leaf entries.
112 * If we are doing an ordered scan, so->distances[] is filled with distance
113 * data from the distance() functions before returning success.
115 * We must decompress the key in the IndexTuple before passing it to the
116 * sk_funcs (which actually are the opclass Consistent or Distance methods).
118 * Note that this function is always invoked in a short-lived memory context,
119 * so we don't need to worry about cleaning up allocated memory, either here
120 * or in the implementation of any Consistent or Distance methods.
123 gistindex_keytest(IndexScanDesc scan,
128 bool *recheck_distances_p)
130 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
131 GISTSTATE *giststate = so->giststate;
132 ScanKey key = scan->keyData;
133 int keySize = scan->numberOfKeys;
135 Relation r = scan->indexRelation;
138 *recheck_distances_p = false;
141 * If it's a leftover invalid tuple from pre-9.1, treat it as a match with
142 * minimum possible distances. This means we'll always follow it to the
145 if (GistTupleIsInvalid(tuple))
149 if (GistPageIsLeaf(page)) /* shouldn't happen */
150 elog(ERROR, "invalid GiST tuple found on leaf page");
151 for (i = 0; i < scan->numberOfOrderBys; i++)
152 so->distances[i] = -get_float8_infinity();
156 /* Check whether it matches according to the Consistent functions */
162 datum = index_getattr(tuple,
167 if (key->sk_flags & SK_ISNULL)
170 * On non-leaf page we can't conclude that child hasn't NULL
171 * values because of assumption in GiST: union (VAL, NULL) is VAL.
172 * But if on non-leaf page key IS NULL, then all children are
175 if (key->sk_flags & SK_SEARCHNULL)
177 if (GistPageIsLeaf(page) && !isNull)
182 Assert(key->sk_flags & SK_SEARCHNOTNULL);
197 gistdentryinit(giststate, key->sk_attno - 1, &de,
198 datum, r, page, offset,
202 * Call the Consistent function to evaluate the test. The
203 * arguments are the index datum (as a GISTENTRY*), the comparison
204 * datum, the comparison operator's strategy number and subtype
205 * from pg_amop, and the recheck flag.
207 * (Presently there's no need to pass the subtype since it'll
208 * always be zero, but might as well pass it for possible future
211 * We initialize the recheck flag to true (the safest assumption)
212 * in case the Consistent function forgets to set it.
216 test = FunctionCall5Coll(&key->sk_func,
218 PointerGetDatum(&de),
220 Int32GetDatum(key->sk_strategy),
221 ObjectIdGetDatum(key->sk_subtype),
222 PointerGetDatum(&recheck));
224 if (!DatumGetBool(test))
226 *recheck_p |= recheck;
233 /* OK, it passes --- now let's compute the distances */
234 key = scan->orderByData;
235 distance_p = so->distances;
236 keySize = scan->numberOfOrderBys;
242 datum = index_getattr(tuple,
247 if ((key->sk_flags & SK_ISNULL) || isNull)
249 /* Assume distance computes as null and sorts to the end */
250 *distance_p = get_float8_infinity();
258 gistdentryinit(giststate, key->sk_attno - 1, &de,
259 datum, r, page, offset,
263 * Call the Distance function to evaluate the distance. The
264 * arguments are the index datum (as a GISTENTRY*), the comparison
265 * datum, the ordering operator's strategy number and subtype from
266 * pg_amop, and the recheck flag.
268 * (Presently there's no need to pass the subtype since it'll
269 * always be zero, but might as well pass it for possible future
272 * If the function sets the recheck flag, the returned distance is
273 * a lower bound on the true distance and needs to be rechecked.
274 * We initialize the flag to 'false'. This flag was added in
275 * version 9.5; distance functions written before that won't know
276 * about the flag, but are expected to never be lossy.
279 dist = FunctionCall5Coll(&key->sk_func,
281 PointerGetDatum(&de),
283 Int32GetDatum(key->sk_strategy),
284 ObjectIdGetDatum(key->sk_subtype),
285 PointerGetDatum(&recheck));
286 *recheck_distances_p |= recheck;
287 *distance_p = DatumGetFloat8(dist);
299 * Scan all items on the GiST index page identified by *pageItem, and insert
300 * them into the queue (or directly to output areas)
302 * scan: index scan we are executing
303 * pageItem: search queue item identifying an index page to scan
304 * myDistances: distances array associated with pageItem, or NULL at the root
305 * tbm: if not NULL, gistgetbitmap's output bitmap
306 * ntids: if not NULL, gistgetbitmap's output tuple counter
308 * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
309 * tuples should be reported directly into the bitmap. If they are NULL,
310 * we're doing a plain or ordered indexscan. For a plain indexscan, heap
311 * tuple TIDs are returned into so->pageData[]. For an ordered indexscan,
312 * heap tuple TIDs are pushed into individual search queue items. In an
313 * index-only scan, reconstructed index tuples are returned along with the
316 * If we detect that the index page has split since we saw its downlink
317 * in the parent, we push its new right sibling onto the queue so the
318 * sibling will be processed next.
321 gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
322 TIDBitmap *tbm, int64 *ntids)
324 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
325 GISTSTATE *giststate = so->giststate;
326 Relation r = scan->indexRelation;
329 GISTPageOpaque opaque;
332 MemoryContext oldcxt;
334 Assert(!GISTSearchItemIsHeap(*pageItem));
336 buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
337 LockBuffer(buffer, GIST_SHARE);
338 gistcheckpage(scan->indexRelation, buffer);
339 page = BufferGetPage(buffer);
340 opaque = GistPageGetOpaque(page);
343 * Check if we need to follow the rightlink. We need to follow it if the
344 * page was concurrently split since we visited the parent (in which case
345 * parentlsn < nsn), or if the system crashed after a page split but
346 * before the downlink was inserted into the parent.
348 if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
349 (GistFollowRight(page) ||
350 pageItem->data.parentlsn < GistPageGetNSN(page)) &&
351 opaque->rightlink != InvalidBlockNumber /* sanity check */ )
353 /* There was a page split, follow right link to add pages */
354 GISTSearchItem *item;
356 /* This can't happen when starting at the root */
357 Assert(myDistances != NULL);
359 oldcxt = MemoryContextSwitchTo(so->queueCxt);
361 /* Create new GISTSearchItem for the right sibling index page */
362 item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
363 item->blkno = opaque->rightlink;
364 item->data.parentlsn = pageItem->data.parentlsn;
366 /* Insert it into the queue using same distances as for this page */
367 memcpy(item->distances, myDistances,
368 sizeof(double) * scan->numberOfOrderBys);
370 pairingheap_add(so->queue, &item->phNode);
372 MemoryContextSwitchTo(oldcxt);
375 so->nPageData = so->curPageData = 0;
377 MemoryContextReset(so->pageDataCxt);
380 * We save the LSN of the page as we read it, so that we know whether it
381 * safe to apply LP_DEAD hints to the page later. This allows us to drop
382 * the pin for MVCC scans, which allows vacuum to avoid blocking.
384 so->curPageLSN = PageGetLSN(page);
387 * check all tuples on page
389 maxoff = PageGetMaxOffsetNumber(page);
390 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
392 ItemId iid = PageGetItemId(page, i);
396 bool recheck_distances;
399 * If the scan specifies not to return killed tuples, then we treat a
400 * killed tuple as not passing the qual.
402 if(scan->ignore_killed_tuples && ItemIdIsDead(iid))
405 it = (IndexTuple) PageGetItem(page, iid);
407 * Must call gistindex_keytest in tempCxt, and clean up any leftover
410 oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt);
412 match = gistindex_keytest(scan, it, page, i,
413 &recheck, &recheck_distances);
415 MemoryContextSwitchTo(oldcxt);
416 MemoryContextReset(so->giststate->tempCxt);
418 /* Ignore tuple if it doesn't match */
422 if (tbm && GistPageIsLeaf(page))
425 * getbitmap scan, so just push heap tuple TIDs into the bitmap
426 * without worrying about ordering
428 tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
431 else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
434 * Non-ordered scan, so report tuples in so->pageData[]
436 so->pageData[so->nPageData].heapPtr = it->t_tid;
437 so->pageData[so->nPageData].recheck = recheck;
438 so->pageData[so->nPageData].offnum = i;
441 * In an index-only scan, also fetch the data from the tuple.
443 if (scan->xs_want_itup)
445 oldcxt = MemoryContextSwitchTo(so->pageDataCxt);
446 so->pageData[so->nPageData].ftup =
447 gistFetchTuple(giststate, r, it);
448 MemoryContextSwitchTo(oldcxt);
455 * Must push item into search queue. We get here for any lower
456 * index page, and also for heap tuples if doing an ordered
459 GISTSearchItem *item;
461 oldcxt = MemoryContextSwitchTo(so->queueCxt);
463 /* Create new GISTSearchItem for this item */
464 item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
466 if (GistPageIsLeaf(page))
468 /* Creating heap-tuple GISTSearchItem */
469 item->blkno = InvalidBlockNumber;
470 item->data.heap.heapPtr = it->t_tid;
471 item->data.heap.recheck = recheck;
472 item->data.heap.recheckDistances = recheck_distances;
475 * In an index-only scan, also fetch the data from the tuple.
477 if (scan->xs_want_itup)
478 item->data.heap.ftup = gistFetchTuple(giststate, r, it);
482 /* Creating index-page GISTSearchItem */
483 item->blkno = ItemPointerGetBlockNumber(&it->t_tid);
486 * LSN of current page is lsn of parent page for child. We
487 * only have a shared lock, so we need to get the LSN
490 item->data.parentlsn = BufferGetLSNAtomic(buffer);
493 /* Insert it into the queue using new distance data */
494 memcpy(item->distances, so->distances,
495 sizeof(double) * scan->numberOfOrderBys);
497 pairingheap_add(so->queue, &item->phNode);
499 MemoryContextSwitchTo(oldcxt);
503 UnlockReleaseBuffer(buffer);
507 * Extract next item (in order) from search queue
509 * Returns a GISTSearchItem or NULL. Caller must pfree item when done with it.
511 static GISTSearchItem *
512 getNextGISTSearchItem(GISTScanOpaque so)
514 GISTSearchItem *item;
516 if (!pairingheap_is_empty(so->queue))
518 item = (GISTSearchItem *) pairingheap_remove_first(so->queue);
522 /* Done when both heaps are empty */
526 /* Return item; caller is responsible to pfree it */
531 * Fetch next heap tuple in an ordered search
534 getNextNearest(IndexScanDesc scan)
536 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
542 /* free previously returned tuple */
543 pfree(scan->xs_itup);
544 scan->xs_itup = NULL;
549 GISTSearchItem *item = getNextGISTSearchItem(so);
554 if (GISTSearchItemIsHeap(*item))
556 /* found a heap item at currently minimal distance */
557 scan->xs_ctup.t_self = item->data.heap.heapPtr;
558 scan->xs_recheck = item->data.heap.recheck;
559 scan->xs_recheckorderby = item->data.heap.recheckDistances;
560 for (i = 0; i < scan->numberOfOrderBys; i++)
562 if (so->orderByTypes[i] == FLOAT8OID)
564 #ifndef USE_FLOAT8_BYVAL
565 /* must free any old value to avoid memory leakage */
566 if (!scan->xs_orderbynulls[i])
567 pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
569 scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
570 scan->xs_orderbynulls[i] = false;
572 else if (so->orderByTypes[i] == FLOAT4OID)
574 /* convert distance function's result to ORDER BY type */
575 #ifndef USE_FLOAT4_BYVAL
576 /* must free any old value to avoid memory leakage */
577 if (!scan->xs_orderbynulls[i])
578 pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
580 scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]);
581 scan->xs_orderbynulls[i] = false;
586 * If the ordering operator's return value is anything
587 * else, we don't know how to convert the float8 bound
588 * calculated by the distance function to that. The
589 * executor won't actually need the order by values we
590 * return here, if there are no lossy results, so only
591 * insist on converting if the *recheck flag is set.
593 if (scan->xs_recheckorderby)
594 elog(ERROR, "GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy");
595 scan->xs_orderbynulls[i] = true;
599 /* in an index-only scan, also return the reconstructed tuple. */
600 if (scan->xs_want_itup)
601 scan->xs_itup = item->data.heap.ftup;
606 /* visit an index page, extract its items into queue */
607 CHECK_FOR_INTERRUPTS();
609 gistScanPage(scan, item, item->distances, NULL, NULL);
619 * gistgettuple() -- Get the next tuple in the scan
622 gistgettuple(PG_FUNCTION_ARGS)
624 IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
625 ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
626 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
628 if (dir != ForwardScanDirection)
629 elog(ERROR, "GiST only supports forward scan direction");
632 PG_RETURN_BOOL(false);
636 /* Begin the scan by processing the root page */
637 GISTSearchItem fakeItem;
639 pgstat_count_index_scan(scan->indexRelation);
641 so->firstCall = false;
642 so->curPageData = so->nPageData = 0;
644 MemoryContextReset(so->pageDataCxt);
646 fakeItem.blkno = GIST_ROOT_BLKNO;
647 memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
648 gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
651 if (scan->numberOfOrderBys > 0)
653 /* Must fetch tuples in strict distance order */
654 PG_RETURN_BOOL(getNextNearest(scan));
658 /* Fetch tuples index-page-at-a-time */
661 if (so->curPageData < so->nPageData)
663 if (scan->kill_prior_tuple && so->curPageData > 0)
666 if (so->killedItems == NULL)
668 MemoryContext oldCxt =
669 MemoryContextSwitchTo(so->giststate->scanCxt);
672 (OffsetNumber *) palloc(MaxIndexTuplesPerPage
673 * sizeof(OffsetNumber));
675 MemoryContextSwitchTo(oldCxt);
677 if (so->numKilled < MaxIndexTuplesPerPage)
678 so->killedItems[so->numKilled++] =
679 so->pageData[so->curPageData - 1].offnum;
681 /* continuing to return tuples from a leaf page */
682 scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
683 scan->xs_recheck = so->pageData[so->curPageData].recheck;
685 /* in an index-only scan, also return the reconstructed tuple */
686 if (scan->xs_want_itup)
687 scan->xs_itup = so->pageData[so->curPageData].ftup;
691 PG_RETURN_BOOL(true);
695 * Check the last returned tuple and add it to killitems if
698 if (scan->kill_prior_tuple
699 && so->curPageData > 0
700 && so->curPageData == so->nPageData)
703 if (so->killedItems == NULL)
705 MemoryContext oldCxt =
706 MemoryContextSwitchTo(so->giststate->scanCxt);
709 (OffsetNumber *) palloc(MaxIndexTuplesPerPage
710 * sizeof(OffsetNumber));
712 MemoryContextSwitchTo(oldCxt);
714 if (so->numKilled < MaxIndexTuplesPerPage)
715 so->killedItems[so->numKilled++] =
716 so->pageData[so->curPageData - 1].offnum;
718 /* find and process the next index page */
721 GISTSearchItem *item;
723 if ((so->curBlkno != InvalidBlockNumber) && (so->numKilled > 0))
726 item = getNextGISTSearchItem(so);
729 PG_RETURN_BOOL(false);
731 CHECK_FOR_INTERRUPTS();
733 /* save current item BlockNumber for next gistkillitems() call */
734 so->curBlkno = item->blkno;
737 * While scanning a leaf page, ItemPointers of matching heap
738 * tuples are stored in so->pageData. If there are any on
739 * this page, we fall out of the inner "do" and loop around to
742 gistScanPage(scan, item, item->distances, NULL, NULL);
745 } while (so->nPageData == 0);
751 * gistgetbitmap() -- Get a bitmap of all heap tuple locations
754 gistgetbitmap(PG_FUNCTION_ARGS)
756 IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
757 TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
758 GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
760 GISTSearchItem fakeItem;
765 pgstat_count_index_scan(scan->indexRelation);
767 /* Begin the scan by processing the root page */
768 so->curPageData = so->nPageData = 0;
770 MemoryContextReset(so->pageDataCxt);
772 fakeItem.blkno = GIST_ROOT_BLKNO;
773 memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
774 gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
777 * While scanning a leaf page, ItemPointers of matching heap tuples will
778 * be stored directly into tbm, so we don't need to deal with them here.
782 GISTSearchItem *item = getNextGISTSearchItem(so);
787 CHECK_FOR_INTERRUPTS();
789 gistScanPage(scan, item, item->distances, tbm, &ntids);
794 PG_RETURN_INT64(ntids);
798 * Can we do index-only scans on the given index column?
800 * Opclasses that implement a fetch function support index-only scans.
803 gistcanreturn(PG_FUNCTION_ARGS)
805 Relation index = (Relation) PG_GETARG_POINTER(0);
806 int attno = PG_GETARG_INT32(1);
808 if (OidIsValid(index_getprocid(index, attno, GIST_FETCH_PROC)))
809 PG_RETURN_BOOL(true);
811 PG_RETURN_BOOL(false);