1 /*-------------------------------------------------------------------------
4 * general index access method routines
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/index/indexam.c
14 * index_open - open an index relation by relation OID
15 * index_close - close an index relation
16 * index_beginscan - start a scan of an index with amgettuple
17 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
18 * index_rescan - restart a scan of an index
19 * index_endscan - end a scan
20 * index_insert - insert an index tuple into a relation
21 * index_markpos - mark a scan position
22 * index_restrpos - restore a scan position
23 * index_parallelscan_estimate - estimate shared memory for parallel scan
24 * index_parallelscan_initialize - initialize parallel scan
25 * index_parallelrescan - (re)start a parallel scan of an index
26 * index_beginscan_parallel - join parallel index scan
27 * index_getnext_tid - get the next TID from a scan
28 * index_fetch_heap - get the scan's next heap tuple
29 * index_getnext - get the next heap tuple from a scan
30 * index_getbitmap - get all tuples from a scan
31 * index_bulk_delete - bulk deletion of index tuples
32 * index_vacuum_cleanup - post-deletion cleanup of an index
33 * index_can_return - does index support index-only scans?
34 * index_getprocid - get a support procedure OID
35 * index_getprocinfo - get a support procedure's lookup info
38 * This file contains the index_ routines which used
39 * to be a scattered collection of stuff in access/genam.
43 * Scans are implemented as follows:
45 * `0' represents an invalid item pointer.
46 * `-' represents an unknown item pointer.
47 * `X' represents a known item pointer.
48 * `+' represents known or invalid item pointers.
49 * `*' represents any item pointers.
51 * State is represented by a triple of these symbols in the order of
52 * previous, current, next. Note that the case of reverse scans works
56 * (1) + + - + 0 0 (if the next item pointer is invalid)
57 * (2) + X - (otherwise)
58 * (3) * 0 0 * 0 0 (no change)
59 * (4) + X 0 X 0 0 (shift)
60 * (5) * + X + X - (shift, add unknown)
62 * All other states cannot occur.
64 * Note: It would be possible to cache the status of the previous and
65 * next item pointer using the flags.
67 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/amapi.h"
#include "access/relscan.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
86 /* ----------------------------------------------------------------
87 * macros used in index_ routines
89 * Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there
90 * to check that we don't try to scan or do retail insertions into an index
91 * that is currently being rebuilt or pending rebuild. This helps to catch
92 * things that don't work when reindexing system catalogs. The assertion
93 * doesn't prevent the actual rebuild because we don't use RELATION_CHECKS
94 * when calling the index AM's ambuild routine, and there is no reason for
95 * ambuild to call its subsidiary routines through this file.
96 * ----------------------------------------------------------------
98 #define RELATION_CHECKS \
100 AssertMacro(RelationIsValid(indexRelation)), \
101 AssertMacro(PointerIsValid(indexRelation->rd_amroutine)), \
102 AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \
105 #define SCAN_CHECKS \
107 AssertMacro(IndexScanIsValid(scan)), \
108 AssertMacro(RelationIsValid(scan->indexRelation)), \
109 AssertMacro(PointerIsValid(scan->indexRelation->rd_amroutine)) \
112 #define CHECK_REL_PROCEDURE(pname) \
114 if (indexRelation->rd_amroutine->pname == NULL) \
115 elog(ERROR, "function %s is not defined for index %s", \
116 CppAsString(pname), RelationGetRelationName(indexRelation)); \
119 #define CHECK_SCAN_PROCEDURE(pname) \
121 if (scan->indexRelation->rd_amroutine->pname == NULL) \
122 elog(ERROR, "function %s is not defined for index %s", \
123 CppAsString(pname), RelationGetRelationName(scan->indexRelation)); \
126 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
127 int nkeys, int norderbys, Snapshot snapshot,
128 ParallelIndexScanDesc pscan, bool temp_snap);
131 /* ----------------------------------------------------------------
132 * index_ interface functions
133 * ----------------------------------------------------------------
137 * index_open - open an index relation by relation OID
139 * If lockmode is not "NoLock", the specified kind of lock is
140 * obtained on the index. (Generally, NoLock should only be
141 * used if the caller knows it has some appropriate lock on the
144 * An error is raised if the index does not exist.
146 * This is a convenience routine adapted for indexscan use.
147 * Some callers may prefer to use relation_open directly.
151 index_open(Oid relationId, LOCKMODE lockmode)
155 r = relation_open(relationId, lockmode);
157 if (r->rd_rel->relkind != RELKIND_INDEX)
159 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
160 errmsg("\"%s\" is not an index",
161 RelationGetRelationName(r))));
167 * index_close - close an index relation
169 * If lockmode is not "NoLock", we then release the specified lock.
171 * Note that it is often sensible to hold a lock beyond index_close;
172 * in that case, the lock is released automatically at xact end.
176 index_close(Relation relation, LOCKMODE lockmode)
178 LockRelId relid = relation->rd_lockInfo.lockRelId;
180 Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
182 /* The relcache does the real work... */
183 RelationClose(relation);
185 if (lockmode != NoLock)
186 UnlockRelationId(&relid, lockmode);
190 * index_insert - insert an index tuple into a relation
194 index_insert(Relation indexRelation,
197 ItemPointer heap_t_ctid,
198 Relation heapRelation,
199 IndexUniqueCheck checkUnique)
202 CHECK_REL_PROCEDURE(aminsert);
204 if (!(indexRelation->rd_amroutine->ampredlocks))
205 CheckForSerializableConflictIn(indexRelation,
209 return indexRelation->rd_amroutine->aminsert(indexRelation, values, isnull,
210 heap_t_ctid, heapRelation,
215 * index_beginscan - start a scan of an index with amgettuple
217 * Caller must be holding suitable locks on the heap and the index.
220 index_beginscan(Relation heapRelation,
221 Relation indexRelation,
223 int nkeys, int norderbys)
227 scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false);
230 * Save additional parameters into the scandesc. Everything else was set
231 * up by RelationGetIndexScan.
233 scan->heapRelation = heapRelation;
234 scan->xs_snapshot = snapshot;
240 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
242 * As above, caller had better be holding some lock on the parent heap
243 * relation, even though it's not explicitly mentioned here.
246 index_beginscan_bitmap(Relation indexRelation,
252 scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot, NULL, false);
255 * Save additional parameters into the scandesc. Everything else was set
256 * up by RelationGetIndexScan.
258 scan->xs_snapshot = snapshot;
264 * index_beginscan_internal --- common code for index_beginscan variants
267 index_beginscan_internal(Relation indexRelation,
268 int nkeys, int norderbys, Snapshot snapshot,
269 ParallelIndexScanDesc pscan, bool temp_snap)
274 CHECK_REL_PROCEDURE(ambeginscan);
276 if (!(indexRelation->rd_amroutine->ampredlocks))
277 PredicateLockRelation(indexRelation, snapshot);
280 * We hold a reference count to the relcache entry throughout the scan.
282 RelationIncrementReferenceCount(indexRelation);
285 * Tell the AM to open a scan.
287 scan = indexRelation->rd_amroutine->ambeginscan(indexRelation, nkeys,
289 /* Initialize information for parallel scan. */
290 scan->parallel_scan = pscan;
291 scan->xs_temp_snap = temp_snap;
297 * index_rescan - (re)start a scan of an index
299 * During a restart, the caller may specify a new set of scankeys and/or
300 * orderbykeys; but the number of keys cannot differ from what index_beginscan
301 * was told. (Later we might relax that to "must not exceed", but currently
302 * the index AMs tend to assume that scan->numberOfKeys is what to believe.)
303 * To restart the scan without changing keys, pass NULL for the key arrays.
304 * (Of course, keys *must* be passed on the first call, unless
305 * scan->numberOfKeys is zero.)
309 index_rescan(IndexScanDesc scan,
310 ScanKey keys, int nkeys,
311 ScanKey orderbys, int norderbys)
314 CHECK_SCAN_PROCEDURE(amrescan);
316 Assert(nkeys == scan->numberOfKeys);
317 Assert(norderbys == scan->numberOfOrderBys);
319 /* Release any held pin on a heap page */
320 if (BufferIsValid(scan->xs_cbuf))
322 ReleaseBuffer(scan->xs_cbuf);
323 scan->xs_cbuf = InvalidBuffer;
326 scan->xs_continue_hot = false;
328 scan->kill_prior_tuple = false; /* for safety */
330 scan->indexRelation->rd_amroutine->amrescan(scan, keys, nkeys,
331 orderbys, norderbys);
335 * index_endscan - end a scan
339 index_endscan(IndexScanDesc scan)
342 CHECK_SCAN_PROCEDURE(amendscan);
344 /* Release any held pin on a heap page */
345 if (BufferIsValid(scan->xs_cbuf))
347 ReleaseBuffer(scan->xs_cbuf);
348 scan->xs_cbuf = InvalidBuffer;
351 /* End the AM's scan */
352 scan->indexRelation->rd_amroutine->amendscan(scan);
354 /* Release index refcount acquired by index_beginscan */
355 RelationDecrementReferenceCount(scan->indexRelation);
357 if (scan->xs_temp_snap)
358 UnregisterSnapshot(scan->xs_snapshot);
360 /* Release the scan data structure itself */
365 * index_markpos - mark a scan position
369 index_markpos(IndexScanDesc scan)
372 CHECK_SCAN_PROCEDURE(ammarkpos);
374 scan->indexRelation->rd_amroutine->ammarkpos(scan);
378 * index_restrpos - restore a scan position
380 * NOTE: this only restores the internal scan state of the index AM.
381 * The current result tuple (scan->xs_ctup) doesn't change. See comments
382 * for ExecRestrPos().
384 * NOTE: in the presence of HOT chains, mark/restore only works correctly
385 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
386 * returnable tuple in each HOT chain, and so restoring the prior state at the
387 * granularity of the index AM is sufficient. Since the only current user
388 * of mark/restore functionality is nodeMergejoin.c, this effectively means
389 * that merge-join plans only work for MVCC snapshots. This could be fixed
390 * if necessary, but for now it seems unimportant.
394 index_restrpos(IndexScanDesc scan)
396 Assert(IsMVCCSnapshot(scan->xs_snapshot));
399 CHECK_SCAN_PROCEDURE(amrestrpos);
401 scan->xs_continue_hot = false;
403 scan->kill_prior_tuple = false; /* for safety */
405 scan->indexRelation->rd_amroutine->amrestrpos(scan);
409 * index_parallelscan_estimate - estimate shared memory for parallel scan
411 * Currently, we don't pass any information to the AM-specific estimator,
412 * so it can probably only return a constant. In the future, we might need
413 * to pass more information.
416 index_parallelscan_estimate(Relation indexRelation, Snapshot snapshot)
422 nbytes = offsetof(ParallelIndexScanDescData, ps_snapshot_data);
423 nbytes = add_size(nbytes, EstimateSnapshotSpace(snapshot));
424 nbytes = MAXALIGN(nbytes);
427 * If amestimateparallelscan is not provided, assume there is no
428 * AM-specific data needed. (It's hard to believe that could work, but
429 * it's easy enough to cater to it here.)
431 if (indexRelation->rd_amroutine->amestimateparallelscan != NULL)
432 nbytes = add_size(nbytes,
433 indexRelation->rd_amroutine->amestimateparallelscan());
439 * index_parallelscan_initialize - initialize parallel scan
441 * We initialize both the ParallelIndexScanDesc proper and the AM-specific
442 * information which follows it.
444 * This function calls access method specific initialization routine to
445 * initialize am specific information. Call this just once in the leader
446 * process; then, individual workers attach via index_beginscan_parallel.
449 index_parallelscan_initialize(Relation heapRelation, Relation indexRelation,
450 Snapshot snapshot, ParallelIndexScanDesc target)
456 offset = add_size(offsetof(ParallelIndexScanDescData, ps_snapshot_data),
457 EstimateSnapshotSpace(snapshot));
458 offset = MAXALIGN(offset);
460 target->ps_relid = RelationGetRelid(heapRelation);
461 target->ps_indexid = RelationGetRelid(indexRelation);
462 target->ps_offset = offset;
463 SerializeSnapshot(snapshot, target->ps_snapshot_data);
465 /* aminitparallelscan is optional; assume no-op if not provided by AM */
466 if (indexRelation->rd_amroutine->aminitparallelscan != NULL)
470 amtarget = OffsetToPointer(target, offset);
471 indexRelation->rd_amroutine->aminitparallelscan(amtarget);
476 * index_parallelrescan - (re)start a parallel scan of an index
480 index_parallelrescan(IndexScanDesc scan)
484 /* amparallelrescan is optional; assume no-op if not provided by AM */
485 if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL)
486 scan->indexRelation->rd_amroutine->amparallelrescan(scan);
490 * index_beginscan_parallel - join parallel index scan
492 * Caller must be holding suitable locks on the heap and the index.
495 index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys,
496 int norderbys, ParallelIndexScanDesc pscan)
501 Assert(RelationGetRelid(heaprel) == pscan->ps_relid);
502 snapshot = RestoreSnapshot(pscan->ps_snapshot_data);
503 RegisterSnapshot(snapshot);
504 scan = index_beginscan_internal(indexrel, nkeys, norderbys, snapshot,
508 * Save additional parameters into the scandesc. Everything else was set
509 * up by index_beginscan_internal.
511 scan->heapRelation = heaprel;
512 scan->xs_snapshot = snapshot;
518 * index_getnext_tid - get the next TID from a scan
520 * The result is the next TID satisfying the scan keys,
521 * or NULL if no more matching tuples exist.
525 index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
530 CHECK_SCAN_PROCEDURE(amgettuple);
532 Assert(TransactionIdIsValid(RecentGlobalXmin));
535 * The AM's amgettuple proc finds the next index entry matching the scan
536 * keys, and puts the TID into scan->xs_ctup.t_self. It should also set
537 * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention
538 * to those fields here.
540 found = scan->indexRelation->rd_amroutine->amgettuple(scan, direction);
542 /* Reset kill flag immediately for safety */
543 scan->kill_prior_tuple = false;
545 /* If we're out of index entries, we're done */
548 /* ... but first, release any held pin on a heap page */
549 if (BufferIsValid(scan->xs_cbuf))
551 ReleaseBuffer(scan->xs_cbuf);
552 scan->xs_cbuf = InvalidBuffer;
557 pgstat_count_index_tuples(scan->indexRelation, 1);
559 /* Return the TID of the tuple we found. */
560 return &scan->xs_ctup.t_self;
564 * index_fetch_heap - get the scan's next heap tuple
566 * The result is a visible heap tuple associated with the index TID most
567 * recently fetched by index_getnext_tid, or NULL if no more matching tuples
568 * exist. (There can be more than one matching tuple because of HOT chains,
569 * although when using an MVCC snapshot it should be impossible for more than
570 * one such tuple to exist.)
572 * On success, the buffer containing the heap tup is pinned (the pin will be
573 * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
576 * Note: caller must check scan->xs_recheck, and perform rechecking of the
577 * scan keys if required. We do not do that here because we don't have
578 * enough information to do it efficiently in the general case.
582 index_fetch_heap(IndexScanDesc scan)
584 ItemPointer tid = &scan->xs_ctup.t_self;
585 bool all_dead = false;
588 /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
589 if (!scan->xs_continue_hot)
591 /* Switch to correct buffer if we don't have it already */
592 Buffer prev_buf = scan->xs_cbuf;
594 scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
596 ItemPointerGetBlockNumber(tid));
599 * Prune page, but only if we weren't already on this page
601 if (prev_buf != scan->xs_cbuf)
602 heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
605 /* Obtain share-lock on the buffer so we can examine visibility */
606 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
607 got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
612 !scan->xs_continue_hot);
613 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
618 * Only in a non-MVCC snapshot can more than one member of the HOT
621 scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
622 pgstat_count_heap_fetch(scan->indexRelation);
623 return &scan->xs_ctup;
626 /* We've reached the end of the HOT chain. */
627 scan->xs_continue_hot = false;
630 * If we scanned a whole HOT chain and found only dead tuples, tell index
631 * AM to kill its entry for that TID (this will take effect in the next
632 * amgettuple call, in index_getnext_tid). We do not do this when in
633 * recovery because it may violate MVCC to do so. See comments in
634 * RelationGetIndexScan().
636 if (!scan->xactStartedInRecovery)
637 scan->kill_prior_tuple = all_dead;
643 * index_getnext - get the next heap tuple from a scan
645 * The result is the next heap tuple satisfying the scan keys and the
646 * snapshot, or NULL if no more matching tuples exist.
648 * On success, the buffer containing the heap tup is pinned (the pin will be
649 * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
652 * Note: caller must check scan->xs_recheck, and perform rechecking of the
653 * scan keys if required. We do not do that here because we don't have
654 * enough information to do it efficiently in the general case.
658 index_getnext(IndexScanDesc scan, ScanDirection direction)
665 if (scan->xs_continue_hot)
668 * We are resuming scan of a HOT chain after having returned an
669 * earlier member. Must still hold pin on current heap page.
671 Assert(BufferIsValid(scan->xs_cbuf));
672 Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
673 BufferGetBlockNumber(scan->xs_cbuf));
677 /* Time to fetch the next TID from the index */
678 tid = index_getnext_tid(scan, direction);
680 /* If we're out of index entries, we're done */
686 * Fetch the next (or only) visible heap tuple for this index entry.
687 * If we don't find anything, loop around and grab the next TID from
690 heapTuple = index_fetch_heap(scan);
691 if (heapTuple != NULL)
695 return NULL; /* failure exit */
699 * index_getbitmap - get all tuples at once from an index scan
701 * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap.
702 * Since there's no interlock between the index scan and the eventual heap
703 * access, this is only safe to use with MVCC-based snapshots: the heap
704 * item slot could have been replaced by a newer tuple by the time we get
707 * Returns the number of matching tuples found. (Note: this might be only
708 * approximate, so it should only be used for statistical purposes.)
712 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap)
717 CHECK_SCAN_PROCEDURE(amgetbitmap);
719 /* just make sure this is false... */
720 scan->kill_prior_tuple = false;
723 * have the am's getbitmap proc do all the work.
725 ntids = scan->indexRelation->rd_amroutine->amgetbitmap(scan, bitmap);
727 pgstat_count_index_tuples(scan->indexRelation, ntids);
733 * index_bulk_delete - do mass deletion of index entries
735 * callback routine tells whether a given main-heap tuple is
738 * return value is an optional palloc'd struct of statistics
741 IndexBulkDeleteResult *
742 index_bulk_delete(IndexVacuumInfo *info,
743 IndexBulkDeleteResult *stats,
744 IndexBulkDeleteCallback callback,
745 void *callback_state)
747 Relation indexRelation = info->index;
750 CHECK_REL_PROCEDURE(ambulkdelete);
752 return indexRelation->rd_amroutine->ambulkdelete(info, stats,
753 callback, callback_state);
757 * index_vacuum_cleanup - do post-deletion cleanup of an index
759 * return value is an optional palloc'd struct of statistics
762 IndexBulkDeleteResult *
763 index_vacuum_cleanup(IndexVacuumInfo *info,
764 IndexBulkDeleteResult *stats)
766 Relation indexRelation = info->index;
769 CHECK_REL_PROCEDURE(amvacuumcleanup);
771 return indexRelation->rd_amroutine->amvacuumcleanup(info, stats);
777 * Does the index access method support index-only scans for the given
782 index_can_return(Relation indexRelation, int attno)
786 /* amcanreturn is optional; assume FALSE if not provided by AM */
787 if (indexRelation->rd_amroutine->amcanreturn == NULL)
790 return indexRelation->rd_amroutine->amcanreturn(indexRelation, attno);
796 * Index access methods typically require support routines that are
797 * not directly the implementation of any WHERE-clause query operator
798 * and so cannot be kept in pg_amop. Instead, such routines are kept
799 * in pg_amproc. These registered procedure OIDs are assigned numbers
800 * according to a convention established by the access method.
801 * The general index code doesn't know anything about the routines
802 * involved; it just builds an ordered list of them for
803 * each attribute on which an index is defined.
805 * As of Postgres 8.3, support routines within an operator family
806 * are further subdivided by the "left type" and "right type" of the
807 * query operator(s) that they support. The "default" functions for a
808 * particular indexed attribute are those with both types equal to
809 * the index opclass' opcintype (note that this is subtly different
810 * from the indexed attribute's own type: it may be a binary-compatible
811 * type instead). Only the default functions are stored in relcache
812 * entries --- access methods can use the syscache to look up non-default
815 * This routine returns the requested default procedure OID for a
816 * particular indexed attribute.
820 index_getprocid(Relation irel,
828 nproc = irel->rd_amroutine->amsupport;
830 Assert(procnum > 0 && procnum <= (uint16) nproc);
832 procindex = (nproc * (attnum - 1)) + (procnum - 1);
834 loc = irel->rd_support;
838 return loc[procindex];
844 * This routine allows index AMs to keep fmgr lookup info for
845 * support procs in the relcache. As above, only the "default"
846 * functions for any particular indexed attribute are cached.
848 * Note: the return value points into cached data that will be lost during
849 * any relcache rebuild! Therefore, either use the callinfo right away,
850 * or save it only after having acquired some type of lock on the index rel.
854 index_getprocinfo(Relation irel,
862 nproc = irel->rd_amroutine->amsupport;
864 Assert(procnum > 0 && procnum <= (uint16) nproc);
866 procindex = (nproc * (attnum - 1)) + (procnum - 1);
868 locinfo = irel->rd_supportinfo;
870 Assert(locinfo != NULL);
872 locinfo += procindex;
874 /* Initialize the lookup info if first time through */
875 if (locinfo->fn_oid == InvalidOid)
877 RegProcedure *loc = irel->rd_support;
882 procId = loc[procindex];
885 * Complain if function was not found during IndexSupportInitialize.
886 * This should not happen unless the system tables contain bogus
887 * entries for the index opclass. (If an AM wants to allow a support
888 * function to be optional, it can use index_getprocid.)
890 if (!RegProcedureIsValid(procId))
891 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
892 procnum, attnum, RelationGetRelationName(irel));
894 fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt);