1 /*-------------------------------------------------------------------------
4 * general index access method routines
6 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/index/indexam.c
14 * index_open - open an index relation by relation OID
15 * index_close - close an index relation
16 * index_beginscan - start a scan of an index with amgettuple
17 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
18 * index_rescan - restart a scan of an index
19 * index_endscan - end a scan
20 * index_insert - insert an index tuple into a relation
21 * index_markpos - mark a scan position
22 * index_restrpos - restore a scan position
23 * index_getnext - get the next tuple from a scan
24 * index_getbitmap - get all tuples from a scan
25 * index_bulk_delete - bulk deletion of index tuples
26 * index_vacuum_cleanup - post-deletion cleanup of an index
27 * index_getprocid - get a support procedure OID
28 * index_getprocinfo - get a support procedure's lookup info
31 * This file contains the index_ routines which used
32 * to be a scattered collection of stuff in access/genam.
36 * Scans are implemented as follows:
38 * `0' represents an invalid item pointer.
39 * `-' represents an unknown item pointer.
40 * `X' represents a known item pointer.
41 * `+' represents known or invalid item pointers.
42 * `*' represents any item pointers.
44 * State is represented by a triple of these symbols in the order of
45 * previous, current, next. Note that the case of reverse scans works identically.
49 * (1) + + - => + 0 0 (if the next item pointer is invalid)
50 * (2) + + - => + X - (otherwise)
51 * (3) * 0 0 => * 0 0 (no change)
52 * (4) + X 0 => X 0 0 (shift)
53 * (5) * + X => + X - (shift, add unknown)
55 * All other states cannot occur.
57 * Note: It would be possible to cache the status of the previous and
58 * next item pointer using the flags.
60 *-------------------------------------------------------------------------
65 #include "access/relscan.h"
66 #include "access/transam.h"
67 #include "access/xact.h"
68 #include "catalog/index.h"
70 #include "storage/bufmgr.h"
71 #include "storage/lmgr.h"
72 #include "storage/predicate.h"
73 #include "utils/relcache.h"
74 #include "utils/snapmgr.h"
75 #include "utils/tqual.h"
78 /* ----------------------------------------------------------------
79 * macros used in index_ routines
81 * Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there
82 * to check that we don't try to scan or do retail insertions into an index
83 * that is currently being rebuilt or pending rebuild. This helps to catch
84 * things that don't work when reindexing system catalogs. The assertion
85 * doesn't prevent the actual rebuild because we don't use RELATION_CHECKS
86 * when calling the index AM's ambuild routine, and there is no reason for
87 * ambuild to call its subsidiary routines through this file.
88 * ----------------------------------------------------------------
/*
 * Assertion bundles expanded at the top of the index_ routines.
 *
 * RELATION_CHECKS asserts that the caller's local variable "indexRelation"
 * is a valid relation, that its rd_am pointer is set, and that
 * ReindexIsProcessingIndex() is false for the index's OID.
 *
 * The last three assertions perform the corresponding validity checks on
 * the caller's local variable "scan" and on scan->indexRelation; no
 * reindex check is made there.
 *
 * Both groups refer to these variables by name, so they only work inside
 * functions that declare them.
 */
90 #define RELATION_CHECKS \
92 AssertMacro(RelationIsValid(indexRelation)), \
93 AssertMacro(PointerIsValid(indexRelation->rd_am)), \
94 AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \
99 AssertMacro(IndexScanIsValid(scan)), \
100 AssertMacro(RelationIsValid(scan->indexRelation)), \
101 AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
/*
 * GET_REL_PROCEDURE(pname)
 *
 * Points the caller's local "procedure" at the FmgrInfo slot named pname
 * inside indexRelation->rd_aminfo.  If that slot has no valid fn_oid yet,
 * the procedure OID is read from indexRelation->rd_am->pname; an invalid
 * OID raises ERROR ("invalid <pname> regproc"), otherwise the slot is
 * filled in with fmgr_info_cxt() using the index's rd_indexcxt memory
 * context.  Later expansions for the same index then find the slot
 * already populated.
 */
104 #define GET_REL_PROCEDURE(pname) \
106 procedure = &indexRelation->rd_aminfo->pname; \
107 if (!OidIsValid(procedure->fn_oid)) \
109 RegProcedure procOid = indexRelation->rd_am->pname; \
110 if (!RegProcedureIsValid(procOid)) \
111 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
112 fmgr_info_cxt(procOid, procedure, indexRelation->rd_indexcxt); \
/*
 * GET_SCAN_PROCEDURE(pname)
 *
 * Same lookup as GET_REL_PROCEDURE, but reaches the index through the
 * caller's local "scan" (scan->indexRelation).  Sets the local "procedure"
 * to the cached FmgrInfo for AM entry point pname, filling the cache slot
 * with fmgr_info_cxt() on first use and raising ERROR if the AM's pname
 * OID is invalid.
 */
116 #define GET_SCAN_PROCEDURE(pname) \
118 procedure = &scan->indexRelation->rd_aminfo->pname; \
119 if (!OidIsValid(procedure->fn_oid)) \
121 RegProcedure procOid = scan->indexRelation->rd_am->pname; \
122 if (!RegProcedureIsValid(procOid)) \
123 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
124 fmgr_info_cxt(procOid, procedure, scan->indexRelation->rd_indexcxt); \
/*
 * Forward declaration of the file-local helper shared by index_beginscan
 * and index_beginscan_bitmap.
 */
128 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
129 int nkeys, int norderbys);
132 /* ----------------------------------------------------------------
133 * index_ interface functions
134 * ----------------------------------------------------------------
138 * index_open - open an index relation by relation OID
140 * If lockmode is not "NoLock", the specified kind of lock is
141 * obtained on the index. (Generally, NoLock should only be
142 * used if the caller knows it has some appropriate lock on the index already.)
145 * An error is raised if the index does not exist.
147 * This is a convenience routine adapted for indexscan use.
148 * Some callers may prefer to use relation_open directly.
/*
 * index_open: open a relation by OID and verify that it is an index.
 *
 * relationId - OID of the relation to open
 * lockmode   - lock mode handed through to relation_open()
 *
 * The relation is opened with relation_open(); if its relkind is not
 * RELKIND_INDEX, an error with code ERRCODE_WRONG_OBJECT_TYPE and the
 * message "\"<name>\" is not an index" is reported.
 *
 * NOTE(review): the error-level line and the return statement are not
 * visible in this listing; presumably the opened relation is returned.
 */
152 index_open(Oid relationId, LOCKMODE lockmode)
156 r = relation_open(relationId, lockmode);
158 if (r->rd_rel->relkind != RELKIND_INDEX)
160 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
161 errmsg("\"%s\" is not an index",
162 RelationGetRelationName(r))));
168 * index_close - close an index relation
170 * If lockmode is not "NoLock", we then release the specified lock.
172 * Note that it is often sensible to hold a lock beyond index_close;
173 * in that case, the lock is released automatically at xact end.
/*
 * index_close: close an index relation and optionally release a lock.
 *
 * relation - the open index relation
 * lockmode - lock to release afterwards, or NoLock to keep all locks
 *
 * The lock identifier is copied out of relation->rd_lockInfo before
 * RelationClose() is called, so the unlock step does not read from the
 * relation after it has been closed.  lockmode is asserted to lie in
 * [NoLock, MAX_LOCKMODES).  UnlockRelationId() is called only when
 * lockmode is not NoLock.
 */
177 index_close(Relation relation, LOCKMODE lockmode)
179 LockRelId relid = relation->rd_lockInfo.lockRelId;
181 Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
183 /* The relcache does the real work... */
184 RelationClose(relation);
186 if (lockmode != NoLock)
187 UnlockRelationId(&relid, lockmode);
191 * index_insert - insert an index tuple into a relation
/*
 * index_insert: hand an index-tuple insertion to the access method.
 *
 * indexRelation - index to insert into
 * heap_t_ctid   - TID of the heap tuple the new entry refers to
 * heapRelation  - heap relation that owns that tuple
 * checkUnique   - uniqueness-check mode, passed to the AM as an int32
 * (values and isnull are also forwarded to the AM; their declarations are
 * not visible in this listing.)
 *
 * The aminsert procedure is looked up with GET_REL_PROCEDURE.  When the
 * AM's ampredlocks flag is false, CheckForSerializableConflictIn() is
 * called on the index relation first.  The AM procedure is then invoked
 * with six arguments through FunctionCall6 and its Datum result is
 * returned as a bool.
 */
195 index_insert(Relation indexRelation,
198 ItemPointer heap_t_ctid,
199 Relation heapRelation,
200 IndexUniqueCheck checkUnique)
205 GET_REL_PROCEDURE(aminsert);
207 if (!(indexRelation->rd_am->ampredlocks))
208 CheckForSerializableConflictIn(indexRelation,
213 * have the am's insert proc do all the work.
215 return DatumGetBool(FunctionCall6(procedure,
216 PointerGetDatum(indexRelation),
217 PointerGetDatum(values),
218 PointerGetDatum(isnull),
219 PointerGetDatum(heap_t_ctid),
220 PointerGetDatum(heapRelation),
221 Int32GetDatum((int32) checkUnique)));
225 * index_beginscan - start a scan of an index with amgettuple
227 * Caller must be holding suitable locks on the heap and the index.
/*
 * index_beginscan: begin an index scan that will fetch heap tuples.
 *
 * heapRelation  - heap the index belongs to; stored in the scan descriptor
 * indexRelation - index to scan
 * nkeys         - number of scan keys
 * norderbys     - number of ordering keys
 * (a snapshot is also stored into scan->xs_snapshot; its parameter
 * declaration is not visible in this listing.)
 *
 * The descriptor is created by index_beginscan_internal(); this routine
 * only records the heap relation and the snapshot in it.
 */
230 index_beginscan(Relation heapRelation,
231 Relation indexRelation,
233 int nkeys, int norderbys)
237 scan = index_beginscan_internal(indexRelation, nkeys, norderbys);
240 * Save additional parameters into the scandesc. Everything else was set
241 * up by RelationGetIndexScan.
243 scan->heapRelation = heapRelation;
244 scan->xs_snapshot = snapshot;
250 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
252 * As above, caller had better be holding some lock on the parent heap
253 * relation, even though it's not explicitly mentioned here.
/*
 * index_beginscan_bitmap: begin an index scan for bitmap retrieval.
 *
 * Creates the descriptor with index_beginscan_internal(), always passing
 * zero ordering keys, and records the snapshot in scan->xs_snapshot.
 * Unlike index_beginscan, no heap relation is stored in the descriptor.
 */
256 index_beginscan_bitmap(Relation indexRelation,
262 scan = index_beginscan_internal(indexRelation, nkeys, 0);
265 * Save additional parameters into the scandesc. Everything else was set
266 * up by RelationGetIndexScan.
268 scan->xs_snapshot = snapshot;
274 * index_beginscan_internal --- common code for index_beginscan variants
/*
 * index_beginscan_internal: shared setup for both beginscan variants.
 *
 * indexRelation - index to scan
 * nkeys         - number of scan keys, passed to the AM as an int32
 * norderbys     - number of ordering keys, passed to the AM as an int32
 *
 * Steps, in order:
 *  1. look up the AM's ambeginscan procedure;
 *  2. if the AM's ampredlocks flag is false, take a predicate lock on
 *     the whole index relation with PredicateLockRelation();
 *  3. increment the relcache reference count of the index (the matching
 *     decrement is in index_endscan);
 *  4. call the AM procedure through FunctionCall3 and treat its result
 *     as the IndexScanDesc.
 */
277 index_beginscan_internal(Relation indexRelation,
278 int nkeys, int norderbys)
284 GET_REL_PROCEDURE(ambeginscan);
286 if (!(indexRelation->rd_am->ampredlocks))
287 PredicateLockRelation(indexRelation);
290 * We hold a reference count to the relcache entry throughout the scan.
292 RelationIncrementReferenceCount(indexRelation);
295 * Tell the AM to open a scan.
297 scan = (IndexScanDesc)
298 DatumGetPointer(FunctionCall3(procedure,
299 PointerGetDatum(indexRelation),
300 Int32GetDatum(nkeys),
301 Int32GetDatum(norderbys)));
307 * index_rescan - (re)start a scan of an index
309 * During a restart, the caller may specify a new set of scankeys and/or
310 * orderbykeys; but the number of keys cannot differ from what index_beginscan
311 * was told. (Later we might relax that to "must not exceed", but currently
312 * the index AMs tend to assume that scan->numberOfKeys is what to believe.)
313 * To restart the scan without changing keys, pass NULL for the key arrays.
314 * (Of course, keys *must* be passed on the first call, unless
315 * scan->numberOfKeys is zero.)
/*
 * index_rescan: reset scan state and ask the AM to (re)start the scan.
 *
 * scan      - scan descriptor to restart
 * keys      - scan keys forwarded to the AM
 * nkeys     - must equal scan->numberOfKeys (asserted)
 * orderbys  - ordering keys forwarded to the AM
 * norderbys - must equal scan->numberOfOrderBys (asserted)
 *
 * Before calling the AM's amrescan procedure, any heap buffer pin held in
 * scan->xs_cbuf is released, xs_next_hot is reset to InvalidOffsetNumber
 * so no HOT chain is resumed, and kill_prior_tuple is cleared.
 */
319 index_rescan(IndexScanDesc scan,
320 ScanKey keys, int nkeys,
321 ScanKey orderbys, int norderbys)
326 GET_SCAN_PROCEDURE(amrescan);
328 Assert(nkeys == scan->numberOfKeys);
329 Assert(norderbys == scan->numberOfOrderBys);
331 /* Release any held pin on a heap page */
332 if (BufferIsValid(scan->xs_cbuf))
334 ReleaseBuffer(scan->xs_cbuf);
335 scan->xs_cbuf = InvalidBuffer;
338 scan->xs_next_hot = InvalidOffsetNumber;
340 scan->kill_prior_tuple = false; /* for safety */
342 FunctionCall5(procedure,
343 PointerGetDatum(scan),
344 PointerGetDatum(keys),
345 Int32GetDatum(nkeys),
346 PointerGetDatum(orderbys),
347 Int32GetDatum(norderbys));
351 * index_endscan - end a scan
/*
 * index_endscan: shut down an index scan.
 *
 * Order of operations in the visible code: release any heap buffer pin
 * held in scan->xs_cbuf, call the AM's amendscan procedure, then drop the
 * relcache reference on scan->indexRelation that was taken when the scan
 * began.
 *
 * NOTE(review): the statement that frees the scan descriptor itself
 * (announced by the final comment) is not visible in this listing.
 */
355 index_endscan(IndexScanDesc scan)
360 GET_SCAN_PROCEDURE(amendscan);
362 /* Release any held pin on a heap page */
363 if (BufferIsValid(scan->xs_cbuf))
365 ReleaseBuffer(scan->xs_cbuf);
366 scan->xs_cbuf = InvalidBuffer;
369 /* End the AM's scan */
370 FunctionCall1(procedure, PointerGetDatum(scan));
372 /* Release index refcount acquired by index_beginscan */
373 RelationDecrementReferenceCount(scan->indexRelation);
375 /* Release the scan data structure itself */
380 * index_markpos - mark a scan position
/*
 * index_markpos: ask the AM to remember the current scan position.
 *
 * Looks up the AM's ammarkpos procedure and calls it with the scan
 * descriptor as its only argument; no scan fields are changed here.
 */
384 index_markpos(IndexScanDesc scan)
389 GET_SCAN_PROCEDURE(ammarkpos);
391 FunctionCall1(procedure, PointerGetDatum(scan));
395 * index_restrpos - restore a scan position
397 * NOTE: this only restores the internal scan state of the index AM.
398 * The current result tuple (scan->xs_ctup) doesn't change. See comments
399 * for ExecRestrPos().
401 * NOTE: in the presence of HOT chains, mark/restore only works correctly
402 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
403 * returnable tuple in each HOT chain, and so restoring the prior state at the
404 * granularity of the index AM is sufficient. Since the only current user
405 * of mark/restore functionality is nodeMergejoin.c, this effectively means
406 * that merge-join plans only work for MVCC snapshots. This could be fixed
407 * if necessary, but for now it seems unimportant.
/*
 * index_restrpos: ask the AM to return to the previously marked position.
 *
 * Asserts that the scan's snapshot is an MVCC snapshot.  Before calling
 * the AM's amrestrpos procedure, xs_next_hot is reset to
 * InvalidOffsetNumber and kill_prior_tuple is cleared, so neither a
 * pending HOT-chain continuation nor a pending kill request carries over
 * to the restored position.
 */
411 index_restrpos(IndexScanDesc scan)
415 Assert(IsMVCCSnapshot(scan->xs_snapshot));
418 GET_SCAN_PROCEDURE(amrestrpos);
420 scan->xs_next_hot = InvalidOffsetNumber;
422 scan->kill_prior_tuple = false; /* for safety */
424 FunctionCall1(procedure, PointerGetDatum(scan));
428 * index_getnext - get the next heap tuple from a scan
430 * The result is the next heap tuple satisfying the scan keys and the
431 * snapshot, or NULL if no more matching tuples exist. On success,
432 * the buffer containing the heap tuple is pinned (the pin will be dropped
433 * at the next index_getnext or index_endscan).
435 * Note: caller must check scan->xs_recheck, and perform rechecking of the
436 * scan keys if required. We do not do that here because we don't have
437 * enough information to do it efficiently in the general case.
/*
 * index_getnext: advance the scan to the next heap tuple to return.
 *
 * scan      - scan descriptor; the result tuple lives in scan->xs_ctup
 * direction - scan direction, passed to the AM as an int32
 *
 * What the visible statements do:
 *  - xs_hot_dead is cleared on entry.
 *  - If xs_next_hot holds a valid offset, the walk resumes on the page
 *    already pinned in xs_cbuf at that offset, and xs_next_hot is reset.
 *  - Otherwise the AM's amgettuple procedure supplies the next TID.
 *    kill_prior_tuple is set from xs_hot_dead beforehand (skipped when
 *    xactStartedInRecovery is set) and cleared right after the call.  The
 *    heap buffer is switched with ReleaseAndReadBuffer(), and
 *    heap_page_prune_opt() runs only if the buffer actually changed.
 *    xs_prev_xmax is invalidated and xs_hot_dead is set to true before
 *    the chain walk starts.
 *  - With the buffer share-locked, chain members are examined one by one:
 *    the offset is range-checked against the page, non-normal line
 *    pointers are handled (a redirect is followed only at chain start),
 *    all of *heapTuple is filled in, and the tuple's xmin is compared
 *    with xs_prev_xmax when the latter is valid.
 *  - Visibility is decided by HeapTupleSatisfiesVisibility(), followed by
 *    CheckForSerializableConflictOut().
 *  - On the path commented "we must return it", xs_next_hot and
 *    xs_prev_xmax are set for a later call only when the snapshot is not
 *    MVCC and the tuple is HOT-updated.  The tuple is then
 *    predicate-locked, the buffer lock is released and the heap fetch is
 *    counted.
 *  - On the other path, xs_hot_dead is cleared as soon as
 *    HeapTupleSatisfiesVacuum() reports anything but HEAPTUPLE_DEAD.  If
 *    the tuple is HOT-updated the walk moves to the offset in its t_ctid,
 *    otherwise the chain ends.
 *
 * On the failure exit any held heap pin is released and NULL is returned.
 *
 * NOTE(review): loop headers, several break/continue statements and the
 * success return are not visible in this listing; the summary above
 * covers only the statements that are shown.
 */
441 index_getnext(IndexScanDesc scan, ScanDirection direction)
443 HeapTuple heapTuple = &scan->xs_ctup;
444 ItemPointer tid = &heapTuple->t_self;
448 GET_SCAN_PROCEDURE(amgettuple);
450 Assert(TransactionIdIsValid(RecentGlobalXmin));
453 * We always reset xs_hot_dead; if we are here then either we are just
454 * starting the scan, or we previously returned a visible tuple, and in
455 * either case it's inappropriate to kill the prior index entry.
457 scan->xs_hot_dead = false;
465 if (scan->xs_next_hot != InvalidOffsetNumber)
468 * We are resuming scan of a HOT chain after having returned an
469 * earlier member. Must still hold pin on current heap page.
471 Assert(BufferIsValid(scan->xs_cbuf));
472 Assert(ItemPointerGetBlockNumber(tid) ==
473 BufferGetBlockNumber(scan->xs_cbuf));
474 Assert(TransactionIdIsValid(scan->xs_prev_xmax));
475 offnum = scan->xs_next_hot;
476 at_chain_start = false;
477 scan->xs_next_hot = InvalidOffsetNumber;
485 * If we scanned a whole HOT chain and found only dead tuples,
486 * tell index AM to kill its entry for that TID. We do not do this
487 * when in recovery because it may violate MVCC to do so. see
488 * comments in RelationGetIndexScan().
490 if (!scan->xactStartedInRecovery)
491 scan->kill_prior_tuple = scan->xs_hot_dead;
494 * The AM's gettuple proc finds the next index entry matching the
495 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
496 * should also set scan->xs_recheck, though we pay no attention to
499 found = DatumGetBool(FunctionCall2(procedure,
500 PointerGetDatum(scan),
501 Int32GetDatum(direction)));
503 /* Reset kill flag immediately for safety */
504 scan->kill_prior_tuple = false;
506 /* If we're out of index entries, break out of outer loop */
510 pgstat_count_index_tuples(scan->indexRelation, 1);
512 /* Switch to correct buffer if we don't have it already */
513 prev_buf = scan->xs_cbuf;
514 scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
516 ItemPointerGetBlockNumber(tid));
519 * Prune page, but only if we weren't already on this page
521 if (prev_buf != scan->xs_cbuf)
522 heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
525 /* Prepare to scan HOT chain starting at index-referenced offnum */
526 offnum = ItemPointerGetOffsetNumber(tid);
527 at_chain_start = true;
529 /* We don't know what the first tuple's xmin should be */
530 scan->xs_prev_xmax = InvalidTransactionId;
532 /* Initialize flag to detect if all entries are dead */
533 scan->xs_hot_dead = true;
536 /* Obtain share-lock on the buffer so we can examine visibility */
537 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
539 dp = (Page) BufferGetPage(scan->xs_cbuf);
541 /* Scan through possible multiple members of HOT-chain */
548 /* check for bogus TID */
549 if (offnum < FirstOffsetNumber ||
550 offnum > PageGetMaxOffsetNumber(dp))
553 lp = PageGetItemId(dp, offnum);
555 /* check for unused, dead, or redirected items */
556 if (!ItemIdIsNormal(lp))
558 /* We should only see a redirect at start of chain */
559 if (ItemIdIsRedirected(lp) && at_chain_start)
561 /* Follow the redirect */
562 offnum = ItemIdGetRedirect(lp);
563 at_chain_start = false;
566 /* else must be end of chain */
571 * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
572 * it is returned to the executor on success.
574 heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
575 heapTuple->t_len = ItemIdGetLength(lp);
576 ItemPointerSetOffsetNumber(tid, offnum);
577 heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
578 ctid = &heapTuple->t_data->t_ctid;
581 * Shouldn't see a HEAP_ONLY tuple at chain start. (This test
582 * should be unnecessary, since the chain root can't be removed
583 * while we have pin on the index entry, but let's make it
586 if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
590 * The xmin should match the previous xmax value, else chain is
591 * broken. (Note: this test is not optional because it protects
592 * us against the case where the prior chain member's xmax aborted
593 * since we looked at it.)
595 if (TransactionIdIsValid(scan->xs_prev_xmax) &&
596 !TransactionIdEquals(scan->xs_prev_xmax,
597 HeapTupleHeaderGetXmin(heapTuple->t_data)))
600 /* If it's visible per the snapshot, we must return it */
601 valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
604 CheckForSerializableConflictOut(valid, scan->heapRelation,
605 heapTuple, scan->xs_cbuf);
610 * If the snapshot is MVCC, we know that it could accept at
611 * most one member of the HOT chain, so we can skip examining
612 * any more members. Otherwise, check for continuation of the
613 * HOT-chain, and set state for next time.
615 if (IsMVCCSnapshot(scan->xs_snapshot))
616 scan->xs_next_hot = InvalidOffsetNumber;
617 else if (HeapTupleIsHotUpdated(heapTuple))
619 Assert(ItemPointerGetBlockNumber(ctid) ==
620 ItemPointerGetBlockNumber(tid));
621 scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
622 scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
625 scan->xs_next_hot = InvalidOffsetNumber;
627 PredicateLockTuple(scan->heapRelation, heapTuple);
629 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
631 pgstat_count_heap_fetch(scan->indexRelation);
637 * If we can't see it, maybe no one else can either. Check to see
638 * if the tuple is dead to all transactions. If we find that all
639 * the tuples in the HOT chain are dead, we'll signal the index AM
640 * to not return that TID on future indexscans.
642 if (scan->xs_hot_dead &&
643 HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
644 scan->xs_cbuf) != HEAPTUPLE_DEAD)
645 scan->xs_hot_dead = false;
648 * Check to see if HOT chain continues past this tuple; if so
649 * fetch the next offnum (we don't bother storing it into
650 * xs_next_hot, but must store xs_prev_xmax), and loop around.
652 if (HeapTupleIsHotUpdated(heapTuple))
654 Assert(ItemPointerGetBlockNumber(ctid) ==
655 ItemPointerGetBlockNumber(tid));
656 offnum = ItemPointerGetOffsetNumber(ctid);
657 at_chain_start = false;
658 scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
661 break; /* end of chain */
662 } /* loop over a single HOT chain */
664 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
666 /* Loop around to ask index AM for another TID */
667 scan->xs_next_hot = InvalidOffsetNumber;
670 /* Release any held pin on a heap page */
671 if (BufferIsValid(scan->xs_cbuf))
673 ReleaseBuffer(scan->xs_cbuf);
674 scan->xs_cbuf = InvalidBuffer;
677 return NULL; /* failure exit */
681 * index_getbitmap - get all tuples at once from an index scan
683 * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap.
684 * Since there's no interlock between the index scan and the eventual heap
685 * access, this is only safe to use with MVCC-based snapshots: the heap
686 * item slot could have been replaced by a newer tuple by the time we get to it.
689 * Returns the number of matching tuples found. (Note: this might be only
690 * approximate, so it should only be used for statistical purposes.)
/*
 * index_getbitmap: let the AM add all matching TIDs to a bitmap.
 *
 * scan   - scan descriptor
 * bitmap - TIDBitmap the AM fills in
 *
 * kill_prior_tuple is forced to false, then the AM's amgetbitmap
 * procedure is called with the scan and the bitmap.  Its Datum result is
 * decoded as a 64-bit count.  When USE_FLOAT8_BYVAL is not defined, that
 * Datum is a pointer and is pfree'd after decoding.  The count is then
 * reported to the statistics collector for the index.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the count is returned.
 */
694 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap)
701 GET_SCAN_PROCEDURE(amgetbitmap);
703 /* just make sure this is false... */
704 scan->kill_prior_tuple = false;
707 * have the am's getbitmap proc do all the work.
709 d = FunctionCall2(procedure,
710 PointerGetDatum(scan),
711 PointerGetDatum(bitmap));
713 ntids = DatumGetInt64(d);
715 /* If int8 is pass-by-ref, must free the result to avoid memory leak */
716 #ifndef USE_FLOAT8_BYVAL
717 pfree(DatumGetPointer(d));
720 pgstat_count_index_tuples(scan->indexRelation, ntids);
726 * index_bulk_delete - do mass deletion of index entries
728 * callback routine tells whether a given main-heap tuple is to be deleted
731 * return value is an optional palloc'd struct of statistics
/*
 * index_bulk_delete: forward a bulk-deletion request to the index AM.
 *
 * info           - vacuum information; info->index names the index
 * stats          - statistics struct passed through to the AM
 * callback       - function pointer passed to the AM
 * callback_state - opaque pointer passed to the AM with the callback
 *
 * The AM's ambulkdelete procedure is looked up on info->index and called
 * with the four arguments; its pointer result is converted to an
 * IndexBulkDeleteResult pointer.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably "result" is returned.
 */
734 IndexBulkDeleteResult *
735 index_bulk_delete(IndexVacuumInfo *info,
736 IndexBulkDeleteResult *stats,
737 IndexBulkDeleteCallback callback,
738 void *callback_state)
740 Relation indexRelation = info->index;
742 IndexBulkDeleteResult *result;
745 GET_REL_PROCEDURE(ambulkdelete);
747 result = (IndexBulkDeleteResult *)
748 DatumGetPointer(FunctionCall4(procedure,
749 PointerGetDatum(info),
750 PointerGetDatum(stats),
751 PointerGetDatum((Pointer) callback),
752 PointerGetDatum(callback_state)));
758 * index_vacuum_cleanup - do post-deletion cleanup of an index
760 * return value is an optional palloc'd struct of statistics
/*
 * index_vacuum_cleanup: forward a post-deletion cleanup request to the AM.
 *
 * info  - vacuum information; info->index names the index
 * stats - statistics struct passed through to the AM
 *
 * The AM's amvacuumcleanup procedure is looked up on info->index and
 * called with both arguments; its pointer result is converted to an
 * IndexBulkDeleteResult pointer.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably "result" is returned.
 */
763 IndexBulkDeleteResult *
764 index_vacuum_cleanup(IndexVacuumInfo *info,
765 IndexBulkDeleteResult *stats)
767 Relation indexRelation = info->index;
769 IndexBulkDeleteResult *result;
772 GET_REL_PROCEDURE(amvacuumcleanup);
774 result = (IndexBulkDeleteResult *)
775 DatumGetPointer(FunctionCall2(procedure,
776 PointerGetDatum(info),
777 PointerGetDatum(stats)));
785 * Index access methods typically require support routines that are
786 * not directly the implementation of any WHERE-clause query operator
787 * and so cannot be kept in pg_amop. Instead, such routines are kept
788 * in pg_amproc. These registered procedure OIDs are assigned numbers
789 * according to a convention established by the access method.
790 * The general index code doesn't know anything about the routines
791 * involved; it just builds an ordered list of them for
792 * each attribute on which an index is defined.
794 * As of Postgres 8.3, support routines within an operator family
795 * are further subdivided by the "left type" and "right type" of the
796 * query operator(s) that they support. The "default" functions for a
797 * particular indexed attribute are those with both types equal to
798 * the index opclass' opcintype (note that this is subtly different
799 * from the indexed attribute's own type: it may be a binary-compatible
800 * type instead). Only the default functions are stored in relcache
801 * entries --- access methods can use the syscache to look up non-default functions.
804 * This routine returns the requested default procedure OID for a
805 * particular indexed attribute.
/*
 * index_getprocid: fetch a support procedure OID from the relcache entry.
 *
 * irel - index relation
 * (attnum and procnum are used below; their declarations are not visible
 * in this listing.)
 *
 * irel->rd_support is indexed as a flat array with rd_am->amsupport
 * entries per index attribute.  attnum and procnum are both 1-based, so
 * the slot is nproc * (attnum - 1) + (procnum - 1).  procnum is asserted
 * to lie in 1..nproc.  The stored entry is returned as-is, with no
 * validity check here.
 */
809 index_getprocid(Relation irel,
817 nproc = irel->rd_am->amsupport;
819 Assert(procnum > 0 && procnum <= (uint16) nproc);
821 procindex = (nproc * (attnum - 1)) + (procnum - 1);
823 loc = irel->rd_support;
827 return loc[procindex];
833 * This routine allows index AMs to keep fmgr lookup info for
834 * support procs in the relcache. As above, only the "default"
835 * functions for any particular indexed attribute are cached.
837 * Note: the return value points into cached data that will be lost during
838 * any relcache rebuild! Therefore, either use the callinfo right away,
839 * or save it only after having acquired some type of lock on the index rel.
/*
 * index_getprocinfo: fetch cached fmgr lookup info for a support procedure.
 *
 * irel - index relation
 * (attnum and procnum are used below; their declarations are not visible
 * in this listing.)
 *
 * The slot in irel->rd_supportinfo is computed exactly as in
 * index_getprocid: nproc * (attnum - 1) + (procnum - 1), with both
 * numbers 1-based.  If the slot's fn_oid is still InvalidOid, the
 * procedure OID is read from the same position in irel->rd_support.  An
 * invalid OID raises ERROR naming the support function, attribute and
 * index; otherwise the slot is initialized with fmgr_info_cxt() in the
 * index's rd_indexcxt memory context.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the slot pointer (locinfo) is returned.
 */
843 index_getprocinfo(Relation irel,
851 nproc = irel->rd_am->amsupport;
853 Assert(procnum > 0 && procnum <= (uint16) nproc);
855 procindex = (nproc * (attnum - 1)) + (procnum - 1);
857 locinfo = irel->rd_supportinfo;
859 Assert(locinfo != NULL);
861 locinfo += procindex;
863 /* Initialize the lookup info if first time through */
864 if (locinfo->fn_oid == InvalidOid)
866 RegProcedure *loc = irel->rd_support;
871 procId = loc[procindex];
874 * Complain if function was not found during IndexSupportInitialize.
875 * This should not happen unless the system tables contain bogus
876 * entries for the index opclass. (If an AM wants to allow a support
877 * function to be optional, it can use index_getprocid.)
879 if (!RegProcedureIsValid(procId))
880 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
881 procnum, attnum, RelationGetRelationName(irel));
883 fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt);