1 /*-------------------------------------------------------------------------
4 * general index access method routines
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.103 2008/03/26 18:48:59 alvherre Exp $
14 * index_open - open an index relation by relation OID
15 * index_close - close an index relation
16 * index_beginscan - start a scan of an index with amgettuple
17 * index_beginscan_multi - start a scan of an index with amgetmulti
18 * index_rescan - restart a scan of an index
19 * index_endscan - end a scan
20 * index_insert - insert an index tuple into a relation
21 * index_markpos - mark a scan position
22 * index_restrpos - restore a scan position
23 * index_getnext - get the next tuple from a scan
24 * index_getmulti - get multiple tuples from a scan
25 * index_bulk_delete - bulk deletion of index tuples
26 * index_vacuum_cleanup - post-deletion cleanup of an index
27 * index_getprocid - get a support procedure OID
28 * index_getprocinfo - get a support procedure's lookup info
31 * This file contains the index_ routines which used
32 * to be a scattered collection of stuff in access/genam.
36 * Scans are implemented as follows:
38 * `0' represents an invalid item pointer.
39 * `-' represents an unknown item pointer.
40 * `X' represents a known item pointer.
41 * `+' represents known or invalid item pointers.
42 * `*' represents any item pointers.
44 * State is represented by a triple of these symbols in the order of
45 * previous, current, next. Note that the case of reverse scans works
49 * (1) + + - + 0 0 (if the next item pointer is invalid)
50 * (2) + X - (otherwise)
51 * (3) * 0 0 * 0 0 (no change)
52 * (4) + X 0 X 0 0 (shift)
53 * (5) * + X + X - (shift, add unknown)
55 * All other states cannot occur.
57 * Note: It would be possible to cache the status of the previous and
58 * next item pointer using the flags.
60 *-------------------------------------------------------------------------
65 #include "access/genam.h"
66 #include "access/heapam.h"
67 #include "access/transam.h"
69 #include "utils/relcache.h"
70 #include "utils/snapmgr.h"
73 /* ----------------------------------------------------------------
74 * macros used in index_ routines
75 * ----------------------------------------------------------------
/*
 * Sanity-check macros built from comma-separated AssertMacro() tests.
 *
 * The first group tests that indexRelation passes RelationIsValid and that
 * indexRelation->rd_am passes PointerIsValid.  The second group tests that
 * scan passes IndexScanIsValid and then applies the same two tests to
 * scan->indexRelation.  Both groups refer to variables (indexRelation, scan)
 * by name, so those names must exist at the point of use.
 */
77 #define RELATION_CHECKS \
79 AssertMacro(RelationIsValid(indexRelation)), \
80 AssertMacro(PointerIsValid(indexRelation->rd_am)) \
85 AssertMacro(IndexScanIsValid(scan)), \
86 AssertMacro(RelationIsValid(scan->indexRelation)), \
87 AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
/*
 * GET_REL_PROCEDURE(pname)
 *
 * Sets the variable named procedure to the address of the pname member of
 * indexRelation->rd_aminfo.  If the fn_oid stored there is not yet a valid
 * OID, the macro reads the regproc of the same name from
 * indexRelation->rd_am, raises an ERROR when that regproc is invalid, and
 * otherwise fills in the lookup info with fmgr_info_cxt(), passing
 * indexRelation->rd_indexcxt as the context argument.
 *
 * The macro refers to procedure and indexRelation by name, so both must be
 * declared in the scope where it is used.
 */
90 #define GET_REL_PROCEDURE(pname) \
92 procedure = &indexRelation->rd_aminfo->pname; \
93 if (!OidIsValid(procedure->fn_oid)) \
95 RegProcedure procOid = indexRelation->rd_am->pname; \
96 if (!RegProcedureIsValid(procOid)) \
97 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
98 fmgr_info_cxt(procOid, procedure, indexRelation->rd_indexcxt); \
/*
 * GET_SCAN_PROCEDURE(pname)
 *
 * Same lookup as GET_REL_PROCEDURE, but the index relation is reached
 * through scan->indexRelation.  Sets the variable named procedure to the
 * address of the pname member of scan->indexRelation->rd_aminfo; when its
 * fn_oid is not yet valid, the regproc is taken from
 * scan->indexRelation->rd_am, an ERROR is raised if it is invalid, and
 * fmgr_info_cxt() fills in the lookup info using rd_indexcxt of that
 * relation.
 *
 * The macro refers to procedure and scan by name, so both must be declared
 * in the scope where it is used.
 */
102 #define GET_SCAN_PROCEDURE(pname) \
104 procedure = &scan->indexRelation->rd_aminfo->pname; \
105 if (!OidIsValid(procedure->fn_oid)) \
107 RegProcedure procOid = scan->indexRelation->rd_am->pname; \
108 if (!RegProcedureIsValid(procOid)) \
109 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
110 fmgr_info_cxt(procOid, procedure, scan->indexRelation->rd_indexcxt); \
/*
 * Forward declaration of the file-local helper that is called by both
 * index_beginscan and index_beginscan_multi; it is defined further down
 * in this file.
 */
114 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
115 int nkeys, ScanKey key);
118 /* ----------------------------------------------------------------
119 * index_ interface functions
120 * ----------------------------------------------------------------
124 * index_open - open an index relation by relation OID
126 * If lockmode is not "NoLock", the specified kind of lock is
127 * obtained on the index. (Generally, NoLock should only be
128 * used if the caller knows it has some appropriate lock on the
131 * An error is raised if the index does not exist.
133 * This is a convenience routine adapted for indexscan use.
134 * Some callers may prefer to use relation_open directly.
/*
 * index_open
 *
 * Opens the relation identified by relationId via relation_open(), passing
 * lockmode through unchanged, and then checks that the relkind of the
 * opened relation is RELKIND_INDEX.  When it is not, an error with code
 * ERRCODE_WRONG_OBJECT_TYPE is reported, naming the relation.
 *
 * relationId: OID of the relation to open.
 * lockmode:   lock mode handed to relation_open().
 */
138 index_open(Oid relationId, LOCKMODE lockmode)
142 r = relation_open(relationId, lockmode);
144 if (r->rd_rel->relkind != RELKIND_INDEX)
146 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
147 errmsg("\"%s\" is not an index",
148 RelationGetRelationName(r))));
154 * index_close - close an index relation
156 * If lockmode is not "NoLock", we then release the specified lock.
158 * Note that it is often sensible to hold a lock beyond index_close;
159 * in that case, the lock is released automatically at xact end.
/*
 * index_close
 *
 * Closes the relcache reference with RelationClose() and, when lockmode is
 * not NoLock, releases that lock with UnlockRelationId().
 *
 * The lock identifier is copied out of relation->rd_lockInfo before
 * RelationClose() is called and the copy is what gets passed to
 * UnlockRelationId(); presumably this avoids touching the relcache entry
 * after it has been closed.
 *
 * lockmode is asserted to lie in the range NoLock .. MAX_LOCKMODES - 1.
 */
163 index_close(Relation relation, LOCKMODE lockmode)
165 LockRelId relid = relation->rd_lockInfo.lockRelId;
167 Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
169 /* The relcache does the real work... */
170 RelationClose(relation);
172 if (lockmode != NoLock)
173 UnlockRelationId(&relid, lockmode);
177 * index_insert - insert an index tuple into a relation
/*
 * index_insert
 *
 * Resolves the aminsert procedure of the index access method through
 * GET_REL_PROCEDURE and invokes it with six arguments, in this order:
 * indexRelation, values, isnull, heap_t_ctid, heapRelation and
 * check_uniqueness.  Pointer arguments are wrapped with PointerGetDatum
 * and the flag with BoolGetDatum.
 *
 * Returns the result of the access method procedure converted with
 * DatumGetBool.
 */
181 index_insert(Relation indexRelation,
184 ItemPointer heap_t_ctid,
185 Relation heapRelation,
186 bool check_uniqueness)
191 GET_REL_PROCEDURE(aminsert);
194 * have the am's insert proc do all the work.
196 return DatumGetBool(FunctionCall6(procedure,
197 PointerGetDatum(indexRelation),
198 PointerGetDatum(values),
199 PointerGetDatum(isnull),
200 PointerGetDatum(heap_t_ctid),
201 PointerGetDatum(heapRelation),
202 BoolGetDatum(check_uniqueness)));
206 * index_beginscan - start a scan of an index with amgettuple
208 * Note: heapRelation may be NULL if there is no intention of calling
209 * index_getnext on this scan; index_getnext_indexitem will not use the
210 * heapRelation link (nor the snapshot). However, the caller had better
211 * be holding some kind of lock on the heap relation in any case, to ensure
212 * no one deletes it (or the index) out from under us. Caller must also
213 * be holding a lock on the index.
/*
 * index_beginscan
 *
 * Creates the scan descriptor by calling index_beginscan_internal() with
 * indexRelation, nkeys and key, then records the caller-supplied context
 * in it: is_multiscan is set to false, heapRelation is stored, and
 * xs_snapshot is set to snapshot.
 */
216 index_beginscan(Relation heapRelation,
217 Relation indexRelation,
219 int nkeys, ScanKey key)
223 scan = index_beginscan_internal(indexRelation, nkeys, key);
226 * Save additional parameters into the scandesc. Everything else was set
227 * up by RelationGetIndexScan.
229 scan->is_multiscan = false;
230 scan->heapRelation = heapRelation;
231 scan->xs_snapshot = snapshot;
237 * index_beginscan_multi - start a scan of an index with amgetmulti
239 * As above, caller had better be holding some lock on the parent heap
240 * relation, even though it's not explicitly mentioned here.
/*
 * index_beginscan_multi
 *
 * Counterpart of index_beginscan for amgetmulti scans.  Creates the scan
 * descriptor by calling index_beginscan_internal(), then sets is_multiscan
 * to true and stores snapshot in xs_snapshot.  Unlike index_beginscan, the
 * visible code stores no heap relation in the descriptor.
 */
243 index_beginscan_multi(Relation indexRelation,
245 int nkeys, ScanKey key)
249 scan = index_beginscan_internal(indexRelation, nkeys, key);
252 * Save additional parameters into the scandesc. Everything else was set
253 * up by RelationGetIndexScan.
255 scan->is_multiscan = true;
256 scan->xs_snapshot = snapshot;
262 * index_beginscan_internal --- common code for index_beginscan variants
/*
 * index_beginscan_internal
 *
 * Shared setup for the index_beginscan variants.  Resolves the ambeginscan
 * procedure through GET_REL_PROCEDURE, increments the reference count of
 * indexRelation (it is decremented again in index_endscan), and calls the
 * access method with three arguments: indexRelation, nkeys and key.  The
 * Datum returned by the access method is converted to an IndexScanDesc.
 */
265 index_beginscan_internal(Relation indexRelation,
266 int nkeys, ScanKey key)
272 GET_REL_PROCEDURE(ambeginscan);
275 * We hold a reference count to the relcache entry throughout the scan.
277 RelationIncrementReferenceCount(indexRelation);
280 * Tell the AM to open a scan.
282 scan = (IndexScanDesc)
283 DatumGetPointer(FunctionCall3(procedure,
284 PointerGetDatum(indexRelation),
285 Int32GetDatum(nkeys),
286 PointerGetDatum(key)));
292 * index_rescan - (re)start a scan of an index
294 * The caller may specify a new set of scankeys (but the number of keys
295 * cannot change). To restart the scan without changing keys, pass NULL
298 * Note that this is also called when first starting an indexscan;
299 * see RelationGetIndexScan. Keys *must* be passed in that case,
300 * unless scan->numberOfKeys is zero.
/*
 * index_rescan
 *
 * Resets the generic scan state and then hands control to the amrescan
 * procedure of the access method.  Before the call:
 *   - a valid buffer in scan->xs_cbuf is released and the field is set to
 *     InvalidBuffer;
 *   - scan->xs_next_hot is set to InvalidOffsetNumber;
 *   - scan->kill_prior_tuple is cleared.
 * The access method is called with two arguments: scan and key.
 */
304 index_rescan(IndexScanDesc scan, ScanKey key)
309 GET_SCAN_PROCEDURE(amrescan);
311 /* Release any held pin on a heap page */
312 if (BufferIsValid(scan->xs_cbuf))
314 ReleaseBuffer(scan->xs_cbuf);
315 scan->xs_cbuf = InvalidBuffer;
318 scan->xs_next_hot = InvalidOffsetNumber;
320 scan->kill_prior_tuple = false; /* for safety */
322 FunctionCall2(procedure,
323 PointerGetDatum(scan),
324 PointerGetDatum(key));
328 * index_endscan - end a scan
/*
 * index_endscan
 *
 * Tears down a scan in this order: release a valid buffer held in
 * scan->xs_cbuf (resetting the field to InvalidBuffer), call the amendscan
 * procedure of the access method with scan as its only argument, and
 * decrement the reference count on scan->indexRelation.  That decrement
 * pairs with the increment performed in index_beginscan_internal.
 */
332 index_endscan(IndexScanDesc scan)
337 GET_SCAN_PROCEDURE(amendscan);
339 /* Release any held pin on a heap page */
340 if (BufferIsValid(scan->xs_cbuf))
342 ReleaseBuffer(scan->xs_cbuf);
343 scan->xs_cbuf = InvalidBuffer;
346 /* End the AM's scan */
347 FunctionCall1(procedure, PointerGetDatum(scan));
349 /* Release index refcount acquired by index_beginscan */
350 RelationDecrementReferenceCount(scan->indexRelation);
352 /* Release the scan data structure itself */
357 * index_markpos - mark a scan position
/*
 * index_markpos
 *
 * Resolves the ammarkpos procedure through GET_SCAN_PROCEDURE and calls it
 * with scan as its only argument.  No generic scan state is modified by
 * the visible code.
 */
361 index_markpos(IndexScanDesc scan)
366 GET_SCAN_PROCEDURE(ammarkpos);
368 FunctionCall1(procedure, PointerGetDatum(scan));
372 * index_restrpos - restore a scan position
374 * NOTE: this only restores the internal scan state of the index AM.
375 * The current result tuple (scan->xs_ctup) doesn't change. See comments
376 * for ExecRestrPos().
378 * NOTE: in the presence of HOT chains, mark/restore only works correctly
379 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
380 * returnable tuple in each HOT chain, and so restoring the prior state at the
381 * granularity of the index AM is sufficient. Since the only current user
382 * of mark/restore functionality is nodeMergejoin.c, this effectively means
383 * that merge-join plans only work for MVCC snapshots. This could be fixed
384 * if necessary, but for now it seems unimportant.
/*
 * index_restrpos
 *
 * Asserts that scan->xs_snapshot is an MVCC snapshot, then resets
 * scan->xs_next_hot to InvalidOffsetNumber and clears
 * scan->kill_prior_tuple before calling the amrestrpos procedure with scan
 * as its only argument.
 */
388 index_restrpos(IndexScanDesc scan)
392 Assert(IsMVCCSnapshot(scan->xs_snapshot));
395 GET_SCAN_PROCEDURE(amrestrpos);
397 scan->xs_next_hot = InvalidOffsetNumber;
399 scan->kill_prior_tuple = false; /* for safety */
401 FunctionCall1(procedure, PointerGetDatum(scan));
405 * index_getnext - get the next heap tuple from a scan
407 * The result is the next heap tuple satisfying the scan keys and the
408 * snapshot, or NULL if no more matching tuples exist. On success,
409 * the buffer containing the heap tuple is pinned (the pin will be dropped
410 * at the next index_getnext or index_endscan).
/*
 * index_getnext
 *
 * The working tuple is scan->xs_ctup and tid aliases its t_self field.
 * Outline of the visible logic:
 *
 * 1. scan->xs_hot_dead is cleared on entry.
 *
 * 2. Choosing the starting offset:
 *    - If scan->xs_next_hot is set, a HOT chain is being resumed.  The
 *      code asserts that xs_cbuf is valid, that it holds the block named
 *      by tid, and that xs_prev_xmax is valid; it then continues at that
 *      offset and resets xs_next_hot.
 *    - Otherwise kill_prior_tuple is loaded from xs_hot_dead, amgettuple
 *      is called with scan and direction, and kill_prior_tuple is cleared
 *      again.  For a returned entry the index tuple counter is bumped,
 *      xs_cbuf is switched to the heap block of tid through
 *      ReleaseAndReadBuffer, and heap_page_prune_opt is called only when
 *      the buffer actually changed.  The chain walk then starts at the
 *      offset stored in tid with xs_prev_xmax invalid and xs_hot_dead set
 *      to true.
 *
 * 3. Walking the chain with the buffer share-locked:
 *    - An offset outside FirstOffsetNumber .. PageGetMaxOffsetNumber, or a
 *      line pointer that is not normal, ends the chain; the one exception
 *      is a redirect seen at chain start, which is followed.
 *    - All of t_data, t_len, the offset part of t_self, and t_tableOid are
 *      filled in for each candidate.
 *    - A heap-only tuple at chain start, or an xmin that differs from a
 *      valid xs_prev_xmax, is tested for (the action taken is not visible
 *      in this listing; the comments describe these as broken-chain
 *      cases).
 *    - A tuple that passes HeapTupleSatisfiesVisibility is the result.
 *      With an MVCC snapshot xs_next_hot is left invalid; otherwise, if
 *      the tuple is HOT-updated, xs_next_hot and xs_prev_xmax are set from
 *      its t_ctid and xmax so that the next call resumes the chain.  The
 *      buffer lock is dropped and pgstat_count_heap_fetch is called.
 *    - For a tuple that is not visible, xs_hot_dead is cleared unless
 *      HeapTupleSatisfiesVacuum reports HEAPTUPLE_DEAD.  If the tuple is
 *      HOT-updated the walk moves to the offset in t_ctid and records
 *      xmax in xs_prev_xmax; otherwise the chain ends.
 *
 * 4. After a chain ends without a result, the buffer lock is dropped,
 *    xs_next_hot is reset, and the access method is asked for another TID.
 *
 * 5. On the failure exit any pin held in xs_cbuf is released, the field is
 *    set to InvalidBuffer, and NULL is returned.
 */
414 index_getnext(IndexScanDesc scan, ScanDirection direction)
416 HeapTuple heapTuple = &scan->xs_ctup;
417 ItemPointer tid = &heapTuple->t_self;
421 GET_SCAN_PROCEDURE(amgettuple);
424 * We always reset xs_hot_dead; if we are here then either we are just
425 * starting the scan, or we previously returned a visible tuple, and in
426 * either case it's inappropriate to kill the prior index entry.
428 scan->xs_hot_dead = false;
436 if (scan->xs_next_hot != InvalidOffsetNumber)
439 * We are resuming scan of a HOT chain after having returned an
440 * earlier member. Must still hold pin on current heap page.
442 Assert(BufferIsValid(scan->xs_cbuf));
443 Assert(ItemPointerGetBlockNumber(tid) ==
444 BufferGetBlockNumber(scan->xs_cbuf));
445 Assert(TransactionIdIsValid(scan->xs_prev_xmax));
446 offnum = scan->xs_next_hot;
447 at_chain_start = false;
448 scan->xs_next_hot = InvalidOffsetNumber;
456 * If we scanned a whole HOT chain and found only dead tuples,
457 * tell index AM to kill its entry for that TID.
459 scan->kill_prior_tuple = scan->xs_hot_dead;
462 * The AM's gettuple proc finds the next index entry matching the
463 * scan keys, and puts the TID in xs_ctup.t_self (ie, *tid).
465 found = DatumGetBool(FunctionCall2(procedure,
466 PointerGetDatum(scan),
467 Int32GetDatum(direction)));
469 /* Reset kill flag immediately for safety */
470 scan->kill_prior_tuple = false;
472 /* If we're out of index entries, break out of outer loop */
476 pgstat_count_index_tuples(scan->indexRelation, 1);
478 /* Switch to correct buffer if we don't have it already */
479 prev_buf = scan->xs_cbuf;
480 scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
482 ItemPointerGetBlockNumber(tid));
485 * Prune page, but only if we weren't already on this page
487 if (prev_buf != scan->xs_cbuf)
488 heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
491 /* Prepare to scan HOT chain starting at index-referenced offnum */
492 offnum = ItemPointerGetOffsetNumber(tid);
493 at_chain_start = true;
495 /* We don't know what the first tuple's xmin should be */
496 scan->xs_prev_xmax = InvalidTransactionId;
498 /* Initialize flag to detect if all entries are dead */
499 scan->xs_hot_dead = true;
502 /* Obtain share-lock on the buffer so we can examine visibility */
503 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
505 dp = (Page) BufferGetPage(scan->xs_cbuf);
507 /* Scan through possible multiple members of HOT-chain */
513 /* check for bogus TID */
514 if (offnum < FirstOffsetNumber ||
515 offnum > PageGetMaxOffsetNumber(dp))
518 lp = PageGetItemId(dp, offnum);
520 /* check for unused, dead, or redirected items */
521 if (!ItemIdIsNormal(lp))
523 /* We should only see a redirect at start of chain */
524 if (ItemIdIsRedirected(lp) && at_chain_start)
526 /* Follow the redirect */
527 offnum = ItemIdGetRedirect(lp);
528 at_chain_start = false;
531 /* else must be end of chain */
536 * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
537 * it is returned to the executor on success.
539 heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
540 heapTuple->t_len = ItemIdGetLength(lp);
541 ItemPointerSetOffsetNumber(tid, offnum);
542 heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
543 ctid = &heapTuple->t_data->t_ctid;
546 * Shouldn't see a HEAP_ONLY tuple at chain start. (This test
547 * should be unnecessary, since the chain root can't be removed
548 * while we have pin on the index entry, but let's make it
551 if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
555 * The xmin should match the previous xmax value, else chain is
556 * broken. (Note: this test is not optional because it protects
557 * us against the case where the prior chain member's xmax aborted
558 * since we looked at it.)
560 if (TransactionIdIsValid(scan->xs_prev_xmax) &&
561 !TransactionIdEquals(scan->xs_prev_xmax,
562 HeapTupleHeaderGetXmin(heapTuple->t_data)))
565 /* If it's visible per the snapshot, we must return it */
566 if (HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
570 * If the snapshot is MVCC, we know that it could accept at
571 * most one member of the HOT chain, so we can skip examining
572 * any more members. Otherwise, check for continuation of the
573 * HOT-chain, and set state for next time.
575 if (IsMVCCSnapshot(scan->xs_snapshot))
576 scan->xs_next_hot = InvalidOffsetNumber;
577 else if (HeapTupleIsHotUpdated(heapTuple))
579 Assert(ItemPointerGetBlockNumber(ctid) ==
580 ItemPointerGetBlockNumber(tid));
581 scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
582 scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
585 scan->xs_next_hot = InvalidOffsetNumber;
587 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
589 pgstat_count_heap_fetch(scan->indexRelation);
595 * If we can't see it, maybe no one else can either. Check to see
596 * if the tuple is dead to all transactions. If we find that all
597 * the tuples in the HOT chain are dead, we'll signal the index AM
598 * to not return that TID on future indexscans.
600 if (scan->xs_hot_dead &&
601 HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
602 scan->xs_cbuf) != HEAPTUPLE_DEAD)
603 scan->xs_hot_dead = false;
606 * Check to see if HOT chain continues past this tuple; if so
607 * fetch the next offnum (we don't bother storing it into
608 * xs_next_hot, but must store xs_prev_xmax), and loop around.
610 if (HeapTupleIsHotUpdated(heapTuple))
612 Assert(ItemPointerGetBlockNumber(ctid) ==
613 ItemPointerGetBlockNumber(tid));
614 offnum = ItemPointerGetOffsetNumber(ctid);
615 at_chain_start = false;
616 scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
619 break; /* end of chain */
620 } /* loop over a single HOT chain */
622 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
624 /* Loop around to ask index AM for another TID */
625 scan->xs_next_hot = InvalidOffsetNumber;
628 /* Release any held pin on a heap page */
629 if (BufferIsValid(scan->xs_cbuf))
631 ReleaseBuffer(scan->xs_cbuf);
632 scan->xs_cbuf = InvalidBuffer;
635 return NULL; /* failure exit */
639 * index_getnext_indexitem - get the next index tuple from a scan
641 * Finds the next index tuple satisfying the scan keys. Note that the
642 * corresponding heap tuple is not accessed, and thus no time qual (snapshot)
643 * check is done, other than the index AM's internal check for killed tuples
644 * (which most callers of this routine will probably want to suppress by
645 * setting scan->ignore_killed_tuples = false).
647 * On success (TRUE return), the heap TID of the found index entry is in
648 * scan->xs_ctup.t_self. scan->xs_cbuf is untouched.
/*
 * index_getnext_indexitem
 *
 * Clears scan->kill_prior_tuple and calls the amgettuple procedure with
 * scan and direction; the Datum result is converted with DatumGetBool and
 * stored in found.  The index tuple counter for scan->indexRelation is
 * incremented by one through pgstat_count_index_tuples (the condition
 * guarding that call, if any, is not visible in this listing).
 *
 * The visible code performs no heap access and does not touch xs_cbuf.
 */
652 index_getnext_indexitem(IndexScanDesc scan,
653 ScanDirection direction)
659 GET_SCAN_PROCEDURE(amgettuple);
661 /* just make sure this is false... */
662 scan->kill_prior_tuple = false;
665 * have the am's gettuple proc do all the work.
667 found = DatumGetBool(FunctionCall2(procedure,
668 PointerGetDatum(scan),
669 Int32GetDatum(direction)));
672 pgstat_count_index_tuples(scan->indexRelation, 1);
678 * index_getmulti - get multiple tuples from an index scan
680 * Collects the TIDs of multiple heap tuples satisfying the scan keys.
681 * Since there's no interlock between the index scan and the eventual heap
682 * access, this is only safe to use with MVCC-based snapshots: the heap
683 * item slot could have been replaced by a newer tuple by the time we get
686 * A TRUE result indicates more calls should occur; a FALSE result says the
687 * scan is done. *returned_tids could be zero or nonzero in either case.
/*
 * index_getmulti
 *
 * Clears scan->kill_prior_tuple and calls the amgetmulti procedure with
 * four arguments: scan, the tids output array, max_tids, and the
 * returned_tids output pointer.  The Datum result is converted with
 * DatumGetBool and stored in found.  Afterwards the index tuple counter
 * for scan->indexRelation is advanced by *returned_tids, so the access
 * method is expected to have written that value.
 */
691 index_getmulti(IndexScanDesc scan,
692 ItemPointer tids, int32 max_tids,
693 int32 *returned_tids)
699 GET_SCAN_PROCEDURE(amgetmulti);
701 /* just make sure this is false... */
702 scan->kill_prior_tuple = false;
705 * have the am's getmulti proc do all the work.
707 found = DatumGetBool(FunctionCall4(procedure,
708 PointerGetDatum(scan),
709 PointerGetDatum(tids),
710 Int32GetDatum(max_tids),
711 PointerGetDatum(returned_tids)));
713 pgstat_count_index_tuples(scan->indexRelation, *returned_tids);
719 * index_bulk_delete - do mass deletion of index entries
721 * callback routine tells whether a given main-heap tuple is
724 * return value is an optional palloc'd struct of statistics
/*
 * index_bulk_delete
 *
 * Takes the index relation from info->index, resolves the ambulkdelete
 * procedure through GET_REL_PROCEDURE, and calls it with four arguments:
 * info, stats, callback (cast to Pointer) and callback_state.  The Datum
 * returned by the access method is converted to an IndexBulkDeleteResult
 * pointer and stored in result.
 */
727 IndexBulkDeleteResult *
728 index_bulk_delete(IndexVacuumInfo *info,
729 IndexBulkDeleteResult *stats,
730 IndexBulkDeleteCallback callback,
731 void *callback_state)
733 Relation indexRelation = info->index;
735 IndexBulkDeleteResult *result;
738 GET_REL_PROCEDURE(ambulkdelete);
740 result = (IndexBulkDeleteResult *)
741 DatumGetPointer(FunctionCall4(procedure,
742 PointerGetDatum(info),
743 PointerGetDatum(stats),
744 PointerGetDatum((Pointer) callback),
745 PointerGetDatum(callback_state)));
751 * index_vacuum_cleanup - do post-deletion cleanup of an index
753 * return value is an optional palloc'd struct of statistics
/*
 * index_vacuum_cleanup
 *
 * Takes the index relation from info->index, resolves the amvacuumcleanup
 * procedure through GET_REL_PROCEDURE, and calls it with two arguments:
 * info and stats.  The Datum returned by the access method is converted to
 * an IndexBulkDeleteResult pointer and stored in result.
 */
756 IndexBulkDeleteResult *
757 index_vacuum_cleanup(IndexVacuumInfo *info,
758 IndexBulkDeleteResult *stats)
760 Relation indexRelation = info->index;
762 IndexBulkDeleteResult *result;
765 GET_REL_PROCEDURE(amvacuumcleanup);
767 result = (IndexBulkDeleteResult *)
768 DatumGetPointer(FunctionCall2(procedure,
769 PointerGetDatum(info),
770 PointerGetDatum(stats)));
778 * Index access methods typically require support routines that are
779 * not directly the implementation of any WHERE-clause query operator
780 * and so cannot be kept in pg_amop. Instead, such routines are kept
781 * in pg_amproc. These registered procedure OIDs are assigned numbers
782 * according to a convention established by the access method.
783 * The general index code doesn't know anything about the routines
784 * involved; it just builds an ordered list of them for
785 * each attribute on which an index is defined.
787 * As of Postgres 8.3, support routines within an operator family
788 * are further subdivided by the "left type" and "right type" of the
789 * query operator(s) that they support. The "default" functions for a
790 * particular indexed attribute are those with both types equal to
791 * the index opclass' opcintype (note that this is subtly different
792 * from the indexed attribute's own type: it may be a binary-compatible
793 * type instead). Only the default functions are stored in relcache
794 * entries --- access methods can use the syscache to look up non-default
797 * This routine returns the requested default procedure OID for a
798 * particular indexed attribute.
/*
 * index_getprocid
 *
 * Returns one entry of the irel->rd_support array.  The array is indexed
 * as nproc consecutive slots per attribute, where nproc is
 * irel->rd_am->amsupport:
 *
 *     procindex = nproc * (attnum - 1) + (procnum - 1)
 *
 * procnum is asserted to lie in 1 .. nproc.  The arithmetic treats attnum
 * as 1-based as well, but the visible code does not range-check it.
 */
802 index_getprocid(Relation irel,
810 nproc = irel->rd_am->amsupport;
812 Assert(procnum > 0 && procnum <= (uint16) nproc);
814 procindex = (nproc * (attnum - 1)) + (procnum - 1);
816 loc = irel->rd_support;
820 return loc[procindex];
826 * This routine allows index AMs to keep fmgr lookup info for
827 * support procs in the relcache. As above, only the "default"
828 * functions for any particular indexed attribute are cached.
830 * Note: the return value points into cached data that will be lost during
831 * any relcache rebuild! Therefore, either use the callinfo right away,
832 * or save it only after having acquired some type of lock on the index rel.
/*
 * index_getprocinfo
 *
 * Locates the cached lookup info for a support procedure in
 * irel->rd_supportinfo, using the same slot arithmetic as index_getprocid:
 *
 *     procindex = nproc * (attnum - 1) + (procnum - 1)
 *
 * with nproc taken from irel->rd_am->amsupport and procnum asserted to lie
 * in 1 .. nproc.  rd_supportinfo is asserted to be non-NULL.
 *
 * If the fn_oid of the selected entry is still InvalidOid, the entry is
 * initialized on the spot: the procedure OID is read from
 * irel->rd_support at the same index, an ERROR naming the procedure
 * number, attribute number and index is raised when that OID is not valid,
 * and otherwise fmgr_info_cxt() fills in the entry using irel->rd_indexcxt.
 */
836 index_getprocinfo(Relation irel,
844 nproc = irel->rd_am->amsupport;
846 Assert(procnum > 0 && procnum <= (uint16) nproc);
848 procindex = (nproc * (attnum - 1)) + (procnum - 1);
850 locinfo = irel->rd_supportinfo;
852 Assert(locinfo != NULL);
854 locinfo += procindex;
856 /* Initialize the lookup info if first time through */
857 if (locinfo->fn_oid == InvalidOid)
859 RegProcedure *loc = irel->rd_support;
864 procId = loc[procindex];
867 * Complain if function was not found during IndexSupportInitialize.
868 * This should not happen unless the system tables contain bogus
869 * entries for the index opclass. (If an AM wants to allow a support
870 * function to be optional, it can use index_getprocid.)
872 if (!RegProcedureIsValid(procId))
873 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
874 procnum, attnum, RelationGetRelationName(irel));
876 fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt);