1 /*-------------------------------------------------------------------------
4 * general index access method routines
6 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/index/indexam.c
14 * index_open - open an index relation by relation OID
15 * index_close - close an index relation
16 * index_beginscan - start a scan of an index with amgettuple
17 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
18 * index_rescan - restart a scan of an index
19 * index_endscan - end a scan
20 * index_insert - insert an index tuple into a relation
21 * index_markpos - mark a scan position
22 * index_restrpos - restore a scan position
23 * index_getnext_tid - get the next TID from a scan
24 * index_fetch_heap - get the scan's next heap tuple
25 * index_getnext - get the next heap tuple from a scan
26 * index_getbitmap - get all tuples from a scan
27 * index_bulk_delete - bulk deletion of index tuples
28 * index_vacuum_cleanup - post-deletion cleanup of an index
29 * index_can_return - does index support index-only scans?
30 * index_getprocid - get a support procedure OID
31 * index_getprocinfo - get a support procedure's lookup info
34 * This file contains the index_ routines which used
35 * to be a scattered collection of stuff in access/genam.
39 * Scans are implemented as follows:
41 * `0' represents an invalid item pointer.
42 * `-' represents an unknown item pointer.
43 * `X' represents a known item pointer.
44 * `+' represents a known or invalid item pointer.
45 * `*' represents any item pointer.
47 * State is represented by a triple of these symbols in the order of
48 * previous, current, next. Note that the case of reverse scans works identically.
51 *       State    Result
52 * (1)   + + -    + 0 0    (if the next item pointer is invalid)
53 * (2)            + X -    (otherwise)
54 * (3)   * 0 0    * 0 0    (no change)
55 * (4)   + X 0    X 0 0    (shift)
56 * (5)   * + X    + X -    (shift, add unknown)
58 * All other states cannot occur.
60 * Note: It would be possible to cache the status of the previous and
61 * next item pointer using the flags.
63 *-------------------------------------------------------------------------
68 #include "access/relscan.h"
69 #include "access/transam.h"
70 #include "access/xlog.h"
72 #include "catalog/index.h"
73 #include "catalog/catalog.h"
75 #include "storage/bufmgr.h"
76 #include "storage/lmgr.h"
77 #include "storage/predicate.h"
78 #include "utils/snapmgr.h"
79 #include "utils/tqual.h"
82 /* ----------------------------------------------------------------
83 * macros used in index_ routines
85 * Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there
86 * to check that we don't try to scan or do retail insertions into an index
87 * that is currently being rebuilt or pending rebuild. This helps to catch
88 * things that don't work when reindexing system catalogs. The assertion
89 * doesn't prevent the actual rebuild because we don't use RELATION_CHECKS
90 * when calling the index AM's ambuild routine, and there is no reason for
91 * ambuild to call its subsidiary routines through this file.
92 * ----------------------------------------------------------------
/*
 * RELATION_CHECKS: assertion-only sanity checks on the caller's local
 * variable "indexRelation".  Asserts that the relation is valid, that its
 * rd_am pointer is set, and that ReindexIsProcessingIndex() does not
 * report the index as currently being processed.
 */
94 #define RELATION_CHECKS \
96 AssertMacro(RelationIsValid(indexRelation)), \
97 AssertMacro(PointerIsValid(indexRelation->rd_am)), \
98 AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \
/*
 * SCAN_CHECKS: assertion-only sanity checks on the caller's local variable
 * "scan".  Asserts that the scan descriptor is valid, that its
 * indexRelation is valid, and that the relation's rd_am pointer is set.
 */
101 #define SCAN_CHECKS \
103 AssertMacro(IndexScanIsValid(scan)), \
104 AssertMacro(RelationIsValid(scan->indexRelation)), \
105 AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
/*
 * GET_REL_PROCEDURE(pname): point the caller's "procedure" variable at the
 * lookup slot for access-method routine "pname" inside
 * indexRelation->rd_aminfo.  If the slot's fn_oid is not yet valid, the
 * routine's OID is read from indexRelation->rd_am; an invalid OID raises
 * an ERROR, otherwise the slot is filled in with fmgr_info_cxt() using
 * indexRelation->rd_indexcxt.
 *
 * Expects variables named "indexRelation" and "procedure" to exist where
 * the macro is expanded.
 */
108 #define GET_REL_PROCEDURE(pname) \
110 procedure = &indexRelation->rd_aminfo->pname; \
111 if (!OidIsValid(procedure->fn_oid)) \
113 RegProcedure procOid = indexRelation->rd_am->pname; \
114 if (!RegProcedureIsValid(procOid)) \
115 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
116 fmgr_info_cxt(procOid, procedure, indexRelation->rd_indexcxt); \
/*
 * GET_UNCACHED_REL_PROCEDURE(pname): look up access-method routine "pname"
 * with fmgr_info() directly into the caller's local "procedure" object
 * (its address is passed).  Unlike GET_REL_PROCEDURE, nothing is stored in
 * indexRelation->rd_aminfo.  An invalid OID in indexRelation->rd_am raises
 * an ERROR.
 */
120 #define GET_UNCACHED_REL_PROCEDURE(pname) \
122 if (!RegProcedureIsValid(indexRelation->rd_am->pname)) \
123 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
124 fmgr_info(indexRelation->rd_am->pname, &procedure); \
/*
 * GET_SCAN_PROCEDURE(pname): same lookup-and-fill logic as
 * GET_REL_PROCEDURE, but the index relation is reached through
 * scan->indexRelation.  Expects variables named "scan" and "procedure" to
 * exist where the macro is expanded.
 */
127 #define GET_SCAN_PROCEDURE(pname) \
129 procedure = &scan->indexRelation->rd_aminfo->pname; \
130 if (!OidIsValid(procedure->fn_oid)) \
132 RegProcedure procOid = scan->indexRelation->rd_am->pname; \
133 if (!RegProcedureIsValid(procOid)) \
134 elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
135 fmgr_info_cxt(procOid, procedure, scan->indexRelation->rd_indexcxt); \
/*
 * Forward declaration of the file-local helper called by both
 * index_beginscan and index_beginscan_bitmap.
 */
139 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
140 int nkeys, int norderbys, Snapshot snapshot);
143 /* ----------------------------------------------------------------
144 * index_ interface functions
145 * ----------------------------------------------------------------
149 * index_open - open an index relation by relation OID
151 * If lockmode is not "NoLock", the specified kind of lock is
152 * obtained on the index. (Generally, NoLock should only be
153 * used if the caller knows it has some appropriate lock on the index already.)
156 * An error is raised if the index does not exist.
158 * This is a convenience routine adapted for indexscan use.
159 * Some callers may prefer to use relation_open directly.
/*
 * Parameters:
 *   relationId - OID handed to relation_open()
 *   lockmode   - lock mode handed to relation_open()
 *
 * After opening, the relation's relkind is compared against RELKIND_INDEX;
 * any other kind is reported with ERRCODE_WRONG_OBJECT_TYPE and a message
 * naming the relation.
 */
163 index_open(Oid relationId, LOCKMODE lockmode)
167 r = relation_open(relationId, lockmode);
/* Only index relations are acceptable here */
169 if (r->rd_rel->relkind != RELKIND_INDEX)
171 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
172 errmsg("\"%s\" is not an index",
173 RelationGetRelationName(r))));
179 * index_close - close an index relation
181 * If lockmode is not "NoLock", we then release the specified lock.
183 * Note that it is often sensible to hold a lock beyond index_close;
184 * in that case, the lock is released automatically at xact end.
/*
 * Parameters:
 *   relation - the open index relation to close
 *   lockmode - lock to release afterwards; asserted to lie in
 *              [NoLock, MAX_LOCKMODES)
 *
 * The lock identifier is copied out of the relation before RelationClose()
 * and the copy is what gets unlocked (presumably so the relcache entry is
 * not touched after it has been closed -- confirm against relcache rules).
 */
188 index_close(Relation relation, LOCKMODE lockmode)
190 LockRelId relid = relation->rd_lockInfo.lockRelId;
192 Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
194 /* The relcache does the real work... */
195 RelationClose(relation);
197 if (lockmode != NoLock)
198 UnlockRelationId(&relid, lockmode);
202 * index_insert - insert an index tuple into a relation
/*
 * Hands the insertion to the access method's aminsert routine, passing
 * indexRelation, values, isnull, heap_t_ctid, heapRelation and checkUnique
 * (the last one as an int32), and returns that routine's result converted
 * with DatumGetBool().
 *
 * If the access method's ampredlocks flag is not set,
 * CheckForSerializableConflictIn() is called on the index before the
 * access method is invoked.
 */
206 index_insert(Relation indexRelation,
209 ItemPointer heap_t_ctid,
210 Relation heapRelation,
211 IndexUniqueCheck checkUnique)
216 GET_REL_PROCEDURE(aminsert);
218 if (!(indexRelation->rd_am->ampredlocks))
219 CheckForSerializableConflictIn(indexRelation,
224 * have the am's insert proc do all the work.
226 return DatumGetBool(FunctionCall6(procedure,
227 PointerGetDatum(indexRelation),
228 PointerGetDatum(values),
229 PointerGetDatum(isnull),
230 PointerGetDatum(heap_t_ctid),
231 PointerGetDatum(heapRelation),
232 Int32GetDatum((int32) checkUnique)));
236 * index_beginscan - start a scan of an index with amgettuple
238 * Caller must be holding suitable locks on the heap and the index.
/*
 * Builds the scan descriptor through index_beginscan_internal() and then
 * stores heapRelation and snapshot in it (fields heapRelation and
 * xs_snapshot).
 */
241 index_beginscan(Relation heapRelation,
242 Relation indexRelation,
244 int nkeys, int norderbys)
248 scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot);
251 * Save additional parameters into the scandesc. Everything else was set
252 * up by RelationGetIndexScan.
254 scan->heapRelation = heapRelation;
255 scan->xs_snapshot = snapshot;
261 * index_beginscan_bitmap - start a scan of an index with amgetbitmap
263 * As above, caller had better be holding some lock on the parent heap
264 * relation, even though it's not explicitly mentioned here.
/*
 * Builds the scan descriptor through index_beginscan_internal(), always
 * passing 0 for norderbys, and then stores the snapshot in xs_snapshot.
 * The visible code records no heap relation in the descriptor.
 */
267 index_beginscan_bitmap(Relation indexRelation,
273 scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot);
276 * Save additional parameters into the scandesc. Everything else was set
277 * up by RelationGetIndexScan.
279 scan->xs_snapshot = snapshot;
285 * index_beginscan_internal --- common code for index_beginscan variants
/*
 * Steps performed, in order:
 *   1. resolve the access method's ambeginscan routine;
 *   2. if the access method's ampredlocks flag is not set, call
 *      PredicateLockRelation() on the index relation for this snapshot;
 *   3. increment the relcache reference count on the index;
 *   4. call ambeginscan(indexRelation, nkeys, norderbys) and treat the
 *      returned Datum as the IndexScanDesc.
 */
288 index_beginscan_internal(Relation indexRelation,
289 int nkeys, int norderbys, Snapshot snapshot)
295 GET_REL_PROCEDURE(ambeginscan);
297 if (!(indexRelation->rd_am->ampredlocks))
298 PredicateLockRelation(indexRelation, snapshot);
301 * We hold a reference count to the relcache entry throughout the scan.
303 RelationIncrementReferenceCount(indexRelation);
306 * Tell the AM to open a scan.
308 scan = (IndexScanDesc)
309 DatumGetPointer(FunctionCall3(procedure,
310 PointerGetDatum(indexRelation),
311 Int32GetDatum(nkeys),
312 Int32GetDatum(norderbys)));
318 * index_rescan - (re)start a scan of an index
320 * During a restart, the caller may specify a new set of scankeys and/or
321 * orderbykeys; but the number of keys cannot differ from what index_beginscan
322 * was told. (Later we might relax that to "must not exceed", but currently
323 * the index AMs tend to assume that scan->numberOfKeys is what to believe.)
324 * To restart the scan without changing keys, pass NULL for the key arrays.
325 * (Of course, keys *must* be passed on the first call, unless
326 * scan->numberOfKeys is zero.)
/*
 * Parameters:
 *   scan      - the scan to restart
 *   keys      - scan keys passed through to the access method
 *   nkeys     - asserted equal to scan->numberOfKeys
 *   orderbys  - ordering keys passed through to the access method
 *   norderbys - asserted equal to scan->numberOfOrderBys
 *
 * Before the access method's amrescan routine is called, any buffer pin
 * held in scan->xs_cbuf is released and the xs_continue_hot and
 * kill_prior_tuple flags are cleared.
 */
330 index_rescan(IndexScanDesc scan,
331 ScanKey keys, int nkeys,
332 ScanKey orderbys, int norderbys)
337 GET_SCAN_PROCEDURE(amrescan);
339 Assert(nkeys == scan->numberOfKeys);
340 Assert(norderbys == scan->numberOfOrderBys);
342 /* Release any held pin on a heap page */
343 if (BufferIsValid(scan->xs_cbuf))
345 ReleaseBuffer(scan->xs_cbuf);
346 scan->xs_cbuf = InvalidBuffer;
349 scan->xs_continue_hot = false;
351 scan->kill_prior_tuple = false; /* for safety */
353 FunctionCall5(procedure,
354 PointerGetDatum(scan),
355 PointerGetDatum(keys),
356 Int32GetDatum(nkeys),
357 PointerGetDatum(orderbys),
358 Int32GetDatum(norderbys));
362 * index_endscan - end a scan
/*
 * Resolves the access method's amendscan routine, drops any buffer pin held
 * in scan->xs_cbuf, lets the access method end its scan, and then
 * decrements the reference count on scan->indexRelation.
 */
366 index_endscan(IndexScanDesc scan)
371 GET_SCAN_PROCEDURE(amendscan);
373 /* Release any held pin on a heap page */
374 if (BufferIsValid(scan->xs_cbuf))
376 ReleaseBuffer(scan->xs_cbuf);
377 scan->xs_cbuf = InvalidBuffer;
380 /* End the AM's scan */
381 FunctionCall1(procedure, PointerGetDatum(scan));
383 /* Release index refcount acquired by index_beginscan */
384 RelationDecrementReferenceCount(scan->indexRelation);
386 /* Release the scan data structure itself */
391 * index_markpos - mark a scan position
/*
 * Resolves the access method's ammarkpos routine and calls it with the scan
 * descriptor as its only argument.  No scan fields are modified by the
 * visible code itself.
 */
395 index_markpos(IndexScanDesc scan)
400 GET_SCAN_PROCEDURE(ammarkpos);
402 FunctionCall1(procedure, PointerGetDatum(scan));
406 * index_restrpos - restore a scan position
408 * NOTE: this only restores the internal scan state of the index AM.
409 * The current result tuple (scan->xs_ctup) doesn't change. See comments
410 * for ExecRestrPos().
412 * NOTE: in the presence of HOT chains, mark/restore only works correctly
413 * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
414 * returnable tuple in each HOT chain, and so restoring the prior state at the
415 * granularity of the index AM is sufficient. Since the only current user
416 * of mark/restore functionality is nodeMergejoin.c, this effectively means
417 * that merge-join plans only work for MVCC snapshots. This could be fixed
418 * if necessary, but for now it seems unimportant.
/*
 * Asserts that the scan's snapshot is an MVCC snapshot, resolves the access
 * method's amrestrpos routine, clears the xs_continue_hot and
 * kill_prior_tuple flags, and then calls amrestrpos with the scan
 * descriptor as its only argument.
 */
422 index_restrpos(IndexScanDesc scan)
426 Assert(IsMVCCSnapshot(scan->xs_snapshot));
429 GET_SCAN_PROCEDURE(amrestrpos);
431 scan->xs_continue_hot = false;
433 scan->kill_prior_tuple = false; /* for safety */
435 FunctionCall1(procedure, PointerGetDatum(scan));
439 * index_getnext_tid - get the next TID from a scan
441 * The result is the next TID satisfying the scan keys,
442 * or NULL if no more matching tuples exist.
/*
 * Parameters:
 *   scan      - the active index scan
 *   direction - scan direction, forwarded to the access method as an int32
 *
 * Calls the access method's amgettuple routine and clears
 * scan->kill_prior_tuple as soon as it returns.  When the index is
 * exhausted, any pin held in scan->xs_cbuf is released.  When an entry was
 * found, one index tuple is counted via pgstat_count_index_tuples() and the
 * address of scan->xs_ctup.t_self is returned; that pointer refers to
 * storage inside the scan descriptor.
 */
446 index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
452 GET_SCAN_PROCEDURE(amgettuple);
454 Assert(TransactionIdIsValid(RecentGlobalXmin));
457 * The AM's amgettuple proc finds the next index entry matching the scan
458 * keys, and puts the TID into scan->xs_ctup.t_self. It should also set
459 * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention
460 * to those fields here.
462 found = DatumGetBool(FunctionCall2(procedure,
463 PointerGetDatum(scan),
464 Int32GetDatum(direction)));
466 /* Reset kill flag immediately for safety */
467 scan->kill_prior_tuple = false;
469 /* If we're out of index entries, we're done */
472 /* ... but first, release any held pin on a heap page */
473 if (BufferIsValid(scan->xs_cbuf))
475 ReleaseBuffer(scan->xs_cbuf);
476 scan->xs_cbuf = InvalidBuffer;
481 pgstat_count_index_tuples(scan->indexRelation, 1);
483 /* Return the TID of the tuple we found. */
484 return &scan->xs_ctup.t_self;
488 * index_fetch_heap - get the scan's next heap tuple
490 * The result is a visible heap tuple associated with the index TID most
491 * recently fetched by index_getnext_tid, or NULL if no more matching tuples
492 * exist. (There can be more than one matching tuple because of HOT chains,
493 * although when using an MVCC snapshot it should be impossible for more than
494 * one such tuple to exist.)
496 * On success, the buffer containing the heap tup is pinned (the pin will be
497 * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan call).
500 * Note: caller must check scan->xs_recheck, and perform rechecking of the
501 * scan keys if required. We do not do that here because we don't have
502 * enough information to do it efficiently in the general case.
/*
 * Outline:
 *   - Unless resuming inside a HOT chain (scan->xs_continue_hot), switch
 *     scan->xs_cbuf to the heap block named by the current TID with
 *     ReleaseAndReadBuffer(), and call heap_page_prune_opt() only when that
 *     actually changed the buffer.
 *   - Run heap_hot_search_buffer() while holding a share lock on the
 *     buffer; the lock is released again right afterwards.
 *   - When a tuple is returned: xs_continue_hot becomes true only for
 *     non-MVCC snapshots, a heap fetch is counted, and the address of
 *     scan->xs_ctup is returned.
 *   - At the end of the HOT chain: xs_continue_hot is cleared and, unless
 *     scan->xactStartedInRecovery is set, all_dead is copied into
 *     scan->kill_prior_tuple.
 */
506 index_fetch_heap(IndexScanDesc scan)
508 ItemPointer tid = &scan->xs_ctup.t_self;
509 bool all_dead = false;
512 /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
513 if (!scan->xs_continue_hot)
515 /* Switch to correct buffer if we don't have it already */
516 Buffer prev_buf = scan->xs_cbuf;
518 scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
520 ItemPointerGetBlockNumber(tid));
523 * Prune page, but only if we weren't already on this page
525 if (prev_buf != scan->xs_cbuf)
526 heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
529 /* Obtain share-lock on the buffer so we can examine visibility */
530 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
531 got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
536 !scan->xs_continue_hot);
537 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
542 * Only in a non-MVCC snapshot can more than one member of the HOT
545 scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
546 pgstat_count_heap_fetch(scan->indexRelation);
547 return &scan->xs_ctup;
550 /* We've reached the end of the HOT chain. */
551 scan->xs_continue_hot = false;
554 * If we scanned a whole HOT chain and found only dead tuples, tell index
555 * AM to kill its entry for that TID (this will take effect in the next
556 * amgettuple call, in index_getnext_tid). We do not do this when in
557 * recovery because it may violate MVCC to do so. See comments in
558 * RelationGetIndexScan().
560 if (!scan->xactStartedInRecovery)
561 scan->kill_prior_tuple = all_dead;
567 * index_getnext - get the next heap tuple from a scan
569 * The result is the next heap tuple satisfying the scan keys and the
570 * snapshot, or NULL if no more matching tuples exist.
572 * On success, the buffer containing the heap tup is pinned (the pin will be
573 * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan call).
576 * Note: caller must check scan->xs_recheck, and perform rechecking of the
577 * scan keys if required. We do not do that here because we don't have
578 * enough information to do it efficiently in the general case.
/*
 * Combines index_getnext_tid() and index_fetch_heap().  When
 * scan->xs_continue_hot is set, the TID fetch is skipped and the pin in
 * scan->xs_cbuf is asserted to be valid and to cover the block of the
 * current TID; otherwise the next TID is fetched from the index first.
 * index_fetch_heap() is then called for the current TID, and its result is
 * tested against NULL.
 */
582 index_getnext(IndexScanDesc scan, ScanDirection direction)
589 if (scan->xs_continue_hot)
592 * We are resuming scan of a HOT chain after having returned an
593 * earlier member. Must still hold pin on current heap page.
595 Assert(BufferIsValid(scan->xs_cbuf));
596 Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
597 BufferGetBlockNumber(scan->xs_cbuf));
601 /* Time to fetch the next TID from the index */
602 tid = index_getnext_tid(scan, direction);
604 /* If we're out of index entries, we're done */
610 * Fetch the next (or only) visible heap tuple for this index entry.
611 * If we don't find anything, loop around and grab the next TID from
614 heapTuple = index_fetch_heap(scan);
615 if (heapTuple != NULL)
619 return NULL; /* failure exit */
623 * index_getbitmap - get all tuples at once from an index scan
625 * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap.
626 * Since there's no interlock between the index scan and the eventual heap
627 * access, this is only safe to use with MVCC-based snapshots: the heap
628 * item slot could have been replaced by a newer tuple by the time we get to it.
631 * Returns the number of matching tuples found. (Note: this might be only
632 * approximate, so it should only be used for statistical purposes.)
/*
 * Parameters:
 *   scan   - the active index scan
 *   bitmap - TIDBitmap handed to the access method
 *
 * The result of the access method's amgetbitmap routine is read as an int64
 * count.  When USE_FLOAT8_BYVAL is not defined, the Datum is treated as a
 * pointer and freed once the count has been extracted.  The count is also
 * fed to pgstat_count_index_tuples().
 */
636 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap)
643 GET_SCAN_PROCEDURE(amgetbitmap);
645 /* just make sure this is false... */
646 scan->kill_prior_tuple = false;
649 * have the am's getbitmap proc do all the work.
651 d = FunctionCall2(procedure,
652 PointerGetDatum(scan),
653 PointerGetDatum(bitmap));
655 ntids = DatumGetInt64(d);
657 /* If int8 is pass-by-ref, must free the result to avoid memory leak */
658 #ifndef USE_FLOAT8_BYVAL
659 pfree(DatumGetPointer(d));
662 pgstat_count_index_tuples(scan->indexRelation, ntids);
668 * index_bulk_delete - do mass deletion of index entries
670 * callback routine tells whether a given main-heap tuple is to be deleted
673 * return value is an optional palloc'd struct of statistics
/*
 * Parameters:
 *   info           - vacuum context; info->index is the index operated on
 *   stats          - passed through to the access method
 *   callback       - passed through to the access method
 *   callback_state - passed through alongside callback
 *
 * The access method's ambulkdelete routine is looked up with
 * GET_UNCACHED_REL_PROCEDURE on every call, and its Datum result is
 * converted to an IndexBulkDeleteResult pointer.
 */
676 IndexBulkDeleteResult *
677 index_bulk_delete(IndexVacuumInfo *info,
678 IndexBulkDeleteResult *stats,
679 IndexBulkDeleteCallback callback,
680 void *callback_state)
682 Relation indexRelation = info->index;
684 IndexBulkDeleteResult *result;
687 GET_UNCACHED_REL_PROCEDURE(ambulkdelete);
689 result = (IndexBulkDeleteResult *)
690 DatumGetPointer(FunctionCall4(&procedure,
691 PointerGetDatum(info),
692 PointerGetDatum(stats),
693 PointerGetDatum((Pointer) callback),
694 PointerGetDatum(callback_state)));
700 * index_vacuum_cleanup - do post-deletion cleanup of an index
702 * return value is an optional palloc'd struct of statistics
/*
 * Parameters:
 *   info  - vacuum context; info->index is the index operated on
 *   stats - passed through to the access method
 *
 * The access method's amvacuumcleanup routine is looked up with
 * GET_UNCACHED_REL_PROCEDURE on every call, and its Datum result is
 * converted to an IndexBulkDeleteResult pointer.
 */
705 IndexBulkDeleteResult *
706 index_vacuum_cleanup(IndexVacuumInfo *info,
707 IndexBulkDeleteResult *stats)
709 Relation indexRelation = info->index;
711 IndexBulkDeleteResult *result;
714 GET_UNCACHED_REL_PROCEDURE(amvacuumcleanup);
716 result = (IndexBulkDeleteResult *)
717 DatumGetPointer(FunctionCall2(&procedure,
718 PointerGetDatum(info),
719 PointerGetDatum(stats)));
725 * index_can_return - does index support index-only scans?
/*
 * Parameter:
 *   indexRelation - the index to ask about
 *
 * The access method's amcanreturn entry is first tested for validity.  The
 * routine is then resolved with GET_REL_PROCEDURE, called with the index
 * relation as its only argument, and its result returned as a boolean.
 */
729 index_can_return(Relation indexRelation)
735 /* amcanreturn is optional; assume FALSE if not provided by AM */
736 if (!RegProcedureIsValid(indexRelation->rd_am->amcanreturn))
739 GET_REL_PROCEDURE(amcanreturn);
741 return DatumGetBool(FunctionCall1(procedure,
742 PointerGetDatum(indexRelation)));
748 * Index access methods typically require support routines that are
749 * not directly the implementation of any WHERE-clause query operator
750 * and so cannot be kept in pg_amop. Instead, such routines are kept
751 * in pg_amproc. These registered procedure OIDs are assigned numbers
752 * according to a convention established by the access method.
753 * The general index code doesn't know anything about the routines
754 * involved; it just builds an ordered list of them for
755 * each attribute on which an index is defined.
757 * As of Postgres 8.3, support routines within an operator family
758 * are further subdivided by the "left type" and "right type" of the
759 * query operator(s) that they support. The "default" functions for a
760 * particular indexed attribute are those with both types equal to
761 * the index opclass' opcintype (note that this is subtly different
762 * from the indexed attribute's own type: it may be a binary-compatible
763 * type instead). Only the default functions are stored in relcache
764 * entries --- access methods can use the syscache to look up non-default functions.
767 * This routine returns the requested default procedure OID for a
768 * particular indexed attribute.
/*
 * Looks up one entry of irel->rd_support.  The array is indexed as
 * nproc * (attnum - 1) + (procnum - 1), where nproc is the access method's
 * amsupport count, so attnum and procnum are both 1-based.  procnum is
 * asserted to lie in 1..nproc.
 */
772 index_getprocid(Relation irel,
780 nproc = irel->rd_am->amsupport;
782 Assert(procnum > 0 && procnum <= (uint16) nproc);
/* nproc slots per attribute; convert both 1-based numbers to an offset */
784 procindex = (nproc * (attnum - 1)) + (procnum - 1);
786 loc = irel->rd_support;
790 return loc[procindex];
796 * This routine allows index AMs to keep fmgr lookup info for
797 * support procs in the relcache. As above, only the "default"
798 * functions for any particular indexed attribute are cached.
800 * Note: the return value points into cached data that will be lost during
801 * any relcache rebuild! Therefore, either use the callinfo right away,
802 * or save it only after having acquired some type of lock on the index rel.
/*
 * Locates the entry of irel->rd_supportinfo for (attnum, procnum) at offset
 * nproc * (attnum - 1) + (procnum - 1), where nproc is the access method's
 * amsupport count.  If the entry's fn_oid is still InvalidOid, the
 * procedure OID is read from irel->rd_support at the same offset and the
 * entry is filled in with fmgr_info_cxt() using irel->rd_indexcxt.  An
 * invalid procedure OID raises an ERROR naming the support function
 * number, the attribute number and the index.
 */
806 index_getprocinfo(Relation irel,
814 nproc = irel->rd_am->amsupport;
816 Assert(procnum > 0 && procnum <= (uint16) nproc);
818 procindex = (nproc * (attnum - 1)) + (procnum - 1);
820 locinfo = irel->rd_supportinfo;
822 Assert(locinfo != NULL);
824 locinfo += procindex;
826 /* Initialize the lookup info if first time through */
827 if (locinfo->fn_oid == InvalidOid)
829 RegProcedure *loc = irel->rd_support;
834 procId = loc[procindex];
837 * Complain if function was not found during IndexSupportInitialize.
838 * This should not happen unless the system tables contain bogus
839 * entries for the index opclass. (If an AM wants to allow a support
840 * function to be optional, it can use index_getprocid.)
842 if (!RegProcedureIsValid(procId))
843 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
844 procnum, attnum, RelationGetRelationName(irel));
846 fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt);