* indexam.c
* general index access method routines
*
- * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.82 2005/05/27 23:31:20 tgl Exp $
+ * src/backend/access/index/indexam.c
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
- * index_openrv - open an index relation specified by a RangeVar
* index_close - close an index relation
* index_beginscan - start a scan of an index with amgettuple
- * index_beginscan_multi - start a scan of an index with amgetmulti
+ * index_beginscan_bitmap - start a scan of an index with amgetbitmap
* index_rescan - restart a scan of an index
* index_endscan - end a scan
* index_insert - insert an index tuple into a relation
* index_markpos - mark a scan position
* index_restrpos - restore a scan position
- * index_getnext - get the next tuple from a scan
- * index_getmulti - get multiple tuples from a scan
+ * index_parallelscan_estimate - estimate shared memory for parallel scan
+ * index_parallelscan_initialize - initialize parallel scan
+ * index_parallelrescan - (re)start a parallel scan of an index
+ * index_beginscan_parallel - join parallel index scan
+ * index_getnext_tid - get the next TID from a scan
+ * index_fetch_heap - get the scan's next heap tuple
+ * index_getnext - get the next heap tuple from a scan
+ * index_getbitmap - get all tuples from a scan
* index_bulk_delete - bulk deletion of index tuples
* index_vacuum_cleanup - post-deletion cleanup of an index
- * index_cost_estimator - fetch amcostestimate procedure OID
+ * index_can_return - does index support index-only scans?
* index_getprocid - get a support procedure OID
* index_getprocinfo - get a support procedure's lookup info
*
#include "postgres.h"
-#include "access/genam.h"
-#include "access/heapam.h"
-#include "utils/relcache.h"
-
+#include "access/amapi.h"
+#include "access/relscan.h"
+#include "access/transam.h"
+#include "access/xlog.h"
+#include "catalog/catalog.h"
+#include "catalog/index.h"
#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/predicate.h"
+#include "utils/snapmgr.h"
+#include "utils/tqual.h"
+
/* ----------------------------------------------------------------
* macros used in index_ routines
+ *
+ * Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there
+ * to check that we don't try to scan or do retail insertions into an index
+ * that is currently being rebuilt or pending rebuild. This helps to catch
+ * things that don't work when reindexing system catalogs. The assertion
+ * doesn't prevent the actual rebuild because we don't use RELATION_CHECKS
+ * when calling the index AM's ambuild routine, and there is no reason for
+ * ambuild to call its subsidiary routines through this file.
* ----------------------------------------------------------------
*/
#define RELATION_CHECKS \
( \
AssertMacro(RelationIsValid(indexRelation)), \
- AssertMacro(PointerIsValid(indexRelation->rd_am)) \
+ AssertMacro(PointerIsValid(indexRelation->rd_amroutine)), \
+ AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \
)
#define SCAN_CHECKS \
( \
AssertMacro(IndexScanIsValid(scan)), \
AssertMacro(RelationIsValid(scan->indexRelation)), \
- AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
+ AssertMacro(PointerIsValid(scan->indexRelation->rd_amroutine)) \
)
-#define GET_REL_PROCEDURE(pname) \
+#define CHECK_REL_PROCEDURE(pname) \
do { \
- procedure = &indexRelation->rd_aminfo->pname; \
- if (!OidIsValid(procedure->fn_oid)) \
- { \
- RegProcedure procOid = indexRelation->rd_am->pname; \
- if (!RegProcedureIsValid(procOid)) \
- elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
- fmgr_info_cxt(procOid, procedure, indexRelation->rd_indexcxt); \
- } \
+ if (indexRelation->rd_amroutine->pname == NULL) \
+ elog(ERROR, "function %s is not defined for index %s", \
+ CppAsString(pname), RelationGetRelationName(indexRelation)); \
} while(0)
-#define GET_SCAN_PROCEDURE(pname) \
+#define CHECK_SCAN_PROCEDURE(pname) \
do { \
- procedure = &scan->indexRelation->rd_aminfo->pname; \
- if (!OidIsValid(procedure->fn_oid)) \
- { \
- RegProcedure procOid = scan->indexRelation->rd_am->pname; \
- if (!RegProcedureIsValid(procOid)) \
- elog(ERROR, "invalid %s regproc", CppAsString(pname)); \
- fmgr_info_cxt(procOid, procedure, scan->indexRelation->rd_indexcxt); \
- } \
+ if (scan->indexRelation->rd_amroutine->pname == NULL) \
+ elog(ERROR, "function %s is not defined for index %s", \
+ CppAsString(pname), RelationGetRelationName(scan->indexRelation)); \
} while(0)
static IndexScanDesc index_beginscan_internal(Relation indexRelation,
- int nkeys, ScanKey key);
+ int nkeys, int norderbys, Snapshot snapshot,
+ ParallelIndexScanDesc pscan, bool temp_snap);
/* ----------------------------------------------------------------
/* ----------------
* index_open - open an index relation by relation OID
*
- * Note: we acquire no lock on the index. A lock is not needed when
- * simply examining the index reldesc; the index's schema information
- * is considered to be protected by the lock that the caller had better
- * be holding on the parent relation. Some type of lock should be
- * obtained on the index before physically accessing it, however.
- * This is handled automatically for most uses by index_beginscan
- * and index_endscan for scan cases, or by ExecOpenIndices and
- * ExecCloseIndices for update cases. Other callers will need to
- * obtain their own locks.
+ * If lockmode is not "NoLock", the specified kind of lock is
+ * obtained on the index. (Generally, NoLock should only be
+ * used if the caller knows it has some appropriate lock on the
+ * index already.)
+ *
+ * An error is raised if the index does not exist.
*
* This is a convenience routine adapted for indexscan use.
* Some callers may prefer to use relation_open directly.
* ----------------
*/
Relation
-index_open(Oid relationId)
+index_open(Oid relationId, LOCKMODE lockmode)
{
Relation r;
- r = relation_open(relationId, NoLock);
+ r = relation_open(relationId, lockmode);
if (r->rd_rel->relkind != RELKIND_INDEX)
ereport(ERROR,
errmsg("\"%s\" is not an index",
RelationGetRelationName(r))));
- pgstat_initstats(&r->pgstat_info, r);
-
return r;
}
/* ----------------
- * index_openrv - open an index relation specified
- * by a RangeVar node
+ * index_close - close an index relation
*
- * As above, but relation is specified by a RangeVar.
- * ----------------
- */
-Relation
-index_openrv(const RangeVar *relation)
-{
- Relation r;
-
- r = relation_openrv(relation, NoLock);
-
- if (r->rd_rel->relkind != RELKIND_INDEX)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("\"%s\" is not an index",
- RelationGetRelationName(r))));
-
- pgstat_initstats(&r->pgstat_info, r);
-
- return r;
-}
-
-/* ----------------
- * index_close - close a index relation
+ * If lockmode is not "NoLock", we then release the specified lock.
*
- * presently the relcache routines do all the work we need
- * to open/close index relations.
+ * Note that it is often sensible to hold a lock beyond index_close;
+ * in that case, the lock is released automatically at xact end.
* ----------------
*/
void
-index_close(Relation relation)
+index_close(Relation relation, LOCKMODE lockmode)
{
+ LockRelId relid = relation->rd_lockInfo.lockRelId;
+
+ Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
+
+ /* The relcache does the real work... */
RelationClose(relation);
+
+ if (lockmode != NoLock)
+ UnlockRelationId(&relid, lockmode);
}
/* ----------------
bool *isnull,
ItemPointer heap_t_ctid,
Relation heapRelation,
- bool check_uniqueness)
+ IndexUniqueCheck checkUnique)
{
- FmgrInfo *procedure;
-
RELATION_CHECKS;
- GET_REL_PROCEDURE(aminsert);
+ CHECK_REL_PROCEDURE(aminsert);
- /*
- * have the am's insert proc do all the work.
- */
- return DatumGetBool(FunctionCall6(procedure,
- PointerGetDatum(indexRelation),
- PointerGetDatum(values),
- PointerGetDatum(isnull),
- PointerGetDatum(heap_t_ctid),
- PointerGetDatum(heapRelation),
- BoolGetDatum(check_uniqueness)));
+ if (!(indexRelation->rd_amroutine->ampredlocks))
+ CheckForSerializableConflictIn(indexRelation,
+ (HeapTuple) NULL,
+ InvalidBuffer);
+
+ return indexRelation->rd_amroutine->aminsert(indexRelation, values, isnull,
+ heap_t_ctid, heapRelation,
+ checkUnique);
}
/*
* index_beginscan - start a scan of an index with amgettuple
*
- * Note: heapRelation may be NULL if there is no intention of calling
- * index_getnext on this scan; index_getnext_indexitem will not use the
- * heapRelation link (nor the snapshot). However, the caller had better
- * be holding some kind of lock on the heap relation in any case, to ensure
- * no one deletes it (or the index) out from under us.
+ * Caller must be holding suitable locks on the heap and the index.
*/
IndexScanDesc
index_beginscan(Relation heapRelation,
Relation indexRelation,
Snapshot snapshot,
- int nkeys, ScanKey key)
+ int nkeys, int norderbys)
{
IndexScanDesc scan;
- scan = index_beginscan_internal(indexRelation, nkeys, key);
+ scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false);
/*
- * Save additional parameters into the scandesc. Everything else was
- * set up by RelationGetIndexScan.
+ * Save additional parameters into the scandesc. Everything else was set
+ * up by RelationGetIndexScan.
*/
- scan->is_multiscan = false;
scan->heapRelation = heapRelation;
scan->xs_snapshot = snapshot;
}
/*
- * index_beginscan_multi - start a scan of an index with amgetmulti
+ * index_beginscan_bitmap - start a scan of an index with amgetbitmap
*
* As above, caller had better be holding some lock on the parent heap
* relation, even though it's not explicitly mentioned here.
*/
IndexScanDesc
-index_beginscan_multi(Relation indexRelation,
- Snapshot snapshot,
- int nkeys, ScanKey key)
+index_beginscan_bitmap(Relation indexRelation,
+ Snapshot snapshot,
+ int nkeys)
{
IndexScanDesc scan;
- scan = index_beginscan_internal(indexRelation, nkeys, key);
+ scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot, NULL, false);
/*
- * Save additional parameters into the scandesc. Everything else was
- * set up by RelationGetIndexScan.
+ * Save additional parameters into the scandesc. Everything else was set
+ * up by RelationGetIndexScan.
*/
- scan->is_multiscan = true;
scan->xs_snapshot = snapshot;
return scan;
*/
static IndexScanDesc
index_beginscan_internal(Relation indexRelation,
- int nkeys, ScanKey key)
+ int nkeys, int norderbys, Snapshot snapshot,
+ ParallelIndexScanDesc pscan, bool temp_snap)
{
IndexScanDesc scan;
- FmgrInfo *procedure;
RELATION_CHECKS;
- GET_REL_PROCEDURE(ambeginscan);
+ CHECK_REL_PROCEDURE(ambeginscan);
- RelationIncrementReferenceCount(indexRelation);
+ if (!(indexRelation->rd_amroutine->ampredlocks))
+ PredicateLockRelation(indexRelation, snapshot);
/*
- * Acquire AccessShareLock for the duration of the scan
- *
- * Note: we could get an SI inval message here and consequently have to
- * rebuild the relcache entry. The refcount increment above ensures
- * that we will rebuild it and not just flush it...
+ * We hold a reference count to the relcache entry throughout the scan.
*/
- LockRelation(indexRelation, AccessShareLock);
+ RelationIncrementReferenceCount(indexRelation);
/*
* Tell the AM to open a scan.
*/
- scan = (IndexScanDesc)
- DatumGetPointer(FunctionCall3(procedure,
- PointerGetDatum(indexRelation),
- Int32GetDatum(nkeys),
- PointerGetDatum(key)));
+ scan = indexRelation->rd_amroutine->ambeginscan(indexRelation, nkeys,
+ norderbys);
+ /* Initialize information for parallel scan. */
+ scan->parallel_scan = pscan;
+ scan->xs_temp_snap = temp_snap;
return scan;
}
/* ----------------
* index_rescan - (re)start a scan of an index
*
- * The caller may specify a new set of scankeys (but the number of keys
- * cannot change). To restart the scan without changing keys, pass NULL
- * for the key array.
- *
- * Note that this is also called when first starting an indexscan;
- * see RelationGetIndexScan. Keys *must* be passed in that case,
- * unless scan->numberOfKeys is zero.
+ * During a restart, the caller may specify a new set of scankeys and/or
+ * orderbykeys; but the number of keys cannot differ from what index_beginscan
+ * was told. (Later we might relax that to "must not exceed", but currently
+ * the index AMs tend to assume that scan->numberOfKeys is what to believe.)
+ * To restart the scan without changing keys, pass NULL for the key arrays.
+ * (Of course, keys *must* be passed on the first call, unless
+ * scan->numberOfKeys is zero.)
* ----------------
*/
void
-index_rescan(IndexScanDesc scan, ScanKey key)
+index_rescan(IndexScanDesc scan,
+ ScanKey keys, int nkeys,
+ ScanKey orderbys, int norderbys)
{
- FmgrInfo *procedure;
-
SCAN_CHECKS;
- GET_SCAN_PROCEDURE(amrescan);
+ CHECK_SCAN_PROCEDURE(amrescan);
+
+ Assert(nkeys == scan->numberOfKeys);
+ Assert(norderbys == scan->numberOfOrderBys);
/* Release any held pin on a heap page */
if (BufferIsValid(scan->xs_cbuf))
scan->xs_cbuf = InvalidBuffer;
}
- scan->kill_prior_tuple = false; /* for safety */
- scan->keys_are_unique = false; /* may be set by index AM */
- scan->got_tuple = false;
- scan->unique_tuple_pos = 0;
- scan->unique_tuple_mark = 0;
+ scan->xs_continue_hot = false;
- FunctionCall2(procedure,
- PointerGetDatum(scan),
- PointerGetDatum(key));
+ scan->kill_prior_tuple = false; /* for safety */
- pgstat_reset_index_scan(&scan->xs_pgstat_info);
+ scan->indexRelation->rd_amroutine->amrescan(scan, keys, nkeys,
+ orderbys, norderbys);
}
/* ----------------
void
index_endscan(IndexScanDesc scan)
{
- FmgrInfo *procedure;
-
SCAN_CHECKS;
- GET_SCAN_PROCEDURE(amendscan);
+ CHECK_SCAN_PROCEDURE(amendscan);
/* Release any held pin on a heap page */
if (BufferIsValid(scan->xs_cbuf))
}
/* End the AM's scan */
- FunctionCall1(procedure, PointerGetDatum(scan));
-
- /* Release index lock and refcount acquired by index_beginscan */
-
- UnlockRelation(scan->indexRelation, AccessShareLock);
+ scan->indexRelation->rd_amroutine->amendscan(scan);
+ /* Release index refcount acquired by index_beginscan */
RelationDecrementReferenceCount(scan->indexRelation);
+ if (scan->xs_temp_snap)
+ UnregisterSnapshot(scan->xs_snapshot);
+
/* Release the scan data structure itself */
IndexScanEnd(scan);
}
void
index_markpos(IndexScanDesc scan)
{
- FmgrInfo *procedure;
-
SCAN_CHECKS;
- GET_SCAN_PROCEDURE(ammarkpos);
-
- scan->unique_tuple_mark = scan->unique_tuple_pos;
+ CHECK_SCAN_PROCEDURE(ammarkpos);
- FunctionCall1(procedure, PointerGetDatum(scan));
+ scan->indexRelation->rd_amroutine->ammarkpos(scan);
}
/* ----------------
* NOTE: this only restores the internal scan state of the index AM.
* The current result tuple (scan->xs_ctup) doesn't change. See comments
* for ExecRestrPos().
+ *
+ * NOTE: in the presence of HOT chains, mark/restore only works correctly
+ * if the scan's snapshot is MVCC-safe; that ensures that there's at most one
+ * returnable tuple in each HOT chain, and so restoring the prior state at the
+ * granularity of the index AM is sufficient. Since the only current user
+ * of mark/restore functionality is nodeMergejoin.c, this effectively means
+ * that merge-join plans only work for MVCC snapshots. This could be fixed
+ * if necessary, but for now it seems unimportant.
* ----------------
*/
void
index_restrpos(IndexScanDesc scan)
{
- FmgrInfo *procedure;
+ Assert(IsMVCCSnapshot(scan->xs_snapshot));
SCAN_CHECKS;
- GET_SCAN_PROCEDURE(amrestrpos);
+ CHECK_SCAN_PROCEDURE(amrestrpos);
+
+ scan->xs_continue_hot = false;
scan->kill_prior_tuple = false; /* for safety */
+ scan->indexRelation->rd_amroutine->amrestrpos(scan);
+}
+
+/*
+ * index_parallelscan_estimate - estimate shared memory for parallel scan
+ *
+ * Currently, we don't pass any information to the AM-specific estimator,
+ * so it can probably only return a constant. In the future, we might need
+ * to pass more information.
+ */
+Size
+index_parallelscan_estimate(Relation indexRelation, Snapshot snapshot)
+{
+ Size nbytes;
+
+ RELATION_CHECKS;
+
+ nbytes = offsetof(ParallelIndexScanDescData, ps_snapshot_data);
+ nbytes = add_size(nbytes, EstimateSnapshotSpace(snapshot));
+ nbytes = MAXALIGN(nbytes);
+
/*
- * We do not reset got_tuple; so if the scan is actually being
- * short-circuited by index_getnext, the effective position
- * restoration is done by restoring unique_tuple_pos.
+ * If amestimateparallelscan is not provided, assume there is no
+ * AM-specific data needed. (It's hard to believe that could work, but
+ * it's easy enough to cater to it here.)
*/
- scan->unique_tuple_pos = scan->unique_tuple_mark;
+ if (indexRelation->rd_amroutine->amestimateparallelscan != NULL)
+ nbytes = add_size(nbytes,
+ indexRelation->rd_amroutine->amestimateparallelscan());
- FunctionCall1(procedure, PointerGetDatum(scan));
+ return nbytes;
}
-/* ----------------
- * index_getnext - get the next heap tuple from a scan
+/*
+ * index_parallelscan_initialize - initialize parallel scan
*
- * The result is the next heap tuple satisfying the scan keys and the
- * snapshot, or NULL if no more matching tuples exist. On success,
- * the buffer containing the heap tuple is pinned (the pin will be dropped
- * at the next index_getnext or index_endscan). The index TID corresponding
- * to the heap tuple can be obtained if needed from scan->currentItemData.
- * ----------------
+ * We initialize both the ParallelIndexScanDesc proper and the AM-specific
+ * information which follows it.
+ *
+ * This function calls access method specific initialization routine to
+ * initialize am specific information. Call this just once in the leader
+ * process; then, individual workers attach via index_beginscan_parallel.
*/
-HeapTuple
-index_getnext(IndexScanDesc scan, ScanDirection direction)
+void
+index_parallelscan_initialize(Relation heapRelation, Relation indexRelation,
+ Snapshot snapshot, ParallelIndexScanDesc target)
{
- HeapTuple heapTuple = &scan->xs_ctup;
- FmgrInfo *procedure;
+ Size offset;
+
+ RELATION_CHECKS;
+
+ offset = add_size(offsetof(ParallelIndexScanDescData, ps_snapshot_data),
+ EstimateSnapshotSpace(snapshot));
+ offset = MAXALIGN(offset);
+
+ target->ps_relid = RelationGetRelid(heapRelation);
+ target->ps_indexid = RelationGetRelid(indexRelation);
+ target->ps_offset = offset;
+ SerializeSnapshot(snapshot, target->ps_snapshot_data);
+ /* aminitparallelscan is optional; assume no-op if not provided by AM */
+ if (indexRelation->rd_amroutine->aminitparallelscan != NULL)
+ {
+ void *amtarget;
+
+ amtarget = OffsetToPointer(target, offset);
+ indexRelation->rd_amroutine->aminitparallelscan(amtarget);
+ }
+}
+
+/* ----------------
+ * index_parallelrescan - (re)start a parallel scan of an index
+ * ----------------
+ */
+void
+index_parallelrescan(IndexScanDesc scan)
+{
SCAN_CHECKS;
- GET_SCAN_PROCEDURE(amgettuple);
+
+ /* amparallelrescan is optional; assume no-op if not provided by AM */
+ if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL)
+ scan->indexRelation->rd_amroutine->amparallelrescan(scan);
+}
+
+/*
+ * index_beginscan_parallel - join parallel index scan
+ *
+ * Caller must be holding suitable locks on the heap and the index.
+ */
+IndexScanDesc
+index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys,
+ int norderbys, ParallelIndexScanDesc pscan)
+{
+ Snapshot snapshot;
+ IndexScanDesc scan;
+
+ Assert(RelationGetRelid(heaprel) == pscan->ps_relid);
+ snapshot = RestoreSnapshot(pscan->ps_snapshot_data);
+ RegisterSnapshot(snapshot);
+ scan = index_beginscan_internal(indexrel, nkeys, norderbys, snapshot,
+ pscan, true);
/*
- * If we already got a tuple and it must be unique, there's no need to
- * make the index AM look through any additional tuples. (This can
- * save a useful amount of work in scenarios where there are many dead
- * tuples due to heavy update activity.)
- *
- * To do this we must keep track of the logical scan position
- * (before/on/after tuple). Also, we have to be sure to release scan
- * resources before returning NULL; if we fail to do so then a
- * multi-index scan can easily run the system out of free buffers. We
- * can release index-level resources fairly cheaply by calling
- * index_rescan. This means there are two persistent states as far as
- * the index AM is concerned: on-tuple and rescanned. If we are
- * actually asked to re-fetch the single tuple, we have to go through
- * a fresh indexscan startup, which penalizes that (infrequent) case.
+ * Save additional parameters into the scandesc. Everything else was set
+ * up by index_beginscan_internal.
*/
- if (scan->keys_are_unique && scan->got_tuple)
- {
- int new_tuple_pos = scan->unique_tuple_pos;
+ scan->heapRelation = heaprel;
+ scan->xs_snapshot = snapshot;
- if (ScanDirectionIsForward(direction))
- {
- if (new_tuple_pos <= 0)
- new_tuple_pos++;
- }
- else
- {
- if (new_tuple_pos >= 0)
- new_tuple_pos--;
- }
- if (new_tuple_pos == 0)
- {
- /*
- * We are moving onto the unique tuple from having been off
- * it. We just fall through and let the index AM do the work.
- * Note we should get the right answer regardless of scan
- * direction.
- */
- scan->unique_tuple_pos = 0; /* need to update position */
- }
- else
- {
- /*
- * Moving off the tuple; must do amrescan to release
- * index-level pins before we return NULL. Since index_rescan
- * will reset my state, must save and restore...
- */
- int unique_tuple_mark = scan->unique_tuple_mark;
+ return scan;
+}
- index_rescan(scan, NULL /* no change to key */ );
+/* ----------------
+ * index_getnext_tid - get the next TID from a scan
+ *
+ * The result is the next TID satisfying the scan keys,
+ * or NULL if no more matching tuples exist.
+ * ----------------
+ */
+ItemPointer
+index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
+{
+ bool found;
- scan->keys_are_unique = true;
- scan->got_tuple = true;
- scan->unique_tuple_pos = new_tuple_pos;
- scan->unique_tuple_mark = unique_tuple_mark;
+ SCAN_CHECKS;
+ CHECK_SCAN_PROCEDURE(amgettuple);
- return NULL;
- }
- }
+ Assert(TransactionIdIsValid(RecentGlobalXmin));
- /* just make sure this is false... */
+ /*
+ * The AM's amgettuple proc finds the next index entry matching the scan
+ * keys, and puts the TID into scan->xs_ctup.t_self. It should also set
+ * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention
+ * to those fields here.
+ */
+ found = scan->indexRelation->rd_amroutine->amgettuple(scan, direction);
+
+ /* Reset kill flag immediately for safety */
scan->kill_prior_tuple = false;
- for (;;)
+ /* If we're out of index entries, we're done */
+ if (!found)
{
- bool found;
+ /* ... but first, release any held pin on a heap page */
+ if (BufferIsValid(scan->xs_cbuf))
+ {
+ ReleaseBuffer(scan->xs_cbuf);
+ scan->xs_cbuf = InvalidBuffer;
+ }
+ return NULL;
+ }
- pgstat_count_index_scan(&scan->xs_pgstat_info);
+ pgstat_count_index_tuples(scan->indexRelation, 1);
- /*
- * The AM's gettuple proc finds the next tuple matching the scan
- * keys.
- */
- found = DatumGetBool(FunctionCall2(procedure,
- PointerGetDatum(scan),
- Int32GetDatum(direction)));
+ /* Return the TID of the tuple we found. */
+ return &scan->xs_ctup.t_self;
+}
- /* Reset kill flag immediately for safety */
- scan->kill_prior_tuple = false;
+/* ----------------
+ * index_fetch_heap - get the scan's next heap tuple
+ *
+ * The result is a visible heap tuple associated with the index TID most
+ * recently fetched by index_getnext_tid, or NULL if no more matching tuples
+ * exist. (There can be more than one matching tuple because of HOT chains,
+ * although when using an MVCC snapshot it should be impossible for more than
+ * one such tuple to exist.)
+ *
+ * On success, the buffer containing the heap tup is pinned (the pin will be
+ * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
+ * call).
+ *
+ * Note: caller must check scan->xs_recheck, and perform rechecking of the
+ * scan keys if required. We do not do that here because we don't have
+ * enough information to do it efficiently in the general case.
+ * ----------------
+ */
+HeapTuple
+index_fetch_heap(IndexScanDesc scan)
+{
+ ItemPointer tid = &scan->xs_ctup.t_self;
+ bool all_dead = false;
+ bool got_heap_tuple;
- if (!found)
- {
- /* Release any held pin on a heap page */
- if (BufferIsValid(scan->xs_cbuf))
- {
- ReleaseBuffer(scan->xs_cbuf);
- scan->xs_cbuf = InvalidBuffer;
- }
- return NULL; /* failure exit */
- }
+ /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
+ if (!scan->xs_continue_hot)
+ {
+ /* Switch to correct buffer if we don't have it already */
+ Buffer prev_buf = scan->xs_cbuf;
+
+ scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
+ scan->heapRelation,
+ ItemPointerGetBlockNumber(tid));
/*
- * Fetch the heap tuple and see if it matches the snapshot.
+ * Prune page, but only if we weren't already on this page
*/
- if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
- heapTuple, &scan->xs_cbuf, true,
- &scan->xs_pgstat_info))
- break;
-
- /* Skip if no undeleted tuple at this location */
- if (heapTuple->t_data == NULL)
- continue;
+ if (prev_buf != scan->xs_cbuf)
+ heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
+ }
+ /* Obtain share-lock on the buffer so we can examine visibility */
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
+ got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
+ scan->xs_cbuf,
+ scan->xs_snapshot,
+ &scan->xs_ctup,
+ &all_dead,
+ !scan->xs_continue_hot);
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ if (got_heap_tuple)
+ {
/*
- * If we can't see it, maybe no one else can either. Check to see
- * if the tuple is dead to all transactions. If so, signal the
- * index AM to not return it on future indexscans.
- *
- * We told heap_release_fetch to keep a pin on the buffer, so we can
- * re-access the tuple here. But we must re-lock the buffer first.
+ * Only in a non-MVCC snapshot can more than one member of the HOT
+ * chain be visible.
*/
- LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
-
- if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
- scan->xs_cbuf) == HEAPTUPLE_DEAD)
- scan->kill_prior_tuple = true;
-
- LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+ scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
+ pgstat_count_heap_fetch(scan->indexRelation);
+ return &scan->xs_ctup;
}
- /* Success exit */
- scan->got_tuple = true;
+ /* We've reached the end of the HOT chain. */
+ scan->xs_continue_hot = false;
/*
- * If we just fetched a known-unique tuple, then subsequent calls will
- * go through the short-circuit code above. unique_tuple_pos has been
- * initialized to 0, which is the correct state ("on row").
+ * If we scanned a whole HOT chain and found only dead tuples, tell index
+ * AM to kill its entry for that TID (this will take effect in the next
+ * amgettuple call, in index_getnext_tid). We do not do this when in
+ * recovery because it may violate MVCC to do so. See comments in
+ * RelationGetIndexScan().
*/
+ if (!scan->xactStartedInRecovery)
+ scan->kill_prior_tuple = all_dead;
- pgstat_count_index_getnext(&scan->xs_pgstat_info);
-
- return heapTuple;
+ return NULL;
}
/* ----------------
- * index_getnext_indexitem - get the next index tuple from a scan
+ * index_getnext - get the next heap tuple from a scan
+ *
+ * The result is the next heap tuple satisfying the scan keys and the
+ * snapshot, or NULL if no more matching tuples exist.
*
- * Finds the next index tuple satisfying the scan keys. Note that the
- * corresponding heap tuple is not accessed, and thus no time qual (snapshot)
- * check is done, other than the index AM's internal check for killed tuples
- * (which most callers of this routine will probably want to suppress by
- * setting scan->ignore_killed_tuples = false).
+ * On success, the buffer containing the heap tup is pinned (the pin will be
+ * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
+ * call).
*
- * On success (TRUE return), the found index TID is in scan->currentItemData,
- * and its heap TID is in scan->xs_ctup.t_self. scan->xs_cbuf is untouched.
+ * Note: caller must check scan->xs_recheck, and perform rechecking of the
+ * scan keys if required. We do not do that here because we don't have
+ * enough information to do it efficiently in the general case.
* ----------------
*/
-bool
-index_getnext_indexitem(IndexScanDesc scan,
- ScanDirection direction)
+HeapTuple
+index_getnext(IndexScanDesc scan, ScanDirection direction)
{
- FmgrInfo *procedure;
- bool found;
+ HeapTuple heapTuple;
+ ItemPointer tid;
- SCAN_CHECKS;
- GET_SCAN_PROCEDURE(amgettuple);
+ for (;;)
+ {
+ if (scan->xs_continue_hot)
+ {
+ /*
+ * We are resuming scan of a HOT chain after having returned an
+ * earlier member. Must still hold pin on current heap page.
+ */
+ Assert(BufferIsValid(scan->xs_cbuf));
+ Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
+ BufferGetBlockNumber(scan->xs_cbuf));
+ }
+ else
+ {
+ /* Time to fetch the next TID from the index */
+ tid = index_getnext_tid(scan, direction);
- /* just make sure this is false... */
- scan->kill_prior_tuple = false;
+ /* If we're out of index entries, we're done */
+ if (tid == NULL)
+ break;
+ }
- /*
- * have the am's gettuple proc do all the work.
- */
- found = DatumGetBool(FunctionCall2(procedure,
- PointerGetDatum(scan),
- Int32GetDatum(direction)));
+ /*
+ * Fetch the next (or only) visible heap tuple for this index entry.
+ * If we don't find anything, loop around and grab the next TID from
+ * the index.
+ */
+ heapTuple = index_fetch_heap(scan);
+ if (heapTuple != NULL)
+ return heapTuple;
+ }
- return found;
+ return NULL; /* failure exit */
}
/* ----------------
- * index_getmulti - get multiple tuples from an index scan
+ * index_getbitmap - get all tuples at once from an index scan
*
- * Collects the TIDs of multiple heap tuples satisfying the scan keys.
+ * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap.
* Since there's no interlock between the index scan and the eventual heap
* access, this is only safe to use with MVCC-based snapshots: the heap
* item slot could have been replaced by a newer tuple by the time we get
* to it.
*
- * A TRUE result indicates more calls should occur; a FALSE result says the
- * scan is done. *returned_tids could be zero or nonzero in either case.
+ * Returns the number of matching tuples found. (Note: this might be only
+ * approximate, so it should only be used for statistical purposes.)
* ----------------
*/
-bool
-index_getmulti(IndexScanDesc scan,
- ItemPointer tids, int32 max_tids,
- int32 *returned_tids)
+int64
+index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap)
{
- FmgrInfo *procedure;
- bool found;
+ int64 ntids;
SCAN_CHECKS;
- GET_SCAN_PROCEDURE(amgetmulti);
+ CHECK_SCAN_PROCEDURE(amgetbitmap);
/* just make sure this is false... */
scan->kill_prior_tuple = false;
/*
- * have the am's getmulti proc do all the work.
+ * have the am's getbitmap proc do all the work.
*/
- found = DatumGetBool(FunctionCall4(procedure,
- PointerGetDatum(scan),
- PointerGetDatum(tids),
- Int32GetDatum(max_tids),
- PointerGetDatum(returned_tids)));
+ ntids = scan->indexRelation->rd_amroutine->amgetbitmap(scan, bitmap);
+
+ pgstat_count_index_tuples(scan->indexRelation, ntids);
- return found;
+ return ntids;
}
/* ----------------
* ----------------
*/
IndexBulkDeleteResult *
-index_bulk_delete(Relation indexRelation,
+index_bulk_delete(IndexVacuumInfo *info,
+ IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback,
void *callback_state)
{
- FmgrInfo *procedure;
- IndexBulkDeleteResult *result;
+ Relation indexRelation = info->index;
RELATION_CHECKS;
- GET_REL_PROCEDURE(ambulkdelete);
-
- result = (IndexBulkDeleteResult *)
- DatumGetPointer(FunctionCall3(procedure,
- PointerGetDatum(indexRelation),
- PointerGetDatum((Pointer) callback),
- PointerGetDatum(callback_state)));
+ CHECK_REL_PROCEDURE(ambulkdelete);
- return result;
+ return indexRelation->rd_amroutine->ambulkdelete(info, stats,
+ callback, callback_state);
}
/* ----------------
* ----------------
*/
IndexBulkDeleteResult *
-index_vacuum_cleanup(Relation indexRelation,
- IndexVacuumCleanupInfo *info,
+index_vacuum_cleanup(IndexVacuumInfo *info,
IndexBulkDeleteResult *stats)
{
- FmgrInfo *procedure;
- IndexBulkDeleteResult *result;
+ Relation indexRelation = info->index;
RELATION_CHECKS;
+ CHECK_REL_PROCEDURE(amvacuumcleanup);
- /* It's okay for an index AM not to have a vacuumcleanup procedure */
- if (!RegProcedureIsValid(indexRelation->rd_am->amvacuumcleanup))
- return stats;
-
- GET_REL_PROCEDURE(amvacuumcleanup);
-
- result = (IndexBulkDeleteResult *)
- DatumGetPointer(FunctionCall3(procedure,
- PointerGetDatum(indexRelation),
- PointerGetDatum((Pointer) info),
- PointerGetDatum((Pointer) stats)));
-
- return result;
+ return indexRelation->rd_amroutine->amvacuumcleanup(info, stats);
}
/* ----------------
- * index_cost_estimator
+ * index_can_return
*
- * Fetch the amcostestimate procedure OID for an index.
- *
- * We could combine fetching and calling the procedure,
- * as index_insert does for example; but that would require
- * importing a bunch of planner/optimizer stuff into this file.
+ * Does the index access method support index-only scans for the given
+ * column?
* ----------------
*/
-RegProcedure
-index_cost_estimator(Relation indexRelation)
+bool
+index_can_return(Relation indexRelation, int attno)
{
- FmgrInfo *procedure;
-
RELATION_CHECKS;
- GET_REL_PROCEDURE(amcostestimate);
- return procedure->fn_oid;
+ /* amcanreturn is optional; assume FALSE if not provided by AM */
+ if (indexRelation->rd_amroutine->amcanreturn == NULL)
+ return false;
+
+ return indexRelation->rd_amroutine->amcanreturn(indexRelation, attno);
}
/* ----------------
* index_getprocid
*
- * Some indexed access methods may require support routines that are
- * not in the operator class/operator model imposed by pg_am. These
- * access methods may store the OIDs of registered procedures they
- * need in pg_amproc. These registered procedure OIDs are ordered in
- * a way that makes sense to the access method, and used only by the
- * access method. The general index code doesn't know anything about
- * the routines involved; it just builds an ordered list of them for
+ * Index access methods typically require support routines that are
+ * not directly the implementation of any WHERE-clause query operator
+ * and so cannot be kept in pg_amop. Instead, such routines are kept
+ * in pg_amproc. These registered procedure OIDs are assigned numbers
+ * according to a convention established by the access method.
+ * The general index code doesn't know anything about the routines
+ * involved; it just builds an ordered list of them for
* each attribute on which an index is defined.
*
- * This routine returns the requested procedure OID for a particular
- * indexed attribute.
+ * As of Postgres 8.3, support routines within an operator family
+ * are further subdivided by the "left type" and "right type" of the
+ * query operator(s) that they support. The "default" functions for a
+ * particular indexed attribute are those with both types equal to
+ * the index opclass' opcintype (note that this is subtly different
+ * from the indexed attribute's own type: it may be a binary-compatible
+ * type instead). Only the default functions are stored in relcache
+ * entries --- access methods can use the syscache to look up non-default
+ * functions.
+ *
+ * This routine returns the requested default procedure OID for a
+ * particular indexed attribute.
* ----------------
*/
RegProcedure
int nproc;
int procindex;
- nproc = irel->rd_am->amsupport;
+ nproc = irel->rd_amroutine->amsupport;
Assert(procnum > 0 && procnum <= (uint16) nproc);
* index_getprocinfo
*
* This routine allows index AMs to keep fmgr lookup info for
- * support procs in the relcache.
+ * support procs in the relcache. As above, only the "default"
+ * functions for any particular indexed attribute are cached.
*
* Note: the return value points into cached data that will be lost during
* any relcache rebuild! Therefore, either use the callinfo right away,
int nproc;
int procindex;
- nproc = irel->rd_am->amsupport;
+ nproc = irel->rd_amroutine->amsupport;
Assert(procnum > 0 && procnum <= (uint16) nproc);
procId = loc[procindex];
/*
- * Complain if function was not found during
- * IndexSupportInitialize. This should not happen unless the
- * system tables contain bogus entries for the index opclass. (If
- * an AM wants to allow a support function to be optional, it can
- * use index_getprocid.)
+ * Complain if function was not found during IndexSupportInitialize.
+ * This should not happen unless the system tables contain bogus
+ * entries for the index opclass. (If an AM wants to allow a support
+ * function to be optional, it can use index_getprocid.)
*/
if (!RegProcedureIsValid(procId))
elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",