X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Faccess%2Findex%2Findexam.c;h=ba27c1e86d9f64de25aa2db6cd8da9b31eb6453b;hb=7b4ac19982a77a1a2a6f096c4a11ee7325a14d2c;hp=fc53aeffb495696d339a973f146be00ee89874c4;hpb=d84fe82230c593f3dc5d7f427849b99d1efa8a0a;p=postgresql diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index fc53aeffb4..ba27c1e86d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -3,28 +3,36 @@ * indexam.c * general index access method routines * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.61 2002/06/20 20:29:25 momjian Exp $ + * src/backend/access/index/indexam.c * * INTERFACE ROUTINES * index_open - open an index relation by relation OID - * index_openrv - open an index relation specified by a RangeVar - * index_openr - open a system index relation by name * index_close - close an index relation - * index_beginscan - start a scan of an index + * index_beginscan - start a scan of an index with amgettuple + * index_beginscan_bitmap - start a scan of an index with amgetbitmap * index_rescan - restart a scan of an index * index_endscan - end a scan * index_insert - insert an index tuple into a relation * index_markpos - mark a scan position * index_restrpos - restore a scan position - * index_getnext - get the next tuple from a scan + * index_parallelscan_estimate - estimate shared memory for parallel scan + * index_parallelscan_initialize - initialize parallel scan + * index_parallelrescan - (re)start a parallel scan of an index + * index_beginscan_parallel - join parallel index scan + * index_getnext_tid - get the next TID from a scan + * index_fetch_heap - get the scan's next heap tuple + * index_getnext - get the next heap tuple from a scan + * index_getbitmap - get all tuples from a scan * index_bulk_delete - bulk deletion of index tuples - * index_cost_estimator - fetch amcostestimate procedure OID + * index_vacuum_cleanup - post-deletion cleanup of an index + * index_can_return - does index support index-only scans? * index_getprocid - get a support procedure OID + * index_getprocinfo - get a support procedure's lookup info * * NOTES * This file contains the index_ routines which used @@ -61,46 +69,63 @@ #include "postgres.h" -#include "access/genam.h" -#include "access/heapam.h" -#include "utils/relcache.h" - +#include "access/amapi.h" +#include "access/relscan.h" +#include "access/transam.h" +#include "access/xlog.h" +#include "catalog/catalog.h" +#include "catalog/index.h" #include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/snapmgr.h" +#include "utils/tqual.h" + /* ---------------------------------------------------------------- * macros used in index_ routines + * + * Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there + * to check that we don't try to scan or do retail insertions into an index + * that is currently being rebuilt or pending rebuild. This helps to catch + * things that don't work when reindexing system catalogs. The assertion + * doesn't prevent the actual rebuild because we don't use RELATION_CHECKS + * when calling the index AM's ambuild routine, and there is no reason for + * ambuild to call its subsidiary routines through this file. * ---------------------------------------------------------------- */ #define RELATION_CHECKS \ ( \ AssertMacro(RelationIsValid(indexRelation)), \ - AssertMacro(PointerIsValid(indexRelation->rd_am)) \ + AssertMacro(PointerIsValid(indexRelation->rd_amroutine)), \ + AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \ ) #define SCAN_CHECKS \ ( \ AssertMacro(IndexScanIsValid(scan)), \ AssertMacro(RelationIsValid(scan->indexRelation)), \ - AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \ + AssertMacro(PointerIsValid(scan->indexRelation->rd_amroutine)) \ ) -#define GET_REL_PROCEDURE(x,y) \ -( \ - procedure = indexRelation->rd_am->y, \ - (!RegProcedureIsValid(procedure)) ? \ - elog(ERROR, "index_%s: invalid %s regproc", \ - CppAsString(x), CppAsString(y)) \ - : (void)NULL \ -) +#define CHECK_REL_PROCEDURE(pname) \ +do { \ + if (indexRelation->rd_amroutine->pname == NULL) \ + elog(ERROR, "function %s is not defined for index %s", \ + CppAsString(pname), RelationGetRelationName(indexRelation)); \ +} while(0) -#define GET_SCAN_PROCEDURE(x,y) \ -( \ - procedure = scan->indexRelation->rd_am->y, \ - (!RegProcedureIsValid(procedure)) ? \ - elog(ERROR, "index_%s: invalid %s regproc", \ - CppAsString(x), CppAsString(y)) \ - : (void)NULL \ -) +#define CHECK_SCAN_PROCEDURE(pname) \ +do { \ + if (scan->indexRelation->rd_amroutine->pname == NULL) \ + elog(ERROR, "function %s is not defined for index %s", \ + CppAsString(pname), RelationGetRelationName(scan->indexRelation)); \ +} while(0) + +static IndexScanDesc index_beginscan_internal(Relation indexRelation, + int nkeys, int norderbys, Snapshot snapshot, + ParallelIndexScanDesc pscan, bool temp_snap); /* ---------------------------------------------------------------- @@ -111,180 +136,159 @@ /* ---------------- * index_open - open an index relation by relation OID * - * Note: we acquire no lock on the index. An AccessShareLock is - * acquired by index_beginscan (and released by index_endscan). - * Generally, the caller should already hold some type of lock on - * the parent relation to ensure that the index doesn't disappear. + * If lockmode is not "NoLock", the specified kind of lock is + * obtained on the index. (Generally, NoLock should only be + * used if the caller knows it has some appropriate lock on the + * index already.) + * + * An error is raised if the index does not exist. * * This is a convenience routine adapted for indexscan use. * Some callers may prefer to use relation_open directly. * ---------------- */ Relation -index_open(Oid relationId) +index_open(Oid relationId, LOCKMODE lockmode) { Relation r; - r = relation_open(relationId, NoLock); + r = relation_open(relationId, lockmode); if (r->rd_rel->relkind != RELKIND_INDEX) - elog(ERROR, "%s is not an index relation", - RelationGetRelationName(r)); - - pgstat_initstats(&r->pgstat_info, r); + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not an index", + RelationGetRelationName(r)))); return r; } /* ---------------- - * index_openrv - open an index relation specified - * by a RangeVar node + * index_close - close an index relation * - * As above, but relation is specified by a RangeVar. - * ---------------- - */ -Relation -index_openrv(const RangeVar *relation) -{ - Relation r; - - r = relation_openrv(relation, NoLock); - - if (r->rd_rel->relkind != RELKIND_INDEX) - elog(ERROR, "%s is not an index relation", - RelationGetRelationName(r)); - - pgstat_initstats(&r->pgstat_info, r); - - return r; -} - -/* ---------------- - * index_openr - open a system index relation specified by name. + * If lockmode is not "NoLock", we then release the specified lock. * - * As above, but the relation is specified by an unqualified name; - * it is assumed to live in the system catalog namespace. + * Note that it is often sensible to hold a lock beyond index_close; + * in that case, the lock is released automatically at xact end. * ---------------- */ -Relation -index_openr(const char *sysRelationName) +void +index_close(Relation relation, LOCKMODE lockmode) { - Relation r; - - r = relation_openr(sysRelationName, NoLock); + LockRelId relid = relation->rd_lockInfo.lockRelId; - if (r->rd_rel->relkind != RELKIND_INDEX) - elog(ERROR, "%s is not an index relation", - RelationGetRelationName(r)); - - pgstat_initstats(&r->pgstat_info, r); + Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES); - return r; -} - -/* ---------------- - * index_close - close a index relation - * - * presently the relcache routines do all the work we need - * to open/close index relations. - * ---------------- - */ -void -index_close(Relation relation) -{ + /* The relcache does the real work... */ RelationClose(relation); + + if (lockmode != NoLock) + UnlockRelationId(&relid, lockmode); } /* ---------------- * index_insert - insert an index tuple into a relation * ---------------- */ -InsertIndexResult +bool index_insert(Relation indexRelation, - Datum *datums, - char *nulls, + Datum *values, + bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, - bool check_uniqueness) + IndexUniqueCheck checkUnique) { - RegProcedure procedure; - InsertIndexResult specificResult; - RELATION_CHECKS; - GET_REL_PROCEDURE(insert, aminsert); + CHECK_REL_PROCEDURE(aminsert); - /* - * have the am's insert proc do all the work. - */ - specificResult = (InsertIndexResult) - DatumGetPointer(OidFunctionCall6(procedure, - PointerGetDatum(indexRelation), - PointerGetDatum(datums), - PointerGetDatum(nulls), - PointerGetDatum(heap_t_ctid), - PointerGetDatum(heapRelation), - BoolGetDatum(check_uniqueness))); - - /* must be pfree'ed */ - return specificResult; + if (!(indexRelation->rd_amroutine->ampredlocks)) + CheckForSerializableConflictIn(indexRelation, + (HeapTuple) NULL, + InvalidBuffer); + + return indexRelation->rd_amroutine->aminsert(indexRelation, values, isnull, + heap_t_ctid, heapRelation, + checkUnique); } -/* ---------------- - * index_beginscan - start a scan of an index +/* + * index_beginscan - start a scan of an index with amgettuple * - * Note: heapRelation may be NULL if there is no intention of calling - * index_getnext on this scan; index_getnext_indexitem will not use the - * heapRelation link (nor the snapshot). However, the caller had better - * be holding some kind of lock on the heap relation in any case, to ensure - * no one deletes it (or the index) out from under us. - * ---------------- + * Caller must be holding suitable locks on the heap and the index. */ IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, - int nkeys, ScanKey key) + int nkeys, int norderbys) { IndexScanDesc scan; - RegProcedure procedure; - - RELATION_CHECKS; - GET_REL_PROCEDURE(beginscan, ambeginscan); - RelationIncrementReferenceCount(indexRelation); + scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false); /* - * Acquire AccessShareLock for the duration of the scan - * - * Note: we could get an SI inval message here and consequently have to - * rebuild the relcache entry. The refcount increment above ensures - * that we will rebuild it and not just flush it... + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. */ - LockRelation(indexRelation, AccessShareLock); + scan->heapRelation = heapRelation; + scan->xs_snapshot = snapshot; + + return scan; +} + +/* + * index_beginscan_bitmap - start a scan of an index with amgetbitmap + * + * As above, caller had better be holding some lock on the parent heap + * relation, even though it's not explicitly mentioned here. + */ +IndexScanDesc +index_beginscan_bitmap(Relation indexRelation, + Snapshot snapshot, + int nkeys) +{ + IndexScanDesc scan; + + scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot, NULL, false); /* - * Tell the AM to open a scan. + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. */ - scan = (IndexScanDesc) - DatumGetPointer(OidFunctionCall3(procedure, - PointerGetDatum(indexRelation), - Int32GetDatum(nkeys), - PointerGetDatum(key))); + scan->xs_snapshot = snapshot; + + return scan; +} + +/* + * index_beginscan_internal --- common code for index_beginscan variants + */ +static IndexScanDesc +index_beginscan_internal(Relation indexRelation, + int nkeys, int norderbys, Snapshot snapshot, + ParallelIndexScanDesc pscan, bool temp_snap) +{ + IndexScanDesc scan; + + RELATION_CHECKS; + CHECK_REL_PROCEDURE(ambeginscan); + + if (!(indexRelation->rd_amroutine->ampredlocks)) + PredicateLockRelation(indexRelation, snapshot); /* - * Save additional parameters into the scandesc. Everything else - * was set up by RelationGetIndexScan. + * We hold a reference count to the relcache entry throughout the scan. */ - scan->heapRelation = heapRelation; - scan->xs_snapshot = snapshot; + RelationIncrementReferenceCount(indexRelation); /* - * We want to look up the amgettuple procedure just once per scan, not - * once per index_getnext call. So do it here and save the fmgr info - * result in the scan descriptor. + * Tell the AM to open a scan. */ - GET_SCAN_PROCEDURE(beginscan, amgettuple); - fmgr_info(procedure, &scan->fn_getnext); + scan = indexRelation->rd_amroutine->ambeginscan(indexRelation, nkeys, + norderbys); + /* Initialize information for parallel scan. */ + scan->parallel_scan = pscan; + scan->xs_temp_snap = temp_snap; return scan; } @@ -292,28 +296,39 @@ index_beginscan(Relation heapRelation, /* ---------------- * index_rescan - (re)start a scan of an index * - * The caller may specify a new set of scankeys (but the number of keys - * cannot change). Note that this is also called when first starting - * an indexscan; see RelationGetIndexScan. + * During a restart, the caller may specify a new set of scankeys and/or + * orderbykeys; but the number of keys cannot differ from what index_beginscan + * was told. (Later we might relax that to "must not exceed", but currently + * the index AMs tend to assume that scan->numberOfKeys is what to believe.) + * To restart the scan without changing keys, pass NULL for the key arrays. + * (Of course, keys *must* be passed on the first call, unless + * scan->numberOfKeys is zero.) * ---------------- */ void -index_rescan(IndexScanDesc scan, ScanKey key) +index_rescan(IndexScanDesc scan, + ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys) { - RegProcedure procedure; - SCAN_CHECKS; - GET_SCAN_PROCEDURE(rescan, amrescan); + CHECK_SCAN_PROCEDURE(amrescan); + + Assert(nkeys == scan->numberOfKeys); + Assert(norderbys == scan->numberOfOrderBys); + + /* Release any held pin on a heap page */ + if (BufferIsValid(scan->xs_cbuf)) + { + ReleaseBuffer(scan->xs_cbuf); + scan->xs_cbuf = InvalidBuffer; + } - scan->kill_prior_tuple = false; /* for safety */ - scan->keys_are_unique = false; /* may be set by amrescan */ - scan->got_tuple = false; + scan->xs_continue_hot = false; - OidFunctionCall2(procedure, - PointerGetDatum(scan), - PointerGetDatum(key)); + scan->kill_prior_tuple = false; /* for safety */ - pgstat_reset_index_scan(&scan->xs_pgstat_info); + scan->indexRelation->rd_amroutine->amrescan(scan, keys, nkeys, + orderbys, norderbys); } /* ---------------- @@ -323,10 +338,8 @@ index_rescan(IndexScanDesc scan, ScanKey key) void index_endscan(IndexScanDesc scan) { - RegProcedure procedure; - SCAN_CHECKS; - GET_SCAN_PROCEDURE(endscan, amendscan); + CHECK_SCAN_PROCEDURE(amendscan); /* Release any held pin on a heap page */ if (BufferIsValid(scan->xs_cbuf)) @@ -336,14 +349,14 @@ index_endscan(IndexScanDesc scan) } /* End the AM's scan */ - OidFunctionCall1(procedure, PointerGetDatum(scan)); - - /* Release index lock and refcount acquired by index_beginscan */ - - UnlockRelation(scan->indexRelation, AccessShareLock); + scan->indexRelation->rd_amroutine->amendscan(scan); + /* Release index refcount acquired by index_beginscan */ RelationDecrementReferenceCount(scan->indexRelation); + if (scan->xs_temp_snap) + UnregisterSnapshot(scan->xs_snapshot); + /* Release the scan data structure itself */ IndexScanEnd(scan); } @@ -355,164 +368,365 @@ index_endscan(IndexScanDesc scan) void index_markpos(IndexScanDesc scan) { - RegProcedure procedure; - SCAN_CHECKS; - GET_SCAN_PROCEDURE(markpos, ammarkpos); + CHECK_SCAN_PROCEDURE(ammarkpos); - OidFunctionCall1(procedure, PointerGetDatum(scan)); + scan->indexRelation->rd_amroutine->ammarkpos(scan); } /* ---------------- * index_restrpos - restore a scan position + * + * NOTE: this only restores the internal scan state of the index AM. + * The current result tuple (scan->xs_ctup) doesn't change. See comments + * for ExecRestrPos(). + * + * NOTE: in the presence of HOT chains, mark/restore only works correctly + * if the scan's snapshot is MVCC-safe; that ensures that there's at most one + * returnable tuple in each HOT chain, and so restoring the prior state at the + * granularity of the index AM is sufficient. Since the only current user + * of mark/restore functionality is nodeMergejoin.c, this effectively means + * that merge-join plans only work for MVCC snapshots. This could be fixed + * if necessary, but for now it seems unimportant. * ---------------- */ void index_restrpos(IndexScanDesc scan) { - RegProcedure procedure; + Assert(IsMVCCSnapshot(scan->xs_snapshot)); SCAN_CHECKS; - GET_SCAN_PROCEDURE(restrpos, amrestrpos); + CHECK_SCAN_PROCEDURE(amrestrpos); + + scan->xs_continue_hot = false; - scan->kill_prior_tuple = false; /* for safety */ - scan->got_tuple = false; + scan->kill_prior_tuple = false; /* for safety */ - OidFunctionCall1(procedure, PointerGetDatum(scan)); + scan->indexRelation->rd_amroutine->amrestrpos(scan); } -/* ---------------- - * index_getnext - get the next heap tuple from a scan +/* + * index_parallelscan_estimate - estimate shared memory for parallel scan * - * The result is the next heap tuple satisfying the scan keys and the - * snapshot, or NULL if no more matching tuples exist. On success, - * the buffer containing the heap tuple is pinned (the pin will be dropped - * at the next index_getnext or index_endscan). The index TID corresponding - * to the heap tuple can be obtained if needed from scan->currentItemData. - * ---------------- + * Currently, we don't pass any information to the AM-specific estimator, + * so it can probably only return a constant. In the future, we might need + * to pass more information. */ -HeapTuple -index_getnext(IndexScanDesc scan, ScanDirection direction) +Size +index_parallelscan_estimate(Relation indexRelation, Snapshot snapshot) { - HeapTuple heapTuple = &scan->xs_ctup; + Size nbytes; - SCAN_CHECKS; + RELATION_CHECKS; - /* Release any previously held pin */ - if (BufferIsValid(scan->xs_cbuf)) + nbytes = offsetof(ParallelIndexScanDescData, ps_snapshot_data); + nbytes = add_size(nbytes, EstimateSnapshotSpace(snapshot)); + nbytes = MAXALIGN(nbytes); + + /* + * If amestimateparallelscan is not provided, assume there is no + * AM-specific data needed. (It's hard to believe that could work, but + * it's easy enough to cater to it here.) + */ + if (indexRelation->rd_amroutine->amestimateparallelscan != NULL) + nbytes = add_size(nbytes, + indexRelation->rd_amroutine->amestimateparallelscan()); + + return nbytes; +} + +/* + * index_parallelscan_initialize - initialize parallel scan + * + * We initialize both the ParallelIndexScanDesc proper and the AM-specific + * information which follows it. + * + * This function calls access method specific initialization routine to + * initialize am specific information. Call this just once in the leader + * process; then, individual workers attach via index_beginscan_parallel. + */ +void +index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, + Snapshot snapshot, ParallelIndexScanDesc target) +{ + Size offset; + + RELATION_CHECKS; + + offset = add_size(offsetof(ParallelIndexScanDescData, ps_snapshot_data), + EstimateSnapshotSpace(snapshot)); + offset = MAXALIGN(offset); + + target->ps_relid = RelationGetRelid(heapRelation); + target->ps_indexid = RelationGetRelid(indexRelation); + target->ps_offset = offset; + SerializeSnapshot(snapshot, target->ps_snapshot_data); + + /* aminitparallelscan is optional; assume no-op if not provided by AM */ + if (indexRelation->rd_amroutine->aminitparallelscan != NULL) { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; + void *amtarget; + + amtarget = OffsetToPointer(target, offset); + indexRelation->rd_amroutine->aminitparallelscan(amtarget); } +} - /* just make sure this is false... */ - scan->kill_prior_tuple = false; +/* ---------------- + * index_parallelrescan - (re)start a parallel scan of an index + * ---------------- + */ +void +index_parallelrescan(IndexScanDesc scan) +{ + SCAN_CHECKS; + + /* amparallelrescan is optional; assume no-op if not provided by AM */ + if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL) + scan->indexRelation->rd_amroutine->amparallelrescan(scan); +} + +/* + * index_beginscan_parallel - join parallel index scan + * + * Caller must be holding suitable locks on the heap and the index. + */ +IndexScanDesc +index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, + int norderbys, ParallelIndexScanDesc pscan) +{ + Snapshot snapshot; + IndexScanDesc scan; + + Assert(RelationGetRelid(heaprel) == pscan->ps_relid); + snapshot = RestoreSnapshot(pscan->ps_snapshot_data); + RegisterSnapshot(snapshot); + scan = index_beginscan_internal(indexrel, nkeys, norderbys, snapshot, + pscan, true); /* - * Can skip entering the index AM if we already got a tuple - * and it must be unique. + * Save additional parameters into the scandesc. Everything else was set + * up by index_beginscan_internal. */ - if (scan->keys_are_unique && scan->got_tuple) - return NULL; + scan->heapRelation = heaprel; + scan->xs_snapshot = snapshot; - for (;;) + return scan; +} + +/* ---------------- + * index_getnext_tid - get the next TID from a scan + * + * The result is the next TID satisfying the scan keys, + * or NULL if no more matching tuples exist. + * ---------------- + */ +ItemPointer +index_getnext_tid(IndexScanDesc scan, ScanDirection direction) +{ + bool found; + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgettuple); + + Assert(TransactionIdIsValid(RecentGlobalXmin)); + + /* + * The AM's amgettuple proc finds the next index entry matching the scan + * keys, and puts the TID into scan->xs_ctup.t_self. It should also set + * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention + * to those fields here. + */ + found = scan->indexRelation->rd_amroutine->amgettuple(scan, direction); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + + /* If we're out of index entries, we're done */ + if (!found) { - bool found; - uint16 sv_infomask; + /* ... but first, release any held pin on a heap page */ + if (BufferIsValid(scan->xs_cbuf)) + { + ReleaseBuffer(scan->xs_cbuf); + scan->xs_cbuf = InvalidBuffer; + } + return NULL; + } - pgstat_count_index_scan(&scan->xs_pgstat_info); + pgstat_count_index_tuples(scan->indexRelation, 1); - /* - * The AM's gettuple proc finds the next tuple matching the scan - * keys. index_beginscan already set up fn_getnext. - */ - found = DatumGetBool(FunctionCall2(&scan->fn_getnext, - PointerGetDatum(scan), - Int32GetDatum(direction))); + /* Return the TID of the tuple we found. */ + return &scan->xs_ctup.t_self; +} + +/* ---------------- + * index_fetch_heap - get the scan's next heap tuple + * + * The result is a visible heap tuple associated with the index TID most + * recently fetched by index_getnext_tid, or NULL if no more matching tuples + * exist. (There can be more than one matching tuple because of HOT chains, + * although when using an MVCC snapshot it should be impossible for more than + * one such tuple to exist.) + * + * On success, the buffer containing the heap tup is pinned (the pin will be + * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan + * call). + * + * Note: caller must check scan->xs_recheck, and perform rechecking of the + * scan keys if required. We do not do that here because we don't have + * enough information to do it efficiently in the general case. + * ---------------- + */ +HeapTuple +index_fetch_heap(IndexScanDesc scan) +{ + ItemPointer tid = &scan->xs_ctup.t_self; + bool all_dead = false; + bool got_heap_tuple; - /* Reset kill flag immediately for safety */ - scan->kill_prior_tuple = false; + /* We can skip the buffer-switching logic if we're in mid-HOT chain. */ + if (!scan->xs_continue_hot) + { + /* Switch to correct buffer if we don't have it already */ + Buffer prev_buf = scan->xs_cbuf; - if (!found) - return NULL; /* failure exit */ + scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf, + scan->heapRelation, + ItemPointerGetBlockNumber(tid)); /* - * Fetch the heap tuple and see if it matches the snapshot. + * Prune page, but only if we weren't already on this page */ - if (heap_fetch(scan->heapRelation, scan->xs_snapshot, - heapTuple, &scan->xs_cbuf, true, - &scan->xs_pgstat_info)) - break; - - /* Skip if no tuple at this location */ - if (heapTuple->t_data == NULL) - continue; /* should we raise an error instead? */ + if (prev_buf != scan->xs_cbuf) + heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf); + } + /* Obtain share-lock on the buffer so we can examine visibility */ + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); + got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation, + scan->xs_cbuf, + scan->xs_snapshot, + &scan->xs_ctup, + &all_dead, + !scan->xs_continue_hot); + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + + if (got_heap_tuple) + { /* - * If we can't see it, maybe no one else can either. Check to see - * if the tuple is dead to all transactions. If so, signal the - * index AM to not return it on future indexscans. - * - * We told heap_fetch to keep a pin on the buffer, so we can - * re-access the tuple here. But we must re-lock the buffer first. - * Also, it's just barely possible for an update of hint bits to - * occur here. + * Only in a non-MVCC snapshot can more than one member of the HOT + * chain be visible. */ - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); - sv_infomask = heapTuple->t_data->t_infomask; + scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot); + pgstat_count_heap_fetch(scan->indexRelation); + return &scan->xs_ctup; + } - if (HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin) == - HEAPTUPLE_DEAD) - scan->kill_prior_tuple = true; + /* We've reached the end of the HOT chain. */ + scan->xs_continue_hot = false; - if (sv_infomask != heapTuple->t_data->t_infomask) - SetBufferCommitInfoNeedsSave(scan->xs_cbuf); - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; - } + /* + * If we scanned a whole HOT chain and found only dead tuples, tell index + * AM to kill its entry for that TID (this will take effect in the next + * amgettuple call, in index_getnext_tid). We do not do this when in + * recovery because it may violate MVCC to do so. See comments in + * RelationGetIndexScan(). + */ + if (!scan->xactStartedInRecovery) + scan->kill_prior_tuple = all_dead; + + return NULL; +} + +/* ---------------- + * index_getnext - get the next heap tuple from a scan + * + * The result is the next heap tuple satisfying the scan keys and the + * snapshot, or NULL if no more matching tuples exist. + * + * On success, the buffer containing the heap tup is pinned (the pin will be + * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan + * call). + * + * Note: caller must check scan->xs_recheck, and perform rechecking of the + * scan keys if required. We do not do that here because we don't have + * enough information to do it efficiently in the general case. + * ---------------- + */ +HeapTuple +index_getnext(IndexScanDesc scan, ScanDirection direction) +{ + HeapTuple heapTuple; + ItemPointer tid; - /* Success exit */ - scan->got_tuple = true; + for (;;) + { + if (scan->xs_continue_hot) + { + /* + * We are resuming scan of a HOT chain after having returned an + * earlier member. Must still hold pin on current heap page. + */ + Assert(BufferIsValid(scan->xs_cbuf)); + Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) == + BufferGetBlockNumber(scan->xs_cbuf)); + } + else + { + /* Time to fetch the next TID from the index */ + tid = index_getnext_tid(scan, direction); + + /* If we're out of index entries, we're done */ + if (tid == NULL) + break; + } - pgstat_count_index_getnext(&scan->xs_pgstat_info); + /* + * Fetch the next (or only) visible heap tuple for this index entry. + * If we don't find anything, loop around and grab the next TID from + * the index. + */ + heapTuple = index_fetch_heap(scan); + if (heapTuple != NULL) + return heapTuple; + } - return heapTuple; + return NULL; /* failure exit */ } /* ---------------- - * index_getnext_indexitem - get the next index tuple from a scan + * index_getbitmap - get all tuples at once from an index scan * - * Finds the next index tuple satisfying the scan keys. Note that the - * corresponding heap tuple is not accessed, and thus no time qual (snapshot) - * check is done, other than the index AM's internal check for killed tuples - * (which most callers of this routine will probably want to suppress by - * setting scan->ignore_killed_tuples = false). + * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap. + * Since there's no interlock between the index scan and the eventual heap + * access, this is only safe to use with MVCC-based snapshots: the heap + * item slot could have been replaced by a newer tuple by the time we get + * to it. * - * On success (TRUE return), the found index TID is in scan->currentItemData, - * and its heap TID is in scan->xs_ctup.t_self. scan->xs_cbuf is untouched. + * Returns the number of matching tuples found. (Note: this might be only + * approximate, so it should only be used for statistical purposes.) * ---------------- */ -bool -index_getnext_indexitem(IndexScanDesc scan, - ScanDirection direction) +int64 +index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap) { - bool found; + int64 ntids; SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgetbitmap); /* just make sure this is false... */ scan->kill_prior_tuple = false; /* - * have the am's gettuple proc do all the work. index_beginscan - * already set up fn_getnext. + * have the am's getbitmap proc do all the work. */ - found = DatumGetBool(FunctionCall2(&scan->fn_getnext, - PointerGetDatum(scan), - Int32GetDatum(direction))); + ntids = scan->indexRelation->rd_amroutine->amgetbitmap(scan, bitmap); - return found; + pgstat_count_index_tuples(scan->indexRelation, ntids); + + return ntids; } /* ---------------- @@ -525,60 +739,81 @@ index_getnext_indexitem(IndexScanDesc scan, * ---------------- */ IndexBulkDeleteResult * -index_bulk_delete(Relation indexRelation, +index_bulk_delete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state) { - RegProcedure procedure; - IndexBulkDeleteResult *result; + Relation indexRelation = info->index; RELATION_CHECKS; - GET_REL_PROCEDURE(bulk_delete, ambulkdelete); - - result = (IndexBulkDeleteResult *) - DatumGetPointer(OidFunctionCall3(procedure, - PointerGetDatum(indexRelation), - PointerGetDatum((Pointer) callback), - PointerGetDatum(callback_state))); + CHECK_REL_PROCEDURE(ambulkdelete); - return result; + return indexRelation->rd_amroutine->ambulkdelete(info, stats, + callback, callback_state); } /* ---------------- - * index_cost_estimator + * index_vacuum_cleanup - do post-deletion cleanup of an index * - * Fetch the amcostestimate procedure OID for an index. - * - * We could combine fetching and calling the procedure, - * as index_insert does for example; but that would require - * importing a bunch of planner/optimizer stuff into this file. + * return value is an optional palloc'd struct of statistics * ---------------- */ -RegProcedure -index_cost_estimator(Relation indexRelation) +IndexBulkDeleteResult * +index_vacuum_cleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats) { - RegProcedure procedure; + Relation indexRelation = info->index; + + RELATION_CHECKS; + CHECK_REL_PROCEDURE(amvacuumcleanup); + return indexRelation->rd_amroutine->amvacuumcleanup(info, stats); +} + +/* ---------------- + * index_can_return + * + * Does the index access method support index-only scans for the given + * column? + * ---------------- + */ +bool +index_can_return(Relation indexRelation, int attno) +{ RELATION_CHECKS; - GET_REL_PROCEDURE(cost_estimator, amcostestimate); - return procedure; + /* amcanreturn is optional; assume FALSE if not provided by AM */ + if (indexRelation->rd_amroutine->amcanreturn == NULL) + return false; + + return indexRelation->rd_amroutine->amcanreturn(indexRelation, attno); } /* ---------------- * index_getprocid * - * Some indexed access methods may require support routines that are - * not in the operator class/operator model imposed by pg_am. These - * access methods may store the OIDs of registered procedures they - * need in pg_amproc. These registered procedure OIDs are ordered in - * a way that makes sense to the access method, and used only by the - * access method. The general index code doesn't know anything about - * the routines involved; it just builds an ordered list of them for + * Index access methods typically require support routines that are + * not directly the implementation of any WHERE-clause query operator + * and so cannot be kept in pg_amop. Instead, such routines are kept + * in pg_amproc. These registered procedure OIDs are assigned numbers + * according to a convention established by the access method. + * The general index code doesn't know anything about the routines + * involved; it just builds an ordered list of them for * each attribute on which an index is defined. * - * This routine returns the requested procedure OID for a particular - * indexed attribute. + * As of Postgres 8.3, support routines within an operator family + * are further subdivided by the "left type" and "right type" of the + * query operator(s) that they support. The "default" functions for a + * particular indexed attribute are those with both types equal to + * the index opclass' opcintype (note that this is subtly different + * from the indexed attribute's own type: it may be a binary-compatible + * type instead). Only the default functions are stored in relcache + * entries --- access methods can use the syscache to look up non-default + * functions. + * + * This routine returns the requested default procedure OID for a + * particular indexed attribute. * ---------------- */ RegProcedure @@ -590,7 +825,7 @@ index_getprocid(Relation irel, int nproc; int procindex; - nproc = irel->rd_am->amsupport; + nproc = irel->rd_amroutine->amsupport; Assert(procnum > 0 && procnum <= (uint16) nproc); @@ -607,10 +842,15 @@ index_getprocid(Relation irel, * index_getprocinfo * * This routine allows index AMs to keep fmgr lookup info for - * support procs in the relcache. + * support procs in the relcache. As above, only the "default" + * functions for any particular indexed attribute are cached. + * + * Note: the return value points into cached data that will be lost during + * any relcache rebuild! Therefore, either use the callinfo right away, + * or save it only after having acquired some type of lock on the index rel. * ---------------- */ -struct FmgrInfo * +FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum) @@ -619,7 +859,7 @@ index_getprocinfo(Relation irel, int nproc; int procindex; - nproc = irel->rd_am->amsupport; + nproc = irel->rd_amroutine->amsupport; Assert(procnum > 0 && procnum <= (uint16) nproc); @@ -644,11 +884,11 @@ index_getprocinfo(Relation irel, /* * Complain if function was not found during IndexSupportInitialize. * This should not happen unless the system tables contain bogus - * entries for the index opclass. (If an AM wants to allow a - * support function to be optional, it can use index_getprocid.) + * entries for the index opclass. (If an AM wants to allow a support + * function to be optional, it can use index_getprocid.) */ if (!RegProcedureIsValid(procId)) - elog(ERROR, "Missing support function %d for attribute %d of index %s", + elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", procnum, attnum, RelationGetRelationName(irel)); fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt);