From: Robert Haas Date: Mon, 27 Jun 2011 14:27:17 +0000 (-0400) Subject: Avoid having two copies of the HOT-chain search logic. X-Git-Tag: REL9_2_BETA1~1490 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4da99ea4231e3d8bbf28b666748c1028e7b7d665;p=postgresql Avoid having two copies of the HOT-chain search logic. It's been like this since HOT was originally introduced, but the logic is complex enough that this is a recipe for bugs, as we've already found out with SSI. So refactor heap_hot_search_buffer() so that it can satisfy the needs of index_getnext(), and make index_getnext() use that rather than duplicating the logic. This change was originally proposed by Heikki Linnakangas as part of a larger refactoring oriented towards allowing index-only scans. I extracted and adjusted this part, since it seems to have independent merit. Review by Jeff Davis. --- diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 7bb4a874c4..a48e057270 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1514,6 +1514,10 @@ heap_fetch(Relation relation, * found, we update *tid to reference that tuple's offset number, and * return TRUE. If no match, return FALSE without modifying *tid. * + * heapTuple is a caller-supplied buffer. When a match is found, we return + * the tuple here, in addition to updating *tid. If no match is found, the + * contents of this buffer on return are undefined. + * * If all_dead is not NULL, we check non-visible tuples to see if they are * globally dead; *all_dead is set TRUE if all members of the HOT chain * are vacuumable, FALSE if not. @@ -1524,28 +1528,31 @@ heap_fetch(Relation relation, */ bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, - Snapshot snapshot, bool *all_dead) + Snapshot snapshot, HeapTuple heapTuple, + bool *all_dead, bool first_call) { Page dp = (Page) BufferGetPage(buffer); TransactionId prev_xmax = InvalidTransactionId; OffsetNumber offnum; bool at_chain_start; bool valid; + bool skip; + /* If this is not the first call, previous call returned a (live!) tuple */ if (all_dead) - *all_dead = true; + *all_dead = first_call; Assert(TransactionIdIsValid(RecentGlobalXmin)); Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(tid); - at_chain_start = true; + at_chain_start = first_call; + skip = !first_call; /* Scan through possible multiple members of HOT-chain */ for (;;) { ItemId lp; - HeapTupleData heapTuple; /* check for bogus TID */ if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) @@ -1568,15 +1575,15 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, break; } - heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp); - heapTuple.t_len = ItemIdGetLength(lp); - heapTuple.t_tableOid = relation->rd_id; - heapTuple.t_self = *tid; + heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); + heapTuple->t_len = ItemIdGetLength(lp); + heapTuple->t_tableOid = relation->rd_id; + heapTuple->t_self = *tid; /* * Shouldn't see a HEAP_ONLY tuple at chain start. */ - if (at_chain_start && HeapTupleIsHeapOnly(&heapTuple)) + if (at_chain_start && HeapTupleIsHeapOnly(heapTuple)) break; /* @@ -1585,21 +1592,32 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, - HeapTupleHeaderGetXmin(heapTuple.t_data))) + HeapTupleHeaderGetXmin(heapTuple->t_data))) break; - /* If it's visible per the snapshot, we must return it */ - valid = HeapTupleSatisfiesVisibility(&heapTuple, snapshot, buffer); - CheckForSerializableConflictOut(valid, relation, &heapTuple, buffer, - snapshot); - if (valid) + /* + * When first_call is true (and thus, skip is initally false) we'll + * return the first tuple we find. But on later passes, heapTuple + * will initially be pointing to the tuple we returned last time. + * Returning it again would be incorrect (and would loop forever), + * so we skip it and return the next match we find. + */ + if (!skip) { - ItemPointerSetOffsetNumber(tid, offnum); - PredicateLockTuple(relation, &heapTuple, snapshot); - if (all_dead) - *all_dead = false; - return true; + /* If it's visible per the snapshot, we must return it */ + valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer); + CheckForSerializableConflictOut(valid, relation, heapTuple, + buffer, snapshot); + if (valid) + { + ItemPointerSetOffsetNumber(tid, offnum); + PredicateLockTuple(relation, heapTuple, snapshot); + if (all_dead) + *all_dead = false; + return true; + } } + skip = false; /* * If we can't see it, maybe no one else can either. At caller @@ -1607,7 +1625,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, * transactions. */ if (all_dead && *all_dead && - HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin, + HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin, buffer) != HEAPTUPLE_DEAD) *all_dead = false; @@ -1615,13 +1633,13 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, * Check to see if HOT chain continues past this tuple; if so fetch * the next offnum and loop around. */ - if (HeapTupleIsHotUpdated(&heapTuple)) + if (HeapTupleIsHotUpdated(heapTuple)) { - Assert(ItemPointerGetBlockNumber(&heapTuple.t_data->t_ctid) == + Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) == ItemPointerGetBlockNumber(tid)); - offnum = ItemPointerGetOffsetNumber(&heapTuple.t_data->t_ctid); + offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; - prev_xmax = HeapTupleHeaderGetXmax(heapTuple.t_data); + prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data); } else break; /* end of chain */ @@ -1643,10 +1661,12 @@ heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot, { bool result; Buffer buffer; + HeapTupleData heapTuple; buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); LockBuffer(buffer, BUFFER_LOCK_SHARE); - result = heap_hot_search_buffer(tid, relation, buffer, snapshot, all_dead); + result = heap_hot_search_buffer(tid, relation, buffer, snapshot, + &heapTuple, all_dead, true); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); return result; diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index db04e26a65..fe3aa3c9c1 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -113,9 +113,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) ItemPointerSetInvalid(&scan->xs_ctup.t_self); scan->xs_ctup.t_data = NULL; scan->xs_cbuf = InvalidBuffer; - scan->xs_hot_dead = false; - scan->xs_next_hot = InvalidOffsetNumber; - scan->xs_prev_xmax = InvalidTransactionId; + scan->xs_continue_hot = false; return scan; } diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 31d705c056..13e68d644e 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -335,7 +335,7 @@ index_rescan(IndexScanDesc scan, scan->xs_cbuf = InvalidBuffer; } - scan->xs_next_hot = InvalidOffsetNumber; + scan->xs_continue_hot = false; scan->kill_prior_tuple = false; /* for safety */ @@ -417,7 +417,7 @@ index_restrpos(IndexScanDesc scan) SCAN_CHECKS; GET_SCAN_PROCEDURE(amrestrpos); - scan->xs_next_hot = InvalidOffsetNumber; + scan->xs_continue_hot = false; scan->kill_prior_tuple = false; /* for safety */ @@ -443,26 +443,18 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) HeapTuple heapTuple = &scan->xs_ctup; ItemPointer tid = &heapTuple->t_self; FmgrInfo *procedure; + bool all_dead = false; SCAN_CHECKS; GET_SCAN_PROCEDURE(amgettuple); Assert(TransactionIdIsValid(RecentGlobalXmin)); - /* - * We always reset xs_hot_dead; if we are here then either we are just - * starting the scan, or we previously returned a visible tuple, and in - * either case it's inappropriate to kill the prior index entry. - */ - scan->xs_hot_dead = false; - for (;;) { - OffsetNumber offnum; - bool at_chain_start; - Page dp; + bool got_heap_tuple; - if (scan->xs_next_hot != InvalidOffsetNumber) + if (scan->xs_continue_hot) { /* * We are resuming scan of a HOT chain after having returned an @@ -471,10 +463,6 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) Assert(BufferIsValid(scan->xs_cbuf)); Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(scan->xs_cbuf)); - Assert(TransactionIdIsValid(scan->xs_prev_xmax)); - offnum = scan->xs_next_hot; - at_chain_start = false; - scan->xs_next_hot = InvalidOffsetNumber; } else { @@ -488,7 +476,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) * comments in RelationGetIndexScan(). */ if (!scan->xactStartedInRecovery) - scan->kill_prior_tuple = scan->xs_hot_dead; + scan->kill_prior_tuple = all_dead; /* * The AM's gettuple proc finds the next index entry matching the @@ -521,151 +509,31 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) if (prev_buf != scan->xs_cbuf) heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf, RecentGlobalXmin); - - /* Prepare to scan HOT chain starting at index-referenced offnum */ - offnum = ItemPointerGetOffsetNumber(tid); - at_chain_start = true; - - /* We don't know what the first tuple's xmin should be */ - scan->xs_prev_xmax = InvalidTransactionId; - - /* Initialize flag to detect if all entries are dead */ - scan->xs_hot_dead = true; } /* Obtain share-lock on the buffer so we can examine visibility */ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); + got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation, + scan->xs_cbuf, + scan->xs_snapshot, + &scan->xs_ctup, + &all_dead, + !scan->xs_continue_hot); + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); - dp = (Page) BufferGetPage(scan->xs_cbuf); - - /* Scan through possible multiple members of HOT-chain */ - for (;;) + if (got_heap_tuple) { - ItemId lp; - ItemPointer ctid; - bool valid; - - /* check for bogus TID */ - if (offnum < FirstOffsetNumber || - offnum > PageGetMaxOffsetNumber(dp)) - break; - - lp = PageGetItemId(dp, offnum); - - /* check for unused, dead, or redirected items */ - if (!ItemIdIsNormal(lp)) - { - /* We should only see a redirect at start of chain */ - if (ItemIdIsRedirected(lp) && at_chain_start) - { - /* Follow the redirect */ - offnum = ItemIdGetRedirect(lp); - at_chain_start = false; - continue; - } - /* else must be end of chain */ - break; - } - - /* - * We must initialize all of *heapTuple (ie, scan->xs_ctup) since - * it is returned to the executor on success. - */ - heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); - heapTuple->t_len = ItemIdGetLength(lp); - ItemPointerSetOffsetNumber(tid, offnum); - heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation); - ctid = &heapTuple->t_data->t_ctid; - /* - * Shouldn't see a HEAP_ONLY tuple at chain start. (This test - * should be unnecessary, since the chain root can't be removed - * while we have pin on the index entry, but let's make it - * anyway.) + * Only in a non-MVCC snapshot can more than one member of the + * HOT chain be visible. */ - if (at_chain_start && HeapTupleIsHeapOnly(heapTuple)) - break; - - /* - * The xmin should match the previous xmax value, else chain is - * broken. (Note: this test is not optional because it protects - * us against the case where the prior chain member's xmax aborted - * since we looked at it.) - */ - if (TransactionIdIsValid(scan->xs_prev_xmax) && - !TransactionIdEquals(scan->xs_prev_xmax, - HeapTupleHeaderGetXmin(heapTuple->t_data))) - break; - - /* If it's visible per the snapshot, we must return it */ - valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot, - scan->xs_cbuf); - - CheckForSerializableConflictOut(valid, scan->heapRelation, - heapTuple, scan->xs_cbuf, - scan->xs_snapshot); - - if (valid) - { - /* - * If the snapshot is MVCC, we know that it could accept at - * most one member of the HOT chain, so we can skip examining - * any more members. Otherwise, check for continuation of the - * HOT-chain, and set state for next time. - */ - if (IsMVCCSnapshot(scan->xs_snapshot)) - scan->xs_next_hot = InvalidOffsetNumber; - else if (HeapTupleIsHotUpdated(heapTuple)) - { - Assert(ItemPointerGetBlockNumber(ctid) == - ItemPointerGetBlockNumber(tid)); - scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid); - scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data); - } - else - scan->xs_next_hot = InvalidOffsetNumber; - - PredicateLockTuple(scan->heapRelation, heapTuple, scan->xs_snapshot); - - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); - - pgstat_count_heap_fetch(scan->indexRelation); - - return heapTuple; - } - - /* - * If we can't see it, maybe no one else can either. Check to see - * if the tuple is dead to all transactions. If we find that all - * the tuples in the HOT chain are dead, we'll signal the index AM - * to not return that TID on future indexscans. - */ - if (scan->xs_hot_dead && - HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin, - scan->xs_cbuf) != HEAPTUPLE_DEAD) - scan->xs_hot_dead = false; - - /* - * Check to see if HOT chain continues past this tuple; if so - * fetch the next offnum (we don't bother storing it into - * xs_next_hot, but must store xs_prev_xmax), and loop around. - */ - if (HeapTupleIsHotUpdated(heapTuple)) - { - Assert(ItemPointerGetBlockNumber(ctid) == - ItemPointerGetBlockNumber(tid)); - offnum = ItemPointerGetOffsetNumber(ctid); - at_chain_start = false; - scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data); - } - else - break; /* end of chain */ - } /* loop over a single HOT chain */ - - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot); + pgstat_count_heap_fetch(scan->indexRelation); + return heapTuple; + } /* Loop around to ask index AM for another TID */ - scan->xs_next_hot = InvalidOffsetNumber; + scan->xs_continue_hot = false; } /* Release any held pin on a heap page */ diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index e20ef1433e..40ad775758 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -349,9 +349,11 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) { OffsetNumber offnum = tbmres->offsets[curslot]; ItemPointerData tid; + HeapTupleData heapTuple; ItemPointerSet(&tid, page, offnum); - if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, NULL)) + if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, + &heapTuple, NULL, true)) scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); } } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index fc657612ed..ee474d6c90 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -83,7 +83,8 @@ extern bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf, Relation stats_relation); extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation, - Buffer buffer, Snapshot snapshot, bool *all_dead); + Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, + bool *all_dead, bool first_call); extern bool heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot, bool *all_dead); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 7663033723..db1131e260 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -84,9 +84,7 @@ typedef struct IndexScanDescData bool xs_recheck; /* T means scan keys must be rechecked */ /* state data for traversing HOT chains in index_getnext */ - bool xs_hot_dead; /* T if all members of HOT chain are dead */ - OffsetNumber xs_next_hot; /* next member of HOT chain, if any */ - TransactionId xs_prev_xmax; /* previous HOT chain member's XMAX, if any */ + bool xs_continue_hot; /* T if must keep walking HOT chain */ } IndexScanDescData; /* Struct for heap-or-index scans of system tables */