* the passed-in buffer. The caller must hold not only a pin, but at least
* shared buffer content lock on the buffer containing the tuple.
*
- * NOTE: must check TransactionIdIsInProgress (which looks in PGXACT array)
+ * NOTE: When using a non-MVCC snapshot, we must check
+ * TransactionIdIsInProgress (which looks in the PGXACT array)
* before TransactionIdDidCommit/TransactionIdDidAbort (which look in
* pg_clog). Otherwise we have a race condition: we might decide that a
* just-committed transaction crashed, because none of the tests succeed.
* xact.c is careful to record commit/abort in pg_clog before it unsets
- * MyPgXact->xid in PGXACT array. That fixes that problem, but it also
- * means there is a window where TransactionIdIsInProgress and
+ * MyPgXact->xid in the PGXACT array. That fixes that problem, but it
+ * also means there is a window where TransactionIdIsInProgress and
* TransactionIdDidCommit will both return true. If we check only
* TransactionIdDidCommit, we could consider a tuple committed when a
* later GetSnapshotData call will still think the originating transaction
* subtransactions of our own main transaction and so there can't be any
* race condition.
*
+ * When using an MVCC snapshot, we rely on XidInMVCCSnapshot rather than
+ * TransactionIdIsInProgress, but the logic is otherwise the same: do not
+ * check pg_clog until after deciding that the xact is no longer in progress.
+ *
+ *
* Summary of visibility functions:
*
* HeapTupleSatisfiesMVCC()
* transactions started after the snapshot was taken
* changes made by the current command
*
- * (Notice, however, that the tuple status hint bits will be updated on the
- * basis of the true state of the transaction, even if we then pretend we
- * can't see it.)
+ * Notice that here, we will not update the tuple status hint bits if the
+ * inserting/deleting transaction is still running according to our snapshot,
+ * even if in reality it's committed or aborted by now. This is intentional.
+ * Checking the true transaction state would require access to high-traffic
+ * shared data structures, creating contention we'd rather do without, and it
+ * would not change the result of our visibility check anyway. The hint bits
+ * will be updated by the first visitor that has a snapshot new enough to see
+ * the inserting/deleting transaction as done. In the meantime, the cost of
+ * leaving the hint bits unset is basically that each HeapTupleSatisfiesMVCC
+ * call will need to run TransactionIdIsCurrentTransactionId in addition to
+ * XidInMVCCSnapshot (but it would have to do the latter anyway). In the old
+ * coding where we tried to set the hint bits as soon as possible, we instead
+ * did TransactionIdIsInProgress in each call --- to no avail, as long as the
+ * inserting/deleting transaction was still running --- which was more cycles
+ * and more contention on the PGXACT array.
*/
bool
HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
if (TransactionIdIsCurrentTransactionId(xvac))
return false;
- if (!TransactionIdIsInProgress(xvac))
+ if (!XidInMVCCSnapshot(xvac, snapshot))
{
if (TransactionIdDidCommit(xvac))
{
if (!TransactionIdIsCurrentTransactionId(xvac))
{
- if (TransactionIdIsInProgress(xvac))
+ if (XidInMVCCSnapshot(xvac, snapshot))
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
else
return false; /* deleted before scan started */
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
return false;
}
}
+ else
+ {
+ /* xmin is committed, but maybe not according to our snapshot */
+ if (!HeapTupleHeaderXminFrozen(tuple) &&
+ XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
+ return false; /* treat as still in progress */
+ }
- /*
- * By here, the inserting transaction has committed - have to check
- * when...
- */
- if (!HeapTupleHeaderXminFrozen(tuple)
- && XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
- return false; /* treat as still in progress */
+ /* by here, the inserting transaction has committed */
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
return true;
else
return false; /* deleted before scan started */
}
- if (TransactionIdIsInProgress(xmax))
+ if (XidInMVCCSnapshot(xmax, snapshot))
return true;
if (TransactionIdDidCommit(xmax))
- {
- /* updating transaction committed, but when? */
- if (XidInMVCCSnapshot(xmax, snapshot))
- return true; /* treat as still in progress */
- return false;
- }
+ return false; /* updating transaction committed */
/* it must have aborted or crashed */
return true;
}
return false; /* deleted before scan started */
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
return true;
if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
HeapTupleHeaderGetRawXmax(tuple));
}
+ else
+ {
+ /* xmax is committed, but maybe not according to our snapshot */
+ if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
+ return true; /* treat as still in progress */
+ }
- /*
- * OK, the deleting transaction committed too ... but when?
- */
- if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
- return true; /* treat as still in progress */
+ /* xmax transaction committed */
return false;
}
/*
* HeapTupleIsSurelyDead
*
- * Determine whether a tuple is surely dead. We sometimes use this
- * in lieu of HeapTupleSatisifesVacuum when the tuple has just been
- * tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that
- * can be set should already be set. We assume that if no hint bits
- * either for xmin or xmax, the transaction is still running. This is
- * therefore faster than HeapTupleSatisfiesVacuum, because we don't
- * consult CLOG (and also because we don't need to give an exact answer,
- * just whether or not the tuple is surely dead).
+ * Cheaply determine whether a tuple is surely dead to all onlookers.
+ * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
+ * tuple has just been tested by another visibility routine (usually
+ * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
+ * should already be set. We assume that if no hint bits are set, the xmin
+ * or xmax transaction is still running. This is therefore faster than
+ * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG.
+ * It's okay to return FALSE when in doubt, but we must return TRUE only
+ * if the tuple is removable.
*/
bool
HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
*
* Note: GetSnapshotData never stores either top xid or subxids of our own
* backend into a snapshot, so these xids will not be reported as "running"
- * by this function. This is OK for current uses, because we actually only
- * apply this for known-committed XIDs.
+ * by this function. This is OK for current uses, because we always check
+ * TransactionIdIsCurrentTransactionId first, except for known-committed
+ * XIDs which could not be ours anyway.
*/
static bool
XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
*/
if (!snapshot->suboverflowed)
{
- /* full data, so search subxip */
+ /* we have full data, so search subxip */
int32 j;
for (j = 0; j < snapshot->subxcnt; j++)
}
else
{
- /* overflowed, so convert xid to top-level */
+ /*
+ * Snapshot overflowed, so convert xid to top-level. This is safe
+ * because we eliminated too-old XIDs above.
+ */
xid = SubTransGetTopmostTransaction(xid);
/*
*/
if (snapshot->suboverflowed)
{
- /* overflowed, so convert xid to top-level */
+ /*
+ * Snapshot overflowed, so convert xid to top-level. This is safe
+ * because we eliminated too-old XIDs above.
+ */
xid = SubTransGetTopmostTransaction(xid);
/*