]> granicus.if.org Git - postgresql/commitdiff
Improve documentation about reasoning behind the order of operations
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 16 Jul 2001 22:43:34 +0000 (22:43 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 16 Jul 2001 22:43:34 +0000 (22:43 +0000)
in GetSnapshotData, GetNewTransactionId, CommitTransaction, AbortTransaction,
etc.  Correct race condition in transaction status testing in
HeapTupleSatisfiesVacuum --- this wasn't important for old VACUUM with
exclusive lock on its table, but it sure is important now.  All per
pghackers discussion 7/11/01 and 7/12/01.

src/backend/access/transam/varsup.c
src/backend/access/transam/xact.c
src/backend/storage/ipc/sinval.c
src/backend/utils/time/tqual.c

index 2b253fc585586949300936787a1d914b14d50155..857f2c3d3e80329e4a7d4a11995fb734da4663ac 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright (c) 2000, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.41 2001/07/12 04:11:13 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.42 2001/07/16 22:43:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -60,8 +60,16 @@ GetNewTransactionId(TransactionId *xid)
         * XXX by storing xid into MyProc without acquiring SInvalLock, we are
         * relying on fetch/store of an xid to be atomic, else other backends
         * might see a partially-set xid here.  But holding both locks at once
-        * would be a nasty concurrency hit (and at this writing, could cause a
-        * deadlock against GetSnapshotData).  So for now, assume atomicity.
+        * would be a nasty concurrency hit (and in fact could cause a deadlock
+        * against GetSnapshotData).  So for now, assume atomicity.  Note that
+        * readers of PROC xid field should be careful to fetch the value only
+        * once, rather than assume they can read it multiple times and get the
+        * same answer each time.
+        *
+        * A solution to the atomic-store problem would be to give each PROC
+        * its own spinlock used only for fetching/storing that PROC's xid.
+        * (SInvalLock would then mean primarily that PROCs couldn't be added/
+        * removed while holding the lock.)
         */
        if (MyProc != (PROC *) NULL)
                MyProc->xid = *xid;
index d32a6dda978c100f3d7663a80cc175863f3b0086..f35e8d9203efb13654f7f43a709cd84bcce26128 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.107 2001/07/15 22:48:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.108 2001/07/16 22:43:33 tgl Exp $
  *
  * NOTES
  *             Transaction aborts can now occur two ways:
@@ -1018,16 +1018,21 @@ CommitTransaction(void)
 
        CloseSequences();
        AtEOXact_portals();
+
+       /* Here is where we really truly commit. */
        RecordTransactionCommit();
 
        /*
         * Let others know about no transaction in progress by me. Note that
-        * this must be done _before_ releasing locks we hold and
+        * this must be done _before_ releasing locks we hold and _after_
+        * RecordTransactionCommit.
+        *
         * SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
         * by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot -
         * if xid 2' GetSnapshotData sees xid 1 as running then it must see
         * xid 0 as running as well or it will see two tuple versions - one
-        * deleted by xid 1 and one inserted by xid 0.
+        * deleted by xid 1 and one inserted by xid 0.  See notes in
+        * GetSnapshotData.
         */
        if (MyProc != (PROC *) NULL)
        {
@@ -1038,6 +1043,12 @@ CommitTransaction(void)
                SpinRelease(SInvalLock);
        }
 
+       /*
+        * This is all post-commit cleanup.  Note that if an error is raised
+        * here, it's too late to abort the transaction.  This should be just
+        * noncritical resource releasing.
+        */
+
        RelationPurgeLocalRelation(true);
        AtEOXact_temp_relations(true);
        smgrDoPendingDeletes(true);
@@ -1080,23 +1091,6 @@ AbortTransaction(void)
        /* Prevent cancel/die interrupt while cleaning up */
        HOLD_INTERRUPTS();
 
-       /*
-        * Let others to know about no transaction in progress - vadim
-        * 11/26/96
-        *
-        * XXX it'd be nice to acquire SInvalLock for this, but too much risk of
-        * lockup: what if we were holding SInvalLock when we elog'd?  Net effect
-        * is that we are relying on fetch/store of an xid to be atomic, else
-        * other backends might see a partially-zeroed xid here.  Would it be
-        * safe to release spins before we reset xid/xmin?  But see also 
-        * GetNewTransactionId, which does the same thing.
-        */
-       if (MyProc != (PROC *) NULL)
-       {
-               MyProc->xid = InvalidTransactionId;
-               MyProc->xmin = InvalidTransactionId;
-       }
-
        /*
         * Release any spinlocks or buffer context locks we might be holding
         * as quickly as possible.      (Real locks, however, must be held till we
@@ -1143,10 +1137,23 @@ AbortTransaction(void)
        AtAbort_Notify();
        CloseSequences();
        AtEOXact_portals();
+
+       /* Advertise the fact that we aborted in pg_log. */
        RecordTransactionAbort();
 
-       /* Count transaction abort in statistics collector */
-       pgstat_count_xact_rollback();
+       /*
+        * Let others know about no transaction in progress by me. Note that
+        * this must be done _before_ releasing locks we hold and _after_
+        * RecordTransactionAbort.
+        */
+       if (MyProc != (PROC *) NULL)
+       {
+               /* Lock SInvalLock because that's what GetSnapshotData uses. */
+               SpinAcquire(SInvalLock);
+               MyProc->xid = InvalidTransactionId;
+               MyProc->xmin = InvalidTransactionId;
+               SpinRelease(SInvalLock);
+       }
 
        RelationPurgeLocalRelation(false);
        AtEOXact_temp_relations(false);
@@ -1165,6 +1172,9 @@ AbortTransaction(void)
 
        SharedBufferChanged = false;/* safest place to do it */
 
+       /* Count transaction abort in statistics collector */
+       pgstat_count_xact_rollback();
+
        /*
         * State remains TRANS_ABORT until CleanupTransaction().
         */
index a9b9046702c5ba4186bb155dba18781ed3e7fcb5..a93e91237f6e2df7f80519823f5b48103d9f180e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.36 2001/07/12 04:11:13 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.37 2001/07/16 22:43:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -193,8 +193,10 @@ TransactionIdIsInProgress(TransactionId xid)
                if (pOffset != INVALID_OFFSET)
                {
                        PROC       *proc = (PROC *) MAKE_PTR(pOffset);
+                       /* Fetch xid just once - see GetNewTransactionId */
+                       TransactionId pxid = proc->xid;
 
-                       if (TransactionIdEquals(proc->xid, xid))
+                       if (TransactionIdEquals(pxid, xid))
                        {
                                result = true;
                                break;
@@ -236,9 +238,9 @@ GetXmaxRecent(TransactionId *XmaxRecent)
                if (pOffset != INVALID_OFFSET)
                {
                        PROC       *proc = (PROC *) MAKE_PTR(pOffset);
-                       TransactionId xid;
+                       /* Fetch xid just once - see GetNewTransactionId */
+                       TransactionId xid = proc->xid;
 
-                       xid = proc->xid;
                        if (! TransactionIdIsSpecial(xid))
                        {
                                if (TransactionIdPrecedes(xid, result))
@@ -256,8 +258,19 @@ GetXmaxRecent(TransactionId *XmaxRecent)
        *XmaxRecent = result;
 }
 
-/*
+/*----------
  * GetSnapshotData -- returns information about running transactions.
+ *
+ * The returned snapshot includes xmin (lowest still-running xact ID),
+ * xmax (next xact ID to be assigned), and a list of running xact IDs
+ * in the range xmin <= xid < xmax.  It is used as follows:
+ *             All xact IDs < xmin are considered finished.
+ *             All xact IDs >= xmax are considered still running.
+ *             For an xact ID xmin <= xid < xmax, consult list to see whether
+ *             it is considered running or not.
+ * This ensures that the set of transactions seen as "running" by the
+ * current xact will not change after it takes the snapshot.
+ *----------
  */
 Snapshot
 GetSnapshotData(bool serializable)
@@ -287,12 +300,33 @@ GetSnapshotData(bool serializable)
                elog(ERROR, "Memory exhausted in GetSnapshotData");
        }
 
-       /*
-        * Unfortunately, we have to call ReadNewTransactionId() after
-        * acquiring SInvalLock above. It's not good because
-        * ReadNewTransactionId() does SpinAcquire(XidGenLockId) but
-        * _necessary_.
+       /*--------------------
+        * Unfortunately, we have to call ReadNewTransactionId() after acquiring
+        * SInvalLock above.  It's not good because ReadNewTransactionId() does
+        * SpinAcquire(XidGenLockId), but *necessary*.  We need to be sure that
+        * no transactions exit the set of currently-running transactions
+        * between the time we fetch xmax and the time we finish building our
+        * snapshot.  Otherwise we could have a situation like this:
+        *
+        *              1. Tx Old is running (in Read Committed mode).
+        *              2. Tx S reads new transaction ID into xmax, then
+        *                 is swapped out before acquiring SInvalLock.
+        *              3. Tx New gets new transaction ID (>= S' xmax),
+        *                 makes changes and commits.
+        *              4. Tx Old changes some row R changed by Tx New and commits.
+        *              5. Tx S finishes getting its snapshot data.  It sees Tx Old as
+        *                 done, but sees Tx New as still running (since New >= xmax).
+        *
+        * Now S will see R changed by both Tx Old and Tx New, *but* does not
+        * see other changes made by Tx New.  If S is supposed to be in
+        * Serializable mode, this is wrong.
+        *
+        * By locking SInvalLock before we read xmax, we ensure that TX Old
+        * cannot exit the set of running transactions seen by Tx S.  Therefore
+        * both Old and New will be seen as still running => no inconsistency.
+        *--------------------
         */
+
        ReadNewTransactionId(&(snapshot->xmax));
 
        for (index = 0; index < segP->lastBackend; index++)
@@ -302,6 +336,7 @@ GetSnapshotData(bool serializable)
                if (pOffset != INVALID_OFFSET)
                {
                        PROC       *proc = (PROC *) MAKE_PTR(pOffset);
+                       /* Fetch xid just once - see GetNewTransactionId */
                        TransactionId xid = proc->xid;
 
                        /*
@@ -325,11 +360,12 @@ GetSnapshotData(bool serializable)
 
        if (serializable)
                MyProc->xmin = snapshot->xmin;
-       /* Serializable snapshot must be computed before any other... */
-       Assert(MyProc->xmin != InvalidTransactionId);
 
        SpinRelease(SInvalLock);
 
+       /* Serializable snapshot must be computed before any other... */
+       Assert(MyProc->xmin != InvalidTransactionId);
+
        snapshot->xcnt = count;
        return snapshot;
 }
index 2dd56b6f08e9fa8249ff441ca9d31e1e5aa007f4..35113a36228ee12aa9a3c761ab9e7f0a43e09dac 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.38 2001/07/12 04:11:13 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.39 2001/07/16 22:43:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -610,6 +610,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
         *
         * If the inserting transaction aborted, then the tuple was never visible
         * to any other transaction, so we can delete it immediately.
+        *
+        * NOTE: must check TransactionIdIsInProgress (which looks in shared mem)
+        * before TransactionIdDidCommit/TransactionIdDidAbort (which look in
+        * pg_log).  Otherwise we have a race condition where we might decide
+        * that a just-committed transaction crashed, because none of the tests
+        * succeed.  xact.c is careful to record commit/abort in pg_log before
+        * it unsets MyProc->xid in shared memory.
         */
        if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
        {
@@ -636,19 +643,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
                        }
                        tuple->t_infomask |= HEAP_XMIN_COMMITTED;
                }
+               else if (TransactionIdIsInProgress(tuple->t_xmin))
+                       return HEAPTUPLE_INSERT_IN_PROGRESS;
+               else if (TransactionIdDidCommit(tuple->t_xmin))
+                       tuple->t_infomask |= HEAP_XMIN_COMMITTED;
                else if (TransactionIdDidAbort(tuple->t_xmin))
                {
                        tuple->t_infomask |= HEAP_XMIN_INVALID;
                        return HEAPTUPLE_DEAD;
                }
-               else if (TransactionIdDidCommit(tuple->t_xmin))
-                       tuple->t_infomask |= HEAP_XMIN_COMMITTED;
-               else if (TransactionIdIsInProgress(tuple->t_xmin))
-                       return HEAPTUPLE_INSERT_IN_PROGRESS;
                else
                {
                        /*
-                        * Not Aborted, Not Committed, Not in Progress -
+                        * Not in Progress, Not Committed, Not Aborted -
                         * so it's from crashed process. - vadim 11/26/96
                         */
                        tuple->t_infomask |= HEAP_XMIN_INVALID;
@@ -667,19 +674,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
 
        if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
        {
-               if (TransactionIdDidAbort(tuple->t_xmax))
+               if (TransactionIdIsInProgress(tuple->t_xmax))
+                       return HEAPTUPLE_DELETE_IN_PROGRESS;
+               else if (TransactionIdDidCommit(tuple->t_xmax))
+                       tuple->t_infomask |= HEAP_XMAX_COMMITTED;
+               else if (TransactionIdDidAbort(tuple->t_xmax))
                {
                        tuple->t_infomask |= HEAP_XMAX_INVALID;
                        return HEAPTUPLE_LIVE;
                }
-               else if (TransactionIdDidCommit(tuple->t_xmax))
-                       tuple->t_infomask |= HEAP_XMAX_COMMITTED;
-               else if (TransactionIdIsInProgress(tuple->t_xmax))
-                       return HEAPTUPLE_DELETE_IN_PROGRESS;
                else
                {
                        /*
-                        * Not Aborted, Not Committed, Not in Progress -
+                        * Not in Progress, Not Committed, Not Aborted -
                         * so it's from crashed process. - vadim 06/02/97
                         */
                        tuple->t_infomask |= HEAP_XMAX_INVALID;