]> granicus.if.org Git - postgresql/commitdiff
Rework handling of subtransactions in 2PC recovery
authorSimon Riggs <simon@2ndQuadrant.com>
Thu, 27 Apr 2017 12:41:22 +0000 (14:41 +0200)
committerSimon Riggs <simon@2ndQuadrant.com>
Thu, 27 Apr 2017 12:41:22 +0000 (14:41 +0200)
The bug fixed by 0874d4f3e183757ba15a4b3f3bf563e0393dd9c2
caused us to question and rework the handling of
subtransactions in 2PC during and at end of recovery.
Patch adds checks and tests to ensure no further bugs.

This effectively removes the temporary measure put in place
by 546c13e11b29a5408b9d6a6e3cca301380b47f7f.

Author: Simon Riggs
Reviewed-by: Tom Lane, Michael Paquier
Discussion: http://postgr.es/m/CANP8+j+vvXmruL_i2buvdhMeVv5TQu0Hm2+C5N+kdVwHJuor8w@mail.gmail.com

src/backend/access/transam/subtrans.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/storage/ipc/procarray.c
src/include/access/subtrans.h
src/include/access/twophase.h

index 70828e517031c4748673d0ce38ac45b85f068445..cc68484a5d633db18db3e4b8f30380a7662af673 100644 (file)
@@ -68,11 +68,9 @@ static bool SubTransPagePrecedes(int page1, int page2);
 
 /*
  * Record the parent of a subtransaction in the subtrans log.
- *
- * In some cases we may need to overwrite an existing value.
  */
 void
-SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
+SubTransSetParent(TransactionId xid, TransactionId parent)
 {
        int                     pageno = TransactionIdToPage(xid);
        int                     entryno = TransactionIdToEntry(xid);
@@ -80,6 +78,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
        TransactionId *ptr;
 
        Assert(TransactionIdIsValid(parent));
+       Assert(TransactionIdFollows(xid, parent));
 
        LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
 
@@ -87,13 +86,17 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
        ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
        ptr += entryno;
 
-       /* Current state should be 0 */
-       Assert(*ptr == InvalidTransactionId ||
-                  (*ptr == parent && overwriteOK));
-
-       *ptr = parent;
-
-       SubTransCtl->shared->page_dirty[slotno] = true;
+       /*
+        * It's possible we'll try to set the parent xid multiple times
+        * but we shouldn't ever be changing the xid from one valid xid
+        * to another valid xid, which would corrupt the data structure.
+        */
+       if (*ptr != parent)
+       {
+               Assert(*ptr == InvalidTransactionId);
+               *ptr = parent;
+               SubTransCtl->shared->page_dirty[slotno] = true;
+       }
 
        LWLockRelease(SubtransControlLock);
 }
@@ -157,6 +160,15 @@ SubTransGetTopmostTransaction(TransactionId xid)
                if (TransactionIdPrecedes(parentXid, TransactionXmin))
                        break;
                parentXid = SubTransGetParent(parentXid);
+
+               /*
+                * By convention the parent xid gets allocated first, so should
+                * always precede the child xid. Anything else points to a corrupted
+                * data structure that could lead to an infinite loop, so exit.
+                */
+               if (!TransactionIdPrecedes(parentXid, previousXid))
+                       elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
+                                                       previousXid, parentXid);
        }
 
        Assert(TransactionIdIsValid(previousXid));
index fa7124d90394c302fabd0b2a70e991c1a3ef3bbd..c9fff42991cdee8cdf3a973b3c8b662744b62c17 100644 (file)
@@ -221,8 +221,7 @@ static void RemoveGXact(GlobalTransaction gxact);
 static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
 static char *ProcessTwoPhaseBuffer(TransactionId xid,
                                                        XLogRecPtr      prepare_start_lsn,
-                                                       bool fromdisk, bool overwriteOK, bool setParent,
-                                                       bool setNextXid);
+                                                       bool fromdisk, bool setParent, bool setNextXid);
 static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
                                const char *gid, TimestampTz prepared_at, Oid owner,
                                Oid databaseid);
@@ -1743,8 +1742,7 @@ restoreTwoPhaseData(void)
                        xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
 
                        buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
-                                                                               true, false, false,
-                                                                               false);
+                                                                               true, false, false);
                        if (buf == NULL)
                                continue;
 
@@ -1804,8 +1802,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
 
                buf = ProcessTwoPhaseBuffer(xid,
                                gxact->prepare_start_lsn,
-                               gxact->ondisk, false, false,
-                               true);
+                               gxact->ondisk, false, true);
 
                if (buf == NULL)
                        continue;
@@ -1858,12 +1855,12 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
  * This is never called at the end of recovery - we use
  * RecoverPreparedTransactions() at that point.
  *
- * Currently we simply call SubTransSetParent() for any subxids of prepared
- * transactions. If overwriteOK is true, it's OK if some XIDs have already
- * been marked in pg_subtrans.
+ * The lack of calls to SubTransSetParent() calls here is by design;
+ * those calls are made by RecoverPreparedTransactions() at the end of recovery
+ * for those xacts that need this.
  */
 void
-StandbyRecoverPreparedTransactions(bool overwriteOK)
+StandbyRecoverPreparedTransactions(void)
 {
        int                     i;
 
@@ -1880,8 +1877,7 @@ StandbyRecoverPreparedTransactions(bool overwriteOK)
 
                buf = ProcessTwoPhaseBuffer(xid,
                                gxact->prepare_start_lsn,
-                               gxact->ondisk, overwriteOK, true,
-                               false);
+                               gxact->ondisk, false, false);
                if (buf != NULL)
                        pfree(buf);
        }
@@ -1895,6 +1891,13 @@ StandbyRecoverPreparedTransactions(bool overwriteOK)
  * each prepared transaction (reacquire locks, etc).
  *
  * This is run during database startup.
+ *
+ * At the end of recovery the way we take snapshots will change. We now need
+ * to mark all running transactions with their full SubTransSetParent() info
+ * to allow normal snapshots to work correctly if snapshots overflow.
+ * We do this here because by definition prepared transactions are the only
+ * type of write transaction still running, so this is necessary and
+ * complete.
  */
 void
 RecoverPreparedTransactions(void)
@@ -1913,15 +1916,21 @@ RecoverPreparedTransactions(void)
                TwoPhaseFileHeader *hdr;
                TransactionId *subxids;
                const char *gid;
-               bool            overwriteOK = false;
-               int                     i;
 
                xid = gxact->xid;
 
+               /*
+                * Reconstruct subtrans state for the transaction --- needed
+                * because pg_subtrans is not preserved over a restart.  Note that
+                * we are linking all the subtransactions directly to the
+                * top-level XID; there may originally have been a more complex
+                * hierarchy, but there's no need to restore that exactly.
+                * It's possible that SubTransSetParent has been set before, if
+                * the prepared transaction generated xid assignment records.
+                */
                buf = ProcessTwoPhaseBuffer(xid,
                                gxact->prepare_start_lsn,
-                               gxact->ondisk, false, false,
-                               false);
+                               gxact->ondisk, true, false);
                if (buf == NULL)
                        continue;
 
@@ -1939,25 +1948,6 @@ RecoverPreparedTransactions(void)
                bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
                bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
 
-               /*
-                * It's possible that SubTransSetParent has been set before, if
-                * the prepared transaction generated xid assignment records. Test
-                * here must match one used in AssignTransactionId().
-                */
-               if (InHotStandby && (hdr->nsubxacts >= PGPROC_MAX_CACHED_SUBXIDS ||
-                                                        XLogLogicalInfoActive()))
-                       overwriteOK = true;
-
-               /*
-                * Reconstruct subtrans state for the transaction --- needed
-                * because pg_subtrans is not preserved over a restart.  Note that
-                * we are linking all the subtransactions directly to the
-                * top-level XID; there may originally have been a more complex
-                * hierarchy, but there's no need to restore that exactly.
-                */
-               for (i = 0; i < hdr->nsubxacts; i++)
-                       SubTransSetParent(subxids[i], xid, true);
-
                /*
                 * Recreate its GXACT and dummy PGPROC. But, check whether
                 * it was added in redo and already has a shmem entry for
@@ -2006,8 +1996,7 @@ RecoverPreparedTransactions(void)
  * Given a transaction id, read it either from disk or read it directly
  * via shmem xlog record pointer using the provided "prepare_start_lsn".
  *
- * If setParent is true, then use the overwriteOK parameter to set up
- * subtransaction parent linkages.
+ * If setParent is true, set up subtransaction parent linkages.
  *
  * If setNextXid is true, set ShmemVariableCache->nextXid to the newest
  * value scanned.
@@ -2015,7 +2004,7 @@ RecoverPreparedTransactions(void)
 static char *
 ProcessTwoPhaseBuffer(TransactionId xid,
                                          XLogRecPtr prepare_start_lsn,
-                                         bool fromdisk, bool overwriteOK,
+                                         bool fromdisk,
                                          bool setParent, bool setNextXid)
 {
        TransactionId origNextXid = ShmemVariableCache->nextXid;
@@ -2142,7 +2131,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
                }
 
                if (setParent)
-                       SubTransSetParent(subxid, xid, overwriteOK);
+                       SubTransSetParent(subxid, xid);
        }
 
        return buf;
index 92b263aea1c401cdc65de5b407d53ec288c98b34..605639b0c3d57011ba4eba3a4497583a58f443ba 100644 (file)
@@ -559,7 +559,7 @@ AssignTransactionId(TransactionState s)
                XactTopTransactionId = s->transactionId;
 
        if (isSubXact)
-               SubTransSetParent(s->transactionId, s->parent->transactionId, false);
+               SubTransSetParent(s->transactionId, s->parent->transactionId);
 
        /*
         * If it's a top-level transaction, the predicate locking system needs to
index c7667879c65894dece013add4d6badc3e5cbd077..a89d99838ac4bf639f2dc72c53844b796c65ab70 100644 (file)
@@ -6930,7 +6930,7 @@ StartupXLOG(void)
 
                                ProcArrayApplyRecoveryInfo(&running);
 
-                               StandbyRecoverPreparedTransactions(false);
+                               StandbyRecoverPreparedTransactions();
                        }
                }
 
@@ -9692,7 +9692,7 @@ xlog_redo(XLogReaderState *record)
 
                        ProcArrayApplyRecoveryInfo(&running);
 
-                       StandbyRecoverPreparedTransactions(true);
+                       StandbyRecoverPreparedTransactions();
                }
 
                /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
index ebf6a9292391a860e766992fb13189ae758c1363..4976bb03c7f6359bde16233f7a51c070a30664b0 100644 (file)
@@ -943,7 +943,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
         * have attempted to SubTransSetParent().
         */
        for (i = 0; i < nsubxids; i++)
-               SubTransSetParent(subxids[i], topxid, false);
+               SubTransSetParent(subxids[i], topxid);
 
        /* KnownAssignedXids isn't maintained yet, so we're done for now */
        if (standbyState == STANDBY_INITIALIZED)
index bd30f5861f2306d8fe8b3f28d0734a6adc9c6015..847359873a9e3d7589b8b6b015e49dd2e77956de 100644 (file)
@@ -14,7 +14,7 @@
 /* Number of SLRU buffers to use for subtrans */
 #define NUM_SUBTRANS_BUFFERS   32
 
-extern void SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK);
+extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
 extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
 
index 4d547c55539788baef0b54af7e68e43b60378b27..80ec4ca4a5d67bfc2383958c2761f33dd08fffc7 100644 (file)
@@ -46,7 +46,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
 
 extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
                                                        int *nxids_p);
-extern void StandbyRecoverPreparedTransactions(bool overwriteOK);
+extern void StandbyRecoverPreparedTransactions(void);
 extern void RecoverPreparedTransactions(void);
 
 extern void CheckPointTwoPhase(XLogRecPtr redo_horizon);