]> granicus.if.org Git - postgresql/blobdiff - src/backend/access/transam/xact.c
pgindent run for 9.4
[postgresql] / src / backend / access / transam / xact.c
index 79158d8d71e2650ebad2ae45d9558d0223458325..3e744097c79ce30920ed8ae7b70b045c0ba5166f 100644 (file)
@@ -5,12 +5,12 @@
  *
  * See src/backend/access/transam/README for more information.
  *
- * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.259 2008/03/17 02:18:55 tgl Exp $
+ *       src/backend/access/transam/xact.c
  *
  *-------------------------------------------------------------------------
  */
 #include "access/twophase.h"
 #include "access/xact.h"
 #include "access/xlogutils.h"
+#include "catalog/catalog.h"
 #include "catalog/namespace.h"
+#include "catalog/storage.h"
 #include "commands/async.h"
 #include "commands/tablecmds.h"
 #include "commands/trigger.h"
 #include "executor/spi.h"
 #include "libpq/be-fsstubs.h"
+#include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "replication/walsender.h"
+#include "replication/syncrep.h"
 #include "storage/fd.h"
 #include "storage/lmgr.h"
+#include "storage/predicate.h"
+#include "storage/proc.h"
 #include "storage/procarray.h"
 #include "storage/sinvaladt.h"
 #include "storage/smgr.h"
+#include "utils/catcache.h"
 #include "utils/combocid.h"
-#include "utils/flatfiles.h"
 #include "utils/guc.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
-#include "utils/relcache.h"
-#include "utils/xml.h"
+#include "utils/relmapper.h"
+#include "utils/snapmgr.h"
+#include "utils/timeout.h"
+#include "utils/timestamp.h"
+#include "pg_trace.h"
 
 
 /*
@@ -57,10 +67,10 @@ int                 XactIsoLevel;
 bool           DefaultXactReadOnly = false;
 bool           XactReadOnly;
 
-bool           XactSyncCommit = true;
+bool           DefaultXactDeferrable = false;
+bool           XactDeferrable;
 
-int                    CommitDelay = 0;        /* precommit delay in microseconds */
-int                    CommitSiblings = 5; /* # concurrent xacts needed to sleep */
+int                    synchronous_commit = SYNCHRONOUS_COMMIT_ON;
 
 /*
  * MyXactAccessedTempRel is set when a temporary relation is accessed.
@@ -107,7 +117,8 @@ typedef enum TBlockState
        /* subtransaction states */
        TBLOCK_SUBBEGIN,                        /* starting a subtransaction */
        TBLOCK_SUBINPROGRESS,           /* live subtransaction */
-       TBLOCK_SUBEND,                          /* RELEASE received */
+       TBLOCK_SUBRELEASE,                      /* RELEASE received */
+       TBLOCK_SUBCOMMIT,                       /* COMMIT received while TBLOCK_SUBINPROGRESS */
        TBLOCK_SUBABORT,                        /* failed subxact, awaiting ROLLBACK */
        TBLOCK_SUBABORT_END,            /* failed subxact, ROLLBACK received */
        TBLOCK_SUBABORT_PENDING,        /* live subxact, ROLLBACK received */
@@ -134,8 +145,10 @@ typedef struct TransactionStateData
        int                     nChildXids;             /* # of subcommitted child XIDs */
        int                     maxChildXids;   /* allocated size of childXids[] */
        Oid                     prevUser;               /* previous CurrentUserId setting */
-       bool            prevSecDefCxt;  /* previous SecurityDefinerContext setting */
+       int                     prevSecContext; /* previous SecurityRestrictionContext */
        bool            prevXactReadOnly;               /* entry-time xact r/o state */
+       bool            startedInRecovery;              /* did we start in recovery? */
+       bool            didLogXid;              /* has xid been included in WAL record? */
        struct TransactionStateData *parent;            /* back link to parent */
 } TransactionStateData;
 
@@ -162,11 +175,20 @@ static TransactionStateData TopTransactionStateData = {
        0,                                                      /* # of subcommitted child Xids */
        0,                                                      /* allocated size of childXids[] */
        InvalidOid,                                     /* previous CurrentUserId setting */
-       false,                                          /* previous SecurityDefinerContext setting */
+       0,                                                      /* previous SecurityRestrictionContext */
        false,                                          /* entry-time xact r/o state */
+       false,                                          /* startedInRecovery */
+       false,                                          /* didLogXid */
        NULL                                            /* link to parent state block */
 };
 
+/*
+ * unreportedXids holds XIDs of all subtransactions that have not yet been
+ * reported in a XLOG_XACT_ASSIGNMENT record.
+ */
+static int     nUnreportedXids;
+static TransactionId unreportedXids[PGPROC_MAX_CACHED_SUBXIDS];
+
 static TransactionState CurrentTransactionState = &TopTransactionStateData;
 
 /*
@@ -237,7 +259,7 @@ static void AbortTransaction(void);
 static void AtAbort_Memory(void);
 static void AtCleanup_Memory(void);
 static void AtAbort_ResourceOwner(void);
-static void AtCommit_LocalCache(void);
+static void AtCCI_LocalCache(void);
 static void AtCommit_Memory(void);
 static void AtStart_Cache(void);
 static void AtStart_Memory(void);
@@ -247,11 +269,12 @@ static void CallSubXactCallbacks(SubXactEvent event,
                                         SubTransactionId mySubid,
                                         SubTransactionId parentSubid);
 static void CleanupTransaction(void);
+static void CheckTransactionChain(bool isTopLevel, bool throwError,
+                                         const char *stmtType);
 static void CommitTransaction(void);
 static TransactionId RecordTransactionAbort(bool isSubXact);
 static void StartTransaction(void);
 
-static void RecordSubTransactionCommit(void);
 static void StartSubTransaction(void);
 static void CommitSubTransaction(void);
 static void AbortSubTransaction(void);
@@ -301,8 +324,7 @@ IsTransactionState(void)
 /*
  *     IsAbortedTransactionBlockState
  *
- *     This returns true if we are currently running a query
- *     within an aborted transaction block.
+ *     This returns true if we are within an aborted transaction block.
  */
 bool
 IsAbortedTransactionBlockState(void)
@@ -374,6 +396,45 @@ GetCurrentTransactionIdIfAny(void)
        return CurrentTransactionState->transactionId;
 }
 
+/*
+ *     MarkCurrentTransactionIdLoggedIfAny
+ *
+ * Remember that the current xid - if it is assigned - has now been WAL-logged.
+ */
+void
+MarkCurrentTransactionIdLoggedIfAny(void)
+{
+       if (TransactionIdIsValid(CurrentTransactionState->transactionId))
+               CurrentTransactionState->didLogXid = true;
+}
+
+
+/*
+ *     GetStableLatestTransactionId
+ *
+ * Get the transaction's XID if it has one, else read the next-to-be-assigned
+ * XID.  Once we have a value, return that same value for the remainder of the
+ * current transaction.  This is meant to provide the reference point for the
+ * age(xid) function, but might be useful for other maintenance tasks as well.
+ */
+TransactionId
+GetStableLatestTransactionId(void)
+{
+       static LocalTransactionId lxid = InvalidLocalTransactionId;
+       static TransactionId stablexid = InvalidTransactionId;
+
+       if (lxid != MyProc->lxid)
+       {
+               lxid = MyProc->lxid;
+               stablexid = GetTopTransactionIdIfAny();
+               if (!TransactionIdIsValid(stablexid))
+                       stablexid = ReadNewTransactionId();
+       }
+
+       Assert(TransactionIdIsValid(stablexid));
+
+       return stablexid;
+}
 
 /*
  * AssignTransactionId
@@ -389,6 +450,7 @@ AssignTransactionId(TransactionState s)
 {
        bool            isSubXact = (s->parent != NULL);
        ResourceOwner currentOwner;
+       bool            log_unknown_top = false;
 
        /* Assert that caller didn't screw up */
        Assert(!TransactionIdIsValid(s->transactionId));
@@ -396,10 +458,46 @@ AssignTransactionId(TransactionState s)
 
        /*
         * Ensure parent(s) have XIDs, so that a child always has an XID later
-        * than its parent.
+        * than its parent.  Mustn't recurse here, or we might get a stack overflow
+        * if we're at the bottom of a huge stack of subtransactions none of which
+        * have XIDs yet.
         */
        if (isSubXact && !TransactionIdIsValid(s->parent->transactionId))
-               AssignTransactionId(s->parent);
+       {
+               TransactionState p = s->parent;
+               TransactionState *parents;
+               size_t          parentOffset = 0;
+
+               parents = palloc(sizeof(TransactionState) * s->nestingLevel);
+               while (p != NULL && !TransactionIdIsValid(p->transactionId))
+               {
+                       parents[parentOffset++] = p;
+                       p = p->parent;
+               }
+
+               /*
+                * This is technically a recursive call, but the recursion will never
+                * be more than one layer deep.
+                */
+               while (parentOffset != 0)
+                       AssignTransactionId(parents[--parentOffset]);
+
+               pfree(parents);
+       }
+
+       /*
+        * When wal_level=logical, guarantee that a subtransaction's xid can only
+        * be seen in the WAL stream if its toplevel xid has been logged before.
+        * If necessary we log a xact_assignment record with fewer than
+        * PGPROC_MAX_CACHED_SUBXIDS. Note that it is fine if didLogXid isn't set
+        * for a transaction even though it appears in a WAL record, we just might
+        * superfluously log something. That can happen when an xid is included
+        * somewhere inside a wal record, but not in XLogRecord->xl_xid, like in
+        * xl_standby_locks.
+        */
+       if (isSubXact && XLogLogicalInfoActive() &&
+               !TopTransactionStateData.didLogXid)
+               log_unknown_top = true;
 
        /*
         * Generate a new Xid and record it in PG_PROC and pg_subtrans.
@@ -412,7 +510,14 @@ AssignTransactionId(TransactionState s)
        s->transactionId = GetNewTransactionId(isSubXact);
 
        if (isSubXact)
-               SubTransSetParent(s->transactionId, s->parent->transactionId);
+               SubTransSetParent(s->transactionId, s->parent->transactionId, false);
+
+       /*
+        * If it's a top-level transaction, the predicate locking system needs to
+        * be told about it too.
+        */
+       if (!isSubXact)
+               RegisterPredicateLockingXid(s->transactionId);
 
        /*
         * Acquire lock on the transaction XID.  (We assume this cannot block.) We
@@ -433,8 +538,66 @@ AssignTransactionId(TransactionState s)
        }
        PG_END_TRY();
        CurrentResourceOwner = currentOwner;
-}
 
+       /*
+        * Every PGPROC_MAX_CACHED_SUBXIDS assigned transaction ids within each
+        * top-level transaction we issue a WAL record for the assignment. We
+        * include the top-level xid and all the subxids that have not yet been
+        * reported using XLOG_XACT_ASSIGNMENT records.
+        *
+        * This is required to limit the amount of shared memory required in a hot
+        * standby server to keep track of in-progress XIDs. See notes for
+        * RecordKnownAssignedTransactionIds().
+        *
+        * We don't keep track of the immediate parent of each subxid, only the
+        * top-level transaction that each subxact belongs to. This is correct in
+        * recovery only because aborted subtransactions are separately WAL
+        * logged.
+        *
+        * This is correct even for the case where several levels above us didn't
+        * have an xid assigned as we recursed up to them beforehand.
+        */
+       if (isSubXact && XLogStandbyInfoActive())
+       {
+               unreportedXids[nUnreportedXids] = s->transactionId;
+               nUnreportedXids++;
+
+               /*
+                * ensure this test matches similar one in
+                * RecoverPreparedTransactions()
+                */
+               if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
+                       log_unknown_top)
+               {
+                       XLogRecData rdata[2];
+                       xl_xact_assignment xlrec;
+
+                       /*
+                        * xtop is always set by now because we recurse up transaction
+                        * stack to the highest unassigned xid and then come back down
+                        */
+                       xlrec.xtop = GetTopTransactionId();
+                       Assert(TransactionIdIsValid(xlrec.xtop));
+                       xlrec.nsubxacts = nUnreportedXids;
+
+                       rdata[0].data = (char *) &xlrec;
+                       rdata[0].len = MinSizeOfXactAssignment;
+                       rdata[0].buffer = InvalidBuffer;
+                       rdata[0].next = &rdata[1];
+
+                       rdata[1].data = (char *) unreportedXids;
+                       rdata[1].len = nUnreportedXids * sizeof(TransactionId);
+                       rdata[1].buffer = InvalidBuffer;
+                       rdata[1].next = NULL;
+
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
+
+                       nUnreportedXids = 0;
+                       /* mark top, not current xact as having been logged */
+                       TopTransactionStateData.didLogXid = true;
+               }
+       }
+}
 
 /*
  *     GetCurrentSubTransactionId
@@ -447,6 +610,27 @@ GetCurrentSubTransactionId(void)
        return s->subTransactionId;
 }
 
+/*
+ *     SubTransactionIsActive
+ *
+ * Test if the specified subxact ID is still active.  Note caller is
+ * responsible for checking whether this ID is relevant to the current xact.
+ */
+bool
+SubTransactionIsActive(SubTransactionId subxid)
+{
+       TransactionState s;
+
+       for (s = CurrentTransactionState; s != NULL; s = s->parent)
+       {
+               if (s->state == TRANS_ABORT)
+                       continue;
+               if (s->subTransactionId == subxid)
+                       return true;
+       }
+       return false;
+}
+
 
 /*
  *     GetCurrentCommandId
@@ -540,7 +724,7 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
 
        /*
         * We always say that BootstrapTransactionId is "not my transaction ID"
-        * even when it is (ie, during bootstrap).      Along with the fact that
+        * even when it is (ie, during bootstrap).  Along with the fact that
         * transam.c always treats BootstrapTransactionId as already committed,
         * this causes the tqual.c routines to see all tuples as committed, which
         * is what we need during bootstrap.  (Bootstrap mode only inserts tuples,
@@ -563,7 +747,8 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
         */
        for (s = CurrentTransactionState; s != NULL; s = s->parent)
        {
-               int low, high;
+               int                     low,
+                                       high;
 
                if (s->state == TRANS_ABORT)
                        continue;
@@ -576,8 +761,8 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
                high = s->nChildXids - 1;
                while (low <= high)
                {
-                       int                             middle;
-                       TransactionId   probe;
+                       int                     middle;
+                       TransactionId probe;
 
                        middle = low + (high - low) / 2;
                        probe = s->childXids[middle];
@@ -593,6 +778,18 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
        return false;
 }
 
+/*
+ *     TransactionStartedDuringRecovery
+ *
+ * Returns true if the current transaction started while recovery was still
+ * in progress. Recovery might have ended since so RecoveryInProgress() might
+ * return false already.
+ */
+bool
+TransactionStartedDuringRecovery(void)
+{
+       return CurrentTransactionState->startedInRecovery;
+}
 
 /*
  *     CommandCounterIncrement
@@ -601,50 +798,34 @@ void
 CommandCounterIncrement(void)
 {
        /*
-        * If the current value of the command counter hasn't been "used" to
-        * mark tuples, we need not increment it, since there's no need to
-        * distinguish a read-only command from others.  This helps postpone
-        * command counter overflow, and keeps no-op CommandCounterIncrement
-        * operations cheap.
+        * If the current value of the command counter hasn't been "used" to mark
+        * tuples, we need not increment it, since there's no need to distinguish
+        * a read-only command from others.  This helps postpone command counter
+        * overflow, and keeps no-op CommandCounterIncrement operations cheap.
         */
        if (currentCommandIdUsed)
        {
                currentCommandId += 1;
-               if (currentCommandId == FirstCommandId) /* check for overflow */
+               if (currentCommandId == InvalidCommandId)
                {
                        currentCommandId -= 1;
                        ereport(ERROR,
                                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                 errmsg("cannot have more than 2^32-1 commands in a transaction")));
+                                        errmsg("cannot have more than 2^32-2 commands in a transaction")));
                }
                currentCommandIdUsed = false;
 
-               /* Propagate new command ID into static snapshots, if set */
-               if (SerializableSnapshot)
-                       SerializableSnapshot->curcid = currentCommandId;
-               if (LatestSnapshot)
-                       LatestSnapshot->curcid = currentCommandId;
+               /* Propagate new command ID into static snapshots */
+               SnapshotSetCommandId(currentCommandId);
 
                /*
-                * Make any catalog changes done by the just-completed command
-                * visible in the local syscache.  We obviously don't need to do
-                * this after a read-only command.  (But see hacks in inval.c
-                * to make real sure we don't think a command that queued inval
-                * messages was read-only.)
+                * Make any catalog changes done by the just-completed command visible
+                * in the local syscache.  We obviously don't need to do this after a
+                * read-only command.  (But see hacks in inval.c to make real sure we
+                * don't think a command that queued inval messages was read-only.)
                 */
-               AtCommit_LocalCache();
+               AtCCI_LocalCache();
        }
-
-       /*
-        * Make any other backends' catalog changes visible to me.
-        *
-        * XXX this is probably in the wrong place: CommandCounterIncrement
-        * should be purely a local operation, most likely.  However fooling
-        * with this will affect asynchronous cross-backend interactions,
-        * which doesn't seem like a wise thing to do in late beta, so save
-        * improving this for another day - tgl 2007-11-30
-        */
-       AtStart_Cache();
 }
 
 /*
@@ -685,7 +866,7 @@ AtStart_Memory(void)
        /*
         * If this is the first time through, create a private context for
         * AbortTransaction to work in.  By reserving some space now, we can
-        * insulate AbortTransaction from out-of-memory scenarios.      Like
+        * insulate AbortTransaction from out-of-memory scenarios.  Like
         * ErrorContext, we set it up with slow growth rate and a nonzero minimum
         * size, so that space will be reserved immediately.
         */
@@ -788,7 +969,7 @@ AtSubStart_ResourceOwner(void)
        Assert(s->parent != NULL);
 
        /*
-        * Create a resource owner for the subtransaction.      We make it a child of
+        * Create a resource owner for the subtransaction.  We make it a child of
         * the immediate parent's resource owner.
         */
        s->curTransactionOwner =
@@ -808,11 +989,9 @@ AtSubStart_ResourceOwner(void)
  *     RecordTransactionCommit
  *
  * Returns latest XID among xact and its children, or InvalidTransactionId
- * if the xact has no XID.     (We compute that here just because it's easier.)
- *
- * This is exported only to support an ugly hack in VACUUM FULL.
+ * if the xact has no XID.  (We compute that here just because it's easier.)
  */
-TransactionId
+static TransactionId
 RecordTransactionCommit(void)
 {
        TransactionId xid = GetTopTransactionIdIfAny();
@@ -820,13 +999,20 @@ RecordTransactionCommit(void)
        TransactionId latestXid = InvalidTransactionId;
        int                     nrels;
        RelFileNode *rels;
-       bool            haveNonTemp;
        int                     nchildren;
        TransactionId *children;
+       int                     nmsgs = 0;
+       SharedInvalidationMessage *invalMessages = NULL;
+       bool            RelcacheInitFileInval = false;
+       bool            wrote_xlog;
 
        /* Get data needed for commit record */
-       nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp);
+       nrels = smgrGetPendingDeletes(true, &rels);
        nchildren = xactGetCommittedChildren(&children);
+       if (XLogStandbyInfoActive())
+               nmsgs = xactGetCommittedInvalidationMessages(&invalMessages,
+                                                                                                        &RelcacheInitFileInval);
+       wrote_xlog = (XactLastRecEnd != 0);
 
        /*
         * If we haven't been assigned an XID yet, we neither can, nor do we want
@@ -848,12 +1034,12 @@ RecordTransactionCommit(void)
 
                /*
                 * If we didn't create XLOG entries, we're done here; otherwise we
-                * should flush those entries the same as a commit record.      (An
+                * should flush those entries the same as a commit record.  (An
                 * example of a possible record that wouldn't cause an XID to be
                 * assigned is a sequence advance record due to nextval() --- we want
                 * to flush that to disk before reporting commit.)
                 */
-               if (XactLastRecEnd.xrecoff == 0)
+               if (!wrote_xlog)
                        goto cleanup;
        }
        else
@@ -861,15 +1047,11 @@ RecordTransactionCommit(void)
                /*
                 * Begin commit critical section and insert the commit XLOG record.
                 */
-               XLogRecData rdata[3];
-               int                     lastrdata = 0;
-               xl_xact_commit xlrec;
-
                /* Tell bufmgr and smgr to prepare for commit */
                BufmgrCommit();
 
                /*
-                * Mark ourselves as within our "commit critical section".      This
+                * Mark ourselves as within our "commit critical section".  This
                 * forces any concurrent checkpoint to wait until we've updated
                 * pg_clog.  Without this, it is possible for the checkpoint to set
                 * REDO after the XLOG record but fail to flush the pg_clog update to
@@ -877,96 +1059,160 @@ RecordTransactionCommit(void)
                 * crashes a little later.
                 *
                 * Note: we could, but don't bother to, set this flag in
-                * RecordTransactionAbort.      That's because loss of a transaction abort
+                * RecordTransactionAbort.  That's because loss of a transaction abort
                 * is noncritical; the presumption would be that it aborted, anyway.
                 *
-                * It's safe to change the inCommit flag of our own backend without
+                * It's safe to change the delayChkpt flag of our own backend without
                 * holding the ProcArrayLock, since we're the only one modifying it.
-                * This makes checkpoint's determination of which xacts are inCommit a
-                * bit fuzzy, but it doesn't matter.
+                * This makes checkpoint's determination of which xacts are delayChkpt
+                * a bit fuzzy, but it doesn't matter.
                 */
                START_CRIT_SECTION();
-               MyProc->inCommit = true;
+               MyPgXact->delayChkpt = true;
 
                SetCurrentTransactionStopTimestamp();
-               xlrec.xact_time = xactStopTimestamp;
-               xlrec.nrels = nrels;
-               xlrec.nsubxacts = nchildren;
-               rdata[0].data = (char *) (&xlrec);
-               rdata[0].len = MinSizeOfXactCommit;
-               rdata[0].buffer = InvalidBuffer;
-               /* dump rels to delete */
-               if (nrels > 0)
+
+               /*
+                * Do we need the long commit record? If not, use the compact format.
+                *
+                * For now always use the non-compact version if wal_level=logical, so
+                * we can hide commits from other databases. TODO: In the future we
+                * should merge compact and non-compact commits and use a flags
+                * variable to determine if it contains subxacts, relations or
+                * invalidation messages, that's more extensible and degrades more
+                * gracefully. Till then, it's just 20 bytes of overhead.
+                */
+               if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
+                       XLogLogicalInfoActive())
                {
-                       rdata[0].next = &(rdata[1]);
-                       rdata[1].data = (char *) rels;
-                       rdata[1].len = nrels * sizeof(RelFileNode);
-                       rdata[1].buffer = InvalidBuffer;
-                       lastrdata = 1;
+                       XLogRecData rdata[4];
+                       int                     lastrdata = 0;
+                       xl_xact_commit xlrec;
+
+                       /*
+                        * Set flags required for recovery processing of commits.
+                        */
+                       xlrec.xinfo = 0;
+                       if (RelcacheInitFileInval)
+                               xlrec.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE;
+                       if (forceSyncCommit)
+                               xlrec.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT;
+
+                       xlrec.dbId = MyDatabaseId;
+                       xlrec.tsId = MyDatabaseTableSpace;
+
+                       xlrec.xact_time = xactStopTimestamp;
+                       xlrec.nrels = nrels;
+                       xlrec.nsubxacts = nchildren;
+                       xlrec.nmsgs = nmsgs;
+                       rdata[0].data = (char *) (&xlrec);
+                       rdata[0].len = MinSizeOfXactCommit;
+                       rdata[0].buffer = InvalidBuffer;
+                       /* dump rels to delete */
+                       if (nrels > 0)
+                       {
+                               rdata[0].next = &(rdata[1]);
+                               rdata[1].data = (char *) rels;
+                               rdata[1].len = nrels * sizeof(RelFileNode);
+                               rdata[1].buffer = InvalidBuffer;
+                               lastrdata = 1;
+                       }
+                       /* dump committed child Xids */
+                       if (nchildren > 0)
+                       {
+                               rdata[lastrdata].next = &(rdata[2]);
+                               rdata[2].data = (char *) children;
+                               rdata[2].len = nchildren * sizeof(TransactionId);
+                               rdata[2].buffer = InvalidBuffer;
+                               lastrdata = 2;
+                       }
+                       /* dump shared cache invalidation messages */
+                       if (nmsgs > 0)
+                       {
+                               rdata[lastrdata].next = &(rdata[3]);
+                               rdata[3].data = (char *) invalMessages;
+                               rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
+                               rdata[3].buffer = InvalidBuffer;
+                               lastrdata = 3;
+                       }
+                       rdata[lastrdata].next = NULL;
+
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
                }
-               /* dump committed child Xids */
-               if (nchildren > 0)
+               else
                {
-                       rdata[lastrdata].next = &(rdata[2]);
-                       rdata[2].data = (char *) children;
-                       rdata[2].len = nchildren * sizeof(TransactionId);
-                       rdata[2].buffer = InvalidBuffer;
-                       lastrdata = 2;
-               }
-               rdata[lastrdata].next = NULL;
+                       XLogRecData rdata[2];
+                       int                     lastrdata = 0;
+                       xl_xact_commit_compact xlrec;
+
+                       xlrec.xact_time = xactStopTimestamp;
+                       xlrec.nsubxacts = nchildren;
+                       rdata[0].data = (char *) (&xlrec);
+                       rdata[0].len = MinSizeOfXactCommitCompact;
+                       rdata[0].buffer = InvalidBuffer;
+                       /* dump committed child Xids */
+                       if (nchildren > 0)
+                       {
+                               rdata[0].next = &(rdata[1]);
+                               rdata[1].data = (char *) children;
+                               rdata[1].len = nchildren * sizeof(TransactionId);
+                               rdata[1].buffer = InvalidBuffer;
+                               lastrdata = 1;
+                       }
+                       rdata[lastrdata].next = NULL;
 
-               (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
+               }
        }
 
        /*
-        * Check if we want to commit asynchronously.  If the user has set
-        * synchronous_commit = off, and we're not doing cleanup of any non-temp
-        * rels nor committing any command that wanted to force sync commit, then
-        * we can defer flushing XLOG.  (We must not allow asynchronous commit if
-        * there are any non-temp tables to be deleted, because we might delete
-        * the files before the COMMIT record is flushed to disk.  We do allow
-        * asynchronous commit if all to-be-deleted tables are temporary though,
-        * since they are lost anyway if we crash.)
+        * Check if we want to commit asynchronously.  We can allow the XLOG flush
+        * to happen asynchronously if synchronous_commit=off, or if the current
+        * transaction has not performed any WAL-logged operation.  The latter
+        * case can arise if the current transaction wrote only to temporary
+        * and/or unlogged tables.  In case of a crash, the loss of such a
+        * transaction will be irrelevant since temp tables will be lost anyway,
+        * and unlogged tables will be truncated.  (Given the foregoing, you might
+        * think that it would be unnecessary to emit the XLOG record at all in
+        * this case, but we don't currently try to do that.  It would certainly
+        * cause problems at least in Hot Standby mode, where the
+        * KnownAssignedXids machinery requires tracking every XID assignment.  It
+        * might be OK to skip it only when wal_level < hot_standby, but for now
+        * we don't.)
+        *
+        * However, if we're doing cleanup of any non-temp rels or committing any
+        * command that wanted to force sync commit, then we must flush XLOG
+        * immediately.  (We must not allow asynchronous commit if there are any
+        * non-temp tables to be deleted, because we might delete the files before
+        * the COMMIT record is flushed to disk.  We do allow asynchronous commit
+        * if all to-be-deleted tables are temporary though, since they are lost
+        * anyway if we crash.)
         */
-       if (XactSyncCommit || forceSyncCommit || haveNonTemp)
+       if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) ||
+               forceSyncCommit || nrels > 0)
        {
-               /*
-                * Synchronous commit case.
-                *
-                * Sleep before flush! So we can flush more than one commit records
-                * per single fsync.  (The idea is some other backend may do the
-                * XLogFlush while we're sleeping.  This needs work still, because on
-                * most Unixen, the minimum select() delay is 10msec or more, which is
-                * way too long.)
-                *
-                * We do not sleep if enableFsync is not turned on, nor if there are
-                * fewer than CommitSiblings other backends with active transactions.
-                */
-               if (CommitDelay > 0 && enableFsync &&
-                       CountActiveBackends() >= CommitSiblings)
-                       pg_usleep(CommitDelay);
-
                XLogFlush(XactLastRecEnd);
 
                /*
                 * Now we may update the CLOG, if we wrote a COMMIT record above
                 */
                if (markXidCommitted)
-               {
-                       TransactionIdCommit(xid);
-                       /* to avoid race conditions, the parent must commit first */
-                       TransactionIdCommitTree(nchildren, children);
-               }
+                       TransactionIdCommitTree(xid, nchildren, children);
        }
        else
        {
                /*
-                * Asynchronous commit case.
+                * Asynchronous commit case:
+                *
+                * This enables possible committed transaction loss in the case of a
+                * postmaster crash because WAL buffers are left unwritten. Ideally we
+                * could issue the WAL write without the fsync, but some
+                * wal_sync_methods do not allow separate write/fsync.
                 *
                 * Report the latest async commit LSN, so that the WAL writer knows to
                 * flush this commit.
                 */
-               XLogSetAsyncCommitLSN(XactLastRecEnd);
+               XLogSetAsyncXactLSN(XactLastRecEnd);
 
                /*
                 * We must not immediately update the CLOG, since we didn't flush the
@@ -974,11 +1220,7 @@ RecordTransactionCommit(void)
                 * flushed before the CLOG may be updated.
                 */
                if (markXidCommitted)
-               {
-                       TransactionIdAsyncCommit(xid, XactLastRecEnd);
-                       /* to avoid race conditions, the parent must commit first */
-                       TransactionIdAsyncCommitTree(nchildren, children, XactLastRecEnd);
-               }
+                       TransactionIdAsyncCommitTree(xid, nchildren, children, XactLastRecEnd);
        }
 
        /*
@@ -987,15 +1229,24 @@ RecordTransactionCommit(void)
         */
        if (markXidCommitted)
        {
-               MyProc->inCommit = false;
+               MyPgXact->delayChkpt = false;
                END_CRIT_SECTION();
        }
 
        /* Compute latestXid while we have the child XIDs handy */
        latestXid = TransactionIdLatest(xid, nchildren, children);
 
+       /*
+        * Wait for synchronous replication, if required.
+        *
+        * Note that at this stage we have marked clog, but still show as running
+        * in the procarray and continue to hold locks.
+        */
+       if (wrote_xlog)
+               SyncRepWaitForLSN(XactLastRecEnd);
+
        /* Reset XactLastRecEnd until the next transaction writes something */
-       XactLastRecEnd.xrecoff = 0;
+       XactLastRecEnd = 0;
 
 cleanup:
        /* Clean up local data */
@@ -1007,11 +1258,18 @@ cleanup:
 
 
 /*
- *     AtCommit_LocalCache
+ *     AtCCI_LocalCache
  */
 static void
-AtCommit_LocalCache(void)
+AtCCI_LocalCache(void)
 {
+       /*
+        * Make any pending relation map changes visible.  We must do this before
+        * processing local sinval messages, so that the map changes will get
+        * reflected into the relcache when relcache invals are processed.
+        */
+       AtCCI_RelationMap();
+
        /*
         * Make catalog changes visible to me for the next command.
         */
@@ -1094,14 +1352,14 @@ AtSubCommit_childXids(void)
        /* Allocate or enlarge the parent array if necessary */
        if (s->parent->maxChildXids < new_nChildXids)
        {
-               int                             new_maxChildXids;
-               TransactionId  *new_childXids;
+               int                     new_maxChildXids;
+               TransactionId *new_childXids;
 
                /*
                 * Make it 2x what's needed right now, to avoid having to enlarge it
-                * repeatedly. But we can't go above MaxAllocSize.  (The latter
-                * limit is what ensures that we don't need to worry about integer
-                * overflow here or in the calculation of new_nChildXids.)
+                * repeatedly. But we can't go above MaxAllocSize.  (The latter limit
+                * is what ensures that we don't need to worry about integer overflow
+                * here or in the calculation of new_nChildXids.)
                 */
                new_maxChildXids = Min(new_nChildXids * 2,
                                                           (int) (MaxAllocSize / sizeof(TransactionId)));
@@ -1119,13 +1377,13 @@ AtSubCommit_childXids(void)
                 */
                if (s->parent->childXids == NULL)
                        new_childXids =
-                               MemoryContextAlloc(TopTransactionContext, 
+                               MemoryContextAlloc(TopTransactionContext,
                                                                   new_maxChildXids * sizeof(TransactionId));
                else
-                       new_childXids = repalloc(s->parent->childXids, 
-                                                                        new_maxChildXids * sizeof(TransactionId));
+                       new_childXids = repalloc(s->parent->childXids,
+                                                                  new_maxChildXids * sizeof(TransactionId));
 
-               s->parent->childXids  = new_childXids;
+               s->parent->childXids = new_childXids;
                s->parent->maxChildXids = new_maxChildXids;
        }
 
@@ -1134,7 +1392,7 @@ AtSubCommit_childXids(void)
         *
         * Note: We rely on the fact that the XID of a child always follows that
         * of its parent.  By copying the XID of this subtransaction before the
-        * XIDs of its children, we ensure that the array stays ordered.  Likewise,
+        * XIDs of its children, we ensure that the array stays ordered. Likewise,
         * all XIDs already in the array belong to subtransactions started and
         * subcommitted before us, so their XIDs must precede ours.
         */
@@ -1156,36 +1414,6 @@ AtSubCommit_childXids(void)
        s->maxChildXids = 0;
 }
 
-/*
- * RecordSubTransactionCommit
- */
-static void
-RecordSubTransactionCommit(void)
-{
-       TransactionId xid = GetCurrentTransactionIdIfAny();
-
-       /*
-        * We do not log the subcommit in XLOG; it doesn't matter until the
-        * top-level transaction commits.
-        *
-        * We must mark the subtransaction subcommitted in the CLOG if it had a
-        * valid XID assigned.  If it did not, nobody else will ever know about
-        * the existence of this subxact.  We don't have to deal with deletions
-        * scheduled for on-commit here, since they'll be reassigned to our parent
-        * (who might still abort).
-        */
-       if (TransactionIdIsValid(xid))
-       {
-               /* XXX does this really need to be a critical section? */
-               START_CRIT_SECTION();
-
-               /* Record subtransaction subcommit */
-               TransactionIdSubCommit(xid);
-
-               END_CRIT_SECTION();
-       }
-}
-
 /* ----------------------------------------------------------------
  *                                             AbortTransaction stuff
  * ----------------------------------------------------------------
@@ -1195,7 +1423,7 @@ RecordSubTransactionCommit(void)
  *     RecordTransactionAbort
  *
  * Returns latest XID among xact and its children, or InvalidTransactionId
- * if the xact has no XID.     (We compute that here just because it's easier.)
+ * if the xact has no XID.  (We compute that here just because it's easier.)
  */
 static TransactionId
 RecordTransactionAbort(bool isSubXact)
@@ -1212,7 +1440,7 @@ RecordTransactionAbort(bool isSubXact)
 
        /*
         * If we haven't been assigned an XID, nobody will care whether we aborted
-        * or not.      Hence, we're done in that case.  It does not matter if we have
+        * or not.  Hence, we're done in that case.  It does not matter if we have
         * rels to delete (note that this routine is not responsible for actually
         * deleting 'em).  We cannot have any child XIDs, either.
         */
@@ -1220,7 +1448,7 @@ RecordTransactionAbort(bool isSubXact)
        {
                /* Reset XactLastRecEnd until the next transaction writes something */
                if (!isSubXact)
-                       XactLastRecEnd.xrecoff = 0;
+                       XactLastRecEnd = 0;
                return InvalidTransactionId;
        }
 
@@ -1228,7 +1456,7 @@ RecordTransactionAbort(bool isSubXact)
         * We have a valid XID, so we should write an ABORT record for it.
         *
         * We do not flush XLOG to disk here, since the default assumption after a
-        * crash would be that we aborted, anyway.      For the same reason, we don't
+        * crash would be that we aborted, anyway.  For the same reason, we don't
         * need to worry about interlocking against checkpoint start.
         */
 
@@ -1240,7 +1468,7 @@ RecordTransactionAbort(bool isSubXact)
                         xid);
 
        /* Fetch the data we need for the abort record */
-       nrels = smgrGetPendingDeletes(false, &rels, NULL);
+       nrels = smgrGetPendingDeletes(false, &rels);
        nchildren = xactGetCommittedChildren(&children);
 
        /* XXX do we really need a critical section here? */
@@ -1281,6 +1509,18 @@ RecordTransactionAbort(bool isSubXact)
 
        (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
 
+       /*
+        * Report the latest async abort LSN, so that the WAL writer knows to
+        * flush this abort. There's nothing to be gained by delaying this, since
+        * WALWriter may as well do this when it can. This is important with
+        * streaming replication because if we don't flush WAL regularly we will
+        * find that large aborts leave us with a long backlog for when commits
+        * occur after the abort, increasing our window of data loss should
+        * problems occur at that point.
+        */
+       if (!isSubXact)
+               XLogSetAsyncXactLSN(XactLastRecEnd);
+
        /*
         * Mark the transaction aborted in clog.  This is not absolutely necessary
         * but we may as well do it while we are here; also, in the subxact case
@@ -1288,14 +1528,8 @@ RecordTransactionAbort(bool isSubXact)
         * waiting for already-aborted subtransactions.  It is OK to do it without
         * having flushed the ABORT record to disk, because in event of a crash
         * we'd be assumed to have aborted anyway.
-        *
-        * The ordering here isn't critical but it seems best to mark the parent
-        * first.  This assures an atomic transition of all the subtransactions to
-        * aborted state from the point of view of concurrent
-        * TransactionIdDidAbort calls.
         */
-       TransactionIdAbort(xid);
-       TransactionIdAbortTree(nchildren, children);
+       TransactionIdAbortTree(xid, nchildren, children);
 
        END_CRIT_SECTION();
 
@@ -1313,7 +1547,7 @@ RecordTransactionAbort(bool isSubXact)
 
        /* Reset XactLastRecEnd until the next transaction writes something */
        if (!isSubXact)
-               XactLastRecEnd.xrecoff = 0;
+               XactLastRecEnd = 0;
 
        /* And clean up local data */
        if (rels)
@@ -1390,7 +1624,7 @@ AtSubAbort_childXids(void)
 
        /*
         * We keep the child-XID arrays in TopTransactionContext (see
-        * AtSubCommit_childXids).      This means we'd better free the array
+        * AtSubCommit_childXids).  This means we'd better free the array
         * explicitly at abort to avoid leakage.
         */
        if (s->childXids != NULL)
@@ -1398,6 +1632,13 @@ AtSubAbort_childXids(void)
        s->childXids = NULL;
        s->nChildXids = 0;
        s->maxChildXids = 0;
+
+       /*
+        * We could prune the unreportedXids array here. But we don't bother. That
+        * would potentially reduce the number of XLOG_XACT_ASSIGNMENT records,
+        * but it would likely add CPU time to the more common paths, so we
+        * choose not to do that.
+        */
 }
 
 /* ----------------------------------------------------------------
@@ -1506,11 +1747,25 @@ StartTransaction(void)
        s->transactionId = InvalidTransactionId;        /* until assigned */
 
        /*
-        * Make sure we've freed any old snapshot, and reset xact state variables
+        * Make sure we've reset xact state variables
+        *
+        * If recovery is still in progress, mark this transaction as read-only.
+        * We have lower level defences in XLogInsert and elsewhere to stop us
+        * from modifying data during recovery, but this gives the normal
+        * indication to the user that the transaction is read-only.
         */
-       FreeXactSnapshot();
+       if (RecoveryInProgress())
+       {
+               s->startedInRecovery = true;
+               XactReadOnly = true;
+       }
+       else
+       {
+               s->startedInRecovery = false;
+               XactReadOnly = DefaultXactReadOnly;
+       }
+       XactDeferrable = DefaultXactDeferrable;
        XactIsoLevel = DefaultXactIsoLevel;
-       XactReadOnly = DefaultXactReadOnly;
        forceSyncCommit = false;
        MyXactAccessedTempRel = false;
 
@@ -1522,6 +1777,12 @@ StartTransaction(void)
        currentCommandId = FirstCommandId;
        currentCommandIdUsed = false;
 
+       /*
+        * initialize reported xid accounting
+        */
+       nUnreportedXids = 0;
+       s->didLogXid = false;
+
        /*
         * must initialize resource-management stuff first
         */
@@ -1541,13 +1802,13 @@ StartTransaction(void)
        VirtualXactLockTableInsert(vxid);
 
        /*
-        * Advertise it in the proc array.      We assume assignment of
+        * Advertise it in the proc array.  We assume assignment of
         * LocalTransactionID is atomic, and the backendId should be set already.
         */
        Assert(MyProc->backendId == vxid.backendId);
        MyProc->lxid = vxid.localTransactionId;
 
-       PG_TRACE1(transaction__start, vxid.localTransactionId);
+       TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
 
        /*
         * set transaction_timestamp() (a/k/a now()).  We want this to be the same
@@ -1569,9 +1830,9 @@ StartTransaction(void)
        s->childXids = NULL;
        s->nChildXids = 0;
        s->maxChildXids = 0;
-       GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt);
-       /* SecurityDefinerContext should never be set outside a transaction */
-       Assert(!s->prevSecDefCxt);
+       GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
+       /* SecurityRestrictionContext should never be set outside a transaction */
+       Assert(s->prevSecContext == 0);
 
        /*
         * initialize other subsystems for new transaction
@@ -1613,12 +1874,10 @@ CommitTransaction(void)
        Assert(s->parent == NULL);
 
        /*
-        * Do pre-commit processing (most of this stuff requires database access,
-        * and in fact could still cause an error...)
-        *
-        * It is possible for CommitHoldablePortals to invoke functions that queue
-        * deferred triggers, and it's also possible that triggers create holdable
-        * cursors.  So we have to loop until there's nothing left to do.
+        * Do pre-commit processing that involves calling user-defined code, such
+        * as triggers.  Since closing cursors could queue trigger actions,
+        * triggers could open cursors, etc., we have to keep looping until there's
+        * nothing left to do.
         */
        for (;;)
        {
@@ -1628,19 +1887,25 @@ CommitTransaction(void)
                AfterTriggerFireDeferred();
 
                /*
-                * Convert any open holdable cursors into static portals.  If there
-                * weren't any, we are done ... otherwise loop back to check if they
-                * queued deferred triggers.  Lather, rinse, repeat.
+                * Close open portals (converting holdable ones into static portals).
+                * If there weren't any, we are done ... otherwise loop back to check
+                * if they queued deferred triggers.  Lather, rinse, repeat.
                 */
-               if (!CommitHoldablePortals())
+               if (!PreCommit_Portals(false))
                        break;
        }
 
-       /* Now we can shut down the deferred-trigger manager */
-       AfterTriggerEndXact(true);
+       CallXactCallbacks(XACT_EVENT_PRE_COMMIT);
+
+       /*
+        * The remaining actions cannot call any user-defined code, so it's safe
+        * to start shutting down within-transaction services.  But note that most
+        * of this stuff could still throw an error, which would switch us into
+        * the transaction-abort path.
+        */
 
-       /* Close any open regular cursors */
-       AtCommit_Portals();
+       /* Shut down the deferred-trigger manager */
+       AfterTriggerEndXact(true);
 
        /*
         * Let ON COMMIT management do its thing (must happen after closing
@@ -1651,18 +1916,26 @@ CommitTransaction(void)
        /* close large objects before lower-level cleanup */
        AtEOXact_LargeObject(true);
 
-       /* NOTIFY commit must come before lower-level cleanup */
-       AtCommit_Notify();
+       /*
+        * Mark serializable transaction as complete for predicate locking
+        * purposes.  This should be done as late as we can put it and still allow
+        * errors to be raised for failure patterns found at commit.
+        */
+       PreCommit_CheckForSerializationFailure();
 
        /*
-        * Update flat files if we changed pg_database, pg_authid or
-        * pg_auth_members.  This should be the last step before commit.
+        * Insert notifications sent by NOTIFY commands into the queue.  This
+        * should be late in the pre-commit sequence to minimize time spent
+        * holding the notify-insertion lock.
         */
-       AtEOXact_UpdateFlatFiles(true);
+       PreCommit_Notify();
 
        /* Prevent cancel/die interrupt while cleaning up */
        HOLD_INTERRUPTS();
 
+       /* Commit updates to the relation map --- do this as late as possible */
+       AtEOXact_RelationMap(true);
+
        /*
         * set the current transaction state information appropriately during
         * commit processing
@@ -1674,7 +1947,7 @@ CommitTransaction(void)
         */
        latestXid = RecordTransactionCommit();
 
-       PG_TRACE1(transaction__commit, MyProc->lxid);
+       TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid);
 
        /*
         * Let others know about no transaction in progress by me. Note that this
@@ -1720,14 +1993,6 @@ CommitTransaction(void)
         */
        AtEOXact_Inval(true);
 
-       /*
-        * Likewise, dropping of files deleted during the transaction is best done
-        * after releasing relcache and buffer pins.  (This is not strictly
-        * necessary during commit, since such pins should have been released
-        * already, but this ordering is definitely critical during abort.)
-        */
-       smgrDoPendingDeletes(true);
-
        AtEOXact_MultiXact();
 
        ResourceOwnerRelease(TopTransactionResourceOwner,
@@ -1737,19 +2002,31 @@ CommitTransaction(void)
                                                 RESOURCE_RELEASE_AFTER_LOCKS,
                                                 true, true);
 
+       /*
+        * Likewise, dropping of files deleted during the transaction is best done
+        * after releasing relcache and buffer pins.  (This is not strictly
+        * necessary during commit, since such pins should have been released
+        * already, but this ordering is definitely critical during abort.)  Since
+        * this may take many seconds, also delay until after releasing locks.
+        * Other backends will observe the attendant catalog changes and not
+        * attempt to access affected files.
+        */
+       smgrDoPendingDeletes(true);
+
        /* Check we've released all catcache entries */
        AtEOXact_CatCache(true);
 
+       AtCommit_Notify();
        AtEOXact_GUC(true, 1);
        AtEOXact_SPI(true);
-       AtEOXact_xml();
        AtEOXact_on_commit_actions(true);
        AtEOXact_Namespace(true);
-       /* smgrcommit already done */
+       AtEOXact_SMgr();
        AtEOXact_Files();
        AtEOXact_ComboCid();
        AtEOXact_HashTables(true);
        AtEOXact_PgStat(true);
+       AtEOXact_Snapshot(true);
        pgstat_report_xact_timestamp(0);
 
        CurrentResourceOwner = NULL;
@@ -1802,12 +2079,10 @@ PrepareTransaction(void)
        Assert(s->parent == NULL);
 
        /*
-        * Do pre-commit processing (most of this stuff requires database access,
-        * and in fact could still cause an error...)
-        *
-        * It is possible for PrepareHoldablePortals to invoke functions that
-        * queue deferred triggers, and it's also possible that triggers create
-        * holdable cursors.  So we have to loop until there's nothing left to do.
+        * Do pre-commit processing that involves calling user-defined code, such
+        * as triggers.  Since closing cursors could queue trigger actions,
+        * triggers could open cursors, etc., we have to keep looping until there's
+        * nothing left to do.
         */
        for (;;)
        {
@@ -1817,19 +2092,25 @@ PrepareTransaction(void)
                AfterTriggerFireDeferred();
 
                /*
-                * Convert any open holdable cursors into static portals.  If there
-                * weren't any, we are done ... otherwise loop back to check if they
-                * queued deferred triggers.  Lather, rinse, repeat.
+                * Close open portals (converting holdable ones into static portals).
+                * If there weren't any, we are done ... otherwise loop back to check
+                * if they queued deferred triggers.  Lather, rinse, repeat.
                 */
-               if (!PrepareHoldablePortals())
+               if (!PreCommit_Portals(true))
                        break;
        }
 
-       /* Now we can shut down the deferred-trigger manager */
-       AfterTriggerEndXact(true);
+       CallXactCallbacks(XACT_EVENT_PRE_PREPARE);
+
+       /*
+        * The remaining actions cannot call any user-defined code, so it's safe
+        * to start shutting down within-transaction services.  But note that most
+        * of this stuff could still throw an error, which would switch us into
+        * the transaction-abort path.
+        */
 
-       /* Close any open regular cursors */
-       AtCommit_Portals();
+       /* Shut down the deferred-trigger manager */
+       AfterTriggerEndXact(true);
 
        /*
         * Let ON COMMIT management do its thing (must happen after closing
@@ -1840,18 +2121,25 @@ PrepareTransaction(void)
        /* close large objects before lower-level cleanup */
        AtEOXact_LargeObject(true);
 
-       /* NOTIFY and flatfiles will be handled below */
+       /*
+        * Mark serializable transaction as complete for predicate locking
+        * purposes.  This should be done as late as we can put it and still allow
+        * errors to be raised for failure patterns found at commit.
+        */
+       PreCommit_CheckForSerializationFailure();
+
+       /* NOTIFY will be handled below */
 
        /*
-        * Don't allow PREPARE TRANSACTION if we've accessed a temporary table
-        * in this transaction.  Having the prepared xact hold locks on another
+        * Don't allow PREPARE TRANSACTION if we've accessed a temporary table in
+        * this transaction.  Having the prepared xact hold locks on another
         * backend's temp table seems a bad idea --- for instance it would prevent
-        * the backend from exiting.  There are other problems too, such as how
-        * to clean up the source backend's local buffers and ON COMMIT state
-        * if the prepared xact includes a DROP of a temp table.
+        * the backend from exiting.  There are other problems too, such as how to
+        * clean up the source backend's local buffers and ON COMMIT state if the
+        * prepared xact includes a DROP of a temp table.
         *
-        * We must check this after executing any ON COMMIT actions, because
-        * they might still access a temp relation.
+        * We must check this after executing any ON COMMIT actions, because they
+        * might still access a temp relation.
         *
         * XXX In principle this could be relaxed to allow some useful special
         * cases, such as a temp table created and dropped all within the
@@ -1862,6 +2150,16 @@ PrepareTransaction(void)
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
 
+       /*
+        * Likewise, don't allow PREPARE after pg_export_snapshot.  This could be
+        * supported if we added cleanup logic to twophase.c, but for now it
+        * doesn't seem worth the trouble.
+        */
+       if (XactHasExportedSnapshots())
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+               errmsg("cannot PREPARE a transaction that has exported snapshots")));
+
        /* Prevent cancel/die interrupt while cleaning up */
        HOLD_INTERRUPTS();
 
@@ -1903,10 +2201,11 @@ PrepareTransaction(void)
        StartPrepare(gxact);
 
        AtPrepare_Notify();
-       AtPrepare_UpdateFlatFiles();
-       AtPrepare_Inval();
        AtPrepare_Locks();
+       AtPrepare_PredicateLocks();
        AtPrepare_PgStat();
+       AtPrepare_MultiXact();
+       AtPrepare_RelationMap();
 
        /*
         * Here is where we really truly prepare.
@@ -1922,10 +2221,10 @@ PrepareTransaction(void)
         */
 
        /* Reset XactLastRecEnd until the next transaction writes something */
-       XactLastRecEnd.xrecoff = 0;
+       XactLastRecEnd = 0;
 
        /*
-        * Let others know about no transaction in progress by me.      This has to be
+        * Let others know about no transaction in progress by me.  This has to be
         * done *after* the prepared transaction has been marked valid, else
         * someone may think it is unlocked and recyclable.
         */
@@ -1934,7 +2233,7 @@ PrepareTransaction(void)
        /*
         * This is all post-transaction cleanup.  Note that if an error is raised
         * here, it's too late to abort the transaction.  This should be just
-        * noncritical resource releasing.      See notes in CommitTransaction.
+        * noncritical resource releasing.  See notes in CommitTransaction.
         */
 
        CallXactCallbacks(XACT_EVENT_PREPARE);
@@ -1949,7 +2248,7 @@ PrepareTransaction(void)
        /* Clean up the relation cache */
        AtEOXact_RelationCache(true);
 
-       /* notify and flatfiles don't need a postprepare call */
+       /* notify doesn't need a postprepare call */
 
        PostPrepare_PgStat();
 
@@ -1957,9 +2256,10 @@ PrepareTransaction(void)
 
        PostPrepare_smgr();
 
-       AtEOXact_MultiXact();
+       PostPrepare_MultiXact(xid);
 
        PostPrepare_Locks(xid);
+       PostPrepare_PredicateLocks(xid);
 
        ResourceOwnerRelease(TopTransactionResourceOwner,
                                                 RESOURCE_RELEASE_LOCKS,
@@ -1974,14 +2274,15 @@ PrepareTransaction(void)
        /* PREPARE acts the same as COMMIT as far as GUC is concerned */
        AtEOXact_GUC(true, 1);
        AtEOXact_SPI(true);
-       AtEOXact_xml();
        AtEOXact_on_commit_actions(true);
        AtEOXact_Namespace(true);
-       /* smgrcommit already done */
+       AtEOXact_SMgr();
        AtEOXact_Files();
        AtEOXact_ComboCid();
        AtEOXact_HashTables(true);
-       /* don't call AtEOXact_PgStat here */
+       /* don't call AtEOXact_PgStat here; we fixed pgstat state above */
+       AtEOXact_Snapshot(true);
+       pgstat_report_xact_timestamp(0);
 
        CurrentResourceOwner = NULL;
        ResourceOwnerDelete(TopTransactionResourceOwner);
@@ -2041,7 +2342,23 @@ AbortTransaction(void)
         * Also clean up any open wait for lock, since the lock manager will choke
         * if we try to wait for another lock before doing this.
         */
-       LockWaitCancel();
+       LockErrorCleanup();
+
+       /*
+        * If any timeout events are still active, make sure the timeout interrupt
+        * is scheduled.  This covers possible loss of a timeout interrupt due to
+        * longjmp'ing out of the SIGINT handler (see notes in handle_sig_alarm).
+        * We delay this till after LockErrorCleanup so that we don't uselessly
+        * reschedule lock or deadlock check timeouts.
+        */
+       reschedule_timeouts();
+
+       /*
+        * Re-enable signals, in case we got here by longjmp'ing out of a signal
+        * handler.  We do this fairly early in the sequence so that the timeout
+        * infrastructure will be functional if needed while aborting.
+        */
+       PG_SETMASK(&UnBlockSig);
 
        /*
         * check the current transaction state
@@ -2060,23 +2377,23 @@ AbortTransaction(void)
        /*
         * Reset user ID which might have been changed transiently.  We need this
         * to clean up in case control escaped out of a SECURITY DEFINER function
-        * or other local change of CurrentUserId; therefore, the prior value
-        * of SecurityDefinerContext also needs to be restored.
+        * or other local change of CurrentUserId; therefore, the prior value of
+        * SecurityRestrictionContext also needs to be restored.
         *
         * (Note: it is not necessary to restore session authorization or role
         * settings here because those can only be changed via GUC, and GUC will
         * take care of rolling them back if need be.)
         */
-       SetUserIdAndContext(s->prevUser, s->prevSecDefCxt);
+       SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
 
        /*
         * do abort processing
         */
-       AfterTriggerEndXact(false);
+       AfterTriggerEndXact(false); /* 'false' means it's abort */
        AtAbort_Portals();
-       AtEOXact_LargeObject(false);    /* 'false' means it's abort */
+       AtEOXact_LargeObject(false);
        AtAbort_Notify();
-       AtEOXact_UpdateFlatFiles(false);
+       AtEOXact_RelationMap(false);
 
        /*
         * Advertise the fact that we aborted in pg_clog (assuming that we got as
@@ -2084,7 +2401,7 @@ AbortTransaction(void)
         */
        latestXid = RecordTransactionAbort(false);
 
-       PG_TRACE1(transaction__abort, MyProc->lxid);
+       TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid);
 
        /*
         * Let others know about no transaction in progress by me. Note that this
@@ -2094,39 +2411,41 @@ AbortTransaction(void)
        ProcArrayEndTransaction(MyProc, latestXid);
 
        /*
-        * Post-abort cleanup.  See notes in CommitTransaction() concerning
-        * ordering.
+        * Post-abort cleanup.  See notes in CommitTransaction() concerning
+        * ordering.  We can skip all of it if the transaction failed before
+        * creating a resource owner.
         */
+       if (TopTransactionResourceOwner != NULL)
+       {
+               CallXactCallbacks(XACT_EVENT_ABORT);
 
-       CallXactCallbacks(XACT_EVENT_ABORT);
-
-       ResourceOwnerRelease(TopTransactionResourceOwner,
-                                                RESOURCE_RELEASE_BEFORE_LOCKS,
-                                                false, true);
-       AtEOXact_Buffers(false);
-       AtEOXact_RelationCache(false);
-       AtEOXact_Inval(false);
-       smgrDoPendingDeletes(false);
-       AtEOXact_MultiXact();
-       ResourceOwnerRelease(TopTransactionResourceOwner,
-                                                RESOURCE_RELEASE_LOCKS,
-                                                false, true);
-       ResourceOwnerRelease(TopTransactionResourceOwner,
-                                                RESOURCE_RELEASE_AFTER_LOCKS,
-                                                false, true);
-       AtEOXact_CatCache(false);
-
-       AtEOXact_GUC(false, 1);
-       AtEOXact_SPI(false);
-       AtEOXact_xml();
-       AtEOXact_on_commit_actions(false);
-       AtEOXact_Namespace(false);
-       smgrabort();
-       AtEOXact_Files();
-       AtEOXact_ComboCid();
-       AtEOXact_HashTables(false);
-       AtEOXact_PgStat(false);
-       pgstat_report_xact_timestamp(0);
+               ResourceOwnerRelease(TopTransactionResourceOwner,
+                                                        RESOURCE_RELEASE_BEFORE_LOCKS,
+                                                        false, true);
+               AtEOXact_Buffers(false);
+               AtEOXact_RelationCache(false);
+               AtEOXact_Inval(false);
+               AtEOXact_MultiXact();
+               ResourceOwnerRelease(TopTransactionResourceOwner,
+                                                        RESOURCE_RELEASE_LOCKS,
+                                                        false, true);
+               ResourceOwnerRelease(TopTransactionResourceOwner,
+                                                        RESOURCE_RELEASE_AFTER_LOCKS,
+                                                        false, true);
+               smgrDoPendingDeletes(false);
+               AtEOXact_CatCache(false);
+
+               AtEOXact_GUC(false, 1);
+               AtEOXact_SPI(false);
+               AtEOXact_on_commit_actions(false);
+               AtEOXact_Namespace(false);
+               AtEOXact_SMgr();
+               AtEOXact_Files();
+               AtEOXact_ComboCid();
+               AtEOXact_HashTables(false);
+               AtEOXact_PgStat(false);
+               pgstat_report_xact_timestamp(0);
+       }
 
        /*
         * State remains TRANS_ABORT until CleanupTransaction().
@@ -2153,6 +2472,7 @@ CleanupTransaction(void)
         * do abort cleanup processing
         */
        AtCleanup_Portals();            /* now safe to release portal memory */
+       AtEOXact_Snapshot(false);       /* and release the transaction's snapshots */
 
        CurrentResourceOwner = NULL;    /* and resource owner */
        if (TopTransactionResourceOwner)
@@ -2225,7 +2545,8 @@ StartTransactionCommand(void)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT_END:
                case TBLOCK_SUBABORT_END:
                case TBLOCK_ABORT_PENDING:
@@ -2325,7 +2646,7 @@ CommitTransactionCommand(void)
 
                        /*
                         * Here we were in a perfectly good transaction block but the user
-                        * told us to ROLLBACK anyway.  We have to abort the transaction
+                        * told us to ROLLBACK anyway.  We have to abort the transaction
                         * and then clean up.
                         */
                case TBLOCK_ABORT_PENDING:
@@ -2345,7 +2666,7 @@ CommitTransactionCommand(void)
 
                        /*
                         * We were just issued a SAVEPOINT inside a transaction block.
-                        * Start a subtransaction.      (DefineSavepoint already did
+                        * Start a subtransaction.  (DefineSavepoint already did
                         * PushTransaction, so as to have someplace to put the SUBBEGIN
                         * state.)
                         */
@@ -2355,17 +2676,37 @@ CommitTransactionCommand(void)
                        break;
 
                        /*
-                        * We were issued a COMMIT or RELEASE command, so we end the
-                        * current subtransaction and return to the parent transaction.
-                        * The parent might be ended too, so repeat till we are all the
-                        * way out or find an INPROGRESS transaction.
+                        * We were issued a RELEASE command, so we end the current
+                        * subtransaction and return to the parent transaction. The parent
+                        * might be ended too, so repeat till we find an INPROGRESS
+                        * transaction or subtransaction.
+                        */
+               case TBLOCK_SUBRELEASE:
+                       do
+                       {
+                               CommitSubTransaction();
+                               s = CurrentTransactionState;    /* changed by pop */
+                       } while (s->blockState == TBLOCK_SUBRELEASE);
+
+                       Assert(s->blockState == TBLOCK_INPROGRESS ||
+                                  s->blockState == TBLOCK_SUBINPROGRESS);
+                       break;
+
+                       /*
+                        * We were issued a COMMIT, so we end the current subtransaction
+                        * hierarchy and perform final commit. We do this by rolling up
+                        * any subtransactions into their parent, which leads to O(N^2)
+                        * operations with respect to resource owners - this isn't that
+                        * bad until we approach thousands of savepoints but is
+                        * necessary for correctness should after triggers create new
+                        * resource owners.
                         */
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBCOMMIT:
                        do
                        {
                                CommitSubTransaction();
                                s = CurrentTransactionState;    /* changed by pop */
-                       } while (s->blockState == TBLOCK_SUBEND);
+                       } while (s->blockState == TBLOCK_SUBCOMMIT);
                        /* If we had a COMMIT command, finish off the main xact too */
                        if (s->blockState == TBLOCK_END)
                        {
@@ -2380,10 +2721,8 @@ CommitTransactionCommand(void)
                                s->blockState = TBLOCK_DEFAULT;
                        }
                        else
-                       {
-                               Assert(s->blockState == TBLOCK_INPROGRESS ||
-                                          s->blockState == TBLOCK_SUBINPROGRESS);
-                       }
+                               elog(ERROR, "CommitTransactionCommand: unexpected state %s",
+                                        BlockStateAsString(s->blockState));
                        break;
 
                        /*
@@ -2531,7 +2870,7 @@ AbortCurrentTransaction(void)
                        break;
 
                        /*
-                        * Here, we failed while trying to COMMIT.      Clean up the
+                        * Here, we failed while trying to COMMIT.  Clean up the
                         * transaction and return to idle state (we do not want to stay in
                         * the transaction).
                         */
@@ -2593,11 +2932,12 @@ AbortCurrentTransaction(void)
 
                        /*
                         * If we failed while trying to create a subtransaction, clean up
-                        * the broken subtransaction and abort the parent.      The same
+                        * the broken subtransaction and abort the parent.  The same
                         * applies if we get a failure while ending a subtransaction.
                         */
                case TBLOCK_SUBBEGIN:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_SUBABORT_PENDING:
                case TBLOCK_SUBRESTART:
                        AbortSubTransaction();
@@ -2674,6 +3014,26 @@ PreventTransactionChain(bool isTopLevel, const char *stmtType)
        /* all okay */
 }
 
+/*
+ *     These two functions allow for warnings or errors if a command is
+ *     executed outside of a transaction block.
+ *
+ *     While top-level transaction control commands (BEGIN/COMMIT/ABORT) and
+ *     SET that have no effect issue warnings, all other no-effect commands
+ *     generate errors.
+ */
+void
+WarnNoTransactionChain(bool isTopLevel, const char *stmtType)
+{
+       CheckTransactionChain(isTopLevel, false, stmtType);
+}
+
+void
+RequireTransactionChain(bool isTopLevel, const char *stmtType)
+{
+       CheckTransactionChain(isTopLevel, true, stmtType);
+}
+
 /*
  *     RequireTransactionChain
  *
@@ -2683,16 +3043,16 @@ PreventTransactionChain(bool isTopLevel, const char *stmtType)
  *     is presumably an error).  DECLARE CURSOR is an example.
  *
  *     If we appear to be running inside a user-defined function, we do not
- *     issue an error, since the function could issue more commands that make
+ *     issue anything, since the function could issue more commands that make
  *     use of the current statement's results.  Likewise subtransactions.
  *     Thus this is an inverse for PreventTransactionChain.
  *
  *     isTopLevel: passed down from ProcessUtility to determine whether we are
  *     inside a function.
- *     stmtType: statement type name, for error messages.
+ *     stmtType: statement type name, for warning or error messages.
  */
-void
-RequireTransactionChain(bool isTopLevel, const char *stmtType)
+static void
+CheckTransactionChain(bool isTopLevel, bool throwError, const char *stmtType)
 {
        /*
         * xact block already started?
@@ -2712,11 +3072,12 @@ RequireTransactionChain(bool isTopLevel, const char *stmtType)
        if (!isTopLevel)
                return;
 
-       ereport(ERROR,
+       ereport(throwError ? ERROR : WARNING,
                        (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
        /* translator: %s represents an SQL statement name */
                         errmsg("%s can only be used in transaction blocks",
                                        stmtType)));
+       return;
 }
 
 /*
@@ -2905,7 +3266,8 @@ BeginTransactionBlock(void)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT_END:
                case TBLOCK_SUBABORT_END:
                case TBLOCK_ABORT_PENDING:
@@ -3015,7 +3377,7 @@ EndTransactionBlock(void)
                        while (s->parent != NULL)
                        {
                                if (s->blockState == TBLOCK_SUBINPROGRESS)
-                                       s->blockState = TBLOCK_SUBEND;
+                                       s->blockState = TBLOCK_SUBCOMMIT;
                                else
                                        elog(FATAL, "EndTransactionBlock: unexpected state %s",
                                                 BlockStateAsString(s->blockState));
@@ -3073,7 +3435,8 @@ EndTransactionBlock(void)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT_END:
                case TBLOCK_SUBABORT_END:
                case TBLOCK_ABORT_PENDING:
@@ -3122,7 +3485,7 @@ UserAbortTransactionBlock(void)
                        break;
 
                        /*
-                        * We are inside a subtransaction.      Mark everything up to top
+                        * We are inside a subtransaction.  Mark everything up to top
                         * level as exitable.
                         */
                case TBLOCK_SUBINPROGRESS:
@@ -3154,7 +3517,7 @@ UserAbortTransactionBlock(void)
                         * default state.
                         */
                case TBLOCK_STARTED:
-                       ereport(NOTICE,
+                       ereport(WARNING,
                                        (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
                                         errmsg("there is no transaction in progress")));
                        s->blockState = TBLOCK_ABORT_PENDING;
@@ -3165,7 +3528,8 @@ UserAbortTransactionBlock(void)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT_END:
                case TBLOCK_SUBABORT_END:
                case TBLOCK_ABORT_PENDING:
@@ -3210,7 +3574,8 @@ DefineSavepoint(char *name)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT:
                case TBLOCK_SUBABORT:
                case TBLOCK_ABORT_END:
@@ -3254,7 +3619,7 @@ ReleaseSavepoint(List *options)
                        break;
 
                        /*
-                        * We are in a non-aborted subtransaction.      This is the only valid
+                        * We are in a non-aborted subtransaction.  This is the only valid
                         * case.
                         */
                case TBLOCK_SUBINPROGRESS:
@@ -3266,7 +3631,8 @@ ReleaseSavepoint(List *options)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT:
                case TBLOCK_SUBABORT:
                case TBLOCK_ABORT_END:
@@ -3310,14 +3676,14 @@ ReleaseSavepoint(List *options)
 
        /*
         * Mark "commit pending" all subtransactions up to the target
-        * subtransaction.      The actual commits will happen when control gets to
+        * subtransaction.  The actual commits will happen when control gets to
         * CommitTransactionCommand.
         */
        xact = CurrentTransactionState;
        for (;;)
        {
                Assert(xact->blockState == TBLOCK_SUBINPROGRESS);
-               xact->blockState = TBLOCK_SUBEND;
+               xact->blockState = TBLOCK_SUBRELEASE;
                if (xact == target)
                        break;
                xact = xact->parent;
@@ -3366,7 +3732,8 @@ RollbackToSavepoint(List *options)
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT_END:
                case TBLOCK_SUBABORT_END:
                case TBLOCK_ABORT_PENDING:
@@ -3408,7 +3775,7 @@ RollbackToSavepoint(List *options)
 
        /*
         * Mark "abort pending" all subtransactions up to the target
-        * subtransaction.      The actual aborts will happen when control gets to
+        * subtransaction.  The actual aborts will happen when control gets to
         * CommitTransactionCommand.
         */
        xact = CurrentTransactionState;
@@ -3474,7 +3841,8 @@ BeginInternalSubTransaction(char *name)
                case TBLOCK_DEFAULT:
                case TBLOCK_BEGIN:
                case TBLOCK_SUBBEGIN:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT:
                case TBLOCK_SUBABORT:
                case TBLOCK_ABORT_END:
@@ -3540,7 +3908,8 @@ RollbackAndReleaseCurrentSubTransaction(void)
                case TBLOCK_SUBBEGIN:
                case TBLOCK_INPROGRESS:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_ABORT:
                case TBLOCK_ABORT_END:
                case TBLOCK_SUBABORT_END:
@@ -3589,7 +3958,24 @@ AbortOutOfAnyTransaction(void)
                switch (s->blockState)
                {
                        case TBLOCK_DEFAULT:
-                               /* Not in a transaction, do nothing */
+                               if (s->state == TRANS_DEFAULT)
+                               {
+                                       /* Not in a transaction, do nothing */
+                               }
+                               else
+                               {
+                                       /*
+                                        * We can get here after an error during transaction start
+                                        * (state will be TRANS_START).  Need to clean up the
+                                        * incompletely started transaction.  First, adjust the
+                                        * low-level state to suppress warning message from
+                                        * AbortTransaction.
+                                        */
+                                       if (s->state == TRANS_START)
+                                               s->state = TRANS_INPROGRESS;
+                                       AbortTransaction();
+                                       CleanupTransaction();
+                               }
                                break;
                        case TBLOCK_STARTED:
                        case TBLOCK_BEGIN:
@@ -3614,7 +4000,8 @@ AbortOutOfAnyTransaction(void)
                                 */
                        case TBLOCK_SUBBEGIN:
                        case TBLOCK_SUBINPROGRESS:
-                       case TBLOCK_SUBEND:
+                       case TBLOCK_SUBRELEASE:
+                       case TBLOCK_SUBCOMMIT:
                        case TBLOCK_SUBABORT_PENDING:
                        case TBLOCK_SUBRESTART:
                                AbortSubTransaction();
@@ -3686,7 +4073,8 @@ TransactionBlockStatusCode(void)
                case TBLOCK_INPROGRESS:
                case TBLOCK_SUBINPROGRESS:
                case TBLOCK_END:
-               case TBLOCK_SUBEND:
+               case TBLOCK_SUBRELEASE:
+               case TBLOCK_SUBCOMMIT:
                case TBLOCK_PREPARE:
                        return 'T';                     /* in transaction */
                case TBLOCK_ABORT:
@@ -3782,15 +4170,22 @@ CommitSubTransaction(void)
                elog(WARNING, "CommitSubTransaction while in %s state",
                         TransStateAsString(s->state));
 
-       /* Pre-commit processing goes here -- nothing to do at the moment */
+       /* Pre-commit processing goes here */
 
+       CallSubXactCallbacks(SUBXACT_EVENT_PRE_COMMIT_SUB, s->subTransactionId,
+                                                s->parent->subTransactionId);
+
+       /* Do the actual "commit", such as it is */
        s->state = TRANS_COMMIT;
 
        /* Must CCI to ensure commands of subtransaction are seen as done */
        CommandCounterIncrement();
 
-       /* Mark subtransaction as subcommitted */
-       RecordSubTransactionCommit();
+       /*
+        * Prior to 8.4 we marked subcommit in clog at this point.  We now only
+        * perform that step, if required, as part of the atomic update of the
+        * whole transaction tree at top level commit or abort.
+        */
 
        /* Post-commit cleanup */
        if (TransactionIdIsValid(s->transactionId))
@@ -3802,8 +4197,6 @@ CommitSubTransaction(void)
        AtEOSubXact_LargeObject(true, s->subTransactionId,
                                                        s->parent->subTransactionId);
        AtSubCommit_Notify();
-       AtEOSubXact_UpdateFlatFiles(true, s->subTransactionId,
-                                                               s->parent->subTransactionId);
 
        CallSubXactCallbacks(SUBXACT_EVENT_COMMIT_SUB, s->subTransactionId,
                                                 s->parent->subTransactionId);
@@ -3818,12 +4211,14 @@ CommitSubTransaction(void)
 
        /*
         * The only lock we actually release here is the subtransaction XID lock.
-        * The rest just get transferred to the parent resource owner.
         */
        CurrentResourceOwner = s->curTransactionOwner;
        if (TransactionIdIsValid(s->transactionId))
                XactLockTableDelete(s->transactionId);
 
+       /*
+        * Other locks should get transferred to their parent resource owner.
+        */
        ResourceOwnerRelease(s->curTransactionOwner,
                                                 RESOURCE_RELEASE_LOCKS,
                                                 true, false);
@@ -3841,6 +4236,7 @@ CommitSubTransaction(void)
                                          s->parent->subTransactionId);
        AtEOSubXact_HashTables(true, s->nestingLevel);
        AtEOSubXact_PgStat(true, s->nestingLevel);
+       AtSubCommit_Snapshot(s->nestingLevel);
 
        /*
         * We need to restore the upper transaction's read-only state, in case the
@@ -3890,7 +4286,27 @@ AbortSubTransaction(void)
        AbortBufferIO();
        UnlockBuffers();
 
-       LockWaitCancel();
+       /*
+        * Also clean up any open wait for lock, since the lock manager will choke
+        * if we try to wait for another lock before doing this.
+        */
+       LockErrorCleanup();
+
+       /*
+        * If any timeout events are still active, make sure the timeout interrupt
+        * is scheduled.  This covers possible loss of a timeout interrupt due to
+        * longjmp'ing out of the SIGINT handler (see notes in handle_sig_alarm).
+        * We delay this till after LockErrorCleanup so that we don't uselessly
+        * reschedule lock or deadlock check timeouts.
+        */
+       reschedule_timeouts();
+
+       /*
+        * Re-enable signals, in case we got here by longjmp'ing out of a signal
+        * handler.  We do this fairly early in the sequence so that the timeout
+        * infrastructure will be functional if needed while aborting.
+        */
+       PG_SETMASK(&UnBlockSig);
 
        /*
         * check the current transaction state
@@ -3904,10 +4320,10 @@ AbortSubTransaction(void)
        s->state = TRANS_ABORT;
 
        /*
-        * Reset user ID which might have been changed transiently.  (See notes
-        * in AbortTransaction.)
+        * Reset user ID which might have been changed transiently.  (See notes in
+        * AbortTransaction.)
         */
-       SetUserIdAndContext(s->prevUser, s->prevSecDefCxt);
+       SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
 
        /*
         * We can skip all this stuff if the subxact failed before creating a
@@ -3922,8 +4338,6 @@ AbortSubTransaction(void)
                AtEOSubXact_LargeObject(false, s->subTransactionId,
                                                                s->parent->subTransactionId);
                AtSubAbort_Notify();
-               AtEOSubXact_UpdateFlatFiles(false, s->subTransactionId,
-                                                                       s->parent->subTransactionId);
 
                /* Advertise the fact that we aborted in pg_clog. */
                (void) RecordTransactionAbort(true);
@@ -3941,17 +4355,16 @@ AbortSubTransaction(void)
                AtEOSubXact_RelationCache(false, s->subTransactionId,
                                                                  s->parent->subTransactionId);
                AtEOSubXact_Inval(false);
-               AtSubAbort_smgr();
                ResourceOwnerRelease(s->curTransactionOwner,
                                                         RESOURCE_RELEASE_LOCKS,
                                                         false, false);
                ResourceOwnerRelease(s->curTransactionOwner,
                                                         RESOURCE_RELEASE_AFTER_LOCKS,
                                                         false, false);
+               AtSubAbort_smgr();
 
                AtEOXact_GUC(false, s->gucNestLevel);
                AtEOSubXact_SPI(false, s->subTransactionId);
-               AtEOXact_xml();
                AtEOSubXact_on_commit_actions(false, s->subTransactionId,
                                                                          s->parent->subTransactionId);
                AtEOSubXact_Namespace(false, s->subTransactionId,
@@ -3960,6 +4373,7 @@ AbortSubTransaction(void)
                                                  s->parent->subTransactionId);
                AtEOSubXact_HashTables(false, s->nestingLevel);
                AtEOSubXact_PgStat(false, s->nestingLevel);
+               AtSubAbort_Snapshot(s->nestingLevel);
        }
 
        /*
@@ -4049,7 +4463,7 @@ PushTransaction(void)
        s->savepointLevel = p->savepointLevel;
        s->state = TRANS_DEFAULT;
        s->blockState = TBLOCK_SUBBEGIN;
-       GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt);
+       GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
        s->prevXactReadOnly = XactReadOnly;
 
        CurrentTransactionState = s;
@@ -4125,7 +4539,7 @@ ShowTransactionStateRec(TransactionState s)
 
        if (s->nChildXids > 0)
        {
-               int i;
+               int                     i;
 
                appendStringInfo(&buf, "%u", s->childXids[0]);
                for (i = 1; i < s->nChildXids; i++)
@@ -4181,8 +4595,10 @@ BlockStateAsString(TBlockState blockState)
                        return "SUB BEGIN";
                case TBLOCK_SUBINPROGRESS:
                        return "SUB INPROGRS";
-               case TBLOCK_SUBEND:
-                       return "SUB END";
+               case TBLOCK_SUBRELEASE:
+                       return "SUB RELEASE";
+               case TBLOCK_SUBCOMMIT:
+                       return "SUB COMMIT";
                case TBLOCK_SUBABORT:
                        return "SUB ABORT";
                case TBLOCK_SUBABORT_END:
@@ -4225,7 +4641,7 @@ TransStateAsString(TransState state)
 /*
  * xactGetCommittedChildren
  *
- * Gets the list of committed children of the current transaction.     The return
+ * Gets the list of committed children of the current transaction.  The return
  * value is the number of child transactions.  *ptr is set to point to an
  * array of TransactionIds.  The array is allocated in TopTransactionContext;
  * the caller should *not* pfree() it (this is a change from pre-8.4 code!).
@@ -4248,41 +4664,190 @@ xactGetCommittedChildren(TransactionId **ptr)
  *     XLOG support routines
  */
 
+/*
+ * Before 9.0 this was a fairly short function, but now it performs many
+ * actions for which the order of execution is critical.
+ */
 static void
-xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
+xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
+                                                 TransactionId *sub_xids, int nsubxacts,
+                                                 SharedInvalidationMessage *inval_msgs, int nmsgs,
+                                                 RelFileNode *xnodes, int nrels,
+                                                 Oid dbId, Oid tsId,
+                                                 uint32 xinfo)
 {
-       TransactionId *sub_xids;
        TransactionId max_xid;
        int                     i;
 
-       TransactionIdCommit(xid);
+       max_xid = TransactionIdLatest(xid, nsubxacts, sub_xids);
 
-       /* Mark committed subtransactions as committed */
-       sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
-       TransactionIdCommitTree(xlrec->nsubxacts, sub_xids);
-
-       /* Make sure nextXid is beyond any XID mentioned in the record */
-       max_xid = xid;
-       for (i = 0; i < xlrec->nsubxacts; i++)
-       {
-               if (TransactionIdPrecedes(max_xid, sub_xids[i]))
-                       max_xid = sub_xids[i];
-       }
+       /*
+        * Make sure nextXid is beyond any XID mentioned in the record.
+        *
+        * We don't expect anyone else to modify nextXid, hence we don't need to
+        * hold a lock while checking this. We still acquire the lock to modify
+        * it, though.
+        */
        if (TransactionIdFollowsOrEquals(max_xid,
                                                                         ShmemVariableCache->nextXid))
        {
+               LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
                ShmemVariableCache->nextXid = max_xid;
                TransactionIdAdvance(ShmemVariableCache->nextXid);
+               LWLockRelease(XidGenLock);
+       }
+
+       if (standbyState == STANDBY_DISABLED)
+       {
+               /*
+                * Mark the transaction committed in pg_clog.
+                */
+               TransactionIdCommitTree(xid, nsubxacts, sub_xids);
+       }
+       else
+       {
+               /*
+                * If a transaction completion record arrives that has as-yet
+                * unobserved subtransactions then this will not have been fully
+                * handled by the call to RecordKnownAssignedTransactionIds() in the
+                * main recovery loop in xlog.c. So we need to do bookkeeping again to
+                * cover that case. This is confusing and it is easy to think this
+                * call is irrelevant, which has happened three times in development
+                * already. Leave it in.
+                */
+               RecordKnownAssignedTransactionIds(max_xid);
+
+               /*
+                * Mark the transaction committed in pg_clog. We use async commit
+                * protocol during recovery to provide information on database
+                * consistency for when users try to set hint bits. It is important
+                * that we do not set hint bits until the minRecoveryPoint is past
+                * this commit record. This ensures that if we crash we don't see hint
+                * bits set on changes made by transactions that haven't yet
+                * recovered. It's unlikely but it's good to be safe.
+                */
+               TransactionIdAsyncCommitTree(xid, nsubxacts, sub_xids, lsn);
+
+               /*
+                * We must mark clog before we update the ProcArray.
+                */
+               ExpireTreeKnownAssignedTransactionIds(xid, nsubxacts, sub_xids, max_xid);
+
+               /*
+                * Send any cache invalidations attached to the commit. We must
+                * maintain the same order (invalidation first, then lock release)
+                * as occurs in CommitTransaction().
+                */
+               ProcessCommittedInvalidationMessages(inval_msgs, nmsgs,
+                                                                 XactCompletionRelcacheInitFileInval(xinfo),
+                                                                                        dbId, tsId);
+
+               /*
+                * Release locks, if any. We do this for both two phase and normal one
+                * phase transactions. In effect we are ignoring the prepare phase and
+                * just going straight to lock release. At commit we release all locks
+                * via their top-level xid only, so no need to provide subxact list,
+                * which will save time when replaying commits.
+                */
+               StandbyReleaseLockTree(xid, 0, NULL);
        }
 
        /* Make sure files supposed to be dropped are dropped */
-       for (i = 0; i < xlrec->nrels; i++)
+       if (nrels > 0)
        {
-               XLogDropRelation(xlrec->xnodes[i]);
-               smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+               /*
+                * First update minimum recovery point to cover this WAL record. Once
+                * a relation is deleted, there's no going back. The buffer manager
+                * enforces the WAL-first rule for normal updates to relation files,
+                * so that the minimum recovery point is always updated before the
+                * corresponding change in the data file is flushed to disk, but we
+                * have to do the same here since we're bypassing the buffer manager.
+                *
+                * Doing this before deleting the files means that if a deletion fails
+                * for some reason, you cannot start up the system even after restart,
+                * until you fix the underlying situation so that the deletion will
+                * succeed. Alternatively, we could update the minimum recovery point
+                * after deletion, but that would leave a small window where the
+                * WAL-first rule would be violated.
+                */
+               XLogFlush(lsn);
+
+               for (i = 0; i < nrels; i++)
+               {
+                       SMgrRelation srel = smgropen(xnodes[i], InvalidBackendId);
+                       ForkNumber      fork;
+
+                       for (fork = 0; fork <= MAX_FORKNUM; fork++)
+                               XLogDropRelation(xnodes[i], fork);
+                       smgrdounlink(srel, true);
+                       smgrclose(srel);
+               }
        }
+
+       /*
+        * We issue an XLogFlush() for the same reason we emit ForceSyncCommit()
+        * in normal operation. For example, in CREATE DATABASE, we copy all files
+        * from the template database, and then commit the transaction. If we
+        * crash after all the files have been copied but before the commit, you
+        * have files in the data directory without an entry in pg_database. To
+        * minimize the window for that, we use ForceSyncCommit() to rush the
+        * commit record to disk as quickly as possible. We have the same window
+        * during recovery, and forcing an XLogFlush() (which updates
+        * minRecoveryPoint during recovery) helps to reduce that problem window,
+        * for any user that requested ForceSyncCommit().
+        */
+       if (XactCompletionForceSyncCommit(xinfo))
+               XLogFlush(lsn);
+
+}
+
+/*
+ * Utility function to call xact_redo_commit_internal after breaking down xlrec
+ */
+static void
+xact_redo_commit(xl_xact_commit *xlrec,
+                                TransactionId xid, XLogRecPtr lsn)
+{
+       TransactionId *subxacts;
+       SharedInvalidationMessage *inval_msgs;
+
+       /* subxid array follows relfilenodes */
+       subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+       /* invalidation messages array follows subxids */
+       inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
+
+       xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
+                                                         inval_msgs, xlrec->nmsgs,
+                                                         xlrec->xnodes, xlrec->nrels,
+                                                         xlrec->dbId,
+                                                         xlrec->tsId,
+                                                         xlrec->xinfo);
 }
 
+/*
+ * Utility function to call xact_redo_commit_internal for compact form of message.
+ */
+static void
+xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
+                                                TransactionId xid, XLogRecPtr lsn)
+{
+       xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
+                                                         NULL, 0,      /* inval msgs */
+                                                         NULL, 0,      /* relfilenodes */
+                                                         InvalidOid,           /* dbId */
+                                                         InvalidOid,           /* tsId */
+                                                         0);           /* xinfo */
+}
+
+/*
+ * Be careful with the order of execution, as with xact_redo_commit().
+ * The two functions are similar but differ in key places.
+ *
+ * Note also that an abort can be for a subtransaction and its children,
+ * not just for a top level abort. That means we have to consider
+ * topxid != xid, whereas in commit we would find topxid == xid always
+ * because subtransaction commit is never WAL logged.
+ */
 static void
 xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
 {
@@ -4290,31 +4855,72 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
        TransactionId max_xid;
        int                     i;
 
-       TransactionIdAbort(xid);
-
-       /* Mark subtransactions as aborted */
        sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
-       TransactionIdAbortTree(xlrec->nsubxacts, sub_xids);
+       max_xid = TransactionIdLatest(xid, xlrec->nsubxacts, sub_xids);
 
-       /* Make sure nextXid is beyond any XID mentioned in the record */
-       max_xid = xid;
-       for (i = 0; i < xlrec->nsubxacts; i++)
-       {
-               if (TransactionIdPrecedes(max_xid, sub_xids[i]))
-                       max_xid = sub_xids[i];
-       }
+       /*
+        * Make sure nextXid is beyond any XID mentioned in the record.
+        *
+        * We don't expect anyone else to modify nextXid, hence we don't need to
+        * hold a lock while checking this. We still acquire the lock to modify
+        * it, though.
+        */
        if (TransactionIdFollowsOrEquals(max_xid,
                                                                         ShmemVariableCache->nextXid))
        {
+               LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
                ShmemVariableCache->nextXid = max_xid;
                TransactionIdAdvance(ShmemVariableCache->nextXid);
+               LWLockRelease(XidGenLock);
+       }
+
+       if (standbyState == STANDBY_DISABLED)
+       {
+               /* Mark the transaction aborted in pg_clog, no need for async stuff */
+               TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
+       }
+       else
+       {
+               /*
+                * If a transaction completion record arrives that has as-yet
+                * unobserved subtransactions then this will not have been fully
+                * handled by the call to RecordKnownAssignedTransactionIds() in the
+                * main recovery loop in xlog.c. So we need to do bookkeeping again to
+                * cover that case. This is confusing and it is easy to think this
+                * call is irrelevant, which has happened three times in development
+                * already. Leave it in.
+                */
+               RecordKnownAssignedTransactionIds(max_xid);
+
+               /* Mark the transaction aborted in pg_clog, no need for async stuff */
+               TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
+
+               /*
+                * We must update the ProcArray after we have marked clog.
+                */
+               ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids, max_xid);
+
+               /*
+                * There are no flat files that need updating, nor invalidation
+                * messages to send or undo.
+                */
+
+               /*
+                * Release locks, if any. There are no invalidations to send.
+                */
+               StandbyReleaseLockTree(xid, xlrec->nsubxacts, sub_xids);
        }
 
        /* Make sure files supposed to be dropped are dropped */
        for (i = 0; i < xlrec->nrels; i++)
        {
-               XLogDropRelation(xlrec->xnodes[i]);
-               smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+               SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
+               ForkNumber      fork;
+
+               for (fork = 0; fork <= MAX_FORKNUM; fork++)
+                       XLogDropRelation(xlrec->xnodes[i], fork);
+               smgrdounlink(srel, true);
+               smgrclose(srel);
        }
 }
 
@@ -4323,11 +4929,20 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
 {
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
-       if (info == XLOG_XACT_COMMIT)
+       /* Backup blocks are not used in xact records */
+       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+
+       if (info == XLOG_XACT_COMMIT_COMPACT)
+       {
+               xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
+
+               xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
+       }
+       else if (info == XLOG_XACT_COMMIT)
        {
                xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
 
-               xact_redo_commit(xlrec, record->xl_xid);
+               xact_redo_commit(xlrec, record->xl_xid, lsn);
        }
        else if (info == XLOG_XACT_ABORT)
        {
@@ -4345,7 +4960,7 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
        {
                xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
 
-               xact_redo_commit(&xlrec->crec, xlrec->xid);
+               xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
                RemoveTwoPhaseFile(xlrec->xid, false);
        }
        else if (info == XLOG_XACT_ABORT_PREPARED)
@@ -4355,103 +4970,14 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
                xact_redo_abort(&xlrec->arec, xlrec->xid);
                RemoveTwoPhaseFile(xlrec->xid, false);
        }
-       else
-               elog(PANIC, "xact_redo: unknown op code %u", info);
-}
-
-static void
-xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
-{
-       int                     i;
-
-       appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
-       if (xlrec->nrels > 0)
-       {
-               appendStringInfo(buf, "; rels:");
-               for (i = 0; i < xlrec->nrels; i++)
-               {
-                       RelFileNode rnode = xlrec->xnodes[i];
-
-                       appendStringInfo(buf, " %u/%u/%u",
-                                                        rnode.spcNode, rnode.dbNode, rnode.relNode);
-               }
-       }
-       if (xlrec->nsubxacts > 0)
-       {
-               TransactionId *xacts = (TransactionId *)
-               &xlrec->xnodes[xlrec->nrels];
-
-               appendStringInfo(buf, "; subxacts:");
-               for (i = 0; i < xlrec->nsubxacts; i++)
-                       appendStringInfo(buf, " %u", xacts[i]);
-       }
-}
-
-static void
-xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
-{
-       int                     i;
-
-       appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
-       if (xlrec->nrels > 0)
-       {
-               appendStringInfo(buf, "; rels:");
-               for (i = 0; i < xlrec->nrels; i++)
-               {
-                       RelFileNode rnode = xlrec->xnodes[i];
-
-                       appendStringInfo(buf, " %u/%u/%u",
-                                                        rnode.spcNode, rnode.dbNode, rnode.relNode);
-               }
-       }
-       if (xlrec->nsubxacts > 0)
-       {
-               TransactionId *xacts = (TransactionId *)
-               &xlrec->xnodes[xlrec->nrels];
-
-               appendStringInfo(buf, "; subxacts:");
-               for (i = 0; i < xlrec->nsubxacts; i++)
-                       appendStringInfo(buf, " %u", xacts[i]);
-       }
-}
-
-void
-xact_desc(StringInfo buf, uint8 xl_info, char *rec)
-{
-       uint8           info = xl_info & ~XLR_INFO_MASK;
-
-       if (info == XLOG_XACT_COMMIT)
-       {
-               xl_xact_commit *xlrec = (xl_xact_commit *) rec;
-
-               appendStringInfo(buf, "commit: ");
-               xact_desc_commit(buf, xlrec);
-       }
-       else if (info == XLOG_XACT_ABORT)
+       else if (info == XLOG_XACT_ASSIGNMENT)
        {
-               xl_xact_abort *xlrec = (xl_xact_abort *) rec;
+               xl_xact_assignment *xlrec = (xl_xact_assignment *) XLogRecGetData(record);
 
-               appendStringInfo(buf, "abort: ");
-               xact_desc_abort(buf, xlrec);
-       }
-       else if (info == XLOG_XACT_PREPARE)
-       {
-               appendStringInfo(buf, "prepare");
-       }
-       else if (info == XLOG_XACT_COMMIT_PREPARED)
-       {
-               xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
-
-               appendStringInfo(buf, "commit %u: ", xlrec->xid);
-               xact_desc_commit(buf, &xlrec->crec);
-       }
-       else if (info == XLOG_XACT_ABORT_PREPARED)
-       {
-               xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
-
-               appendStringInfo(buf, "abort %u: ", xlrec->xid);
-               xact_desc_abort(buf, &xlrec->arec);
+               if (standbyState >= STANDBY_INITIALIZED)
+                       ProcArrayApplyXidAssignment(xlrec->xtop,
+                                                                               xlrec->nsubxacts, xlrec->xsub);
        }
        else
-               appendStringInfo(buf, "UNKNOWN");
+               elog(PANIC, "xact_redo: unknown op code %u", info);
 }