X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Faccess%2Ftransam%2Fxact.c;h=9af53a5953f09e2b2726fc7efa8ad4fcc1f2b9ab;hb=a7b7b07af340c73adee9959edf260695591a9496;hp=b6efb3155816a4054f63b9a22920997e63647c34;hpb=b6197fe06939d35f67abee1ebe62690d43199783;p=postgresql

diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b6efb31558..9af53a5953 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -3,138 +3,14 @@
  * xact.c
  *	  top level transaction system support routines
  *
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * See src/backend/access/transam/README for more information.
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.170 2004/07/01 20:11:02 tgl Exp $
- *
- * NOTES
- *		Transaction aborts can now occur two ways:
- *
- *		1)	system dies from some internal cause  (syntax error, etc..)
- *		2)	user types ABORT
- *
- *		These two cases used to be treated identically, but now
- *		we need to distinguish them.  Why?	consider the following
- *		two situations:
- *
- *				case 1							case 2
- *				------							------
- *		1) user types BEGIN				1) user types BEGIN
- *		2) user does something			2) user does something
- *		3) user does not like what		3) system aborts for some reason
- *		   she sees and types ABORT
- *
- *		In case 1, we want to abort the transaction and return to the
- *		default state.	In case 2, there may be more commands coming
- *		our way which are part of the same transaction block and we have
- *		to ignore these commands until we see a COMMIT transaction or
- *		ROLLBACK.
- *
- *		Internal aborts are now handled by AbortTransactionBlock(), just as
- *		they always have been, and user aborts are now handled by
- *		UserAbortTransactionBlock().  Both of them rely on AbortTransaction()
- *		to do all the real work.  The only difference is what state we
- *		enter after AbortTransaction() does its work:
- *
- *		* AbortTransactionBlock() leaves us in TBLOCK_ABORT and
- *		* UserAbortTransactionBlock() leaves us in TBLOCK_ENDABORT
- *
- *		Low-level transaction abort handling is divided into two phases:
- *		* AbortTransaction() executes as soon as we realize the transaction
- *		  has failed.  It should release all shared resources (locks etc)
- *		  so that we do not delay other backends unnecessarily.
- *		* CleanupTransaction() executes when we finally see a user COMMIT
- *		  or ROLLBACK command; it cleans things up and gets us out of
- *		  the transaction internally.  In particular, we mustn't destroy
- *		  TopTransactionContext until this point.
- *
- *	 NOTES
- *		The essential aspects of the transaction system are:
- *
- *				o  transaction id generation
- *				o  transaction log updating
- *				o  memory cleanup
- *				o  cache invalidation
- *				o  lock cleanup
- *
- *		Hence, the functional division of the transaction code is
- *		based on which of the above things need to be done during
- *		a start/commit/abort transaction.  For instance, the
- *		routine AtCommit_Memory() takes care of all the memory
- *		cleanup stuff done at commit time.
- *
- *		The code is layered as follows:
- *
- *				StartTransaction
- *				CommitTransaction
- *				AbortTransaction
- *				CleanupTransaction
- *
- *		are provided to do the lower level work like recording
- *		the transaction status in the log and doing memory cleanup.
- *		above these routines are another set of functions:
- *
- *				StartTransactionCommand
- *				CommitTransactionCommand
- *				AbortCurrentTransaction
- *
- *		These are the routines used in the postgres main processing
- *		loop.  They are sensitive to the current transaction block state
- *		and make calls to the lower level routines appropriately.
- *
- *		Support for transaction blocks is provided via the functions:
- *
- *				BeginTransactionBlock
- *				CommitTransactionBlock
- *				AbortTransactionBlock
- *
- *		These are invoked only in response to a user "BEGIN WORK", "COMMIT",
- *		or "ROLLBACK" command.	The tricky part about these functions
- *		is that they are called within the postgres main loop, in between
- *		the StartTransactionCommand() and CommitTransactionCommand().
- *
- *		For example, consider the following sequence of user commands:
- *
- *		1)		begin
- *		2)		select * from foo
- *		3)		insert into foo (bar = baz)
- *		4)		commit
- *
- *		in the main processing loop, this results in the following
- *		transaction sequence:
- *
- *			/	StartTransactionCommand();
- *		1) /	ProcessUtility();				<< begin
- *		   \		BeginTransactionBlock();
- *			\	CommitTransactionCommand();
- *
- *			/	StartTransactionCommand();
- *		2) <	ProcessQuery();					<< select * from foo
- *			\	CommitTransactionCommand();
- *
- *			/	StartTransactionCommand();
- *		3) <	ProcessQuery();					<< insert into foo (bar = baz)
- *			\	CommitTransactionCommand();
- *
- *			/	StartTransactionCommand();
- *		4) /	ProcessUtility();				<< commit
- *		   \		CommitTransactionBlock();
- *			\	CommitTransactionCommand();
- *
- *		The point of this example is to demonstrate the need for
- *		StartTransactionCommand() and CommitTransactionCommand() to
- *		be state smart -- they should do nothing in between the calls
- *		to BeginTransactionBlock() and EndTransactionBlock() and
- *		outside these calls they need to do normal start/commit
- *		processing.
- *
- *		Furthermore, suppose the "select * from foo" caused an abort
- *		condition.	We would then want to abort the transaction and
- *		ignore all subsequent commands up to the "commit".
- *		-cim 3/23/90
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.260 2008/03/17 19:44:41 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -144,72 +20,127 @@
 #include <time.h>
 #include <unistd.h>
 
-#include "access/gistscan.h"
-#include "access/hash.h"
-#include "access/nbtree.h"
-#include "access/rtree.h"
+#include "access/multixact.h"
 #include "access/subtrans.h"
+#include "access/transam.h"
+#include "access/twophase.h"
 #include "access/xact.h"
-#include "catalog/heap.h"
-#include "catalog/index.h"
+#include "access/xlogutils.h"
 #include "catalog/namespace.h"
 #include "commands/async.h"
 #include "commands/tablecmds.h"
 #include "commands/trigger.h"
-#include "commands/user.h"
 #include "executor/spi.h"
 #include "libpq/be-fsstubs.h"
 #include "miscadmin.h"
+#include "pgstat.h"
 #include "storage/fd.h"
-#include "storage/proc.h"
-#include "storage/sinval.h"
+#include "storage/lmgr.h"
+#include "storage/procarray.h"
+#include "storage/sinvaladt.h"
 #include "storage/smgr.h"
+#include "utils/combocid.h"
+#include "utils/flatfiles.h"
 #include "utils/guc.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
-#include "utils/portal.h"
-#include "utils/catcache.h"
 #include "utils/relcache.h"
-#include "pgstat.h"
+#include "utils/xml.h"
+#include "pg_trace.h"
 
 
-static void AbortTransaction(void);
-static void AtAbort_Cache(void);
-static void AtAbort_Locks(void);
-static void AtAbort_Memory(void);
-static void AtCleanup_Memory(void);
-static void AtCommit_Cache(void);
-static void AtCommit_LocalCache(void);
-static void AtCommit_Locks(void);
-static void AtCommit_Memory(void);
-static void AtStart_Cache(void);
-static void AtStart_Locks(void);
-static void AtStart_Memory(void);
-static void CallEOXactCallbacks(bool isCommit);
-static void CleanupTransaction(void);
-static void CommitTransaction(void);
-static void RecordTransactionAbort(void);
-static void StartTransaction(void);
+/*
+ *	User-tweakable parameters
+ */
+int			DefaultXactIsoLevel = XACT_READ_COMMITTED;
+int			XactIsoLevel;
 
-static void RecordSubTransactionCommit(void);
-static void StartSubTransaction(void);
-static void CommitSubTransaction(void);
-static void AbortSubTransaction(void);
-static void CleanupSubTransaction(void);
-static void StartAbortedSubTransaction(void);
-static void PushTransaction(void);
-static void PopTransaction(void);
+bool		DefaultXactReadOnly = false;
+bool		XactReadOnly;
 
-static void AtSubAbort_Locks(void);
-static void AtSubAbort_Memory(void);
-static void AtSubCleanup_Memory(void);
-static void AtSubCommit_Memory(void);
-static void AtSubStart_Memory(void);
+bool		XactSyncCommit = true;
 
-static void ShowTransactionState(const char *str);
-static void ShowTransactionStateRec(TransactionState state);
-static const char *BlockStateAsString(TBlockState blockState);
-static const char *TransStateAsString(TransState state);
+int			CommitDelay = 0;	/* precommit delay in microseconds */
+int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
+
+/*
+ * MyXactAccessedTempRel is set when a temporary relation is accessed.
+ * We don't allow PREPARE TRANSACTION in that case.  (This is global
+ * so that it can be set from heapam.c.)
+ */
+bool		MyXactAccessedTempRel = false;
+
+
+/*
+ *	transaction states - transaction state from server perspective
+ */
+typedef enum TransState
+{
+	TRANS_DEFAULT,				/* idle */
+	TRANS_START,				/* transaction starting */
+	TRANS_INPROGRESS,			/* inside a valid transaction */
+	TRANS_COMMIT,				/* commit in progress */
+	TRANS_ABORT,				/* abort in progress */
+	TRANS_PREPARE				/* prepare in progress */
+} TransState;
+
+/*
+ *	transaction block states - transaction state of client queries
+ *
+ * Note: the subtransaction states are used only for non-topmost
+ * transactions; the others appear only in the topmost transaction.
+ */
+typedef enum TBlockState
+{
+	/* not-in-transaction-block states */
+	TBLOCK_DEFAULT,				/* idle */
+	TBLOCK_STARTED,				/* running single-query transaction */
+
+	/* transaction block states */
+	TBLOCK_BEGIN,				/* starting transaction block */
+	TBLOCK_INPROGRESS,			/* live transaction */
+	TBLOCK_END,					/* COMMIT received */
+	TBLOCK_ABORT,				/* failed xact, awaiting ROLLBACK */
+	TBLOCK_ABORT_END,			/* failed xact, ROLLBACK received */
+	TBLOCK_ABORT_PENDING,		/* live xact, ROLLBACK received */
+	TBLOCK_PREPARE,				/* live xact, PREPARE received */
+
+	/* subtransaction states */
+	TBLOCK_SUBBEGIN,			/* starting a subtransaction */
+	TBLOCK_SUBINPROGRESS,		/* live subtransaction */
+	TBLOCK_SUBEND,				/* RELEASE received */
+	TBLOCK_SUBABORT,			/* failed subxact, awaiting ROLLBACK */
+	TBLOCK_SUBABORT_END,		/* failed subxact, ROLLBACK received */
+	TBLOCK_SUBABORT_PENDING,	/* live subxact, ROLLBACK received */
+	TBLOCK_SUBRESTART,			/* live subxact, ROLLBACK TO received */
+	TBLOCK_SUBABORT_RESTART		/* failed subxact, ROLLBACK TO received */
+} TBlockState;
+
+/*
+ *	transaction state structure
+ */
+typedef struct TransactionStateData
+{
+	TransactionId transactionId;	/* my XID, or Invalid if none */
+	SubTransactionId subTransactionId;	/* my subxact ID */
+	char	   *name;			/* savepoint name, if any */
+	int			savepointLevel; /* savepoint level */
+	TransState	state;			/* low-level state */
+	TBlockState blockState;		/* high-level state */
+	int			nestingLevel;	/* transaction nesting depth */
+	int			gucNestLevel;	/* GUC context nesting depth */
+	MemoryContext curTransactionContext;		/* my xact-lifetime context */
+	ResourceOwner curTransactionOwner;	/* my query resources */
+	TransactionId *childXids;	/* subcommitted child XIDs, in XID order */
+	int			nChildXids;		/* # of subcommitted child XIDs */
+	int			maxChildXids;	/* allocated size of childXids[] */
+	Oid			prevUser;		/* previous CurrentUserId setting */
+	bool		prevSecDefCxt;	/* previous SecurityDefinerContext setting */
+	bool		prevXactReadOnly;		/* entry-time xact r/o state */
+	struct TransactionStateData *parent;		/* back link to parent */
+} TransactionStateData;
+
+typedef TransactionStateData *TransactionState;
 
 /*
  * CurrentTransactionState always points to the current transaction state
@@ -218,56 +149,128 @@ static const char *TransStateAsString(TransState state);
  */
 static TransactionStateData TopTransactionStateData = {
 	0,							/* transaction id */
-	FirstCommandId,				/* command id */
+	0,							/* subtransaction id */
+	NULL,						/* savepoint name */
+	0,							/* savepoint level */
 	TRANS_DEFAULT,				/* transaction state */
 	TBLOCK_DEFAULT,				/* transaction block state from the client
 								 * perspective */
-	0,							/* nesting level */
+	0,							/* transaction nesting depth */
+	0,							/* GUC context nesting depth */
 	NULL,						/* cur transaction context */
-	NIL,						/* subcommitted child Xids */
-	0,							/* entry-time current userid */
+	NULL,						/* cur transaction resource owner */
+	NULL,						/* subcommitted child Xids */
+	0,							/* # of subcommitted child Xids */
+	0,							/* allocated size of childXids[] */
+	InvalidOid,					/* previous CurrentUserId setting */
+	false,						/* previous SecurityDefinerContext setting */
+	false,						/* entry-time xact r/o state */
 	NULL						/* link to parent state block */
 };
 
 static TransactionState CurrentTransactionState = &TopTransactionStateData;
 
 /*
- * These vars hold the value of now(), ie, the transaction start time.
- * This does not change as we enter and exit subtransactions, so we don't
- * keep it inside the TransactionState stack.
+ * The subtransaction ID and command ID assignment counters are global
+ * to a whole transaction, so we do not keep them in the state stack.
  */
-static AbsoluteTime xactStartTime;			/* integer part */
-static int		xactStartTimeUsec;			/* microsecond part */
+static SubTransactionId currentSubTransactionId;
+static CommandId currentCommandId;
+static bool currentCommandIdUsed;
 
+/*
+ * xactStartTimestamp is the value of transaction_timestamp().
+ * stmtStartTimestamp is the value of statement_timestamp().
+ * xactStopTimestamp is the time at which we log a commit or abort WAL record.
+ * These do not change as we enter and exit subtransactions, so we don't
+ * keep them inside the TransactionState stack.
+ */
+static TimestampTz xactStartTimestamp;
+static TimestampTz stmtStartTimestamp;
+static TimestampTz xactStopTimestamp;
 
 /*
- *	User-tweakable parameters
+ * GID to be used for preparing the current transaction.  This is also
+ * global to a whole transaction, so we don't keep it in the state stack.
  */
-int			DefaultXactIsoLevel = XACT_READ_COMMITTED;
-int			XactIsoLevel;
+static char *prepareGID;
 
-bool		DefaultXactReadOnly = false;
-bool		XactReadOnly;
+/*
+ * Some commands want to force synchronous commit.
+ */
+static bool forceSyncCommit = false;
 
-int			CommitDelay = 0;	/* precommit delay in microseconds */
-int			CommitSiblings = 5; /* number of concurrent xacts needed to
-								 * sleep */
+/*
+ * Private context for transaction-abort work --- we reserve space for this
+ * at startup to ensure that AbortTransaction and AbortSubTransaction can work
+ * when we've run out of memory.
+ */
+static MemoryContext TransactionAbortContext = NULL;
+
+/*
+ * List of add-on start- and end-of-xact callbacks
+ */
+typedef struct XactCallbackItem
+{
+	struct XactCallbackItem *next;
+	XactCallback callback;
+	void	   *arg;
+} XactCallbackItem;
 
+static XactCallbackItem *Xact_callbacks = NULL;
 
 /*
- * List of add-on end-of-xact callbacks
+ * List of add-on start- and end-of-subxact callbacks
  */
-typedef struct EOXactCallbackItem
+typedef struct SubXactCallbackItem
 {
-	struct EOXactCallbackItem *next;
-	EOXactCallback callback;
+	struct SubXactCallbackItem *next;
+	SubXactCallback callback;
 	void	   *arg;
-} EOXactCallbackItem;
+} SubXactCallbackItem;
+
+static SubXactCallbackItem *SubXact_callbacks = NULL;
+
+
+/* local function prototypes */
+static void AssignTransactionId(TransactionState s);
+static void AbortTransaction(void);
+static void AtAbort_Memory(void);
+static void AtCleanup_Memory(void);
+static void AtAbort_ResourceOwner(void);
+static void AtCommit_LocalCache(void);
+static void AtCommit_Memory(void);
+static void AtStart_Cache(void);
+static void AtStart_Memory(void);
+static void AtStart_ResourceOwner(void);
+static void CallXactCallbacks(XactEvent event);
+static void CallSubXactCallbacks(SubXactEvent event,
+					 SubTransactionId mySubid,
+					 SubTransactionId parentSubid);
+static void CleanupTransaction(void);
+static void CommitTransaction(void);
+static TransactionId RecordTransactionAbort(bool isSubXact);
+static void StartTransaction(void);
+
+static void RecordSubTransactionCommit(void);
+static void StartSubTransaction(void);
+static void CommitSubTransaction(void);
+static void AbortSubTransaction(void);
+static void CleanupSubTransaction(void);
+static void PushTransaction(void);
+static void PopTransaction(void);
 
-static EOXactCallbackItem *EOXact_callbacks = NULL;
+static void AtSubAbort_Memory(void);
+static void AtSubCleanup_Memory(void);
+static void AtSubAbort_ResourceOwner(void);
+static void AtSubCommit_Memory(void);
+static void AtSubStart_Memory(void);
+static void AtSubStart_ResourceOwner(void);
 
-static void (*_RollbackFunc) (void *) = NULL;
-static void *_RollbackData = NULL;
+static void ShowTransactionState(const char *str);
+static void ShowTransactionStateRec(TransactionState state);
+static const char *BlockStateAsString(TBlockState blockState);
+static const char *TransStateAsString(TransState state);
 
 
 /* ----------------------------------------------------------------
@@ -278,32 +281,22 @@ static void *_RollbackData = NULL;
 /*
  *	IsTransactionState
  *
- *	This returns true if we are currently running a query
- *	within an executing transaction.
+ *	This returns true if we are inside a valid transaction; that is,
+ *	it is safe to initiate database access, take heavyweight locks, etc.
  */
 bool
 IsTransactionState(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	switch (s->state)
-	{
-		case TRANS_DEFAULT:
-			return false;
-		case TRANS_START:
-			return true;
-		case TRANS_INPROGRESS:
-			return true;
-		case TRANS_COMMIT:
-			return true;
-		case TRANS_ABORT:
-			return true;
-	}
-
 	/*
-	 * Shouldn't get here, but lint is not happy with this...
+	 * TRANS_DEFAULT and TRANS_ABORT are obviously unsafe states.  However, we
+	 * also reject the startup/shutdown states TRANS_START, TRANS_COMMIT,
+	 * TRANS_PREPARE since it might be too soon or too late within those
+	 * transition states to do anything interesting.  Hence, the only "valid"
+	 * state is TRANS_INPROGRESS.
 	 */
-	return false;
+	return (s->state == TRANS_INPROGRESS);
 }
 
 /*
@@ -317,7 +310,7 @@ IsAbortedTransactionBlockState(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	if (s->blockState == TBLOCK_ABORT || 
+	if (s->blockState == TBLOCK_ABORT ||
 		s->blockState == TBLOCK_SUBABORT)
 		return true;
 
@@ -328,60 +321,200 @@ IsAbortedTransactionBlockState(void)
 /*
  *	GetTopTransactionId
  *
- * Get the ID of the main transaction, even if we are currently inside
- * a subtransaction.
+ * This will return the XID of the main transaction, assigning one if
+ * it's not yet set.  Be careful to call this only inside a valid xact.
  */
 TransactionId
 GetTopTransactionId(void)
 {
-	return TopTransactionStateData.transactionIdData;
+	if (!TransactionIdIsValid(TopTransactionStateData.transactionId))
+		AssignTransactionId(&TopTransactionStateData);
+	return TopTransactionStateData.transactionId;
 }
 
+/*
+ *	GetTopTransactionIdIfAny
+ *
+ * This will return the XID of the main transaction, if one is assigned.
+ * It will return InvalidTransactionId if we are not currently inside a
+ * transaction, or inside a transaction that hasn't yet been assigned an XID.
+ */
+TransactionId
+GetTopTransactionIdIfAny(void)
+{
+	return TopTransactionStateData.transactionId;
+}
 
 /*
  *	GetCurrentTransactionId
+ *
+ * This will return the XID of the current transaction (main or sub
+ * transaction), assigning one if it's not yet set.  Be careful to call this
+ * only inside a valid xact.
  */
 TransactionId
 GetCurrentTransactionId(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	return s->transactionIdData;
+	if (!TransactionIdIsValid(s->transactionId))
+		AssignTransactionId(s);
+	return s->transactionId;
+}
+
+/*
+ *	GetCurrentTransactionIdIfAny
+ *
+ * This will return the XID of the current transaction (main or sub
+ * transaction), if one is assigned.  It will return InvalidTransactionId if
+ * we are not inside a transaction, or the transaction has no XID yet.
+ */
+TransactionId
+GetCurrentTransactionIdIfAny(void)
+{
+	return CurrentTransactionState->transactionId;
 }
 
 
 /*
- *	GetCurrentCommandId
+ * AssignTransactionId
+ *
+ * Assigns a new permanent XID to the given TransactionState.
+ * We do not assign XIDs to transactions until/unless this is called.
+ * Also, any parent TransactionStates that don't yet have XIDs are each
+ * assigned one first; this maintains the invariant that a child transaction
+ * has an XID later than its parent's.
  */
-CommandId
-GetCurrentCommandId(void)
+static void
+AssignTransactionId(TransactionState s)
+{
+	bool		isSubXact = (s->parent != NULL);
+	ResourceOwner currentOwner;
+
+	/* Assert that caller didn't screw up */
+	Assert(!TransactionIdIsValid(s->transactionId));
+	Assert(s->state == TRANS_INPROGRESS);
+
+	/*
+	 * Ensure parent(s) have XIDs, so that a child always has an XID later
+	 * than its parent.
+	 */
+	if (isSubXact && !TransactionIdIsValid(s->parent->transactionId))
+		AssignTransactionId(s->parent);
+
+	/*
+	 * Generate a new Xid and record it in PG_PROC and pg_subtrans.
+	 *
+	 * NB: we must make the subtrans entry BEFORE the Xid appears anywhere in
+	 * shared storage other than PG_PROC; because if there's no room for it in
+	 * PG_PROC, the subtrans entry is needed to ensure that other backends see
+	 * the Xid as "running".  See GetNewTransactionId.
+	 */
+	s->transactionId = GetNewTransactionId(isSubXact);
+
+	if (isSubXact)
+		SubTransSetParent(s->transactionId, s->parent->transactionId);
+
+	/*
+	 * Acquire lock on the transaction XID.  (We assume this cannot block.) We
+	 * have to ensure that the lock is assigned to the transaction's own
+	 * ResourceOwner.
+	 */
+	currentOwner = CurrentResourceOwner;
+	PG_TRY();
+	{
+		CurrentResourceOwner = s->curTransactionOwner;
+		XactLockTableInsert(s->transactionId);
+	}
+	PG_CATCH();
+	{
+		/* Ensure CurrentResourceOwner is restored on error */
+		CurrentResourceOwner = currentOwner;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+	CurrentResourceOwner = currentOwner;
+}
+
+
+/*
+ *	GetCurrentSubTransactionId
+ */
+SubTransactionId
+GetCurrentSubTransactionId(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	return s->commandId;
+	return s->subTransactionId;
+}
+
+
+/*
+ *	GetCurrentCommandId
+ *
+ * "used" must be TRUE if the caller intends to use the command ID to mark
+ * inserted/updated/deleted tuples.  FALSE means the ID is being fetched
+ * for read-only purposes (ie, as a snapshot validity cutoff).  See
+ * CommandCounterIncrement() for discussion.
+ */
+CommandId
+GetCurrentCommandId(bool used)
+{
+	/* this is global to a transaction, not subtransaction-local */
+	if (used)
+		currentCommandIdUsed = true;
+	return currentCommandId;
 }
 
+/*
+ *	GetCurrentTransactionStartTimestamp
+ */
+TimestampTz
+GetCurrentTransactionStartTimestamp(void)
+{
+	return xactStartTimestamp;
+}
 
 /*
- *	GetCurrentTransactionStartTime
+ *	GetCurrentStatementStartTimestamp
  */
-AbsoluteTime
-GetCurrentTransactionStartTime(void)
+TimestampTz
+GetCurrentStatementStartTimestamp(void)
 {
-	return xactStartTime;
+	return stmtStartTimestamp;
 }
 
+/*
+ *	GetCurrentTransactionStopTimestamp
+ *
+ * We return current time if the transaction stop time hasn't been set
+ * (which can happen if we decide we don't need to log an XLOG record).
+ */
+TimestampTz
+GetCurrentTransactionStopTimestamp(void)
+{
+	if (xactStopTimestamp != 0)
+		return xactStopTimestamp;
+	return GetCurrentTimestamp();
+}
 
 /*
- *	GetCurrentTransactionStartTimeUsec
+ *	SetCurrentStatementStartTimestamp
  */
-AbsoluteTime
-GetCurrentTransactionStartTimeUsec(int *msec)
+void
+SetCurrentStatementStartTimestamp(void)
 {
-	*msec = xactStartTimeUsec;
-	return xactStartTime;
+	stmtStartTimestamp = GetCurrentTimestamp();
 }
 
+/*
+ *	SetCurrentTransactionStopTimestamp
+ */
+static inline void
+SetCurrentTransactionStopTimestamp(void)
+{
+	xactStopTimestamp = GetCurrentTimestamp();
+}
 
 /*
  *	GetCurrentTransactionNestLevel
@@ -400,41 +533,62 @@ GetCurrentTransactionNestLevel(void)
 
 /*
  *	TransactionIdIsCurrentTransactionId
- *
- *	During bootstrap, we cheat and say "it's not my transaction ID" even though
- *	it is.	Along with transam.c's cheat to say that the bootstrap XID is
- *	already committed, this causes the tqual.c routines to see previously
- *	inserted tuples as committed, which is what we need during bootstrap.
  */
 bool
 TransactionIdIsCurrentTransactionId(TransactionId xid)
 {
-	TransactionState s = CurrentTransactionState;
+	TransactionState s;
 
-	if (AMI_OVERRIDE)
-	{
-		Assert(xid == BootstrapTransactionId);
+	/*
+	 * We always say that BootstrapTransactionId is "not my transaction ID"
+	 * even when it is (ie, during bootstrap).	Along with the fact that
+	 * transam.c always treats BootstrapTransactionId as already committed,
+	 * this causes the tqual.c routines to see all tuples as committed, which
+	 * is what we need during bootstrap.  (Bootstrap mode only inserts tuples,
+	 * it never updates or deletes them, so all tuples can be presumed good
+	 * immediately.)
+	 *
+	 * Likewise, InvalidTransactionId and FrozenTransactionId are certainly
+	 * not my transaction ID, so we can just return "false" immediately for
+	 * any non-normal XID.
+	 */
+	if (!TransactionIdIsNormal(xid))
 		return false;
-	}
 
 	/*
-	 * We will return true for the Xid of the current subtransaction,
-	 * any of its subcommitted children, any of its parents, or any of
-	 * their previously subcommitted children.
+	 * We will return true for the Xid of the current subtransaction, any of
+	 * its subcommitted children, any of its parents, or any of their
+	 * previously subcommitted children.  However, a transaction being aborted
+	 * is no longer "current", even though it may still have an entry on the
+	 * state stack.
 	 */
-	while (s != NULL)
+	for (s = CurrentTransactionState; s != NULL; s = s->parent)
 	{
-		ListCell *cell;
+		int low, high;
 
-		if (TransactionIdEquals(xid, s->transactionIdData))
+		if (s->state == TRANS_ABORT)
+			continue;
+		if (!TransactionIdIsValid(s->transactionId))
+			continue;			/* it can't have any child XIDs either */
+		if (TransactionIdEquals(xid, s->transactionId))
 			return true;
-		foreach(cell, s->childXids)
+		/* As the childXids array is ordered, we can use binary search */
+		low = 0;
+		high = s->nChildXids - 1;
+		while (low <= high)
 		{
-			if (TransactionIdEquals(xid, lfirst_int(cell)))
+			int				middle;
+			TransactionId	probe;
+
+			middle = low + (high - low) / 2;
+			probe = s->childXids[middle];
+			if (TransactionIdEquals(probe, xid))
 				return true;
+			else if (TransactionIdPrecedes(probe, xid))
+				low = middle + 1;
+			else
+				high = middle - 1;
 		}
-
-		s = s->parent;
 	}
 
 	return false;
@@ -447,28 +601,65 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
 void
 CommandCounterIncrement(void)
 {
-	TransactionState s = CurrentTransactionState;
+	/*
+	 * If the current value of the command counter hasn't been "used" to
+	 * mark tuples, we need not increment it, since there's no need to
+	 * distinguish a read-only command from others.  This helps postpone
+	 * command counter overflow, and keeps no-op CommandCounterIncrement
+	 * operations cheap.
+	 */
+	if (currentCommandIdUsed)
+	{
+		currentCommandId += 1;
+		if (currentCommandId == FirstCommandId)	/* check for overflow */
+		{
+			currentCommandId -= 1;
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+		  errmsg("cannot have more than 2^32-1 commands in a transaction")));
+		}
+		currentCommandIdUsed = false;
 
-	s->commandId += 1;
-	if (s->commandId == FirstCommandId) /* check for overflow */
-		ereport(ERROR,
-				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("cannot have more than 2^32-1 commands in a transaction")));
+		/* Propagate new command ID into static snapshots, if set */
+		if (SerializableSnapshot)
+			SerializableSnapshot->curcid = currentCommandId;
+		if (LatestSnapshot)
+			LatestSnapshot->curcid = currentCommandId;
 
-	/* Propagate new command ID into query snapshots, if set */
-	if (QuerySnapshot)
-		QuerySnapshot->curcid = s->commandId;
-	if (SerializableSnapshot)
-		SerializableSnapshot->curcid = s->commandId;
+		/*
+		 * Make any catalog changes done by the just-completed command
+		 * visible in the local syscache.  We obviously don't need to do
+		 * this after a read-only command.  (But see hacks in inval.c
+		 * to make real sure we don't think a command that queued inval
+		 * messages was read-only.)
+		 */
+		AtCommit_LocalCache();
+	}
 
 	/*
-	 * make cache changes visible to me.  AtCommit_LocalCache() instead of
-	 * AtCommit_Cache() is called here.
+	 * Make any other backends' catalog changes visible to me.
+	 *
+	 * XXX this is probably in the wrong place: CommandCounterIncrement
+	 * should be purely a local operation, most likely.  However fooling
+	 * with this will affect asynchronous cross-backend interactions,
+	 * which doesn't seem like a wise thing to do in late beta, so save
+	 * improving this for another day - tgl 2007-11-30
 	 */
-	AtCommit_LocalCache();
 	AtStart_Cache();
 }
 
+/*
+ * ForceSyncCommit
+ *
+ * Interface routine to allow commands to force a synchronous commit of the
+ * current top-level transaction.
+ */
+void
+ForceSyncCommit(void)
+{
+	forceSyncCommit = true;
+}
+
 
 /* ----------------------------------------------------------------
  *						StartTransaction stuff
@@ -484,20 +675,6 @@ AtStart_Cache(void)
 	AcceptInvalidationMessages();
 }
 
-/*
- *		AtStart_Locks
- */
-static void
-AtStart_Locks(void)
-{
-	/*
-	 * at present, it is unknown to me what belongs here -cim 3/18/90
-	 *
-	 * There isn't anything to do at the start of a xact for locks. -mer
-	 * 5/24/92
-	 */
-}
-
 /*
  *	AtStart_Memory
  */
@@ -506,6 +683,21 @@ AtStart_Memory(void)
 {
 	TransactionState s = CurrentTransactionState;
 
+	/*
+	 * If this is the first time through, create a private context for
+	 * AbortTransaction to work in.  By reserving some space now, we can
+	 * insulate AbortTransaction from out-of-memory scenarios.	Like
+	 * ErrorContext, we set it up with slow growth rate and a nonzero minimum
+	 * size, so that space will be reserved immediately.
+	 */
+	if (TransactionAbortContext == NULL)
+		TransactionAbortContext =
+			AllocSetContextCreate(TopMemoryContext,
+								  "TransactionAbortContext",
+								  32 * 1024,
+								  32 * 1024,
+								  32 * 1024);
+
 	/*
 	 * We shouldn't have a transaction context already.
 	 */
@@ -532,23 +724,46 @@ AtStart_Memory(void)
 	MemoryContextSwitchTo(CurTransactionContext);
 }
 
-/* ----------------------------------------------------------------
- *						StartSubTransaction stuff
- * ----------------------------------------------------------------
- */
-
 /*
- * AtSubStart_Memory
+ *	AtStart_ResourceOwner
  */
 static void
-AtSubStart_Memory(void)
+AtStart_ResourceOwner(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	Assert(CurTransactionContext != NULL);
+	/*
+	 * We shouldn't have a transaction resource owner already.
+	 */
+	Assert(TopTransactionResourceOwner == NULL);
 
 	/*
-	 * Create a CurTransactionContext, which will be used to hold data that
+	 * Create a toplevel resource owner for the transaction.
+	 */
+	s->curTransactionOwner = ResourceOwnerCreate(NULL, "TopTransaction");
+
+	TopTransactionResourceOwner = s->curTransactionOwner;
+	CurTransactionResourceOwner = s->curTransactionOwner;
+	CurrentResourceOwner = s->curTransactionOwner;
+}
+
+/* ----------------------------------------------------------------
+ *						StartSubTransaction stuff
+ * ----------------------------------------------------------------
+ */
+
+/*
+ * AtSubStart_Memory
+ */
+static void
+AtSubStart_Memory(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	Assert(CurTransactionContext != NULL);
+
+	/*
+	 * Create a CurTransactionContext, which will be used to hold data that
 	 * survives subtransaction commit but disappears on subtransaction abort.
 	 * We make it a child of the immediate parent's CurTransactionContext.
 	 */
@@ -563,6 +778,28 @@ AtSubStart_Memory(void)
 	MemoryContextSwitchTo(CurTransactionContext);
 }
 
+/*
+ * AtSubStart_ResourceOwner
+ */
+static void
+AtSubStart_ResourceOwner(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	Assert(s->parent != NULL);
+
+	/*
+	 * Create a resource owner for the subtransaction.	We make it a child of
+	 * the immediate parent's resource owner.
+	 */
+	s->curTransactionOwner =
+		ResourceOwnerCreate(s->parent->curTransactionOwner,
+							"SubTransaction");
+
+	CurTransactionResourceOwner = s->curTransactionOwner;
+	CurrentResourceOwner = s->curTransactionOwner;
+}
+
 /* ----------------------------------------------------------------
  *						CommitTransaction stuff
  * ----------------------------------------------------------------
@@ -570,167 +807,206 @@ AtSubStart_Memory(void)
 
 /*
  *	RecordTransactionCommit
+ *
+ * Returns latest XID among xact and its children, or InvalidTransactionId
+ * if the xact has no XID.	(We compute that here just because it's easier.)
+ *
+ * This is exported only to support an ugly hack in VACUUM FULL.
  */
-void
+TransactionId
 RecordTransactionCommit(void)
 {
+	TransactionId xid = GetTopTransactionIdIfAny();
+	bool		markXidCommitted = TransactionIdIsValid(xid);
+	TransactionId latestXid = InvalidTransactionId;
 	int			nrels;
-	RelFileNode *rptr;
+	RelFileNode *rels;
+	bool		haveNonTemp;
 	int			nchildren;
 	TransactionId *children;
 
 	/* Get data needed for commit record */
-	nrels = smgrGetPendingDeletes(true, &rptr);
-	nchildren = xactGetCommittedChildren(&children, false);
+	nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp);
+	nchildren = xactGetCommittedChildren(&children);
 
 	/*
-	 * If we made neither any XLOG entries nor any temp-rel updates,
-	 * and have no files to be deleted, we can omit recording the transaction
-	 * commit at all.  (This test includes the effects of subtransactions,
-	 * so the presence of committed subxacts need not alone force a write.)
+	 * If we haven't been assigned an XID yet, we neither can, nor do we want
+	 * to write a COMMIT record.
 	 */
-	if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0)
+	if (!markXidCommitted)
+	{
+		/*
+		 * We expect that every smgrscheduleunlink is followed by a catalog
+		 * update, and hence XID assignment, so we shouldn't get here with any
+		 * pending deletes.  Use a real test not just an Assert to check this,
+		 * since it's a bit fragile.
+		 */
+		if (nrels != 0)
+			elog(ERROR, "cannot commit a transaction that deleted files but has no xid");
+
+		/* Can't have child XIDs either; AssignTransactionId enforces this */
+		Assert(nchildren == 0);
+
+		/*
+		 * If we didn't create XLOG entries, we're done here; otherwise we
+		 * should flush those entries the same as a commit record.	(An
+		 * example of a possible record that wouldn't cause an XID to be
+		 * assigned is a sequence advance record due to nextval() --- we want
+		 * to flush that to disk before reporting commit.)
+		 */
+		if (XactLastRecEnd.xrecoff == 0)
+			goto cleanup;
+	}
+	else
 	{
-		TransactionId xid = GetCurrentTransactionId();
-		bool		madeTCentries;
-		XLogRecPtr	recptr;
+		/*
+		 * Begin commit critical section and insert the commit XLOG record.
+		 */
+		XLogRecData rdata[3];
+		int			lastrdata = 0;
+		xl_xact_commit xlrec;
 
 		/* Tell bufmgr and smgr to prepare for commit */
 		BufmgrCommit();
 
-		START_CRIT_SECTION();
-
 		/*
-		 * We only need to log the commit in XLOG if the transaction made
-		 * any transaction-controlled XLOG entries or will delete files.
-		 * (If it made no transaction-controlled XLOG entries, its XID
-		 * appears nowhere in permanent storage, so no one else will ever care
-		 * if it committed.)
+		 * Mark ourselves as within our "commit critical section".	This
+		 * forces any concurrent checkpoint to wait until we've updated
+		 * pg_clog.  Without this, it is possible for the checkpoint to set
+		 * REDO after the XLOG record but fail to flush the pg_clog update to
+		 * disk, leading to loss of the transaction commit if the system
+		 * crashes a little later.
+		 *
+		 * Note: we could, but don't bother to, set this flag in
+		 * RecordTransactionAbort.	That's because loss of a transaction abort
+		 * is noncritical; the presumption would be that it aborted, anyway.
+		 *
+		 * It's safe to change the inCommit flag of our own backend without
+		 * holding the ProcArrayLock, since we're the only one modifying it.
+		 * This makes checkpoint's determination of which xacts are inCommit a
+		 * bit fuzzy, but it doesn't matter.
 		 */
-		madeTCentries = (MyLastRecPtr.xrecoff != 0);
-		if (madeTCentries || nrels > 0)
+		START_CRIT_SECTION();
+		MyProc->inCommit = true;
+
+		SetCurrentTransactionStopTimestamp();
+		xlrec.xact_time = xactStopTimestamp;
+		xlrec.nrels = nrels;
+		xlrec.nsubxacts = nchildren;
+		rdata[0].data = (char *) (&xlrec);
+		rdata[0].len = MinSizeOfXactCommit;
+		rdata[0].buffer = InvalidBuffer;
+		/* dump rels to delete */
+		if (nrels > 0)
 		{
-			XLogRecData rdata[3];
-			int			lastrdata = 0;
-			xl_xact_commit xlrec;
-
-			xlrec.xtime = time(NULL);
-			xlrec.nrels = nrels;
-			xlrec.nsubxacts = nchildren;
-			rdata[0].buffer = InvalidBuffer;
-			rdata[0].data = (char *) (&xlrec);
-			rdata[0].len = MinSizeOfXactCommit;
-			/* dump rels to delete */
-			if (nrels > 0)
-			{
-				rdata[0].next = &(rdata[1]);
-				rdata[1].buffer = InvalidBuffer;
-				rdata[1].data = (char *) rptr;
-				rdata[1].len = nrels * sizeof(RelFileNode);
-				lastrdata = 1;
-			}
-			/* dump committed child Xids */
-			if (nchildren > 0)
-			{
-				rdata[lastrdata].next = &(rdata[2]);
-				rdata[2].buffer = InvalidBuffer;
-				rdata[2].data = (char *) children;
-				rdata[2].len = nchildren * sizeof(TransactionId);
-				lastrdata = 2;
-			}
-			rdata[lastrdata].next = NULL;
-
-			recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+			rdata[0].next = &(rdata[1]);
+			rdata[1].data = (char *) rels;
+			rdata[1].len = nrels * sizeof(RelFileNode);
+			rdata[1].buffer = InvalidBuffer;
+			lastrdata = 1;
 		}
-		else
+		/* dump committed child Xids */
+		if (nchildren > 0)
 		{
-			/* Just flush through last record written by me */
-			recptr = ProcLastRecEnd;
+			rdata[lastrdata].next = &(rdata[2]);
+			rdata[2].data = (char *) children;
+			rdata[2].len = nchildren * sizeof(TransactionId);
+			rdata[2].buffer = InvalidBuffer;
+			lastrdata = 2;
 		}
+		rdata[lastrdata].next = NULL;
 
+		(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+	}
+
+	/*
+	 * Check if we want to commit asynchronously.  If the user has set
+	 * synchronous_commit = off, and we're not doing cleanup of any non-temp
+	 * rels nor committing any command that wanted to force sync commit, then
+	 * we can defer flushing XLOG.	(We must not allow asynchronous commit if
+	 * there are any non-temp tables to be deleted, because we might delete
+	 * the files before the COMMIT record is flushed to disk.  We do allow
+	 * asynchronous commit if all to-be-deleted tables are temporary though,
+	 * since they are lost anyway if we crash.)
+	 */
+	if (XactSyncCommit || forceSyncCommit || haveNonTemp)
+	{
 		/*
-		 * We must flush our XLOG entries to disk if we made any XLOG
-		 * entries, whether in or out of transaction control.  For
-		 * example, if we reported a nextval() result to the client, this
-		 * ensures that any XLOG record generated by nextval will hit the
-		 * disk before we report the transaction committed.
+		 * Synchronous commit case.
+		 *
+		 * Sleep before flush! So we can flush more than one commit record
+		 * per single fsync.  (The idea is some other backend may do the
+		 * XLogFlush while we're sleeping.  This needs work still, because on
+		 * most Unixen, the minimum select() delay is 10msec or more, which is
+		 * way too long.)
 		 *
-		 * Note: if we generated a commit record above, MyXactMadeXLogEntry
-		 * will certainly be set now.
+		 * We do not sleep if enableFsync is not turned on, nor if there are
+		 * fewer than CommitSiblings other backends with active transactions.
 		 */
-		if (MyXactMadeXLogEntry)
-		{
-			/*
-			 * Sleep before flush! So we can flush more than one commit
-			 * records per single fsync.  (The idea is some other backend
-			 * may do the XLogFlush while we're sleeping.  This needs work
-			 * still, because on most Unixen, the minimum select() delay
-			 * is 10msec or more, which is way too long.)
-			 *
-			 * We do not sleep if enableFsync is not turned on, nor if there
-			 * are fewer than CommitSiblings other backends with active
-			 * transactions.
-			 */
-			if (CommitDelay > 0 && enableFsync &&
-				CountActiveBackends() >= CommitSiblings)
-				pg_usleep(CommitDelay);
+		if (CommitDelay > 0 && enableFsync &&
+			CountActiveBackends() >= CommitSiblings)
+			pg_usleep(CommitDelay);
 
-			XLogFlush(recptr);
-		}
+		XLogFlush(XactLastRecEnd);
 
 		/*
-		 * We must mark the transaction committed in clog if its XID
-		 * appears either in permanent rels or in local temporary rels. We
-		 * test this by seeing if we made transaction-controlled entries
-		 * *OR* local-rel tuple updates.  Note that if we made only the
-		 * latter, we have not emitted an XLOG record for our commit, and
-		 * so in the event of a crash the clog update might be lost.  This
-		 * is okay because no one else will ever care whether we
-		 * committed.
+		 * Now we may update the CLOG, if we wrote a COMMIT record above
 		 */
-		if (madeTCentries || MyXactMadeTempRelUpdate)
+		if (markXidCommitted)
 		{
 			TransactionIdCommit(xid);
 			/* to avoid race conditions, the parent must commit first */
 			TransactionIdCommitTree(nchildren, children);
 		}
-
-		END_CRIT_SECTION();
 	}
+	else
+	{
+		/*
+		 * Asynchronous commit case.
+		 *
+		 * Report the latest async commit LSN, so that the WAL writer knows to
+		 * flush this commit.
+		 */
+		XLogSetAsyncCommitLSN(XactLastRecEnd);
 
-	/* Break the chain of back-links in the XLOG records I output */
-	MyLastRecPtr.xrecoff = 0;
-	MyXactMadeXLogEntry = false;
-	MyXactMadeTempRelUpdate = false;
+		/*
+		 * We must not immediately update the CLOG, since we didn't flush the
+		 * XLOG. Instead, we store the LSN up to which the XLOG must be
+		 * flushed before the CLOG may be updated.
+		 */
+		if (markXidCommitted)
+		{
+			TransactionIdAsyncCommit(xid, XactLastRecEnd);
+			/* to avoid race conditions, the parent must commit first */
+			TransactionIdAsyncCommitTree(nchildren, children, XactLastRecEnd);
+		}
+	}
 
-	/* Show myself as out of the transaction in PGPROC array */
-	MyProc->logRec.xrecoff = 0;
+	/*
+	 * If we entered a commit critical section, leave it now, and let
+	 * checkpoints proceed.
+	 */
+	if (markXidCommitted)
+	{
+		MyProc->inCommit = false;
+		END_CRIT_SECTION();
+	}
 
-	/* And clean up local data */
-	if (rptr)
-		pfree(rptr);
-	if (children)
-		pfree(children);
-}
+	/* Compute latestXid while we have the child XIDs handy */
+	latestXid = TransactionIdLatest(xid, nchildren, children);
 
+	/* Reset XactLastRecEnd until the next transaction writes something */
+	XactLastRecEnd.xrecoff = 0;
 
-/*
- *	AtCommit_Cache
- */
-static void
-AtCommit_Cache(void)
-{
-	/*
-	 * Clean up the relation cache.
-	 */
-	AtEOXact_RelationCache(true);
+cleanup:
+	/* Clean up local data */
+	if (rels)
+		pfree(rels);
 
-	/*
-	 * Make catalog changes visible to all backends.
-	 */
-	AtEOXact_Inval(true);
+	return latestXid;
 }
 
+
 /*
  *	AtCommit_LocalCache
  */
@@ -743,20 +1019,6 @@ AtCommit_LocalCache(void)
 	CommandEndInvalidationMessages();
 }
 
-/*
- *	AtCommit_Locks
- */
-static void
-AtCommit_Locks(void)
-{
-	/*
-	 * XXX What if ProcReleaseLocks fails?	(race condition?)
-	 *
-	 * Then you're up a creek! -mer 5/24/92
-	 */
-	ProcReleaseLocks(ReleaseAllExceptSession, 0, NULL);
-}
-
 /*
  *	AtCommit_Memory
  */
@@ -764,9 +1026,8 @@ static void
 AtCommit_Memory(void)
 {
 	/*
-	 * Now that we're "out" of a transaction, have the system allocate
-	 * things in the top memory context instead of per-transaction
-	 * contexts.
+	 * Now that we're "out" of a transaction, have the system allocate things
+	 * in the top memory context instead of per-transaction contexts.
 	 */
 	MemoryContextSwitchTo(TopMemoryContext);
 
@@ -787,9 +1048,6 @@ AtCommit_Memory(void)
 
 /*
  * AtSubCommit_Memory
- *
- * We do not throw away the child's CurTransactionContext, since the data
- * it contains will be needed at upper commit.
  */
 static void
 AtSubCommit_Memory(void)
@@ -801,6 +1059,18 @@ AtSubCommit_Memory(void)
 	/* Return to parent transaction level's memory context. */
 	CurTransactionContext = s->parent->curTransactionContext;
 	MemoryContextSwitchTo(CurTransactionContext);
+
+	/*
+	 * Ordinarily we cannot throw away the child's CurTransactionContext,
+	 * since the data it contains will be needed at upper commit.  However, if
+	 * there isn't actually anything in it, we can throw it away.  This avoids
+	 * a small memory leak in the common case of "trivial" subxacts.
+	 */
+	if (MemoryContextIsEmpty(s->curTransactionContext))
+	{
+		MemoryContextDelete(s->curTransactionContext);
+		s->curTransactionContext = NULL;
+	}
 }
 
 /*
@@ -812,19 +1082,79 @@ static void
 AtSubCommit_childXids(void)
 {
 	TransactionState s = CurrentTransactionState;
-	MemoryContext old_cxt;
+	int			new_nChildXids;
 
 	Assert(s->parent != NULL);
 
-	old_cxt = MemoryContextSwitchTo(s->parent->curTransactionContext);
+	/*
+	 * The parent childXids array will need to hold my XID and all my
+	 * childXids, in addition to the XIDs already there.
+	 */
+	new_nChildXids = s->parent->nChildXids + s->nChildXids + 1;
+
+	/* Allocate or enlarge the parent array if necessary */
+	if (s->parent->maxChildXids < new_nChildXids)
+	{
+		int				new_maxChildXids;
+		TransactionId  *new_childXids;
+
+		/*
+		 * Make it 2x what's needed right now, to avoid having to enlarge it
+		 * repeatedly. But we can't go above MaxAllocSize.  (The latter
+		 * limit is what ensures that we don't need to worry about integer
+		 * overflow here or in the calculation of new_nChildXids.)
+		 */
+		new_maxChildXids = Min(new_nChildXids * 2,
+							   (int) (MaxAllocSize / sizeof(TransactionId)));
+
+		if (new_maxChildXids < new_nChildXids)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("maximum number of committed subtransactions (%d) exceeded",
+							(int) (MaxAllocSize / sizeof(TransactionId)))));
 
-	s->parent->childXids = list_concat(s->parent->childXids, s->childXids);
-	s->childXids = NIL;			/* ensure list not doubly referenced */
+		/*
+		 * We keep the child-XID arrays in TopTransactionContext; this avoids
+		 * setting up child-transaction contexts for what might be just a few
+		 * bytes of grandchild XIDs.
+		 */
+		if (s->parent->childXids == NULL)
+			new_childXids =
+				MemoryContextAlloc(TopTransactionContext, 
+								   new_maxChildXids * sizeof(TransactionId));
+		else
+			new_childXids = repalloc(s->parent->childXids, 
+									 new_maxChildXids * sizeof(TransactionId));
 
-	s->parent->childXids = lappend_int(s->parent->childXids,
-									   s->transactionIdData);
+		s->parent->childXids  = new_childXids;
+		s->parent->maxChildXids = new_maxChildXids;
+	}
 
-	MemoryContextSwitchTo(old_cxt);
+	/*
+	 * Copy all my XIDs to parent's array.
+	 *
+	 * Note: We rely on the fact that the XID of a child always follows that
+	 * of its parent.  By copying the XID of this subtransaction before the
+	 * XIDs of its children, we ensure that the array stays ordered.  Likewise,
+	 * all XIDs already in the array belong to subtransactions started and
+	 * subcommitted before us, so their XIDs must precede ours.
+	 */
+	s->parent->childXids[s->parent->nChildXids] = s->transactionId;
+
+	if (s->nChildXids > 0)
+		memcpy(&s->parent->childXids[s->parent->nChildXids + 1],
+			   s->childXids,
+			   s->nChildXids * sizeof(TransactionId));
+
+	s->parent->nChildXids = new_nChildXids;
+
+	/* Release child's array to avoid leakage */
+	if (s->childXids != NULL)
+		pfree(s->childXids);
+	/* We must reset these to avoid double-free if we fail later in commit */
+	s->childXids = NULL;
+	s->nChildXids = 0;
+	s->maxChildXids = 0;
 }
 
 /*
@@ -833,23 +1163,20 @@ AtSubCommit_childXids(void)
 static void
 RecordSubTransactionCommit(void)
 {
+	TransactionId xid = GetCurrentTransactionIdIfAny();
+
 	/*
-	 * We do not log the subcommit in XLOG; it doesn't matter until
-	 * the top-level transaction commits.
+	 * We do not log the subcommit in XLOG; it doesn't matter until the
+	 * top-level transaction commits.
 	 *
-	 * We must mark the subtransaction subcommitted in clog if its XID
-	 * appears either in permanent rels or in local temporary rels. We
-	 * test this by seeing if we made transaction-controlled entries
-	 * *OR* local-rel tuple updates.  (The test here actually covers the
-	 * entire transaction tree so far, so it may mark subtransactions that
-	 * don't really need it, but it's probably not worth being tenser.
-	 * Note that if a prior subtransaction dirtied these variables, then
-	 * RecordTransactionCommit will have to do the full pushup anyway...)
+	 * We must mark the subtransaction subcommitted in the CLOG if it had a
+	 * valid XID assigned.	If it did not, nobody else will ever know about
+	 * the existence of this subxact.  We don't have to deal with deletions
+	 * scheduled for on-commit here, since they'll be reassigned to our parent
+	 * (who might still abort).
 	 */
-	if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate)
+	if (TransactionIdIsValid(xid))
 	{
-		TransactionId	xid = GetCurrentTransactionId();
-
 		/* XXX does this really need to be a critical section? */
 		START_CRIT_SECTION();
 
@@ -867,142 +1194,134 @@ RecordSubTransactionCommit(void)
 
 /*
  *	RecordTransactionAbort
+ *
+ * Returns latest XID among xact and its children, or InvalidTransactionId
+ * if the xact has no XID.	(We compute that here just because it's easier.)
  */
-static void
-RecordTransactionAbort(void)
+static TransactionId
+RecordTransactionAbort(bool isSubXact)
 {
+	TransactionId xid = GetCurrentTransactionIdIfAny();
+	TransactionId latestXid;
 	int			nrels;
-	RelFileNode *rptr;
-	int 			nchildren;
-	TransactionId  *children;
-
-	/* Get data needed for abort record */
-	nrels = smgrGetPendingDeletes(false, &rptr);
-	nchildren = xactGetCommittedChildren(&children, false);
+	RelFileNode *rels;
+	int			nchildren;
+	TransactionId *children;
+	XLogRecData rdata[3];
+	int			lastrdata = 0;
+	xl_xact_abort xlrec;
 
 	/*
-	 * If we made neither any transaction-controlled XLOG entries nor any
-	 * temp-rel updates, and are not going to delete any files, we can omit
-	 * recording the transaction abort at all.  No one will ever care that
-	 * it aborted.  (These tests cover our whole transaction tree.)
+	 * If we haven't been assigned an XID, nobody will care whether we aborted
+	 * or not.	Hence, we're done in that case.  It does not matter if we have
+	 * rels to delete (note that this routine is not responsible for actually
+	 * deleting 'em).  We cannot have any child XIDs, either.
 	 */
-	if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0)
+	if (!TransactionIdIsValid(xid))
 	{
-		TransactionId	xid = GetCurrentTransactionId();
-
-		/*
-		 * Catch the scenario where we aborted partway through
-		 * RecordTransactionCommit ...
-		 */
-		if (TransactionIdDidCommit(xid))
-			elog(PANIC, "cannot abort transaction %u, it was already committed", xid);
-
-		START_CRIT_SECTION();
+		/* Reset XactLastRecEnd until the next transaction writes something */
+		if (!isSubXact)
+			XactLastRecEnd.xrecoff = 0;
+		return InvalidTransactionId;
+	}
 
-		/*
-		 * We only need to log the abort in XLOG if the transaction made
-		 * any transaction-controlled XLOG entries or will delete files.
-		 * (If it made no transaction-controlled XLOG entries, its XID
-		 * appears nowhere in permanent storage, so no one else will ever care
-		 * if it committed.)
-		 *
-		 * We do not flush XLOG to disk unless deleting files, since the
-		 * default assumption after a crash would be that we aborted, anyway.
-		 */
-		if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
-		{
-			XLogRecData rdata[3];
-			int			lastrdata = 0;
-			xl_xact_abort xlrec;
-			XLogRecPtr	recptr;
-
-			xlrec.xtime = time(NULL);
-			xlrec.nrels = nrels;
-			xlrec.nsubxacts = nchildren;
-			rdata[0].buffer = InvalidBuffer;
-			rdata[0].data = (char *) (&xlrec);
-			rdata[0].len = MinSizeOfXactAbort;
-			/* dump rels to delete */
-			if (nrels > 0)
-			{
-				rdata[0].next = &(rdata[1]);
-				rdata[1].buffer = InvalidBuffer;
-				rdata[1].data = (char *) rptr;
-				rdata[1].len = nrels * sizeof(RelFileNode);
-				lastrdata = 1;
-			}
-			/* dump committed child Xids */
-			if (nchildren > 0)
-			{
-				rdata[lastrdata].next = &(rdata[2]);
-				rdata[2].buffer = InvalidBuffer;
-				rdata[2].data = (char *) children;
-				rdata[2].len = nchildren * sizeof(TransactionId);
-				lastrdata = 2;
-			}
-			rdata[lastrdata].next = NULL;
+	/*
+	 * We have a valid XID, so we should write an ABORT record for it.
+	 *
+	 * We do not flush XLOG to disk here, since the default assumption after a
+	 * crash would be that we aborted, anyway.	For the same reason, we don't
+	 * need to worry about interlocking against checkpoint start.
+	 */
 
-			recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
+	/*
+	 * Check that we haven't aborted halfway through RecordTransactionCommit.
+	 */
+	if (TransactionIdDidCommit(xid))
+		elog(PANIC, "cannot abort transaction %u, it was already committed",
+			 xid);
 
-			/* Must flush if we are deleting files... */
-			if (nrels > 0)
-				XLogFlush(recptr);
-		}
+	/* Fetch the data we need for the abort record */
+	nrels = smgrGetPendingDeletes(false, &rels, NULL);
+	nchildren = xactGetCommittedChildren(&children);
 
-		/*
-		 * Mark the transaction aborted in clog.  This is not absolutely
-		 * necessary but we may as well do it while we are here.
-		 *
-		 * The ordering here isn't critical but it seems best to mark the
-		 * parent last.  That reduces the chance that concurrent
-		 * TransactionIdDidAbort calls will decide they need to do redundant
-		 * work.
-		 */
-		TransactionIdAbortTree(nchildren, children);
-		TransactionIdAbort(xid);
+	/* XXX do we really need a critical section here? */
+	START_CRIT_SECTION();
 
-		END_CRIT_SECTION();
+	/* Write the ABORT record */
+	if (isSubXact)
+		xlrec.xact_time = GetCurrentTimestamp();
+	else
+	{
+		SetCurrentTransactionStopTimestamp();
+		xlrec.xact_time = xactStopTimestamp;
+	}
+	xlrec.nrels = nrels;
+	xlrec.nsubxacts = nchildren;
+	rdata[0].data = (char *) (&xlrec);
+	rdata[0].len = MinSizeOfXactAbort;
+	rdata[0].buffer = InvalidBuffer;
+	/* dump rels to delete */
+	if (nrels > 0)
+	{
+		rdata[0].next = &(rdata[1]);
+		rdata[1].data = (char *) rels;
+		rdata[1].len = nrels * sizeof(RelFileNode);
+		rdata[1].buffer = InvalidBuffer;
+		lastrdata = 1;
+	}
+	/* dump committed child Xids */
+	if (nchildren > 0)
+	{
+		rdata[lastrdata].next = &(rdata[2]);
+		rdata[2].data = (char *) children;
+		rdata[2].len = nchildren * sizeof(TransactionId);
+		rdata[2].buffer = InvalidBuffer;
+		lastrdata = 2;
 	}
+	rdata[lastrdata].next = NULL;
 
-	/* Break the chain of back-links in the XLOG records I output */
-	MyLastRecPtr.xrecoff = 0;
-	MyXactMadeXLogEntry = false;
-	MyXactMadeTempRelUpdate = false;
+	(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
 
-	/* Show myself as out of the transaction in PGPROC array */
-	MyProc->logRec.xrecoff = 0;
+	/*
+	 * Mark the transaction aborted in clog.  This is not absolutely necessary
+	 * but we may as well do it while we are here; also, in the subxact case
+	 * it is helpful because XactLockTableWait makes use of it to avoid
+	 * waiting for already-aborted subtransactions.  It is OK to do it without
+	 * having flushed the ABORT record to disk, because in event of a crash
+	 * we'd be assumed to have aborted anyway.
+	 *
+	 * The ordering here isn't critical but it seems best to mark the parent
+	 * first.  This assures an atomic transition of all the subtransactions to
+	 * aborted state from the point of view of concurrent
+	 * TransactionIdDidAbort calls.
+	 */
+	TransactionIdAbort(xid);
+	TransactionIdAbortTree(nchildren, children);
 
-	/* And clean up local data */
-	if (rptr)
-		pfree(rptr);
-	if (children)
-		pfree(children);
-}
+	END_CRIT_SECTION();
 
-/*
- *	AtAbort_Cache
- */
-static void
-AtAbort_Cache(void)
-{
-	AtEOXact_RelationCache(false);
-	AtEOXact_Inval(false);
-}
+	/* Compute latestXid while we have the child XIDs handy */
+	latestXid = TransactionIdLatest(xid, nchildren, children);
 
-/*
- *	AtAbort_Locks
- */
-static void
-AtAbort_Locks(void)
-{
 	/*
-	 * XXX What if ProcReleaseLocks() fails?  (race condition?)
-	 *
-	 * Then you're up a creek without a paddle! -mer
+	 * If we're aborting a subtransaction, we can immediately remove failed
+	 * XIDs from PGPROC's cache of running child XIDs.  We do that here for
+	 * subxacts, because we already have the child XID array at hand.  For
+	 * main xacts, the equivalent happens just after this function returns.
 	 */
-	ProcReleaseLocks(ReleaseAll, 0, NULL);
-}
+	if (isSubXact)
+		XidCacheRemoveRunningXids(xid, nchildren, children, latestXid);
+
+	/* Reset XactLastRecEnd until the next transaction writes something */
+	if (!isSubXact)
+		XactLastRecEnd.xrecoff = 0;
+
+	/* And clean up local data */
+	if (rels)
+		pfree(rels);
 
+	return latestXid;
+}
 
 /*
  *	AtAbort_Memory
@@ -1011,144 +1330,75 @@ static void
 AtAbort_Memory(void)
 {
 	/*
-	 * Make sure we are in a valid context (not a child of
-	 * TopTransactionContext...).  Note that it is possible for this code
-	 * to be called when we aren't in a transaction at all; go directly to
-	 * TopMemoryContext in that case.
+	 * Switch into TransactionAbortContext, which should have some free space
+	 * even if nothing else does.  We'll work in this context until we've
+	 * finished cleaning up.
+	 *
+	 * It is barely possible to get here when we've not been able to create
+	 * TransactionAbortContext yet; if so use TopMemoryContext.
 	 */
-	if (TopTransactionContext != NULL)
-	{
-		MemoryContextSwitchTo(TopTransactionContext);
-
-		/*
-		 * We do not want to destroy the transaction's global state yet,
-		 * so we can't free any memory here.
-		 */
-	}
+	if (TransactionAbortContext != NULL)
+		MemoryContextSwitchTo(TransactionAbortContext);
 	else
 		MemoryContextSwitchTo(TopMemoryContext);
 }
 
 /*
- * AtSubAbort_Locks
+ * AtSubAbort_Memory
  */
 static void
-AtSubAbort_Locks(void)
+AtSubAbort_Memory(void)
 {
-	int nxids;
-	TransactionId *xids;
+	Assert(TransactionAbortContext != NULL);
 
-	nxids = xactGetCommittedChildren(&xids, true);
+	MemoryContextSwitchTo(TransactionAbortContext);
+}
 
-	ProcReleaseLocks(ReleaseGivenXids, nxids, xids);
 
-	pfree(xids);
+/*
+ *	AtAbort_ResourceOwner
+ */
+static void
+AtAbort_ResourceOwner(void)
+{
+	/*
+	 * Make sure we have a valid ResourceOwner, if possible (else it will be
+	 * NULL, which is OK)
+	 */
+	CurrentResourceOwner = TopTransactionResourceOwner;
 }
 
-
 /*
- * AtSubAbort_Memory
+ * AtSubAbort_ResourceOwner
  */
 static void
-AtSubAbort_Memory(void)
+AtSubAbort_ResourceOwner(void)
 {
-	Assert(TopTransactionContext != NULL);
+	TransactionState s = CurrentTransactionState;
 
-	MemoryContextSwitchTo(TopTransactionContext);
+	/* Make sure we have a valid ResourceOwner */
+	CurrentResourceOwner = s->curTransactionOwner;
 }
 
+
 /*
- * RecordSubTransactionAbort
+ * AtSubAbort_childXids
  */
 static void
-RecordSubTransactionAbort(void)
+AtSubAbort_childXids(void)
 {
-	int			nrels;
-	RelFileNode *rptr;
-	int 			nchildren;
-	TransactionId  *children;
-
-	/* Get data needed for abort record */
-	nrels = smgrGetPendingDeletes(false, &rptr);
-	nchildren = xactGetCommittedChildren(&children, false);
+	TransactionState s = CurrentTransactionState;
 
 	/*
-	 * If we made neither any transaction-controlled XLOG entries nor any
-	 * temp-rel updates, and are not going to delete any files, we can omit
-	 * recording the transaction abort at all.  No one will ever care that
-	 * it aborted.  (These tests cover our whole transaction tree, and
-	 * therefore may mark subxacts that don't really need it, but it's
-	 * probably not worth being tenser.)
-	 *
-	 * In this case we needn't worry about marking subcommitted children as
-	 * aborted, because they didn't mark themselves as subcommitted in the
-	 * first place; see the optimization in RecordSubTransactionCommit.
+	 * We keep the child-XID arrays in TopTransactionContext (see
+	 * AtSubCommit_childXids).	This means we'd better free the array
+	 * explicitly at abort to avoid leakage.
 	 */
-	if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0)
-	{
-		TransactionId	xid = GetCurrentTransactionId();
-
-		START_CRIT_SECTION();
-
-		/*
-		 * We only need to log the abort in XLOG if the transaction made
-		 * any transaction-controlled XLOG entries or will delete files.
-		 */
-		if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
-		{
-			XLogRecData rdata[3];
-			int lastrdata = 0;
-			xl_xact_abort xlrec;
-			XLogRecPtr      recptr;
-
-			xlrec.xtime = time(NULL);
-			xlrec.nrels = nrels;
-			xlrec.nsubxacts = nchildren;
-			rdata[0].buffer = InvalidBuffer;
-			rdata[0].data = (char *) (&xlrec);
-			rdata[0].len = MinSizeOfXactAbort;
-			/* dump rels to delete */
-			if (nrels > 0)
-			{
-				rdata[0].next = &(rdata[1]);
-				rdata[1].buffer = InvalidBuffer;
-				rdata[1].data = (char *) rptr;
-				rdata[1].len = nrels * sizeof(RelFileNode);
-				lastrdata = 1;
-			}
-			/* dump committed child Xids */
-			if (nchildren > 0)
-			{
-				rdata[lastrdata].next = &(rdata[2]);
-				rdata[2].buffer = InvalidBuffer;
-				rdata[2].data = (char *) children;
-				rdata[2].len = nchildren * sizeof(TransactionId);
-				lastrdata = 2;
-			}
-			rdata[lastrdata].next = NULL;
-
-			recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
-
-			/* Must flush if we are deleting files... */
-			if (nrels > 0)
-				XLogFlush(recptr);
-		}
-
-		/*
-		 * Mark the transaction aborted in clog.  This is not absolutely
-		 * necessary but we may as well do it while we are here.
-		 */
-		TransactionIdAbortTree(nchildren, children);
-		TransactionIdAbort(xid);
-
-		END_CRIT_SECTION();
-	}
-
-	/* And clean up local data */
-	if (rptr)
-		pfree(rptr);
-	if (children)
-		pfree(children);
+	if (s->childXids != NULL)
+		pfree(s->childXids);
+	s->childXids = NULL;
+	s->nChildXids = 0;
+	s->maxChildXids = 0;
 }
 
 /* ----------------------------------------------------------------
@@ -1162,14 +1412,19 @@ RecordSubTransactionAbort(void)
 static void
 AtCleanup_Memory(void)
 {
+	Assert(CurrentTransactionState->parent == NULL);
+
 	/*
-	 * Now that we're "out" of a transaction, have the system allocate
-	 * things in the top memory context instead of per-transaction
-	 * contexts.
+	 * Now that we're "out" of a transaction, have the system allocate things
+	 * in the top memory context instead of per-transaction contexts.
 	 */
 	MemoryContextSwitchTo(TopMemoryContext);
 
-	Assert(CurrentTransactionState->parent == NULL);
+	/*
+	 * Clear the special abort context for next time.
+	 */
+	if (TransactionAbortContext != NULL)
+		MemoryContextResetAndDeleteChildren(TransactionAbortContext);
 
 	/*
 	 * Release all transaction-local memory.
@@ -1202,11 +1457,19 @@ AtSubCleanup_Memory(void)
 	CurTransactionContext = s->parent->curTransactionContext;
 
 	/*
-	 * Delete the subxact local memory contexts. Its CurTransactionContext
-	 * can go too (note this also kills CurTransactionContexts from any
-	 * children of the subxact).
+	 * Clear the special abort context for next time.
+	 */
+	if (TransactionAbortContext != NULL)
+		MemoryContextResetAndDeleteChildren(TransactionAbortContext);
+
+	/*
+	 * Delete the subxact local memory contexts. Its CurTransactionContext can
+	 * go too (note this also kills CurTransactionContexts from any children
+	 * of the subxact).
 	 */
-	MemoryContextDelete(s->curTransactionContext);
+	if (s->curTransactionContext)
+		MemoryContextDelete(s->curTransactionContext);
+	s->curTransactionContext = NULL;
 }
 
 /* ----------------------------------------------------------------
@@ -1220,19 +1483,28 @@ AtSubCleanup_Memory(void)
 static void
 StartTransaction(void)
 {
-	TransactionState s = CurrentTransactionState;
+	TransactionState s;
+	VirtualTransactionId vxid;
+
+	/*
+	 * Let's just make sure the state stack is empty
+	 */
+	s = &TopTransactionStateData;
+	CurrentTransactionState = s;
 
 	/*
 	 * check the current transaction state
 	 */
 	if (s->state != TRANS_DEFAULT)
-		elog(WARNING, "StartTransaction and not in default state");
+		elog(WARNING, "StartTransaction while in %s state",
+			 TransStateAsString(s->state));
 
 	/*
 	 * set the current transaction state information appropriately during
 	 * start processing
 	 */
 	s->state = TRANS_START;
+	s->transactionId = InvalidTransactionId;	/* until assigned */
 
 	/*
 	 * Make sure we've freed any old snapshot, and reset xact state variables
@@ -1240,45 +1512,75 @@ StartTransaction(void)
 	FreeXactSnapshot();
 	XactIsoLevel = DefaultXactIsoLevel;
 	XactReadOnly = DefaultXactReadOnly;
+	forceSyncCommit = false;
+	MyXactAccessedTempRel = false;
 
 	/*
-	 * generate a new transaction id
+	 * reinitialize within-transaction counters
 	 */
-	s->transactionIdData = GetNewTransactionId(false);
-
-	XactLockTableInsert(s->transactionIdData);
+	s->subTransactionId = TopSubTransactionId;
+	currentSubTransactionId = TopSubTransactionId;
+	currentCommandId = FirstCommandId;
+	currentCommandIdUsed = false;
 
 	/*
-	 * set now()
+	 * must initialize resource-management stuff first
 	 */
-	xactStartTime = GetCurrentAbsoluteTimeUsec(&(xactStartTimeUsec));
+	AtStart_Memory();
+	AtStart_ResourceOwner();
 
 	/*
-	 * initialize current transaction state fields
+	 * Assign a new LocalTransactionId, and combine it with the backendId to
+	 * form a virtual transaction id.
 	 */
-	s->commandId = FirstCommandId;
-	s->nestingLevel = 1;
-	s->childXids = NIL;
+	vxid.backendId = MyBackendId;
+	vxid.localTransactionId = GetNextLocalTransactionId();
 
 	/*
-	 * You might expect to see "s->currentUser = GetUserId();" here, but
-	 * you won't because it doesn't work during startup; the userid isn't
-	 * set yet during a backend's first transaction start.  We only use
-	 * the currentUser field in sub-transaction state structs.
+	 * Lock the virtual transaction id before we announce it in the proc array
 	 */
+	VirtualXactLockTableInsert(vxid);
 
 	/*
-	 * initialize the various transaction subsystems
+	 * Advertise it in the proc array.	We assume assignment of
+	 * LocalTransactionId is atomic, and the backendId should be set already.
 	 */
-	AtStart_Memory();
-	AtStart_Inval();
-	AtStart_Cache();
-	AtStart_Locks();
+	Assert(MyProc->backendId == vxid.backendId);
+	MyProc->lxid = vxid.localTransactionId;
+
+	TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
+
+	/*
+	 * set transaction_timestamp() (a/k/a now()).  We want this to be the same
+	 * as the first command's statement_timestamp(), so don't do a fresh
+	 * GetCurrentTimestamp() call (which'd be expensive anyway).  Also, mark
+	 * xactStopTimestamp as unset.
+	 */
+	xactStartTimestamp = stmtStartTimestamp;
+	xactStopTimestamp = 0;
+	pgstat_report_xact_timestamp(xactStartTimestamp);
+
+	/*
+	 * initialize current transaction state fields
+	 *
+	 * note: prevXactReadOnly is not used at the outermost level
+	 */
+	s->nestingLevel = 1;
+	s->gucNestLevel = 1;
+	s->childXids = NULL;
+	s->nChildXids = 0;
+	s->maxChildXids = 0;
+	GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt);
+	/* SecurityDefinerContext should never be set outside a transaction */
+	Assert(!s->prevSecDefCxt);
 
 	/*
-	 * Tell the trigger manager we're starting a transaction
+	 * initialize other subsystems for new transaction
 	 */
-	DeferredTriggerBeginXact();
+	AtStart_GUC();
+	AtStart_Inval();
+	AtStart_Cache();
+	AfterTriggerBeginXact();
 
 	/*
 	 * done with start processing, set current transaction state to "in
@@ -1289,13 +1591,17 @@ StartTransaction(void)
 	ShowTransactionState("StartTransaction");
 }
 
+
 /*
  *	CommitTransaction
+ *
+ * NB: if you change this routine, better look at PrepareTransaction too!
  */
 static void
 CommitTransaction(void)
 {
 	TransactionState s = CurrentTransactionState;
+	TransactionId latestXid;
 
 	ShowTransactionState("CommitTransaction");
 
@@ -1303,118 +1609,407 @@ CommitTransaction(void)
 	 * check the current transaction state
 	 */
 	if (s->state != TRANS_INPROGRESS)
-		elog(WARNING, "CommitTransaction and not in in-progress state");
+		elog(WARNING, "CommitTransaction while in %s state",
+			 TransStateAsString(s->state));
 	Assert(s->parent == NULL);
 
 	/*
-	 * Tell the trigger manager that this transaction is about to be
-	 * committed. He'll invoke all trigger deferred until XACT before we
-	 * really start on committing the transaction.
+	 * Do pre-commit processing (most of this stuff requires database access,
+	 * and in fact could still cause an error...)
+	 *
+	 * It is possible for CommitHoldablePortals to invoke functions that queue
+	 * deferred triggers, and it's also possible that triggers create holdable
+	 * cursors.  So we have to loop until there's nothing left to do.
 	 */
-	DeferredTriggerEndXact();
+	for (;;)
+	{
+		/*
+		 * Fire all currently pending deferred triggers.
+		 */
+		AfterTriggerFireDeferred();
+
+		/*
+		 * Convert any open holdable cursors into static portals.  If there
+		 * weren't any, we are done ... otherwise loop back to check if they
+		 * queued deferred triggers.  Lather, rinse, repeat.
+		 */
+		if (!CommitHoldablePortals())
+			break;
+	}
+
+	/* Now we can shut down the deferred-trigger manager */
+	AfterTriggerEndXact(true);
+
+	/* Close any open regular cursors */
+	AtCommit_Portals();
 
 	/*
-	 * Similarly, let ON COMMIT management do its thing before we start to
-	 * commit.
+	 * Let ON COMMIT management do its thing (must happen after closing
+	 * cursors, to avoid dangling-reference problems)
 	 */
 	PreCommit_on_commit_actions();
 
+	/* close large objects before lower-level cleanup */
+	AtEOXact_LargeObject(true);
+
+	/* NOTIFY commit must come before lower-level cleanup */
+	AtCommit_Notify();
+
+	/*
+	 * Update flat files if we changed pg_database, pg_authid or
+	 * pg_auth_members.  This should be the last step before commit.
+	 */
+	AtEOXact_UpdateFlatFiles(true);
+
 	/* Prevent cancel/die interrupt while cleaning up */
 	HOLD_INTERRUPTS();
 
 	/*
 	 * set the current transaction state information appropriately during
-	 * the abort processing
+	 * commit processing
 	 */
 	s->state = TRANS_COMMIT;
 
 	/*
-	 * Do pre-commit processing (most of this stuff requires database
-	 * access, and in fact could still cause an error...)
+	 * Here is where we really truly commit.
 	 */
+	latestXid = RecordTransactionCommit();
 
-	AtCommit_Portals();
+	TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid);
 
-	/* handle commit for large objects [ PA, 7/17/98 ] */
-	/* XXX probably this does not belong here */
-	lo_commit(true);
+	/*
+	 * Let others know about no transaction in progress by me. Note that this
+	 * must be done _before_ releasing locks we hold and _after_
+	 * RecordTransactionCommit.
+	 */
+	ProcArrayEndTransaction(MyProc, latestXid);
 
-	/* NOTIFY commit must come before lower-level cleanup */
-	AtCommit_Notify();
+	/*
+	 * This is all post-commit cleanup.  Note that if an error is raised here,
+	 * it's too late to abort the transaction.  This should be just
+	 * noncritical resource releasing.
+	 *
+	 * The ordering of operations is not entirely random.  The idea is:
+	 * release resources visible to other backends (eg, files, buffer pins);
+	 * then release locks; then release backend-local resources. We want to
+	 * release locks at the point where any backend waiting for us will see
+	 * our transaction as being fully cleaned up.
+	 *
+	 * Resources that can be associated with individual queries are handled by
+	 * the ResourceOwner mechanism.  The other calls here are for backend-wide
+	 * state.
+	 */
 
-	/* Update the flat password file if we changed pg_shadow or pg_group */
-	AtEOXact_UpdatePasswordFile(true);
+	CallXactCallbacks(XACT_EVENT_COMMIT);
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 true, true);
+
+	/* Check we've released all buffer pins */
+	AtEOXact_Buffers(true);
+
+	/* Clean up the relation cache */
+	AtEOXact_RelationCache(true);
 
 	/*
-	 * Here is where we really truly commit.
+	 * Make catalog changes visible to all backends.  This has to happen after
+	 * relcache references are dropped (see comments for
+	 * AtEOXact_RelationCache), but before locks are released (if anyone is
+	 * waiting for lock on a relation we've modified, we want them to know
+	 * about the catalog change before they start using the relation).
 	 */
-	RecordTransactionCommit();
+	AtEOXact_Inval(true);
 
 	/*
-	 * Let others know about no transaction in progress by me. Note that
-	 * this must be done _before_ releasing locks we hold and _after_
-	 * RecordTransactionCommit.
+	 * Likewise, dropping of files deleted during the transaction is best done
+	 * after releasing relcache and buffer pins.  (This is not strictly
+	 * necessary during commit, since such pins should have been released
+	 * already, but this ordering is definitely critical during abort.)
+	 */
+	smgrDoPendingDeletes(true);
+
+	AtEOXact_MultiXact();
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 true, true);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 true, true);
+
+	/* Check we've released all catcache entries */
+	AtEOXact_CatCache(true);
+
+	AtEOXact_GUC(true, 1);
+	AtEOXact_SPI(true);
+	AtEOXact_xml();
+	AtEOXact_on_commit_actions(true);
+	AtEOXact_Namespace(true);
+	/* smgrcommit already done */
+	AtEOXact_Files();
+	AtEOXact_ComboCid();
+	AtEOXact_HashTables(true);
+	AtEOXact_PgStat(true);
+	pgstat_report_xact_timestamp(0);
+
+	CurrentResourceOwner = NULL;
+	ResourceOwnerDelete(TopTransactionResourceOwner);
+	s->curTransactionOwner = NULL;
+	CurTransactionResourceOwner = NULL;
+	TopTransactionResourceOwner = NULL;
+
+	AtCommit_Memory();
+
+	s->transactionId = InvalidTransactionId;
+	s->subTransactionId = InvalidSubTransactionId;
+	s->nestingLevel = 0;
+	s->gucNestLevel = 0;
+	s->childXids = NULL;
+	s->nChildXids = 0;
+	s->maxChildXids = 0;
+
+	/*
+	 * done with commit processing, set current transaction state back to
+	 * default
+	 */
+	s->state = TRANS_DEFAULT;
+
+	RESUME_INTERRUPTS();
+}
+
+
+/*
+ *	PrepareTransaction
+ *
+ * NB: if you change this routine, better look at CommitTransaction too!
+ */
+static void
+PrepareTransaction(void)
+{
+	TransactionState s = CurrentTransactionState;
+	TransactionId xid = GetCurrentTransactionId();
+	GlobalTransaction gxact;
+	TimestampTz prepared_at;
+
+	ShowTransactionState("PrepareTransaction");
+
+	/*
+	 * check the current transaction state
+	 */
+	if (s->state != TRANS_INPROGRESS)
+		elog(WARNING, "PrepareTransaction while in %s state",
+			 TransStateAsString(s->state));
+	Assert(s->parent == NULL);
+
+	/*
+	 * Do pre-commit processing (most of this stuff requires database access,
+	 * and in fact could still cause an error...)
 	 *
-	 * LWLockAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked by
-	 * xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot - if
-	 * xid 2' GetSnapshotData sees xid 1 as running then it must see xid 0
-	 * as running as well or it will see two tuple versions - one deleted
-	 * by xid 1 and one inserted by xid 0.	See notes in GetSnapshotData.
+	 * It is possible for PrepareHoldablePortals to invoke functions that
+	 * queue deferred triggers, and it's also possible that triggers create
+	 * holdable cursors.  So we have to loop until there's nothing left to do.
 	 */
-	if (MyProc != NULL)
+	for (;;)
 	{
-		/* Lock SInvalLock because that's what GetSnapshotData uses. */
-		LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
-		MyProc->xid = InvalidTransactionId;
-		MyProc->xmin = InvalidTransactionId;
-		LWLockRelease(SInvalLock);
+		/*
+		 * Fire all currently pending deferred triggers.
+		 */
+		AfterTriggerFireDeferred();
+
+		/*
+		 * Convert any open holdable cursors into static portals.  If there
+		 * weren't any, we are done ... otherwise loop back to check if they
+		 * queued deferred triggers.  Lather, rinse, repeat.
+		 */
+		if (!PrepareHoldablePortals())
+			break;
 	}
 
+	/* Now we can shut down the deferred-trigger manager */
+	AfterTriggerEndXact(true);
+
+	/* Close any open regular cursors */
+	AtCommit_Portals();
+
 	/*
-	 * This is all post-commit cleanup.  Note that if an error is raised
-	 * here, it's too late to abort the transaction.  This should be just
-	 * noncritical resource releasing.
+	 * Let ON COMMIT management do its thing (must happen after closing
+	 * cursors, to avoid dangling-reference problems)
+	 */
+	PreCommit_on_commit_actions();
+
+	/* close large objects before lower-level cleanup */
+	AtEOXact_LargeObject(true);
+
+	/* NOTIFY and flatfiles will be handled below */
+
+	/*
+	 * Don't allow PREPARE TRANSACTION if we've accessed a temporary table
+	 * in this transaction.  Having the prepared xact hold locks on another
+	 * backend's temp table seems a bad idea --- for instance it would prevent
+	 * the backend from exiting.  There are other problems too, such as how
+	 * to clean up the source backend's local buffers and ON COMMIT state
+	 * if the prepared xact includes a DROP of a temp table.
 	 *
-	 * The ordering of operations is not entirely random.  The idea is:
-	 * release resources visible to other backends (eg, files, buffer
-	 * pins); then release locks; then release backend-local resources. We
-	 * want to release locks at the point where any backend waiting for us
-	 * will see our transaction as being fully cleaned up.
+	 * We must check this after executing any ON COMMIT actions, because
+	 * they might still access a temp relation.
+	 *
+	 * XXX In principle this could be relaxed to allow some useful special
+	 * cases, such as a temp table created and dropped all within the
+	 * transaction.  That seems to require much more bookkeeping though.
 	 */
+	if (MyXactAccessedTempRel)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
 
-	smgrDoPendingDeletes(true);
-	AtCommit_Cache();
+	/* Prevent cancel/die interrupt while cleaning up */
+	HOLD_INTERRUPTS();
+
+	/*
+	 * set the current transaction state information appropriately during
+	 * prepare processing
+	 */
+	s->state = TRANS_PREPARE;
+
+	prepared_at = GetCurrentTimestamp();
+
+	/* Tell bufmgr and smgr to prepare for commit */
+	BufmgrCommit();
+
+	/*
+	 * Reserve the GID for this transaction. This could fail if the requested
+	 * GID is invalid or already in use.
+	 */
+	gxact = MarkAsPreparing(xid, prepareGID, prepared_at,
+							GetUserId(), MyDatabaseId);
+	prepareGID = NULL;
+
+	/*
+	 * Collect data for the 2PC state file.  Note that in general, no actual
+	 * state change should happen in the called modules during this step,
+	 * since it's still possible to fail before commit, and in that case we
+	 * want transaction abort to be able to clean up.  (In particular, the
+	 * AtPrepare routines may error out if they find cases they cannot
+	 * handle.)  State cleanup should happen in the PostPrepare routines
+	 * below.  However, some modules can go ahead and clear state here because
+	 * they wouldn't do anything with it during abort anyway.
+	 *
+	 * Note: because the 2PC state file records will be replayed in the same
+	 * order they are made, the order of these calls has to match the order in
+	 * which we want things to happen during COMMIT PREPARED or ROLLBACK
+	 * PREPARED; in particular, pay attention to whether things should happen
+	 * before or after releasing the transaction's locks.
+	 */
+	StartPrepare(gxact);
+
+	AtPrepare_Notify();
+	AtPrepare_UpdateFlatFiles();
+	AtPrepare_Inval();
+	AtPrepare_Locks();
+	AtPrepare_PgStat();
+
+	/*
+	 * Here is where we really truly prepare.
+	 *
+	 * We have to record transaction prepares even if we didn't make any
+	 * updates, because the transaction manager might get confused if we lose
+	 * a global transaction.
+	 */
+	EndPrepare(gxact);
+
+	/*
+	 * Now we clean up backend-internal state and release internal resources.
+	 */
+
+	/* Reset XactLastRecEnd until the next transaction writes something */
+	XactLastRecEnd.xrecoff = 0;
+
+	/*
+	 * Let others know about no transaction in progress by me.	This has to be
+	 * done *after* the prepared transaction has been marked valid, else
+	 * someone may think it is unlocked and recyclable.
+	 */
+	ProcArrayClearTransaction(MyProc);
+
+	/*
+	 * This is all post-transaction cleanup.  Note that if an error is raised
+	 * here, it's too late to abort the transaction.  This should be just
+	 * noncritical resource releasing.	See notes in CommitTransaction.
+	 */
+
+	CallXactCallbacks(XACT_EVENT_PREPARE);
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 true, true);
+
+	/* Check we've released all buffer pins */
 	AtEOXact_Buffers(true);
-	/* smgrcommit already done */
 
-	AtCommit_Locks();
+	/* Clean up the relation cache */
+	AtEOXact_RelationCache(true);
+
+	/* notify and flatfiles don't need a postprepare call */
+
+	PostPrepare_PgStat();
+
+	PostPrepare_Inval();
+
+	PostPrepare_smgr();
+
+	AtEOXact_MultiXact();
 
-	CallEOXactCallbacks(true);
-	AtEOXact_GUC(true, false);
+	PostPrepare_Locks(xid);
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 true, true);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 true, true);
+
+	/* Check we've released all catcache entries */
+	AtEOXact_CatCache(true);
+
+	/* PREPARE acts the same as COMMIT as far as GUC is concerned */
+	AtEOXact_GUC(true, 1);
 	AtEOXact_SPI(true);
-	AtEOXact_gist();
-	AtEOXact_hash();
-	AtEOXact_nbtree();
-	AtEOXact_rtree();
-	AtEOXact_on_commit_actions(true, s->transactionIdData);
+	AtEOXact_xml();
+	AtEOXact_on_commit_actions(true);
 	AtEOXact_Namespace(true);
-	AtEOXact_CatCache(true);
+	/* smgrcommit already done */
 	AtEOXact_Files();
-	pgstat_count_xact_commit();
+	AtEOXact_ComboCid();
+	AtEOXact_HashTables(true);
+	/* don't call AtEOXact_PgStat here */
+
+	CurrentResourceOwner = NULL;
+	ResourceOwnerDelete(TopTransactionResourceOwner);
+	s->curTransactionOwner = NULL;
+	CurTransactionResourceOwner = NULL;
+	TopTransactionResourceOwner = NULL;
+
 	AtCommit_Memory();
 
+	s->transactionId = InvalidTransactionId;
+	s->subTransactionId = InvalidSubTransactionId;
 	s->nestingLevel = 0;
-	s->childXids = NIL;
+	s->gucNestLevel = 0;
+	s->childXids = NULL;
+	s->nChildXids = 0;
+	s->maxChildXids = 0;
 
 	/*
-	 * done with commit processing, set current transaction state back to
-	 * default
+	 * done with 1st phase commit processing, set current transaction state
+	 * back to default
 	 */
 	s->state = TRANS_DEFAULT;
 
 	RESUME_INTERRUPTS();
 }
 
+
 /*
  *	AbortTransaction
  */
@@ -1422,15 +2017,20 @@ static void
 AbortTransaction(void)
 {
 	TransactionState s = CurrentTransactionState;
+	TransactionId latestXid;
 
 	/* Prevent cancel/die interrupt while cleaning up */
 	HOLD_INTERRUPTS();
 
+	/* Make sure we have a valid memory context and resource owner */
+	AtAbort_Memory();
+	AtAbort_ResourceOwner();
+
 	/*
 	 * Release any LW locks we might be holding as quickly as possible.
 	 * (Regular locks, however, must be held till we finish aborting.)
-	 * Releasing LW locks is critical since we might try to grab them
-	 * again while cleaning up!
+	 * Releasing LW locks is critical since we might try to grab them again
+	 * while cleaning up!
 	 */
 	LWLockReleaseAll();
 
@@ -1439,90 +2039,95 @@ AbortTransaction(void)
 	UnlockBuffers();
 
 	/*
-	 * Also clean up any open wait for lock, since the lock manager will
-	 * choke if we try to wait for another lock before doing this.
+	 * Also clean up any open wait for lock, since the lock manager will choke
+	 * if we try to wait for another lock before doing this.
 	 */
 	LockWaitCancel();
 
 	/*
 	 * check the current transaction state
 	 */
-	if (s->state != TRANS_INPROGRESS)
-		elog(WARNING, "AbortTransaction and not in in-progress state");
+	if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
+		elog(WARNING, "AbortTransaction while in %s state",
+			 TransStateAsString(s->state));
 	Assert(s->parent == NULL);
 
 	/*
-	 * set the current transaction state information appropriately during
-	 * the abort processing
+	 * set the current transaction state information appropriately during the
+	 * abort processing
 	 */
 	s->state = TRANS_ABORT;
 
-	/* Make sure we are in a valid memory context */
-	AtAbort_Memory();
-
 	/*
-	 * Reset user id which might have been changed transiently.  We cannot
-	 * use s->currentUser, but must get the session userid from miscinit.c.
+	 * Reset user ID which might have been changed transiently.  We need this
+	 * to clean up in case control escaped out of a SECURITY DEFINER function
+	 * or other local change of CurrentUserId; therefore, the prior value
+	 * of SecurityDefinerContext also needs to be restored.
 	 *
-	 * (Note: it is not necessary to restore session authorization here
-	 * because that can only be changed via GUC, and GUC will take care of
-	 * rolling it back if need be.  However, an error within a SECURITY
-	 * DEFINER function could send control here with the wrong current
-	 * userid.)
+	 * (Note: it is not necessary to restore session authorization or role
+	 * settings here because those can only be changed via GUC, and GUC will
+	 * take care of rolling them back if need be.)
 	 */
-	SetUserId(GetSessionUserId());
+	SetUserIdAndContext(s->prevUser, s->prevSecDefCxt);
 
 	/*
 	 * do abort processing
 	 */
-	DeferredTriggerAbortXact();
+	AfterTriggerEndXact(false);
 	AtAbort_Portals();
-	lo_commit(false);			/* 'false' means it's abort */
+	AtEOXact_LargeObject(false);	/* 'false' means it's abort */
 	AtAbort_Notify();
-	AtEOXact_UpdatePasswordFile(false);
+	AtEOXact_UpdateFlatFiles(false);
+
+	/*
+	 * Advertise the fact that we aborted in pg_clog (assuming that we got as
+	 * far as assigning an XID to advertise).
+	 */
+	latestXid = RecordTransactionAbort(false);
 
-	/* Advertise the fact that we aborted in pg_clog. */
-	RecordTransactionAbort();
+	TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid);
 
 	/*
-	 * Let others know about no transaction in progress by me. Note that
-	 * this must be done _before_ releasing locks we hold and _after_
+	 * Let others know about no transaction in progress by me. Note that this
+	 * must be done _before_ releasing locks we hold and _after_
 	 * RecordTransactionAbort.
 	 */
-	if (MyProc != NULL)
-	{
-		/* Lock SInvalLock because that's what GetSnapshotData uses. */
-		LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
-		MyProc->xid = InvalidTransactionId;
-		MyProc->xmin = InvalidTransactionId;
-		LWLockRelease(SInvalLock);
-	}
+	ProcArrayEndTransaction(MyProc, latestXid);
 
 	/*
 	 * Post-abort cleanup.	See notes in CommitTransaction() concerning
 	 * ordering.
 	 */
 
-	smgrDoPendingDeletes(false);
-	AtAbort_Cache();
-	AtEOXact_Buffers(false);
-	smgrabort();
+	CallXactCallbacks(XACT_EVENT_ABORT);
 
-	AtAbort_Locks();
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 false, true);
+	AtEOXact_Buffers(false);
+	AtEOXact_RelationCache(false);
+	AtEOXact_Inval(false);
+	smgrDoPendingDeletes(false);
+	AtEOXact_MultiXact();
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 false, true);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 false, true);
+	AtEOXact_CatCache(false);
 
-	CallEOXactCallbacks(false);
-	AtEOXact_GUC(false, false);
+	AtEOXact_GUC(false, 1);
 	AtEOXact_SPI(false);
-	AtEOXact_gist();
-	AtEOXact_hash();
-	AtEOXact_nbtree();
-	AtEOXact_rtree();
-	AtEOXact_on_commit_actions(false, s->transactionIdData);
+	AtEOXact_xml();
+	AtEOXact_on_commit_actions(false);
 	AtEOXact_Namespace(false);
-	AtEOXact_CatCache(false);
+	smgrabort();
 	AtEOXact_Files();
-	SetReindexProcessing(InvalidOid, InvalidOid);
-	pgstat_count_xact_rollback();
+	AtEOXact_ComboCid();
+	AtEOXact_HashTables(false);
+	AtEOXact_PgStat(false);
+	pgstat_report_xact_timestamp(0);
 
 	/*
 	 * State remains TRANS_ABORT until CleanupTransaction().
@@ -1542,16 +2147,30 @@ CleanupTransaction(void)
 	 * State should still be TRANS_ABORT from AbortTransaction().
 	 */
 	if (s->state != TRANS_ABORT)
-		elog(FATAL, "CleanupTransaction and not in abort state");
+		elog(FATAL, "CleanupTransaction: unexpected state %s",
+			 TransStateAsString(s->state));
 
 	/*
 	 * do abort cleanup processing
 	 */
 	AtCleanup_Portals();		/* now safe to release portal memory */
+
+	CurrentResourceOwner = NULL;	/* and resource owner */
+	if (TopTransactionResourceOwner)
+		ResourceOwnerDelete(TopTransactionResourceOwner);
+	s->curTransactionOwner = NULL;
+	CurTransactionResourceOwner = NULL;
+	TopTransactionResourceOwner = NULL;
+
 	AtCleanup_Memory();			/* and transaction memory */
 
+	s->transactionId = InvalidTransactionId;
+	s->subTransactionId = InvalidSubTransactionId;
 	s->nestingLevel = 0;
-	s->childXids = NIL;
+	s->gucNestLevel = 0;
+	s->childXids = NULL;
+	s->nChildXids = 0;
+	s->maxChildXids = 0;
 
 	/*
 	 * done with abort processing, set current transaction state back to
@@ -1571,8 +2190,8 @@ StartTransactionCommand(void)
 	switch (s->blockState)
 	{
 			/*
-			 * if we aren't in a transaction block, we just do our usual
-			 * start transaction.
+			 * if we aren't in a transaction block, we just do our usual start
+			 * transaction.
 			 */
 		case TBLOCK_DEFAULT:
 			StartTransaction();
@@ -1580,19 +2199,23 @@ StartTransactionCommand(void)
 			break;
 
 			/*
-			 * This is the case when we are somewhere in a transaction block
-			 * and about to start a new command.  For now we do nothing
-			 * but someday we may do command-local resource initialization.
+			 * We are somewhere in a transaction block or subtransaction and
+			 * about to start a new command.  For now we do nothing, but
+			 * someday we may do command-local resource initialization. (Note
+			 * that any needed CommandCounterIncrement was done by the
+			 * previous CommitTransactionCommand.)
 			 */
 		case TBLOCK_INPROGRESS:
 		case TBLOCK_SUBINPROGRESS:
 			break;
 
 			/*
-			 * Here we are in the middle of a transaction block but one of
-			 * the commands caused an abort so we do nothing but remain in
-			 * the abort state.  Eventually we will get to the "END
-			 * TRANSACTION" which will set things straight.
+			 * Here we are in a failed transaction block (one of the commands
+			 * caused an abort) so we do nothing but remain in the abort
+			 * state.  Eventually we will get a ROLLBACK command which will
+			 * get us out of this state.  (It is up to other code to ensure
+			 * that no commands other than ROLLBACK will be processed in these
+			 * states.)
 			 */
 		case TBLOCK_ABORT:
 		case TBLOCK_SUBABORT:
@@ -1602,13 +2225,16 @@ StartTransactionCommand(void)
 		case TBLOCK_STARTED:
 		case TBLOCK_BEGIN:
 		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBBEGINABORT:
 		case TBLOCK_END:
 		case TBLOCK_SUBEND:
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
-		case TBLOCK_ENDABORT:
-			elog(FATAL, "StartTransactionCommand: unexpected state %s",
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
+			elog(ERROR, "StartTransactionCommand: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
 	}
@@ -1634,15 +2260,16 @@ CommitTransactionCommand(void)
 			/*
 			 * This shouldn't happen, because it means the previous
 			 * StartTransactionCommand didn't set the STARTED state
-			 * appropiately.
+			 * appropriately.
 			 */
 		case TBLOCK_DEFAULT:
-			elog(FATAL, "CommitTransactionCommand: unexpected TBLOCK_DEFAULT");
+			elog(FATAL, "CommitTransactionCommand: unexpected state %s",
+				 BlockStateAsString(s->blockState));
 			break;
 
 			/*
 			 * If we aren't in a transaction block, just do our usual
-			 * transaction commit.
+			 * transaction commit, and return to the idle state.
 			 */
 		case TBLOCK_STARTED:
 			CommitTransaction();
@@ -1650,10 +2277,10 @@ CommitTransactionCommand(void)
 			break;
 
 			/*
-			 * This is the case right after we get a "BEGIN TRANSACTION"
-			 * command, but the user hasn't done anything else yet, so we
-			 * change to the "transaction block in progress" state and
-			 * return.
+			 * We are completing a "BEGIN TRANSACTION" command, so we change
+			 * to the "transaction block in progress" state and return.  (We
+			 * assume the BEGIN did nothing to the database, so we need no
+			 * CommandCounterIncrement.)
 			 */
 		case TBLOCK_BEGIN:
 			s->blockState = TBLOCK_INPROGRESS;
@@ -1661,17 +2288,17 @@ CommitTransactionCommand(void)
 
 			/*
 			 * This is the case when we have finished executing a command
-			 * someplace within a transaction block.  We increment the
-			 * command counter and return.
+			 * someplace within a transaction block.  We increment the command
+			 * counter and return.
 			 */
 		case TBLOCK_INPROGRESS:
+		case TBLOCK_SUBINPROGRESS:
 			CommandCounterIncrement();
 			break;
 
 			/*
-			 * This is the case when we just got the "END TRANSACTION"
-			 * statement, so we commit the transaction and go back to the
-			 * default state.
+			 * We are completing a "COMMIT" command.  Do it and return to the
+			 * idle state.
 			 */
 		case TBLOCK_END:
 			CommitTransaction();
@@ -1679,87 +2306,162 @@ CommitTransactionCommand(void)
 			break;
 
 			/*
-			 * Here we are in the middle of a transaction block but one of
-			 * the commands caused an abort so we do nothing but remain in
-			 * the abort state.  Eventually we will get to the "END
-			 * TRANSACTION" which will set things straight.
+			 * Here we are in the middle of a transaction block but one of the
+			 * commands caused an abort so we do nothing but remain in the
+			 * abort state.  Eventually we will get a ROLLBACK command.
 			 */
 		case TBLOCK_ABORT:
+		case TBLOCK_SUBABORT:
 			break;
 
 			/*
-			 * Here we were in an aborted transaction block which just
-			 * processed the "END TRANSACTION" command from the user, so
-			 * clean up and return to the default state.
+			 * Here we were in an aborted transaction block and we just got
+			 * the ROLLBACK command from the user, so clean up the
+			 * already-aborted transaction and return to the idle state.
 			 */
-		case TBLOCK_ENDABORT:
+		case TBLOCK_ABORT_END:
 			CleanupTransaction();
 			s->blockState = TBLOCK_DEFAULT;
 			break;
 
 			/*
-			 * We were just issued a BEGIN inside a transaction block.
-			 * Start a subtransaction.  (BeginTransactionBlock already
-			 * did PushTransaction, so as to have someplace to put the
-			 * SUBBEGIN state.)
+			 * Here we were in a perfectly good transaction block but the user
+			 * told us to ROLLBACK anyway.	We have to abort the transaction
+			 * and then clean up.
 			 */
-		case TBLOCK_SUBBEGIN:
-			StartSubTransaction();
-			s->blockState = TBLOCK_SUBINPROGRESS;
+		case TBLOCK_ABORT_PENDING:
+			AbortTransaction();
+			CleanupTransaction();
+			s->blockState = TBLOCK_DEFAULT;
 			break;
 
 			/*
-			 * We were issued a BEGIN inside an aborted transaction block.
-			 * Start a subtransaction, and put it in aborted state.
+			 * We are completing a "PREPARE TRANSACTION" command.  Do it and
+			 * return to the idle state.
 			 */
-		case TBLOCK_SUBBEGINABORT:
-			StartAbortedSubTransaction();
-			s->blockState = TBLOCK_SUBABORT;
+		case TBLOCK_PREPARE:
+			PrepareTransaction();
+			s->blockState = TBLOCK_DEFAULT;
 			break;
 
 			/*
-			 * Inside a subtransaction, increment the command counter.
+			 * We were just issued a SAVEPOINT inside a transaction block.
+			 * Start a subtransaction.	(DefineSavepoint already did
+			 * PushTransaction, so as to have someplace to put the SUBBEGIN
+			 * state.)
 			 */
-		case TBLOCK_SUBINPROGRESS:
-			CommandCounterIncrement();
+		case TBLOCK_SUBBEGIN:
+			StartSubTransaction();
+			s->blockState = TBLOCK_SUBINPROGRESS;
 			break;
 
 			/*
-			 * We were issued a COMMIT command, so we end the current
-			 * subtransaction and return to the parent transaction.
+			 * We were issued a COMMIT or RELEASE command, so we end the
+			 * current subtransaction and return to the parent transaction.
+			 * The parent might be ended too, so repeat till we are all the
+			 * way out or find an INPROGRESS transaction.
 			 */
 		case TBLOCK_SUBEND:
-			CommitSubTransaction();
-			PopTransaction();
-			s = CurrentTransactionState;		/* changed by pop */
+			do
+			{
+				CommitSubTransaction();
+				s = CurrentTransactionState;	/* changed by pop */
+			} while (s->blockState == TBLOCK_SUBEND);
+			/* If we had a COMMIT command, finish off the main xact too */
+			if (s->blockState == TBLOCK_END)
+			{
+				Assert(s->parent == NULL);
+				CommitTransaction();
+				s->blockState = TBLOCK_DEFAULT;
+			}
+			else if (s->blockState == TBLOCK_PREPARE)
+			{
+				Assert(s->parent == NULL);
+				PrepareTransaction();
+				s->blockState = TBLOCK_DEFAULT;
+			}
+			else
+			{
+				Assert(s->blockState == TBLOCK_INPROGRESS ||
+					   s->blockState == TBLOCK_SUBINPROGRESS);
+			}
 			break;
 
 			/*
-			 * If we are in an aborted subtransaction, do nothing.
+			 * The current already-failed subtransaction is ending due to a
+			 * ROLLBACK or ROLLBACK TO command, so pop it and recursively
+			 * examine the parent (which could be in any of several states).
 			 */
-		case TBLOCK_SUBABORT:
+		case TBLOCK_SUBABORT_END:
+			CleanupSubTransaction();
+			CommitTransactionCommand();
 			break;
 
 			/*
-			 * We are ending an aborted subtransaction via ROLLBACK,
-			 * so the parent can be allowed to live.
+			 * As above, but it's not dead yet, so abort first.
 			 */
-		case TBLOCK_SUBENDABORT_OK:
+		case TBLOCK_SUBABORT_PENDING:
+			AbortSubTransaction();
 			CleanupSubTransaction();
-			PopTransaction();
-			s = CurrentTransactionState;		/* changed by pop */
+			CommitTransactionCommand();
 			break;
 
 			/*
-			 * We are ending an aborted subtransaction via COMMIT.
-			 * End the subtransaction, and abort the parent too.
+			 * The current subtransaction is the target of a ROLLBACK TO
+			 * command.  Abort and pop it, then start a new subtransaction
+			 * with the same name.
 			 */
-		case TBLOCK_SUBENDABORT_ERROR:
-			CleanupSubTransaction();
-			PopTransaction();
-			s = CurrentTransactionState;		/* changed by pop */
-			Assert(s->blockState != TBLOCK_SUBENDABORT_ERROR);
-			AbortCurrentTransaction();
+		case TBLOCK_SUBRESTART:
+			{
+				char	   *name;
+				int			savepointLevel;
+
+				/* save name and keep Cleanup from freeing it */
+				name = s->name;
+				s->name = NULL;
+				savepointLevel = s->savepointLevel;
+
+				AbortSubTransaction();
+				CleanupSubTransaction();
+
+				DefineSavepoint(NULL);
+				s = CurrentTransactionState;	/* changed by push */
+				s->name = name;
+				s->savepointLevel = savepointLevel;
+
+				/* This is the same as TBLOCK_SUBBEGIN case */
+				AssertState(s->blockState == TBLOCK_SUBBEGIN);
+				StartSubTransaction();
+				s->blockState = TBLOCK_SUBINPROGRESS;
+			}
+			break;
+
+			/*
+			 * Same as above, but the subtransaction had already failed, so we
+			 * don't need AbortSubTransaction.
+			 */
+		case TBLOCK_SUBABORT_RESTART:
+			{
+				char	   *name;
+				int			savepointLevel;
+
+				/* save name and keep Cleanup from freeing it */
+				name = s->name;
+				s->name = NULL;
+				savepointLevel = s->savepointLevel;
+
+				CleanupSubTransaction();
+
+				DefineSavepoint(NULL);
+				s = CurrentTransactionState;	/* changed by push */
+				s->name = name;
+				s->savepointLevel = savepointLevel;
+
+				/* This is the same as TBLOCK_SUBBEGIN case */
+				AssertState(s->blockState == TBLOCK_SUBBEGIN);
+				StartSubTransaction();
+				s->blockState = TBLOCK_SUBINPROGRESS;
+			}
 			break;
 	}
 }
@@ -1774,15 +2476,30 @@ AbortCurrentTransaction(void)
 
 	switch (s->blockState)
 	{
-		/*
-		 * we aren't in a transaction, so we do nothing.
-		 */
 		case TBLOCK_DEFAULT:
+			if (s->state == TRANS_DEFAULT)
+			{
+				/* we are idle, so nothing to do */
+			}
+			else
+			{
+				/*
+				 * We can get here after an error during transaction start
+				 * (state will be TRANS_START).  Need to clean up the
+				 * incompletely started transaction.  First, adjust the
+				 * low-level state to suppress warning message from
+				 * AbortTransaction.
+				 */
+				if (s->state == TRANS_START)
+					s->state = TRANS_INPROGRESS;
+				AbortTransaction();
+				CleanupTransaction();
+			}
 			break;
 
 			/*
-			 * if we aren't in a transaction block, we just do the basic
-			 * abort & cleanup transaction.
+			 * if we aren't in a transaction block, we just do the basic abort
+			 * & cleanup transaction.
 			 */
 		case TBLOCK_STARTED:
 			AbortTransaction();
@@ -1791,33 +2508,33 @@ AbortCurrentTransaction(void)
 			break;
 
 			/*
-			 * If we are in TBLOCK_BEGIN it means something screwed up
-			 * right after reading "BEGIN TRANSACTION" so we enter the
-			 * abort state.  Eventually an "END TRANSACTION" will fix
-			 * things.
+			 * If we are in TBLOCK_BEGIN it means something screwed up right
+			 * after reading "BEGIN TRANSACTION".  We assume that the user
+			 * will interpret the error as meaning the BEGIN failed to get him
+			 * into a transaction block, so we should abort and return to idle
+			 * state.
 			 */
 		case TBLOCK_BEGIN:
 			AbortTransaction();
-			s->blockState = TBLOCK_ABORT;
-			/* CleanupTransaction happens when we exit TBLOCK_ENDABORT */
+			CleanupTransaction();
+			s->blockState = TBLOCK_DEFAULT;
 			break;
 
 			/*
-			 * This is the case when we are somewhere in a transaction block
-			 * and we've gotten a failure, so we abort the transaction and
-			 * set up the persistent ABORT state.  We will stay in ABORT
-			 * until we get an "END TRANSACTION".
+			 * We are somewhere in a transaction block and we've gotten a
+			 * failure, so we abort the transaction and set up the persistent
+			 * ABORT state.  We will stay in ABORT until we get a ROLLBACK.
 			 */
 		case TBLOCK_INPROGRESS:
 			AbortTransaction();
 			s->blockState = TBLOCK_ABORT;
-			/* CleanupTransaction happens when we exit TBLOCK_ENDABORT */
+			/* CleanupTransaction happens when we exit TBLOCK_ABORT_END */
 			break;
 
 			/*
-			 * Here, the system was fouled up just after the user wanted
-			 * to end the transaction block so we abort the transaction
-			 * and return to the default state.
+			 * Here, we failed while trying to COMMIT.	Clean up the
+			 * transaction and return to idle state (we do not want to stay in
+			 * the transaction).
 			 */
 		case TBLOCK_END:
 			AbortTransaction();
@@ -1826,67 +2543,75 @@ AbortCurrentTransaction(void)
 			break;
 
 			/*
-			 * Here, we are already in an aborted transaction state and
-			 * are waiting for an "END TRANSACTION" to come along and lo
-			 * and behold, we abort again! So we just remain in the abort
-			 * state.
+			 * Here, we are already in an aborted transaction state and are
+			 * waiting for a ROLLBACK, but for some reason we failed again! So
+			 * we just remain in the abort state.
 			 */
 		case TBLOCK_ABORT:
 		case TBLOCK_SUBABORT:
 			break;
 
 			/*
-			 * Here we were in an aborted transaction block which just
-			 * processed the "END TRANSACTION" command but somehow aborted
-			 * again.. since we must have done the abort processing, we
-			 * clean up and return to the default state.
+			 * We are in a failed transaction and we got the ROLLBACK command.
+			 * We have already aborted, we just need to cleanup and go to idle
+			 * state.
 			 */
-		case TBLOCK_ENDABORT:
+		case TBLOCK_ABORT_END:
 			CleanupTransaction();
 			s->blockState = TBLOCK_DEFAULT;
 			break;
 
 			/*
-			 * If we are just starting a subtransaction, put it
-			 * in aborted state.
+			 * We are in a live transaction and we got a ROLLBACK command.
+			 * Abort, cleanup, go to idle state.
 			 */
-		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBBEGINABORT:
-			StartAbortedSubTransaction();
-			s->blockState = TBLOCK_SUBABORT;
+		case TBLOCK_ABORT_PENDING:
+			AbortTransaction();
+			CleanupTransaction();
+			s->blockState = TBLOCK_DEFAULT;
 			break;
 
+			/*
+			 * Here, we failed while trying to PREPARE.  Clean up the
+			 * transaction and return to idle state (we do not want to stay in
+			 * the transaction).
+			 */
+		case TBLOCK_PREPARE:
+			AbortTransaction();
+			CleanupTransaction();
+			s->blockState = TBLOCK_DEFAULT;
+			break;
+
+			/*
+			 * We got an error inside a subtransaction.  Abort just the
+			 * subtransaction, and go to the persistent SUBABORT state until
+			 * we get ROLLBACK.
+			 */
 		case TBLOCK_SUBINPROGRESS:
 			AbortSubTransaction();
 			s->blockState = TBLOCK_SUBABORT;
 			break;
 
 			/*
-			 * If we are aborting an ending transaction,
-			 * we have to abort the parent transaction too.
+			 * If we failed while trying to create a subtransaction, clean up
+			 * the broken subtransaction and abort the parent.	The same
+			 * applies if we get a failure while ending a subtransaction.
 			 */
+		case TBLOCK_SUBBEGIN:
 		case TBLOCK_SUBEND:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
 			AbortSubTransaction();
 			CleanupSubTransaction();
-			PopTransaction();
-			s = CurrentTransactionState;		/* changed by pop */
-			Assert(s->blockState != TBLOCK_SUBEND &&
-					s->blockState != TBLOCK_SUBENDABORT_OK &&
-					s->blockState != TBLOCK_SUBENDABORT_ERROR);
 			AbortCurrentTransaction();
 			break;
 
 			/*
 			 * Same as above, except the Abort() was already done.
 			 */
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_SUBABORT_RESTART:
 			CleanupSubTransaction();
-			PopTransaction();
-			s = CurrentTransactionState;		/* changed by pop */
-			Assert(s->blockState != TBLOCK_SUBEND &&
-					s->blockState != TBLOCK_SUBENDABORT_OK &&
-					s->blockState != TBLOCK_SUBENDABORT_ERROR);
 			AbortCurrentTransaction();
 			break;
 	}
@@ -1904,12 +2629,14 @@ AbortCurrentTransaction(void)
  *	could issue more commands and possibly cause a failure after the statement
  *	completes).  Subtransactions are verboten too.
  *
- *	stmtNode: pointer to parameter block for statement; this is used in
- *	a very klugy way to determine whether we are inside a function.
- *	stmtType: statement type name for error messages.
+ *	isTopLevel: passed down from ProcessUtility to determine whether we are
+ *	inside a function or multi-query querystring.  (We will always fail if
+ *	this is false, but it's convenient to centralize the check here instead of
+ *	making callers do it.)
+ *	stmtType: statement type name, for error messages.
  */
 void
-PreventTransactionChain(void *stmtNode, const char *stmtType)
+PreventTransactionChain(bool isTopLevel, const char *stmtType)
 {
 	/*
 	 * xact block already started?
@@ -1932,15 +2659,14 @@ PreventTransactionChain(void *stmtNode, const char *stmtType)
 						stmtType)));
 
 	/*
-	 * Are we inside a function call?  If the statement's parameter block
-	 * was allocated in QueryContext, assume it is an interactive command.
-	 * Otherwise assume it is coming from a function.
+	 * inside a function call or multi-command string?
 	 */
-	if (!MemoryContextContains(QueryContext, stmtNode))
+	if (!isTopLevel)
 		ereport(ERROR,
 				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
 		/* translator: %s represents an SQL statement name */
-			 errmsg("%s cannot be executed from a function", stmtType)));
+				 errmsg("%s cannot be executed from a function or multi-command string",
+						stmtType)));
 
 	/* If we got past IsTransactionBlock test, should be in default state */
 	if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
@@ -1962,12 +2688,12 @@ PreventTransactionChain(void *stmtNode, const char *stmtType)
  *	use of the current statement's results.  Likewise subtransactions.
  *	Thus this is an inverse for PreventTransactionChain.
  *
- *	stmtNode: pointer to parameter block for statement; this is used in
- *	a very klugy way to determine whether we are inside a function.
- *	stmtType: statement type name for error messages.
+ *	isTopLevel: passed down from ProcessUtility to determine whether we are
+ *	inside a function.
+ *	stmtType: statement type name, for error messages.
  */
 void
-RequireTransactionChain(void *stmtNode, const char *stmtType)
+RequireTransactionChain(bool isTopLevel, const char *stmtType)
 {
 	/*
 	 * xact block already started?
@@ -1982,16 +2708,15 @@ RequireTransactionChain(void *stmtNode, const char *stmtType)
 		return;
 
 	/*
-	 * Are we inside a function call?  If the statement's parameter block
-	 * was allocated in QueryContext, assume it is an interactive command.
-	 * Otherwise assume it is coming from a function.
+	 * inside a function call?
 	 */
-	if (!MemoryContextContains(QueryContext, stmtNode))
+	if (!isTopLevel)
 		return;
+
 	ereport(ERROR,
 			(errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
 	/* translator: %s represents an SQL statement name */
-			 errmsg("%s may only be used in transaction blocks",
+			 errmsg("%s can only be used in transaction blocks",
 					stmtType)));
 }
 
@@ -2002,11 +2727,11 @@ RequireTransactionChain(void *stmtNode, const char *stmtType)
  *	a transaction block than when running as single commands.  ANALYZE is
  *	currently the only example.
  *
- *	stmtNode: pointer to parameter block for statement; this is used in
- *	a very klugy way to determine whether we are inside a function.
+ *	isTopLevel: passed down from ProcessUtility to determine whether we are
+ *	inside a function.
  */
 bool
-IsInTransactionChain(void *stmtNode)
+IsInTransactionChain(bool isTopLevel)
 {
 	/*
 	 * Return true on same conditions that would make PreventTransactionChain
@@ -2018,7 +2743,7 @@ IsInTransactionChain(void *stmtNode)
 	if (IsSubTransaction())
 		return true;
 
-	if (!MemoryContextContains(QueryContext, stmtNode))
+	if (!isTopLevel)
 		return true;
 
 	if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
@@ -2030,43 +2755,44 @@ IsInTransactionChain(void *stmtNode)
 
 
 /*
- * Register or deregister callback functions for end-of-xact cleanup
+ * Register or deregister callback functions for start- and end-of-xact
+ * operations.
  *
  * These functions are intended for use by dynamically loaded modules.
  * For built-in modules we generally just hardwire the appropriate calls
  * (mainly because it's easier to control the order that way, where needed).
  *
- * Note that the callback occurs post-commit or post-abort, so the callback
- * functions can only do noncritical cleanup.
+ * At transaction end, the callback occurs post-commit or post-abort, so the
+ * callback functions can only do noncritical cleanup.
  */
 void
-RegisterEOXactCallback(EOXactCallback callback, void *arg)
+RegisterXactCallback(XactCallback callback, void *arg)
 {
-	EOXactCallbackItem *item;
+	XactCallbackItem *item;
 
-	item = (EOXactCallbackItem *)
-		MemoryContextAlloc(TopMemoryContext, sizeof(EOXactCallbackItem));
+	item = (XactCallbackItem *)
+		MemoryContextAlloc(TopMemoryContext, sizeof(XactCallbackItem));
 	item->callback = callback;
 	item->arg = arg;
-	item->next = EOXact_callbacks;
-	EOXact_callbacks = item;
+	item->next = Xact_callbacks;
+	Xact_callbacks = item;
 }
 
 void
-UnregisterEOXactCallback(EOXactCallback callback, void *arg)
+UnregisterXactCallback(XactCallback callback, void *arg)
 {
-	EOXactCallbackItem *item;
-	EOXactCallbackItem *prev;
+	XactCallbackItem *item;
+	XactCallbackItem *prev;
 
 	prev = NULL;
-	for (item = EOXact_callbacks; item; prev = item, item = item->next)
+	for (item = Xact_callbacks; item; prev = item, item = item->next)
 	{
 		if (item->callback == callback && item->arg == arg)
 		{
 			if (prev)
 				prev->next = item->next;
 			else
-				EOXact_callbacks = item->next;
+				Xact_callbacks = item->next;
 			pfree(item);
 			break;
 		}
@@ -2074,16 +2800,71 @@ UnregisterEOXactCallback(EOXactCallback callback, void *arg)
 }
 
 static void
-CallEOXactCallbacks(bool isCommit)
+CallXactCallbacks(XactEvent event)
 {
-	EOXactCallbackItem *item;
+	XactCallbackItem *item;
+
+	for (item = Xact_callbacks; item; item = item->next)
+		(*item->callback) (event, item->arg);
+}
+
 
-	for (item = EOXact_callbacks; item; item = item->next)
+/*
+ * Register or deregister callback functions for start- and end-of-subxact
+ * operations.
+ *
+ * Pretty much same as above, but for subtransaction events.
+ *
+ * At subtransaction end, the callback occurs post-subcommit or post-subabort,
+ * so the callback functions can only do noncritical cleanup.  At
+ * subtransaction start, the callback is called when the subtransaction has
+ * finished initializing.
+ */
+void
+RegisterSubXactCallback(SubXactCallback callback, void *arg)
+{
+	SubXactCallbackItem *item;
+
+	item = (SubXactCallbackItem *)
+		MemoryContextAlloc(TopMemoryContext, sizeof(SubXactCallbackItem));
+	item->callback = callback;
+	item->arg = arg;
+	item->next = SubXact_callbacks;
+	SubXact_callbacks = item;
+}
+
+void
+UnregisterSubXactCallback(SubXactCallback callback, void *arg)
+{
+	SubXactCallbackItem *item;
+	SubXactCallbackItem *prev;
+
+	prev = NULL;
+	for (item = SubXact_callbacks; item; prev = item, item = item->next)
 	{
-		(*item->callback) (isCommit, item->arg);
+		if (item->callback == callback && item->arg == arg)
+		{
+			if (prev)
+				prev->next = item->next;
+			else
+				SubXact_callbacks = item->next;
+			pfree(item);
+			break;
+		}
 	}
 }
 
+static void
+CallSubXactCallbacks(SubXactEvent event,
+					 SubTransactionId mySubid,
+					 SubTransactionId parentSubid)
+{
+	SubXactCallbackItem *item;
+
+	for (item = SubXact_callbacks; item; item = item->next)
+		(*item->callback) (event, mySubid, parentSubid, item->arg);
+}
+
 
 /* ----------------------------------------------------------------
  *					   transaction block support
@@ -2099,10 +2880,10 @@ BeginTransactionBlock(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	switch (s->blockState) {
+	switch (s->blockState)
+	{
 			/*
-			 * We are not inside a transaction block, so allow one
-			 * to begin.
+			 * We are not inside a transaction block, so allow one to begin.
 			 */
 		case TBLOCK_STARTED:
 			s->blockState = TBLOCK_BEGIN;
@@ -2110,196 +2891,683 @@ BeginTransactionBlock(void)
 
 			/*
 			 * Already a transaction block in progress.
-			 * Start a subtransaction.
 			 */
 		case TBLOCK_INPROGRESS:
 		case TBLOCK_SUBINPROGRESS:
-			PushTransaction();
-			s = CurrentTransactionState;		/* changed by push */
-			s->blockState = TBLOCK_SUBBEGIN;
-			break;
-
-			/*
-			 * An aborted transaction block should be allowed to start
-			 * a subtransaction, but it must put it in aborted state.
-			 */
 		case TBLOCK_ABORT:
 		case TBLOCK_SUBABORT:
-			PushTransaction();
-			s = CurrentTransactionState;		/* changed by push */
-			s->blockState = TBLOCK_SUBBEGINABORT;
+			ereport(WARNING,
+					(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
+					 errmsg("there is already a transaction in progress")));
 			break;
 
-			/* These cases are invalid.  Reject them altogether. */
+			/* These cases are invalid. */
 		case TBLOCK_DEFAULT:
 		case TBLOCK_BEGIN:
 		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBBEGINABORT:
-		case TBLOCK_ENDABORT:
 		case TBLOCK_END:
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
 		case TBLOCK_SUBEND:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "BeginTransactionBlock: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
 	}
 }
 
+/*
+ *	PrepareTransactionBlock
+ *		This executes a PREPARE command.
+ *
+ * Since PREPARE may actually do a ROLLBACK, the result indicates what
+ * happened: TRUE for PREPARE, FALSE for ROLLBACK.
+ *
+ * Note that we don't actually do anything here except change blockState.
+ * The real work will be done in the upcoming PrepareTransaction().
+ * We do it this way because it's not convenient to change memory context,
+ * resource owner, etc while executing inside a Portal.
+ */
+bool
+PrepareTransactionBlock(char *gid)
+{
+	TransactionState s;
+	bool		result;
+
+	/* Set up to commit the current transaction */
+	result = EndTransactionBlock();
+
+	/* If successful, change outer tblock state to PREPARE */
+	if (result)
+	{
+		s = CurrentTransactionState;
+
+		while (s->parent != NULL)
+			s = s->parent;
+
+		if (s->blockState == TBLOCK_END)
+		{
+			/* Save GID where PrepareTransaction can find it again */
+			prepareGID = MemoryContextStrdup(TopTransactionContext, gid);
+
+			s->blockState = TBLOCK_PREPARE;
+		}
+		else
+		{
+			/*
+			 * ignore case where we are not in a transaction;
+			 * EndTransactionBlock already issued a warning.
+			 */
+			Assert(s->blockState == TBLOCK_STARTED);
+			/* Don't send back a PREPARE result tag... */
+			result = false;
+		}
+	}
+
+	return result;
+}
+
 /*
  *	EndTransactionBlock
  *		This executes a COMMIT command.
+ *
+ * Since COMMIT may actually do a ROLLBACK, the result indicates what
+ * happened: TRUE for COMMIT, FALSE for ROLLBACK.
+ *
+ * Note that we don't actually do anything here except change blockState.
+ * The real work will be done in the upcoming CommitTransactionCommand().
+ * We do it this way because it's not convenient to change memory context,
+ * resource owner, etc while executing inside a Portal.
  */
-void
+bool
 EndTransactionBlock(void)
 {
 	TransactionState s = CurrentTransactionState;
+	bool		result = false;
 
-	switch (s->blockState) {
-		/*
-		 * here we are in a transaction block which should commit when we
-		 * get to the upcoming CommitTransactionCommand() so we set the
-		 * state to "END".	CommitTransactionCommand() will recognize this
-		 * and commit the transaction and return us to the default state
-		 */
+	switch (s->blockState)
+	{
+			/*
+			 * We are in a transaction block, so tell CommitTransactionCommand
+			 * to COMMIT.
+			 */
 		case TBLOCK_INPROGRESS:
 			s->blockState = TBLOCK_END;
+			result = true;
 			break;
 
 			/*
-			 * here we are in a subtransaction block.  Signal
-			 * CommitTransactionCommand() to end it and return to the
-			 * parent transaction.
+			 * We are in a failed transaction block.  Tell
+			 * CommitTransactionCommand it's time to exit the block.
 			 */
-		case TBLOCK_SUBINPROGRESS:
-			s->blockState = TBLOCK_SUBEND;
+		case TBLOCK_ABORT:
+			s->blockState = TBLOCK_ABORT_END;
 			break;
 
 			/*
-			 * here, we are in a transaction block which aborted. Since the
-			 * AbortTransaction() was already done, we need only
-			 * change to the special "END ABORT" state.  The upcoming
-			 * CommitTransactionCommand() will recognise this and then put us
-			 * back in the default state.
+			 * We are in a live subtransaction block.  Set up to subcommit all
+			 * open subtransactions and then commit the main transaction.
 			 */
-		case TBLOCK_ABORT:
-			s->blockState = TBLOCK_ENDABORT;
+		case TBLOCK_SUBINPROGRESS:
+			while (s->parent != NULL)
+			{
+				if (s->blockState == TBLOCK_SUBINPROGRESS)
+					s->blockState = TBLOCK_SUBEND;
+				else
+					elog(FATAL, "EndTransactionBlock: unexpected state %s",
+						 BlockStateAsString(s->blockState));
+				s = s->parent;
+			}
+			if (s->blockState == TBLOCK_INPROGRESS)
+				s->blockState = TBLOCK_END;
+			else
+				elog(FATAL, "EndTransactionBlock: unexpected state %s",
+					 BlockStateAsString(s->blockState));
+			result = true;
 			break;
 
 			/*
-			 * here we are in an aborted subtransaction.  Signal
-			 * CommitTransactionCommand() to clean up and return to the
-			 * parent transaction.  Since the user said COMMIT, we must
-			 * fail the parent transaction.
+			 * Here we are inside an aborted subtransaction.  Treat the COMMIT
+			 * as ROLLBACK: set up to abort everything and exit the main
+			 * transaction.
 			 */
 		case TBLOCK_SUBABORT:
-			s->blockState = TBLOCK_SUBENDABORT_ERROR;
+			while (s->parent != NULL)
+			{
+				if (s->blockState == TBLOCK_SUBINPROGRESS)
+					s->blockState = TBLOCK_SUBABORT_PENDING;
+				else if (s->blockState == TBLOCK_SUBABORT)
+					s->blockState = TBLOCK_SUBABORT_END;
+				else
+					elog(FATAL, "EndTransactionBlock: unexpected state %s",
+						 BlockStateAsString(s->blockState));
+				s = s->parent;
+			}
+			if (s->blockState == TBLOCK_INPROGRESS)
+				s->blockState = TBLOCK_ABORT_PENDING;
+			else if (s->blockState == TBLOCK_ABORT)
+				s->blockState = TBLOCK_ABORT_END;
+			else
+				elog(FATAL, "EndTransactionBlock: unexpected state %s",
+					 BlockStateAsString(s->blockState));
 			break;
 
-		case TBLOCK_STARTED:
 			/*
-			 * here, the user issued COMMIT when not inside a
-			 * transaction. Issue a WARNING and go to abort state.  The
-			 * upcoming call to CommitTransactionCommand() will then put us
-			 * back into the default state.
+			 * The user issued COMMIT when not inside a transaction.  Issue a
+			 * WARNING, staying in TBLOCK_STARTED state.  The upcoming call to
+			 * CommitTransactionCommand() will then close the transaction and
+			 * put us back into the default state.
 			 */
+		case TBLOCK_STARTED:
 			ereport(WARNING,
 					(errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
 					 errmsg("there is no transaction in progress")));
-			AbortTransaction();
-			s->blockState = TBLOCK_ENDABORT;
+			result = true;
 			break;
 
-			/* these cases are invalid. */
+			/* These cases are invalid. */
 		case TBLOCK_DEFAULT:
 		case TBLOCK_BEGIN:
-		case TBLOCK_ENDABORT:
-		case TBLOCK_END:
 		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBBEGINABORT:
+		case TBLOCK_END:
 		case TBLOCK_SUBEND:
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "EndTransactionBlock: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
 	}
+
+	return result;
 }
 
 /*
  *	UserAbortTransactionBlock
  *		This executes a ROLLBACK command.
+ *
+ * As above, we don't actually do anything here except change blockState.
  */
 void
 UserAbortTransactionBlock(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	switch (s->blockState) {
-		/*
-		 * here we are inside a failed transaction block and we got an abort
-		 * command from the user.  Abort processing is already done, we just
-		 * need to move to the ENDABORT state so we will end up in the default
-		 * state after the upcoming CommitTransactionCommand().
-		 */
+	switch (s->blockState)
+	{
+			/*
+			 * We are inside a transaction block and we got a ROLLBACK command
+			 * from the user, so tell CommitTransactionCommand to abort and
+			 * exit the transaction block.
+			 */
+		case TBLOCK_INPROGRESS:
+			s->blockState = TBLOCK_ABORT_PENDING;
+			break;
+
+			/*
+			 * We are inside a failed transaction block and we got a ROLLBACK
+			 * command from the user.  Abort processing is already done, so
+			 * CommitTransactionCommand just has to cleanup and go back to
+			 * idle state.
+			 */
 		case TBLOCK_ABORT:
-			s->blockState = TBLOCK_ENDABORT;
+			s->blockState = TBLOCK_ABORT_END;
 			break;
 
 			/*
-			 * Ditto, for a subtransaction.  Here it is okay to allow the
-			 * parent transaction to continue.
+			 * We are inside a subtransaction.	Mark everything up to top
+			 * level as exitable.
 			 */
+		case TBLOCK_SUBINPROGRESS:
 		case TBLOCK_SUBABORT:
-			s->blockState = TBLOCK_SUBENDABORT_OK;
+			while (s->parent != NULL)
+			{
+				if (s->blockState == TBLOCK_SUBINPROGRESS)
+					s->blockState = TBLOCK_SUBABORT_PENDING;
+				else if (s->blockState == TBLOCK_SUBABORT)
+					s->blockState = TBLOCK_SUBABORT_END;
+				else
+					elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
+						 BlockStateAsString(s->blockState));
+				s = s->parent;
+			}
+			if (s->blockState == TBLOCK_INPROGRESS)
+				s->blockState = TBLOCK_ABORT_PENDING;
+			else if (s->blockState == TBLOCK_ABORT)
+				s->blockState = TBLOCK_ABORT_END;
+			else
+				elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
+					 BlockStateAsString(s->blockState));
 			break;
 
 			/*
-			 * here we are inside a transaction block and we got an abort
-			 * command from the user, so we move to the ENDABORT state and
-			 * do abort processing so we will end up in the default state
-			 * after the upcoming CommitTransactionCommand().
+			 * The user issued ABORT when not inside a transaction. Issue a
+			 * NOTICE and go to the ABORT_PENDING state.  The upcoming call to
+			 * CommitTransactionCommand() will then put us back into the
+			 * default state.
 			 */
+		case TBLOCK_STARTED:
+			ereport(NOTICE,
+					(errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
+					 errmsg("there is no transaction in progress")));
+			s->blockState = TBLOCK_ABORT_PENDING;
+			break;
+
+			/* These cases are invalid. */
+		case TBLOCK_DEFAULT:
+		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
+		case TBLOCK_END:
+		case TBLOCK_SUBEND:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
+			elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
+				 BlockStateAsString(s->blockState));
+			break;
+	}
+}
+
+/*
+ * DefineSavepoint
+ *		This executes a SAVEPOINT command.
+ */
+void
+DefineSavepoint(char *name)
+{
+	TransactionState s = CurrentTransactionState;
+
+	switch (s->blockState)
+	{
 		case TBLOCK_INPROGRESS:
-			AbortTransaction();
-			s->blockState = TBLOCK_ENDABORT;
+		case TBLOCK_SUBINPROGRESS:
+			/* Normal subtransaction start */
+			PushTransaction();
+			s = CurrentTransactionState;		/* changed by push */
+
+			/*
+			 * Savepoint names, like the TransactionState block itself, live
+			 * in TopTransactionContext.
+			 */
+			if (name)
+				s->name = MemoryContextStrdup(TopTransactionContext, name);
+			break;
+
+			/* These cases are invalid. */
+		case TBLOCK_DEFAULT:
+		case TBLOCK_STARTED:
+		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
+		case TBLOCK_END:
+		case TBLOCK_SUBEND:
+		case TBLOCK_ABORT:
+		case TBLOCK_SUBABORT:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
+			elog(FATAL, "DefineSavepoint: unexpected state %s",
+				 BlockStateAsString(s->blockState));
+			break;
+	}
+}
+
+/*
+ * ReleaseSavepoint
+ *		This executes a RELEASE command.
+ *
+ * As above, we don't actually do anything here except change blockState.
+ */
+void
+ReleaseSavepoint(List *options)
+{
+	TransactionState s = CurrentTransactionState;
+	TransactionState target,
+				xact;
+	ListCell   *cell;
+	char	   *name = NULL;
+
+	switch (s->blockState)
+	{
+			/*
+			 * We can't release a savepoint if there is no savepoint
+			 * defined.
+			 */
+		case TBLOCK_INPROGRESS:
+			ereport(ERROR,
+					(errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
+					 errmsg("no such savepoint")));
+			break;
+
+			/*
+			 * We are in a non-aborted subtransaction.	This is the only valid
+			 * case.
+			 */
+		case TBLOCK_SUBINPROGRESS:
+			break;
+
+			/* These cases are invalid. */
+		case TBLOCK_DEFAULT:
+		case TBLOCK_STARTED:
+		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
+		case TBLOCK_END:
+		case TBLOCK_SUBEND:
+		case TBLOCK_ABORT:
+		case TBLOCK_SUBABORT:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
+			elog(FATAL, "ReleaseSavepoint: unexpected state %s",
+				 BlockStateAsString(s->blockState));
+			break;
+	}
+
+	foreach(cell, options)
+	{
+		DefElem    *elem = lfirst(cell);
+
+		if (strcmp(elem->defname, "savepoint_name") == 0)
+			name = strVal(elem->arg);
+	}
+
+	Assert(PointerIsValid(name));
+
+	for (target = s; PointerIsValid(target); target = target->parent)
+	{
+		if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
+			break;
+	}
+
+	if (!PointerIsValid(target))
+		ereport(ERROR,
+				(errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
+				 errmsg("no such savepoint")));
+
+	/* disallow crossing savepoint level boundaries */
+	if (target->savepointLevel != s->savepointLevel)
+		ereport(ERROR,
+				(errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
+				 errmsg("no such savepoint")));
+
+	/*
+	 * Mark "commit pending" all subtransactions up to and including the
+	 * target subtransaction.  The actual commits will happen when control
+	 * gets to CommitTransactionCommand.
+	 */
+	xact = CurrentTransactionState;
+	for (;;)
+	{
+		Assert(xact->blockState == TBLOCK_SUBINPROGRESS);
+		xact->blockState = TBLOCK_SUBEND;
+		if (xact == target)
+			break;
+		xact = xact->parent;
+		Assert(PointerIsValid(xact));
+	}
+}
+
+/*
+ * RollbackToSavepoint
+ *		This executes a ROLLBACK TO <savepoint> command.
+ *
+ * As above, we don't actually do anything here except change blockState.
+ */
+void
+RollbackToSavepoint(List *options)
+{
+	TransactionState s = CurrentTransactionState;
+	TransactionState target,
+				xact;
+	ListCell   *cell;
+	char	   *name = NULL;
+
+	switch (s->blockState)
+	{
+			/*
+			 * We can't rollback to a savepoint if there is no savepoint
+			 * defined.
+			 */
+		case TBLOCK_INPROGRESS:
+		case TBLOCK_ABORT:
+			ereport(ERROR,
+					(errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
+					 errmsg("no such savepoint")));
 			break;
 
-			/* Ditto, for a subtransaction. */
+			/*
+			 * There is at least one savepoint, so proceed.
+			 */
+		case TBLOCK_SUBINPROGRESS:
+		case TBLOCK_SUBABORT:
+			break;
+
+			/* These cases are invalid. */
+		case TBLOCK_DEFAULT:
+		case TBLOCK_STARTED:
+		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
+		case TBLOCK_END:
+		case TBLOCK_SUBEND:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
+			elog(FATAL, "RollbackToSavepoint: unexpected state %s",
+				 BlockStateAsString(s->blockState));
+			break;
+	}
+
+	foreach(cell, options)
+	{
+		DefElem    *elem = lfirst(cell);
+
+		if (strcmp(elem->defname, "savepoint_name") == 0)
+			name = strVal(elem->arg);
+	}
+
+	Assert(PointerIsValid(name));
+
+	for (target = s; PointerIsValid(target); target = target->parent)
+	{
+		if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
+			break;
+	}
+
+	if (!PointerIsValid(target))
+		ereport(ERROR,
+				(errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
+				 errmsg("no such savepoint")));
+
+	/* disallow crossing savepoint level boundaries */
+	if (target->savepointLevel != s->savepointLevel)
+		ereport(ERROR,
+				(errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
+				 errmsg("no such savepoint")));
+
+	/*
+	 * Mark "abort pending" all subtransactions up to, but not including, the
+	 * target subtransaction.  The actual aborts will happen when control
+	 * gets to CommitTransactionCommand.
+	 */
+	xact = CurrentTransactionState;
+	for (;;)
+	{
+		if (xact == target)
+			break;
+		if (xact->blockState == TBLOCK_SUBINPROGRESS)
+			xact->blockState = TBLOCK_SUBABORT_PENDING;
+		else if (xact->blockState == TBLOCK_SUBABORT)
+			xact->blockState = TBLOCK_SUBABORT_END;
+		else
+			elog(FATAL, "RollbackToSavepoint: unexpected state %s",
+				 BlockStateAsString(xact->blockState));
+		xact = xact->parent;
+		Assert(PointerIsValid(xact));
+	}
+
+	/* And mark the target as "restart pending" */
+	if (xact->blockState == TBLOCK_SUBINPROGRESS)
+		xact->blockState = TBLOCK_SUBRESTART;
+	else if (xact->blockState == TBLOCK_SUBABORT)
+		xact->blockState = TBLOCK_SUBABORT_RESTART;
+	else
+		elog(FATAL, "RollbackToSavepoint: unexpected state %s",
+			 BlockStateAsString(xact->blockState));
+}
+
+/*
+ * BeginInternalSubTransaction
+ *		This is the same as DefineSavepoint except it allows TBLOCK_STARTED,
+ *		TBLOCK_END, and TBLOCK_PREPARE states, and therefore it can safely be
+ *		used in functions that might be called when not inside a BEGIN block
+ *		or when running deferred triggers at COMMIT/PREPARE time.  Also, it
+ *		automatically does CommitTransactionCommand/StartTransactionCommand
+ *		instead of expecting the caller to do it.
+ */
+void
+BeginInternalSubTransaction(char *name)
+{
+	TransactionState s = CurrentTransactionState;
+
+	switch (s->blockState)
+	{
+		case TBLOCK_STARTED:
+		case TBLOCK_INPROGRESS:
+		case TBLOCK_END:
+		case TBLOCK_PREPARE:
+		case TBLOCK_SUBINPROGRESS:
+			/* Normal subtransaction start */
+			PushTransaction();
+			s = CurrentTransactionState;		/* changed by push */
+
+			/*
+			 * Savepoint names, like the TransactionState block itself, live
+			 * in TopTransactionContext.
+			 */
+			if (name)
+				s->name = MemoryContextStrdup(TopTransactionContext, name);
+			break;
+
+			/* These cases are invalid. */
+		case TBLOCK_DEFAULT:
+		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
+		case TBLOCK_SUBEND:
+		case TBLOCK_ABORT:
+		case TBLOCK_SUBABORT:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+			elog(FATAL, "BeginInternalSubTransaction: unexpected state %s",
+				 BlockStateAsString(s->blockState));
+			break;
+	}
+
+	CommitTransactionCommand();
+	StartTransactionCommand();
+}
+
+/*
+ * ReleaseCurrentSubTransaction
+ *
+ * RELEASE (ie, commit) the innermost subtransaction, regardless of its
+ * savepoint name (if any).
+ * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
+ */
+void
+ReleaseCurrentSubTransaction(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	if (s->blockState != TBLOCK_SUBINPROGRESS)
+		elog(ERROR, "ReleaseCurrentSubTransaction: unexpected state %s",
+			 BlockStateAsString(s->blockState));
+	Assert(s->state == TRANS_INPROGRESS);
+	MemoryContextSwitchTo(CurTransactionContext);
+	CommitSubTransaction();
+	s = CurrentTransactionState;	/* changed by pop */
+	Assert(s->state == TRANS_INPROGRESS);
+}
+
+/*
+ * RollbackAndReleaseCurrentSubTransaction
+ *
+ * ROLLBACK and RELEASE (ie, abort) the innermost subtransaction, regardless
+ * of its savepoint name (if any).
+ * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
+ */
+void
+RollbackAndReleaseCurrentSubTransaction(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	switch (s->blockState)
+	{
+			/* Must be in a subtransaction */
 		case TBLOCK_SUBINPROGRESS:
-			AbortSubTransaction();
-			s->blockState = TBLOCK_SUBENDABORT_OK;
-			break;
-
-			/*
-			 * here, the user issued ABORT when not inside a
-			 * transaction. Issue a WARNING and go to abort state.  The
-			 * upcoming call to CommitTransactionCommand() will then put us
-			 * back into the default state.
-			 */
-		case TBLOCK_STARTED:
-			ereport(WARNING,
-					(errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
-					 errmsg("there is no transaction in progress")));
-			AbortTransaction();
-			s->blockState = TBLOCK_ENDABORT;
+		case TBLOCK_SUBABORT:
 			break;
 
-			/* these cases are invalid. */
+			/* These cases are invalid. */
 		case TBLOCK_DEFAULT:
+		case TBLOCK_STARTED:
 		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
+		case TBLOCK_INPROGRESS:
 		case TBLOCK_END:
-		case TBLOCK_ENDABORT:
 		case TBLOCK_SUBEND:
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
-		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBBEGINABORT:
-			elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
+		case TBLOCK_ABORT:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
+			elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
 	}
 
+	/*
+	 * Abort the current subtransaction, if needed.
+	 */
+	if (s->blockState == TBLOCK_SUBINPROGRESS)
+		AbortSubTransaction();
+
+	/* And clean it up, too */
+	CleanupSubTransaction();
+
+	s = CurrentTransactionState;	/* changed by pop */
+	AssertState(s->blockState == TBLOCK_SUBINPROGRESS ||
+				s->blockState == TBLOCK_INPROGRESS ||
+				s->blockState == TBLOCK_STARTED);
 }
 
 /*
@@ -2317,7 +3585,8 @@ AbortOutOfAnyTransaction(void)
 	/*
 	 * Get out of any transaction or nested transaction
 	 */
-	do {
+	do
+	{
 		switch (s->blockState)
 		{
 			case TBLOCK_DEFAULT:
@@ -2327,41 +3596,39 @@ AbortOutOfAnyTransaction(void)
 			case TBLOCK_BEGIN:
 			case TBLOCK_INPROGRESS:
 			case TBLOCK_END:
+			case TBLOCK_ABORT_PENDING:
+			case TBLOCK_PREPARE:
 				/* In a transaction, so clean up */
 				AbortTransaction();
 				CleanupTransaction();
 				s->blockState = TBLOCK_DEFAULT;
 				break;
 			case TBLOCK_ABORT:
-			case TBLOCK_ENDABORT:
+			case TBLOCK_ABORT_END:
 				/* AbortTransaction already done, still need Cleanup */
 				CleanupTransaction();
 				s->blockState = TBLOCK_DEFAULT;
 				break;
-			case TBLOCK_SUBBEGIN:
-			case TBLOCK_SUBBEGINABORT:
+
 				/*
-				 * We didn't get as far as starting the subxact, so there's
-				 * nothing to abort.  Just pop back to parent.
+				 * In a subtransaction, so clean it up and abort parent too
 				 */
-				PopTransaction();
-				s = CurrentTransactionState;		/* changed by pop */
-				break;
+			case TBLOCK_SUBBEGIN:
 			case TBLOCK_SUBINPROGRESS:
 			case TBLOCK_SUBEND:
-				/* In a subtransaction, so clean it up and abort parent too */
+			case TBLOCK_SUBABORT_PENDING:
+			case TBLOCK_SUBRESTART:
 				AbortSubTransaction();
 				CleanupSubTransaction();
-				PopTransaction();
-				s = CurrentTransactionState;		/* changed by pop */
+				s = CurrentTransactionState;	/* changed by pop */
 				break;
+
 			case TBLOCK_SUBABORT:
-			case TBLOCK_SUBENDABORT_OK:
-			case TBLOCK_SUBENDABORT_ERROR:
+			case TBLOCK_SUBABORT_END:
+			case TBLOCK_SUBABORT_RESTART:
 				/* As above, but AbortSubTransaction already done */
 				CleanupSubTransaction();
-				PopTransaction();
-				s = CurrentTransactionState;		/* changed by pop */
+				s = CurrentTransactionState;	/* changed by pop */
 				break;
 		}
 	} while (s->blockState != TBLOCK_DEFAULT);
@@ -2386,7 +3653,7 @@ IsTransactionBlock(void)
 
 /*
  * IsTransactionOrTransactionBlock --- are we within either a transaction
- * or a transaction block?  (The backend is only really "idle" when this
+ * or a transaction block?	(The backend is only really "idle" when this
  * returns false.)
  *
  * This should match up with IsTransactionBlock and IsTransactionState.
@@ -2416,18 +3683,21 @@ TransactionBlockStatusCode(void)
 		case TBLOCK_STARTED:
 			return 'I';			/* idle --- not in transaction */
 		case TBLOCK_BEGIN:
+		case TBLOCK_SUBBEGIN:
 		case TBLOCK_INPROGRESS:
-		case TBLOCK_END:
 		case TBLOCK_SUBINPROGRESS:
-		case TBLOCK_SUBBEGIN:
+		case TBLOCK_END:
 		case TBLOCK_SUBEND:
+		case TBLOCK_PREPARE:
 			return 'T';			/* in transaction */
 		case TBLOCK_ABORT:
-		case TBLOCK_ENDABORT:
 		case TBLOCK_SUBABORT:
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
-		case TBLOCK_SUBBEGINABORT:
+		case TBLOCK_ABORT_END:
+		case TBLOCK_SUBABORT_END:
+		case TBLOCK_ABORT_PENDING:
+		case TBLOCK_SUBABORT_PENDING:
+		case TBLOCK_SUBRESTART:
+		case TBLOCK_SUBABORT_RESTART:
 			return 'E';			/* in failed transaction */
 	}
 
@@ -2444,34 +3714,24 @@ bool
 IsSubTransaction(void)
 {
 	TransactionState s = CurrentTransactionState;
-	
-	switch (s->blockState) {
-		case TBLOCK_DEFAULT:
-		case TBLOCK_STARTED:
-		case TBLOCK_BEGIN:
-		case TBLOCK_INPROGRESS:
-		case TBLOCK_END:
-		case TBLOCK_ABORT:
-		case TBLOCK_ENDABORT:
-			return false;
-		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBBEGINABORT:
-		case TBLOCK_SUBINPROGRESS:
-		case TBLOCK_SUBABORT:
-		case TBLOCK_SUBEND:
-		case TBLOCK_SUBENDABORT_OK:
-		case TBLOCK_SUBENDABORT_ERROR:
-			return true;
-	}
 
-	/* should never get here */
-	elog(FATAL, "invalid transaction block state: %s",
-		 BlockStateAsString(s->blockState));
-	return false;				/* keep compiler quiet */
+	if (s->nestingLevel >= 2)
+		return true;
+
+	return false;
 }
 
 /*
  * StartSubTransaction
+ *
+ * If you're wondering why this is separate from PushTransaction: it's because
+ * we can't conveniently do this stuff right inside DefineSavepoint.  The
+ * SAVEPOINT utility command will be executed inside a Portal, and if we
+ * muck with CurrentMemoryContext or CurrentResourceOwner then exit from
+ * the Portal will undo those settings.  So we make DefineSavepoint just
+ * push a dummy transaction block, and when control returns to the main
+ * idle loop, CommitTransactionCommand will be called, and we'll come here
+ * to finish starting the subtransaction.
  */
 static void
 StartSubTransaction(void)
@@ -2479,39 +3739,38 @@ StartSubTransaction(void)
 	TransactionState s = CurrentTransactionState;
 
 	if (s->state != TRANS_DEFAULT)
-		elog(WARNING, "StartSubTransaction and not in default state");
+		elog(WARNING, "StartSubTransaction while in %s state",
+			 TransStateAsString(s->state));
 
 	s->state = TRANS_START;
 
 	/*
-	 * Generate a new Xid and record it in pg_subtrans.
-	 */
-	s->transactionIdData = GetNewTransactionId(true);
-
-	SubTransSetParent(s->transactionIdData, s->parent->transactionIdData);
-
-	/*
-	 * Finish setup of other transaction state fields.
+	 * Initialize subsystems for new subtransaction
+	 *
+	 * must initialize resource-management stuff first
 	 */
-	s->currentUser = GetUserId();
-	
-	/* Initialize the various transaction subsystems */
 	AtSubStart_Memory();
+	AtSubStart_ResourceOwner();
 	AtSubStart_Inval();
-	AtSubStart_RelationCache();
-	AtSubStart_CatCache();
-	AtSubStart_Buffers();
-	AtSubStart_smgr();
 	AtSubStart_Notify();
-	DeferredTriggerBeginSubXact();
+	AfterTriggerBeginSubXact();
 
 	s->state = TRANS_INPROGRESS;
 
+	/*
+	 * Call start-of-subxact callbacks
+	 */
+	CallSubXactCallbacks(SUBXACT_EVENT_START_SUB, s->subTransactionId,
+						 s->parent->subTransactionId);
+
 	ShowTransactionState("StartSubTransaction");
 }
 
 /*
  * CommitSubTransaction
+ *
+ *	The caller has to make sure to always reassign CurrentTransactionState
+ *	if it has a local pointer to it after calling this function.
  */
 static void
 CommitSubTransaction(void)
@@ -2521,38 +3780,86 @@ CommitSubTransaction(void)
 	ShowTransactionState("CommitSubTransaction");
 
 	if (s->state != TRANS_INPROGRESS)
-		elog(WARNING, "CommitSubTransaction and not in in-progress state");
+		elog(WARNING, "CommitSubTransaction while in %s state",
+			 TransStateAsString(s->state));
 
-	/* Pre-commit processing */
-	AtSubCommit_Portals(s->parent->transactionIdData);
-	DeferredTriggerEndSubXact(true);
+	/* Pre-commit processing goes here -- nothing to do at the moment */
 
 	s->state = TRANS_COMMIT;
 
-	/* Mark subtransaction as subcommitted */
+	/* Must CCI to ensure commands of subtransaction are seen as done */
 	CommandCounterIncrement();
+
+	/* Mark subtransaction as subcommitted */
 	RecordSubTransactionCommit();
-	AtSubCommit_childXids();
 
 	/* Post-commit cleanup */
+	if (TransactionIdIsValid(s->transactionId))
+		AtSubCommit_childXids();
+	AfterTriggerEndSubXact(true);
+	AtSubCommit_Portals(s->subTransactionId,
+						s->parent->subTransactionId,
+						s->parent->curTransactionOwner);
+	AtEOSubXact_LargeObject(true, s->subTransactionId,
+							s->parent->subTransactionId);
+	AtSubCommit_Notify();
+	AtEOSubXact_UpdateFlatFiles(true, s->subTransactionId,
+								s->parent->subTransactionId);
+
+	CallSubXactCallbacks(SUBXACT_EVENT_COMMIT_SUB, s->subTransactionId,
+						 s->parent->subTransactionId);
+
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 true, false);
+	AtEOSubXact_RelationCache(true, s->subTransactionId,
+							  s->parent->subTransactionId);
+	AtEOSubXact_Inval(true);
 	AtSubCommit_smgr();
 
-	AtSubEOXact_Inval(true);
-	AtEOSubXact_SPI(true, s->transactionIdData);
-	AtSubCommit_Notify();
-	AtEOXact_GUC(true, true);
-	AtEOSubXact_gist(s->transactionIdData);
-	AtEOSubXact_hash(s->transactionIdData);
-	AtEOSubXact_rtree(s->transactionIdData);
-	AtEOSubXact_on_commit_actions(true, s->transactionIdData,
-								  s->parent->transactionIdData);
-
-	AtEOSubXact_CatCache(true);
-	AtEOSubXact_RelationCache(true);
-	AtEOSubXact_Buffers(true);
+	/*
+	 * The only lock we actually release here is the subtransaction XID lock.
+	 * The rest just get transferred to the parent resource owner.
+	 */
+	CurrentResourceOwner = s->curTransactionOwner;
+	if (TransactionIdIsValid(s->transactionId))
+		XactLockTableDelete(s->transactionId);
+
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 true, false);
+	ResourceOwnerRelease(s->curTransactionOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 true, false);
+
+	AtEOXact_GUC(true, s->gucNestLevel);
+	AtEOSubXact_SPI(true, s->subTransactionId);
+	AtEOSubXact_on_commit_actions(true, s->subTransactionId,
+								  s->parent->subTransactionId);
+	AtEOSubXact_Namespace(true, s->subTransactionId,
+						  s->parent->subTransactionId);
+	AtEOSubXact_Files(true, s->subTransactionId,
+					  s->parent->subTransactionId);
+	AtEOSubXact_HashTables(true, s->nestingLevel);
+	AtEOSubXact_PgStat(true, s->nestingLevel);
+
+	/*
+	 * We need to restore the upper transaction's read-only state, in case the
+	 * upper is read-write while the child is read-only; GUC will incorrectly
+	 * think it should leave the child state in place.
+	 */
+	XactReadOnly = s->prevXactReadOnly;
+
+	CurrentResourceOwner = s->parent->curTransactionOwner;
+	CurTransactionResourceOwner = s->parent->curTransactionOwner;
+	ResourceOwnerDelete(s->curTransactionOwner);
+	s->curTransactionOwner = NULL;
+
 	AtSubCommit_Memory();
 
 	s->state = TRANS_DEFAULT;
+
+	PopTransaction();
 }
 
 /*
@@ -2563,17 +3870,18 @@ AbortSubTransaction(void)
 {
 	TransactionState s = CurrentTransactionState;
 
-	ShowTransactionState("AbortSubTransaction");
-
+	/* Prevent cancel/die interrupt while cleaning up */
 	HOLD_INTERRUPTS();
 
-	s->state = TRANS_ABORT;
+	/* Make sure we have a valid memory context and resource owner */
+	AtSubAbort_Memory();
+	AtSubAbort_ResourceOwner();
 
 	/*
 	 * Release any LW locks we might be holding as quickly as possible.
 	 * (Regular locks, however, must be held till we finish aborting.)
-	 * Releasing LW locks is critical since we might try to grab them
-	 * again while cleaning up!
+	 * Releasing LW locks is critical since we might try to grab them again
+	 * while cleaning up!
 	 *
 	 * FIXME This may be incorrect --- Are there some locks we should keep?
 	 * Buffer locks, for example?  I don't think so but I'm not sure.
@@ -2585,54 +3893,91 @@ AbortSubTransaction(void)
 
 	LockWaitCancel();
 
-	AtSubAbort_Memory();
-
 	/*
-	 * do abort processing
+	 * check the current transaction state
 	 */
+	ShowTransactionState("AbortSubTransaction");
 
-	RecordSubTransactionAbort();
+	if (s->state != TRANS_INPROGRESS)
+		elog(WARNING, "AbortSubTransaction while in %s state",
+			 TransStateAsString(s->state));
 
-	/* Post-abort cleanup */
-	AtSubAbort_smgr();
+	s->state = TRANS_ABORT;
 
-	DeferredTriggerEndSubXact(false);
-	AtSubAbort_Portals();
-	AtSubEOXact_Inval(false);
-	AtSubAbort_Locks();
-	AtEOSubXact_SPI(false, s->transactionIdData);
-	AtSubAbort_Notify();
-	AtEOXact_GUC(false, true);
-	AtEOSubXact_gist(s->transactionIdData);
-	AtEOSubXact_hash(s->transactionIdData);
-	AtEOSubXact_rtree(s->transactionIdData);
-	AtEOSubXact_on_commit_actions(false, s->transactionIdData,
-								  s->parent->transactionIdData);
-	AtEOSubXact_RelationCache(false);
-	AtEOSubXact_CatCache(false);
-	AtEOSubXact_Buffers(false);
+	/*
+	 * Reset user ID which might have been changed transiently.  (See notes
+	 * in AbortTransaction.)
+	 */
+	SetUserIdAndContext(s->prevUser, s->prevSecDefCxt);
 
 	/*
-	 * Reset user id which might have been changed transiently.  Here we
-	 * want to restore to the userid that was current at subxact entry.
-	 * (As in AbortTransaction, we need not worry about the session userid.)
-	 *
-	 * Must do this after AtEOXact_GUC to handle the case where we entered
-	 * the subxact inside a SECURITY DEFINER function (hence current and
-	 * session userids were different) and then session auth was changed
-	 * inside the subxact.  GUC will reset both current and session userids
-	 * to the entry-time session userid.  This is right in every other
-	 * scenario so it seems simplest to let GUC do that and fix it here.
+	 * We can skip all this stuff if the subxact failed before creating a
+	 * ResourceOwner...
 	 */
-	SetUserId(s->currentUser);
+	if (s->curTransactionOwner)
+	{
+		AfterTriggerEndSubXact(false);
+		AtSubAbort_Portals(s->subTransactionId,
+						   s->parent->subTransactionId,
+						   s->parent->curTransactionOwner);
+		AtEOSubXact_LargeObject(false, s->subTransactionId,
+								s->parent->subTransactionId);
+		AtSubAbort_Notify();
+		AtEOSubXact_UpdateFlatFiles(false, s->subTransactionId,
+									s->parent->subTransactionId);
+
+		/* Advertise the fact that we aborted in pg_clog. */
+		(void) RecordTransactionAbort(true);
+
+		/* Post-abort cleanup */
+		if (TransactionIdIsValid(s->transactionId))
+			AtSubAbort_childXids();
+
+		CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId,
+							 s->parent->subTransactionId);
+
+		ResourceOwnerRelease(s->curTransactionOwner,
+							 RESOURCE_RELEASE_BEFORE_LOCKS,
+							 false, false);
+		AtEOSubXact_RelationCache(false, s->subTransactionId,
+								  s->parent->subTransactionId);
+		AtEOSubXact_Inval(false);
+		AtSubAbort_smgr();
+		ResourceOwnerRelease(s->curTransactionOwner,
+							 RESOURCE_RELEASE_LOCKS,
+							 false, false);
+		ResourceOwnerRelease(s->curTransactionOwner,
+							 RESOURCE_RELEASE_AFTER_LOCKS,
+							 false, false);
+
+		AtEOXact_GUC(false, s->gucNestLevel);
+		AtEOSubXact_SPI(false, s->subTransactionId);
+		AtEOXact_xml();
+		AtEOSubXact_on_commit_actions(false, s->subTransactionId,
+									  s->parent->subTransactionId);
+		AtEOSubXact_Namespace(false, s->subTransactionId,
+							  s->parent->subTransactionId);
+		AtEOSubXact_Files(false, s->subTransactionId,
+						  s->parent->subTransactionId);
+		AtEOSubXact_HashTables(false, s->nestingLevel);
+		AtEOSubXact_PgStat(false, s->nestingLevel);
+	}
 
-	CommandCounterIncrement();
+	/*
+	 * Restore the upper transaction's read-only state, too.  This should be
+	 * redundant with GUC's cleanup but we may as well do it for consistency
+	 * with the commit case.
+	 */
+	XactReadOnly = s->prevXactReadOnly;
 
 	RESUME_INTERRUPTS();
 }
 
 /*
  * CleanupSubTransaction
+ *
+ *	The caller has to make sure to always reassign CurrentTransactionState
+ *	if it has a local pointer to it after calling this function.
  */
 static void
 CleanupSubTransaction(void)
@@ -2642,66 +3987,36 @@ CleanupSubTransaction(void)
 	ShowTransactionState("CleanupSubTransaction");
 
 	if (s->state != TRANS_ABORT)
-		elog(WARNING, "CleanupSubTransaction and not in aborted state");
-
-	AtSubCleanup_Portals();
-	AtSubCleanup_Memory();
-
-	s->state = TRANS_DEFAULT;
-}
-
-/*
- * StartAbortedSubTransaction
- *
- * This function is used to start a subtransaction and put it immediately
- * into aborted state.  The end result should be equivalent to
- * StartSubTransaction immediately followed by AbortSubTransaction.
- * The reason we don't implement it just that way is that many of the backend
- * modules aren't designed to handle starting a subtransaction when not
- * inside a valid transaction.  Rather than making them all capable of
- * doing that, we just omit the paired start and abort calls in this path.
- */
-static void
-StartAbortedSubTransaction(void)
-{
-	TransactionState s = CurrentTransactionState;
-
-	if (s->state != TRANS_DEFAULT)
-		elog(WARNING, "StartAbortedSubTransaction and not in default state");
-
-	s->state = TRANS_START;
+		elog(WARNING, "CleanupSubTransaction while in %s state",
+			 TransStateAsString(s->state));
 
-	/*
-	 * We don't bother to generate a new Xid, so the end state is not
-	 * *exactly* like we had done a full Start/AbortSubTransaction...
-	 */
-	s->transactionIdData = InvalidTransactionId;
+	AtSubCleanup_Portals(s->subTransactionId);
 
-	/* Make sure currentUser is reasonably valid */
-	Assert(s->parent != NULL);
-	s->currentUser = s->parent->currentUser;
-	
-	/*
-	 * Initialize only what has to be there for CleanupSubTransaction to work.
-	 */
-	AtSubStart_Memory();
+	CurrentResourceOwner = s->parent->curTransactionOwner;
+	CurTransactionResourceOwner = s->parent->curTransactionOwner;
+	if (s->curTransactionOwner)
+		ResourceOwnerDelete(s->curTransactionOwner);
+	s->curTransactionOwner = NULL;
 
-	s->state = TRANS_ABORT;
+	AtSubCleanup_Memory();
 
-	AtSubAbort_Memory();
+	s->state = TRANS_DEFAULT;
 
-	ShowTransactionState("StartAbortedSubTransaction");
+	PopTransaction();
 }
 
 /*
  * PushTransaction
- *		Set up transaction state for a subtransaction
+ *		Create transaction state stack entry for a subtransaction
+ *
+ *	The caller has to make sure to always reassign CurrentTransactionState
+ *	if it has a local pointer to it after calling this function.
  */
 static void
 PushTransaction(void)
 {
-	TransactionState    p = CurrentTransactionState;
-	TransactionState    s;
+	TransactionState p = CurrentTransactionState;
+	TransactionState s;
 
 	/*
 	 * We keep subtransaction state nodes in TopTransactionContext.
@@ -2709,28 +4024,51 @@ PushTransaction(void)
 	s = (TransactionState)
 		MemoryContextAllocZero(TopTransactionContext,
 							   sizeof(TransactionStateData));
+
+	/*
+	 * Assign a subtransaction ID, watching out for counter wraparound.
+	 */
+	currentSubTransactionId += 1;
+	if (currentSubTransactionId == InvalidSubTransactionId)
+	{
+		currentSubTransactionId -= 1;
+		pfree(s);
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("cannot have more than 2^32-1 subtransactions in a transaction")));
+	}
+
+	/*
+	 * We can now stack a minimally valid subtransaction without fear of
+	 * failure.
+	 */
+	s->transactionId = InvalidTransactionId;	/* until assigned */
+	s->subTransactionId = currentSubTransactionId;
 	s->parent = p;
 	s->nestingLevel = p->nestingLevel + 1;
+	s->gucNestLevel = NewGUCNestLevel();
+	s->savepointLevel = p->savepointLevel;
 	s->state = TRANS_DEFAULT;
 	s->blockState = TBLOCK_SUBBEGIN;
+	GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt);
+	s->prevXactReadOnly = XactReadOnly;
 
-	/* Command IDs count in a continuous sequence through subtransactions */
-	s->commandId = p->commandId;
+	CurrentTransactionState = s;
 
 	/*
-	 * Copy down some other data so that we will have valid state until
-	 * StartSubTransaction runs.
+	 * AbortSubTransaction and CleanupSubTransaction have to be able to cope
+	 * with the subtransaction from here on out; in particular they should not
+	 * assume that it necessarily has a transaction context, resource owner,
+	 * or XID.
 	 */
-	s->transactionIdData = p->transactionIdData;
-	s->curTransactionContext = p->curTransactionContext;
-	s->currentUser = p->currentUser;
-
-	CurrentTransactionState = s;
 }
 
 /*
  * PopTransaction
  *		Pop back to parent transaction state
+ *
+ *	The caller has to make sure to always reassign CurrentTransactionState
+ *	if it has a local pointer to it after calling this function.
  */
 static void
 PopTransaction(void)
@@ -2738,21 +4076,25 @@ PopTransaction(void)
 	TransactionState s = CurrentTransactionState;
 
 	if (s->state != TRANS_DEFAULT)
-		elog(WARNING, "PopTransaction and not in default state");
+		elog(WARNING, "PopTransaction while in %s state",
+			 TransStateAsString(s->state));
 
 	if (s->parent == NULL)
 		elog(FATAL, "PopTransaction with no parent");
 
-	/* Command IDs count in a continuous sequence through subtransactions */
-	s->parent->commandId = s->commandId;
-
 	CurrentTransactionState = s->parent;
 
 	/* Let's just make sure CurTransactionContext is good */
 	CurTransactionContext = s->parent->curTransactionContext;
 	MemoryContextSwitchTo(CurTransactionContext);
 
+	/* Ditto for ResourceOwner links */
+	CurTransactionResourceOwner = s->parent->curTransactionOwner;
+	CurrentResourceOwner = s->parent->curTransactionOwner;
+
 	/* Free the old child structure */
+	if (s->name)
+		pfree(s->name);
 	pfree(s);
 }
 
@@ -2764,9 +4106,9 @@ static void
 ShowTransactionState(const char *str)
 {
 	/* skip work if message will definitely not be printed */
-	if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2)
+	if (log_min_messages <= DEBUG3 || client_min_messages <= DEBUG3)
 	{
-		elog(DEBUG2, "%s", str);
+		elog(DEBUG3, "%s", str);
 		ShowTransactionStateRec(CurrentTransactionState);
 	}
 }
@@ -2778,18 +4120,35 @@ ShowTransactionState(const char *str)
 static void
 ShowTransactionStateRec(TransactionState s)
 {
+	StringInfoData buf;
+
+	initStringInfo(&buf);
+
+	if (s->nChildXids > 0)
+	{
+		int i;
+
+		appendStringInfo(&buf, "%u", s->childXids[0]);
+		for (i = 1; i < s->nChildXids; i++)
+			appendStringInfo(&buf, " %u", s->childXids[i]);
+	}
+
 	if (s->parent)
 		ShowTransactionStateRec(s->parent);
 
 	/* use ereport to suppress computation if msg will not be printed */
-	ereport(DEBUG2,
-			(errmsg_internal("blockState: %13s; state: %7s, xid/cid: %u/%02u, nestlvl: %d, children: %s",
+	ereport(DEBUG3,
+			(errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u%s, nestlvl: %d, children: %s",
+							 PointerIsValid(s->name) ? s->name : "unnamed",
 							 BlockStateAsString(s->blockState),
 							 TransStateAsString(s->state),
-							 (unsigned int) s->transactionIdData,
-							 (unsigned int) s->commandId,
-							 s->nestingLevel,
-							 nodeToString(s->childXids))));
+							 (unsigned int) s->transactionId,
+							 (unsigned int) s->subTransactionId,
+							 (unsigned int) currentCommandId,
+							 currentCommandIdUsed ? " (used)" : "",
+							 s->nestingLevel, buf.data)));
+
+	pfree(buf.data);
 }
 
 /*
@@ -2799,7 +4158,8 @@ ShowTransactionStateRec(TransactionState s)
 static const char *
 BlockStateAsString(TBlockState blockState)
 {
-	switch (blockState) {
+	switch (blockState)
+	{
 		case TBLOCK_DEFAULT:
 			return "DEFAULT";
 		case TBLOCK_STARTED:
@@ -2812,22 +4172,28 @@ BlockStateAsString(TBlockState blockState)
 			return "END";
 		case TBLOCK_ABORT:
 			return "ABORT";
-		case TBLOCK_ENDABORT:
-			return "ENDABORT";
+		case TBLOCK_ABORT_END:
+			return "ABORT END";
+		case TBLOCK_ABORT_PENDING:
+			return "ABORT PEND";
+		case TBLOCK_PREPARE:
+			return "PREPARE";
 		case TBLOCK_SUBBEGIN:
 			return "SUB BEGIN";
-		case TBLOCK_SUBBEGINABORT:
-			return "SUB BEGIN AB";
 		case TBLOCK_SUBINPROGRESS:
 			return "SUB INPROGRS";
 		case TBLOCK_SUBEND:
 			return "SUB END";
 		case TBLOCK_SUBABORT:
 			return "SUB ABORT";
-		case TBLOCK_SUBENDABORT_OK:
-			return "SUB ENDAB OK";
-		case TBLOCK_SUBENDABORT_ERROR:
-			return "SUB ENDAB ERR";
+		case TBLOCK_SUBABORT_END:
+			return "SUB ABORT END";
+		case TBLOCK_SUBABORT_PENDING:
+			return "SUB ABRT PEND";
+		case TBLOCK_SUBRESTART:
+			return "SUB RESTART";
+		case TBLOCK_SUBABORT_RESTART:
+			return "SUB AB RESTRT";
 	}
 	return "UNRECOGNIZED";
 }
@@ -2839,17 +4205,20 @@ BlockStateAsString(TBlockState blockState)
 static const char *
 TransStateAsString(TransState state)
 {
-	switch (state) {
+	switch (state)
+	{
 		case TRANS_DEFAULT:
 			return "DEFAULT";
 		case TRANS_START:
 			return "START";
+		case TRANS_INPROGRESS:
+			return "INPROGR";
 		case TRANS_COMMIT:
 			return "COMMIT";
 		case TRANS_ABORT:
 			return "ABORT";
-		case TRANS_INPROGRESS:
-			return "INPROGR";
+		case TRANS_PREPARE:
+			return "PREPARE";
 	}
 	return "UNRECOGNIZED";
 }
@@ -2857,47 +4226,98 @@ TransStateAsString(TransState state)
 /*
  * xactGetCommittedChildren
  *
- * Gets the list of committed children of the current transaction.  The return
- * value is the number of child transactions.  *children is set to point to a
- * palloc'd array of TransactionIds.  If there are no subxacts, *children is
- * set to NULL.
- *
- * If metoo is true, include the current TransactionId.
+ * Gets the list of committed children of the current transaction.  The return
+ * value is the number of child transactions.  *ptr is set to point to an
+ * array of TransactionIds.  The array is allocated in TopTransactionContext;
+ * the caller should *not* pfree() it (this is a change from pre-8.4 code!).
+ * If there are no subxacts, *ptr is set to NULL.
  */
 int
-xactGetCommittedChildren(TransactionId **ptr, bool metoo)
+xactGetCommittedChildren(TransactionId **ptr)
 {
-	TransactionState	s = CurrentTransactionState;
-	int					nchildren;
-	TransactionId	   *children;
-	ListCell		   *p;
+	TransactionState s = CurrentTransactionState;
 
-	nchildren = list_length(s->childXids);
-	if (metoo)
-		nchildren++;
-	if (nchildren == 0)
-	{
+	if (s->nChildXids == 0)
 		*ptr = NULL;
-		return 0;
-	}
+	else
+		*ptr = s->childXids;
+
+	return s->nChildXids;
+}
 
-	children = (TransactionId *) palloc(nchildren * sizeof(TransactionId));
-	*ptr = children;
+/*
+ *	XLOG support routines
+ */
+
+static void
+xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
+{
+	TransactionId *sub_xids;
+	TransactionId max_xid;
+	int			i;
+
+	TransactionIdCommit(xid);
+
+	/* Mark committed subtransactions as committed */
+	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+	TransactionIdCommitTree(xlrec->nsubxacts, sub_xids);
 
-	foreach(p, s->childXids)
+	/* Make sure nextXid is beyond any XID mentioned in the record */
+	max_xid = xid;
+	for (i = 0; i < xlrec->nsubxacts; i++)
 	{
-		TransactionId child = lfirst_int(p);
-		*children++ = (TransactionId)child;
+		if (TransactionIdPrecedes(max_xid, sub_xids[i]))
+			max_xid = sub_xids[i];
+	}
+	if (TransactionIdFollowsOrEquals(max_xid,
+									 ShmemVariableCache->nextXid))
+	{
+		ShmemVariableCache->nextXid = max_xid;
+		TransactionIdAdvance(ShmemVariableCache->nextXid);
 	}
-	if (metoo)
-		*children = s->transactionIdData;
 
-	return nchildren;
+	/* Make sure files supposed to be dropped are dropped */
+	for (i = 0; i < xlrec->nrels; i++)
+	{
+		XLogDropRelation(xlrec->xnodes[i]);
+		smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+	}
 }
 
-/*
- *	XLOG support routines
- */
+static void
+xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
+{
+	TransactionId *sub_xids;
+	TransactionId max_xid;
+	int			i;
+
+	TransactionIdAbort(xid);
+
+	/* Mark subtransactions as aborted */
+	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+	TransactionIdAbortTree(xlrec->nsubxacts, sub_xids);
+
+	/* Make sure nextXid is beyond any XID mentioned in the record */
+	max_xid = xid;
+	for (i = 0; i < xlrec->nsubxacts; i++)
+	{
+		if (TransactionIdPrecedes(max_xid, sub_xids[i]))
+			max_xid = sub_xids[i];
+	}
+	if (TransactionIdFollowsOrEquals(max_xid,
+									 ShmemVariableCache->nextXid))
+	{
+		ShmemVariableCache->nextXid = max_xid;
+		TransactionIdAdvance(ShmemVariableCache->nextXid);
+	}
+
+	/* Make sure files supposed to be dropped are dropped */
+	for (i = 0; i < xlrec->nrels; i++)
+	{
+		XLogDropRelation(xlrec->xnodes[i]);
+		smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+	}
+}
 
 void
 xact_redo(XLogRecPtr lsn, XLogRecord *record)
@@ -2907,130 +4327,132 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
 	if (info == XLOG_XACT_COMMIT)
 	{
 		xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
-		int		i;
 
-		TransactionIdCommit(record->xl_xid);
-		/* Mark committed subtransactions as committed */
-		TransactionIdCommitTree(xlrec->nsubxacts,
-								(TransactionId *) &(xlrec->xnodes[xlrec->nrels]));
-		/* Make sure files supposed to be dropped are dropped */
-		for (i = 0; i < xlrec->nrels; i++)
-		{
-			XLogCloseRelation(xlrec->xnodes[i]);
-			smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
-		}
+		xact_redo_commit(xlrec, record->xl_xid);
 	}
 	else if (info == XLOG_XACT_ABORT)
 	{
 		xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
-		int		i;
 
-		TransactionIdAbort(record->xl_xid);
-		/* mark subtransactions as aborted */
-		TransactionIdAbortTree(xlrec->nsubxacts,
-							   (TransactionId *) &(xlrec->xnodes[xlrec->nrels]));
-		/* Make sure files supposed to be dropped are dropped */
+		xact_redo_abort(xlrec, record->xl_xid);
+	}
+	else if (info == XLOG_XACT_PREPARE)
+	{
+		/* the record contents are exactly the 2PC file */
+		RecreateTwoPhaseFile(record->xl_xid,
+							 XLogRecGetData(record), record->xl_len);
+	}
+	else if (info == XLOG_XACT_COMMIT_PREPARED)
+	{
+		xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
+
+		xact_redo_commit(&xlrec->crec, xlrec->xid);
+		RemoveTwoPhaseFile(xlrec->xid, false);
+	}
+	else if (info == XLOG_XACT_ABORT_PREPARED)
+	{
+		xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record);
+
+		xact_redo_abort(&xlrec->arec, xlrec->xid);
+		RemoveTwoPhaseFile(xlrec->xid, false);
+	}
+	else
+		elog(PANIC, "xact_redo: unknown op code %u", info);
+}
+
+static void
+xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
+{
+	int			i;
+
+	appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
+	if (xlrec->nrels > 0)
+	{
+		appendStringInfo(buf, "; rels:");
 		for (i = 0; i < xlrec->nrels; i++)
 		{
-			XLogCloseRelation(xlrec->xnodes[i]);
-			smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+			RelFileNode rnode = xlrec->xnodes[i];
+
+			appendStringInfo(buf, " %u/%u/%u",
+							 rnode.spcNode, rnode.dbNode, rnode.relNode);
 		}
 	}
-	else
-		elog(PANIC, "xact_redo: unknown op code %u", info);
+	if (xlrec->nsubxacts > 0)
+	{
+		TransactionId *xacts = (TransactionId *)
+		&xlrec->xnodes[xlrec->nrels];
+
+		appendStringInfo(buf, "; subxacts:");
+		for (i = 0; i < xlrec->nsubxacts; i++)
+			appendStringInfo(buf, " %u", xacts[i]);
+	}
 }
 
-void
-xact_undo(XLogRecPtr lsn, XLogRecord *record)
+static void
+xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
 {
-	uint8		info = record->xl_info & ~XLR_INFO_MASK;
+	int			i;
 
-	if (info == XLOG_XACT_COMMIT)		/* shouldn't be called by XLOG */
-		elog(PANIC, "xact_undo: can't undo committed xaction");
-	else if (info != XLOG_XACT_ABORT)
-		elog(PANIC, "xact_redo: unknown op code %u", info);
+	appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
+	if (xlrec->nrels > 0)
+	{
+		appendStringInfo(buf, "; rels:");
+		for (i = 0; i < xlrec->nrels; i++)
+		{
+			RelFileNode rnode = xlrec->xnodes[i];
+
+			appendStringInfo(buf, " %u/%u/%u",
+							 rnode.spcNode, rnode.dbNode, rnode.relNode);
+		}
+	}
+	if (xlrec->nsubxacts > 0)
+	{
+		TransactionId *xacts = (TransactionId *)
+		&xlrec->xnodes[xlrec->nrels];
+
+		appendStringInfo(buf, "; subxacts:");
+		for (i = 0; i < xlrec->nsubxacts; i++)
+			appendStringInfo(buf, " %u", xacts[i]);
+	}
 }
 
 void
-xact_desc(char *buf, uint8 xl_info, char *rec)
+xact_desc(StringInfo buf, uint8 xl_info, char *rec)
 {
 	uint8		info = xl_info & ~XLR_INFO_MASK;
-	int i;
 
 	if (info == XLOG_XACT_COMMIT)
 	{
 		xl_xact_commit *xlrec = (xl_xact_commit *) rec;
-		struct tm  *tm = localtime(&xlrec->xtime);
-
-		sprintf(buf + strlen(buf), "commit: %04u-%02u-%02u %02u:%02u:%02u",
-				tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
-				tm->tm_hour, tm->tm_min, tm->tm_sec);
-		if (xlrec->nrels > 0)
-		{
-			sprintf(buf + strlen(buf), "; rels:");
-			for (i = 0; i < xlrec->nrels; i++)
-			{
-				RelFileNode rnode = xlrec->xnodes[i];
-				sprintf(buf + strlen(buf), " %u/%u/%u",
-						rnode.spcNode, rnode.dbNode, rnode.relNode);
-			}
-		}
-		if (xlrec->nsubxacts > 0)
-		{
-			TransactionId *xacts = (TransactionId *)
-				&xlrec->xnodes[xlrec->nrels];
 
-			sprintf(buf + strlen(buf), "; subxacts:");
-			for (i = 0; i < xlrec->nsubxacts; i++)
-				sprintf(buf + strlen(buf), " %u", xacts[i]);
-		}
+		appendStringInfo(buf, "commit: ");
+		xact_desc_commit(buf, xlrec);
 	}
 	else if (info == XLOG_XACT_ABORT)
 	{
 		xl_xact_abort *xlrec = (xl_xact_abort *) rec;
-		struct tm  *tm = localtime(&xlrec->xtime);
-
-		sprintf(buf + strlen(buf), "abort: %04u-%02u-%02u %02u:%02u:%02u",
-				tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
-				tm->tm_hour, tm->tm_min, tm->tm_sec);
-		if (xlrec->nrels > 0)
-		{
-			sprintf(buf + strlen(buf), "; rels:");
-			for (i = 0; i < xlrec->nrels; i++)
-			{
-				RelFileNode rnode = xlrec->xnodes[i];
-				sprintf(buf + strlen(buf), " %u/%u/%u",
-						rnode.spcNode, rnode.dbNode, rnode.relNode);
-			}
-		}
-		if (xlrec->nsubxacts > 0)
-		{
-			TransactionId *xacts = (TransactionId *)
-				&xlrec->xnodes[xlrec->nrels];
 
-			sprintf(buf + strlen(buf), "; subxacts:");
-			for (i = 0; i < xlrec->nsubxacts; i++)
-				sprintf(buf + strlen(buf), " %u", xacts[i]);
-		}
+		appendStringInfo(buf, "abort: ");
+		xact_desc_abort(buf, xlrec);
 	}
-	else
-		strcat(buf, "UNKNOWN");
-}
-
-void
-XactPushRollback(void (*func) (void *), void *data)
-{
-#ifdef XLOG_II
-	if (_RollbackFunc != NULL)
-		elog(PANIC, "XactPushRollback: already installed");
-#endif
+	else if (info == XLOG_XACT_PREPARE)
+	{
+		appendStringInfo(buf, "prepare");
+	}
+	else if (info == XLOG_XACT_COMMIT_PREPARED)
+	{
+		xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
 
-	_RollbackFunc = func;
-	_RollbackData = data;
-}
+		appendStringInfo(buf, "commit %u: ", xlrec->xid);
+		xact_desc_commit(buf, &xlrec->crec);
+	}
+	else if (info == XLOG_XACT_ABORT_PREPARED)
+	{
+		xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
 
-void
-XactPopRollback(void)
-{
-	_RollbackFunc = NULL;
+		appendStringInfo(buf, "abort %u: ", xlrec->xid);
+		xact_desc_abort(buf, &xlrec->arec);
+	}
+	else
+		appendStringInfo(buf, "UNKNOWN");
 }