X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Faccess%2Ftransam%2Fxact.c;h=9af53a5953f09e2b2726fc7efa8ad4fcc1f2b9ab;hb=a7b7b07af340c73adee9959edf260695591a9496;hp=3bb38e4227f45c098d9ab9c246eaa5877b78b6e4;hpb=b6b71b85bc45b49005b5aec87cba2c33fc8baf49;p=postgresql diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 3bb38e4227..9af53a5953 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5,12 +5,12 @@ * * See src/backend/access/transam/README for more information. * - * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.183 2004/08/29 05:06:40 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.260 2008/03/17 19:44:41 petere Exp $ * *------------------------------------------------------------------------- */ @@ -20,29 +20,55 @@ #include #include +#include "access/multixact.h" #include "access/subtrans.h" +#include "access/transam.h" +#include "access/twophase.h" #include "access/xact.h" -#include "catalog/heap.h" -#include "catalog/index.h" +#include "access/xlogutils.h" #include "catalog/namespace.h" #include "commands/async.h" #include "commands/tablecmds.h" #include "commands/trigger.h" -#include "commands/user.h" #include "executor/spi.h" #include "libpq/be-fsstubs.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/fd.h" -#include "storage/proc.h" -#include "storage/sinval.h" +#include "storage/lmgr.h" +#include "storage/procarray.h" +#include "storage/sinvaladt.h" #include "storage/smgr.h" +#include "utils/combocid.h" +#include "utils/flatfiles.h" #include "utils/guc.h" #include "utils/inval.h" #include "utils/memutils.h" -#include "utils/portal.h" #include "utils/relcache.h" 
-#include "utils/resowner.h" -#include "pgstat.h" +#include "utils/xml.h" +#include "pg_trace.h" + + +/* + * User-tweakable parameters + */ +int DefaultXactIsoLevel = XACT_READ_COMMITTED; +int XactIsoLevel; + +bool DefaultXactReadOnly = false; +bool XactReadOnly; + +bool XactSyncCommit = true; + +int CommitDelay = 0; /* precommit delay in microseconds */ +int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ + +/* + * MyXactAccessedTempRel is set when a temporary relation is accessed. + * We don't allow PREPARE TRANSACTION in that case. (This is global + * so that it can be set from heapam.c.) + */ +bool MyXactAccessedTempRel = false; /* @@ -50,38 +76,44 @@ */ typedef enum TransState { - TRANS_DEFAULT, - TRANS_START, - TRANS_INPROGRESS, - TRANS_COMMIT, - TRANS_ABORT + TRANS_DEFAULT, /* idle */ + TRANS_START, /* transaction starting */ + TRANS_INPROGRESS, /* inside a valid transaction */ + TRANS_COMMIT, /* commit in progress */ + TRANS_ABORT, /* abort in progress */ + TRANS_PREPARE /* prepare in progress */ } TransState; /* * transaction block states - transaction state of client queries + * + * Note: the subtransaction states are used only for non-topmost + * transactions; the others appear only in the topmost transaction. 
*/ typedef enum TBlockState { /* not-in-transaction-block states */ - TBLOCK_DEFAULT, - TBLOCK_STARTED, + TBLOCK_DEFAULT, /* idle */ + TBLOCK_STARTED, /* running single-query transaction */ /* transaction block states */ - TBLOCK_BEGIN, - TBLOCK_INPROGRESS, - TBLOCK_END, - TBLOCK_ABORT, - TBLOCK_ENDABORT, + TBLOCK_BEGIN, /* starting transaction block */ + TBLOCK_INPROGRESS, /* live transaction */ + TBLOCK_END, /* COMMIT received */ + TBLOCK_ABORT, /* failed xact, awaiting ROLLBACK */ + TBLOCK_ABORT_END, /* failed xact, ROLLBACK received */ + TBLOCK_ABORT_PENDING, /* live xact, ROLLBACK received */ + TBLOCK_PREPARE, /* live xact, PREPARE received */ /* subtransaction states */ - TBLOCK_SUBBEGIN, - TBLOCK_SUBINPROGRESS, - TBLOCK_SUBEND, - TBLOCK_SUBABORT, - TBLOCK_SUBABORT_PENDING, - TBLOCK_SUBENDABORT_ALL, - TBLOCK_SUBENDABORT_RELEASE, - TBLOCK_SUBENDABORT + TBLOCK_SUBBEGIN, /* starting a subtransaction */ + TBLOCK_SUBINPROGRESS, /* live subtransaction */ + TBLOCK_SUBEND, /* RELEASE received */ + TBLOCK_SUBABORT, /* failed subxact, awaiting ROLLBACK */ + TBLOCK_SUBABORT_END, /* failed subxact, ROLLBACK received */ + TBLOCK_SUBABORT_PENDING, /* live subxact, ROLLBACK received */ + TBLOCK_SUBRESTART, /* live subxact, ROLLBACK TO received */ + TBLOCK_SUBABORT_RESTART /* failed subxact, ROLLBACK TO received */ } TBlockState; /* @@ -89,69 +121,27 @@ typedef enum TBlockState */ typedef struct TransactionStateData { - TransactionId transactionIdData; /* my XID */ + TransactionId transactionId; /* my XID, or Invalid if none */ + SubTransactionId subTransactionId; /* my subxact ID */ char *name; /* savepoint name, if any */ int savepointLevel; /* savepoint level */ - CommandId commandId; /* current CID */ TransState state; /* low-level state */ TBlockState blockState; /* high-level state */ - int nestingLevel; /* nest depth */ - MemoryContext curTransactionContext; /* my xact-lifetime - * context */ + int nestingLevel; /* transaction nesting depth */ + int gucNestLevel; /* 
GUC context nesting depth */ + MemoryContext curTransactionContext; /* my xact-lifetime context */ ResourceOwner curTransactionOwner; /* my query resources */ - List *childXids; /* subcommitted child XIDs */ - AclId currentUser; /* subxact start current_user */ + TransactionId *childXids; /* subcommitted child XIDs, in XID order */ + int nChildXids; /* # of subcommitted child XIDs */ + int maxChildXids; /* allocated size of childXids[] */ + Oid prevUser; /* previous CurrentUserId setting */ + bool prevSecDefCxt; /* previous SecurityDefinerContext setting */ bool prevXactReadOnly; /* entry-time xact r/o state */ struct TransactionStateData *parent; /* back link to parent */ } TransactionStateData; typedef TransactionStateData *TransactionState; -/* - * childXids is currently implemented as an integer List, relying on the - * assumption that TransactionIds are no wider than int. We use these - * macros to provide some isolation in case that changes in the future. - */ -#define lfirst_xid(lc) ((TransactionId) lfirst_int(lc)) -#define lappend_xid(list, datum) lappend_int(list, (int) (datum)) - - -static void AbortTransaction(void); -static void AtAbort_Memory(void); -static void AtCleanup_Memory(void); -static void AtCommit_LocalCache(void); -static void AtCommit_Memory(void); -static void AtStart_Cache(void); -static void AtStart_Memory(void); -static void AtStart_ResourceOwner(void); -static void CallXactCallbacks(XactEvent event, TransactionId parentXid); -static void CleanupTransaction(void); -static void CommitTransaction(void); -static void RecordTransactionAbort(void); -static void StartTransaction(void); - -static void RecordSubTransactionCommit(void); -static void StartSubTransaction(void); -static void CommitSubTransaction(void); -static void AbortSubTransaction(void); -static void CleanupSubTransaction(void); -static void StartAbortedSubTransaction(void); -static void PushTransaction(void); -static void PopTransaction(void); -static void 
CommitTransactionToLevel(int level); -static char *CleanupAbortedSubTransactions(bool returnName); - -static void AtSubAbort_Memory(void); -static void AtSubCleanup_Memory(void); -static void AtSubCommit_Memory(void); -static void AtSubStart_Memory(void); -static void AtSubStart_ResourceOwner(void); - -static void ShowTransactionState(const char *str); -static void ShowTransactionStateRec(TransactionState state); -static const char *BlockStateAsString(TBlockState blockState); -static const char *TransStateAsString(TransState state); - /* * CurrentTransactionState always points to the current transaction state * block. It will point to TopTransactionStateData when not in a @@ -159,17 +149,21 @@ static const char *TransStateAsString(TransState state); */ static TransactionStateData TopTransactionStateData = { 0, /* transaction id */ + 0, /* subtransaction id */ NULL, /* savepoint name */ 0, /* savepoint level */ - FirstCommandId, /* command id */ TRANS_DEFAULT, /* transaction state */ TBLOCK_DEFAULT, /* transaction block state from the client * perspective */ - 0, /* nesting level */ + 0, /* transaction nesting depth */ + 0, /* GUC context nesting depth */ NULL, /* cur transaction context */ NULL, /* cur transaction resource owner */ - NIL, /* subcommitted child Xids */ - 0, /* entry-time current userid */ + NULL, /* subcommitted child Xids */ + 0, /* # of subcommitted child Xids */ + 0, /* allocated size of childXids[] */ + InvalidOid, /* previous CurrentUserId setting */ + false, /* previous SecurityDefinerContext setting */ false, /* entry-time xact r/o state */ NULL /* link to parent state block */ }; @@ -177,27 +171,41 @@ static TransactionStateData TopTransactionStateData = { static TransactionState CurrentTransactionState = &TopTransactionStateData; /* - * These vars hold the value of now(), ie, the transaction start time. - * This does not change as we enter and exit subtransactions, so we don't - * keep it inside the TransactionState stack. 
+ * The subtransaction ID and command ID assignment counters are global + * to a whole transaction, so we do not keep them in the state stack. */ -static AbsoluteTime xactStartTime; /* integer part */ -static int xactStartTimeUsec; /* microsecond part */ - +static SubTransactionId currentSubTransactionId; +static CommandId currentCommandId; +static bool currentCommandIdUsed; /* - * User-tweakable parameters + * xactStartTimestamp is the value of transaction_timestamp(). + * stmtStartTimestamp is the value of statement_timestamp(). + * xactStopTimestamp is the time at which we log a commit or abort WAL record. + * These do not change as we enter and exit subtransactions, so we don't + * keep them inside the TransactionState stack. */ -int DefaultXactIsoLevel = XACT_READ_COMMITTED; -int XactIsoLevel; +static TimestampTz xactStartTimestamp; +static TimestampTz stmtStartTimestamp; +static TimestampTz xactStopTimestamp; -bool DefaultXactReadOnly = false; -bool XactReadOnly; +/* + * GID to be used for preparing the current transaction. This is also + * global to a whole transaction, so we don't keep it in the state stack. + */ +static char *prepareGID; -int CommitDelay = 0; /* precommit delay in microseconds */ -int CommitSiblings = 5; /* number of concurrent xacts needed to - * sleep */ +/* + * Some commands want to force synchronous commit. + */ +static bool forceSyncCommit = false; +/* + * Private context for transaction-abort work --- we reserve space for this + * at startup to ensure that AbortTransaction and AbortSubTransaction can work + * when we've run out of memory. 
+ */ +static MemoryContext TransactionAbortContext = NULL; /* * List of add-on start- and end-of-xact callbacks @@ -211,8 +219,58 @@ typedef struct XactCallbackItem static XactCallbackItem *Xact_callbacks = NULL; -static void (*_RollbackFunc) (void *) = NULL; -static void *_RollbackData = NULL; +/* + * List of add-on start- and end-of-subxact callbacks + */ +typedef struct SubXactCallbackItem +{ + struct SubXactCallbackItem *next; + SubXactCallback callback; + void *arg; +} SubXactCallbackItem; + +static SubXactCallbackItem *SubXact_callbacks = NULL; + + +/* local function prototypes */ +static void AssignTransactionId(TransactionState s); +static void AbortTransaction(void); +static void AtAbort_Memory(void); +static void AtCleanup_Memory(void); +static void AtAbort_ResourceOwner(void); +static void AtCommit_LocalCache(void); +static void AtCommit_Memory(void); +static void AtStart_Cache(void); +static void AtStart_Memory(void); +static void AtStart_ResourceOwner(void); +static void CallXactCallbacks(XactEvent event); +static void CallSubXactCallbacks(SubXactEvent event, + SubTransactionId mySubid, + SubTransactionId parentSubid); +static void CleanupTransaction(void); +static void CommitTransaction(void); +static TransactionId RecordTransactionAbort(bool isSubXact); +static void StartTransaction(void); + +static void RecordSubTransactionCommit(void); +static void StartSubTransaction(void); +static void CommitSubTransaction(void); +static void AbortSubTransaction(void); +static void CleanupSubTransaction(void); +static void PushTransaction(void); +static void PopTransaction(void); + +static void AtSubAbort_Memory(void); +static void AtSubCleanup_Memory(void); +static void AtSubAbort_ResourceOwner(void); +static void AtSubCommit_Memory(void); +static void AtSubStart_Memory(void); +static void AtSubStart_ResourceOwner(void); + +static void ShowTransactionState(const char *str); +static void ShowTransactionStateRec(TransactionState state); +static const char 
*BlockStateAsString(TBlockState blockState); +static const char *TransStateAsString(TransState state); /* ---------------------------------------------------------------- @@ -223,32 +281,22 @@ static void *_RollbackData = NULL; /* * IsTransactionState * - * This returns true if we are currently running a query - * within an executing transaction. + * This returns true if we are inside a valid transaction; that is, + * it is safe to initiate database access, take heavyweight locks, etc. */ bool IsTransactionState(void) { TransactionState s = CurrentTransactionState; - switch (s->state) - { - case TRANS_DEFAULT: - return false; - case TRANS_START: - return true; - case TRANS_INPROGRESS: - return true; - case TRANS_COMMIT: - return true; - case TRANS_ABORT: - return true; - } - /* - * Shouldn't get here, but lint is not happy with this... + * TRANS_DEFAULT and TRANS_ABORT are obviously unsafe states. However, we + * also reject the startup/shutdown states TRANS_START, TRANS_COMMIT, + * TRANS_PREPARE since it might be too soon or too late within those + * transition states to do anything interesting. Hence, the only "valid" + * state is TRANS_INPROGRESS. */ - return false; + return (s->state == TRANS_INPROGRESS); } /* @@ -273,60 +321,200 @@ IsAbortedTransactionBlockState(void) /* * GetTopTransactionId * - * Get the ID of the main transaction, even if we are currently inside - * a subtransaction. + * This will return the XID of the main transaction, assigning one if + * it's not yet set. Be careful to call this only inside a valid xact. */ TransactionId GetTopTransactionId(void) { - return TopTransactionStateData.transactionIdData; + if (!TransactionIdIsValid(TopTransactionStateData.transactionId)) + AssignTransactionId(&TopTransactionStateData); + return TopTransactionStateData.transactionId; } +/* + * GetTopTransactionIdIfAny + * + * This will return the XID of the main transaction, if one is assigned. 
+ * It will return InvalidTransactionId if we are not currently inside a + * transaction, or inside a transaction that hasn't yet been assigned an XID. + */ +TransactionId +GetTopTransactionIdIfAny(void) +{ + return TopTransactionStateData.transactionId; +} /* * GetCurrentTransactionId + * + * This will return the XID of the current transaction (main or sub + * transaction), assigning one if it's not yet set. Be careful to call this + * only inside a valid xact. */ TransactionId GetCurrentTransactionId(void) { TransactionState s = CurrentTransactionState; - return s->transactionIdData; + if (!TransactionIdIsValid(s->transactionId)) + AssignTransactionId(s); + return s->transactionId; +} + +/* + * GetCurrentTransactionIdIfAny + * + * This will return the XID of the current sub xact, if one is assigned. + * It will return InvalidTransactionId if we are not currently inside a + * transaction, or inside a transaction that hasn't been assigned an XID yet. + */ +TransactionId +GetCurrentTransactionIdIfAny(void) +{ + return CurrentTransactionState->transactionId; } /* - * GetCurrentCommandId + * AssignTransactionId + * + * Assigns a new permanent XID to the given TransactionState. + * We do not assign XIDs to transactions until/unless this is called. + * Also, any parent TransactionStates that don't yet have XIDs are assigned + * one; this maintains the invariant that a child transaction has an XID + * following its parent's. */ -CommandId -GetCurrentCommandId(void) +static void +AssignTransactionId(TransactionState s) +{ + bool isSubXact = (s->parent != NULL); + ResourceOwner currentOwner; + + /* Assert that caller didn't screw up */ + Assert(!TransactionIdIsValid(s->transactionId)); + Assert(s->state == TRANS_INPROGRESS); + + /* + * Ensure parent(s) have XIDs, so that a child always has an XID later + * than its parent. 
+ */ + if (isSubXact && !TransactionIdIsValid(s->parent->transactionId)) + AssignTransactionId(s->parent); + + /* + * Generate a new Xid and record it in PG_PROC and pg_subtrans. + * + * NB: we must make the subtrans entry BEFORE the Xid appears anywhere in + * shared storage other than PG_PROC; because if there's no room for it in + * PG_PROC, the subtrans entry is needed to ensure that other backends see + * the Xid as "running". See GetNewTransactionId. + */ + s->transactionId = GetNewTransactionId(isSubXact); + + if (isSubXact) + SubTransSetParent(s->transactionId, s->parent->transactionId); + + /* + * Acquire lock on the transaction XID. (We assume this cannot block.) We + * have to ensure that the lock is assigned to the transaction's own + * ResourceOwner. + */ + currentOwner = CurrentResourceOwner; + PG_TRY(); + { + CurrentResourceOwner = s->curTransactionOwner; + XactLockTableInsert(s->transactionId); + } + PG_CATCH(); + { + /* Ensure CurrentResourceOwner is restored on error */ + CurrentResourceOwner = currentOwner; + PG_RE_THROW(); + } + PG_END_TRY(); + CurrentResourceOwner = currentOwner; +} + + +/* + * GetCurrentSubTransactionId + */ +SubTransactionId +GetCurrentSubTransactionId(void) { TransactionState s = CurrentTransactionState; - return s->commandId; + return s->subTransactionId; +} + + +/* + * GetCurrentCommandId + * + * "used" must be TRUE if the caller intends to use the command ID to mark + * inserted/updated/deleted tuples. FALSE means the ID is being fetched + * for read-only purposes (ie, as a snapshot validity cutoff). See + * CommandCounterIncrement() for discussion. 
+ */ +CommandId +GetCurrentCommandId(bool used) +{ + /* this is global to a transaction, not subtransaction-local */ + if (used) + currentCommandIdUsed = true; + return currentCommandId; } +/* + * GetCurrentTransactionStartTimestamp + */ +TimestampTz +GetCurrentTransactionStartTimestamp(void) +{ + return xactStartTimestamp; +} /* - * GetCurrentTransactionStartTime + * GetCurrentStatementStartTimestamp */ -AbsoluteTime -GetCurrentTransactionStartTime(void) +TimestampTz +GetCurrentStatementStartTimestamp(void) { - return xactStartTime; + return stmtStartTimestamp; } +/* + * GetCurrentTransactionStopTimestamp + * + * We return current time if the transaction stop time hasn't been set + * (which can happen if we decide we don't need to log an XLOG record). + */ +TimestampTz +GetCurrentTransactionStopTimestamp(void) +{ + if (xactStopTimestamp != 0) + return xactStopTimestamp; + return GetCurrentTimestamp(); +} /* - * GetCurrentTransactionStartTimeUsec + * SetCurrentStatementStartTimestamp */ -AbsoluteTime -GetCurrentTransactionStartTimeUsec(int *msec) +void +SetCurrentStatementStartTimestamp(void) { - *msec = xactStartTimeUsec; - return xactStartTime; + stmtStartTimestamp = GetCurrentTimestamp(); } +/* + * SetCurrentTransactionStopTimestamp + */ +static inline void +SetCurrentTransactionStopTimestamp(void) +{ + xactStopTimestamp = GetCurrentTimestamp(); +} /* * GetCurrentTransactionNestLevel @@ -345,42 +533,61 @@ GetCurrentTransactionNestLevel(void) /* * TransactionIdIsCurrentTransactionId - * - * During bootstrap, we cheat and say "it's not my transaction ID" even though - * it is. Along with transam.c's cheat to say that the bootstrap XID is - * already committed, this causes the tqual.c routines to see previously - * inserted tuples as committed, which is what we need during bootstrap. 
*/ bool TransactionIdIsCurrentTransactionId(TransactionId xid) { TransactionState s; - if (AMI_OVERRIDE) - { - Assert(xid == BootstrapTransactionId); + /* + * We always say that BootstrapTransactionId is "not my transaction ID" + * even when it is (ie, during bootstrap). Along with the fact that + * transam.c always treats BootstrapTransactionId as already committed, + * this causes the tqual.c routines to see all tuples as committed, which + * is what we need during bootstrap. (Bootstrap mode only inserts tuples, + * it never updates or deletes them, so all tuples can be presumed good + * immediately.) + * + * Likewise, InvalidTransactionId and FrozenTransactionId are certainly + * not my transaction ID, so we can just return "false" immediately for + * any non-normal XID. + */ + if (!TransactionIdIsNormal(xid)) return false; - } /* - * We will return true for the Xid of the current subtransaction, any - * of its subcommitted children, any of its parents, or any of their - * previously subcommitted children. However, a transaction being - * aborted is no longer "current", even though it may still have an - * entry on the state stack. + * We will return true for the Xid of the current subtransaction, any of + * its subcommitted children, any of its parents, or any of their + * previously subcommitted children. However, a transaction being aborted + * is no longer "current", even though it may still have an entry on the + * state stack. 
*/ for (s = CurrentTransactionState; s != NULL; s = s->parent) { - ListCell *cell; + int low, high; if (s->state == TRANS_ABORT) continue; - if (TransactionIdEquals(xid, s->transactionIdData)) + if (!TransactionIdIsValid(s->transactionId)) + continue; /* it can't have any child XIDs either */ + if (TransactionIdEquals(xid, s->transactionId)) return true; - foreach(cell, s->childXids) + /* As the childXids array is ordered, we can use binary search */ + low = 0; + high = s->nChildXids - 1; + while (low <= high) { - if (TransactionIdEquals(xid, lfirst_xid(cell))) + int middle; + TransactionId probe; + + middle = low + (high - low) / 2; + probe = s->childXids[middle]; + if (TransactionIdEquals(probe, xid)) return true; + else if (TransactionIdPrecedes(probe, xid)) + low = middle + 1; + else + high = middle - 1; } } @@ -394,27 +601,65 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) void CommandCounterIncrement(void) { - TransactionState s = CurrentTransactionState; + /* + * If the current value of the command counter hasn't been "used" to + * mark tuples, we need not increment it, since there's no need to + * distinguish a read-only command from others. This helps postpone + * command counter overflow, and keeps no-op CommandCounterIncrement + * operations cheap. 
+ */ + if (currentCommandIdUsed) + { + currentCommandId += 1; + if (currentCommandId == FirstCommandId) /* check for overflow */ + { + currentCommandId -= 1; + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than 2^32-1 commands in a transaction"))); + } + currentCommandIdUsed = false; - s->commandId += 1; - if (s->commandId == FirstCommandId) /* check for overflow */ - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("cannot have more than 2^32-1 commands in a transaction"))); + /* Propagate new command ID into static snapshots, if set */ + if (SerializableSnapshot) + SerializableSnapshot->curcid = currentCommandId; + if (LatestSnapshot) + LatestSnapshot->curcid = currentCommandId; - /* Propagate new command ID into query snapshots, if set */ - if (QuerySnapshot) - QuerySnapshot->curcid = s->commandId; - if (SerializableSnapshot) - SerializableSnapshot->curcid = s->commandId; + /* + * Make any catalog changes done by the just-completed command + * visible in the local syscache. We obviously don't need to do + * this after a read-only command. (But see hacks in inval.c + * to make real sure we don't think a command that queued inval + * messages was read-only.) + */ + AtCommit_LocalCache(); + } /* - * make cache changes visible to me. + * Make any other backends' catalog changes visible to me. + * + * XXX this is probably in the wrong place: CommandCounterIncrement + * should be purely a local operation, most likely. 
However fooling + * with this will affect asynchronous cross-backend interactions, + * which doesn't seem like a wise thing to do in late beta, so save + * improving this for another day - tgl 2007-11-30 */ - AtCommit_LocalCache(); AtStart_Cache(); } +/* + * ForceSyncCommit + * + * Interface routine to allow commands to force a synchronous commit of the + * current top-level transaction + */ +void +ForceSyncCommit(void) +{ + forceSyncCommit = true; +} + /* ---------------------------------------------------------------- * StartTransaction stuff @@ -438,6 +683,21 @@ AtStart_Memory(void) { TransactionState s = CurrentTransactionState; + /* + * If this is the first time through, create a private context for + * AbortTransaction to work in. By reserving some space now, we can + * insulate AbortTransaction from out-of-memory scenarios. Like + * ErrorContext, we set it up with slow growth rate and a nonzero minimum + * size, so that space will be reserved immediately. + */ + if (TransactionAbortContext == NULL) + TransactionAbortContext = + AllocSetContextCreate(TopMemoryContext, + "TransactionAbortContext", + 32 * 1024, + 32 * 1024, + 32 * 1024); + /* * We shouldn't have a transaction context already. */ @@ -503,16 +763,15 @@ AtSubStart_Memory(void) Assert(CurTransactionContext != NULL); /* - * Create a CurTransactionContext, which will be used to hold data - * that survives subtransaction commit but disappears on - * subtransaction abort. We make it a child of the immediate parent's - * CurTransactionContext. + * Create a CurTransactionContext, which will be used to hold data that + * survives subtransaction commit but disappears on subtransaction abort. + * We make it a child of the immediate parent's CurTransactionContext. 
*/ CurTransactionContext = AllocSetContextCreate(CurTransactionContext, "CurTransactionContext", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); s->curTransactionContext = CurTransactionContext; /* Make the CurTransactionContext active. */ @@ -530,8 +789,8 @@ AtSubStart_ResourceOwner(void) Assert(s->parent != NULL); /* - * Create a resource owner for the subtransaction. We make it a child - * of the immediate parent's resource owner. + * Create a resource owner for the subtransaction. We make it a child of + * the immediate parent's resource owner. */ s->curTransactionOwner = ResourceOwnerCreate(s->parent->curTransactionOwner, @@ -548,168 +807,203 @@ AtSubStart_ResourceOwner(void) /* * RecordTransactionCommit + * + * Returns latest XID among xact and its children, or InvalidTransactionId + * if the xact has no XID. (We compute that here just because it's easier.) + * + * This is exported only to support an ugly hack in VACUUM FULL. */ -void +TransactionId RecordTransactionCommit(void) { + TransactionId xid = GetTopTransactionIdIfAny(); + bool markXidCommitted = TransactionIdIsValid(xid); + TransactionId latestXid = InvalidTransactionId; int nrels; - RelFileNode *rptr; + RelFileNode *rels; + bool haveNonTemp; int nchildren; TransactionId *children; /* Get data needed for commit record */ - nrels = smgrGetPendingDeletes(true, &rptr); + nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp); nchildren = xactGetCommittedChildren(&children); /* - * If we made neither any XLOG entries nor any temp-rel updates, and - * have no files to be deleted, we can omit recording the transaction - * commit at all. (This test includes the effects of subtransactions, - * so the presence of committed subxacts need not alone force a - * write.) + * If we haven't been assigned an XID yet, we neither can, nor do we want + * to write a COMMIT record. 
*/ - if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0) + if (!markXidCommitted) { - TransactionId xid = GetCurrentTransactionId(); - bool madeTCentries; - XLogRecPtr recptr; + /* + * We expect that every smgrscheduleunlink is followed by a catalog + * update, and hence XID assignment, so we shouldn't get here with any + * pending deletes. Use a real test not just an Assert to check this, + * since it's a bit fragile. + */ + if (nrels != 0) + elog(ERROR, "cannot commit a transaction that deleted files but has no xid"); + + /* Can't have child XIDs either; AssignTransactionId enforces this */ + Assert(nchildren == 0); + + /* + * If we didn't create XLOG entries, we're done here; otherwise we + * should flush those entries the same as a commit record. (An + * example of a possible record that wouldn't cause an XID to be + * assigned is a sequence advance record due to nextval() --- we want + * to flush that to disk before reporting commit.) + */ + if (XactLastRecEnd.xrecoff == 0) + goto cleanup; + } + else + { + /* + * Begin commit critical section and insert the commit XLOG record. + */ + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_commit xlrec; /* Tell bufmgr and smgr to prepare for commit */ BufmgrCommit(); - START_CRIT_SECTION(); - /* - * If our transaction made any transaction-controlled XLOG - * entries, we need to lock out checkpoint start between writing - * our XLOG record and updating pg_clog. Otherwise it is possible - * for the checkpoint to set REDO after the XLOG record but fail - * to flush the pg_clog update to disk, leading to loss of the - * transaction commit if we crash a little later. Slightly klugy - * fix for problem discovered 2004-08-10. + * Mark ourselves as within our "commit critical section". This + * forces any concurrent checkpoint to wait until we've updated + * pg_clog. 
Without this, it is possible for the checkpoint to set + * REDO after the XLOG record but fail to flush the pg_clog update to + * disk, leading to loss of the transaction commit if the system + * crashes a little later. * - * (If it made no transaction-controlled XLOG entries, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed; so it doesn't matter if we lose the - * commit flag.) + * Note: we could, but don't bother to, set this flag in + * RecordTransactionAbort. That's because loss of a transaction abort + * is noncritical; the presumption would be that it aborted, anyway. * - * Note we only need a shared lock. + * It's safe to change the inCommit flag of our own backend without + * holding the ProcArrayLock, since we're the only one modifying it. + * This makes checkpoint's determination of which xacts are inCommit a + * bit fuzzy, but it doesn't matter. */ - madeTCentries = (MyLastRecPtr.xrecoff != 0); - if (madeTCentries) - LWLockAcquire(CheckpointStartLock, LW_SHARED); - - /* - * We only need to log the commit in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. 
- */ - if (madeTCentries || nrels > 0) + START_CRIT_SECTION(); + MyProc->inCommit = true; + + SetCurrentTransactionStopTimestamp(); + xlrec.xact_time = xactStopTimestamp; + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactCommit; + rdata[0].buffer = InvalidBuffer; + /* dump rels to delete */ + if (nrels > 0) { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_commit xlrec; - - xlrec.xtime = time(NULL); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].buffer = InvalidBuffer; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactCommit; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; - rdata[1].data = (char *) rptr; - rdata[1].len = nrels * sizeof(RelFileNode); - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - lastrdata = 2; - } - rdata[lastrdata].next = NULL; - - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); + rdata[0].next = &(rdata[1]); + rdata[1].data = (char *) rels; + rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; + lastrdata = 1; } - else + /* dump committed child Xids */ + if (nchildren > 0) { - /* Just flush through last record written by me */ - recptr = ProcLastRecEnd; + rdata[lastrdata].next = &(rdata[2]); + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; + lastrdata = 2; } + rdata[lastrdata].next = NULL; + + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); + } + /* + * Check if we want to commit asynchronously. 
If the user has set + * synchronous_commit = off, and we're not doing cleanup of any non-temp + * rels nor committing any command that wanted to force sync commit, then + * we can defer flushing XLOG. (We must not allow asynchronous commit if + * there are any non-temp tables to be deleted, because we might delete + * the files before the COMMIT record is flushed to disk. We do allow + * asynchronous commit if all to-be-deleted tables are temporary though, + * since they are lost anyway if we crash.) + */ + if (XactSyncCommit || forceSyncCommit || haveNonTemp) + { /* - * We must flush our XLOG entries to disk if we made any XLOG - * entries, whether in or out of transaction control. For - * example, if we reported a nextval() result to the client, this - * ensures that any XLOG record generated by nextval will hit the - * disk before we report the transaction committed. + * Synchronous commit case. * - * Note: if we generated a commit record above, MyXactMadeXLogEntry - * will certainly be set now. + * Sleep before flush! So we can flush more than one commit records + * per single fsync. (The idea is some other backend may do the + * XLogFlush while we're sleeping. This needs work still, because on + * most Unixen, the minimum select() delay is 10msec or more, which is + * way too long.) + * + * We do not sleep if enableFsync is not turned on, nor if there are + * fewer than CommitSiblings other backends with active transactions. */ - if (MyXactMadeXLogEntry) - { - /* - * Sleep before flush! So we can flush more than one commit - * records per single fsync. (The idea is some other backend - * may do the XLogFlush while we're sleeping. This needs work - * still, because on most Unixen, the minimum select() delay - * is 10msec or more, which is way too long.) - * - * We do not sleep if enableFsync is not turned on, nor if there - * are fewer than CommitSiblings other backends with active - * transactions. 
- */ - if (CommitDelay > 0 && enableFsync && - CountActiveBackends() >= CommitSiblings) - pg_usleep(CommitDelay); + if (CommitDelay > 0 && enableFsync && + CountActiveBackends() >= CommitSiblings) + pg_usleep(CommitDelay); - XLogFlush(recptr); - } + XLogFlush(XactLastRecEnd); /* - * We must mark the transaction committed in clog if its XID - * appears either in permanent rels or in local temporary rels. We - * test this by seeing if we made transaction-controlled entries - * *OR* local-rel tuple updates. Note that if we made only the - * latter, we have not emitted an XLOG record for our commit, and - * so in the event of a crash the clog update might be lost. This - * is okay because no one else will ever care whether we - * committed. + * Now we may update the CLOG, if we wrote a COMMIT record above */ - if (madeTCentries || MyXactMadeTempRelUpdate) + if (markXidCommitted) { TransactionIdCommit(xid); /* to avoid race conditions, the parent must commit first */ TransactionIdCommitTree(nchildren, children); } + } + else + { + /* + * Asynchronous commit case. + * + * Report the latest async commit LSN, so that the WAL writer knows to + * flush this commit. + */ + XLogSetAsyncCommitLSN(XactLastRecEnd); - /* Unlock checkpoint lock if we acquired it */ - if (madeTCentries) - LWLockRelease(CheckpointStartLock); + /* + * We must not immediately update the CLOG, since we didn't flush the + * XLOG. Instead, we store the LSN up to which the XLOG must be + * flushed before the CLOG may be updated. + */ + if (markXidCommitted) + { + TransactionIdAsyncCommit(xid, XactLastRecEnd); + /* to avoid race conditions, the parent must commit first */ + TransactionIdAsyncCommitTree(nchildren, children, XactLastRecEnd); + } + } + /* + * If we entered a commit critical section, leave it now, and let + * checkpoints proceed. 
+ */ + if (markXidCommitted) + { + MyProc->inCommit = false; END_CRIT_SECTION(); } - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* Compute latestXid while we have the child XIDs handy */ + latestXid = TransactionIdLatest(xid, nchildren, children); - /* Show myself as out of the transaction in PGPROC array */ - MyProc->logRec.xrecoff = 0; + /* Reset XactLastRecEnd until the next transaction writes something */ + XactLastRecEnd.xrecoff = 0; - /* And clean up local data */ - if (rptr) - pfree(rptr); - if (children) - pfree(children); +cleanup: + /* Clean up local data */ + if (rels) + pfree(rels); + + return latestXid; } @@ -732,9 +1026,8 @@ static void AtCommit_Memory(void) { /* - * Now that we're "out" of a transaction, have the system allocate - * things in the top memory context instead of per-transaction - * contexts. + * Now that we're "out" of a transaction, have the system allocate things + * in the top memory context instead of per-transaction contexts. */ MemoryContextSwitchTo(TopMemoryContext); @@ -755,9 +1048,6 @@ AtCommit_Memory(void) /* * AtSubCommit_Memory - * - * We do not throw away the child's CurTransactionContext, since the data - * it contains will be needed at upper commit. */ static void AtSubCommit_Memory(void) @@ -769,6 +1059,18 @@ AtSubCommit_Memory(void) /* Return to parent transaction level's memory context. */ CurTransactionContext = s->parent->curTransactionContext; MemoryContextSwitchTo(CurTransactionContext); + + /* + * Ordinarily we cannot throw away the child's CurTransactionContext, + * since the data it contains will be needed at upper commit. However, if + * there isn't actually anything in it, we can throw it away. This avoids + * a small memory leak in the common case of "trivial" subxacts. 
+ */ + if (MemoryContextIsEmpty(s->curTransactionContext)) + { + MemoryContextDelete(s->curTransactionContext); + s->curTransactionContext = NULL; + } } /* @@ -780,19 +1082,79 @@ static void AtSubCommit_childXids(void) { TransactionState s = CurrentTransactionState; - MemoryContext old_cxt; + int new_nChildXids; Assert(s->parent != NULL); - old_cxt = MemoryContextSwitchTo(s->parent->curTransactionContext); + /* + * The parent childXids array will need to hold my XID and all my + * childXids, in addition to the XIDs already there. + */ + new_nChildXids = s->parent->nChildXids + s->nChildXids + 1; + + /* Allocate or enlarge the parent array if necessary */ + if (s->parent->maxChildXids < new_nChildXids) + { + int new_maxChildXids; + TransactionId *new_childXids; + + /* + * Make it 2x what's needed right now, to avoid having to enlarge it + * repeatedly. But we can't go above MaxAllocSize. (The latter + * limit is what ensures that we don't need to worry about integer + * overflow here or in the calculation of new_nChildXids.) + */ + new_maxChildXids = Min(new_nChildXids * 2, + (int) (MaxAllocSize / sizeof(TransactionId))); + + if (new_maxChildXids < new_nChildXids) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("maximum number of committed subtransactions (%d) exceeded", + (int) (MaxAllocSize / sizeof(TransactionId))))); - s->parent->childXids = lappend_xid(s->parent->childXids, - s->transactionIdData); + /* + * We keep the child-XID arrays in TopTransactionContext; this avoids + * setting up child-transaction contexts for what might be just a few + * bytes of grandchild XIDs. 
+ */ + if (s->parent->childXids == NULL) + new_childXids = + MemoryContextAlloc(TopTransactionContext, + new_maxChildXids * sizeof(TransactionId)); + else + new_childXids = repalloc(s->parent->childXids, + new_maxChildXids * sizeof(TransactionId)); - s->parent->childXids = list_concat(s->parent->childXids, s->childXids); - s->childXids = NIL; /* ensure list not doubly referenced */ + s->parent->childXids = new_childXids; + s->parent->maxChildXids = new_maxChildXids; + } - MemoryContextSwitchTo(old_cxt); + /* + * Copy all my XIDs to parent's array. + * + * Note: We rely on the fact that the XID of a child always follows that + * of its parent. By copying the XID of this subtransaction before the + * XIDs of its children, we ensure that the array stays ordered. Likewise, + * all XIDs already in the array belong to subtransactions started and + * subcommitted before us, so their XIDs must precede ours. + */ + s->parent->childXids[s->parent->nChildXids] = s->transactionId; + + if (s->nChildXids > 0) + memcpy(&s->parent->childXids[s->parent->nChildXids + 1], + s->childXids, + s->nChildXids * sizeof(TransactionId)); + + s->parent->nChildXids = new_nChildXids; + + /* Release child's array to avoid leakage */ + if (s->childXids != NULL) + pfree(s->childXids); + /* We must reset these to avoid double-free if fail later in commit */ + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; } /* @@ -801,23 +1163,20 @@ AtSubCommit_childXids(void) static void RecordSubTransactionCommit(void) { + TransactionId xid = GetCurrentTransactionIdIfAny(); + /* * We do not log the subcommit in XLOG; it doesn't matter until the * top-level transaction commits. * - * We must mark the subtransaction subcommitted in clog if its XID - * appears either in permanent rels or in local temporary rels. We - * test this by seeing if we made transaction-controlled entries *OR* - * local-rel tuple updates. 
(The test here actually covers the entire - * transaction tree so far, so it may mark subtransactions that don't - * really need it, but it's probably not worth being tenser. Note that - * if a prior subtransaction dirtied these variables, then - * RecordTransactionCommit will have to do the full pushup anyway...) + * We must mark the subtransaction subcommitted in the CLOG if it had a + * valid XID assigned. If it did not, nobody else will ever know about + * the existence of this subxact. We don't have to deal with deletions + * scheduled for on-commit here, since they'll be reassigned to our parent + * (who might still abort). */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) + if (TransactionIdIsValid(xid)) { - TransactionId xid = GetCurrentTransactionId(); - /* XXX does this really need to be a critical section? */ START_CRIT_SECTION(); @@ -835,118 +1194,133 @@ RecordSubTransactionCommit(void) /* * RecordTransactionAbort + * + * Returns latest XID among xact and its children, or InvalidTransactionId + * if the xact has no XID. (We compute that here just because it's easier.) */ -static void -RecordTransactionAbort(void) +static TransactionId +RecordTransactionAbort(bool isSubXact) { + TransactionId xid = GetCurrentTransactionIdIfAny(); + TransactionId latestXid; int nrels; - RelFileNode *rptr; + RelFileNode *rels; int nchildren; TransactionId *children; - - /* Get data needed for abort record */ - nrels = smgrGetPendingDeletes(false, &rptr); - nchildren = xactGetCommittedChildren(&children); + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_abort xlrec; /* - * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can - * omit recording the transaction abort at all. No one will ever care - * that it aborted. (These tests cover our whole transaction tree.) + * If we haven't been assigned an XID, nobody will care whether we aborted + * or not. 
Hence, we're done in that case. It does not matter if we have + * rels to delete (note that this routine is not responsible for actually + * deleting 'em). We cannot have any child XIDs, either. */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) + if (!TransactionIdIsValid(xid)) { - TransactionId xid = GetCurrentTransactionId(); + /* Reset XactLastRecEnd until the next transaction writes something */ + if (!isSubXact) + XactLastRecEnd.xrecoff = 0; + return InvalidTransactionId; + } - /* - * Catch the scenario where we aborted partway through - * RecordTransactionCommit ... - */ - if (TransactionIdDidCommit(xid)) - elog(PANIC, "cannot abort transaction %u, it was already committed", xid); + /* + * We have a valid XID, so we should write an ABORT record for it. + * + * We do not flush XLOG to disk here, since the default assumption after a + * crash would be that we aborted, anyway. For the same reason, we don't + * need to worry about interlocking against checkpoint start. + */ - START_CRIT_SECTION(); + /* + * Check that we haven't aborted halfway through RecordTransactionCommit. + */ + if (TransactionIdDidCommit(xid)) + elog(PANIC, "cannot abort transaction %u, it was already committed", + xid); - /* - * We only need to log the abort in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. - * (If it made no transaction-controlled XLOG entries, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed.) - * - * We do not flush XLOG to disk unless deleting files, since the - * default assumption after a crash would be that we aborted, - * anyway. For the same reason, we don't need to worry about - * interlocking against checkpoint start. 
- */ - if (MyLastRecPtr.xrecoff != 0 || nrels > 0) - { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_abort xlrec; - XLogRecPtr recptr; - - xlrec.xtime = time(NULL); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].buffer = InvalidBuffer; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactAbort; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; - rdata[1].data = (char *) rptr; - rdata[1].len = nrels * sizeof(RelFileNode); - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - lastrdata = 2; - } - rdata[lastrdata].next = NULL; + /* Fetch the data we need for the abort record */ + nrels = smgrGetPendingDeletes(false, &rels, NULL); + nchildren = xactGetCommittedChildren(&children); - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); + /* XXX do we really need a critical section here? */ + START_CRIT_SECTION(); - /* Must flush if we are deleting files... 
*/ - if (nrels > 0) - XLogFlush(recptr); - } + /* Write the ABORT record */ + if (isSubXact) + xlrec.xact_time = GetCurrentTimestamp(); + else + { + SetCurrentTransactionStopTimestamp(); + xlrec.xact_time = xactStopTimestamp; + } + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactAbort; + rdata[0].buffer = InvalidBuffer; + /* dump rels to delete */ + if (nrels > 0) + { + rdata[0].next = &(rdata[1]); + rdata[1].data = (char *) rels; + rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; + lastrdata = 1; + } + /* dump committed child Xids */ + if (nchildren > 0) + { + rdata[lastrdata].next = &(rdata[2]); + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; + lastrdata = 2; + } + rdata[lastrdata].next = NULL; - /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but we may as well do it while we are here. - * - * The ordering here isn't critical but it seems best to mark the - * parent first. This assures an atomic transition of all the - * subtransactions to aborted state from the point of view of - * concurrent TransactionIdDidAbort calls. - */ - TransactionIdAbort(xid); - TransactionIdAbortTree(nchildren, children); + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); - END_CRIT_SECTION(); - } + /* + * Mark the transaction aborted in clog. This is not absolutely necessary + * but we may as well do it while we are here; also, in the subxact case + * it is helpful because XactLockTableWait makes use of it to avoid + * waiting for already-aborted subtransactions. It is OK to do it without + * having flushed the ABORT record to disk, because in event of a crash + * we'd be assumed to have aborted anyway. + * + * The ordering here isn't critical but it seems best to mark the parent + * first. 
This assures an atomic transition of all the subtransactions to + * aborted state from the point of view of concurrent + * TransactionIdDidAbort calls. + */ + TransactionIdAbort(xid); + TransactionIdAbortTree(nchildren, children); + + END_CRIT_SECTION(); + + /* Compute latestXid while we have the child XIDs handy */ + latestXid = TransactionIdLatest(xid, nchildren, children); - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* + * If we're aborting a subtransaction, we can immediately remove failed + * XIDs from PGPROC's cache of running child XIDs. We do that here for + * subxacts, because we already have the child XID array at hand. For + * main xacts, the equivalent happens just after this function returns. + */ + if (isSubXact) + XidCacheRemoveRunningXids(xid, nchildren, children, latestXid); - /* Show myself as out of the transaction in PGPROC array */ - MyProc->logRec.xrecoff = 0; + /* Reset XactLastRecEnd until the next transaction writes something */ + if (!isSubXact) + XactLastRecEnd.xrecoff = 0; /* And clean up local data */ - if (rptr) - pfree(rptr); - if (children) - pfree(children); + if (rels) + pfree(rels); + + return latestXid; } /* @@ -956,136 +1330,75 @@ static void AtAbort_Memory(void) { /* - * Make sure we are in a valid context (not a child of - * TopTransactionContext...). Note that it is possible for this code - * to be called when we aren't in a transaction at all; go directly to - * TopMemoryContext in that case. + * Switch into TransactionAbortContext, which should have some free space + * even if nothing else does. We'll work in this context until we've + * finished cleaning up. + * + * It is barely possible to get here when we've not been able to create + * TransactionAbortContext yet; if so use TopMemoryContext. 
*/ - if (TopTransactionContext != NULL) - { - MemoryContextSwitchTo(TopTransactionContext); - - /* - * We do not want to destroy the transaction's global state yet, - * so we can't free any memory here. - */ - } + if (TransactionAbortContext != NULL) + MemoryContextSwitchTo(TransactionAbortContext); else MemoryContextSwitchTo(TopMemoryContext); } - /* * AtSubAbort_Memory */ static void AtSubAbort_Memory(void) { - Assert(TopTransactionContext != NULL); + Assert(TransactionAbortContext != NULL); - MemoryContextSwitchTo(TopTransactionContext); + MemoryContextSwitchTo(TransactionAbortContext); } + /* - * RecordSubTransactionAbort + * AtAbort_ResourceOwner */ static void -RecordSubTransactionAbort(void) +AtAbort_ResourceOwner(void) { - int nrels; - RelFileNode *rptr; - TransactionId xid = GetCurrentTransactionId(); - int nchildren; - TransactionId *children; - - /* Get data needed for abort record */ - nrels = smgrGetPendingDeletes(false, &rptr); - nchildren = xactGetCommittedChildren(&children); - /* - * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can - * omit recording the transaction abort at all. No one will ever care - * that it aborted. (These tests cover our whole transaction tree, - * and therefore may mark subxacts that don't really need it, but it's - * probably not worth being tenser.) - * - * In this case we needn't worry about marking subcommitted children as - * aborted, because they didn't mark themselves as subcommitted in the - * first place; see the optimization in RecordSubTransactionCommit. + * Make sure we have a valid ResourceOwner, if possible (else it will be + * NULL, which is OK) */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) - { - START_CRIT_SECTION(); - - /* - * We only need to log the abort in XLOG if the transaction made - * any transaction-controlled XLOG entries or will delete files. 
- */ - if (MyLastRecPtr.xrecoff != 0 || nrels > 0) - { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_abort xlrec; - XLogRecPtr recptr; - - xlrec.xtime = time(NULL); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].buffer = InvalidBuffer; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactAbort; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; - rdata[1].data = (char *) rptr; - rdata[1].len = nrels * sizeof(RelFileNode); - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - lastrdata = 2; - } - rdata[lastrdata].next = NULL; + CurrentResourceOwner = TopTransactionResourceOwner; +} - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); +/* + * AtSubAbort_ResourceOwner + */ +static void +AtSubAbort_ResourceOwner(void) +{ + TransactionState s = CurrentTransactionState; - /* Must flush if we are deleting files... */ - if (nrels > 0) - XLogFlush(recptr); - } + /* Make sure we have a valid ResourceOwner */ + CurrentResourceOwner = s->curTransactionOwner; +} - /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but we may as well do it while we are here. - */ - TransactionIdAbort(xid); - TransactionIdAbortTree(nchildren, children); - END_CRIT_SECTION(); - } +/* + * AtSubAbort_childXids + */ +static void +AtSubAbort_childXids(void) +{ + TransactionState s = CurrentTransactionState; /* - * We can immediately remove failed XIDs from PGPROC's cache of - * running child XIDs. It's easiest to do it here while we have the - * child XID array at hand, even though in the main-transaction case - * the equivalent work happens just after return from - * RecordTransactionAbort. 
+ * We keep the child-XID arrays in TopTransactionContext (see + * AtSubCommit_childXids). This means we'd better free the array + * explicitly at abort to avoid leakage. */ - XidCacheRemoveRunningXids(xid, nchildren, children); - - /* And clean up local data */ - if (rptr) - pfree(rptr); - if (children) - pfree(children); + if (s->childXids != NULL) + pfree(s->childXids); + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; } /* ---------------------------------------------------------------- @@ -1099,14 +1412,19 @@ RecordSubTransactionAbort(void) static void AtCleanup_Memory(void) { + Assert(CurrentTransactionState->parent == NULL); + /* - * Now that we're "out" of a transaction, have the system allocate - * things in the top memory context instead of per-transaction - * contexts. + * Now that we're "out" of a transaction, have the system allocate things + * in the top memory context instead of per-transaction contexts. */ MemoryContextSwitchTo(TopMemoryContext); - Assert(CurrentTransactionState->parent == NULL); + /* + * Clear the special abort context for next time. + */ + if (TransactionAbortContext != NULL) + MemoryContextResetAndDeleteChildren(TransactionAbortContext); /* * Release all transaction-local memory. @@ -1139,11 +1457,19 @@ AtSubCleanup_Memory(void) CurTransactionContext = s->parent->curTransactionContext; /* - * Delete the subxact local memory contexts. Its CurTransactionContext - * can go too (note this also kills CurTransactionContexts from any - * children of the subxact). + * Clear the special abort context for next time. */ - MemoryContextDelete(s->curTransactionContext); + if (TransactionAbortContext != NULL) + MemoryContextResetAndDeleteChildren(TransactionAbortContext); + + /* + * Delete the subxact local memory contexts. Its CurTransactionContext can + * go too (note this also kills CurTransactionContexts from any children + * of the subxact). 
+ */ + if (s->curTransactionContext) + MemoryContextDelete(s->curTransactionContext); + s->curTransactionContext = NULL; } /* ---------------------------------------------------------------- @@ -1157,7 +1483,14 @@ AtSubCleanup_Memory(void) static void StartTransaction(void) { - TransactionState s = CurrentTransactionState; + TransactionState s; + VirtualTransactionId vxid; + + /* + * Let's just make sure the state stack is empty + */ + s = &TopTransactionStateData; + CurrentTransactionState = s; /* * check the current transaction state @@ -1171,14 +1504,24 @@ StartTransaction(void) * start processing */ s->state = TRANS_START; + s->transactionId = InvalidTransactionId; /* until assigned */ /* - * Make sure we've freed any old snapshot, and reset xact state - * variables + * Make sure we've freed any old snapshot, and reset xact state variables */ FreeXactSnapshot(); XactIsoLevel = DefaultXactIsoLevel; XactReadOnly = DefaultXactReadOnly; + forceSyncCommit = false; + MyXactAccessedTempRel = false; + + /* + * reinitialize within-transaction counters + */ + s->subTransactionId = TopSubTransactionId; + currentSubTransactionId = TopSubTransactionId; + currentCommandId = FirstCommandId; + currentCommandIdUsed = false; /* * must initialize resource-management stuff first @@ -1187,39 +1530,57 @@ StartTransaction(void) AtStart_ResourceOwner(); /* - * generate a new transaction id + * Assign a new LocalTransactionId, and combine it with the backendId to + * form a virtual transaction id. */ - s->transactionIdData = GetNewTransactionId(false); + vxid.backendId = MyBackendId; + vxid.localTransactionId = GetNextLocalTransactionId(); - XactLockTableInsert(s->transactionIdData); + /* + * Lock the virtual transaction id before we announce it in the proc array + */ + VirtualXactLockTableInsert(vxid); /* - * set now() + * Advertise it in the proc array. We assume assignment of + * LocalTransactionID is atomic, and the backendId should be set already. 
*/ - xactStartTime = GetCurrentAbsoluteTimeUsec(&(xactStartTimeUsec)); + Assert(MyProc->backendId == vxid.backendId); + MyProc->lxid = vxid.localTransactionId; + + TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId); /* - * initialize current transaction state fields + * set transaction_timestamp() (a/k/a now()). We want this to be the same + * as the first command's statement_timestamp(), so don't do a fresh + * GetCurrentTimestamp() call (which'd be expensive anyway). Also, mark + * xactStopTimestamp as unset. */ - s->commandId = FirstCommandId; - s->nestingLevel = 1; - s->childXids = NIL; + xactStartTimestamp = stmtStartTimestamp; + xactStopTimestamp = 0; + pgstat_report_xact_timestamp(xactStartTimestamp); /* - * You might expect to see "s->currentUser = GetUserId();" here, but - * you won't because it doesn't work during startup; the userid isn't - * set yet during a backend's first transaction start. We only use - * the currentUser field in sub-transaction state structs. + * initialize current transaction state fields * - * prevXactReadOnly is also valid only in sub-transactions. + * note: prevXactReadOnly is not used at the outermost level */ + s->nestingLevel = 1; + s->gucNestLevel = 1; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; + GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt); + /* SecurityDefinerContext should never be set outside a transaction */ + Assert(!s->prevSecDefCxt); /* * initialize other subsystems for new transaction */ + AtStart_GUC(); AtStart_Inval(); AtStart_Cache(); - DeferredTriggerBeginXact(); + AfterTriggerBeginXact(); /* * done with start processing, set current transaction state to "in @@ -1230,13 +1591,17 @@ StartTransaction(void) ShowTransactionState("StartTransaction"); } + /* * CommitTransaction + * + * NB: if you change this routine, better look at PrepareTransaction too! 
*/ static void CommitTransaction(void) { TransactionState s = CurrentTransactionState; + TransactionId latestXid; ShowTransactionState("CommitTransaction"); @@ -1249,108 +1614,353 @@ CommitTransaction(void) Assert(s->parent == NULL); /* - * Tell the trigger manager that this transaction is about to be - * committed. He'll invoke all trigger deferred until XACT before we - * really start on committing the transaction. + * Do pre-commit processing (most of this stuff requires database access, + * and in fact could still cause an error...) + * + * It is possible for CommitHoldablePortals to invoke functions that queue + * deferred triggers, and it's also possible that triggers create holdable + * cursors. So we have to loop until there's nothing left to do. */ - DeferredTriggerEndXact(); + for (;;) + { + /* + * Fire all currently pending deferred triggers. + */ + AfterTriggerFireDeferred(); + + /* + * Convert any open holdable cursors into static portals. If there + * weren't any, we are done ... otherwise loop back to check if they + * queued deferred triggers. Lather, rinse, repeat. + */ + if (!CommitHoldablePortals()) + break; + } + + /* Now we can shut down the deferred-trigger manager */ + AfterTriggerEndXact(true); + + /* Close any open regular cursors */ + AtCommit_Portals(); /* - * Similarly, let ON COMMIT management do its thing before we start to - * commit. + * Let ON COMMIT management do its thing (must happen after closing + * cursors, to avoid dangling-reference problems) */ PreCommit_on_commit_actions(); + /* close large objects before lower-level cleanup */ + AtEOXact_LargeObject(true); + + /* NOTIFY commit must come before lower-level cleanup */ + AtCommit_Notify(); + + /* + * Update flat files if we changed pg_database, pg_authid or + * pg_auth_members. This should be the last step before commit. 
+ */ + AtEOXact_UpdateFlatFiles(true); + /* Prevent cancel/die interrupt while cleaning up */ HOLD_INTERRUPTS(); /* * set the current transaction state information appropriately during - * the abort processing + * commit processing */ s->state = TRANS_COMMIT; /* - * Do pre-commit processing (most of this stuff requires database - * access, and in fact could still cause an error...) + * Here is where we really truly commit. + */ + latestXid = RecordTransactionCommit(); + + TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid); + + /* + * Let others know about no transaction in progress by me. Note that this + * must be done _before_ releasing locks we hold and _after_ + * RecordTransactionCommit. */ + ProcArrayEndTransaction(MyProc, latestXid); - AtCommit_Portals(); + /* + * This is all post-commit cleanup. Note that if an error is raised here, + * it's too late to abort the transaction. This should be just + * noncritical resource releasing. + * + * The ordering of operations is not entirely random. The idea is: + * release resources visible to other backends (eg, files, buffer pins); + * then release locks; then release backend-local resources. We want to + * release locks at the point where any backend waiting for us will see + * our transaction as being fully cleaned up. + * + * Resources that can be associated with individual queries are handled by + * the ResourceOwner mechanism. The other calls here are for backend-wide + * state. 
+ */ - /* close large objects before lower-level cleanup */ - AtEOXact_LargeObject(true); + CallXactCallbacks(XACT_EVENT_COMMIT); - /* NOTIFY commit must come before lower-level cleanup */ - AtCommit_Notify(); + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_BEFORE_LOCKS, + true, true); + + /* Check we've released all buffer pins */ + AtEOXact_Buffers(true); - /* Update the flat password file if we changed pg_shadow or pg_group */ - /* This should be the last step before commit */ - AtEOXact_UpdatePasswordFile(true); + /* Clean up the relation cache */ + AtEOXact_RelationCache(true); /* - * Here is where we really truly commit. + * Make catalog changes visible to all backends. This has to happen after + * relcache references are dropped (see comments for + * AtEOXact_RelationCache), but before locks are released (if anyone is + * waiting for lock on a relation we've modified, we want them to know + * about the catalog change before they start using the relation). */ - RecordTransactionCommit(); + AtEOXact_Inval(true); /* - * Let others know about no transaction in progress by me. Note that - * this must be done _before_ releasing locks we hold and _after_ - * RecordTransactionCommit. + * Likewise, dropping of files deleted during the transaction is best done + * after releasing relcache and buffer pins. (This is not strictly + * necessary during commit, since such pins should have been released + * already, but this ordering is definitely critical during abort.) 
+ */ + smgrDoPendingDeletes(true); + + AtEOXact_MultiXact(); + + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_LOCKS, + true, true); + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_AFTER_LOCKS, + true, true); + + /* Check we've released all catcache entries */ + AtEOXact_CatCache(true); + + AtEOXact_GUC(true, 1); + AtEOXact_SPI(true); + AtEOXact_xml(); + AtEOXact_on_commit_actions(true); + AtEOXact_Namespace(true); + /* smgrcommit already done */ + AtEOXact_Files(); + AtEOXact_ComboCid(); + AtEOXact_HashTables(true); + AtEOXact_PgStat(true); + pgstat_report_xact_timestamp(0); + + CurrentResourceOwner = NULL; + ResourceOwnerDelete(TopTransactionResourceOwner); + s->curTransactionOwner = NULL; + CurTransactionResourceOwner = NULL; + TopTransactionResourceOwner = NULL; + + AtCommit_Memory(); + + s->transactionId = InvalidTransactionId; + s->subTransactionId = InvalidSubTransactionId; + s->nestingLevel = 0; + s->gucNestLevel = 0; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; + + /* + * done with commit processing, set current transaction state back to + * default + */ + s->state = TRANS_DEFAULT; + + RESUME_INTERRUPTS(); +} + + +/* + * PrepareTransaction + * + * NB: if you change this routine, better look at CommitTransaction too! + */ +static void +PrepareTransaction(void) +{ + TransactionState s = CurrentTransactionState; + TransactionId xid = GetCurrentTransactionId(); + GlobalTransaction gxact; + TimestampTz prepared_at; + + ShowTransactionState("PrepareTransaction"); + + /* + * check the current transaction state + */ + if (s->state != TRANS_INPROGRESS) + elog(WARNING, "PrepareTransaction while in %s state", + TransStateAsString(s->state)); + Assert(s->parent == NULL); + + /* + * Do pre-commit processing (most of this stuff requires database access, + * and in fact could still cause an error...) 
* - * LWLockAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked by - * xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot - if - * xid 2' GetSnapshotData sees xid 1 as running then it must see xid 0 - * as running as well or it will see two tuple versions - one deleted - * by xid 1 and one inserted by xid 0. See notes in GetSnapshotData. + * It is possible for PrepareHoldablePortals to invoke functions that + * queue deferred triggers, and it's also possible that triggers create + * holdable cursors. So we have to loop until there's nothing left to do. */ - if (MyProc != NULL) + for (;;) { - /* Lock SInvalLock because that's what GetSnapshotData uses. */ - LWLockAcquire(SInvalLock, LW_EXCLUSIVE); - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; - - /* Clear the subtransaction-XID cache too while holding the lock */ - MyProc->subxids.nxids = 0; - MyProc->subxids.overflowed = false; + /* + * Fire all currently pending deferred triggers. + */ + AfterTriggerFireDeferred(); - LWLockRelease(SInvalLock); + /* + * Convert any open holdable cursors into static portals. If there + * weren't any, we are done ... otherwise loop back to check if they + * queued deferred triggers. Lather, rinse, repeat. + */ + if (!PrepareHoldablePortals()) + break; } + /* Now we can shut down the deferred-trigger manager */ + AfterTriggerEndXact(true); + + /* Close any open regular cursors */ + AtCommit_Portals(); + /* - * This is all post-commit cleanup. Note that if an error is raised - * here, it's too late to abort the transaction. This should be just - * noncritical resource releasing. 
+ * Let ON COMMIT management do its thing (must happen after closing + * cursors, to avoid dangling-reference problems) + */ + PreCommit_on_commit_actions(); + + /* close large objects before lower-level cleanup */ + AtEOXact_LargeObject(true); + + /* NOTIFY and flatfiles will be handled below */ + + /* + * Don't allow PREPARE TRANSACTION if we've accessed a temporary table + * in this transaction. Having the prepared xact hold locks on another + * backend's temp table seems a bad idea --- for instance it would prevent + * the backend from exiting. There are other problems too, such as how + * to clean up the source backend's local buffers and ON COMMIT state + * if the prepared xact includes a DROP of a temp table. * - * The ordering of operations is not entirely random. The idea is: - * release resources visible to other backends (eg, files, buffer - * pins); then release locks; then release backend-local resources. We - * want to release locks at the point where any backend waiting for us - * will see our transaction as being fully cleaned up. + * We must check this after executing any ON COMMIT actions, because + * they might still access a temp relation. * - * Resources that can be associated with individual queries are handled - * by the ResourceOwner mechanism. The other calls here are for - * backend-wide state. + * XXX In principle this could be relaxed to allow some useful special + * cases, such as a temp table created and dropped all within the + * transaction. That seems to require much more bookkeeping though. 
*/ + if (MyXactAccessedTempRel) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot PREPARE a transaction that has operated on temporary tables"))); - smgrDoPendingDeletes(true); - /* smgrcommit already done */ + /* Prevent cancel/die interrupt while cleaning up */ + HOLD_INTERRUPTS(); + + /* + * set the current transaction state information appropriately during + * prepare processing + */ + s->state = TRANS_PREPARE; + + prepared_at = GetCurrentTimestamp(); + + /* Tell bufmgr and smgr to prepare for commit */ + BufmgrCommit(); + + /* + * Reserve the GID for this transaction. This could fail if the requested + * GID is invalid or already in use. + */ + gxact = MarkAsPreparing(xid, prepareGID, prepared_at, + GetUserId(), MyDatabaseId); + prepareGID = NULL; + + /* + * Collect data for the 2PC state file. Note that in general, no actual + * state change should happen in the called modules during this step, + * since it's still possible to fail before commit, and in that case we + * want transaction abort to be able to clean up. (In particular, the + * AtPrepare routines may error out if they find cases they cannot + * handle.) State cleanup should happen in the PostPrepare routines + * below. However, some modules can go ahead and clear state here because + * they wouldn't do anything with it during abort anyway. + * + * Note: because the 2PC state file records will be replayed in the same + * order they are made, the order of these calls has to match the order in + * which we want things to happen during COMMIT PREPARED or ROLLBACK + * PREPARED; in particular, pay attention to whether things should happen + * before or after releasing the transaction's locks. + */ + StartPrepare(gxact); + + AtPrepare_Notify(); + AtPrepare_UpdateFlatFiles(); + AtPrepare_Inval(); + AtPrepare_Locks(); + AtPrepare_PgStat(); + + /* + * Here is where we really truly prepare. 
+ * + * We have to record transaction prepares even if we didn't make any + * updates, because the transaction manager might get confused if we lose + * a global transaction. + */ + EndPrepare(gxact); + + /* + * Now we clean up backend-internal state and release internal resources. + */ + + /* Reset XactLastRecEnd until the next transaction writes something */ + XactLastRecEnd.xrecoff = 0; + + /* + * Let others know about no transaction in progress by me. This has to be + * done *after* the prepared transaction has been marked valid, else + * someone may think it is unlocked and recyclable. + */ + ProcArrayClearTransaction(MyProc); - CallXactCallbacks(XACT_EVENT_COMMIT, InvalidTransactionId); + /* + * This is all post-transaction cleanup. Note that if an error is raised + * here, it's too late to abort the transaction. This should be just + * noncritical resource releasing. See notes in CommitTransaction. + */ + + CallXactCallbacks(XACT_EVENT_PREPARE); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, true, true); - /* - * Make catalog changes visible to all backends. This has to happen - * after relcache references are dropped (see comments for - * AtEOXact_RelationCache), but before locks are released (if anyone - * is waiting for lock on a relation we've modified, we want them to - * know about the catalog change before they start using the - * relation). 
- */ - AtEOXact_Inval(true); + /* Check we've released all buffer pins */ + AtEOXact_Buffers(true); + + /* Clean up the relation cache */ + AtEOXact_RelationCache(true); + + /* notify and flatfiles don't need a postprepare call */ + + PostPrepare_PgStat(); + + PostPrepare_Inval(); + + PostPrepare_smgr(); + + AtEOXact_MultiXact(); + + PostPrepare_Locks(xid); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, @@ -1359,12 +1969,20 @@ CommitTransaction(void) RESOURCE_RELEASE_AFTER_LOCKS, true, true); - AtEOXact_GUC(true, false); + /* Check we've released all catcache entries */ + AtEOXact_CatCache(true); + + /* PREPARE acts the same as COMMIT as far as GUC is concerned */ + AtEOXact_GUC(true, 1); AtEOXact_SPI(true); - AtEOXact_on_commit_actions(true, s->transactionIdData); + AtEOXact_xml(); + AtEOXact_on_commit_actions(true); AtEOXact_Namespace(true); + /* smgrcommit already done */ AtEOXact_Files(); - pgstat_count_xact_commit(); + AtEOXact_ComboCid(); + AtEOXact_HashTables(true); + /* don't call AtEOXact_PgStat here */ CurrentResourceOwner = NULL; ResourceOwnerDelete(TopTransactionResourceOwner); @@ -1374,18 +1992,24 @@ CommitTransaction(void) AtCommit_Memory(); + s->transactionId = InvalidTransactionId; + s->subTransactionId = InvalidSubTransactionId; s->nestingLevel = 0; - s->childXids = NIL; + s->gucNestLevel = 0; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; /* - * done with commit processing, set current transaction state back to - * default + * done with 1st phase commit processing, set current transaction state + * back to default */ s->state = TRANS_DEFAULT; RESUME_INTERRUPTS(); } + /* * AbortTransaction */ @@ -1393,15 +2017,20 @@ static void AbortTransaction(void) { TransactionState s = CurrentTransactionState; + TransactionId latestXid; /* Prevent cancel/die interrupt while cleaning up */ HOLD_INTERRUPTS(); + /* Make sure we have a valid memory context and resource owner */ + AtAbort_Memory(); + 
AtAbort_ResourceOwner(); + /* * Release any LW locks we might be holding as quickly as possible. * (Regular locks, however, must be held till we finish aborting.) - * Releasing LW locks is critical since we might try to grab them - * again while cleaning up! + * Releasing LW locks is critical since we might try to grab them again + * while cleaning up! */ LWLockReleaseAll(); @@ -1410,99 +2039,95 @@ AbortTransaction(void) UnlockBuffers(); /* - * Also clean up any open wait for lock, since the lock manager will - * choke if we try to wait for another lock before doing this. + * Also clean up any open wait for lock, since the lock manager will choke + * if we try to wait for another lock before doing this. */ LockWaitCancel(); /* * check the current transaction state */ - if (s->state != TRANS_INPROGRESS) + if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE) elog(WARNING, "AbortTransaction while in %s state", TransStateAsString(s->state)); Assert(s->parent == NULL); /* - * set the current transaction state information appropriately during - * the abort processing + * set the current transaction state information appropriately during the + * abort processing */ s->state = TRANS_ABORT; - /* Make sure we are in a valid memory context */ - AtAbort_Memory(); - /* - * Reset user id which might have been changed transiently. We cannot - * use s->currentUser, but must get the session userid from - * miscinit.c. + * Reset user ID which might have been changed transiently. We need this + * to clean up in case control escaped out of a SECURITY DEFINER function + * or other local change of CurrentUserId; therefore, the prior value + * of SecurityDefinerContext also needs to be restored. * - * (Note: it is not necessary to restore session authorization here - * because that can only be changed via GUC, and GUC will take care of - * rolling it back if need be. 
However, an error within a SECURITY - * DEFINER function could send control here with the wrong current - * userid.) + * (Note: it is not necessary to restore session authorization or role + * settings here because those can only be changed via GUC, and GUC will + * take care of rolling them back if need be.) */ - SetUserId(GetSessionUserId()); + SetUserIdAndContext(s->prevUser, s->prevSecDefCxt); /* * do abort processing */ - DeferredTriggerAbortXact(); + AfterTriggerEndXact(false); AtAbort_Portals(); AtEOXact_LargeObject(false); /* 'false' means it's abort */ AtAbort_Notify(); - AtEOXact_UpdatePasswordFile(false); - - /* Advertise the fact that we aborted in pg_clog. */ - RecordTransactionAbort(); + AtEOXact_UpdateFlatFiles(false); /* - * Let others know about no transaction in progress by me. Note that - * this must be done _before_ releasing locks we hold and _after_ - * RecordTransactionAbort. + * Advertise the fact that we aborted in pg_clog (assuming that we got as + * far as assigning an XID to advertise). */ - if (MyProc != NULL) - { - /* Lock SInvalLock because that's what GetSnapshotData uses. */ - LWLockAcquire(SInvalLock, LW_EXCLUSIVE); - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + latestXid = RecordTransactionAbort(false); - /* Clear the subtransaction-XID cache too while holding the lock */ - MyProc->subxids.nxids = 0; - MyProc->subxids.overflowed = false; + TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid); - LWLockRelease(SInvalLock); - } + /* + * Let others know about no transaction in progress by me. Note that this + * must be done _before_ releasing locks we hold and _after_ + * RecordTransactionAbort. + */ + ProcArrayEndTransaction(MyProc, latestXid); /* * Post-abort cleanup. See notes in CommitTransaction() concerning * ordering. 
*/ - smgrDoPendingDeletes(false); - smgrabort(); - - CallXactCallbacks(XACT_EVENT_ABORT, InvalidTransactionId); + CallXactCallbacks(XACT_EVENT_ABORT); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); + AtEOXact_Buffers(false); + AtEOXact_RelationCache(false); AtEOXact_Inval(false); + smgrDoPendingDeletes(false); + AtEOXact_MultiXact(); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, false, true); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_AFTER_LOCKS, false, true); + AtEOXact_CatCache(false); - AtEOXact_GUC(false, false); + AtEOXact_GUC(false, 1); AtEOXact_SPI(false); - AtEOXact_on_commit_actions(false, s->transactionIdData); + AtEOXact_xml(); + AtEOXact_on_commit_actions(false); AtEOXact_Namespace(false); + smgrabort(); AtEOXact_Files(); - pgstat_count_xact_rollback(); + AtEOXact_ComboCid(); + AtEOXact_HashTables(false); + AtEOXact_PgStat(false); + pgstat_report_xact_timestamp(0); /* * State remains TRANS_ABORT until CleanupTransaction(). @@ -1531,15 +2156,21 @@ CleanupTransaction(void) AtCleanup_Portals(); /* now safe to release portal memory */ CurrentResourceOwner = NULL; /* and resource owner */ - ResourceOwnerDelete(TopTransactionResourceOwner); + if (TopTransactionResourceOwner) + ResourceOwnerDelete(TopTransactionResourceOwner); s->curTransactionOwner = NULL; CurTransactionResourceOwner = NULL; TopTransactionResourceOwner = NULL; AtCleanup_Memory(); /* and transaction memory */ + s->transactionId = InvalidTransactionId; + s->subTransactionId = InvalidSubTransactionId; s->nestingLevel = 0; - s->childXids = NIL; + s->gucNestLevel = 0; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; /* * done with abort processing, set current transaction state back to @@ -1559,8 +2190,8 @@ StartTransactionCommand(void) switch (s->blockState) { /* - * if we aren't in a transaction block, we just do our usual - * start transaction. 
+ * if we aren't in a transaction block, we just do our usual start + * transaction. */ case TBLOCK_DEFAULT: StartTransaction(); @@ -1568,20 +2199,23 @@ StartTransactionCommand(void) break; /* - * This is the case when we are somewhere in a transaction - * block and about to start a new command. For now we do - * nothing but someday we may do command-local resource - * initialization. + * We are somewhere in a transaction block or subtransaction and + * about to start a new command. For now we do nothing, but + * someday we may do command-local resource initialization. (Note + * that any needed CommandCounterIncrement was done by the + * previous CommitTransactionCommand.) */ case TBLOCK_INPROGRESS: case TBLOCK_SUBINPROGRESS: break; /* - * Here we are in the middle of a transaction block but one of - * the commands caused an abort so we do nothing but remain in - * the abort state. Eventually we will get to the "END - * TRANSACTION" which will set things straight. + * Here we are in a failed transaction block (one of the commands + * caused an abort) so we do nothing but remain in the abort + * state. Eventually we will get a ROLLBACK command which will + * get us out of this state. (It is up to other code to ensure + * that no commands other than ROLLBACK will be processed in these + * states.) 
*/ case TBLOCK_ABORT: case TBLOCK_SUBABORT: @@ -1593,12 +2227,14 @@ StartTransactionCommand(void) case TBLOCK_SUBBEGIN: case TBLOCK_END: case TBLOCK_SUBEND: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: - case TBLOCK_ENDABORT: - elog(FATAL, "StartTransactionCommand: unexpected state %s", + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: + elog(ERROR, "StartTransactionCommand: unexpected state %s", BlockStateAsString(s->blockState)); break; } @@ -1624,18 +2260,16 @@ CommitTransactionCommand(void) /* * This shouldn't happen, because it means the previous * StartTransactionCommand didn't set the STARTED state - * appropriately, or we didn't manage previous pending abort - * states. + * appropriately. */ case TBLOCK_DEFAULT: - case TBLOCK_SUBABORT_PENDING: elog(FATAL, "CommitTransactionCommand: unexpected state %s", BlockStateAsString(s->blockState)); break; /* * If we aren't in a transaction block, just do our usual - * transaction commit. + * transaction commit, and return to the idle state. */ case TBLOCK_STARTED: CommitTransaction(); @@ -1643,10 +2277,10 @@ CommitTransactionCommand(void) break; /* - * This is the case right after we get a "BEGIN TRANSACTION" - * command, but the user hasn't done anything else yet, so we - * change to the "transaction block in progress" state and - * return. + * We are completing a "BEGIN TRANSACTION" command, so we change + * to the "transaction block in progress" state and return. (We + * assume the BEGIN did nothing to the database, so we need no + * CommandCounterIncrement.) */ case TBLOCK_BEGIN: s->blockState = TBLOCK_INPROGRESS; @@ -1654,65 +2288,67 @@ CommitTransactionCommand(void) /* * This is the case when we have finished executing a command - * someplace within a transaction block. We increment the - * command counter and return. 
+ * someplace within a transaction block. We increment the command + * counter and return. */ case TBLOCK_INPROGRESS: + case TBLOCK_SUBINPROGRESS: CommandCounterIncrement(); break; /* - * This is the case when we just got the "END TRANSACTION" - * statement, so we commit the transaction and go back to the - * default state. + * We are completing a "COMMIT" command. Do it and return to the + * idle state. */ case TBLOCK_END: - /* commit all open subtransactions */ - if (s->nestingLevel > 1) - CommitTransactionToLevel(2); - s = CurrentTransactionState; - Assert(s->parent == NULL); - /* and now the outer transaction */ CommitTransaction(); s->blockState = TBLOCK_DEFAULT; break; /* - * Here we are in the middle of a transaction block but one of - * the commands caused an abort so we do nothing but remain in - * the abort state. Eventually we will get to the "END - * TRANSACTION" which will set things straight. + * Here we are in the middle of a transaction block but one of the + * commands caused an abort so we do nothing but remain in the + * abort state. Eventually we will get a ROLLBACK command. */ case TBLOCK_ABORT: + case TBLOCK_SUBABORT: + break; + + /* + * Here we were in an aborted transaction block and we just got + * the ROLLBACK command from the user, so clean up the + * already-aborted transaction and return to the idle state. + */ + case TBLOCK_ABORT_END: + CleanupTransaction(); + s->blockState = TBLOCK_DEFAULT; break; /* - * Here we were in an aborted transaction block which just - * processed the "END TRANSACTION" command from the user, so - * clean up and return to the default state. + * Here we were in a perfectly good transaction block but the user + * told us to ROLLBACK anyway. We have to abort the transaction + * and then clean up. */ - case TBLOCK_ENDABORT: + case TBLOCK_ABORT_PENDING: + AbortTransaction(); CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; /* - * Ditto, but in a subtransaction.
AbortOutOfAnyTransaction - * will do the dirty work. + * We are completing a "PREPARE TRANSACTION" command. Do it and + * return to the idle state. */ - case TBLOCK_SUBENDABORT_ALL: - AbortOutOfAnyTransaction(); - s = CurrentTransactionState; /* changed by - * AbortOutOfAnyTransaction - * */ - /* AbortOutOfAnyTransaction sets the blockState */ + case TBLOCK_PREPARE: + PrepareTransaction(); + s->blockState = TBLOCK_DEFAULT; break; /* * We were just issued a SAVEPOINT inside a transaction block. * Start a subtransaction. (DefineSavepoint already did - * PushTransaction, so as to have someplace to put the - * SUBBEGIN state.) + * PushTransaction, so as to have someplace to put the SUBBEGIN + * state.) */ case TBLOCK_SUBBEGIN: StartSubTransaction(); @@ -1720,56 +2356,78 @@ CommitTransactionCommand(void) break; /* - * Inside a subtransaction, increment the command counter. - */ - case TBLOCK_SUBINPROGRESS: - CommandCounterIncrement(); - break; - - /* - * We were issued a RELEASE command, so we end the current - * subtransaction and return to the parent transaction. - * - * Since RELEASE can exit multiple levels of subtransaction, we - * must loop here until we get out of all SUBEND'ed levels. + * We were issued a COMMIT or RELEASE command, so we end the + * current subtransaction and return to the parent transaction. + * The parent might be ended too, so repeat till we are all the + * way out or find an INPROGRESS transaction. 
*/ case TBLOCK_SUBEND: do { - CommitSubTransaction(); - PopTransaction(); - s = CurrentTransactionState; /* changed by pop */ - } while (s->blockState == TBLOCK_SUBEND); + CommitSubTransaction(); + s = CurrentTransactionState; /* changed by pop */ + } while (s->blockState == TBLOCK_SUBEND); + /* If we had a COMMIT command, finish off the main xact too */ + if (s->blockState == TBLOCK_END) + { + Assert(s->parent == NULL); + CommitTransaction(); + s->blockState = TBLOCK_DEFAULT; + } + else if (s->blockState == TBLOCK_PREPARE) + { + Assert(s->parent == NULL); + PrepareTransaction(); + s->blockState = TBLOCK_DEFAULT; + } + else + { + Assert(s->blockState == TBLOCK_INPROGRESS || + s->blockState == TBLOCK_SUBINPROGRESS); + } break; /* - * If we are in an aborted subtransaction, do nothing. + * The current already-failed subtransaction is ending due to a + * ROLLBACK or ROLLBACK TO command, so pop it and recursively + * examine the parent (which could be in any of several states). */ - case TBLOCK_SUBABORT: + case TBLOCK_SUBABORT_END: + CleanupSubTransaction(); + CommitTransactionCommand(); break; /* - * The current subtransaction is ending. Do the equivalent of - * a ROLLBACK TO followed by a RELEASE command. + * As above, but it's not dead yet, so abort first. */ - case TBLOCK_SUBENDABORT_RELEASE: - CleanupAbortedSubTransactions(false); + case TBLOCK_SUBABORT_PENDING: + AbortSubTransaction(); + CleanupSubTransaction(); + CommitTransactionCommand(); break; /* - * The current subtransaction is ending due to a ROLLBACK TO - * command, so close all savepoints up to the target level. - * When finished, recreate the savepoint. + * The current subtransaction is the target of a ROLLBACK TO + * command. Abort and pop it, then start a new subtransaction + * with the same name. 
*/ - case TBLOCK_SUBENDABORT: + case TBLOCK_SUBRESTART: { - char *name = CleanupAbortedSubTransactions(true); + char *name; + int savepointLevel; + + /* save name and keep Cleanup from freeing it */ + name = s->name; + s->name = NULL; + savepointLevel = s->savepointLevel; + + AbortSubTransaction(); + CleanupSubTransaction(); - Assert(PointerIsValid(name)); - DefineSavepoint(name); - s = CurrentTransactionState; /* changed by - * DefineSavepoint */ - pfree(name); + DefineSavepoint(NULL); + s = CurrentTransactionState; /* changed by push */ + s->name = name; + s->savepointLevel = savepointLevel; /* This is the same as TBLOCK_SUBBEGIN case */ AssertState(s->blockState == TBLOCK_SUBBEGIN); @@ -1777,56 +2435,35 @@ CommitTransactionCommand(void) s->blockState = TBLOCK_SUBINPROGRESS; } break; - } -} -/* - * CleanupAbortedSubTransactions - * - * Helper function for CommitTransactionCommand. Aborts and cleans up - * dead subtransactions after a ROLLBACK TO command. Optionally returns - * the name of the last dead subtransaction so it can be reused to redefine - * the savepoint. (Caller is responsible for pfree'ing the result.) - */ -static char * -CleanupAbortedSubTransactions(bool returnName) -{ - TransactionState s = CurrentTransactionState; - char *name = NULL; + /* + * Same as above, but the subtransaction had already failed, so we + * don't need AbortSubTransaction. + */ + case TBLOCK_SUBABORT_RESTART: + { + char *name; + int savepointLevel; - AssertState(PointerIsValid(s->parent)); - Assert(s->parent->blockState == TBLOCK_SUBINPROGRESS || - s->parent->blockState == TBLOCK_INPROGRESS || - s->parent->blockState == TBLOCK_STARTED || - s->parent->blockState == TBLOCK_SUBABORT_PENDING); + /* save name and keep Cleanup from freeing it */ + name = s->name; + s->name = NULL; + savepointLevel = s->savepointLevel; - /* - * Abort everything up to the target level. The current - * subtransaction only needs cleanup. 
If we need to save the name, - * look for the last subtransaction in TBLOCK_SUBABORT_PENDING state. - */ - if (returnName && s->parent->blockState != TBLOCK_SUBABORT_PENDING) - name = MemoryContextStrdup(TopMemoryContext, s->name); + CleanupSubTransaction(); - CleanupSubTransaction(); - PopTransaction(); - s = CurrentTransactionState; /* changed by pop */ + DefineSavepoint(NULL); + s = CurrentTransactionState; /* changed by push */ + s->name = name; + s->savepointLevel = savepointLevel; - while (s->blockState == TBLOCK_SUBABORT_PENDING) - { - AbortSubTransaction(); - if (returnName && s->parent->blockState != TBLOCK_SUBABORT_PENDING) - name = MemoryContextStrdup(TopMemoryContext, s->name); - CleanupSubTransaction(); - PopTransaction(); - s = CurrentTransactionState; + /* This is the same as TBLOCK_SUBBEGIN case */ + AssertState(s->blockState == TBLOCK_SUBBEGIN); + StartSubTransaction(); + s->blockState = TBLOCK_SUBINPROGRESS; + } + break; } - - AssertState(s->blockState == TBLOCK_SUBINPROGRESS || - s->blockState == TBLOCK_INPROGRESS || - s->blockState == TBLOCK_STARTED); - - return name; } /* @@ -1839,15 +2476,30 @@ AbortCurrentTransaction(void) switch (s->blockState) { - /* - * we aren't in a transaction, so we do nothing. - */ case TBLOCK_DEFAULT: + if (s->state == TRANS_DEFAULT) + { + /* we are idle, so nothing to do */ + } + else + { + /* + * We can get here after an error during transaction start + * (state will be TRANS_START). Need to clean up the + * incompletely started transaction. First, adjust the + * low-level state to suppress warning message from + * AbortTransaction. + */ + if (s->state == TRANS_START) + s->state = TRANS_INPROGRESS; + AbortTransaction(); + CleanupTransaction(); + } break; /* - * if we aren't in a transaction block, we just do the basic - * abort & cleanup transaction. + * if we aren't in a transaction block, we just do the basic abort + * & cleanup transaction. 
*/ case TBLOCK_STARTED: AbortTransaction(); @@ -1856,33 +2508,33 @@ AbortCurrentTransaction(void) break; /* - * If we are in TBLOCK_BEGIN it means something screwed up - * right after reading "BEGIN TRANSACTION" so we enter the - * abort state. Eventually an "END TRANSACTION" will fix - * things. + * If we are in TBLOCK_BEGIN it means something screwed up right + * after reading "BEGIN TRANSACTION". We assume that the user + * will interpret the error as meaning the BEGIN failed to get him + * into a transaction block, so we should abort and return to idle + * state. */ case TBLOCK_BEGIN: AbortTransaction(); - s->blockState = TBLOCK_ABORT; - /* CleanupTransaction happens when we exit TBLOCK_ENDABORT */ + CleanupTransaction(); + s->blockState = TBLOCK_DEFAULT; break; /* - * This is the case when we are somewhere in a transaction - * block and we've gotten a failure, so we abort the - * transaction and set up the persistent ABORT state. We will - * stay in ABORT until we get an "END TRANSACTION". + * We are somewhere in a transaction block and we've gotten a + * failure, so we abort the transaction and set up the persistent + * ABORT state. We will stay in ABORT until we get a ROLLBACK. */ case TBLOCK_INPROGRESS: AbortTransaction(); s->blockState = TBLOCK_ABORT; - /* CleanupTransaction happens when we exit TBLOCK_ENDABORT */ + /* CleanupTransaction happens when we exit TBLOCK_ABORT_END */ break; /* - * Here, the system was fouled up just after the user wanted - * to end the transaction block so we abort the transaction - * and return to the default state. + * Here, we failed while trying to COMMIT. Clean up the + * transaction and return to idle state (we do not want to stay in + * the transaction). */ case TBLOCK_END: AbortTransaction(); @@ -1891,75 +2543,77 @@ AbortCurrentTransaction(void) break; /* - * Here, we are already in an aborted transaction state and - * are waiting for an "END TRANSACTION" to come along and lo - * and behold, we abort again! 
So we just remain in the abort - * state. + * Here, we are already in an aborted transaction state and are + * waiting for a ROLLBACK, but for some reason we failed again! So + * we just remain in the abort state. */ case TBLOCK_ABORT: case TBLOCK_SUBABORT: break; /* - * Here we were in an aborted transaction block which just - * processed the "END TRANSACTION" command but somehow aborted - * again.. since we must have done the abort processing, we - * clean up and return to the default state. + * We are in a failed transaction and we got the ROLLBACK command. + * We have already aborted; we just need to clean up and go to idle + * state. + */ + case TBLOCK_ABORT_END: + CleanupTransaction(); + s->blockState = TBLOCK_DEFAULT; + break; + + /* + * We are in a live transaction and we got a ROLLBACK command. + * Abort, clean up, go to idle state. */ - case TBLOCK_ENDABORT: + case TBLOCK_ABORT_PENDING: + AbortTransaction(); CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; /* - * If we are just starting a subtransaction, put it in aborted - * state. + * Here, we failed while trying to PREPARE. Clean up the + * transaction and return to idle state (we do not want to stay in + * the transaction). */ - case TBLOCK_SUBBEGIN: - StartAbortedSubTransaction(); - s->blockState = TBLOCK_SUBABORT; + case TBLOCK_PREPARE: + AbortTransaction(); + CleanupTransaction(); + s->blockState = TBLOCK_DEFAULT; break; + /* + * We got an error inside a subtransaction. Abort just the + * subtransaction, and go to the persistent SUBABORT state until + * we get ROLLBACK. + */ case TBLOCK_SUBINPROGRESS: AbortSubTransaction(); s->blockState = TBLOCK_SUBABORT; break; /* - * If we are aborting an ending transaction, we have to abort - * the parent transaction too. + * If we failed while trying to create a subtransaction, clean up + * the broken subtransaction and abort the parent. The same + * applies if we get a failure while ending a subtransaction.
*/ + case TBLOCK_SUBBEGIN: case TBLOCK_SUBEND: case TBLOCK_SUBABORT_PENDING: + case TBLOCK_SUBRESTART: AbortSubTransaction(); CleanupSubTransaction(); - PopTransaction(); - s = CurrentTransactionState; /* changed by pop */ - Assert(s->blockState != TBLOCK_SUBEND && - s->blockState != TBLOCK_SUBENDABORT); AbortCurrentTransaction(); break; /* * Same as above, except the Abort() was already done. */ - case TBLOCK_SUBENDABORT: - case TBLOCK_SUBENDABORT_RELEASE: + case TBLOCK_SUBABORT_END: + case TBLOCK_SUBABORT_RESTART: CleanupSubTransaction(); - PopTransaction(); - s = CurrentTransactionState; /* changed by pop */ - Assert(s->blockState != TBLOCK_SUBEND && - s->blockState != TBLOCK_SUBENDABORT); AbortCurrentTransaction(); break; - - /* - * We are already aborting the whole transaction tree. Do - * nothing, CommitTransactionCommand will call - * AbortOutOfAnyTransaction and set things straight. - */ - case TBLOCK_SUBENDABORT_ALL: - break; } } @@ -1975,12 +2629,14 @@ AbortCurrentTransaction(void) * could issue more commands and possibly cause a failure after the statement * completes). Subtransactions are verboten too. * - * stmtNode: pointer to parameter block for statement; this is used in - * a very klugy way to determine whether we are inside a function. - * stmtType: statement type name for error messages. + * isTopLevel: passed down from ProcessUtility to determine whether we are + * inside a function or multi-query querystring. (We will always fail if + * this is false, but it's convenient to centralize the check here instead of + * making callers do it.) + * stmtType: statement type name, for error messages. */ void -PreventTransactionChain(void *stmtNode, const char *stmtType) +PreventTransactionChain(bool isTopLevel, const char *stmtType) { /* * xact block already started? @@ -2003,15 +2659,14 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) stmtType))); /* - * Are we inside a function call? 
If the statement's parameter block - * was allocated in QueryContext, assume it is an interactive command. - * Otherwise assume it is coming from a function. + * inside a function call? */ - if (!MemoryContextContains(QueryContext, stmtNode)) + if (!isTopLevel) ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), /* translator: %s represents an SQL statement name */ - errmsg("%s cannot be executed from a function", stmtType))); + errmsg("%s cannot be executed from a function or multi-command string", + stmtType))); /* If we got past IsTransactionBlock test, should be in default state */ if (CurrentTransactionState->blockState != TBLOCK_DEFAULT && @@ -2033,12 +2688,12 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) * use of the current statement's results. Likewise subtransactions. * Thus this is an inverse for PreventTransactionChain. * - * stmtNode: pointer to parameter block for statement; this is used in - * a very klugy way to determine whether we are inside a function. - * stmtType: statement type name for error messages. + * isTopLevel: passed down from ProcessUtility to determine whether we are + * inside a function. + * stmtType: statement type name, for error messages. */ void -RequireTransactionChain(void *stmtNode, const char *stmtType) +RequireTransactionChain(bool isTopLevel, const char *stmtType) { /* * xact block already started? @@ -2053,16 +2708,15 @@ RequireTransactionChain(void *stmtNode, const char *stmtType) return; /* - * Are we inside a function call? If the statement's parameter block - * was allocated in QueryContext, assume it is an interactive command. - * Otherwise assume it is coming from a function. + * inside a function call? 
*/ - if (!MemoryContextContains(QueryContext, stmtNode)) + if (!isTopLevel) return; + ereport(ERROR, (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), /* translator: %s represents an SQL statement name */ - errmsg("%s may only be used in transaction blocks", + errmsg("%s can only be used in transaction blocks", stmtType))); } @@ -2073,15 +2727,15 @@ RequireTransactionChain(void *stmtNode, const char *stmtType) * a transaction block than when running as single commands. ANALYZE is * currently the only example. * - * stmtNode: pointer to parameter block for statement; this is used in - * a very klugy way to determine whether we are inside a function. + * isTopLevel: passed down from ProcessUtility to determine whether we are + * inside a function. */ bool -IsInTransactionChain(void *stmtNode) +IsInTransactionChain(bool isTopLevel) { /* - * Return true on same conditions that would make - * PreventTransactionChain error out + * Return true on same conditions that would make PreventTransactionChain + * error out */ if (IsTransactionBlock()) return true; @@ -2089,7 +2743,7 @@ IsInTransactionChain(void *stmtNode) if (IsSubTransaction()) return true; - if (!MemoryContextContains(QueryContext, stmtNode)) + if (!isTopLevel) return true; if (CurrentTransactionState->blockState != TBLOCK_DEFAULT && @@ -2109,9 +2763,7 @@ IsInTransactionChain(void *stmtNode) * (mainly because it's easier to control the order that way, where needed). * * At transaction end, the callback occurs post-commit or post-abort, so the - * callback functions can only do noncritical cleanup. At subtransaction - * start, the callback is called when the subtransaction has finished - * initializing. + * callback functions can only do noncritical cleanup. 
*/ void RegisterXactCallback(XactCallback callback, void *arg) @@ -2148,12 +2800,69 @@ UnregisterXactCallback(XactCallback callback, void *arg) } static void -CallXactCallbacks(XactEvent event, TransactionId parentXid) +CallXactCallbacks(XactEvent event) { XactCallbackItem *item; for (item = Xact_callbacks; item; item = item->next) - (*item->callback) (event, parentXid, item->arg); + (*item->callback) (event, item->arg); +} + + +/* + * Register or deregister callback functions for start- and end-of-subxact + * operations. + * + * Pretty much same as above, but for subtransaction events. + * + * At subtransaction end, the callback occurs post-subcommit or post-subabort, + * so the callback functions can only do noncritical cleanup. At + * subtransaction start, the callback is called when the subtransaction has + * finished initializing. + */ +void +RegisterSubXactCallback(SubXactCallback callback, void *arg) +{ + SubXactCallbackItem *item; + + item = (SubXactCallbackItem *) + MemoryContextAlloc(TopMemoryContext, sizeof(SubXactCallbackItem)); + item->callback = callback; + item->arg = arg; + item->next = SubXact_callbacks; + SubXact_callbacks = item; +} + +void +UnregisterSubXactCallback(SubXactCallback callback, void *arg) +{ + SubXactCallbackItem *item; + SubXactCallbackItem *prev; + + prev = NULL; + for (item = SubXact_callbacks; item; prev = item, item = item->next) + { + if (item->callback == callback && item->arg == arg) + { + if (prev) + prev->next = item->next; + else + SubXact_callbacks = item->next; + pfree(item); + break; + } + } +} + +static void +CallSubXactCallbacks(SubXactEvent event, + SubTransactionId mySubid, + SubTransactionId parentSubid) +{ + SubXactCallbackItem *item; + + for (item = SubXact_callbacks; item; item = item->next) + (*item->callback) (event, mySubid, parentSubid, item->arg); } @@ -2174,8 +2883,7 @@ BeginTransactionBlock(void) switch (s->blockState) { /* - * We are not inside a transaction block, so allow one to - * begin. 
+ * We are not inside a transaction block, so allow one to begin. */ case TBLOCK_STARTED: s->blockState = TBLOCK_BEGIN; @@ -2190,32 +2898,90 @@ BeginTransactionBlock(void) case TBLOCK_SUBABORT: ereport(WARNING, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), - errmsg("there is already a transaction in progress"))); + errmsg("there is already a transaction in progress"))); break; - /* These cases are invalid. Reject them altogether. */ + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: case TBLOCK_SUBBEGIN: - case TBLOCK_ENDABORT: case TBLOCK_END: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: - case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: case TBLOCK_SUBEND: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: + case TBLOCK_SUBABORT_PENDING: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "BeginTransactionBlock: unexpected state %s", BlockStateAsString(s->blockState)); break; } } +/* + * PrepareTransactionBlock + * This executes a PREPARE command. + * + * Since PREPARE may actually do a ROLLBACK, the result indicates what + * happened: TRUE for PREPARE, FALSE for ROLLBACK. + * + * Note that we don't actually do anything here except change blockState. + * The real work will be done in the upcoming PrepareTransaction(). + * We do it this way because it's not convenient to change memory context, + * resource owner, etc while executing inside a Portal. 
+ */ +bool +PrepareTransactionBlock(char *gid) +{ + TransactionState s; + bool result; + + /* Set up to commit the current transaction */ + result = EndTransactionBlock(); + + /* If successful, change outer tblock state to PREPARE */ + if (result) + { + s = CurrentTransactionState; + + while (s->parent != NULL) + s = s->parent; + + if (s->blockState == TBLOCK_END) + { + /* Save GID where PrepareTransaction can find it again */ + prepareGID = MemoryContextStrdup(TopTransactionContext, gid); + + s->blockState = TBLOCK_PREPARE; + } + else + { + /* + * ignore case where we are not in a transaction; + * EndTransactionBlock already issued a warning. + */ + Assert(s->blockState == TBLOCK_STARTED); + /* Don't send back a PREPARE result tag... */ + result = false; + } + } + + return result; +} + /* * EndTransactionBlock * This executes a COMMIT command. * * Since COMMIT may actually do a ROLLBACK, the result indicates what * happened: TRUE for COMMIT, FALSE for ROLLBACK. + * + * Note that we don't actually do anything here except change blockState. + * The real work will be done in the upcoming CommitTransactionCommand(). + * We do it this way because it's not convenient to change memory context, + * resource owner, etc while executing inside a Portal. */ bool EndTransactionBlock(void) @@ -2226,64 +2992,96 @@ EndTransactionBlock(void) switch (s->blockState) { /* - * We are in a transaction block which should commit when we - * get to the upcoming CommitTransactionCommand() so we set - * the state to "END". CommitTransactionCommand() will - * recognize this and commit the transaction and return us to - * the default state. + * We are in a transaction block, so tell CommitTransactionCommand + * to COMMIT. */ case TBLOCK_INPROGRESS: - case TBLOCK_SUBINPROGRESS: s->blockState = TBLOCK_END; result = true; break; /* - * We are in a transaction block which aborted. Since the - * AbortTransaction() was already done, we need only change to - * the special "END ABORT" state. 
The upcoming - * CommitTransactionCommand() will recognise this and then put - * us back in the default state. + * We are in a failed transaction block. Tell + * CommitTransactionCommand it's time to exit the block. */ case TBLOCK_ABORT: - s->blockState = TBLOCK_ENDABORT; + s->blockState = TBLOCK_ABORT_END; break; /* - * Here we are inside an aborted subtransaction. Go to the - * "abort the whole tree" state so that - * CommitTransactionCommand() calls AbortOutOfAnyTransaction. + * We are in a live subtransaction block. Set up to subcommit all + * open subtransactions and then commit the main transaction. */ - case TBLOCK_SUBABORT: - s->blockState = TBLOCK_SUBENDABORT_ALL; + case TBLOCK_SUBINPROGRESS: + while (s->parent != NULL) + { + if (s->blockState == TBLOCK_SUBINPROGRESS) + s->blockState = TBLOCK_SUBEND; + else + elog(FATAL, "EndTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); + s = s->parent; + } + if (s->blockState == TBLOCK_INPROGRESS) + s->blockState = TBLOCK_END; + else + elog(FATAL, "EndTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); + result = true; break; - case TBLOCK_STARTED: + /* + * Here we are inside an aborted subtransaction. Treat the COMMIT + * as ROLLBACK: set up to abort everything and exit the main + * transaction. 
+ */ + case TBLOCK_SUBABORT: + while (s->parent != NULL) + { + if (s->blockState == TBLOCK_SUBINPROGRESS) + s->blockState = TBLOCK_SUBABORT_PENDING; + else if (s->blockState == TBLOCK_SUBABORT) + s->blockState = TBLOCK_SUBABORT_END; + else + elog(FATAL, "EndTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); + s = s->parent; + } + if (s->blockState == TBLOCK_INPROGRESS) + s->blockState = TBLOCK_ABORT_PENDING; + else if (s->blockState == TBLOCK_ABORT) + s->blockState = TBLOCK_ABORT_END; + else + elog(FATAL, "EndTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); + break; /* - * here, the user issued COMMIT when not inside a transaction. - * Issue a WARNING and go to abort state. The upcoming call - * to CommitTransactionCommand() will then put us back into - * the default state. + * The user issued COMMIT when not inside a transaction. Issue a + * WARNING, staying in TBLOCK_STARTED state. The upcoming call to + * CommitTransactionCommand() will then close the transaction and + * put us back into the default state. */ + case TBLOCK_STARTED: ereport(WARNING, (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), errmsg("there is no transaction in progress"))); - AbortTransaction(); - s->blockState = TBLOCK_ENDABORT; + result = true; break; - /* these cases are invalid. */ + /* These cases are invalid. 
*/ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: - case TBLOCK_ENDABORT: - case TBLOCK_END: case TBLOCK_SUBBEGIN: + case TBLOCK_END: case TBLOCK_SUBEND: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "EndTransactionBlock: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2295,6 +3093,8 @@ EndTransactionBlock(void) /* * UserAbortTransactionBlock * This executes a ROLLBACK command. + * + * As above, we don't actually do anything here except change blockState. */ void UserAbortTransactionBlock(void) @@ -2304,73 +3104,76 @@ UserAbortTransactionBlock(void) switch (s->blockState) { /* - * We are inside a failed transaction block and we got an - * abort command from the user. Abort processing is already - * done, we just need to move to the ENDABORT state so we will - * end up in the default state after the upcoming - * CommitTransactionCommand(). - */ - case TBLOCK_ABORT: - s->blockState = TBLOCK_ENDABORT; - break; - - /* - * We are inside a failed subtransaction and we got an abort - * command from the user. Abort processing is already done, - * so go to the "abort all" state and CommitTransactionCommand - * will call AbortOutOfAnyTransaction to set things straight. + * We are inside a transaction block and we got a ROLLBACK command + * from the user, so tell CommitTransactionCommand to abort and + * exit the transaction block. */ - case TBLOCK_SUBABORT: - s->blockState = TBLOCK_SUBENDABORT_ALL; + case TBLOCK_INPROGRESS: + s->blockState = TBLOCK_ABORT_PENDING; break; /* - * We are inside a transaction block and we got an abort - * command from the user, so we move to the ENDABORT state and - * do abort processing so we will end up in the default state - * after the upcoming CommitTransactionCommand(). 
+ * We are inside a failed transaction block and we got a ROLLBACK + * command from the user. Abort processing is already done, so + * CommitTransactionCommand just has to cleanup and go back to + * idle state. */ - case TBLOCK_INPROGRESS: - AbortTransaction(); - s->blockState = TBLOCK_ENDABORT; + case TBLOCK_ABORT: + s->blockState = TBLOCK_ABORT_END; break; /* - * We are inside a subtransaction. Abort the current - * subtransaction and go to the "abort all" state, so - * CommitTransactionCommand will call AbortOutOfAnyTransaction - * to set things straight. + * We are inside a subtransaction. Mark everything up to top + * level as exitable. */ case TBLOCK_SUBINPROGRESS: - AbortSubTransaction(); - s->blockState = TBLOCK_SUBENDABORT_ALL; + case TBLOCK_SUBABORT: + while (s->parent != NULL) + { + if (s->blockState == TBLOCK_SUBINPROGRESS) + s->blockState = TBLOCK_SUBABORT_PENDING; + else if (s->blockState == TBLOCK_SUBABORT) + s->blockState = TBLOCK_SUBABORT_END; + else + elog(FATAL, "UserAbortTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); + s = s->parent; + } + if (s->blockState == TBLOCK_INPROGRESS) + s->blockState = TBLOCK_ABORT_PENDING; + else if (s->blockState == TBLOCK_ABORT) + s->blockState = TBLOCK_ABORT_END; + else + elog(FATAL, "UserAbortTransactionBlock: unexpected state %s", + BlockStateAsString(s->blockState)); break; /* - * The user issued ABORT when not inside a transaction. Issue - * a WARNING and go to abort state. The upcoming call to + * The user issued ABORT when not inside a transaction. Issue a + * WARNING and go to abort state. The upcoming call to * CommitTransactionCommand() will then put us back into the * default state. */ case TBLOCK_STARTED: - ereport(WARNING, + ereport(NOTICE, (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), errmsg("there is no transaction in progress"))); - AbortTransaction(); - s->blockState = TBLOCK_ENDABORT; + s->blockState = TBLOCK_ABORT_PENDING; break; /* These cases are invalid. 
*/ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: + case TBLOCK_SUBBEGIN: case TBLOCK_END: - case TBLOCK_ENDABORT: case TBLOCK_SUBEND: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: - case TBLOCK_SUBBEGIN: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "UserAbortTransactionBlock: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2395,28 +3198,29 @@ DefineSavepoint(char *name) s = CurrentTransactionState; /* changed by push */ /* - * Note that we are allocating the savepoint name in the - * parent transaction's CurTransactionContext, since we don't - * yet have a transaction context for the new guy. + * Savepoint names, like the TransactionState block itself, live + * in TopTransactionContext. */ - s->name = MemoryContextStrdup(CurTransactionContext, name); - s->blockState = TBLOCK_SUBBEGIN; + if (name) + s->name = MemoryContextStrdup(TopTransactionContext, name); break; - /* These cases are invalid. Reject them altogether. */ + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: case TBLOCK_SUBBEGIN: + case TBLOCK_END: + case TBLOCK_SUBEND: case TBLOCK_ABORT: case TBLOCK_SUBABORT: - case TBLOCK_ENDABORT: - case TBLOCK_END: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: - case TBLOCK_SUBEND: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "DefineSavepoint: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2426,6 +3230,8 @@ DefineSavepoint(char *name) /* * ReleaseSavepoint * This executes a RELEASE command. + * + * As above, we don't actually do anything here except change blockState. 
*/ void ReleaseSavepoint(List *options) @@ -2436,38 +3242,41 @@ ReleaseSavepoint(List *options) ListCell *cell; char *name = NULL; - /* - * Check valid block state transaction status. - */ switch (s->blockState) { + /* + * We can't rollback to a savepoint if there is no savepoint + * defined. + */ case TBLOCK_INPROGRESS: - case TBLOCK_ABORT: ereport(ERROR, (errcode(ERRCODE_S_E_INVALID_SPECIFICATION), errmsg("no such savepoint"))); break; /* - * We are in a non-aborted subtransaction. This is the only - * valid case. + * We are in a non-aborted subtransaction. This is the only valid + * case. */ case TBLOCK_SUBINPROGRESS: break; - /* these cases are invalid. */ + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: - case TBLOCK_ENDABORT: - case TBLOCK_END: - case TBLOCK_SUBABORT: case TBLOCK_SUBBEGIN: + case TBLOCK_END: case TBLOCK_SUBEND: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT: + case TBLOCK_SUBABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "ReleaseSavepoint: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2502,8 +3311,8 @@ ReleaseSavepoint(List *options) /* * Mark "commit pending" all subtransactions up to the target - * subtransaction. The actual commits will happen when control gets - * to CommitTransactionCommand. + * subtransaction. The actual commits will happen when control gets to + * CommitTransactionCommand. */ xact = CurrentTransactionState; for (;;) @@ -2520,6 +3329,8 @@ ReleaseSavepoint(List *options) /* * RollbackToSavepoint * This executes a ROLLBACK TO command. + * + * As above, we don't actually do anything here except change blockState. 
*/ void RollbackToSavepoint(List *options) @@ -2533,11 +3344,11 @@ RollbackToSavepoint(List *options) switch (s->blockState) { /* - * We can't rollback to a savepoint if there is no saveopint + * We can't rollback to a savepoint if there is no savepoint * defined. */ - case TBLOCK_ABORT: case TBLOCK_INPROGRESS: + case TBLOCK_ABORT: ereport(ERROR, (errcode(ERRCODE_S_E_INVALID_SPECIFICATION), errmsg("no such savepoint"))); @@ -2546,27 +3357,24 @@ RollbackToSavepoint(List *options) /* * There is at least one savepoint, so proceed. */ - case TBLOCK_SUBABORT: case TBLOCK_SUBINPROGRESS: - - /* - * Have to do AbortSubTransaction, but first check if this is - * the right subtransaction - */ + case TBLOCK_SUBABORT: break; - /* these cases are invalid. */ + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_STARTED: case TBLOCK_BEGIN: + case TBLOCK_SUBBEGIN: case TBLOCK_END: - case TBLOCK_ENDABORT: case TBLOCK_SUBEND: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: - case TBLOCK_SUBBEGIN: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "RollbackToSavepoint: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2599,39 +3407,45 @@ RollbackToSavepoint(List *options) (errcode(ERRCODE_S_E_INVALID_SPECIFICATION), errmsg("no such savepoint"))); - /* - * Abort the current subtransaction, if needed. We can't Cleanup the - * savepoint yet, so signal CommitTransactionCommand to do it and - * close all savepoints up to the target level. - */ - if (s->blockState == TBLOCK_SUBINPROGRESS) - AbortSubTransaction(); - s->blockState = TBLOCK_SUBENDABORT; - /* * Mark "abort pending" all subtransactions up to the target - * subtransaction. (Except the current subtransaction!) + * subtransaction. 
The actual aborts will happen when control gets to + * CommitTransactionCommand. */ xact = CurrentTransactionState; - - while (xact != target) + for (;;) { + if (xact == target) + break; + if (xact->blockState == TBLOCK_SUBINPROGRESS) + xact->blockState = TBLOCK_SUBABORT_PENDING; + else if (xact->blockState == TBLOCK_SUBABORT) + xact->blockState = TBLOCK_SUBABORT_END; + else + elog(FATAL, "RollbackToSavepoint: unexpected state %s", + BlockStateAsString(xact->blockState)); xact = xact->parent; Assert(PointerIsValid(xact)); - Assert(xact->blockState == TBLOCK_SUBINPROGRESS); - xact->blockState = TBLOCK_SUBABORT_PENDING; } + + /* And mark the target as "restart pending" */ + if (xact->blockState == TBLOCK_SUBINPROGRESS) + xact->blockState = TBLOCK_SUBRESTART; + else if (xact->blockState == TBLOCK_SUBABORT) + xact->blockState = TBLOCK_SUBABORT_RESTART; + else + elog(FATAL, "RollbackToSavepoint: unexpected state %s", + BlockStateAsString(xact->blockState)); } /* * BeginInternalSubTransaction - * This is the same as DefineSavepoint except it allows TBLOCK_STARTED - * state, and therefore it can safely be used in a function that might - * be called when not inside a BEGIN block. Also, we automatically - * cycle through CommitTransactionCommand/StartTransactionCommand + * This is the same as DefineSavepoint except it allows TBLOCK_STARTED, + * TBLOCK_END, and TBLOCK_PREPARE states, and therefore it can safely be + * used in functions that might be called when not inside a BEGIN block + * or when running deferred triggers at COMMIT/PREPARE time. Also, it + * automatically does CommitTransactionCommand/StartTransactionCommand * instead of expecting the caller to do it. - * - * Optionally, name can be NULL to create an unnamed savepoint. 
*/ void BeginInternalSubTransaction(char *name) @@ -2642,34 +3456,34 @@ BeginInternalSubTransaction(char *name) { case TBLOCK_STARTED: case TBLOCK_INPROGRESS: + case TBLOCK_END: + case TBLOCK_PREPARE: case TBLOCK_SUBINPROGRESS: /* Normal subtransaction start */ PushTransaction(); s = CurrentTransactionState; /* changed by push */ /* - * Note that we are allocating the savepoint name in the - * parent transaction's CurTransactionContext, since we don't - * yet have a transaction context for the new guy. + * Savepoint names, like the TransactionState block itself, live + * in TopTransactionContext. */ if (name) - s->name = MemoryContextStrdup(CurTransactionContext, name); - s->blockState = TBLOCK_SUBBEGIN; + s->name = MemoryContextStrdup(TopTransactionContext, name); break; - /* These cases are invalid. Reject them altogether. */ + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_BEGIN: case TBLOCK_SUBBEGIN: + case TBLOCK_SUBEND: case TBLOCK_ABORT: case TBLOCK_SUBABORT: - case TBLOCK_ENDABORT: - case TBLOCK_END: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: - case TBLOCK_SUBEND: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: elog(FATAL, "BeginInternalSubTransaction: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2694,8 +3508,11 @@ ReleaseCurrentSubTransaction(void) if (s->blockState != TBLOCK_SUBINPROGRESS) elog(ERROR, "ReleaseCurrentSubTransaction: unexpected state %s", BlockStateAsString(s->blockState)); + Assert(s->state == TRANS_INPROGRESS); MemoryContextSwitchTo(CurTransactionContext); - CommitTransactionToLevel(GetCurrentTransactionNestLevel()); + CommitSubTransaction(); + s = CurrentTransactionState; /* changed by pop */ + Assert(s->state == TRANS_INPROGRESS); } /* @@ -2713,24 +3530,26 @@ RollbackAndReleaseCurrentSubTransaction(void) switch (s->blockState) 
{ /* Must be in a subtransaction */ - case TBLOCK_SUBABORT: case TBLOCK_SUBINPROGRESS: + case TBLOCK_SUBABORT: break; - /* these cases are invalid. */ + /* These cases are invalid. */ case TBLOCK_DEFAULT: case TBLOCK_STARTED: - case TBLOCK_ABORT: - case TBLOCK_INPROGRESS: case TBLOCK_BEGIN: + case TBLOCK_SUBBEGIN: + case TBLOCK_INPROGRESS: case TBLOCK_END: - case TBLOCK_ENDABORT: case TBLOCK_SUBEND: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: - case TBLOCK_SUBBEGIN: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: + case TBLOCK_PREPARE: elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -2741,10 +3560,14 @@ RollbackAndReleaseCurrentSubTransaction(void) */ if (s->blockState == TBLOCK_SUBINPROGRESS) AbortSubTransaction(); - s->blockState = TBLOCK_SUBENDABORT_RELEASE; - /* And clean it up, too */ - CleanupAbortedSubTransactions(false); + /* And clean it up, too */ + CleanupSubTransaction(); + + s = CurrentTransactionState; /* changed by pop */ + AssertState(s->blockState == TBLOCK_SUBINPROGRESS || + s->blockState == TBLOCK_INPROGRESS || + s->blockState == TBLOCK_STARTED); } /* @@ -2773,46 +3596,38 @@ AbortOutOfAnyTransaction(void) case TBLOCK_BEGIN: case TBLOCK_INPROGRESS: case TBLOCK_END: + case TBLOCK_ABORT_PENDING: + case TBLOCK_PREPARE: /* In a transaction, so clean up */ AbortTransaction(); CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; case TBLOCK_ABORT: - case TBLOCK_ENDABORT: + case TBLOCK_ABORT_END: /* AbortTransaction already done, still need Cleanup */ CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; - case TBLOCK_SUBBEGIN: /* - * We didn't get as far as starting the subxact, so - * there's nothing to abort. Just pop back to parent. 
+ * In a subtransaction, so clean it up and abort parent too */ - PopTransaction(); - s = CurrentTransactionState; /* changed by pop */ - break; + case TBLOCK_SUBBEGIN: case TBLOCK_SUBINPROGRESS: case TBLOCK_SUBEND: case TBLOCK_SUBABORT_PENDING: - - /* - * In a subtransaction, so clean it up and abort parent - * too - */ + case TBLOCK_SUBRESTART: AbortSubTransaction(); CleanupSubTransaction(); - PopTransaction(); s = CurrentTransactionState; /* changed by pop */ break; + case TBLOCK_SUBABORT: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: - case TBLOCK_SUBENDABORT_RELEASE: + case TBLOCK_SUBABORT_END: + case TBLOCK_SUBABORT_RESTART: /* As above, but AbortSubTransaction already done */ CleanupSubTransaction(); - PopTransaction(); s = CurrentTransactionState; /* changed by pop */ break; } @@ -2822,28 +3637,6 @@ AbortOutOfAnyTransaction(void) Assert(s->parent == NULL); } -/* - * CommitTransactionToLevel - * - * Commit everything from the current transaction level - * up to the specified level (inclusive). - */ -static void -CommitTransactionToLevel(int level) -{ - TransactionState s = CurrentTransactionState; - - Assert(s->state == TRANS_INPROGRESS); - - while (s->nestingLevel >= level) - { - CommitSubTransaction(); - PopTransaction(); - s = CurrentTransactionState; /* changed by pop */ - Assert(s->state == TRANS_INPROGRESS); - } -} - /* * IsTransactionBlock --- are we within a transaction block? 
*/ @@ -2890,19 +3683,21 @@ TransactionBlockStatusCode(void) case TBLOCK_STARTED: return 'I'; /* idle --- not in transaction */ case TBLOCK_BEGIN: + case TBLOCK_SUBBEGIN: case TBLOCK_INPROGRESS: - case TBLOCK_END: case TBLOCK_SUBINPROGRESS: - case TBLOCK_SUBBEGIN: + case TBLOCK_END: case TBLOCK_SUBEND: + case TBLOCK_PREPARE: return 'T'; /* in transaction */ case TBLOCK_ABORT: - case TBLOCK_ENDABORT: case TBLOCK_SUBABORT: - case TBLOCK_SUBENDABORT_ALL: - case TBLOCK_SUBENDABORT: + case TBLOCK_ABORT_END: + case TBLOCK_SUBABORT_END: + case TBLOCK_ABORT_PENDING: case TBLOCK_SUBABORT_PENDING: - case TBLOCK_SUBENDABORT_RELEASE: + case TBLOCK_SUBRESTART: + case TBLOCK_SUBABORT_RESTART: return 'E'; /* in failed transaction */ } @@ -2928,6 +3723,15 @@ IsSubTransaction(void) /* * StartSubTransaction + * + * If you're wondering why this is separate from PushTransaction: it's because + * we can't conveniently do this stuff right inside DefineSavepoint. The + * SAVEPOINT utility command will be executed inside a Portal, and if we + * muck with CurrentMemoryContext or CurrentResourceOwner then exit from + * the Portal will undo those settings. So we make DefineSavepoint just + * push a dummy transaction block, and when control returns to the main + * idle loop, CommitTransactionCommand will be called, and we'll come here + * to finish starting the subtransaction. */ static void StartSubTransaction(void) @@ -2941,49 +3745,32 @@ StartSubTransaction(void) s->state = TRANS_START; /* + * Initialize subsystems for new subtransaction + * * must initialize resource-management stuff first */ AtSubStart_Memory(); AtSubStart_ResourceOwner(); - - /* - * Generate a new Xid and record it in pg_subtrans. NB: we must make - * the subtrans entry BEFORE the Xid appears anywhere in shared - * storage, such as in the lock table; because until it's made the Xid - * may not appear to be "running" to other backends. See - * GetNewTransactionId. 
- */ - s->transactionIdData = GetNewTransactionId(true); - - SubTransSetParent(s->transactionIdData, s->parent->transactionIdData); - - XactLockTableInsert(s->transactionIdData); - - /* - * Finish setup of other transaction state fields. - */ - s->currentUser = GetUserId(); - s->prevXactReadOnly = XactReadOnly; - - /* - * Initialize other subsystems for new subtransaction - */ AtSubStart_Inval(); AtSubStart_Notify(); - DeferredTriggerBeginSubXact(); + AfterTriggerBeginSubXact(); s->state = TRANS_INPROGRESS; /* * Call start-of-subxact callbacks */ - CallXactCallbacks(XACT_EVENT_START_SUB, s->parent->transactionIdData); + CallSubXactCallbacks(SUBXACT_EVENT_START_SUB, s->subTransactionId, + s->parent->subTransactionId); ShowTransactionState("StartSubTransaction"); } /* * CommitSubTransaction + * + * The caller has to make sure to always reassign CurrentTransactionState + * if it has a local pointer to it after calling this function. */ static void CommitSubTransaction(void) @@ -3000,30 +3787,44 @@ CommitSubTransaction(void) s->state = TRANS_COMMIT; - /* Mark subtransaction as subcommitted */ + /* Must CCI to ensure commands of subtransaction are seen as done */ CommandCounterIncrement(); + + /* Mark subtransaction as subcommitted */ RecordSubTransactionCommit(); - AtSubCommit_childXids(); /* Post-commit cleanup */ - DeferredTriggerEndSubXact(true); - AtSubCommit_Portals(s->parent->transactionIdData, + if (TransactionIdIsValid(s->transactionId)) + AtSubCommit_childXids(); + AfterTriggerEndSubXact(true); + AtSubCommit_Portals(s->subTransactionId, + s->parent->subTransactionId, s->parent->curTransactionOwner); - AtEOSubXact_LargeObject(true, s->transactionIdData, - s->parent->transactionIdData); + AtEOSubXact_LargeObject(true, s->subTransactionId, + s->parent->subTransactionId); AtSubCommit_Notify(); - AtEOSubXact_UpdatePasswordFile(true, s->transactionIdData, - s->parent->transactionIdData); - AtSubCommit_smgr(); + AtEOSubXact_UpdateFlatFiles(true, s->subTransactionId, 
+ s->parent->subTransactionId); - CallXactCallbacks(XACT_EVENT_COMMIT_SUB, s->parent->transactionIdData); + CallSubXactCallbacks(SUBXACT_EVENT_COMMIT_SUB, s->subTransactionId, + s->parent->subTransactionId); ResourceOwnerRelease(s->curTransactionOwner, RESOURCE_RELEASE_BEFORE_LOCKS, true, false); - AtEOSubXact_RelationCache(true, s->transactionIdData, - s->parent->transactionIdData); + AtEOSubXact_RelationCache(true, s->subTransactionId, + s->parent->subTransactionId); AtEOSubXact_Inval(true); + AtSubCommit_smgr(); + + /* + * The only lock we actually release here is the subtransaction XID lock. + * The rest just get transferred to the parent resource owner. + */ + CurrentResourceOwner = s->curTransactionOwner; + if (TransactionIdIsValid(s->transactionId)) + XactLockTableDelete(s->transactionId); + ResourceOwnerRelease(s->curTransactionOwner, RESOURCE_RELEASE_LOCKS, true, false); @@ -3031,19 +3832,21 @@ CommitSubTransaction(void) RESOURCE_RELEASE_AFTER_LOCKS, true, false); - AtEOXact_GUC(true, true); - AtEOSubXact_SPI(true, s->transactionIdData); - AtEOSubXact_on_commit_actions(true, s->transactionIdData, - s->parent->transactionIdData); - AtEOSubXact_Namespace(true, s->transactionIdData, - s->parent->transactionIdData); - AtEOSubXact_Files(true, s->transactionIdData, - s->parent->transactionIdData); + AtEOXact_GUC(true, s->gucNestLevel); + AtEOSubXact_SPI(true, s->subTransactionId); + AtEOSubXact_on_commit_actions(true, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_Namespace(true, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_Files(true, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_HashTables(true, s->nestingLevel); + AtEOSubXact_PgStat(true, s->nestingLevel); /* - * We need to restore the upper transaction's read-only state, in case - * the upper is read-write while the child is read-only; GUC will - * incorrectly think it should leave the child state in place. 
+ * We need to restore the upper transaction's read-only state, in case the + * upper is read-write while the child is read-only; GUC will incorrectly + * think it should leave the child state in place. */ XactReadOnly = s->prevXactReadOnly; @@ -3055,6 +3858,8 @@ CommitSubTransaction(void) AtSubCommit_Memory(); s->state = TRANS_DEFAULT; + + PopTransaction(); } /* @@ -3065,21 +3870,18 @@ AbortSubTransaction(void) { TransactionState s = CurrentTransactionState; - ShowTransactionState("AbortSubTransaction"); - - if (s->state != TRANS_INPROGRESS) - elog(WARNING, "AbortSubTransaction while in %s state", - TransStateAsString(s->state)); - + /* Prevent cancel/die interrupt while cleaning up */ HOLD_INTERRUPTS(); - s->state = TRANS_ABORT; + /* Make sure we have a valid memory context and resource owner */ + AtSubAbort_Memory(); + AtSubAbort_ResourceOwner(); /* * Release any LW locks we might be holding as quickly as possible. * (Regular locks, however, must be held till we finish aborting.) - * Releasing LW locks is critical since we might try to grab them - * again while cleaning up! + * Releasing LW locks is critical since we might try to grab them again + * while cleaning up! * * FIXME This may be incorrect --- Are there some locks we should keep? * Buffer locks, for example? I don't think so but I'm not sure. @@ -3092,69 +3894,79 @@ AbortSubTransaction(void) LockWaitCancel(); /* - * do abort processing + * check the current transaction state */ - AtSubAbort_Memory(); - - DeferredTriggerEndSubXact(false); - AtSubAbort_Portals(s->parent->transactionIdData, - s->parent->curTransactionOwner); - AtEOSubXact_LargeObject(false, s->transactionIdData, - s->parent->transactionIdData); - AtSubAbort_Notify(); - AtEOSubXact_UpdatePasswordFile(false, s->transactionIdData, - s->parent->transactionIdData); + ShowTransactionState("AbortSubTransaction"); - /* Advertise the fact that we aborted in pg_clog. 
*/ - RecordSubTransactionAbort(); + if (s->state != TRANS_INPROGRESS) + elog(WARNING, "AbortSubTransaction while in %s state", + TransStateAsString(s->state)); - /* Post-abort cleanup */ - AtSubAbort_smgr(); + s->state = TRANS_ABORT; - CallXactCallbacks(XACT_EVENT_ABORT_SUB, s->parent->transactionIdData); + /* + * Reset user ID which might have been changed transiently. (See notes + * in AbortTransaction.) + */ + SetUserIdAndContext(s->prevUser, s->prevSecDefCxt); - ResourceOwnerRelease(s->curTransactionOwner, - RESOURCE_RELEASE_BEFORE_LOCKS, - false, false); - AtEOSubXact_RelationCache(false, s->transactionIdData, - s->parent->transactionIdData); - AtEOSubXact_Inval(false); - ResourceOwnerRelease(s->curTransactionOwner, - RESOURCE_RELEASE_LOCKS, - false, false); - ResourceOwnerRelease(s->curTransactionOwner, - RESOURCE_RELEASE_AFTER_LOCKS, - false, false); - - AtEOXact_GUC(false, true); - AtEOSubXact_SPI(false, s->transactionIdData); - AtEOSubXact_on_commit_actions(false, s->transactionIdData, - s->parent->transactionIdData); - AtEOSubXact_Namespace(false, s->transactionIdData, - s->parent->transactionIdData); - AtEOSubXact_Files(false, s->transactionIdData, - s->parent->transactionIdData); - - /* - * Reset user id which might have been changed transiently. Here we - * want to restore to the userid that was current at subxact entry. - * (As in AbortTransaction, we need not worry about the session - * userid.) - * - * Must do this after AtEOXact_GUC to handle the case where we entered - * the subxact inside a SECURITY DEFINER function (hence current and - * session userids were different) and then session auth was changed - * inside the subxact. GUC will reset both current and session - * userids to the entry-time session userid. This is right in every - * other scenario so it seems simplest to let GUC do that and fix it - * here. + /* + * We can skip all this stuff if the subxact failed before creating a + * ResourceOwner... 
*/ - SetUserId(s->currentUser); + if (s->curTransactionOwner) + { + AfterTriggerEndSubXact(false); + AtSubAbort_Portals(s->subTransactionId, + s->parent->subTransactionId, + s->parent->curTransactionOwner); + AtEOSubXact_LargeObject(false, s->subTransactionId, + s->parent->subTransactionId); + AtSubAbort_Notify(); + AtEOSubXact_UpdateFlatFiles(false, s->subTransactionId, + s->parent->subTransactionId); + + /* Advertise the fact that we aborted in pg_clog. */ + (void) RecordTransactionAbort(true); + + /* Post-abort cleanup */ + if (TransactionIdIsValid(s->transactionId)) + AtSubAbort_childXids(); + + CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId, + s->parent->subTransactionId); + + ResourceOwnerRelease(s->curTransactionOwner, + RESOURCE_RELEASE_BEFORE_LOCKS, + false, false); + AtEOSubXact_RelationCache(false, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_Inval(false); + AtSubAbort_smgr(); + ResourceOwnerRelease(s->curTransactionOwner, + RESOURCE_RELEASE_LOCKS, + false, false); + ResourceOwnerRelease(s->curTransactionOwner, + RESOURCE_RELEASE_AFTER_LOCKS, + false, false); + + AtEOXact_GUC(false, s->gucNestLevel); + AtEOSubXact_SPI(false, s->subTransactionId); + AtEOXact_xml(); + AtEOSubXact_on_commit_actions(false, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_Namespace(false, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_Files(false, s->subTransactionId, + s->parent->subTransactionId); + AtEOSubXact_HashTables(false, s->nestingLevel); + AtEOSubXact_PgStat(false, s->nestingLevel); + } /* - * Restore the upper transaction's read-only state, too. This should - * be redundant with GUC's cleanup but we may as well do it for - * consistency with the commit case. + * Restore the upper transaction's read-only state, too. This should be + * redundant with GUC's cleanup but we may as well do it for consistency + * with the commit case. 
*/ XactReadOnly = s->prevXactReadOnly; @@ -3163,6 +3975,9 @@ AbortSubTransaction(void) /* * CleanupSubTransaction + * + * The caller has to make sure to always reassign CurrentTransactionState + * if it has a local pointer to it after calling this function. */ static void CleanupSubTransaction(void) @@ -3175,67 +3990,24 @@ CleanupSubTransaction(void) elog(WARNING, "CleanupSubTransaction while in %s state", TransStateAsString(s->state)); - AtSubCleanup_Portals(); + AtSubCleanup_Portals(s->subTransactionId); CurrentResourceOwner = s->parent->curTransactionOwner; CurTransactionResourceOwner = s->parent->curTransactionOwner; - ResourceOwnerDelete(s->curTransactionOwner); + if (s->curTransactionOwner) + ResourceOwnerDelete(s->curTransactionOwner); s->curTransactionOwner = NULL; AtSubCleanup_Memory(); s->state = TRANS_DEFAULT; -} - -/* - * StartAbortedSubTransaction - * - * This function is used to start a subtransaction and put it immediately - * into aborted state. The end result should be equivalent to - * StartSubTransaction immediately followed by AbortSubTransaction. - * The reason we don't implement it just that way is that many of the backend - * modules aren't designed to handle starting a subtransaction when not - * inside a valid transaction. Rather than making them all capable of - * doing that, we just omit the paired start and abort calls in this path. - */ -static void -StartAbortedSubTransaction(void) -{ - TransactionState s = CurrentTransactionState; - - if (s->state != TRANS_DEFAULT) - elog(WARNING, "StartAbortedSubTransaction while in %s state", - TransStateAsString(s->state)); - - s->state = TRANS_START; - - /* - * We don't bother to generate a new Xid, so the end state is not - * *exactly* like we had done a full Start/AbortSubTransaction... 
- */ - s->transactionIdData = InvalidTransactionId; - - /* Make sure currentUser is reasonably valid */ - Assert(s->parent != NULL); - s->currentUser = s->parent->currentUser; - - /* - * Initialize only what has to be there for CleanupSubTransaction to - * work. - */ - AtSubStart_Memory(); - AtSubStart_ResourceOwner(); - - s->state = TRANS_ABORT; - - AtSubAbort_Memory(); - ShowTransactionState("StartAbortedSubTransaction"); + PopTransaction(); } /* * PushTransaction - * Set up transaction state for a subtransaction + * Create transaction state stack entry for a subtransaction * * The caller has to make sure to always reassign CurrentTransactionState * if it has a local pointer to it after calling this function. @@ -3252,25 +4024,43 @@ PushTransaction(void) s = (TransactionState) MemoryContextAllocZero(TopTransactionContext, sizeof(TransactionStateData)); + + /* + * Assign a subtransaction ID, watching out for counter wraparound. + */ + currentSubTransactionId += 1; + if (currentSubTransactionId == InvalidSubTransactionId) + { + currentSubTransactionId -= 1; + pfree(s); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than 2^32-1 subtransactions in a transaction"))); + } + + /* + * We can now stack a minimally valid subtransaction without fear of + * failure. + */ + s->transactionId = InvalidTransactionId; /* until assigned */ + s->subTransactionId = currentSubTransactionId; s->parent = p; s->nestingLevel = p->nestingLevel + 1; + s->gucNestLevel = NewGUCNestLevel(); s->savepointLevel = p->savepointLevel; s->state = TRANS_DEFAULT; s->blockState = TBLOCK_SUBBEGIN; + GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt); + s->prevXactReadOnly = XactReadOnly; - /* Command IDs count in a continuous sequence through subtransactions */ - s->commandId = p->commandId; + CurrentTransactionState = s; /* - * Copy down some other data so that we will have valid state until - * StartSubTransaction runs. 
+ * AbortSubTransaction and CleanupSubTransaction have to be able to cope + * with the subtransaction from here on out; in particular they should not + * assume that it necessarily has a transaction context, resource owner, + * or XID. */ - s->transactionIdData = p->transactionIdData; - s->curTransactionContext = p->curTransactionContext; - s->curTransactionOwner = p->curTransactionOwner; - s->currentUser = p->currentUser; - - CurrentTransactionState = s; } /* @@ -3292,9 +4082,6 @@ PopTransaction(void) if (s->parent == NULL) elog(FATAL, "PopTransaction with no parent"); - /* Command IDs count in a continuous sequence through subtransactions */ - s->parent->commandId = s->commandId; - CurrentTransactionState = s->parent; /* Let's just make sure CurTransactionContext is good */ @@ -3319,9 +4106,9 @@ static void ShowTransactionState(const char *str) { /* skip work if message will definitely not be printed */ - if (log_min_messages <= DEBUG2 || client_min_messages <= DEBUG2) + if (log_min_messages <= DEBUG3 || client_min_messages <= DEBUG3) { - elog(DEBUG2, "%s", str); + elog(DEBUG3, "%s", str); ShowTransactionStateRec(CurrentTransactionState); } } @@ -3333,19 +4120,35 @@ ShowTransactionState(const char *str) static void ShowTransactionStateRec(TransactionState s) { + StringInfoData buf; + + initStringInfo(&buf); + + if (s->nChildXids > 0) + { + int i; + + appendStringInfo(&buf, "%u", s->childXids[0]); + for (i = 1; i < s->nChildXids; i++) + appendStringInfo(&buf, " %u", s->childXids[i]); + } + if (s->parent) ShowTransactionStateRec(s->parent); /* use ereport to suppress computation if msg will not be printed */ - ereport(DEBUG2, - (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/cid: %u/%02u, nestlvl: %d, children: %s", - PointerIsValid(s->name) ? s->name : "unnamed", + ereport(DEBUG3, + (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u%s, nestlvl: %d, children: %s", + PointerIsValid(s->name) ? 
s->name : "unnamed", BlockStateAsString(s->blockState), TransStateAsString(s->state), - (unsigned int) s->transactionIdData, - (unsigned int) s->commandId, - s->nestingLevel, - nodeToString(s->childXids)))); + (unsigned int) s->transactionId, + (unsigned int) s->subTransactionId, + (unsigned int) currentCommandId, + currentCommandIdUsed ? " (used)" : "", + s->nestingLevel, buf.data))); + + pfree(buf.data); } /* @@ -3369,8 +4172,12 @@ BlockStateAsString(TBlockState blockState) return "END"; case TBLOCK_ABORT: return "ABORT"; - case TBLOCK_ENDABORT: - return "ENDABORT"; + case TBLOCK_ABORT_END: + return "ABORT END"; + case TBLOCK_ABORT_PENDING: + return "ABORT PEND"; + case TBLOCK_PREPARE: + return "PREPARE"; case TBLOCK_SUBBEGIN: return "SUB BEGIN"; case TBLOCK_SUBINPROGRESS: @@ -3379,14 +4186,14 @@ BlockStateAsString(TBlockState blockState) return "SUB END"; case TBLOCK_SUBABORT: return "SUB ABORT"; - case TBLOCK_SUBENDABORT_ALL: - return "SUB ENDAB ALL"; - case TBLOCK_SUBENDABORT: - return "SUB ENDAB"; + case TBLOCK_SUBABORT_END: + return "SUB ABORT END"; case TBLOCK_SUBABORT_PENDING: return "SUB ABRT PEND"; - case TBLOCK_SUBENDABORT_RELEASE: - return "SUB ENDAB REL"; + case TBLOCK_SUBRESTART: + return "SUB RESTART"; + case TBLOCK_SUBABORT_RESTART: + return "SUB AB RESTRT"; } return "UNRECOGNIZED"; } @@ -3404,12 +4211,14 @@ TransStateAsString(TransState state) return "DEFAULT"; case TRANS_START: return "START"; + case TRANS_INPROGRESS: + return "INPROGR"; case TRANS_COMMIT: return "COMMIT"; case TRANS_ABORT: return "ABORT"; - case TRANS_INPROGRESS: - return "INPROGR"; + case TRANS_PREPARE: + return "PREPARE"; } return "UNRECOGNIZED"; } @@ -3418,41 +4227,97 @@ TransStateAsString(TransState state) * xactGetCommittedChildren * * Gets the list of committed children of the current transaction. The return - * value is the number of child transactions. *children is set to point to a - * palloc'd array of TransactionIds. 
If there are no subxacts, *children is - * set to NULL. + * value is the number of child transactions. *ptr is set to point to an + * array of TransactionIds. The array is allocated in TopTransactionContext; + * the caller should *not* pfree() it (this is a change from pre-8.4 code!). + * If there are no subxacts, *ptr is set to NULL. */ int xactGetCommittedChildren(TransactionId **ptr) { TransactionState s = CurrentTransactionState; - int nchildren; - TransactionId *children; - ListCell *p; - nchildren = list_length(s->childXids); - if (nchildren == 0) - { + if (s->nChildXids == 0) *ptr = NULL; - return 0; - } + else + *ptr = s->childXids; - children = (TransactionId *) palloc(nchildren * sizeof(TransactionId)); - *ptr = children; + return s->nChildXids; +} - foreach(p, s->childXids) - { - TransactionId child = lfirst_xid(p); +/* + * XLOG support routines + */ + +static void +xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid) +{ + TransactionId *sub_xids; + TransactionId max_xid; + int i; + + TransactionIdCommit(xid); - *children++ = child; + /* Mark committed subtransactions as committed */ + sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); + TransactionIdCommitTree(xlrec->nsubxacts, sub_xids); + + /* Make sure nextXid is beyond any XID mentioned in the record */ + max_xid = xid; + for (i = 0; i < xlrec->nsubxacts; i++) + { + if (TransactionIdPrecedes(max_xid, sub_xids[i])) + max_xid = sub_xids[i]; + } + if (TransactionIdFollowsOrEquals(max_xid, + ShmemVariableCache->nextXid)) + { + ShmemVariableCache->nextXid = max_xid; + TransactionIdAdvance(ShmemVariableCache->nextXid); } - return nchildren; + /* Make sure files supposed to be dropped are dropped */ + for (i = 0; i < xlrec->nrels; i++) + { + XLogDropRelation(xlrec->xnodes[i]); + smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); + } } -/* - * XLOG support routines - */ +static void +xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) +{ + TransactionId *sub_xids; + TransactionId 
max_xid; + int i; + + TransactionIdAbort(xid); + + /* Mark subtransactions as aborted */ + sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); + TransactionIdAbortTree(xlrec->nsubxacts, sub_xids); + + /* Make sure nextXid is beyond any XID mentioned in the record */ + max_xid = xid; + for (i = 0; i < xlrec->nsubxacts; i++) + { + if (TransactionIdPrecedes(max_xid, sub_xids[i])) + max_xid = sub_xids[i]; + } + if (TransactionIdFollowsOrEquals(max_xid, + ShmemVariableCache->nextXid)) + { + ShmemVariableCache->nextXid = max_xid; + TransactionIdAdvance(ShmemVariableCache->nextXid); + } + + /* Make sure files supposed to be dropped are dropped */ + for (i = 0; i < xlrec->nrels; i++) + { + XLogDropRelation(xlrec->xnodes[i]); + smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); + } +} void xact_redo(XLogRecPtr lsn, XLogRecord *record) @@ -3462,132 +4327,132 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record) if (info == XLOG_XACT_COMMIT) { xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record); - int i; - TransactionIdCommit(record->xl_xid); - /* Mark committed subtransactions as committed */ - TransactionIdCommitTree(xlrec->nsubxacts, - (TransactionId *) &(xlrec->xnodes[xlrec->nrels])); - /* Make sure files supposed to be dropped are dropped */ - for (i = 0; i < xlrec->nrels; i++) - { - XLogCloseRelation(xlrec->xnodes[i]); - smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); - } + xact_redo_commit(xlrec, record->xl_xid); } else if (info == XLOG_XACT_ABORT) { xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record); - int i; - TransactionIdAbort(record->xl_xid); - /* mark subtransactions as aborted */ - TransactionIdAbortTree(xlrec->nsubxacts, - (TransactionId *) &(xlrec->xnodes[xlrec->nrels])); - /* Make sure files supposed to be dropped are dropped */ - for (i = 0; i < xlrec->nrels; i++) - { - XLogCloseRelation(xlrec->xnodes[i]); - smgrdounlink(smgropen(xlrec->xnodes[i]), false, true); - } + xact_redo_abort(xlrec, record->xl_xid); } - else 
- elog(PANIC, "xact_redo: unknown op code %u", info); -} + else if (info == XLOG_XACT_PREPARE) + { + /* the record contents are exactly the 2PC file */ + RecreateTwoPhaseFile(record->xl_xid, + XLogRecGetData(record), record->xl_len); + } + else if (info == XLOG_XACT_COMMIT_PREPARED) + { + xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record); -void -xact_undo(XLogRecPtr lsn, XLogRecord *record) -{ - uint8 info = record->xl_info & ~XLR_INFO_MASK; + xact_redo_commit(&xlrec->crec, xlrec->xid); + RemoveTwoPhaseFile(xlrec->xid, false); + } + else if (info == XLOG_XACT_ABORT_PREPARED) + { + xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record); - if (info == XLOG_XACT_COMMIT) /* shouldn't be called by XLOG */ - elog(PANIC, "xact_undo: can't undo committed xaction"); - else if (info != XLOG_XACT_ABORT) + xact_redo_abort(&xlrec->arec, xlrec->xid); + RemoveTwoPhaseFile(xlrec->xid, false); + } + else elog(PANIC, "xact_redo: unknown op code %u", info); } -void -xact_desc(char *buf, uint8 xl_info, char *rec) +static void +xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) { - uint8 info = xl_info & ~XLR_INFO_MASK; int i; - if (info == XLOG_XACT_COMMIT) + appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); + if (xlrec->nrels > 0) { - xl_xact_commit *xlrec = (xl_xact_commit *) rec; - struct tm *tm = localtime(&xlrec->xtime); - - sprintf(buf + strlen(buf), "commit: %04u-%02u-%02u %02u:%02u:%02u", - tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); - if (xlrec->nrels > 0) - { - sprintf(buf + strlen(buf), "; rels:"); - for (i = 0; i < xlrec->nrels; i++) - { - RelFileNode rnode = xlrec->xnodes[i]; - - sprintf(buf + strlen(buf), " %u/%u/%u", - rnode.spcNode, rnode.dbNode, rnode.relNode); - } - } - if (xlrec->nsubxacts > 0) + appendStringInfo(buf, "; rels:"); + for (i = 0; i < xlrec->nrels; i++) { - TransactionId *xacts = (TransactionId *) - 
&xlrec->xnodes[xlrec->nrels]; + RelFileNode rnode = xlrec->xnodes[i]; - sprintf(buf + strlen(buf), "; subxacts:"); - for (i = 0; i < xlrec->nsubxacts; i++) - sprintf(buf + strlen(buf), " %u", xacts[i]); + appendStringInfo(buf, " %u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode); } } - else if (info == XLOG_XACT_ABORT) + if (xlrec->nsubxacts > 0) { - xl_xact_abort *xlrec = (xl_xact_abort *) rec; - struct tm *tm = localtime(&xlrec->xtime); + TransactionId *xacts = (TransactionId *) + &xlrec->xnodes[xlrec->nrels]; - sprintf(buf + strlen(buf), "abort: %04u-%02u-%02u %02u:%02u:%02u", - tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); - if (xlrec->nrels > 0) - { - sprintf(buf + strlen(buf), "; rels:"); - for (i = 0; i < xlrec->nrels; i++) - { - RelFileNode rnode = xlrec->xnodes[i]; + appendStringInfo(buf, "; subxacts:"); + for (i = 0; i < xlrec->nsubxacts; i++) + appendStringInfo(buf, " %u", xacts[i]); + } +} - sprintf(buf + strlen(buf), " %u/%u/%u", - rnode.spcNode, rnode.dbNode, rnode.relNode); - } - } - if (xlrec->nsubxacts > 0) +static void +xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec) +{ + int i; + + appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); + if (xlrec->nrels > 0) + { + appendStringInfo(buf, "; rels:"); + for (i = 0; i < xlrec->nrels; i++) { - TransactionId *xacts = (TransactionId *) - &xlrec->xnodes[xlrec->nrels]; + RelFileNode rnode = xlrec->xnodes[i]; - sprintf(buf + strlen(buf), "; subxacts:"); - for (i = 0; i < xlrec->nsubxacts; i++) - sprintf(buf + strlen(buf), " %u", xacts[i]); + appendStringInfo(buf, " %u/%u/%u", + rnode.spcNode, rnode.dbNode, rnode.relNode); } } - else - strcat(buf, "UNKNOWN"); + if (xlrec->nsubxacts > 0) + { + TransactionId *xacts = (TransactionId *) + &xlrec->xnodes[xlrec->nrels]; + + appendStringInfo(buf, "; subxacts:"); + for (i = 0; i < xlrec->nsubxacts; i++) + appendStringInfo(buf, " %u", xacts[i]); + } } void - XactPushRollback(void (*func) (void *), 
void *data) +xact_desc(StringInfo buf, uint8 xl_info, char *rec) { -#ifdef XLOG_II - if (_RollbackFunc != NULL) - elog(PANIC, "XactPushRollback: already installed"); -#endif + uint8 info = xl_info & ~XLR_INFO_MASK; - _RollbackFunc = func; - _RollbackData = data; -} + if (info == XLOG_XACT_COMMIT) + { + xl_xact_commit *xlrec = (xl_xact_commit *) rec; -void -XactPopRollback(void) -{ - _RollbackFunc = NULL; + appendStringInfo(buf, "commit: "); + xact_desc_commit(buf, xlrec); + } + else if (info == XLOG_XACT_ABORT) + { + xl_xact_abort *xlrec = (xl_xact_abort *) rec; + + appendStringInfo(buf, "abort: "); + xact_desc_abort(buf, xlrec); + } + else if (info == XLOG_XACT_PREPARE) + { + appendStringInfo(buf, "prepare"); + } + else if (info == XLOG_XACT_COMMIT_PREPARED) + { + xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec; + + appendStringInfo(buf, "commit %u: ", xlrec->xid); + xact_desc_commit(buf, &xlrec->crec); + } + else if (info == XLOG_XACT_ABORT_PREPARED) + { + xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec; + + appendStringInfo(buf, "abort %u: ", xlrec->xid); + xact_desc_abort(buf, &xlrec->arec); + } + else + appendStringInfo(buf, "UNKNOWN"); }