X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Faccess%2Ftransam%2Fxact.c;h=9af53a5953f09e2b2726fc7efa8ad4fcc1f2b9ab;hb=a7b7b07af340c73adee9959edf260695591a9496;hp=4ee1cc711cd6ef1189b6f4e955da46b094a597cf;hpb=8b4ff8b6a14096a28910fbff3d485f30dcb9a637;p=postgresql diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 4ee1cc711c..9af53a5953 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5,12 +5,12 @@ * * See src/backend/access/transam/README for more information. * - * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.232 2007/02/01 19:10:25 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.260 2008/03/17 19:44:41 petere Exp $ * *------------------------------------------------------------------------- */ @@ -37,12 +37,16 @@ #include "storage/fd.h" #include "storage/lmgr.h" #include "storage/procarray.h" +#include "storage/sinvaladt.h" #include "storage/smgr.h" +#include "utils/combocid.h" #include "utils/flatfiles.h" +#include "utils/guc.h" #include "utils/inval.h" #include "utils/memutils.h" #include "utils/relcache.h" -#include "utils/guc.h" +#include "utils/xml.h" +#include "pg_trace.h" /* @@ -54,21 +58,30 @@ int XactIsoLevel; bool DefaultXactReadOnly = false; bool XactReadOnly; +bool XactSyncCommit = true; + int CommitDelay = 0; /* precommit delay in microseconds */ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ +/* + * MyXactAccessedTempRel is set when a temporary relation is accessed. + * We don't allow PREPARE TRANSACTION in that case. (This is global + * so that it can be set from heapam.c.) + */ +bool MyXactAccessedTempRel = false; + /* * transaction states - transaction state from server perspective */ typedef enum TransState { - TRANS_DEFAULT, - TRANS_START, - TRANS_INPROGRESS, - TRANS_COMMIT, - TRANS_ABORT, - TRANS_PREPARE + TRANS_DEFAULT, /* idle */ + TRANS_START, /* transaction starting */ + TRANS_INPROGRESS, /* inside a valid transaction */ + TRANS_COMMIT, /* commit in progress */ + TRANS_ABORT, /* abort in progress */ + TRANS_PREPARE /* prepare in progress */ } TransState; /* @@ -114,11 +127,15 @@ typedef struct TransactionStateData int savepointLevel; /* savepoint level */ TransState state; /* low-level state */ TBlockState blockState; /* high-level state */ - int nestingLevel; /* nest depth */ + int nestingLevel; /* transaction nesting depth */ + int gucNestLevel; /* GUC context nesting depth */ MemoryContext curTransactionContext; /* my xact-lifetime context */ ResourceOwner curTransactionOwner; /* my query resources */ - List *childXids; /* subcommitted child XIDs */ - Oid currentUser; /* subxact start current_user */ + TransactionId *childXids; /* subcommitted child XIDs, in XID order */ + int nChildXids; /* # of subcommitted child XIDs */ + int maxChildXids; /* allocated size of childXids[] */ + Oid prevUser; /* previous CurrentUserId setting */ + bool prevSecDefCxt; /* previous SecurityDefinerContext setting */ bool prevXactReadOnly; /* entry-time xact r/o state */ struct TransactionStateData *parent; /* back link to parent */ } TransactionStateData; @@ -138,11 +155,15 @@ static TransactionStateData TopTransactionStateData = { TRANS_DEFAULT, /* transaction state */ TBLOCK_DEFAULT, /* transaction block state from the client * perspective */ - 0, /* nesting level */ + 0, /* transaction nesting depth */ + 0, /* GUC context nesting depth */ NULL, /* cur transaction context */ NULL, /* cur transaction resource owner */ - NIL, /* subcommitted child Xids */ - 0, /* entry-time current userid */ + NULL, /* subcommitted child Xids */ + 0, /* # of subcommitted child Xids */ + 0, /* allocated size of childXids[] */ + InvalidOid, /* previous CurrentUserId setting */ + false, /* previous SecurityDefinerContext setting */ false, /* entry-time xact r/o state */ NULL /* link to parent state block */ }; @@ -155,15 +176,18 @@ static TransactionState CurrentTransactionState = &TopTransactionStateData; */ static SubTransactionId currentSubTransactionId; static CommandId currentCommandId; +static bool currentCommandIdUsed; /* * xactStartTimestamp is the value of transaction_timestamp(). * stmtStartTimestamp is the value of statement_timestamp(). + * xactStopTimestamp is the time at which we log a commit or abort WAL record. * These do not change as we enter and exit subtransactions, so we don't * keep them inside the TransactionState stack. */ static TimestampTz xactStartTimestamp; static TimestampTz stmtStartTimestamp; +static TimestampTz xactStopTimestamp; /* * GID to be used for preparing the current transaction. This is also @@ -171,6 +195,11 @@ static TimestampTz stmtStartTimestamp; */ static char *prepareGID; +/* + * Some commands want to force synchronous commit. + */ +static bool forceSyncCommit = false; + /* * Private context for transaction-abort work --- we reserve space for this * at startup to ensure that AbortTransaction and AbortSubTransaction can work @@ -204,7 +233,7 @@ static SubXactCallbackItem *SubXact_callbacks = NULL; /* local function prototypes */ -static void AssignSubTransactionId(TransactionState s); +static void AssignTransactionId(TransactionState s); static void AbortTransaction(void); static void AtAbort_Memory(void); static void AtCleanup_Memory(void); @@ -220,7 +249,7 @@ static void CallSubXactCallbacks(SubXactEvent event, SubTransactionId parentSubid); static void CleanupTransaction(void); static void CommitTransaction(void); -static void RecordTransactionAbort(void); +static TransactionId RecordTransactionAbort(bool isSubXact); static void StartTransaction(void); static void RecordSubTransactionCommit(void); @@ -252,34 +281,22 @@ static const char *TransStateAsString(TransState state); /* * IsTransactionState * - * This returns true if we are currently running a query - * within an executing transaction. + * This returns true if we are inside a valid transaction; that is, + * it is safe to initiate database access, take heavyweight locks, etc. */ bool IsTransactionState(void) { TransactionState s = CurrentTransactionState; - switch (s->state) - { - case TRANS_DEFAULT: - return false; - case TRANS_START: - return true; - case TRANS_INPROGRESS: - return true; - case TRANS_COMMIT: - return true; - case TRANS_ABORT: - return true; - case TRANS_PREPARE: - return true; - } - /* - * Shouldn't get here, but lint is not happy without this... + * TRANS_DEFAULT and TRANS_ABORT are obviously unsafe states. However, we + * also reject the startup/shutdown states TRANS_START, TRANS_COMMIT, + * TRANS_PREPARE since it might be too soon or too late within those + * transition states to do anything interesting. Hence, the only "valid" + * state is TRANS_INPROGRESS. */ - return false; + return (s->state == TRANS_INPROGRESS); } /* @@ -304,23 +321,36 @@ IsAbortedTransactionBlockState(void) /* * GetTopTransactionId * - * Get the ID of the main transaction, even if we are currently inside - * a subtransaction. + * This will return the XID of the main transaction, assigning one if + * it's not yet set. Be careful to call this only inside a valid xact. */ TransactionId GetTopTransactionId(void) { + if (!TransactionIdIsValid(TopTransactionStateData.transactionId)) + AssignTransactionId(&TopTransactionStateData); return TopTransactionStateData.transactionId; } +/* + * GetTopTransactionIdIfAny + * + * This will return the XID of the main transaction, if one is assigned. + * It will return InvalidTransactionId if we are not currently inside a + * transaction, or inside a transaction that hasn't yet been assigned an XID. + */ +TransactionId +GetTopTransactionIdIfAny(void) +{ + return TopTransactionStateData.transactionId; +} /* * GetCurrentTransactionId * - * We do not assign XIDs to subtransactions until/unless this is called. - * When we do assign an XID to a subtransaction, recursively make sure - * its parent has one as well (this maintains the invariant that a child - * transaction has an XID following its parent's). + * This will return the XID of the current transaction (main or sub + * transaction), assigning one if it's not yet set. Be careful to call this + * only inside a valid xact. */ TransactionId GetCurrentTransactionId(void) @@ -328,20 +358,49 @@ GetCurrentTransactionId(void) TransactionState s = CurrentTransactionState; if (!TransactionIdIsValid(s->transactionId)) - AssignSubTransactionId(s); - + AssignTransactionId(s); return s->transactionId; } +/* + * GetCurrentTransactionIdIfAny + * + * This will return the XID of the current sub xact, if one is assigned. + * It will return InvalidTransactionId if we are not currently inside a + * transaction, or inside a transaction that hasn't been assigned an XID yet. + */ +TransactionId +GetCurrentTransactionIdIfAny(void) +{ + return CurrentTransactionState->transactionId; +} + + +/* + * AssignTransactionId + * + * Assigns a new permanent XID to the given TransactionState. + * We do not assign XIDs to transactions until/unless this is called. + * Also, any parent TransactionStates that don't yet have XIDs are assigned + * one; this maintains the invariant that a child transaction has an XID + * following its parent's. + */ static void -AssignSubTransactionId(TransactionState s) +AssignTransactionId(TransactionState s) { + bool isSubXact = (s->parent != NULL); ResourceOwner currentOwner; - Assert(s->parent != NULL); + /* Assert that caller didn't screw up */ + Assert(!TransactionIdIsValid(s->transactionId)); Assert(s->state == TRANS_INPROGRESS); - if (!TransactionIdIsValid(s->parent->transactionId)) - AssignSubTransactionId(s->parent); + + /* + * Ensure parent(s) have XIDs, so that a child always has an XID later + * than its parent. + */ + if (isSubXact && !TransactionIdIsValid(s->parent->transactionId)) + AssignTransactionId(s->parent); /* * Generate a new Xid and record it in PG_PROC and pg_subtrans. @@ -351,20 +410,20 @@ AssignSubTransactionId(TransactionState s) * PG_PROC, the subtrans entry is needed to ensure that other backends see * the Xid as "running". See GetNewTransactionId. */ - s->transactionId = GetNewTransactionId(true); + s->transactionId = GetNewTransactionId(isSubXact); - SubTransSetParent(s->transactionId, s->parent->transactionId); + if (isSubXact) + SubTransSetParent(s->transactionId, s->parent->transactionId); /* * Acquire lock on the transaction XID. (We assume this cannot block.) We - * have to be sure that the lock is assigned to the transaction's + * have to ensure that the lock is assigned to the transaction's own * ResourceOwner. */ currentOwner = CurrentResourceOwner; PG_TRY(); { CurrentResourceOwner = s->curTransactionOwner; - XactLockTableInsert(s->transactionId); } PG_CATCH(); @@ -378,22 +437,6 @@ AssignSubTransactionId(TransactionState s) } -/* - * GetCurrentTransactionIdIfAny - * - * Unlike GetCurrentTransactionId, this will return InvalidTransactionId - * if we are currently not in a transaction, or in a transaction or - * subtransaction that has not yet assigned itself an XID. - */ -TransactionId -GetCurrentTransactionIdIfAny(void) -{ - TransactionState s = CurrentTransactionState; - - return s->transactionId; -} - - /* * GetCurrentSubTransactionId */ @@ -408,11 +451,18 @@ GetCurrentSubTransactionId(void) /* * GetCurrentCommandId + * + * "used" must be TRUE if the caller intends to use the command ID to mark + * inserted/updated/deleted tuples. FALSE means the ID is being fetched + * for read-only purposes (ie, as a snapshot validity cutoff). See + * CommandCounterIncrement() for discussion. */ CommandId -GetCurrentCommandId(void) +GetCurrentCommandId(bool used) { /* this is global to a transaction, not subtransaction-local */ + if (used) + currentCommandIdUsed = true; return currentCommandId; } @@ -434,6 +484,20 @@ GetCurrentStatementStartTimestamp(void) return stmtStartTimestamp; } +/* + * GetCurrentTransactionStopTimestamp + * + * We return current time if the transaction stop time hasn't been set + * (which can happen if we decide we don't need to log an XLOG record). + */ +TimestampTz +GetCurrentTransactionStopTimestamp(void) +{ + if (xactStopTimestamp != 0) + return xactStopTimestamp; + return GetCurrentTimestamp(); +} + /* * SetCurrentStatementStartTimestamp */ @@ -443,6 +507,15 @@ SetCurrentStatementStartTimestamp(void) stmtStartTimestamp = GetCurrentTimestamp(); } +/* + * SetCurrentTransactionStopTimestamp + */ +static inline void +SetCurrentTransactionStopTimestamp(void) +{ + xactStopTimestamp = GetCurrentTimestamp(); +} + /* * GetCurrentTransactionNestLevel * @@ -491,7 +564,7 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) */ for (s = CurrentTransactionState; s != NULL; s = s->parent) { - ListCell *cell; + int low, high; if (s->state == TRANS_ABORT) continue; @@ -499,10 +572,22 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) continue; /* it can't have any child XIDs either */ if (TransactionIdEquals(xid, s->transactionId)) return true; - foreach(cell, s->childXids) + /* As the childXids array is ordered, we can use binary search */ + low = 0; + high = s->nChildXids - 1; + while (low <= high) { - if (TransactionIdEquals(xid, lfirst_xid(cell))) + int middle; + TransactionId probe; + + middle = low + (high - low) / 2; + probe = s->childXids[middle]; + if (TransactionIdEquals(probe, xid)) return true; + else if (TransactionIdPrecedes(probe, xid)) + low = middle + 1; + else + high = middle - 1; } } @@ -516,28 +601,65 @@ TransactionIdIsCurrentTransactionId(TransactionId xid) void CommandCounterIncrement(void) { - currentCommandId += 1; - if (currentCommandId == FirstCommandId) /* check for overflow */ + /* + * If the current value of the command counter hasn't been "used" to + * mark tuples, we need not increment it, since there's no need to + * distinguish a read-only command from others. This helps postpone + * command counter overflow, and keeps no-op CommandCounterIncrement + * operations cheap. + */ + if (currentCommandIdUsed) { - currentCommandId -= 1; - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + currentCommandId += 1; + if (currentCommandId == FirstCommandId) /* check for overflow */ + { + currentCommandId -= 1; + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("cannot have more than 2^32-1 commands in a transaction"))); - } + } + currentCommandIdUsed = false; + + /* Propagate new command ID into static snapshots, if set */ + if (SerializableSnapshot) + SerializableSnapshot->curcid = currentCommandId; + if (LatestSnapshot) + LatestSnapshot->curcid = currentCommandId; - /* Propagate new command ID into static snapshots, if set */ - if (SerializableSnapshot) - SerializableSnapshot->curcid = currentCommandId; - if (LatestSnapshot) - LatestSnapshot->curcid = currentCommandId; + /* + * Make any catalog changes done by the just-completed command + * visible in the local syscache. We obviously don't need to do + * this after a read-only command. (But see hacks in inval.c + * to make real sure we don't think a command that queued inval + * messages was read-only.) + */ + AtCommit_LocalCache(); + } /* - * make cache changes visible to me. + * Make any other backends' catalog changes visible to me. + * + * XXX this is probably in the wrong place: CommandCounterIncrement + * should be purely a local operation, most likely. However fooling + * with this will affect asynchronous cross-backend interactions, + * which doesn't seem like a wise thing to do in late beta, so save + * improving this for another day - tgl 2007-11-30 */ - AtCommit_LocalCache(); AtStart_Cache(); } +/* + * ForceSyncCommit + * + * Interface routine to allow commands to force a synchronous commit of the + * current top-level transaction + */ +void +ForceSyncCommit(void) +{ + forceSyncCommit = true; +} + /* ---------------------------------------------------------------- * StartTransaction stuff @@ -564,9 +686,9 @@ AtStart_Memory(void) /* * If this is the first time through, create a private context for * AbortTransaction to work in. By reserving some space now, we can - * insulate AbortTransaction from out-of-memory scenarios. Like - * ErrorContext, we set it up with slow growth rate and a nonzero - * minimum size, so that space will be reserved immediately. + * insulate AbortTransaction from out-of-memory scenarios. Like + * ErrorContext, we set it up with slow growth rate and a nonzero minimum + * size, so that space will be reserved immediately. */ if (TransactionAbortContext == NULL) TransactionAbortContext = @@ -685,162 +807,203 @@ AtSubStart_ResourceOwner(void) /* * RecordTransactionCommit + * + * Returns latest XID among xact and its children, or InvalidTransactionId + * if the xact has no XID. (We compute that here just because it's easier.) + * + * This is exported only to support an ugly hack in VACUUM FULL. */ -void +TransactionId RecordTransactionCommit(void) { + TransactionId xid = GetTopTransactionIdIfAny(); + bool markXidCommitted = TransactionIdIsValid(xid); + TransactionId latestXid = InvalidTransactionId; int nrels; RelFileNode *rels; + bool haveNonTemp; int nchildren; TransactionId *children; /* Get data needed for commit record */ - nrels = smgrGetPendingDeletes(true, &rels); + nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp); nchildren = xactGetCommittedChildren(&children); /* - * If we made neither any XLOG entries nor any temp-rel updates, and have - * no files to be deleted, we can omit recording the transaction commit at - * all. (This test includes the effects of subtransactions, so the - * presence of committed subxacts need not alone force a write.) + * If we haven't been assigned an XID yet, we neither can, nor do we want + * to write a COMMIT record. */ - if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0) + if (!markXidCommitted) { - TransactionId xid = GetCurrentTransactionId(); - bool madeTCentries; - XLogRecPtr recptr; + /* + * We expect that every smgrscheduleunlink is followed by a catalog + * update, and hence XID assignment, so we shouldn't get here with any + * pending deletes. Use a real test not just an Assert to check this, + * since it's a bit fragile. + */ + if (nrels != 0) + elog(ERROR, "cannot commit a transaction that deleted files but has no xid"); + + /* Can't have child XIDs either; AssignTransactionId enforces this */ + Assert(nchildren == 0); + + /* + * If we didn't create XLOG entries, we're done here; otherwise we + * should flush those entries the same as a commit record. (An + * example of a possible record that wouldn't cause an XID to be + * assigned is a sequence advance record due to nextval() --- we want + * to flush that to disk before reporting commit.) + */ + if (XactLastRecEnd.xrecoff == 0) + goto cleanup; + } + else + { + /* + * Begin commit critical section and insert the commit XLOG record. + */ + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_commit xlrec; /* Tell bufmgr and smgr to prepare for commit */ BufmgrCommit(); - START_CRIT_SECTION(); - /* - * If our transaction made any transaction-controlled XLOG entries, we - * need to lock out checkpoint start between writing our XLOG record - * and updating pg_clog. Otherwise it is possible for the checkpoint - * to set REDO after the XLOG record but fail to flush the pg_clog - * update to disk, leading to loss of the transaction commit if we - * crash a little later. Slightly klugy fix for problem discovered - * 2004-08-10. + * Mark ourselves as within our "commit critical section". This + * forces any concurrent checkpoint to wait until we've updated + * pg_clog. Without this, it is possible for the checkpoint to set + * REDO after the XLOG record but fail to flush the pg_clog update to + * disk, leading to loss of the transaction commit if the system + * crashes a little later. * - * (If it made no transaction-controlled XLOG entries, its XID appears - * nowhere in permanent storage, so no one else will ever care if it - * committed; so it doesn't matter if we lose the commit flag.) + * Note: we could, but don't bother to, set this flag in + * RecordTransactionAbort. That's because loss of a transaction abort + * is noncritical; the presumption would be that it aborted, anyway. * - * Note we only need a shared lock. - */ - madeTCentries = (MyLastRecPtr.xrecoff != 0); - if (madeTCentries) - LWLockAcquire(CheckpointStartLock, LW_SHARED); - - /* - * We only need to log the commit in XLOG if the transaction made any - * transaction-controlled XLOG entries or will delete files. + * It's safe to change the inCommit flag of our own backend without + * holding the ProcArrayLock, since we're the only one modifying it. + * This makes checkpoint's determination of which xacts are inCommit a + * bit fuzzy, but it doesn't matter. */ - if (madeTCentries || nrels > 0) + START_CRIT_SECTION(); + MyProc->inCommit = true; + + SetCurrentTransactionStopTimestamp(); + xlrec.xact_time = xactStopTimestamp; + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactCommit; + rdata[0].buffer = InvalidBuffer; + /* dump rels to delete */ + if (nrels > 0) { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_commit xlrec; - - xlrec.xtime = time(NULL); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactCommit; - rdata[0].buffer = InvalidBuffer; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].buffer = InvalidBuffer; - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - rdata[2].buffer = InvalidBuffer; - lastrdata = 2; - } - rdata[lastrdata].next = NULL; - - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); + rdata[0].next = &(rdata[1]); + rdata[1].data = (char *) rels; + rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; + lastrdata = 1; } - else + /* dump committed child Xids */ + if (nchildren > 0) { - /* Just flush through last record written by me */ - recptr = ProcLastRecEnd; + rdata[lastrdata].next = &(rdata[2]); + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; + lastrdata = 2; } + rdata[lastrdata].next = NULL; + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata); + } + + /* + * Check if we want to commit asynchronously. If the user has set + * synchronous_commit = off, and we're not doing cleanup of any non-temp + * rels nor committing any command that wanted to force sync commit, then + * we can defer flushing XLOG. (We must not allow asynchronous commit if + * there are any non-temp tables to be deleted, because we might delete + * the files before the COMMIT record is flushed to disk. We do allow + * asynchronous commit if all to-be-deleted tables are temporary though, + * since they are lost anyway if we crash.) + */ + if (XactSyncCommit || forceSyncCommit || haveNonTemp) + { /* - * We must flush our XLOG entries to disk if we made any XLOG entries, - * whether in or out of transaction control. For example, if we - * reported a nextval() result to the client, this ensures that any - * XLOG record generated by nextval will hit the disk before we report - * the transaction committed. + * Synchronous commit case. + * + * Sleep before flush! So we can flush more than one commit records + * per single fsync. (The idea is some other backend may do the + * XLogFlush while we're sleeping. This needs work still, because on + * most Unixen, the minimum select() delay is 10msec or more, which is + * way too long.) * - * Note: if we generated a commit record above, MyXactMadeXLogEntry - * will certainly be set now. + * We do not sleep if enableFsync is not turned on, nor if there are + * fewer than CommitSiblings other backends with active transactions. */ - if (MyXactMadeXLogEntry) - { - /* - * Sleep before flush! So we can flush more than one commit - * records per single fsync. (The idea is some other backend may - * do the XLogFlush while we're sleeping. This needs work still, - * because on most Unixen, the minimum select() delay is 10msec or - * more, which is way too long.) - * - * We do not sleep if enableFsync is not turned on, nor if there - * are fewer than CommitSiblings other backends with active - * transactions. - */ - if (CommitDelay > 0 && enableFsync && - CountActiveBackends() >= CommitSiblings) - pg_usleep(CommitDelay); + if (CommitDelay > 0 && enableFsync && + CountActiveBackends() >= CommitSiblings) + pg_usleep(CommitDelay); - XLogFlush(recptr); - } + XLogFlush(XactLastRecEnd); /* - * We must mark the transaction committed in clog if its XID appears - * either in permanent rels or in local temporary rels. We test this - * by seeing if we made transaction-controlled entries *OR* local-rel - * tuple updates. Note that if we made only the latter, we have not - * emitted an XLOG record for our commit, and so in the event of a - * crash the clog update might be lost. This is okay because no one - * else will ever care whether we committed. + * Now we may update the CLOG, if we wrote a COMMIT record above */ - if (madeTCentries || MyXactMadeTempRelUpdate) + if (markXidCommitted) { TransactionIdCommit(xid); /* to avoid race conditions, the parent must commit first */ TransactionIdCommitTree(nchildren, children); } + } + else + { + /* + * Asynchronous commit case. + * + * Report the latest async commit LSN, so that the WAL writer knows to + * flush this commit. + */ + XLogSetAsyncCommitLSN(XactLastRecEnd); - /* Unlock checkpoint lock if we acquired it */ - if (madeTCentries) - LWLockRelease(CheckpointStartLock); + /* + * We must not immediately update the CLOG, since we didn't flush the + * XLOG. Instead, we store the LSN up to which the XLOG must be + * flushed before the CLOG may be updated. + */ + if (markXidCommitted) + { + TransactionIdAsyncCommit(xid, XactLastRecEnd); + /* to avoid race conditions, the parent must commit first */ + TransactionIdAsyncCommitTree(nchildren, children, XactLastRecEnd); + } + } + /* + * If we entered a commit critical section, leave it now, and let + * checkpoints proceed. + */ + if (markXidCommitted) + { + MyProc->inCommit = false; END_CRIT_SECTION(); } - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* Compute latestXid while we have the child XIDs handy */ + latestXid = TransactionIdLatest(xid, nchildren, children); - /* And clean up local data */ + /* Reset XactLastRecEnd until the next transaction writes something */ + XactLastRecEnd.xrecoff = 0; + +cleanup: + /* Clean up local data */ if (rels) pfree(rels); - if (children) - pfree(children); + + return latestXid; } @@ -919,34 +1082,79 @@ static void AtSubCommit_childXids(void) { TransactionState s = CurrentTransactionState; - MemoryContext old_cxt; + int new_nChildXids; Assert(s->parent != NULL); /* - * We keep the child-XID lists in TopTransactionContext; this avoids - * setting up child-transaction contexts for what might be just a few - * bytes of grandchild XIDs. + * The parent childXids array will need to hold my XID and all my + * childXids, in addition to the XIDs already there. */ - old_cxt = MemoryContextSwitchTo(TopTransactionContext); + new_nChildXids = s->parent->nChildXids + s->nChildXids + 1; - s->parent->childXids = lappend_xid(s->parent->childXids, - s->transactionId); - - if (s->childXids != NIL) + /* Allocate or enlarge the parent array if necessary */ + if (s->parent->maxChildXids < new_nChildXids) { - s->parent->childXids = list_concat(s->parent->childXids, - s->childXids); + int new_maxChildXids; + TransactionId *new_childXids; /* - * list_concat doesn't free the list header for the second list; do so - * here to avoid memory leakage (kluge) + * Make it 2x what's needed right now, to avoid having to enlarge it + * repeatedly. But we can't go above MaxAllocSize. (The latter + * limit is what ensures that we don't need to worry about integer + * overflow here or in the calculation of new_nChildXids.) */ - pfree(s->childXids); - s->childXids = NIL; + new_maxChildXids = Min(new_nChildXids * 2, + (int) (MaxAllocSize / sizeof(TransactionId))); + + if (new_maxChildXids < new_nChildXids) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("maximum number of committed subtransactions (%d) exceeded", + (int) (MaxAllocSize / sizeof(TransactionId))))); + + /* + * We keep the child-XID arrays in TopTransactionContext; this avoids + * setting up child-transaction contexts for what might be just a few + * bytes of grandchild XIDs. + */ + if (s->parent->childXids == NULL) + new_childXids = + MemoryContextAlloc(TopTransactionContext, + new_maxChildXids * sizeof(TransactionId)); + else + new_childXids = repalloc(s->parent->childXids, + new_maxChildXids * sizeof(TransactionId)); + + s->parent->childXids = new_childXids; + s->parent->maxChildXids = new_maxChildXids; } - MemoryContextSwitchTo(old_cxt); + /* + * Copy all my XIDs to parent's array. + * + * Note: We rely on the fact that the XID of a child always follows that + * of its parent. By copying the XID of this subtransaction before the + * XIDs of its children, we ensure that the array stays ordered. Likewise, + * all XIDs already in the array belong to subtransactions started and + * subcommitted before us, so their XIDs must precede ours. + */ + s->parent->childXids[s->parent->nChildXids] = s->transactionId; + + if (s->nChildXids > 0) + memcpy(&s->parent->childXids[s->parent->nChildXids + 1], + s->childXids, + s->nChildXids * sizeof(TransactionId)); + + s->parent->nChildXids = new_nChildXids; + + /* Release child's array to avoid leakage */ + if (s->childXids != NULL) + pfree(s->childXids); + /* We must reset these to avoid double-free if fail later in commit */ + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; } /* @@ -955,23 +1163,20 @@ AtSubCommit_childXids(void) static void RecordSubTransactionCommit(void) { + TransactionId xid = GetCurrentTransactionIdIfAny(); + /* * We do not log the subcommit in XLOG; it doesn't matter until the * top-level transaction commits. * - * We must mark the subtransaction subcommitted in clog if its XID appears - * either in permanent rels or in local temporary rels. We test this by - * seeing if we made transaction-controlled entries *OR* local-rel tuple - * updates. (The test here actually covers the entire transaction tree so - * far, so it may mark subtransactions that don't really need it, but it's - * probably not worth being tenser. Note that if a prior subtransaction - * dirtied these variables, then RecordTransactionCommit will have to do - * the full pushup anyway...) + * We must mark the subtransaction subcommitted in the CLOG if it had a + * valid XID assigned. If it did not, nobody else will ever know about + * the existence of this subxact. We don't have to deal with deletions + * scheduled for on-commit here, since they'll be reassigned to our parent + * (who might still abort). */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) + if (TransactionIdIsValid(xid)) { - TransactionId xid = GetCurrentTransactionId(); - /* XXX does this really need to be a critical section? */ START_CRIT_SECTION(); @@ -989,115 +1194,133 @@ RecordSubTransactionCommit(void) /* * RecordTransactionAbort + * + * Returns latest XID among xact and its children, or InvalidTransactionId + * if the xact has no XID. (We compute that here just because it's easier.) */ -static void -RecordTransactionAbort(void) +static TransactionId +RecordTransactionAbort(bool isSubXact) { + TransactionId xid = GetCurrentTransactionIdIfAny(); + TransactionId latestXid; int nrels; RelFileNode *rels; int nchildren; TransactionId *children; - - /* Get data needed for abort record */ - nrels = smgrGetPendingDeletes(false, &rels); - nchildren = xactGetCommittedChildren(&children); + XLogRecData rdata[3]; + int lastrdata = 0; + xl_xact_abort xlrec; /* - * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can omit - * recording the transaction abort at all. No one will ever care that it - * aborted. (These tests cover our whole transaction tree.) + * If we haven't been assigned an XID, nobody will care whether we aborted + * or not. Hence, we're done in that case. It does not matter if we have + * rels to delete (note that this routine is not responsible for actually + * deleting 'em). We cannot have any child XIDs, either. */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) + if (!TransactionIdIsValid(xid)) { - TransactionId xid = GetCurrentTransactionId(); + /* Reset XactLastRecEnd until the next transaction writes something */ + if (!isSubXact) + XactLastRecEnd.xrecoff = 0; + return InvalidTransactionId; + } - /* - * Catch the scenario where we aborted partway through - * RecordTransactionCommit ... - */ - if (TransactionIdDidCommit(xid)) - elog(PANIC, "cannot abort transaction %u, it was already committed", xid); + /* + * We have a valid XID, so we should write an ABORT record for it. + * + * We do not flush XLOG to disk here, since the default assumption after a + * crash would be that we aborted, anyway. For the same reason, we don't + * need to worry about interlocking against checkpoint start. + */ - START_CRIT_SECTION(); + /* + * Check that we haven't aborted halfway through RecordTransactionCommit. + */ + if (TransactionIdDidCommit(xid)) + elog(PANIC, "cannot abort transaction %u, it was already committed", + xid); - /* - * We only need to log the abort in XLOG if the transaction made any - * transaction-controlled XLOG entries or will delete files. (If it - * made no transaction-controlled XLOG entries, its XID appears - * nowhere in permanent storage, so no one else will ever care if it - * committed.) - * - * We do not flush XLOG to disk unless deleting files, since the - * default assumption after a crash would be that we aborted, anyway. - * For the same reason, we don't need to worry about interlocking - * against checkpoint start. - */ - if (MyLastRecPtr.xrecoff != 0 || nrels > 0) - { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_abort xlrec; - XLogRecPtr recptr; - - xlrec.xtime = time(NULL); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactAbort; - rdata[0].buffer = InvalidBuffer; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].buffer = InvalidBuffer; - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - rdata[2].buffer = InvalidBuffer; - lastrdata = 2; - } - rdata[lastrdata].next = NULL; + /* Fetch the data we need for the abort record */ + nrels = smgrGetPendingDeletes(false, &rels, NULL); + nchildren = xactGetCommittedChildren(&children); - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); + /* XXX do we really need a critical section here? */ + START_CRIT_SECTION(); - /* Must flush if we are deleting files... */ - if (nrels > 0) - XLogFlush(recptr); - } + /* Write the ABORT record */ + if (isSubXact) + xlrec.xact_time = GetCurrentTimestamp(); + else + { + SetCurrentTransactionStopTimestamp(); + xlrec.xact_time = xactStopTimestamp; + } + xlrec.nrels = nrels; + xlrec.nsubxacts = nchildren; + rdata[0].data = (char *) (&xlrec); + rdata[0].len = MinSizeOfXactAbort; + rdata[0].buffer = InvalidBuffer; + /* dump rels to delete */ + if (nrels > 0) + { + rdata[0].next = &(rdata[1]); + rdata[1].data = (char *) rels; + rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; + lastrdata = 1; + } + /* dump committed child Xids */ + if (nchildren > 0) + { + rdata[lastrdata].next = &(rdata[2]); + rdata[2].data = (char *) children; + rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; + lastrdata = 2; + } + rdata[lastrdata].next = NULL; - /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but we may as well do it while we are here. - * - * The ordering here isn't critical but it seems best to mark the - * parent first. This assures an atomic transition of all the - * subtransactions to aborted state from the point of view of - * concurrent TransactionIdDidAbort calls. - */ - TransactionIdAbort(xid); - TransactionIdAbortTree(nchildren, children); + (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); - END_CRIT_SECTION(); - } + /* + * Mark the transaction aborted in clog. This is not absolutely necessary + * but we may as well do it while we are here; also, in the subxact case + * it is helpful because XactLockTableWait makes use of it to avoid + * waiting for already-aborted subtransactions. It is OK to do it without + * having flushed the ABORT record to disk, because in event of a crash + * we'd be assumed to have aborted anyway. + * + * The ordering here isn't critical but it seems best to mark the parent + * first. This assures an atomic transition of all the subtransactions to + * aborted state from the point of view of concurrent + * TransactionIdDidAbort calls. + */ + TransactionIdAbort(xid); + TransactionIdAbortTree(nchildren, children); + + END_CRIT_SECTION(); + + /* Compute latestXid while we have the child XIDs handy */ + latestXid = TransactionIdLatest(xid, nchildren, children); + + /* + * If we're aborting a subtransaction, we can immediately remove failed + * XIDs from PGPROC's cache of running child XIDs. We do that here for + * subxacts, because we already have the child XID array at hand. For + * main xacts, the equivalent happens just after this function returns. + */ + if (isSubXact) + XidCacheRemoveRunningXids(xid, nchildren, children, latestXid); - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* Reset XactLastRecEnd until the next transaction writes something */ + if (!isSubXact) + XactLastRecEnd.xrecoff = 0; /* And clean up local data */ if (rels) pfree(rels); - if (children) - pfree(children); + + return latestXid; } /* @@ -1107,9 +1330,9 @@ static void AtAbort_Memory(void) { /* - * Switch into TransactionAbortContext, which should have some free - * space even if nothing else does. We'll work in this context until - * we've finished cleaning up. + * Switch into TransactionAbortContext, which should have some free space + * even if nothing else does. We'll work in this context until we've + * finished cleaning up. * * It is barely possible to get here when we've not been able to create * TransactionAbortContext yet; if so use TopMemoryContext. @@ -1167,114 +1390,15 @@ AtSubAbort_childXids(void) TransactionState s = CurrentTransactionState; /* - * We keep the child-XID lists in TopTransactionContext (see - * AtSubCommit_childXids). This means we'd better free the list + * We keep the child-XID arrays in TopTransactionContext (see + * AtSubCommit_childXids). This means we'd better free the array * explicitly at abort to avoid leakage. */ - list_free(s->childXids); - s->childXids = NIL; -} - -/* - * RecordSubTransactionAbort - */ -static void -RecordSubTransactionAbort(void) -{ - int nrels; - RelFileNode *rels; - TransactionId xid = GetCurrentTransactionId(); - int nchildren; - TransactionId *children; - - /* Get data needed for abort record */ - nrels = smgrGetPendingDeletes(false, &rels); - nchildren = xactGetCommittedChildren(&children); - - /* - * If we made neither any transaction-controlled XLOG entries nor any - * temp-rel updates, and are not going to delete any files, we can omit - * recording the transaction abort at all. No one will ever care that it - * aborted. (These tests cover our whole transaction tree, and therefore - * may mark subxacts that don't really need it, but it's probably not - * worth being tenser.) - * - * In this case we needn't worry about marking subcommitted children as - * aborted, because they didn't mark themselves as subcommitted in the - * first place; see the optimization in RecordSubTransactionCommit. - */ - if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0) - { - START_CRIT_SECTION(); - - /* - * We only need to log the abort in XLOG if the transaction made any - * transaction-controlled XLOG entries or will delete files. - */ - if (MyLastRecPtr.xrecoff != 0 || nrels > 0) - { - XLogRecData rdata[3]; - int lastrdata = 0; - xl_xact_abort xlrec; - XLogRecPtr recptr; - - xlrec.xtime = time(NULL); - xlrec.nrels = nrels; - xlrec.nsubxacts = nchildren; - rdata[0].data = (char *) (&xlrec); - rdata[0].len = MinSizeOfXactAbort; - rdata[0].buffer = InvalidBuffer; - /* dump rels to delete */ - if (nrels > 0) - { - rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) rels; - rdata[1].len = nrels * sizeof(RelFileNode); - rdata[1].buffer = InvalidBuffer; - lastrdata = 1; - } - /* dump committed child Xids */ - if (nchildren > 0) - { - rdata[lastrdata].next = &(rdata[2]); - rdata[2].data = (char *) children; - rdata[2].len = nchildren * sizeof(TransactionId); - rdata[2].buffer = InvalidBuffer; - lastrdata = 2; - } - rdata[lastrdata].next = NULL; - - recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata); - - /* Must flush if we are deleting files... */ - if (nrels > 0) - XLogFlush(recptr); - } - - /* - * Mark the transaction aborted in clog. This is not absolutely - * necessary but XactLockTableWait makes use of it to avoid waiting - * for already-aborted subtransactions. - */ - TransactionIdAbort(xid); - TransactionIdAbortTree(nchildren, children); - - END_CRIT_SECTION(); - } - - /* - * We can immediately remove failed XIDs from PGPROC's cache of running - * child XIDs. It's easiest to do it here while we have the child XID - * array at hand, even though in the main-transaction case the equivalent - * work happens just after return from RecordTransactionAbort. - */ - XidCacheRemoveRunningXids(xid, nchildren, children); - - /* And clean up local data */ - if (rels) - pfree(rels); - if (children) - pfree(children); + if (s->childXids != NULL) + pfree(s->childXids); + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; } /* ---------------------------------------------------------------- @@ -1360,6 +1484,7 @@ static void StartTransaction(void) { TransactionState s; + VirtualTransactionId vxid; /* * Let's just make sure the state stack is empty @@ -1387,6 +1512,8 @@ StartTransaction(void) FreeXactSnapshot(); XactIsoLevel = DefaultXactIsoLevel; XactReadOnly = DefaultXactReadOnly; + forceSyncCommit = false; + MyXactAccessedTempRel = false; /* * reinitialize within-transaction counters @@ -1394,6 +1521,7 @@ StartTransaction(void) s->subTransactionId = TopSubTransactionId; currentSubTransactionId = TopSubTransactionId; currentCommandId = FirstCommandId; + currentCommandIdUsed = false; /* * must initialize resource-management stuff first @@ -1402,40 +1530,54 @@ StartTransaction(void) AtStart_ResourceOwner(); /* - * generate a new transaction id + * Assign a new LocalTransactionId, and combine it with the backendId to + * form a virtual transaction id. */ - s->transactionId = GetNewTransactionId(false); + vxid.backendId = MyBackendId; + vxid.localTransactionId = GetNextLocalTransactionId(); - XactLockTableInsert(s->transactionId); + /* + * Lock the virtual transaction id before we announce it in the proc array + */ + VirtualXactLockTableInsert(vxid); - PG_TRACE1(transaction__start, s->transactionId); + /* + * Advertise it in the proc array. We assume assignment of + * LocalTransactionID is atomic, and the backendId should be set already. + */ + Assert(MyProc->backendId == vxid.backendId); + MyProc->lxid = vxid.localTransactionId; + + TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId); /* * set transaction_timestamp() (a/k/a now()). We want this to be the same * as the first command's statement_timestamp(), so don't do a fresh - * GetCurrentTimestamp() call (which'd be expensive anyway). + * GetCurrentTimestamp() call (which'd be expensive anyway). Also, mark + * xactStopTimestamp as unset. */ xactStartTimestamp = stmtStartTimestamp; - pgstat_report_txn_timestamp(xactStartTimestamp); + xactStopTimestamp = 0; + pgstat_report_xact_timestamp(xactStartTimestamp); /* * initialize current transaction state fields - */ - s->nestingLevel = 1; - s->childXids = NIL; - - /* - * You might expect to see "s->currentUser = GetUserId();" here, but you - * won't because it doesn't work during startup; the userid isn't set yet - * during a backend's first transaction start. We only use the - * currentUser field in sub-transaction state structs. * - * prevXactReadOnly is also valid only in sub-transactions. + * note: prevXactReadOnly is not used at the outermost level */ + s->nestingLevel = 1; + s->gucNestLevel = 1; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; + GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt); + /* SecurityDefinerContext should never be set outside a transaction */ + Assert(!s->prevSecDefCxt); /* * initialize other subsystems for new transaction */ + AtStart_GUC(); AtStart_Inval(); AtStart_Cache(); AfterTriggerBeginXact(); @@ -1459,6 +1601,7 @@ static void CommitTransaction(void) { TransactionState s = CurrentTransactionState; + TransactionId latestXid; ShowTransactionState("CommitTransaction"); @@ -1530,40 +1673,16 @@ CommitTransaction(void) /* * Here is where we really truly commit. */ - RecordTransactionCommit(); + latestXid = RecordTransactionCommit(); + + TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid); - /*---------- - * Let others know about no transaction in progress by me. Note that - * this must be done _before_ releasing locks we hold and _after_ + /* + * Let others know about no transaction in progress by me. Note that this + * must be done _before_ releasing locks we hold and _after_ * RecordTransactionCommit. - * - * LWLockAcquire(ProcArrayLock) is required; consider this example: - * UPDATE with xid 0 is blocked by xid 1's UPDATE. - * xid 1 is doing commit while xid 2 gets snapshot. - * If xid 2's GetSnapshotData sees xid 1 as running then it must see - * xid 0 as running as well, or it will be able to see two tuple versions - * - one deleted by xid 1 and one inserted by xid 0. See notes in - * GetSnapshotData. - * - * Note: MyProc may be null during bootstrap. - *---------- */ - if (MyProc != NULL) - { - /* Lock ProcArrayLock because that's what GetSnapshotData uses. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; - MyProc->inVacuum = false; /* must be cleared with xid/xmin */ - - /* Clear the subtransaction-XID cache too while holding the lock */ - MyProc->subxids.nxids = 0; - MyProc->subxids.overflowed = false; - - LWLockRelease(ProcArrayLock); - } - - PG_TRACE1(transaction__commit, s->transactionId); + ProcArrayEndTransaction(MyProc, latestXid); /* * This is all post-commit cleanup. Note that if an error is raised here, @@ -1622,14 +1741,17 @@ CommitTransaction(void) /* Check we've released all catcache entries */ AtEOXact_CatCache(true); - AtEOXact_GUC(true, false); + AtEOXact_GUC(true, 1); AtEOXact_SPI(true); + AtEOXact_xml(); AtEOXact_on_commit_actions(true); AtEOXact_Namespace(true); /* smgrcommit already done */ AtEOXact_Files(); - pgstat_count_xact_commit(); - pgstat_report_txn_timestamp(0); + AtEOXact_ComboCid(); + AtEOXact_HashTables(true); + AtEOXact_PgStat(true); + pgstat_report_xact_timestamp(0); CurrentResourceOwner = NULL; ResourceOwnerDelete(TopTransactionResourceOwner); @@ -1642,7 +1764,10 @@ CommitTransaction(void) s->transactionId = InvalidTransactionId; s->subTransactionId = InvalidSubTransactionId; s->nestingLevel = 0; - s->childXids = NIL; + s->gucNestLevel = 0; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; /* * done with commit processing, set current transaction state back to @@ -1718,6 +1843,26 @@ PrepareTransaction(void) /* NOTIFY and flatfiles will be handled below */ + /* + * Don't allow PREPARE TRANSACTION if we've accessed a temporary table + * in this transaction. Having the prepared xact hold locks on another + * backend's temp table seems a bad idea --- for instance it would prevent + * the backend from exiting. There are other problems too, such as how + * to clean up the source backend's local buffers and ON COMMIT state + * if the prepared xact includes a DROP of a temp table. + * + * We must check this after executing any ON COMMIT actions, because + * they might still access a temp relation. + * + * XXX In principle this could be relaxed to allow some useful special + * cases, such as a temp table created and dropped all within the + * transaction. That seems to require much more bookkeeping though. + */ + if (MyXactAccessedTempRel) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot PREPARE a transaction that has operated on temporary tables"))); + /* Prevent cancel/die interrupt while cleaning up */ HOLD_INTERRUPTS(); @@ -1762,6 +1907,7 @@ PrepareTransaction(void) AtPrepare_UpdateFlatFiles(); AtPrepare_Inval(); AtPrepare_Locks(); + AtPrepare_PgStat(); /* * Here is where we really truly prepare. @@ -1776,28 +1922,15 @@ PrepareTransaction(void) * Now we clean up backend-internal state and release internal resources. */ - /* Break the chain of back-links in the XLOG records I output */ - MyLastRecPtr.xrecoff = 0; - MyXactMadeXLogEntry = false; - MyXactMadeTempRelUpdate = false; + /* Reset XactLastRecEnd until the next transaction writes something */ + XactLastRecEnd.xrecoff = 0; /* * Let others know about no transaction in progress by me. This has to be * done *after* the prepared transaction has been marked valid, else * someone may think it is unlocked and recyclable. */ - - /* Lock ProcArrayLock because that's what GetSnapshotData uses. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; - MyProc->inVacuum = false; /* must be cleared with xid/xmin */ - - /* Clear the subtransaction-XID cache too while holding the lock */ - MyProc->subxids.nxids = 0; - MyProc->subxids.overflowed = false; - - LWLockRelease(ProcArrayLock); + ProcArrayClearTransaction(MyProc); /* * This is all post-transaction cleanup. Note that if an error is raised @@ -1819,6 +1952,8 @@ PrepareTransaction(void) /* notify and flatfiles don't need a postprepare call */ + PostPrepare_PgStat(); + PostPrepare_Inval(); PostPrepare_smgr(); @@ -1838,12 +1973,16 @@ PrepareTransaction(void) AtEOXact_CatCache(true); /* PREPARE acts the same as COMMIT as far as GUC is concerned */ - AtEOXact_GUC(true, false); + AtEOXact_GUC(true, 1); AtEOXact_SPI(true); + AtEOXact_xml(); AtEOXact_on_commit_actions(true); AtEOXact_Namespace(true); /* smgrcommit already done */ AtEOXact_Files(); + AtEOXact_ComboCid(); + AtEOXact_HashTables(true); + /* don't call AtEOXact_PgStat here */ CurrentResourceOwner = NULL; ResourceOwnerDelete(TopTransactionResourceOwner); @@ -1856,7 +1995,10 @@ PrepareTransaction(void) s->transactionId = InvalidTransactionId; s->subTransactionId = InvalidSubTransactionId; s->nestingLevel = 0; - s->childXids = NIL; + s->gucNestLevel = 0; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; /* * done with 1st phase commit processing, set current transaction state @@ -1875,6 +2017,7 @@ static void AbortTransaction(void) { TransactionState s = CurrentTransactionState; + TransactionId latestXid; /* Prevent cancel/die interrupt while cleaning up */ HOLD_INTERRUPTS(); @@ -1916,17 +2059,16 @@ AbortTransaction(void) s->state = TRANS_ABORT; /* - * Reset user id which might have been changed transiently. We cannot use - * s->currentUser, since it may not be set yet; instead rely on internal - * state of miscinit.c. + * Reset user ID which might have been changed transiently. We need this + * to clean up in case control escaped out of a SECURITY DEFINER function + * or other local change of CurrentUserId; therefore, the prior value + * of SecurityDefinerContext also needs to be restored. * - * (Note: it is not necessary to restore session authorization here - * because that can only be changed via GUC, and GUC will take care of - * rolling it back if need be. However, an error within a SECURITY - * DEFINER function could send control here with the wrong current - * userid.) + * (Note: it is not necessary to restore session authorization or role + * settings here because those can only be changed via GUC, and GUC will + * take care of rolling them back if need be.) */ - AtAbort_UserId(); + SetUserIdAndContext(s->prevUser, s->prevSecDefCxt); /* * do abort processing @@ -1941,30 +2083,16 @@ AbortTransaction(void) * Advertise the fact that we aborted in pg_clog (assuming that we got as * far as assigning an XID to advertise). */ - if (TransactionIdIsValid(s->transactionId)) - RecordTransactionAbort(); + latestXid = RecordTransactionAbort(false); + + TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid); /* * Let others know about no transaction in progress by me. Note that this * must be done _before_ releasing locks we hold and _after_ * RecordTransactionAbort. */ - if (MyProc != NULL) - { - /* Lock ProcArrayLock because that's what GetSnapshotData uses. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; - MyProc->inVacuum = false; /* must be cleared with xid/xmin */ - - /* Clear the subtransaction-XID cache too while holding the lock */ - MyProc->subxids.nxids = 0; - MyProc->subxids.overflowed = false; - - LWLockRelease(ProcArrayLock); - } - - PG_TRACE1(transaction__abort, s->transactionId); + ProcArrayEndTransaction(MyProc, latestXid); /* * Post-abort cleanup. See notes in CommitTransaction() concerning @@ -1989,14 +2117,17 @@ AbortTransaction(void) false, true); AtEOXact_CatCache(false); - AtEOXact_GUC(false, false); + AtEOXact_GUC(false, 1); AtEOXact_SPI(false); + AtEOXact_xml(); AtEOXact_on_commit_actions(false); AtEOXact_Namespace(false); smgrabort(); AtEOXact_Files(); - pgstat_count_xact_rollback(); - pgstat_report_txn_timestamp(0); + AtEOXact_ComboCid(); + AtEOXact_HashTables(false); + AtEOXact_PgStat(false); + pgstat_report_xact_timestamp(0); /* * State remains TRANS_ABORT until CleanupTransaction(). @@ -2036,7 +2167,10 @@ CleanupTransaction(void) s->transactionId = InvalidTransactionId; s->subTransactionId = InvalidSubTransactionId; s->nestingLevel = 0; - s->childXids = NIL; + s->gucNestLevel = 0; + s->childXids = NULL; + s->nChildXids = 0; + s->maxChildXids = 0; /* * done with abort processing, set current transaction state back to @@ -2495,12 +2629,14 @@ AbortCurrentTransaction(void) * could issue more commands and possibly cause a failure after the statement * completes). Subtransactions are verboten too. * - * stmtNode: pointer to parameter block for statement; this is used in - * a very klugy way to determine whether we are inside a function. - * stmtType: statement type name for error messages. + * isTopLevel: passed down from ProcessUtility to determine whether we are + * inside a function or multi-query querystring. (We will always fail if + * this is false, but it's convenient to centralize the check here instead of + * making callers do it.) + * stmtType: statement type name, for error messages. */ void -PreventTransactionChain(void *stmtNode, const char *stmtType) +PreventTransactionChain(bool isTopLevel, const char *stmtType) { /* * xact block already started? @@ -2523,15 +2659,14 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) stmtType))); /* - * Are we inside a function call? If the statement's parameter block was - * allocated in QueryContext, assume it is an interactive command. - * Otherwise assume it is coming from a function. + * inside a function call? */ - if (!MemoryContextContains(QueryContext, stmtNode)) + if (!isTopLevel) ereport(ERROR, (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), /* translator: %s represents an SQL statement name */ - errmsg("%s cannot be executed from a function", stmtType))); + errmsg("%s cannot be executed from a function or multi-command string", + stmtType))); /* If we got past IsTransactionBlock test, should be in default state */ if (CurrentTransactionState->blockState != TBLOCK_DEFAULT && @@ -2553,12 +2688,12 @@ PreventTransactionChain(void *stmtNode, const char *stmtType) * use of the current statement's results. Likewise subtransactions. * Thus this is an inverse for PreventTransactionChain. * - * stmtNode: pointer to parameter block for statement; this is used in - * a very klugy way to determine whether we are inside a function. - * stmtType: statement type name for error messages. + * isTopLevel: passed down from ProcessUtility to determine whether we are + * inside a function. + * stmtType: statement type name, for error messages. */ void -RequireTransactionChain(void *stmtNode, const char *stmtType) +RequireTransactionChain(bool isTopLevel, const char *stmtType) { /* * xact block already started? @@ -2573,12 +2708,11 @@ RequireTransactionChain(void *stmtNode, const char *stmtType) return; /* - * Are we inside a function call? If the statement's parameter block was - * allocated in QueryContext, assume it is an interactive command. - * Otherwise assume it is coming from a function. + * inside a function call? */ - if (!MemoryContextContains(QueryContext, stmtNode)) + if (!isTopLevel) return; + ereport(ERROR, (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), /* translator: %s represents an SQL statement name */ @@ -2593,11 +2727,11 @@ RequireTransactionChain(void *stmtNode, const char *stmtType) * a transaction block than when running as single commands. ANALYZE is * currently the only example. * - * stmtNode: pointer to parameter block for statement; this is used in - * a very klugy way to determine whether we are inside a function. + * isTopLevel: passed down from ProcessUtility to determine whether we are + * inside a function. */ bool -IsInTransactionChain(void *stmtNode) +IsInTransactionChain(bool isTopLevel) { /* * Return true on same conditions that would make PreventTransactionChain @@ -2609,7 +2743,7 @@ IsInTransactionChain(void *stmtNode) if (IsSubTransaction()) return true; - if (!MemoryContextContains(QueryContext, stmtNode)) + if (!isTopLevel) return true; if (CurrentTransactionState->blockState != TBLOCK_DEFAULT && @@ -3021,7 +3155,7 @@ UserAbortTransactionBlock(void) * default state. */ case TBLOCK_STARTED: - ereport(WARNING, + ereport(NOTICE, (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION), errmsg("there is no transaction in progress"))); s->blockState = TBLOCK_ABORT_PENDING; @@ -3306,10 +3440,11 @@ RollbackToSavepoint(List *options) /* * BeginInternalSubTransaction - * This is the same as DefineSavepoint except it allows TBLOCK_STARTED - * state, and therefore it can safely be used in a function that might - * be called when not inside a BEGIN block. Also, we automatically - * cycle through CommitTransactionCommand/StartTransactionCommand + * This is the same as DefineSavepoint except it allows TBLOCK_STARTED, + * TBLOCK_END, and TBLOCK_PREPARE states, and therefore it can safely be + * used in functions that might be called when not inside a BEGIN block + * or when running deferred triggers at COMMIT/PREPARE time. Also, it + * automatically does CommitTransactionCommand/StartTransactionCommand * instead of expecting the caller to do it. */ void @@ -3321,6 +3456,8 @@ BeginInternalSubTransaction(char *name) { case TBLOCK_STARTED: case TBLOCK_INPROGRESS: + case TBLOCK_END: + case TBLOCK_PREPARE: case TBLOCK_SUBINPROGRESS: /* Normal subtransaction start */ PushTransaction(); @@ -3338,7 +3475,6 @@ BeginInternalSubTransaction(char *name) case TBLOCK_DEFAULT: case TBLOCK_BEGIN: case TBLOCK_SUBBEGIN: - case TBLOCK_END: case TBLOCK_SUBEND: case TBLOCK_ABORT: case TBLOCK_SUBABORT: @@ -3348,7 +3484,6 @@ BeginInternalSubTransaction(char *name) case TBLOCK_SUBABORT_PENDING: case TBLOCK_SUBRESTART: case TBLOCK_SUBABORT_RESTART: - case TBLOCK_PREPARE: elog(FATAL, "BeginInternalSubTransaction: unexpected state %s", BlockStateAsString(s->blockState)); break; @@ -3656,13 +3791,11 @@ CommitSubTransaction(void) CommandCounterIncrement(); /* Mark subtransaction as subcommitted */ - if (TransactionIdIsValid(s->transactionId)) - { - RecordSubTransactionCommit(); - AtSubCommit_childXids(); - } + RecordSubTransactionCommit(); /* Post-commit cleanup */ + if (TransactionIdIsValid(s->transactionId)) + AtSubCommit_childXids(); AfterTriggerEndSubXact(true); AtSubCommit_Portals(s->subTransactionId, s->parent->subTransactionId, @@ -3699,7 +3832,7 @@ CommitSubTransaction(void) RESOURCE_RELEASE_AFTER_LOCKS, true, false); - AtEOXact_GUC(true, true); + AtEOXact_GUC(true, s->gucNestLevel); AtEOSubXact_SPI(true, s->subTransactionId); AtEOSubXact_on_commit_actions(true, s->subTransactionId, s->parent->subTransactionId); @@ -3707,6 +3840,8 @@ CommitSubTransaction(void) s->parent->subTransactionId); AtEOSubXact_Files(true, s->subTransactionId, s->parent->subTransactionId); + AtEOSubXact_HashTables(true, s->nestingLevel); + AtEOSubXact_PgStat(true, s->nestingLevel); /* * We need to restore the upper transaction's read-only state, in case the @@ -3769,6 +3904,12 @@ AbortSubTransaction(void) s->state = TRANS_ABORT; + /* + * Reset user ID which might have been changed transiently. (See notes + * in AbortTransaction.) + */ + SetUserIdAndContext(s->prevUser, s->prevSecDefCxt); + /* * We can skip all this stuff if the subxact failed before creating a * ResourceOwner... @@ -3786,13 +3927,12 @@ AbortSubTransaction(void) s->parent->subTransactionId); /* Advertise the fact that we aborted in pg_clog. */ + (void) RecordTransactionAbort(true); + + /* Post-abort cleanup */ if (TransactionIdIsValid(s->transactionId)) - { - RecordSubTransactionAbort(); AtSubAbort_childXids(); - } - /* Post-abort cleanup */ CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId, s->parent->subTransactionId); @@ -3810,30 +3950,19 @@ AbortSubTransaction(void) RESOURCE_RELEASE_AFTER_LOCKS, false, false); - AtEOXact_GUC(false, true); + AtEOXact_GUC(false, s->gucNestLevel); AtEOSubXact_SPI(false, s->subTransactionId); + AtEOXact_xml(); AtEOSubXact_on_commit_actions(false, s->subTransactionId, s->parent->subTransactionId); AtEOSubXact_Namespace(false, s->subTransactionId, s->parent->subTransactionId); AtEOSubXact_Files(false, s->subTransactionId, s->parent->subTransactionId); + AtEOSubXact_HashTables(false, s->nestingLevel); + AtEOSubXact_PgStat(false, s->nestingLevel); } - /* - * Reset user id which might have been changed transiently. Here we want - * to restore to the userid that was current at subxact entry. (As in - * AbortTransaction, we need not worry about the session userid.) - * - * Must do this after AtEOXact_GUC to handle the case where we entered the - * subxact inside a SECURITY DEFINER function (hence current and session - * userids were different) and then session auth was changed inside the - * subxact. GUC will reset both current and session userids to the - * entry-time session userid. This is right in every other scenario so it - * seems simplest to let GUC do that and fix it here. - */ - SetUserId(s->currentUser); - /* * Restore the upper transaction's read-only state, too. This should be * redundant with GUC's cleanup but we may as well do it for consistency @@ -3888,13 +4017,6 @@ PushTransaction(void) { TransactionState p = CurrentTransactionState; TransactionState s; - Oid currentUser; - - /* - * At present, GetUserId cannot fail, but let's not assume that. Get the - * ID before entering the critical code sequence. - */ - currentUser = GetUserId(); /* * We keep subtransaction state nodes in TopTransactionContext. @@ -3924,10 +4046,11 @@ PushTransaction(void) s->subTransactionId = currentSubTransactionId; s->parent = p; s->nestingLevel = p->nestingLevel + 1; + s->gucNestLevel = NewGUCNestLevel(); s->savepointLevel = p->savepointLevel; s->state = TRANS_DEFAULT; s->blockState = TBLOCK_SUBBEGIN; - s->currentUser = currentUser; + GetUserIdAndContext(&s->prevUser, &s->prevSecDefCxt); s->prevXactReadOnly = XactReadOnly; CurrentTransactionState = s; @@ -3997,20 +4120,35 @@ ShowTransactionState(const char *str) static void ShowTransactionStateRec(TransactionState s) { + StringInfoData buf; + + initStringInfo(&buf); + + if (s->nChildXids > 0) + { + int i; + + appendStringInfo(&buf, "%u", s->childXids[0]); + for (i = 1; i < s->nChildXids; i++) + appendStringInfo(&buf, " %u", s->childXids[i]); + } + if (s->parent) ShowTransactionStateRec(s->parent); /* use ereport to suppress computation if msg will not be printed */ ereport(DEBUG3, - (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u, nestlvl: %d, children: %s", + (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u%s, nestlvl: %d, children: %s", PointerIsValid(s->name) ? s->name : "unnamed", BlockStateAsString(s->blockState), TransStateAsString(s->state), (unsigned int) s->transactionId, (unsigned int) s->subTransactionId, (unsigned int) currentCommandId, - s->nestingLevel, - nodeToString(s->childXids)))); + currentCommandIdUsed ? " (used)" : "", + s->nestingLevel, buf.data))); + + pfree(buf.data); } /* @@ -4089,36 +4227,22 @@ TransStateAsString(TransState state) * xactGetCommittedChildren * * Gets the list of committed children of the current transaction. The return - * value is the number of child transactions. *children is set to point to a - * palloc'd array of TransactionIds. If there are no subxacts, *children is - * set to NULL. + * value is the number of child transactions. *ptr is set to point to an + * array of TransactionIds. The array is allocated in TopTransactionContext; + * the caller should *not* pfree() it (this is a change from pre-8.4 code!). + * If there are no subxacts, *ptr is set to NULL. */ int xactGetCommittedChildren(TransactionId **ptr) { TransactionState s = CurrentTransactionState; - int nchildren; - TransactionId *children; - ListCell *p; - nchildren = list_length(s->childXids); - if (nchildren == 0) - { + if (s->nChildXids == 0) *ptr = NULL; - return 0; - } - - children = (TransactionId *) palloc(nchildren * sizeof(TransactionId)); - *ptr = children; - - foreach(p, s->childXids) - { - TransactionId child = lfirst_xid(p); - - *children++ = child; - } + else + *ptr = s->childXids; - return nchildren; + return s->nChildXids; } /* @@ -4239,12 +4363,9 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record) static void xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) { - struct tm *tm = localtime(&xlrec->xtime); int i; - appendStringInfo(buf, "%04u-%02u-%02u %02u:%02u:%02u", - tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); + appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); if (xlrec->nrels > 0) { appendStringInfo(buf, "; rels:"); @@ -4270,12 +4391,9 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) static void xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec) { - struct tm *tm = localtime(&xlrec->xtime); int i; - appendStringInfo(buf, "%04u-%02u-%02u %02u:%02u:%02u", - tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); + appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); if (xlrec->nrels > 0) { appendStringInfo(buf, "; rels:");