granicus.if.org Git - postgresql/blob - src/backend/access/transam/xact.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * xact.c
   4  *        top level transaction system support routines
   5  *
   6  * See src/backend/access/transam/README for more information.
   7  *
   8  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   9  * Portions Copyright (c) 1994, Regents of the University of California
  10  *
  11  *
  12  * IDENTIFICATION
  13  *        src/backend/access/transam/xact.c
  14  *
  15  *-------------------------------------------------------------------------
  16  */
  17
  18 #include "postgres.h"
  19
  20 #include <time.h>
  21 #include <unistd.h>
  22
  23 #include "access/multixact.h"
  24 #include "access/subtrans.h"
  25 #include "access/transam.h"
  26 #include "access/twophase.h"
  27 #include "access/xact.h"
  28 #include "access/xlogutils.h"
  29 #include "catalog/catalog.h"
  30 #include "catalog/namespace.h"
  31 #include "catalog/storage.h"
  32 #include "commands/async.h"
  33 #include "commands/tablecmds.h"
  34 #include "commands/trigger.h"
  35 #include "executor/spi.h"
  36 #include "libpq/be-fsstubs.h"
  37 #include "miscadmin.h"
  38 #include "pgstat.h"
  39 #include "replication/walsender.h"
  40 #include "replication/syncrep.h"
  41 #include "storage/lmgr.h"
  42 #include "storage/predicate.h"
  43 #include "storage/procarray.h"
  44 #include "storage/sinvaladt.h"
  45 #include "storage/smgr.h"
  46 #include "utils/combocid.h"
  47 #include "utils/guc.h"
  48 #include "utils/inval.h"
  49 #include "utils/memutils.h"
  50 #include "utils/relmapper.h"
  51 #include "utils/snapmgr.h"
  52 #include "utils/timestamp.h"
  53 #include "pg_trace.h"
  54
  55
  56 /*
  57  *      User-tweakable parameters
  58  */
  59 int                     DefaultXactIsoLevel = XACT_READ_COMMITTED;
  60 int                     XactIsoLevel;
  61
  62 bool            DefaultXactReadOnly = false;
  63 bool            XactReadOnly;
  64
  65 bool            DefaultXactDeferrable = false;
  66 bool            XactDeferrable;
  67
  68 int                     synchronous_commit = SYNCHRONOUS_COMMIT_ON;
  69
  70 int                     CommitDelay = 0;        /* precommit delay in microseconds */
  71 int                     CommitSiblings = 5; /* # concurrent xacts needed to sleep */
  72
  73 /*
  74  * MyXactAccessedTempRel is set when a temporary relation is accessed.
  75  * We don't allow PREPARE TRANSACTION in that case.  (This is global
  76  * so that it can be set from heapam.c.)
  77  */
  78 bool            MyXactAccessedTempRel = false;
  79
  80
  81 /*
  82  *      transaction states - transaction state from server perspective
  83  */
  84 typedef enum TransState
  85 {
  86         TRANS_DEFAULT,                          /* idle */
  87         TRANS_START,                            /* transaction starting */
  88         TRANS_INPROGRESS,                       /* inside a valid transaction */
  89         TRANS_COMMIT,                           /* commit in progress */
  90         TRANS_ABORT,                            /* abort in progress */
  91         TRANS_PREPARE                           /* prepare in progress */
  92 } TransState;
  93
  94 /*
  95  *      transaction block states - transaction state of client queries
  96  *
  97  * Note: the subtransaction states are used only for non-topmost
  98  * transactions; the others appear only in the topmost transaction.
  99  */
 100 typedef enum TBlockState
 101 {
 102         /* not-in-transaction-block states */
 103         TBLOCK_DEFAULT,                         /* idle */
 104         TBLOCK_STARTED,                         /* running single-query transaction */
 105
 106         /* transaction block states */
 107         TBLOCK_BEGIN,                           /* starting transaction block */
 108         TBLOCK_INPROGRESS,                      /* live transaction */
 109         TBLOCK_END,                                     /* COMMIT received */
 110         TBLOCK_ABORT,                           /* failed xact, awaiting ROLLBACK */
 111         TBLOCK_ABORT_END,                       /* failed xact, ROLLBACK received */
 112         TBLOCK_ABORT_PENDING,           /* live xact, ROLLBACK received */
 113         TBLOCK_PREPARE,                         /* live xact, PREPARE received */
 114
 115         /* subtransaction states */
 116         TBLOCK_SUBBEGIN,                        /* starting a subtransaction */
 117         TBLOCK_SUBINPROGRESS,           /* live subtransaction */
 118         TBLOCK_SUBRELEASE,                      /* RELEASE received */
 119         TBLOCK_SUBCOMMIT,                       /* COMMIT received while TBLOCK_SUBINPROGRESS */
 120         TBLOCK_SUBABORT,                        /* failed subxact, awaiting ROLLBACK */
 121         TBLOCK_SUBABORT_END,            /* failed subxact, ROLLBACK received */
 122         TBLOCK_SUBABORT_PENDING,        /* live subxact, ROLLBACK received */
 123         TBLOCK_SUBRESTART,                      /* live subxact, ROLLBACK TO received */
 124         TBLOCK_SUBABORT_RESTART         /* failed subxact, ROLLBACK TO received */
 125 } TBlockState;
 126
 127 /*
 128  *      transaction state structure
 129  */
 130 typedef struct TransactionStateData
 131 {
 132         TransactionId transactionId;    /* my XID, or Invalid if none */
 133         SubTransactionId subTransactionId;      /* my subxact ID */
 134         char       *name;                       /* savepoint name, if any */
 135         int                     savepointLevel; /* savepoint level */
 136         TransState      state;                  /* low-level state */
 137         TBlockState blockState;         /* high-level state */
 138         int                     nestingLevel;   /* transaction nesting depth */
 139         int                     gucNestLevel;   /* GUC context nesting depth */
 140         MemoryContext curTransactionContext;            /* my xact-lifetime context */
 141         ResourceOwner curTransactionOwner;      /* my query resources */
 142         TransactionId *childXids;       /* subcommitted child XIDs, in XID order */
 143         int                     nChildXids;             /* # of subcommitted child XIDs */
 144         int                     maxChildXids;   /* allocated size of childXids[] */
 145         Oid                     prevUser;               /* previous CurrentUserId setting */
 146         int                     prevSecContext; /* previous SecurityRestrictionContext */
 147         bool            prevXactReadOnly;               /* entry-time xact r/o state */
 148         bool            startedInRecovery;              /* did we start in recovery? */
 149         struct TransactionStateData *parent;            /* back link to parent */
 150 } TransactionStateData;
 151
 152 typedef TransactionStateData *TransactionState;
 153
 154 /*
 155  * CurrentTransactionState always points to the current transaction state
 156  * block.  It will point to TopTransactionStateData when not in a
 157  * transaction at all, or when in a top-level transaction.
 158  */
 159 static TransactionStateData TopTransactionStateData = {
 160         0,                                                      /* transaction id */
 161         0,                                                      /* subtransaction id */
 162         NULL,                                           /* savepoint name */
 163         0,                                                      /* savepoint level */
 164         TRANS_DEFAULT,                          /* transaction state */
 165         TBLOCK_DEFAULT,                         /* transaction block state from the client
 166                                                                  * perspective */
 167         0,                                                      /* transaction nesting depth */
 168         0,                                                      /* GUC context nesting depth */
 169         NULL,                                           /* cur transaction context */
 170         NULL,                                           /* cur transaction resource owner */
 171         NULL,                                           /* subcommitted child Xids */
 172         0,                                                      /* # of subcommitted child Xids */
 173         0,                                                      /* allocated size of childXids[] */
 174         InvalidOid,                                     /* previous CurrentUserId setting */
 175         0,                                                      /* previous SecurityRestrictionContext */
 176         false,                                          /* entry-time xact r/o state */
 177         false,                                          /* startedInRecovery */
 178         NULL                                            /* link to parent state block */
 179 };
 180
 181 /*
 182  * unreportedXids holds XIDs of all subtransactions that have not yet been
 183  * reported in a XLOG_XACT_ASSIGNMENT record.
 184  */
 185 static int      nUnreportedXids;
 186 static TransactionId unreportedXids[PGPROC_MAX_CACHED_SUBXIDS];
 187
 188 static TransactionState CurrentTransactionState = &TopTransactionStateData;
 189
 190 /*
 191  * The subtransaction ID and command ID assignment counters are global
 192  * to a whole transaction, so we do not keep them in the state stack.
 193  */
 194 static SubTransactionId currentSubTransactionId;
 195 static CommandId currentCommandId;
 196 static bool currentCommandIdUsed;
 197
 198 /*
 199  * xactStartTimestamp is the value of transaction_timestamp().
 200  * stmtStartTimestamp is the value of statement_timestamp().
 201  * xactStopTimestamp is the time at which we log a commit or abort WAL record.
 202  * These do not change as we enter and exit subtransactions, so we don't
 203  * keep them inside the TransactionState stack.
 204  */
 205 static TimestampTz xactStartTimestamp;
 206 static TimestampTz stmtStartTimestamp;
 207 static TimestampTz xactStopTimestamp;
 208
 209 /*
 210  * GID to be used for preparing the current transaction.  This is also
 211  * global to a whole transaction, so we don't keep it in the state stack.
 212  */
 213 static char *prepareGID;
 214
 215 /*
 216  * Some commands want to force synchronous commit.
 217  */
 218 static bool forceSyncCommit = false;
 219
 220 /*
 221  * Private context for transaction-abort work --- we reserve space for this
 222  * at startup to ensure that AbortTransaction and AbortSubTransaction can work
 223  * when we've run out of memory.
 224  */
 225 static MemoryContext TransactionAbortContext = NULL;
 226
 227 /*
 228  * List of add-on start- and end-of-xact callbacks
 229  */
 230 typedef struct XactCallbackItem
 231 {
 232         struct XactCallbackItem *next;
 233         XactCallback callback;
 234         void       *arg;
 235 } XactCallbackItem;
 236
 237 static XactCallbackItem *Xact_callbacks = NULL;
 238
 239 /*
 240  * List of add-on start- and end-of-subxact callbacks
 241  */
 242 typedef struct SubXactCallbackItem
 243 {
 244         struct SubXactCallbackItem *next;
 245         SubXactCallback callback;
 246         void       *arg;
 247 } SubXactCallbackItem;
 248
 249 static SubXactCallbackItem *SubXact_callbacks = NULL;
 250
 251
 252 /* local function prototypes */
 253 static void AssignTransactionId(TransactionState s);
 254 static void AbortTransaction(void);
 255 static void AtAbort_Memory(void);
 256 static void AtCleanup_Memory(void);
 257 static void AtAbort_ResourceOwner(void);
 258 static void AtCCI_LocalCache(void);
 259 static void AtCommit_Memory(void);
 260 static void AtStart_Cache(void);
 261 static void AtStart_Memory(void);
 262 static void AtStart_ResourceOwner(void);
 263 static void CallXactCallbacks(XactEvent event);
 264 static void CallSubXactCallbacks(SubXactEvent event,
 265                                          SubTransactionId mySubid,
 266                                          SubTransactionId parentSubid);
 267 static void CleanupTransaction(void);
 268 static void CommitTransaction(void);
 269 static TransactionId RecordTransactionAbort(bool isSubXact);
 270 static void StartTransaction(void);
 271
 272 static void StartSubTransaction(void);
 273 static void CommitSubTransaction(void);
 274 static void AbortSubTransaction(void);
 275 static void CleanupSubTransaction(void);
 276 static void PushTransaction(void);
 277 static void PopTransaction(void);
 278
 279 static void AtSubAbort_Memory(void);
 280 static void AtSubCleanup_Memory(void);
 281 static void AtSubAbort_ResourceOwner(void);
 282 static void AtSubCommit_Memory(void);
 283 static void AtSubStart_Memory(void);
 284 static void AtSubStart_ResourceOwner(void);
 285
 286 static void ShowTransactionState(const char *str);
 287 static void ShowTransactionStateRec(TransactionState state);
 288 static const char *BlockStateAsString(TBlockState blockState);
 289 static const char *TransStateAsString(TransState state);
 290
 291
 292 /* ----------------------------------------------------------------
 293  *      transaction state accessors
 294  * ----------------------------------------------------------------
 295  */
 296
 297 /*
 298  *      IsTransactionState
 299  *
 300  *      This returns true if we are inside a valid transaction; that is,
 301  *      it is safe to initiate database access, take heavyweight locks, etc.
 302  */
 303 bool
 304 IsTransactionState(void)
 305 {
 306         TransactionState s = CurrentTransactionState;
 307
 308         /*
 309          * TRANS_DEFAULT and TRANS_ABORT are obviously unsafe states.  However, we
 310          * also reject the startup/shutdown states TRANS_START, TRANS_COMMIT,
 311          * TRANS_PREPARE since it might be too soon or too late within those
 312          * transition states to do anything interesting.  Hence, the only "valid"
 313          * state is TRANS_INPROGRESS.
 314          */
 315         return (s->state == TRANS_INPROGRESS);
 316 }
 317
 318 /*
 319  *      IsAbortedTransactionBlockState
 320  *
 321  *      This returns true if we are within an aborted transaction block.
 322  */
 323 bool
 324 IsAbortedTransactionBlockState(void)
 325 {
 326         TransactionState s = CurrentTransactionState;
 327
 328         if (s->blockState == TBLOCK_ABORT ||
 329                 s->blockState == TBLOCK_SUBABORT)
 330                 return true;
 331
 332         return false;
 333 }
 334
 335
 336 /*
 337  *      GetTopTransactionId
 338  *
 339  * This will return the XID of the main transaction, assigning one if
 340  * it's not yet set.  Be careful to call this only inside a valid xact.
 341  */
 342 TransactionId
 343 GetTopTransactionId(void)
 344 {
 345         if (!TransactionIdIsValid(TopTransactionStateData.transactionId))
 346                 AssignTransactionId(&TopTransactionStateData);
 347         return TopTransactionStateData.transactionId;
 348 }
 349
 350 /*
 351  *      GetTopTransactionIdIfAny
 352  *
 353  * This will return the XID of the main transaction, if one is assigned.
 354  * It will return InvalidTransactionId if we are not currently inside a
 355  * transaction, or inside a transaction that hasn't yet been assigned an XID.
 356  */
 357 TransactionId
 358 GetTopTransactionIdIfAny(void)
 359 {
 360         return TopTransactionStateData.transactionId;
 361 }
 362
 363 /*
 364  *      GetCurrentTransactionId
 365  *
 366  * This will return the XID of the current transaction (main or sub
 367  * transaction), assigning one if it's not yet set.  Be careful to call this
 368  * only inside a valid xact.
 369  */
 370 TransactionId
 371 GetCurrentTransactionId(void)
 372 {
 373         TransactionState s = CurrentTransactionState;
 374
 375         if (!TransactionIdIsValid(s->transactionId))
 376                 AssignTransactionId(s);
 377         return s->transactionId;
 378 }
 379
 380 /*
 381  *      GetCurrentTransactionIdIfAny
 382  *
 383  * This will return the XID of the current sub xact, if one is assigned.
 384  * It will return InvalidTransactionId if we are not currently inside a
 385  * transaction, or inside a transaction that hasn't been assigned an XID yet.
 386  */
 387 TransactionId
 388 GetCurrentTransactionIdIfAny(void)
 389 {
 390         return CurrentTransactionState->transactionId;
 391 }
 392
 393
 394 /*
 395  * AssignTransactionId
 396  *
 397  * Assigns a new permanent XID to the given TransactionState.
 398  * We do not assign XIDs to transactions until/unless this is called.
 399  * Also, any parent TransactionStates that don't yet have XIDs are assigned
 400  * one; this maintains the invariant that a child transaction has an XID
 401  * following its parent's.
 402  */
 403 static void
 404 AssignTransactionId(TransactionState s)
 405 {
 406         bool            isSubXact = (s->parent != NULL);
 407         ResourceOwner currentOwner;
 408
 409         /* Assert that caller didn't screw up */
 410         Assert(!TransactionIdIsValid(s->transactionId));
 411         Assert(s->state == TRANS_INPROGRESS);
 412
 413         /*
 414          * Ensure parent(s) have XIDs, so that a child always has an XID later
 415          * than its parent.  Musn't recurse here, or we might get a stack overflow
 416          * if we're at the bottom of a huge stack of subtransactions none of which
 417          * have XIDs yet.
 418          */
 419         if (isSubXact && !TransactionIdIsValid(s->parent->transactionId))
 420         {
 421                 TransactionState p = s->parent;
 422                 TransactionState *parents;
 423                 size_t          parentOffset = 0;
 424
 425                 parents = palloc(sizeof(TransactionState) * s->nestingLevel);
 426                 while (p != NULL && !TransactionIdIsValid(p->transactionId))
 427                 {
 428                         parents[parentOffset++] = p;
 429                         p = p->parent;
 430                 }
 431
 432                 /*
 433                  * This is technically a recursive call, but the recursion will never
 434                  * be more than one layer deep.
 435                  */
 436                 while (parentOffset != 0)
 437                         AssignTransactionId(parents[--parentOffset]);
 438
 439                 pfree(parents);
 440         }
 441
 442         /*
 443          * Generate a new Xid and record it in PG_PROC and pg_subtrans.
 444          *
 445          * NB: we must make the subtrans entry BEFORE the Xid appears anywhere in
 446          * shared storage other than PG_PROC; because if there's no room for it in
 447          * PG_PROC, the subtrans entry is needed to ensure that other backends see
 448          * the Xid as "running".  See GetNewTransactionId.
 449          */
 450         s->transactionId = GetNewTransactionId(isSubXact);
 451
 452         if (isSubXact)
 453                 SubTransSetParent(s->transactionId, s->parent->transactionId, false);
 454
 455         /*
 456          * If it's a top-level transaction, the predicate locking system needs to
 457          * be told about it too.
 458          */
 459         if (!isSubXact)
 460                 RegisterPredicateLockingXid(s->transactionId);
 461
 462         /*
 463          * Acquire lock on the transaction XID.  (We assume this cannot block.) We
 464          * have to ensure that the lock is assigned to the transaction's own
 465          * ResourceOwner.
 466          */
 467         currentOwner = CurrentResourceOwner;
 468         PG_TRY();
 469         {
 470                 CurrentResourceOwner = s->curTransactionOwner;
 471                 XactLockTableInsert(s->transactionId);
 472         }
 473         PG_CATCH();
 474         {
 475                 /* Ensure CurrentResourceOwner is restored on error */
 476                 CurrentResourceOwner = currentOwner;
 477                 PG_RE_THROW();
 478         }
 479         PG_END_TRY();
 480         CurrentResourceOwner = currentOwner;
 481
 482         /*
 483          * Every PGPROC_MAX_CACHED_SUBXIDS assigned transaction ids within each
 484          * top-level transaction we issue a WAL record for the assignment. We
 485          * include the top-level xid and all the subxids that have not yet been
 486          * reported using XLOG_XACT_ASSIGNMENT records.
 487          *
 488          * This is required to limit the amount of shared memory required in a hot
 489          * standby server to keep track of in-progress XIDs. See notes for
 490          * RecordKnownAssignedTransactionIds().
 491          *
 492          * We don't keep track of the immediate parent of each subxid, only the
 493          * top-level transaction that each subxact belongs to. This is correct in
 494          * recovery only because aborted subtransactions are separately WAL
 495          * logged.
 496          */
 497         if (isSubXact && XLogStandbyInfoActive())
 498         {
 499                 unreportedXids[nUnreportedXids] = s->transactionId;
 500                 nUnreportedXids++;
 501
 502                 /*
 503                  * ensure this test matches similar one in
 504                  * RecoverPreparedTransactions()
 505                  */
 506                 if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS)
 507                 {
 508                         XLogRecData rdata[2];
 509                         xl_xact_assignment xlrec;
 510
 511                         /*
 512                          * xtop is always set by now because we recurse up transaction
 513                          * stack to the highest unassigned xid and then come back down
 514                          */
 515                         xlrec.xtop = GetTopTransactionId();
 516                         Assert(TransactionIdIsValid(xlrec.xtop));
 517                         xlrec.nsubxacts = nUnreportedXids;
 518
 519                         rdata[0].data = (char *) &xlrec;
 520                         rdata[0].len = MinSizeOfXactAssignment;
 521                         rdata[0].buffer = InvalidBuffer;
 522                         rdata[0].next = &rdata[1];
 523
 524                         rdata[1].data = (char *) unreportedXids;
 525                         rdata[1].len = PGPROC_MAX_CACHED_SUBXIDS * sizeof(TransactionId);
 526                         rdata[1].buffer = InvalidBuffer;
 527                         rdata[1].next = NULL;
 528
 529                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
 530
 531                         nUnreportedXids = 0;
 532                 }
 533         }
 534 }
 535
 536 /*
 537  *      GetCurrentSubTransactionId
 538  */
 539 SubTransactionId
 540 GetCurrentSubTransactionId(void)
 541 {
 542         TransactionState s = CurrentTransactionState;
 543
 544         return s->subTransactionId;
 545 }
 546
 547
 548 /*
 549  *      GetCurrentCommandId
 550  *
 551  * "used" must be TRUE if the caller intends to use the command ID to mark
 552  * inserted/updated/deleted tuples.  FALSE means the ID is being fetched
 553  * for read-only purposes (ie, as a snapshot validity cutoff).  See
 554  * CommandCounterIncrement() for discussion.
 555  */
 556 CommandId
 557 GetCurrentCommandId(bool used)
 558 {
 559         /* this is global to a transaction, not subtransaction-local */
 560         if (used)
 561                 currentCommandIdUsed = true;
 562         return currentCommandId;
 563 }
 564
 565 /*
 566  *      GetCurrentTransactionStartTimestamp
 567  */
 568 TimestampTz
 569 GetCurrentTransactionStartTimestamp(void)
 570 {
 571         return xactStartTimestamp;
 572 }
 573
 574 /*
 575  *      GetCurrentStatementStartTimestamp
 576  */
 577 TimestampTz
 578 GetCurrentStatementStartTimestamp(void)
 579 {
 580         return stmtStartTimestamp;
 581 }
 582
 583 /*
 584  *      GetCurrentTransactionStopTimestamp
 585  *
 586  * We return current time if the transaction stop time hasn't been set
 587  * (which can happen if we decide we don't need to log an XLOG record).
 588  */
 589 TimestampTz
 590 GetCurrentTransactionStopTimestamp(void)
 591 {
 592         if (xactStopTimestamp != 0)
 593                 return xactStopTimestamp;
 594         return GetCurrentTimestamp();
 595 }
 596
 597 /*
 598  *      SetCurrentStatementStartTimestamp
 599  */
 600 void
 601 SetCurrentStatementStartTimestamp(void)
 602 {
 603         stmtStartTimestamp = GetCurrentTimestamp();
 604 }
 605
 606 /*
 607  *      SetCurrentTransactionStopTimestamp
 608  */
 609 static inline void
 610 SetCurrentTransactionStopTimestamp(void)
 611 {
 612         xactStopTimestamp = GetCurrentTimestamp();
 613 }
 614
 615 /*
 616  *      GetCurrentTransactionNestLevel
 617  *
 618  * Note: this will return zero when not inside any transaction, one when
 619  * inside a top-level transaction, etc.
 620  */
 621 int
 622 GetCurrentTransactionNestLevel(void)
 623 {
 624         TransactionState s = CurrentTransactionState;
 625
 626         return s->nestingLevel;
 627 }
 628
 629
 630 /*
 631  *      TransactionIdIsCurrentTransactionId
 632  */
 633 bool
 634 TransactionIdIsCurrentTransactionId(TransactionId xid)
 635 {
 636         TransactionState s;
 637
 638         /*
 639          * We always say that BootstrapTransactionId is "not my transaction ID"
 640          * even when it is (ie, during bootstrap).      Along with the fact that
 641          * transam.c always treats BootstrapTransactionId as already committed,
 642          * this causes the tqual.c routines to see all tuples as committed, which
 643          * is what we need during bootstrap.  (Bootstrap mode only inserts tuples,
 644          * it never updates or deletes them, so all tuples can be presumed good
 645          * immediately.)
 646          *
 647          * Likewise, InvalidTransactionId and FrozenTransactionId are certainly
 648          * not my transaction ID, so we can just return "false" immediately for
 649          * any non-normal XID.
 650          */
 651         if (!TransactionIdIsNormal(xid))
 652                 return false;
 653
 654         /*
 655          * We will return true for the Xid of the current subtransaction, any of
 656          * its subcommitted children, any of its parents, or any of their
 657          * previously subcommitted children.  However, a transaction being aborted
 658          * is no longer "current", even though it may still have an entry on the
 659          * state stack.
 660          */
 661         for (s = CurrentTransactionState; s != NULL; s = s->parent)
 662         {
 663                 int                     low,
 664                                         high;
 665
 666                 if (s->state == TRANS_ABORT)
 667                         continue;
 668                 if (!TransactionIdIsValid(s->transactionId))
 669                         continue;                       /* it can't have any child XIDs either */
 670                 if (TransactionIdEquals(xid, s->transactionId))
 671                         return true;
 672                 /* As the childXids array is ordered, we can use binary search */
 673                 low = 0;
 674                 high = s->nChildXids - 1;
 675                 while (low <= high)
 676                 {
 677                         int                     middle;
 678                         TransactionId probe;
 679
 680                         middle = low + (high - low) / 2;
 681                         probe = s->childXids[middle];
 682                         if (TransactionIdEquals(probe, xid))
 683                                 return true;
 684                         else if (TransactionIdPrecedes(probe, xid))
 685                                 low = middle + 1;
 686                         else
 687                                 high = middle - 1;
 688                 }
 689         }
 690
 691         return false;
 692 }
 693
 694 /*
 695  *      TransactionStartedDuringRecovery
 696  *
 697  * Returns true if the current transaction started while recovery was still
 698  * in progress. Recovery might have ended since so RecoveryInProgress() might
 699  * return false already.
 700  */
 701 bool
 702 TransactionStartedDuringRecovery(void)
 703 {
 704         return CurrentTransactionState->startedInRecovery;
 705 }
 706
 707 /*
 708  *      CommandCounterIncrement
 709  */
 710 void
 711 CommandCounterIncrement(void)
 712 {
 713         /*
 714          * If the current value of the command counter hasn't been "used" to mark
 715          * tuples, we need not increment it, since there's no need to distinguish
 716          * a read-only command from others.  This helps postpone command counter
 717          * overflow, and keeps no-op CommandCounterIncrement operations cheap.
 718          */
 719         if (currentCommandIdUsed)
 720         {
 721                 currentCommandId += 1;
 722                 if (currentCommandId == FirstCommandId) /* check for overflow */
 723                 {
 724                         currentCommandId -= 1;
 725                         ereport(ERROR,
 726                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 727                                          errmsg("cannot have more than 2^32-1 commands in a transaction")));
 728                 }
 729                 currentCommandIdUsed = false;
 730
 731                 /* Propagate new command ID into static snapshots */
 732                 SnapshotSetCommandId(currentCommandId);
 733
 734                 /*
 735                  * Make any catalog changes done by the just-completed command visible
 736                  * in the local syscache.  We obviously don't need to do this after a
 737                  * read-only command.  (But see hacks in inval.c to make real sure we
 738                  * don't think a command that queued inval messages was read-only.)
 739                  */
 740                 AtCCI_LocalCache();
 741         }
 742 }
 743
 744 /*
 745  * ForceSyncCommit
 746  *
 747  * Interface routine to allow commands to force a synchronous commit of the
 748  * current top-level transaction
 749  */
 750 void
 751 ForceSyncCommit(void)
 752 {
 753         forceSyncCommit = true;
 754 }
 755
 756
 757 /* ----------------------------------------------------------------
 758  *                                              StartTransaction stuff
 759  * ----------------------------------------------------------------
 760  */
 761
 762 /*
 763  *      AtStart_Cache
 764  */
 765 static void
 766 AtStart_Cache(void)
 767 {
 768         AcceptInvalidationMessages();
 769 }
 770
 771 /*
 772  *      AtStart_Memory
 773  */
 774 static void
 775 AtStart_Memory(void)
 776 {
 777         TransactionState s = CurrentTransactionState;
 778
 779         /*
 780          * If this is the first time through, create a private context for
 781          * AbortTransaction to work in.  By reserving some space now, we can
 782          * insulate AbortTransaction from out-of-memory scenarios.      Like
 783          * ErrorContext, we set it up with slow growth rate and a nonzero minimum
 784          * size, so that space will be reserved immediately.
 785          */
 786         if (TransactionAbortContext == NULL)
 787                 TransactionAbortContext =
 788                         AllocSetContextCreate(TopMemoryContext,
 789                                                                   "TransactionAbortContext",
 790                                                                   32 * 1024,
 791                                                                   32 * 1024,
 792                                                                   32 * 1024);
 793
 794         /*
 795          * We shouldn't have a transaction context already.
 796          */
 797         Assert(TopTransactionContext == NULL);
 798
 799         /*
 800          * Create a toplevel context for the transaction.
 801          */
 802         TopTransactionContext =
 803                 AllocSetContextCreate(TopMemoryContext,
 804                                                           "TopTransactionContext",
 805                                                           ALLOCSET_DEFAULT_MINSIZE,
 806                                                           ALLOCSET_DEFAULT_INITSIZE,
 807                                                           ALLOCSET_DEFAULT_MAXSIZE);
 808
 809         /*
 810          * In a top-level transaction, CurTransactionContext is the same as
 811          * TopTransactionContext.
 812          */
 813         CurTransactionContext = TopTransactionContext;
 814         s->curTransactionContext = CurTransactionContext;
 815
 816         /* Make the CurTransactionContext active. */
 817         MemoryContextSwitchTo(CurTransactionContext);
 818 }
 819
 820 /*
 821  *      AtStart_ResourceOwner
 822  */
 823 static void
 824 AtStart_ResourceOwner(void)
 825 {
 826         TransactionState s = CurrentTransactionState;
 827
 828         /*
 829          * We shouldn't have a transaction resource owner already.
 830          */
 831         Assert(TopTransactionResourceOwner == NULL);
 832
 833         /*
 834          * Create a toplevel resource owner for the transaction.
 835          */
 836         s->curTransactionOwner = ResourceOwnerCreate(NULL, "TopTransaction");
 837
 838         TopTransactionResourceOwner = s->curTransactionOwner;
 839         CurTransactionResourceOwner = s->curTransactionOwner;
 840         CurrentResourceOwner = s->curTransactionOwner;
 841 }
 842
 843 /* ----------------------------------------------------------------
 844  *                                              StartSubTransaction stuff
 845  * ----------------------------------------------------------------
 846  */
 847
 848 /*
 849  * AtSubStart_Memory
 850  */
 851 static void
 852 AtSubStart_Memory(void)
 853 {
 854         TransactionState s = CurrentTransactionState;
 855
 856         Assert(CurTransactionContext != NULL);
 857
 858         /*
 859          * Create a CurTransactionContext, which will be used to hold data that
 860          * survives subtransaction commit but disappears on subtransaction abort.
 861          * We make it a child of the immediate parent's CurTransactionContext.
 862          */
 863         CurTransactionContext = AllocSetContextCreate(CurTransactionContext,
 864                                                                                                   "CurTransactionContext",
 865                                                                                                   ALLOCSET_DEFAULT_MINSIZE,
 866                                                                                                   ALLOCSET_DEFAULT_INITSIZE,
 867                                                                                                   ALLOCSET_DEFAULT_MAXSIZE);
 868         s->curTransactionContext = CurTransactionContext;
 869
 870         /* Make the CurTransactionContext active. */
 871         MemoryContextSwitchTo(CurTransactionContext);
 872 }
 873
 874 /*
 875  * AtSubStart_ResourceOwner
 876  */
 877 static void
 878 AtSubStart_ResourceOwner(void)
 879 {
 880         TransactionState s = CurrentTransactionState;
 881
 882         Assert(s->parent != NULL);
 883
 884         /*
 885          * Create a resource owner for the subtransaction.      We make it a child of
 886          * the immediate parent's resource owner.
 887          */
 888         s->curTransactionOwner =
 889                 ResourceOwnerCreate(s->parent->curTransactionOwner,
 890                                                         "SubTransaction");
 891
 892         CurTransactionResourceOwner = s->curTransactionOwner;
 893         CurrentResourceOwner = s->curTransactionOwner;
 894 }
 895
 896 /* ----------------------------------------------------------------
 897  *                                              CommitTransaction stuff
 898  * ----------------------------------------------------------------
 899  */
 900
 901 /*
 902  *      RecordTransactionCommit
 903  *
 904  * Returns latest XID among xact and its children, or InvalidTransactionId
 905  * if the xact has no XID.      (We compute that here just because it's easier.)
      *
      * Outline of the steps below:
      *      1. Collect the pending file deletions, the committed child XIDs
      *         and, when XLogStandbyInfoActive(), the invalidation messages.
      *      2. If an XID was assigned, enter a critical section and insert
      *         either the long or the compact commit record.
      *      3. Either flush WAL and update clog right away (synchronous case)
      *         or report the commit LSN and update clog asynchronously.
      *      4. Leave the critical section, compute latestXid, and wait for
      *         synchronous replication if this transaction wrote WAL.
      *      5. Free the pending-deletes array.
 906  */
 907 static TransactionId
 908 RecordTransactionCommit(void)
 909 {
 910         TransactionId xid = GetTopTransactionIdIfAny();
 911         bool            markXidCommitted = TransactionIdIsValid(xid);
 912         TransactionId latestXid = InvalidTransactionId;
 913         int                     nrels;
 914         RelFileNode *rels;
 915         int                     nchildren;
 916         TransactionId *children;
 917         int                     nmsgs = 0;
 918         SharedInvalidationMessage *invalMessages = NULL;
 919         bool            RelcacheInitFileInval = false;
 920         bool            wrote_xlog;
 921
 922         /* Get data needed for commit record */
 923         nrels = smgrGetPendingDeletes(true, &rels);
 924         nchildren = xactGetCommittedChildren(&children);
 925         if (XLogStandbyInfoActive())
 926                 nmsgs = xactGetCommittedInvalidationMessages(&invalMessages,
 927                                                                                                          &RelcacheInitFileInval);
             /*
              * wrote_xlog is sampled before any commit record is inserted below, so
              * it only reflects WAL written earlier in this transaction.
              */
 928         wrote_xlog = (XactLastRecEnd.xrecoff != 0);
 929
 930         /*
 931          * If we haven't been assigned an XID yet, we neither can, nor do we want
 932          * to write a COMMIT record.
 933          */
 934         if (!markXidCommitted)
 935         {
 936                 /*
 937                  * We expect that every smgrscheduleunlink is followed by a catalog
 938                  * update, and hence XID assignment, so we shouldn't get here with any
 939                  * pending deletes.  Use a real test not just an Assert to check this,
 940                  * since it's a bit fragile.
 941                  */
 942                 if (nrels != 0)
 943                         elog(ERROR, "cannot commit a transaction that deleted files but has no xid");
 944
 945                 /* Can't have child XIDs either; AssignTransactionId enforces this */
 946                 Assert(nchildren == 0);
 947
 948                 /*
 949                  * If we didn't create XLOG entries, we're done here; otherwise we
 950                  * should flush those entries the same as a commit record.      (An
 951                  * example of a possible record that wouldn't cause an XID to be
 952                  * assigned is a sequence advance record due to nextval() --- we want
 953                  * to flush that to disk before reporting commit.)
                      *
                      * The early exit returns latestXid with its initial value,
                      * InvalidTransactionId.
 954                  */
 955                 if (!wrote_xlog)
 956                         goto cleanup;
 957         }
 958         else
 959         {
 960                 /*
 961                  * Begin commit critical section and insert the commit XLOG record.
 962                  */
 963                 /* Tell bufmgr and smgr to prepare for commit */
 964                 BufmgrCommit();
 965
 966                 /*
 967                  * Mark ourselves as within our "commit critical section".      This
 968                  * forces any concurrent checkpoint to wait until we've updated
 969                  * pg_clog.  Without this, it is possible for the checkpoint to set
 970                  * REDO after the XLOG record but fail to flush the pg_clog update to
 971                  * disk, leading to loss of the transaction commit if the system
 972                  * crashes a little later.
 973                  *
 974                  * Note: we could, but don't bother to, set this flag in
 975                  * RecordTransactionAbort.      That's because loss of a transaction abort
 976                  * is noncritical; the presumption would be that it aborted, anyway.
 977                  *
 978                  * It's safe to change the inCommit flag of our own backend without
 979                  * holding the ProcArrayLock, since we're the only one modifying it.
 980                  * This makes checkpoint's determination of which xacts are inCommit a
 981                  * bit fuzzy, but it doesn't matter.
 982                  */
 983                 START_CRIT_SECTION();
 984                 MyPgXact->inCommit = true;
 985
 986                 SetCurrentTransactionStopTimestamp();
 987
 988                 /*
 989                  * Do we need the long commit record? If not, use the compact format.
                      *
                      * The long format is used whenever there are files to delete,
                      * invalidation messages to ship, a relcache init file
                      * invalidation, or a forced synchronous commit.
 990                  */
 991                 if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit)
 992                 {
                             /*
                              * rdata[0] always carries the fixed-size record header;
                              * rdata[1..3] are optional payload chunks.  lastrdata is the
                              * index of the last entry in use, so each optional chunk is
                              * linked from whichever entry precedes it in the chain.
                              */
 993                         XLogRecData rdata[4];
 994                         int                     lastrdata = 0;
 995                         xl_xact_commit xlrec;
 996                         /*
 997                          * Set flags required for recovery processing of commits.
 998                          */
 999                         xlrec.xinfo = 0;
1000                         if (RelcacheInitFileInval)
1001                                 xlrec.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE;
1002                         if (forceSyncCommit)
1003                                 xlrec.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT;
1004
1005                         xlrec.dbId = MyDatabaseId;
1006                         xlrec.tsId = MyDatabaseTableSpace;
1007
1008                         xlrec.xact_time = xactStopTimestamp;
1009                         xlrec.nrels = nrels;
1010                         xlrec.nsubxacts = nchildren;
1011                         xlrec.nmsgs = nmsgs;
1012                         rdata[0].data = (char *) (&xlrec);
1013                         rdata[0].len = MinSizeOfXactCommit;
1014                         rdata[0].buffer = InvalidBuffer;
1015                         /* dump rels to delete */
1016                         if (nrels > 0)
1017                         {
1018                                 rdata[0].next = &(rdata[1]);
1019                                 rdata[1].data = (char *) rels;
1020                                 rdata[1].len = nrels * sizeof(RelFileNode);
1021                                 rdata[1].buffer = InvalidBuffer;
1022                                 lastrdata = 1;
1023                         }
1024                         /* dump committed child Xids */
1025                         if (nchildren > 0)
1026                         {
1027                                 rdata[lastrdata].next = &(rdata[2]);
1028                                 rdata[2].data = (char *) children;
1029                                 rdata[2].len = nchildren * sizeof(TransactionId);
1030                                 rdata[2].buffer = InvalidBuffer;
1031                                 lastrdata = 2;
1032                         }
1033                         /* dump shared cache invalidation messages */
1034                         if (nmsgs > 0)
1035                         {
1036                                 rdata[lastrdata].next = &(rdata[3]);
1037                                 rdata[3].data = (char *) invalMessages;
1038                                 rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
1039                                 rdata[3].buffer = InvalidBuffer;
1040                                 lastrdata = 3;
1041                         }
                             /* terminate the chain at the last entry actually used */
1042                         rdata[lastrdata].next = NULL;
1043
1044                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
1045                 }
1046                 else
1047                 {
                             /*
                              * Compact record: a fixed-size header in rdata[0], optionally
                              * followed by the committed child XIDs in rdata[1].
                              */
1048                         XLogRecData rdata[2];
1049                         int                     lastrdata = 0;
1050                         xl_xact_commit_compact  xlrec;
1051                         xlrec.xact_time = xactStopTimestamp;
1052                         xlrec.nsubxacts = nchildren;
1053                         rdata[0].data = (char *) (&xlrec);
1054                         rdata[0].len = MinSizeOfXactCommitCompact;
1055                         rdata[0].buffer = InvalidBuffer;
1056                         /* dump committed child Xids */
1057                         if (nchildren > 0)
1058                         {
1059                                 rdata[0].next = &(rdata[1]);
1060                                 rdata[1].data = (char *) children;
1061                                 rdata[1].len = nchildren * sizeof(TransactionId);
1062                                 rdata[1].buffer = InvalidBuffer;
1063                                 lastrdata = 1;
1064                         }
1065                         rdata[lastrdata].next = NULL;
1066
1067                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
1068                 }
1069         }
1070
1071         /*
1072          * Check if we want to commit asynchronously.  We can allow the XLOG flush
1073          * to happen asynchronously if synchronous_commit=off, or if the current
1074          * transaction has not performed any WAL-logged operation.      The latter
1075          * case can arise if the current transaction wrote only to temporary
1076          * and/or unlogged tables.      In case of a crash, the loss of such a
1077          * transaction will be irrelevant since temp tables will be lost anyway,
1078          * and unlogged tables will be truncated.  (Given the foregoing, you might
1079          * think that it would be unnecessary to emit the XLOG record at all in
1080          * this case, but we don't currently try to do that.  It would certainly
1081          * cause problems at least in Hot Standby mode, where the
1082          * KnownAssignedXids machinery requires tracking every XID assignment.  It
1083          * might be OK to skip it only when wal_level < hot_standby, but for now
1084          * we don't.)
1085          *
1086          * However, if we're doing cleanup of any non-temp rels or committing any
1087          * command that wanted to force sync commit, then we must flush XLOG
1088          * immediately.  (We must not allow asynchronous commit if there are any
1089          * non-temp tables to be deleted, because we might delete the files before
1090          * the COMMIT record is flushed to disk.  We do allow asynchronous commit
1091          * if all to-be-deleted tables are temporary though, since they are lost
1092          * anyway if we crash.)
1093          */
1094         if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) ||
1095                 forceSyncCommit || nrels > 0)
1096         {
1097                 /*
1098                  * Synchronous commit case:
1099                  *
1100                  * Sleep before flush! So we can flush more than one commit record
1101                  * per single fsync.  (The idea is some other backend may do the
1102                  * XLogFlush while we're sleeping.  This needs work still, because on
1103                  * most Unixen, the minimum select() delay is 10msec or more, which is
1104                  * way too long.)
1105                  *
1106                  * We do not sleep if enableFsync is not turned on, nor if there are
1107                  * fewer than CommitSiblings other backends with active transactions.
1108                  */
1109                 if (CommitDelay > 0 && enableFsync &&
1110                         MinimumActiveBackends(CommitSiblings))
1111                         pg_usleep(CommitDelay);
1112
1113                 XLogFlush(XactLastRecEnd);
1114
1115                 /*
1116                  * Wake up all walsenders to send WAL up to the COMMIT record
1117                  * immediately if replication is enabled
1118                  */
1119                 if (max_wal_senders > 0)
1120                         WalSndWakeup();
1121
1122                 /*
1123                  * Now we may update the CLOG, if we wrote a COMMIT record above
1124                  */
1125                 if (markXidCommitted)
1126                         TransactionIdCommitTree(xid, nchildren, children);
1127         }
1128         else
1129         {
1130                 /*
1131                  * Asynchronous commit case:
1132                  *
1133                  * This enables possible committed transaction loss in the case of a
1134                  * postmaster crash because WAL buffers are left unwritten. Ideally we
1135                  * could issue the WAL write without the fsync, but some
1136                  * wal_sync_methods do not allow separate write/fsync.
1137                  *
1138                  * Report the latest async commit LSN, so that the WAL writer knows to
1139                  * flush this commit.
1140                  */
1141                 XLogSetAsyncXactLSN(XactLastRecEnd);
1142
1143                 /*
1144                  * We must not immediately update the CLOG, since we didn't flush the
1145                  * XLOG. Instead, we store the LSN up to which the XLOG must be
1146                  * flushed before the CLOG may be updated.
1147                  */
1148                 if (markXidCommitted)
1149                         TransactionIdAsyncCommitTree(xid, nchildren, children, XactLastRecEnd);
1150         }
1151
1152         /*
1153          * If we entered a commit critical section, leave it now, and let
1154          * checkpoints proceed.
              *
              * This pairs with the START_CRIT_SECTION() issued in the branch that
              * inserted the commit record; both are guarded by markXidCommitted.
1155          */
1156         if (markXidCommitted)
1157         {
1158                 MyPgXact->inCommit = false;
1159                 END_CRIT_SECTION();
1160         }
1161
1162         /* Compute latestXid while we have the child XIDs handy */
1163         latestXid = TransactionIdLatest(xid, nchildren, children);
1164
1165         /*
1166          * Wait for synchronous replication, if required.
1167          *
1168          * Note that at this stage we have marked clog, but still show as running
1169          * in the procarray and continue to hold locks.
1170          */
1171         if (wrote_xlog)
1172                 SyncRepWaitForLSN(XactLastRecEnd);
1173
1174         /* Reset XactLastRecEnd until the next transaction writes something */
1175         XactLastRecEnd.xrecoff = 0;
1176
1177 cleanup:
1178         /* Clean up local data */
             /*
              * NOTE(review): this relies on smgrGetPendingDeletes() leaving rels
              * NULL when there is nothing to free -- confirm against its definition.
              */
1179         if (rels)
1180                 pfree(rels);
1181
1182         return latestXid;
1183 }
1184
1185
/*
 *	AtCCI_LocalCache
 *
 * Local cache maintenance done as part of a command counter increment.
 */
static void
AtCCI_LocalCache(void)
{
	/*
	 * Pending relation map changes go first: they have to be visible before
	 * local sinval messages are processed, so that relcache invalidation
	 * picks up the new map contents.
	 */
	AtCCI_RelationMap();

	/* Then expose this command's catalog changes to the next command. */
	CommandEndInvalidationMessages();
}
1204
1205 /*
1206  *      AtCommit_Memory
1207  */
1208 static void
1209 AtCommit_Memory(void)
1210 {
1211         /*
1212          * Now that we're "out" of a transaction, have the system allocate things
1213          * in the top memory context instead of per-transaction contexts.
1214          */
1215         MemoryContextSwitchTo(TopMemoryContext);
1216
1217         /*
1218          * Release all transaction-local memory.
1219          */
1220         Assert(TopTransactionContext != NULL);
1221         MemoryContextDelete(TopTransactionContext);
1222         TopTransactionContext = NULL;
1223         CurTransactionContext = NULL;
1224         CurrentTransactionState->curTransactionContext = NULL;
1225 }
1226
1227 /* ----------------------------------------------------------------
1228  *                                              CommitSubTransaction stuff
1229  * ----------------------------------------------------------------
1230  */
1231
1232 /*
1233  * AtSubCommit_Memory
1234  */
1235 static void
1236 AtSubCommit_Memory(void)
1237 {
1238         TransactionState s = CurrentTransactionState;
1239
1240         Assert(s->parent != NULL);
1241
1242         /* Return to parent transaction level's memory context. */
1243         CurTransactionContext = s->parent->curTransactionContext;
1244         MemoryContextSwitchTo(CurTransactionContext);
1245
1246         /*
1247          * Ordinarily we cannot throw away the child's CurTransactionContext,
1248          * since the data it contains will be needed at upper commit.  However, if
1249          * there isn't actually anything in it, we can throw it away.  This avoids
1250          * a small memory leak in the common case of "trivial" subxacts.
1251          */
1252         if (MemoryContextIsEmpty(s->curTransactionContext))
1253         {
1254                 MemoryContextDelete(s->curTransactionContext);
1255                 s->curTransactionContext = NULL;
1256         }
1257 }
1258
1259 /*
1260  * AtSubCommit_childXids
1261  *
1262  * Pass my own XID and my child XIDs up to my parent as committed children.
1263  */
1264 static void
1265 AtSubCommit_childXids(void)
1266 {
1267         TransactionState s = CurrentTransactionState;
1268         int                     new_nChildXids;
1269
1270         Assert(s->parent != NULL);
1271
1272         /*
1273          * The parent childXids array will need to hold my XID and all my
1274          * childXids, in addition to the XIDs already there.
1275          */
1276         new_nChildXids = s->parent->nChildXids + s->nChildXids + 1;
1277
1278         /* Allocate or enlarge the parent array if necessary */
1279         if (s->parent->maxChildXids < new_nChildXids)
1280         {
1281                 int                     new_maxChildXids;
1282                 TransactionId *new_childXids;
1283
1284                 /*
1285                  * Make it 2x what's needed right now, to avoid having to enlarge it
1286                  * repeatedly. But we can't go above MaxAllocSize.  (The latter limit
1287                  * is what ensures that we don't need to worry about integer overflow
1288                  * here or in the calculation of new_nChildXids.)
1289                  */
1290                 new_maxChildXids = Min(new_nChildXids * 2,
1291                                                            (int) (MaxAllocSize / sizeof(TransactionId)));
1292
1293                 if (new_maxChildXids < new_nChildXids)
1294                         ereport(ERROR,
1295                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1296                                          errmsg("maximum number of committed subtransactions (%d) exceeded",
1297                                                         (int) (MaxAllocSize / sizeof(TransactionId)))));
1298
1299                 /*
1300                  * We keep the child-XID arrays in TopTransactionContext; this avoids
1301                  * setting up child-transaction contexts for what might be just a few
1302                  * bytes of grandchild XIDs.
1303                  */
1304                 if (s->parent->childXids == NULL)
1305                         new_childXids =
1306                                 MemoryContextAlloc(TopTransactionContext,
1307                                                                    new_maxChildXids * sizeof(TransactionId));
1308                 else
1309                         new_childXids = repalloc(s->parent->childXids,
1310                                                                    new_maxChildXids * sizeof(TransactionId));
1311
1312                 s->parent->childXids = new_childXids;
1313                 s->parent->maxChildXids = new_maxChildXids;
1314         }
1315
1316         /*
1317          * Copy all my XIDs to parent's array.
1318          *
1319          * Note: We rely on the fact that the XID of a child always follows that
1320          * of its parent.  By copying the XID of this subtransaction before the
1321          * XIDs of its children, we ensure that the array stays ordered. Likewise,
1322          * all XIDs already in the array belong to subtransactions started and
1323          * subcommitted before us, so their XIDs must precede ours.
1324          */
1325         s->parent->childXids[s->parent->nChildXids] = s->transactionId;
1326
1327         if (s->nChildXids > 0)
1328                 memcpy(&s->parent->childXids[s->parent->nChildXids + 1],
1329                            s->childXids,
1330                            s->nChildXids * sizeof(TransactionId));
1331
1332         s->parent->nChildXids = new_nChildXids;
1333
1334         /* Release child's array to avoid leakage */
1335         if (s->childXids != NULL)
1336                 pfree(s->childXids);
1337         /* We must reset these to avoid double-free if fail later in commit */
1338         s->childXids = NULL;
1339         s->nChildXids = 0;
1340         s->maxChildXids = 0;
1341 }
1342
1343 /* ----------------------------------------------------------------
1344  *                                              AbortTransaction stuff
1345  * ----------------------------------------------------------------
1346  */
1347
1348 /*
1349  *      RecordTransactionAbort
1350  *
1351  * Returns latest XID among xact and its children, or InvalidTransactionId
1352  * if the xact has no XID.      (We compute that here just because it's easier.)
      *
      * isSubXact is true when a subtransaction is being aborted.  In that case
      * the record timestamp comes from GetCurrentTimestamp(), XactLastRecEnd is
      * left untouched, the async LSN is not reported, and the aborted XIDs are
      * removed from the running-XID cache before returning.
      *
      * When an XID exists, an ABORT record is inserted and the XID tree is
      * marked aborted in clog, both inside a critical section.
1353  */
1354 static TransactionId
1355 RecordTransactionAbort(bool isSubXact)
1356 {
1357         TransactionId xid = GetCurrentTransactionIdIfAny();
1358         TransactionId latestXid;
1359         int                     nrels;
1360         RelFileNode *rels;
1361         int                     nchildren;
1362         TransactionId *children;
1363         XLogRecData rdata[3];
1364         int                     lastrdata = 0;
1365         xl_xact_abort xlrec;
1366
1367         /*
1368          * If we haven't been assigned an XID, nobody will care whether we aborted
1369          * or not.      Hence, we're done in that case.  It does not matter if we have
1370          * rels to delete (note that this routine is not responsible for actually
1371          * deleting 'em).  We cannot have any child XIDs, either.
1372          */
1373         if (!TransactionIdIsValid(xid))
1374         {
1375                 /* Reset XactLastRecEnd until the next transaction writes something */
1376                 if (!isSubXact)
1377                         XactLastRecEnd.xrecoff = 0;
1378                 return InvalidTransactionId;
1379         }
1380
1381         /*
1382          * We have a valid XID, so we should write an ABORT record for it.
1383          *
1384          * We do not flush XLOG to disk here, since the default assumption after a
1385          * crash would be that we aborted, anyway.      For the same reason, we don't
1386          * need to worry about interlocking against checkpoint start.
1387          */
1388
1389         /*
1390          * Check that we haven't aborted halfway through RecordTransactionCommit.
1391          */
1392         if (TransactionIdDidCommit(xid))
1393                 elog(PANIC, "cannot abort transaction %u, it was already committed",
1394                          xid);
1395
1396         /* Fetch the data we need for the abort record */
1397         nrels = smgrGetPendingDeletes(false, &rels);
1398         nchildren = xactGetCommittedChildren(&children);
1399
1400         /* XXX do we really need a critical section here? */
1401         START_CRIT_SECTION();
1402
1403         /* Write the ABORT record */
1404         if (isSubXact)
1405                 xlrec.xact_time = GetCurrentTimestamp();
1406         else
1407         {
1408                 SetCurrentTransactionStopTimestamp();
1409                 xlrec.xact_time = xactStopTimestamp;
1410         }
1411         xlrec.nrels = nrels;
1412         xlrec.nsubxacts = nchildren;
             /*
              * rdata[0] carries the fixed-size record header; rdata[1] and rdata[2]
              * are optional payload chunks.  lastrdata is the index of the last
              * entry in use, so the child-XID chunk links from rdata[1] when there
              * are rels and from rdata[0] otherwise.
              */
1413         rdata[0].data = (char *) (&xlrec);
1414         rdata[0].len = MinSizeOfXactAbort;
1415         rdata[0].buffer = InvalidBuffer;
1416         /* dump rels to delete */
1417         if (nrels > 0)
1418         {
1419                 rdata[0].next = &(rdata[1]);
1420                 rdata[1].data = (char *) rels;
1421                 rdata[1].len = nrels * sizeof(RelFileNode);
1422                 rdata[1].buffer = InvalidBuffer;
1423                 lastrdata = 1;
1424         }
1425         /* dump committed child Xids */
1426         if (nchildren > 0)
1427         {
1428                 rdata[lastrdata].next = &(rdata[2]);
1429                 rdata[2].data = (char *) children;
1430                 rdata[2].len = nchildren * sizeof(TransactionId);
1431                 rdata[2].buffer = InvalidBuffer;
1432                 lastrdata = 2;
1433         }
             /* terminate the chain at the last entry actually used */
1434         rdata[lastrdata].next = NULL;
1435
1436         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
1437
1438         /*
1439          * Report the latest async abort LSN, so that the WAL writer knows to
1440          * flush this abort. There's nothing to be gained by delaying this, since
1441          * WALWriter may as well do this when it can. This is important with
1442          * streaming replication because if we don't flush WAL regularly we will
1443          * find that large aborts leave us with a long backlog for when commits
1444          * occur after the abort, increasing our window of data loss should
1445          * problems occur at that point.
1446          */
1447         if (!isSubXact)
1448                 XLogSetAsyncXactLSN(XactLastRecEnd);
1449
1450         /*
1451          * Mark the transaction aborted in clog.  This is not absolutely necessary
1452          * but we may as well do it while we are here; also, in the subxact case
1453          * it is helpful because XactLockTableWait makes use of it to avoid
1454          * waiting for already-aborted subtransactions.  It is OK to do it without
1455          * having flushed the ABORT record to disk, because in event of a crash
1456          * we'd be assumed to have aborted anyway.
1457          */
1458         TransactionIdAbortTree(xid, nchildren, children);
1459
1460         END_CRIT_SECTION();
1461
1462         /* Compute latestXid while we have the child XIDs handy */
1463         latestXid = TransactionIdLatest(xid, nchildren, children);
1464
1465         /*
1466          * If we're aborting a subtransaction, we can immediately remove failed
1467          * XIDs from PGPROC's cache of running child XIDs.  We do that here for
1468          * subxacts, because we already have the child XID array at hand.  For
1469          * main xacts, the equivalent happens just after this function returns.
1470          */
1471         if (isSubXact)
1472                 XidCacheRemoveRunningXids(xid, nchildren, children, latestXid);
1473
1474         /* Reset XactLastRecEnd until the next transaction writes something */
1475         if (!isSubXact)
1476                 XactLastRecEnd.xrecoff = 0;
1477
1478         /* And clean up local data */
             /*
              * NOTE(review): this relies on smgrGetPendingDeletes() leaving rels
              * NULL when there is nothing to free -- confirm against its definition.
              */
1479         if (rels)
1480                 pfree(rels);
1481
1482         return latestXid;
1483 }
1484
1485 /*
1486  *      AtAbort_Memory
1487  */
1488 static void
1489 AtAbort_Memory(void)
1490 {
1491         /*
1492          * Switch into TransactionAbortContext, which should have some free space
1493          * even if nothing else does.  We'll work in this context until we've
1494          * finished cleaning up.
1495          *
1496          * It is barely possible to get here when we've not been able to create
1497          * TransactionAbortContext yet; if so use TopMemoryContext.
1498          */
1499         if (TransactionAbortContext != NULL)
1500                 MemoryContextSwitchTo(TransactionAbortContext);
1501         else
1502                 MemoryContextSwitchTo(TopMemoryContext);
1503 }
1504
1505 /*
1506  * AtSubAbort_Memory
1507  */
1508 static void
1509 AtSubAbort_Memory(void)
1510 {
1511         Assert(TransactionAbortContext != NULL);
1512
1513         MemoryContextSwitchTo(TransactionAbortContext);
1514 }
1515
1516
1517 /*
1518  *      AtAbort_ResourceOwner
1519  */
1520 static void
1521 AtAbort_ResourceOwner(void)
1522 {
1523         /*
1524          * Make sure we have a valid ResourceOwner, if possible (else it will be
1525          * NULL, which is OK)
1526          */
1527         CurrentResourceOwner = TopTransactionResourceOwner;
1528 }
1529
1530 /*
1531  * AtSubAbort_ResourceOwner
1532  */
1533 static void
1534 AtSubAbort_ResourceOwner(void)
1535 {
1536         TransactionState s = CurrentTransactionState;
1537
1538         /* Make sure we have a valid ResourceOwner */
1539         CurrentResourceOwner = s->curTransactionOwner;
1540 }
1541
1542
1543 /*
1544  * AtSubAbort_childXids
1545  */
1546 static void
1547 AtSubAbort_childXids(void)
1548 {
1549         TransactionState s = CurrentTransactionState;
1550
1551         /*
1552          * We keep the child-XID arrays in TopTransactionContext (see
1553          * AtSubCommit_childXids).      This means we'd better free the array
1554          * explicitly at abort to avoid leakage.
1555          */
1556         if (s->childXids != NULL)
1557                 pfree(s->childXids);
1558         s->childXids = NULL;
1559         s->nChildXids = 0;
1560         s->maxChildXids = 0;
1561
1562         /*
1563          * We could prune the unreportedXids array here. But we don't bother. That
1564          * would potentially reduce number of XLOG_XACT_ASSIGNMENT records but it
1565          * would likely introduce more CPU time into the more common paths, so we
1566          * choose not to do that.
1567          */
1568 }
1569
1570 /* ----------------------------------------------------------------
1571  *                                              CleanupTransaction stuff
1572  * ----------------------------------------------------------------
1573  */
1574
1575 /*
1576  *      AtCleanup_Memory
1577  */
1578 static void
1579 AtCleanup_Memory(void)
1580 {
1581         Assert(CurrentTransactionState->parent == NULL);
1582
1583         /*
1584          * Now that we're "out" of a transaction, have the system allocate things
1585          * in the top memory context instead of per-transaction contexts.
1586          */
1587         MemoryContextSwitchTo(TopMemoryContext);
1588
1589         /*
1590          * Clear the special abort context for next time.
1591          */
1592         if (TransactionAbortContext != NULL)
1593                 MemoryContextResetAndDeleteChildren(TransactionAbortContext);
1594
1595         /*
1596          * Release all transaction-local memory.
1597          */
1598         if (TopTransactionContext != NULL)
1599                 MemoryContextDelete(TopTransactionContext);
1600         TopTransactionContext = NULL;
1601         CurTransactionContext = NULL;
1602         CurrentTransactionState->curTransactionContext = NULL;
1603 }
1604
1605
1606 /* ----------------------------------------------------------------
1607  *                                              CleanupSubTransaction stuff
1608  * ----------------------------------------------------------------
1609  */
1610
1611 /*
1612  * AtSubCleanup_Memory
1613  */
1614 static void
1615 AtSubCleanup_Memory(void)
1616 {
1617         TransactionState s = CurrentTransactionState;
1618
1619         Assert(s->parent != NULL);
1620
1621         /* Make sure we're not in an about-to-be-deleted context */
1622         MemoryContextSwitchTo(s->parent->curTransactionContext);
1623         CurTransactionContext = s->parent->curTransactionContext;
1624
1625         /*
1626          * Clear the special abort context for next time.
1627          */
1628         if (TransactionAbortContext != NULL)
1629                 MemoryContextResetAndDeleteChildren(TransactionAbortContext);
1630
1631         /*
1632          * Delete the subxact local memory contexts. Its CurTransactionContext can
1633          * go too (note this also kills CurTransactionContexts from any children
1634          * of the subxact).
1635          */
1636         if (s->curTransactionContext)
1637                 MemoryContextDelete(s->curTransactionContext);
1638         s->curTransactionContext = NULL;
1639 }
1640
1641 /* ----------------------------------------------------------------
1642  *                                              interface routines
1643  * ----------------------------------------------------------------
1644  */
1645
1646 /*
1647  *      StartTransaction
1648  */
1649 static void
1650 StartTransaction(void)
1651 {
1652         TransactionState s;
1653         VirtualTransactionId vxid;
1654
1655         /*
1656          * Let's just make sure the state stack is empty
1657          */
1658         s = &TopTransactionStateData;
1659         CurrentTransactionState = s;
1660
1661         /*
1662          * check the current transaction state
1663          */
1664         if (s->state != TRANS_DEFAULT)
1665                 elog(WARNING, "StartTransaction while in %s state",
1666                          TransStateAsString(s->state));
1667
1668         /*
1669          * set the current transaction state information appropriately during
1670          * start processing
1671          */
1672         s->state = TRANS_START;
1673         s->transactionId = InvalidTransactionId;        /* until assigned */
1674
1675         /*
1676          * Make sure we've reset xact state variables
1677          *
1678          * If recovery is still in progress, mark this transaction as read-only.
1679          * We have lower level defences in XLogInsert and elsewhere to stop us
1680          * from modifying data during recovery, but this gives the normal
1681          * indication to the user that the transaction is read-only.
1682          */
1683         if (RecoveryInProgress())
1684         {
1685                 s->startedInRecovery = true;
1686                 XactReadOnly = true;
1687         }
1688         else
1689         {
1690                 s->startedInRecovery = false;
1691                 XactReadOnly = DefaultXactReadOnly;
1692         }
1693         XactDeferrable = DefaultXactDeferrable;
1694         XactIsoLevel = DefaultXactIsoLevel;
1695         forceSyncCommit = false;
1696         MyXactAccessedTempRel = false;
1697
1698         /*
1699          * reinitialize within-transaction counters
1700          */
1701         s->subTransactionId = TopSubTransactionId;
1702         currentSubTransactionId = TopSubTransactionId;
1703         currentCommandId = FirstCommandId;
1704         currentCommandIdUsed = false;
1705
1706         /*
1707          * initialize reported xid accounting
1708          */
1709         nUnreportedXids = 0;
1710
1711         /*
1712          * must initialize resource-management stuff first
1713          */
1714         AtStart_Memory();
1715         AtStart_ResourceOwner();
1716
1717         /*
1718          * Assign a new LocalTransactionId, and combine it with the backendId to
1719          * form a virtual transaction id.
1720          */
1721         vxid.backendId = MyBackendId;
1722         vxid.localTransactionId = GetNextLocalTransactionId();
1723
1724         /*
1725          * Lock the virtual transaction id before we announce it in the proc array
1726          */
1727         VirtualXactLockTableInsert(vxid);
1728
1729         /*
1730          * Advertise it in the proc array.      We assume assignment of
1731          * LocalTransactionID is atomic, and the backendId should be set already.
1732          */
1733         Assert(MyProc->backendId == vxid.backendId);
1734         MyProc->lxid = vxid.localTransactionId;
1735
1736         TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
1737
1738         /*
1739          * set transaction_timestamp() (a/k/a now()).  We want this to be the same
1740          * as the first command's statement_timestamp(), so don't do a fresh
1741          * GetCurrentTimestamp() call (which'd be expensive anyway).  Also, mark
1742          * xactStopTimestamp as unset.
1743          */
1744         xactStartTimestamp = stmtStartTimestamp;
1745         xactStopTimestamp = 0;
1746         pgstat_report_xact_timestamp(xactStartTimestamp);
1747
1748         /*
1749          * initialize current transaction state fields
1750          *
1751          * note: prevXactReadOnly is not used at the outermost level
1752          */
1753         s->nestingLevel = 1;
1754         s->gucNestLevel = 1;
1755         s->childXids = NULL;
1756         s->nChildXids = 0;
1757         s->maxChildXids = 0;
1758         GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
1759         /* SecurityRestrictionContext should never be set outside a transaction */
1760         Assert(s->prevSecContext == 0);
1761
1762         /*
1763          * initialize other subsystems for new transaction
1764          */
1765         AtStart_GUC();
1766         AtStart_Inval();
1767         AtStart_Cache();
1768         AfterTriggerBeginXact();
1769
1770         /*
1771          * done with start processing, set current transaction state to "in
1772          * progress"
1773          */
1774         s->state = TRANS_INPROGRESS;
1775
1776         ShowTransactionState("StartTransaction");
1777 }
1778
1779
1780 /*
1781  *      CommitTransaction
1782  *
1783  * NB: if you change this routine, better look at PrepareTransaction too!
1784  */
1785 static void
1786 CommitTransaction(void)
1787 {
1788         TransactionState s = CurrentTransactionState;
1789         TransactionId latestXid;
1790
1791         ShowTransactionState("CommitTransaction");
1792
1793         /*
1794          * check the current transaction state
1795          */
1796         if (s->state != TRANS_INPROGRESS)
1797                 elog(WARNING, "CommitTransaction while in %s state",
1798                          TransStateAsString(s->state));
1799         Assert(s->parent == NULL);
1800
1801         /*
1802          * Do pre-commit processing that involves calling user-defined code, such
1803          * as triggers.  Since closing cursors could queue trigger actions,
1804          * triggers could open cursors, etc, we have to keep looping until there's
1805          * nothing left to do.
1806          */
1807         for (;;)
1808         {
1809                 /*
1810                  * Fire all currently pending deferred triggers.
1811                  */
1812                 AfterTriggerFireDeferred();
1813
1814                 /*
1815                  * Close open portals (converting holdable ones into static portals).
1816                  * If there weren't any, we are done ... otherwise loop back to check
1817                  * if they queued deferred triggers.  Lather, rinse, repeat.
1818                  */
1819                 if (!PreCommit_Portals(false))
1820                         break;
1821         }
1822
1823         /*
1824          * The remaining actions cannot call any user-defined code, so it's safe
1825          * to start shutting down within-transaction services.  But note that most
1826          * of this stuff could still throw an error, which would switch us into
1827          * the transaction-abort path.
1828          */
1829
1830         /* Shut down the deferred-trigger manager */
1831         AfterTriggerEndXact(true);
1832
1833         /*
1834          * Let ON COMMIT management do its thing (must happen after closing
1835          * cursors, to avoid dangling-reference problems)
1836          */
1837         PreCommit_on_commit_actions();
1838
1839         /* close large objects before lower-level cleanup */
1840         AtEOXact_LargeObject(true);
1841
1842         /*
1843          * Mark serializable transaction as complete for predicate locking
1844          * purposes.  This should be done as late as we can put it and still allow
1845          * errors to be raised for failure patterns found at commit.
1846          */
1847         PreCommit_CheckForSerializationFailure();
1848
1849         /*
1850          * Insert notifications sent by NOTIFY commands into the queue.  This
1851          * should be late in the pre-commit sequence to minimize time spent
1852          * holding the notify-insertion lock.
1853          */
1854         PreCommit_Notify();
1855
1856         /* Prevent cancel/die interrupt while cleaning up */
1857         HOLD_INTERRUPTS();
1858
1859         /* Commit updates to the relation map --- do this as late as possible */
1860         AtEOXact_RelationMap(true);
1861
1862         /*
1863          * set the current transaction state information appropriately during
1864          * commit processing
1865          */
1866         s->state = TRANS_COMMIT;
1867
1868         /*
1869          * Here is where we really truly commit.
1870          */
1871         latestXid = RecordTransactionCommit();
1872
1873         TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid);
1874
1875         /*
1876          * Let others know about no transaction in progress by me. Note that this
1877          * must be done _before_ releasing locks we hold and _after_
1878          * RecordTransactionCommit.
1879          */
1880         ProcArrayEndTransaction(MyProc, latestXid);
1881
1882         /*
1883          * This is all post-commit cleanup.  Note that if an error is raised here,
1884          * it's too late to abort the transaction.  This should be just
1885          * noncritical resource releasing.
1886          *
1887          * The ordering of operations is not entirely random.  The idea is:
1888          * release resources visible to other backends (eg, files, buffer pins);
1889          * then release locks; then release backend-local resources. We want to
1890          * release locks at the point where any backend waiting for us will see
1891          * our transaction as being fully cleaned up.
1892          *
1893          * Resources that can be associated with individual queries are handled by
1894          * the ResourceOwner mechanism.  The other calls here are for backend-wide
1895          * state.
1896          */
1897
1898         CallXactCallbacks(XACT_EVENT_COMMIT);
1899
1900         ResourceOwnerRelease(TopTransactionResourceOwner,
1901                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
1902                                                  true, true);
1903
1904         /* Check we've released all buffer pins */
1905         AtEOXact_Buffers(true);
1906
1907         /* Clean up the relation cache */
1908         AtEOXact_RelationCache(true);
1909
1910         /*
1911          * Make catalog changes visible to all backends.  This has to happen after
1912          * relcache references are dropped (see comments for
1913          * AtEOXact_RelationCache), but before locks are released (if anyone is
1914          * waiting for lock on a relation we've modified, we want them to know
1915          * about the catalog change before they start using the relation).
1916          */
1917         AtEOXact_Inval(true);
1918
1919         /*
1920          * Likewise, dropping of files deleted during the transaction is best done
1921          * after releasing relcache and buffer pins.  (This is not strictly
1922          * necessary during commit, since such pins should have been released
1923          * already, but this ordering is definitely critical during abort.)
1924          */
1925         smgrDoPendingDeletes(true);
1926
1927         AtEOXact_MultiXact();
1928
1929         ResourceOwnerRelease(TopTransactionResourceOwner,
1930                                                  RESOURCE_RELEASE_LOCKS,
1931                                                  true, true);
1932         ResourceOwnerRelease(TopTransactionResourceOwner,
1933                                                  RESOURCE_RELEASE_AFTER_LOCKS,
1934                                                  true, true);
1935
1936         /* Check we've released all catcache entries */
1937         AtEOXact_CatCache(true);
1938
1939         AtCommit_Notify();
1940         AtEOXact_GUC(true, 1);
1941         AtEOXact_SPI(true);
1942         AtEOXact_on_commit_actions(true);
1943         AtEOXact_Namespace(true);
1944         /* smgrcommit already done */
1945         AtEOXact_Files();
1946         AtEOXact_ComboCid();
1947         AtEOXact_HashTables(true);
1948         AtEOXact_PgStat(true);
1949         AtEOXact_Snapshot(true);
1950         pgstat_report_xact_timestamp(0);
1951
1952         CurrentResourceOwner = NULL;
1953         ResourceOwnerDelete(TopTransactionResourceOwner);
1954         s->curTransactionOwner = NULL;
1955         CurTransactionResourceOwner = NULL;
1956         TopTransactionResourceOwner = NULL;
1957
1958         AtCommit_Memory();
1959
1960         s->transactionId = InvalidTransactionId;
1961         s->subTransactionId = InvalidSubTransactionId;
1962         s->nestingLevel = 0;
1963         s->gucNestLevel = 0;
1964         s->childXids = NULL;
1965         s->nChildXids = 0;
1966         s->maxChildXids = 0;
1967
1968         /*
1969          * done with commit processing, set current transaction state back to
1970          * default
1971          */
1972         s->state = TRANS_DEFAULT;
1973
1974         RESUME_INTERRUPTS();
1975 }
1976
1977
1978 /*
1979  *      PrepareTransaction
1980  *
1981  * NB: if you change this routine, better look at CommitTransaction too!
1982  */
1983 static void
1984 PrepareTransaction(void)
1985 {
1986         TransactionState s = CurrentTransactionState;
1987         TransactionId xid = GetCurrentTransactionId();
1988         GlobalTransaction gxact;
1989         TimestampTz prepared_at;
1990
1991         ShowTransactionState("PrepareTransaction");
1992
1993         /*
1994          * check the current transaction state
1995          */
1996         if (s->state != TRANS_INPROGRESS)
1997                 elog(WARNING, "PrepareTransaction while in %s state",
1998                          TransStateAsString(s->state));
1999         Assert(s->parent == NULL);
2000
2001         /*
2002          * Do pre-commit processing that involves calling user-defined code, such
2003          * as triggers.  Since closing cursors could queue trigger actions,
2004          * triggers could open cursors, etc, we have to keep looping until there's
2005          * nothing left to do.
2006          */
2007         for (;;)
2008         {
2009                 /*
2010                  * Fire all currently pending deferred triggers.
2011                  */
2012                 AfterTriggerFireDeferred();
2013
2014                 /*
2015                  * Close open portals (converting holdable ones into static portals).
2016                  * If there weren't any, we are done ... otherwise loop back to check
2017                  * if they queued deferred triggers.  Lather, rinse, repeat.
2018                  */
2019                 if (!PreCommit_Portals(true))
2020                         break;
2021         }
2022
2023         /*
2024          * The remaining actions cannot call any user-defined code, so it's safe
2025          * to start shutting down within-transaction services.  But note that most
2026          * of this stuff could still throw an error, which would switch us into
2027          * the transaction-abort path.
2028          */
2029
2030         /* Shut down the deferred-trigger manager */
2031         AfterTriggerEndXact(true);
2032
2033         /*
2034          * Let ON COMMIT management do its thing (must happen after closing
2035          * cursors, to avoid dangling-reference problems)
2036          */
2037         PreCommit_on_commit_actions();
2038
2039         /* close large objects before lower-level cleanup */
2040         AtEOXact_LargeObject(true);
2041
2042         /*
2043          * Mark serializable transaction as complete for predicate locking
2044          * purposes.  This should be done as late as we can put it and still allow
2045          * errors to be raised for failure patterns found at commit.
2046          */
2047         PreCommit_CheckForSerializationFailure();
2048
2049         /* NOTIFY will be handled below */
2050
2051         /*
2052          * Don't allow PREPARE TRANSACTION if we've accessed a temporary table in
2053          * this transaction.  Having the prepared xact hold locks on another
2054          * backend's temp table seems a bad idea --- for instance it would prevent
2055          * the backend from exiting.  There are other problems too, such as how to
2056          * clean up the source backend's local buffers and ON COMMIT state if the
2057          * prepared xact includes a DROP of a temp table.
2058          *
2059          * We must check this after executing any ON COMMIT actions, because they
2060          * might still access a temp relation.
2061          *
2062          * XXX In principle this could be relaxed to allow some useful special
2063          * cases, such as a temp table created and dropped all within the
2064          * transaction.  That seems to require much more bookkeeping though.
2065          */
2066         if (MyXactAccessedTempRel)
2067                 ereport(ERROR,
2068                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2069                                  errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
2070
2071         /*
2072          * Likewise, don't allow PREPARE after pg_export_snapshot.  This could be
2073          * supported if we added cleanup logic to twophase.c, but for now it
2074          * doesn't seem worth the trouble.
2075          */
2076         if (XactHasExportedSnapshots())
2077                 ereport(ERROR,
2078                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2079                                  errmsg("cannot PREPARE a transaction that has exported snapshots")));
2080
2081         /* Prevent cancel/die interrupt while cleaning up */
2082         HOLD_INTERRUPTS();
2083
2084         /*
2085          * set the current transaction state information appropriately during
2086          * prepare processing
2087          */
2088         s->state = TRANS_PREPARE;
2089
2090         prepared_at = GetCurrentTimestamp();
2091
2092         /* Tell bufmgr and smgr to prepare for commit */
2093         BufmgrCommit();
2094
2095         /*
2096          * Reserve the GID for this transaction. This could fail if the requested
2097          * GID is invalid or already in use.
2098          */
2099         gxact = MarkAsPreparing(xid, prepareGID, prepared_at,
2100                                                         GetUserId(), MyDatabaseId);
2101         prepareGID = NULL;
2102
2103         /*
2104          * Collect data for the 2PC state file.  Note that in general, no actual
2105          * state change should happen in the called modules during this step,
2106          * since it's still possible to fail before commit, and in that case we
2107          * want transaction abort to be able to clean up.  (In particular, the
2108          * AtPrepare routines may error out if they find cases they cannot
2109          * handle.)  State cleanup should happen in the PostPrepare routines
2110          * below.  However, some modules can go ahead and clear state here because
2111          * they wouldn't do anything with it during abort anyway.
2112          *
2113          * Note: because the 2PC state file records will be replayed in the same
2114          * order they are made, the order of these calls has to match the order in
2115          * which we want things to happen during COMMIT PREPARED or ROLLBACK
2116          * PREPARED; in particular, pay attention to whether things should happen
2117          * before or after releasing the transaction's locks.
2118          */
2119         StartPrepare(gxact);
2120
2121         AtPrepare_Notify();
2122         AtPrepare_Locks();
2123         AtPrepare_PredicateLocks();
2124         AtPrepare_PgStat();
2125         AtPrepare_MultiXact();
2126         AtPrepare_RelationMap();
2127
2128         /*
2129          * Here is where we really truly prepare.
2130          *
2131          * We have to record transaction prepares even if we didn't make any
2132          * updates, because the transaction manager might get confused if we lose
2133          * a global transaction.
2134          */
2135         EndPrepare(gxact);
2136
2137         /*
2138          * Now we clean up backend-internal state and release internal resources.
2139          */
2140
2141         /* Reset XactLastRecEnd until the next transaction writes something */
2142         XactLastRecEnd.xrecoff = 0;
2143
2144         /*
2145          * Let others know about no transaction in progress by me.      This has to be
2146          * done *after* the prepared transaction has been marked valid, else
2147          * someone may think it is unlocked and recyclable.
2148          */
2149         ProcArrayClearTransaction(MyProc);
2150
2151         /*
2152          * This is all post-transaction cleanup.  Note that if an error is raised
2153          * here, it's too late to abort the transaction.  This should be just
2154          * noncritical resource releasing.      See notes in CommitTransaction.
2155          */
2156
2157         CallXactCallbacks(XACT_EVENT_PREPARE);
2158
2159         ResourceOwnerRelease(TopTransactionResourceOwner,
2160                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
2161                                                  true, true);
2162
2163         /* Check we've released all buffer pins */
2164         AtEOXact_Buffers(true);
2165
2166         /* Clean up the relation cache */
2167         AtEOXact_RelationCache(true);
2168
2169         /* notify doesn't need a postprepare call */
2170
2171         PostPrepare_PgStat();
2172
2173         PostPrepare_Inval();
2174
2175         PostPrepare_smgr();
2176
2177         PostPrepare_MultiXact(xid);
2178
2179         PostPrepare_Locks(xid);
2180         PostPrepare_PredicateLocks(xid);
2181
2182         ResourceOwnerRelease(TopTransactionResourceOwner,
2183                                                  RESOURCE_RELEASE_LOCKS,
2184                                                  true, true);
2185         ResourceOwnerRelease(TopTransactionResourceOwner,
2186                                                  RESOURCE_RELEASE_AFTER_LOCKS,
2187                                                  true, true);
2188
2189         /* Check we've released all catcache entries */
2190         AtEOXact_CatCache(true);
2191
2192         /* PREPARE acts the same as COMMIT as far as GUC is concerned */
2193         AtEOXact_GUC(true, 1);
2194         AtEOXact_SPI(true);
2195         AtEOXact_on_commit_actions(true);
2196         AtEOXact_Namespace(true);
2197         /* smgrcommit already done */
2198         AtEOXact_Files();
2199         AtEOXact_ComboCid();
2200         AtEOXact_HashTables(true);
2201         /* don't call AtEOXact_PgStat here */
2202         AtEOXact_Snapshot(true);
2203
2204         CurrentResourceOwner = NULL;
2205         ResourceOwnerDelete(TopTransactionResourceOwner);
2206         s->curTransactionOwner = NULL;
2207         CurTransactionResourceOwner = NULL;
2208         TopTransactionResourceOwner = NULL;
2209
2210         AtCommit_Memory();
2211
2212         s->transactionId = InvalidTransactionId;
2213         s->subTransactionId = InvalidSubTransactionId;
2214         s->nestingLevel = 0;
2215         s->gucNestLevel = 0;
2216         s->childXids = NULL;
2217         s->nChildXids = 0;
2218         s->maxChildXids = 0;
2219
2220         /*
2221          * done with 1st phase commit processing, set current transaction state
2222          * back to default
2223          */
2224         s->state = TRANS_DEFAULT;
2225
2226         RESUME_INTERRUPTS();
2227 }
2228
2229
2230 /*
2231  *      AbortTransaction
2232  */
2233 static void
2234 AbortTransaction(void)
2235 {
2236         TransactionState s = CurrentTransactionState;
2237         TransactionId latestXid;
2238
2239         /* Prevent cancel/die interrupt while cleaning up */
2240         HOLD_INTERRUPTS();
2241
2242         /* Make sure we have a valid memory context and resource owner */
2243         AtAbort_Memory();
2244         AtAbort_ResourceOwner();
2245
2246         /*
2247          * Release any LW locks we might be holding as quickly as possible.
2248          * (Regular locks, however, must be held till we finish aborting.)
2249          * Releasing LW locks is critical since we might try to grab them again
2250          * while cleaning up!
2251          */
2252         LWLockReleaseAll();
2253
2254         /* Clean up buffer I/O and buffer context locks, too */
2255         AbortBufferIO();
2256         UnlockBuffers();
2257
2258         /*
2259          * Also clean up any open wait for lock, since the lock manager will choke
2260          * if we try to wait for another lock before doing this.
2261          */
2262         LockWaitCancel();
2263
2264         /*
2265          * check the current transaction state
2266          */
2267         if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
2268                 elog(WARNING, "AbortTransaction while in %s state",
2269                          TransStateAsString(s->state));
2270         Assert(s->parent == NULL);
2271
2272         /*
2273          * set the current transaction state information appropriately during the
2274          * abort processing
2275          */
2276         s->state = TRANS_ABORT;
2277
2278         /*
2279          * Reset user ID which might have been changed transiently.  We need this
2280          * to clean up in case control escaped out of a SECURITY DEFINER function
2281          * or other local change of CurrentUserId; therefore, the prior value of
2282          * SecurityRestrictionContext also needs to be restored.
2283          *
2284          * (Note: it is not necessary to restore session authorization or role
2285          * settings here because those can only be changed via GUC, and GUC will
2286          * take care of rolling them back if need be.)
2287          */
2288         SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
2289
2290         /*
2291          * do abort processing
2292          */
2293         AfterTriggerEndXact(false); /* 'false' means it's abort */
2294         AtAbort_Portals();
2295         AtEOXact_LargeObject(false);
2296         AtAbort_Notify();
2297         AtEOXact_RelationMap(false);
2298
2299         /*
2300          * Advertise the fact that we aborted in pg_clog (assuming that we got as
2301          * far as assigning an XID to advertise).
2302          */
2303         latestXid = RecordTransactionAbort(false);
2304
2305         TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid);
2306
2307         /*
2308          * Let others know about no transaction in progress by me. Note that this
2309          * must be done _before_ releasing locks we hold and _after_
2310          * RecordTransactionAbort.
2311          */
2312         ProcArrayEndTransaction(MyProc, latestXid);
2313
2314         /*
2315          * Post-abort cleanup.  See notes in CommitTransaction() concerning
2316          * ordering.  We can skip all of it if the transaction failed before
2317          * creating a resource owner.
2318          */
2319         if (TopTransactionResourceOwner != NULL)
2320         {
2321                 CallXactCallbacks(XACT_EVENT_ABORT);
2322
2323                 ResourceOwnerRelease(TopTransactionResourceOwner,
2324                                                          RESOURCE_RELEASE_BEFORE_LOCKS,
2325                                                          false, true);
2326                 AtEOXact_Buffers(false);
2327                 AtEOXact_RelationCache(false);
2328                 AtEOXact_Inval(false);
2329                 smgrDoPendingDeletes(false);
2330                 AtEOXact_MultiXact();
2331                 ResourceOwnerRelease(TopTransactionResourceOwner,
2332                                                          RESOURCE_RELEASE_LOCKS,
2333                                                          false, true);
2334                 ResourceOwnerRelease(TopTransactionResourceOwner,
2335                                                          RESOURCE_RELEASE_AFTER_LOCKS,
2336                                                          false, true);
2337                 AtEOXact_CatCache(false);
2338
2339                 AtEOXact_GUC(false, 1);
2340                 AtEOXact_SPI(false);
2341                 AtEOXact_on_commit_actions(false);
2342                 AtEOXact_Namespace(false);
2343                 AtEOXact_Files();
2344                 AtEOXact_ComboCid();
2345                 AtEOXact_HashTables(false);
2346                 AtEOXact_PgStat(false);
2347                 pgstat_report_xact_timestamp(0);
2348         }
2349
2350         /*
2351          * State remains TRANS_ABORT until CleanupTransaction().
2352          */
2353         RESUME_INTERRUPTS();
2354 }
2355
2356 /*
2357  *      CleanupTransaction
2358  */
2359 static void
2360 CleanupTransaction(void)
2361 {
2362         TransactionState s = CurrentTransactionState;
2363
2364         /*
2365          * State should still be TRANS_ABORT from AbortTransaction().
2366          */
2367         if (s->state != TRANS_ABORT)
2368                 elog(FATAL, "CleanupTransaction: unexpected state %s",
2369                          TransStateAsString(s->state));
2370
2371         /*
2372          * do abort cleanup processing
2373          */
2374         AtCleanup_Portals();            /* now safe to release portal memory */
2375         AtEOXact_Snapshot(false);       /* and release the transaction's snapshots */
2376
2377         CurrentResourceOwner = NULL;    /* and resource owner */
2378         if (TopTransactionResourceOwner)
2379                 ResourceOwnerDelete(TopTransactionResourceOwner);
2380         s->curTransactionOwner = NULL;
2381         CurTransactionResourceOwner = NULL;
2382         TopTransactionResourceOwner = NULL;
2383
2384         AtCleanup_Memory();                     /* and transaction memory */
2385
2386         s->transactionId = InvalidTransactionId;
2387         s->subTransactionId = InvalidSubTransactionId;
2388         s->nestingLevel = 0;
2389         s->gucNestLevel = 0;
2390         s->childXids = NULL;
2391         s->nChildXids = 0;
2392         s->maxChildXids = 0;
2393
2394         /*
2395          * done with abort processing, set current transaction state back to
2396          * default
2397          */
2398         s->state = TRANS_DEFAULT;
2399 }
2400
2401 /*
2402  *      StartTransactionCommand
2403  */
2404 void
2405 StartTransactionCommand(void)
2406 {
2407         TransactionState s = CurrentTransactionState;
2408
2409         switch (s->blockState)
2410         {
2411                         /*
2412                          * if we aren't in a transaction block, we just do our usual start
2413                          * transaction.
2414                          */
2415                 case TBLOCK_DEFAULT:
2416                         StartTransaction();
2417                         s->blockState = TBLOCK_STARTED;
2418                         break;
2419
2420                         /*
2421                          * We are somewhere in a transaction block or subtransaction and
2422                          * about to start a new command.  For now we do nothing, but
2423                          * someday we may do command-local resource initialization. (Note
2424                          * that any needed CommandCounterIncrement was done by the
2425                          * previous CommitTransactionCommand.)
2426                          */
2427                 case TBLOCK_INPROGRESS:
2428                 case TBLOCK_SUBINPROGRESS:
2429                         break;
2430
2431                         /*
2432                          * Here we are in a failed transaction block (one of the commands
2433                          * caused an abort) so we do nothing but remain in the abort
2434                          * state.  Eventually we will get a ROLLBACK command which will
2435                          * get us out of this state.  (It is up to other code to ensure
2436                          * that no commands other than ROLLBACK will be processed in these
2437                          * states.)
2438                          */
2439                 case TBLOCK_ABORT:
2440                 case TBLOCK_SUBABORT:
2441                         break;
2442
2443                         /* These cases are invalid. */
2444                 case TBLOCK_STARTED:
2445                 case TBLOCK_BEGIN:
2446                 case TBLOCK_SUBBEGIN:
2447                 case TBLOCK_END:
2448                 case TBLOCK_SUBRELEASE:
2449                 case TBLOCK_SUBCOMMIT:
2450                 case TBLOCK_ABORT_END:
2451                 case TBLOCK_SUBABORT_END:
2452                 case TBLOCK_ABORT_PENDING:
2453                 case TBLOCK_SUBABORT_PENDING:
2454                 case TBLOCK_SUBRESTART:
2455                 case TBLOCK_SUBABORT_RESTART:
2456                 case TBLOCK_PREPARE:
2457                         elog(ERROR, "StartTransactionCommand: unexpected state %s",
2458                                  BlockStateAsString(s->blockState));
2459                         break;
2460         }
2461
2462         /*
2463          * We must switch to CurTransactionContext before returning. This is
2464          * already done if we called StartTransaction, otherwise not.
2465          */
2466         Assert(CurTransactionContext != NULL);
2467         MemoryContextSwitchTo(CurTransactionContext);
2468 }
2469
/*
 *	CommitTransactionCommand
 *
 * Finish processing of one command by acting on the block state of
 * CurrentTransactionState.  Depending on that state this commits, prepares,
 * aborts or cleans up the current (sub)transaction, merely increments the
 * command counter, or just advances the block state.
 *
 * The subtransaction-ending cases pop the transaction state stack, so they
 * re-read CurrentTransactionState afterwards; the SUBABORT_END and
 * SUBABORT_PENDING cases call this function recursively to process the
 * parent's state.
 */
void
CommitTransactionCommand(void)
{
	TransactionState s = CurrentTransactionState;

	switch (s->blockState)
	{
			/*
			 * This shouldn't happen, because it means the previous
			 * StartTransactionCommand didn't set the STARTED state
			 * appropriately.
			 */
		case TBLOCK_DEFAULT:
			elog(FATAL, "CommitTransactionCommand: unexpected state %s",
				 BlockStateAsString(s->blockState));
			break;

			/*
			 * If we aren't in a transaction block, just do our usual
			 * transaction commit, and return to the idle state.
			 */
		case TBLOCK_STARTED:
			CommitTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * We are completing a "BEGIN TRANSACTION" command, so we change
			 * to the "transaction block in progress" state and return.  (We
			 * assume the BEGIN did nothing to the database, so we need no
			 * CommandCounterIncrement.)
			 */
		case TBLOCK_BEGIN:
			s->blockState = TBLOCK_INPROGRESS;
			break;

			/*
			 * This is the case when we have finished executing a command
			 * someplace within a transaction block.  We increment the command
			 * counter and return.
			 */
		case TBLOCK_INPROGRESS:
		case TBLOCK_SUBINPROGRESS:
			CommandCounterIncrement();
			break;

			/*
			 * We are completing a "COMMIT" command.  Do it and return to the
			 * idle state.
			 */
		case TBLOCK_END:
			CommitTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * Here we are in the middle of a transaction block but one of the
			 * commands caused an abort so we do nothing but remain in the
			 * abort state.  Eventually we will get a ROLLBACK command.
			 */
		case TBLOCK_ABORT:
		case TBLOCK_SUBABORT:
			break;

			/*
			 * Here we were in an aborted transaction block and we just got
			 * the ROLLBACK command from the user, so clean up the
			 * already-aborted transaction and return to the idle state.
			 */
		case TBLOCK_ABORT_END:
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * Here we were in a perfectly good transaction block but the user
			 * told us to ROLLBACK anyway.  We have to abort the transaction
			 * and then clean up.
			 */
		case TBLOCK_ABORT_PENDING:
			AbortTransaction();
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * We are completing a "PREPARE TRANSACTION" command.  Do it and
			 * return to the idle state.
			 */
		case TBLOCK_PREPARE:
			PrepareTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * We were just issued a SAVEPOINT inside a transaction block.
			 * Start a subtransaction.  (DefineSavepoint already did
			 * PushTransaction, so as to have someplace to put the SUBBEGIN
			 * state.)
			 */
		case TBLOCK_SUBBEGIN:
			StartSubTransaction();
			s->blockState = TBLOCK_SUBINPROGRESS;
			break;

			/*
			 * We were issued a RELEASE command, so we end the
			 * current subtransaction and return to the parent transaction.
			 * The parent might be ended too, so repeat till we find an
			 * INPROGRESS transaction or subtransaction.
			 */
		case TBLOCK_SUBRELEASE:
			do
			{
				CommitSubTransaction();
				s = CurrentTransactionState;	/* changed by pop */
			} while (s->blockState == TBLOCK_SUBRELEASE);

			Assert(s->blockState == TBLOCK_INPROGRESS ||
				   s->blockState == TBLOCK_SUBINPROGRESS);
			break;

			/*
			 * We were issued a COMMIT, so we end the current subtransaction
			 * hierarchy and perform final commit. We do this by rolling up
			 * any subtransactions into their parent, which leads to O(N^2)
			 * operations with respect to resource owners - this isn't that
			 * bad until we approach thousands of savepoints, but it is
			 * necessary for correctness should AFTER triggers create new
			 * resource owners.
			 */
		case TBLOCK_SUBCOMMIT:
			do
			{
				CommitSubTransaction();
				s = CurrentTransactionState;	/* changed by pop */
			} while (s->blockState == TBLOCK_SUBCOMMIT);
			/* If we had a COMMIT command, finish off the main xact too */
			if (s->blockState == TBLOCK_END)
			{
				Assert(s->parent == NULL);
				CommitTransaction();
				s->blockState = TBLOCK_DEFAULT;
			}
			else if (s->blockState == TBLOCK_PREPARE)
			{
				/* Likewise for PREPARE TRANSACTION: prepare the main xact */
				Assert(s->parent == NULL);
				PrepareTransaction();
				s->blockState = TBLOCK_DEFAULT;
			}
			else
				elog(ERROR, "CommitTransactionCommand: unexpected state %s",
					 BlockStateAsString(s->blockState));
			break;

			/*
			 * The current already-failed subtransaction is ending due to a
			 * ROLLBACK or ROLLBACK TO command, so pop it and recursively
			 * examine the parent (which could be in any of several states).
			 */
		case TBLOCK_SUBABORT_END:
			CleanupSubTransaction();
			CommitTransactionCommand();
			break;

			/*
			 * As above, but it's not dead yet, so abort first.
			 */
		case TBLOCK_SUBABORT_PENDING:
			AbortSubTransaction();
			CleanupSubTransaction();
			CommitTransactionCommand();
			break;

			/*
			 * The current subtransaction is the target of a ROLLBACK TO
			 * command.  Abort and pop it, then start a new subtransaction
			 * with the same name.
			 */
		case TBLOCK_SUBRESTART:
			{
				char	   *name;
				int			savepointLevel;

				/* save name and keep Cleanup from freeing it */
				name = s->name;
				s->name = NULL;
				savepointLevel = s->savepointLevel;

				AbortSubTransaction();
				CleanupSubTransaction();

				/*
				 * Push a new, unnamed subtransaction, then hand it the saved
				 * name and savepoint level.
				 */
				DefineSavepoint(NULL);
				s = CurrentTransactionState;	/* changed by push */
				s->name = name;
				s->savepointLevel = savepointLevel;

				/* This is the same as TBLOCK_SUBBEGIN case */
				AssertState(s->blockState == TBLOCK_SUBBEGIN);
				StartSubTransaction();
				s->blockState = TBLOCK_SUBINPROGRESS;
			}
			break;

			/*
			 * Same as above, but the subtransaction had already failed, so we
			 * don't need AbortSubTransaction.
			 */
		case TBLOCK_SUBABORT_RESTART:
			{
				char	   *name;
				int			savepointLevel;

				/* save name and keep Cleanup from freeing it */
				name = s->name;
				s->name = NULL;
				savepointLevel = s->savepointLevel;

				CleanupSubTransaction();

				/*
				 * Push a new, unnamed subtransaction, then hand it the saved
				 * name and savepoint level.
				 */
				DefineSavepoint(NULL);
				s = CurrentTransactionState;	/* changed by push */
				s->name = name;
				s->savepointLevel = savepointLevel;

				/* This is the same as TBLOCK_SUBBEGIN case */
				AssertState(s->blockState == TBLOCK_SUBBEGIN);
				StartSubTransaction();
				s->blockState = TBLOCK_SUBINPROGRESS;
			}
			break;
	}
}
2706
/*
 *	AbortCurrentTransaction
 *
 * Abort whatever transaction or subtransaction is current, as dictated by
 * the block state of CurrentTransactionState.  Outside a transaction block
 * the transaction is aborted and cleaned up and we return to the idle
 * state; inside a block the (sub)transaction is aborted and left in the
 * persistent ABORT/SUBABORT state until a ROLLBACK arrives.
 *
 * For a failure while creating or ending a subtransaction, the
 * subtransaction is cleaned up and this function calls itself recursively
 * to abort the parent as well.
 */
void
AbortCurrentTransaction(void)
{
	TransactionState s = CurrentTransactionState;

	switch (s->blockState)
	{
			/*
			 * No transaction block is open; what to do depends on the
			 * low-level transaction state.
			 */
		case TBLOCK_DEFAULT:
			if (s->state == TRANS_DEFAULT)
			{
				/* we are idle, so nothing to do */
			}
			else
			{
				/*
				 * We can get here after an error during transaction start
				 * (state will be TRANS_START).  Need to clean up the
				 * incompletely started transaction.  First, adjust the
				 * low-level state to suppress warning message from
				 * AbortTransaction.
				 */
				if (s->state == TRANS_START)
					s->state = TRANS_INPROGRESS;
				AbortTransaction();
				CleanupTransaction();
			}
			break;

			/*
			 * if we aren't in a transaction block, we just do the basic abort
			 * & cleanup transaction.
			 */
		case TBLOCK_STARTED:
			AbortTransaction();
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * If we are in TBLOCK_BEGIN it means something screwed up right
			 * after reading "BEGIN TRANSACTION".  We assume that the user
			 * will interpret the error as meaning the BEGIN failed to get him
			 * into a transaction block, so we should abort and return to idle
			 * state.
			 */
		case TBLOCK_BEGIN:
			AbortTransaction();
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * We are somewhere in a transaction block and we've gotten a
			 * failure, so we abort the transaction and set up the persistent
			 * ABORT state.  We will stay in ABORT until we get a ROLLBACK.
			 */
		case TBLOCK_INPROGRESS:
			AbortTransaction();
			s->blockState = TBLOCK_ABORT;
			/* CleanupTransaction happens when we exit TBLOCK_ABORT_END */
			break;

			/*
			 * Here, we failed while trying to COMMIT.  Clean up the
			 * transaction and return to idle state (we do not want to stay in
			 * the transaction).
			 */
		case TBLOCK_END:
			AbortTransaction();
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * Here, we are already in an aborted transaction state and are
			 * waiting for a ROLLBACK, but for some reason we failed again! So
			 * we just remain in the abort state.
			 */
		case TBLOCK_ABORT:
		case TBLOCK_SUBABORT:
			break;

			/*
			 * We are in a failed transaction and we got the ROLLBACK command.
			 * We have already aborted, we just need to cleanup and go to idle
			 * state.
			 */
		case TBLOCK_ABORT_END:
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * We are in a live transaction and we got a ROLLBACK command.
			 * Abort, cleanup, go to idle state.
			 */
		case TBLOCK_ABORT_PENDING:
			AbortTransaction();
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * Here, we failed while trying to PREPARE.  Clean up the
			 * transaction and return to idle state (we do not want to stay in
			 * the transaction).
			 */
		case TBLOCK_PREPARE:
			AbortTransaction();
			CleanupTransaction();
			s->blockState = TBLOCK_DEFAULT;
			break;

			/*
			 * We got an error inside a subtransaction.  Abort just the
			 * subtransaction, and go to the persistent SUBABORT state until
			 * we get ROLLBACK.
			 */
		case TBLOCK_SUBINPROGRESS:
			AbortSubTransaction();
			s->blockState = TBLOCK_SUBABORT;
			break;

			/*
			 * If we failed while trying to create a subtransaction, clean up
			 * the broken subtransaction and abort the parent.  The same
			 * applies if we get a failure while ending a subtransaction.
			 * The recursive call handles the parent, whatever state it is
			 * in.
			 */
		case TBLOCK_SUBBEGIN:
		case TBLOCK_SUBRELEASE:
		case TBLOCK_SUBCOMMIT:
		case TBLOCK_SUBABORT_PENDING:
		case TBLOCK_SUBRESTART:
			AbortSubTransaction();
			CleanupSubTransaction();
			AbortCurrentTransaction();
			break;

			/*
			 * Same as above, except the Abort() was already done.
			 */
		case TBLOCK_SUBABORT_END:
		case TBLOCK_SUBABORT_RESTART:
			CleanupSubTransaction();
			AbortCurrentTransaction();
			break;
	}
}
2858
2859 /*
2860  *      PreventTransactionChain
2861  *
2862  *      This routine is to be called by statements that must not run inside
2863  *      a transaction block, typically because they have non-rollback-able
2864  *      side effects or do internal commits.
2865  *
2866  *      If we have already started a transaction block, issue an error; also issue
2867  *      an error if we appear to be running inside a user-defined function (which
2868  *      could issue more commands and possibly cause a failure after the statement
2869  *      completes).  Subtransactions are verboten too.
2870  *
2871  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2872  *      inside a function or multi-query querystring.  (We will always fail if
2873  *      this is false, but it's convenient to centralize the check here instead of
2874  *      making callers do it.)
2875  *      stmtType: statement type name, for error messages.
2876  */
2877 void
2878 PreventTransactionChain(bool isTopLevel, const char *stmtType)
2879 {
2880         /*
2881          * xact block already started?
2882          */
2883         if (IsTransactionBlock())
2884                 ereport(ERROR,
2885                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2886                 /* translator: %s represents an SQL statement name */
2887                                  errmsg("%s cannot run inside a transaction block",
2888                                                 stmtType)));
2889
2890         /*
2891          * subtransaction?
2892          */
2893         if (IsSubTransaction())
2894                 ereport(ERROR,
2895                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2896                 /* translator: %s represents an SQL statement name */
2897                                  errmsg("%s cannot run inside a subtransaction",
2898                                                 stmtType)));
2899
2900         /*
2901          * inside a function call?
2902          */
2903         if (!isTopLevel)
2904                 ereport(ERROR,
2905                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2906                 /* translator: %s represents an SQL statement name */
2907                                  errmsg("%s cannot be executed from a function or multi-command string",
2908                                                 stmtType)));
2909
2910         /* If we got past IsTransactionBlock test, should be in default state */
2911         if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
2912                 CurrentTransactionState->blockState != TBLOCK_STARTED)
2913                 elog(FATAL, "cannot prevent transaction chain");
2914         /* all okay */
2915 }
2916
2917 /*
2918  *      RequireTransactionChain
2919  *
2920  *      This routine is to be called by statements that must run inside
2921  *      a transaction block, because they have no effects that persist past
2922  *      transaction end (and so calling them outside a transaction block
2923  *      is presumably an error).  DECLARE CURSOR is an example.
2924  *
2925  *      If we appear to be running inside a user-defined function, we do not
2926  *      issue an error, since the function could issue more commands that make
2927  *      use of the current statement's results.  Likewise subtransactions.
2928  *      Thus this is an inverse for PreventTransactionChain.
2929  *
2930  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2931  *      inside a function.
2932  *      stmtType: statement type name, for error messages.
2933  */
2934 void
2935 RequireTransactionChain(bool isTopLevel, const char *stmtType)
2936 {
2937         /*
2938          * xact block already started?
2939          */
2940         if (IsTransactionBlock())
2941                 return;
2942
2943         /*
2944          * subtransaction?
2945          */
2946         if (IsSubTransaction())
2947                 return;
2948
2949         /*
2950          * inside a function call?
2951          */
2952         if (!isTopLevel)
2953                 return;
2954
2955         ereport(ERROR,
2956                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
2957         /* translator: %s represents an SQL statement name */
2958                          errmsg("%s can only be used in transaction blocks",
2959                                         stmtType)));
2960 }
2961
2962 /*
2963  *      IsInTransactionChain
2964  *
2965  *      This routine is for statements that need to behave differently inside
2966  *      a transaction block than when running as single commands.  ANALYZE is
2967  *      currently the only example.
2968  *
2969  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2970  *      inside a function.
2971  */
2972 bool
2973 IsInTransactionChain(bool isTopLevel)
2974 {
2975         /*
2976          * Return true on same conditions that would make PreventTransactionChain
2977          * error out
2978          */
2979         if (IsTransactionBlock())
2980                 return true;
2981
2982         if (IsSubTransaction())
2983                 return true;
2984
2985         if (!isTopLevel)
2986                 return true;
2987
2988         if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
2989                 CurrentTransactionState->blockState != TBLOCK_STARTED)
2990                 return true;
2991
2992         return false;
2993 }
2994
2995
2996 /*
2997  * Register or deregister callback functions for start- and end-of-xact
2998  * operations.
2999  *
3000  * These functions are intended for use by dynamically loaded modules.
3001  * For built-in modules we generally just hardwire the appropriate calls
3002  * (mainly because it's easier to control the order that way, where needed).
3003  *
3004  * At transaction end, the callback occurs post-commit or post-abort, so the
3005  * callback functions can only do noncritical cleanup.
3006  */
3007 void
3008 RegisterXactCallback(XactCallback callback, void *arg)
3009 {
3010         XactCallbackItem *item;
3011
3012         item = (XactCallbackItem *)
3013                 MemoryContextAlloc(TopMemoryContext, sizeof(XactCallbackItem));
3014         item->callback = callback;
3015         item->arg = arg;
3016         item->next = Xact_callbacks;
3017         Xact_callbacks = item;
3018 }
3019
3020 void
3021 UnregisterXactCallback(XactCallback callback, void *arg)
3022 {
3023         XactCallbackItem *item;
3024         XactCallbackItem *prev;
3025
3026         prev = NULL;
3027         for (item = Xact_callbacks; item; prev = item, item = item->next)
3028         {
3029                 if (item->callback == callback && item->arg == arg)
3030                 {
3031                         if (prev)
3032                                 prev->next = item->next;
3033                         else
3034                                 Xact_callbacks = item->next;
3035                         pfree(item);
3036                         break;
3037                 }
3038         }
3039 }
3040
3041 static void
3042 CallXactCallbacks(XactEvent event)
3043 {
3044         XactCallbackItem *item;
3045
3046         for (item = Xact_callbacks; item; item = item->next)
3047                 (*item->callback) (event, item->arg);
3048 }
3049
3050
3051 /*
3052  * Register or deregister callback functions for start- and end-of-subxact
3053  * operations.
3054  *
3055  * Pretty much same as above, but for subtransaction events.
3056  *
3057  * At subtransaction end, the callback occurs post-subcommit or post-subabort,
3058  * so the callback functions can only do noncritical cleanup.  At
3059  * subtransaction start, the callback is called when the subtransaction has
3060  * finished initializing.
3061  */
3062 void
3063 RegisterSubXactCallback(SubXactCallback callback, void *arg)
3064 {
3065         SubXactCallbackItem *item;
3066
3067         item = (SubXactCallbackItem *)
3068                 MemoryContextAlloc(TopMemoryContext, sizeof(SubXactCallbackItem));
3069         item->callback = callback;
3070         item->arg = arg;
3071         item->next = SubXact_callbacks;
3072         SubXact_callbacks = item;
3073 }
3074
3075 void
3076 UnregisterSubXactCallback(SubXactCallback callback, void *arg)
3077 {
3078         SubXactCallbackItem *item;
3079         SubXactCallbackItem *prev;
3080
3081         prev = NULL;
3082         for (item = SubXact_callbacks; item; prev = item, item = item->next)
3083         {
3084                 if (item->callback == callback && item->arg == arg)
3085                 {
3086                         if (prev)
3087                                 prev->next = item->next;
3088                         else
3089                                 SubXact_callbacks = item->next;
3090                         pfree(item);
3091                         break;
3092                 }
3093         }
3094 }
3095
3096 static void
3097 CallSubXactCallbacks(SubXactEvent event,
3098                                          SubTransactionId mySubid,
3099                                          SubTransactionId parentSubid)
3100 {
3101         SubXactCallbackItem *item;
3102
3103         for (item = SubXact_callbacks; item; item = item->next)
3104                 (*item->callback) (event, mySubid, parentSubid, item->arg);
3105 }
3106
3107
3108 /* ----------------------------------------------------------------
3109  *                                         transaction block support
3110  * ----------------------------------------------------------------
3111  */
3112
3113 /*
3114  *      BeginTransactionBlock
3115  *              This executes a BEGIN command.
3116  */
3117 void
3118 BeginTransactionBlock(void)
3119 {
3120         TransactionState s = CurrentTransactionState;
3121
3122         switch (s->blockState)
3123         {
3124                         /*
3125                          * We are not inside a transaction block, so allow one to begin.
3126                          */
3127                 case TBLOCK_STARTED:
3128                         s->blockState = TBLOCK_BEGIN;
3129                         break;
3130
3131                         /*
3132                          * Already a transaction block in progress.
3133                          */
3134                 case TBLOCK_INPROGRESS:
3135                 case TBLOCK_SUBINPROGRESS:
3136                 case TBLOCK_ABORT:
3137                 case TBLOCK_SUBABORT:
3138                         ereport(WARNING,
3139                                         (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
3140                                          errmsg("there is already a transaction in progress")));
3141                         break;
3142
3143                         /* These cases are invalid. */
3144                 case TBLOCK_DEFAULT:
3145                 case TBLOCK_BEGIN:
3146                 case TBLOCK_SUBBEGIN:
3147                 case TBLOCK_END:
3148                 case TBLOCK_SUBRELEASE:
3149                 case TBLOCK_SUBCOMMIT:
3150                 case TBLOCK_ABORT_END:
3151                 case TBLOCK_SUBABORT_END:
3152                 case TBLOCK_ABORT_PENDING:
3153                 case TBLOCK_SUBABORT_PENDING:
3154                 case TBLOCK_SUBRESTART:
3155                 case TBLOCK_SUBABORT_RESTART:
3156                 case TBLOCK_PREPARE:
3157                         elog(FATAL, "BeginTransactionBlock: unexpected state %s",
3158                                  BlockStateAsString(s->blockState));
3159                         break;
3160         }
3161 }
3162
3163 /*
3164  *      PrepareTransactionBlock
3165  *              This executes a PREPARE command.
3166  *
3167  * Since PREPARE may actually do a ROLLBACK, the result indicates what
3168  * happened: TRUE for PREPARE, FALSE for ROLLBACK.
3169  *
3170  * Note that we don't actually do anything here except change blockState.
3171  * The real work will be done in the upcoming PrepareTransaction().
3172  * We do it this way because it's not convenient to change memory context,
3173  * resource owner, etc while executing inside a Portal.
3174  */
3175 bool
3176 PrepareTransactionBlock(char *gid)
3177 {
3178         TransactionState s;
3179         bool            result;
3180
3181         /* Set up to commit the current transaction */
3182         result = EndTransactionBlock();
3183
3184         /* If successful, change outer tblock state to PREPARE */
3185         if (result)
3186         {
3187                 s = CurrentTransactionState;
3188
3189                 while (s->parent != NULL)
3190                         s = s->parent;
3191
3192                 if (s->blockState == TBLOCK_END)
3193                 {
3194                         /* Save GID where PrepareTransaction can find it again */
3195                         prepareGID = MemoryContextStrdup(TopTransactionContext, gid);
3196
3197                         s->blockState = TBLOCK_PREPARE;
3198                 }
3199                 else
3200                 {
3201                         /*
3202                          * ignore case where we are not in a transaction;
3203                          * EndTransactionBlock already issued a warning.
3204                          */
3205                         Assert(s->blockState == TBLOCK_STARTED);
3206                         /* Don't send back a PREPARE result tag... */
3207                         result = false;
3208                 }
3209         }
3210
3211         return result;
3212 }
3213
3214 /*
3215  *      EndTransactionBlock
3216  *              This executes a COMMIT command.
3217  *
3218  * Since COMMIT may actually do a ROLLBACK, the result indicates what
3219  * happened: TRUE for COMMIT, FALSE for ROLLBACK.
3220  *
3221  * Note that we don't actually do anything here except change blockState.
3222  * The real work will be done in the upcoming CommitTransactionCommand().
3223  * We do it this way because it's not convenient to change memory context,
3224  * resource owner, etc while executing inside a Portal.
3225  */
3226 bool
3227 EndTransactionBlock(void)
3228 {
3229         TransactionState s = CurrentTransactionState;
3230         bool            result = false;
3231
3232         switch (s->blockState)
3233         {
3234                         /*
3235                          * We are in a transaction block, so tell CommitTransactionCommand
3236                          * to COMMIT.
3237                          */
3238                 case TBLOCK_INPROGRESS:
3239                         s->blockState = TBLOCK_END;
3240                         result = true;
3241                         break;
3242
3243                         /*
3244                          * We are in a failed transaction block.  Tell
3245                          * CommitTransactionCommand it's time to exit the block.
3246                          */
3247                 case TBLOCK_ABORT:
3248                         s->blockState = TBLOCK_ABORT_END;
3249                         break;
3250
3251                         /*
3252                          * We are in a live subtransaction block.  Set up to subcommit all
3253                          * open subtransactions and then commit the main transaction.
3254                          */
3255                 case TBLOCK_SUBINPROGRESS:
3256                         while (s->parent != NULL)
3257                         {
3258                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3259                                         s->blockState = TBLOCK_SUBCOMMIT;
3260                                 else
3261                                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3262                                                  BlockStateAsString(s->blockState));
3263                                 s = s->parent;
3264                         }
3265                         if (s->blockState == TBLOCK_INPROGRESS)
3266                                 s->blockState = TBLOCK_END;
3267                         else
3268                                 elog(FATAL, "EndTransactionBlock: unexpected state %s",
3269                                          BlockStateAsString(s->blockState));
3270                         result = true;
3271                         break;
3272
3273                         /*
3274                          * Here we are inside an aborted subtransaction.  Treat the COMMIT
3275                          * as ROLLBACK: set up to abort everything and exit the main
3276                          * transaction.
3277                          */
3278                 case TBLOCK_SUBABORT:
3279                         while (s->parent != NULL)
3280                         {
3281                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3282                                         s->blockState = TBLOCK_SUBABORT_PENDING;
3283                                 else if (s->blockState == TBLOCK_SUBABORT)
3284                                         s->blockState = TBLOCK_SUBABORT_END;
3285                                 else
3286                                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3287                                                  BlockStateAsString(s->blockState));
3288                                 s = s->parent;
3289                         }
3290                         if (s->blockState == TBLOCK_INPROGRESS)
3291                                 s->blockState = TBLOCK_ABORT_PENDING;
3292                         else if (s->blockState == TBLOCK_ABORT)
3293                                 s->blockState = TBLOCK_ABORT_END;
3294                         else
3295                                 elog(FATAL, "EndTransactionBlock: unexpected state %s",
3296                                          BlockStateAsString(s->blockState));
3297                         break;
3298
3299                         /*
3300                          * The user issued COMMIT when not inside a transaction.  Issue a
3301                          * WARNING, staying in TBLOCK_STARTED state.  The upcoming call to
3302                          * CommitTransactionCommand() will then close the transaction and
3303                          * put us back into the default state.
3304                          */
3305                 case TBLOCK_STARTED:
3306                         ereport(WARNING,
3307                                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
3308                                          errmsg("there is no transaction in progress")));
3309                         result = true;
3310                         break;
3311
3312                         /* These cases are invalid. */
3313                 case TBLOCK_DEFAULT:
3314                 case TBLOCK_BEGIN:
3315                 case TBLOCK_SUBBEGIN:
3316                 case TBLOCK_END:
3317                 case TBLOCK_SUBRELEASE:
3318                 case TBLOCK_SUBCOMMIT:
3319                 case TBLOCK_ABORT_END:
3320                 case TBLOCK_SUBABORT_END:
3321                 case TBLOCK_ABORT_PENDING:
3322                 case TBLOCK_SUBABORT_PENDING:
3323                 case TBLOCK_SUBRESTART:
3324                 case TBLOCK_SUBABORT_RESTART:
3325                 case TBLOCK_PREPARE:
3326                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3327                                  BlockStateAsString(s->blockState));
3328                         break;
3329         }
3330
3331         return result;
3332 }
3333
3334 /*
3335  *      UserAbortTransactionBlock
3336  *              This executes a ROLLBACK command.
3337  *
3338  * As above, we don't actually do anything here except change blockState.
3339  */
3340 void
3341 UserAbortTransactionBlock(void)
3342 {
3343         TransactionState s = CurrentTransactionState;
3344
3345         switch (s->blockState)
3346         {
3347                         /*
3348                          * We are inside a transaction block and we got a ROLLBACK command
3349                          * from the user, so tell CommitTransactionCommand to abort and
3350                          * exit the transaction block.
3351                          */
3352                 case TBLOCK_INPROGRESS:
3353                         s->blockState = TBLOCK_ABORT_PENDING;
3354                         break;
3355
3356                         /*
3357                          * We are inside a failed transaction block and we got a ROLLBACK
3358                          * command from the user.  Abort processing is already done, so
3359                          * CommitTransactionCommand just has to cleanup and go back to
3360                          * idle state.
3361                          */
3362                 case TBLOCK_ABORT:
3363                         s->blockState = TBLOCK_ABORT_END;
3364                         break;
3365
3366                         /*
3367                          * We are inside a subtransaction.      Mark everything up to top
3368                          * level as exitable.
3369                          */
3370                 case TBLOCK_SUBINPROGRESS:
3371                 case TBLOCK_SUBABORT:
3372                         while (s->parent != NULL)
3373                         {
3374                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3375                                         s->blockState = TBLOCK_SUBABORT_PENDING;
3376                                 else if (s->blockState == TBLOCK_SUBABORT)
3377                                         s->blockState = TBLOCK_SUBABORT_END;
3378                                 else
3379                                         elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3380                                                  BlockStateAsString(s->blockState));
3381                                 s = s->parent;
3382                         }
3383                         if (s->blockState == TBLOCK_INPROGRESS)
3384                                 s->blockState = TBLOCK_ABORT_PENDING;
3385                         else if (s->blockState == TBLOCK_ABORT)
3386                                 s->blockState = TBLOCK_ABORT_END;
3387                         else
3388                                 elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3389                                          BlockStateAsString(s->blockState));
3390                         break;
3391
3392                         /*
3393                          * The user issued ABORT when not inside a transaction. Issue a
3394                          * WARNING and go to abort state.  The upcoming call to
3395                          * CommitTransactionCommand() will then put us back into the
3396                          * default state.
3397                          */
3398                 case TBLOCK_STARTED:
3399                         ereport(NOTICE,
3400                                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
3401                                          errmsg("there is no transaction in progress")));
3402                         s->blockState = TBLOCK_ABORT_PENDING;
3403                         break;
3404
3405                         /* These cases are invalid. */
3406                 case TBLOCK_DEFAULT:
3407                 case TBLOCK_BEGIN:
3408                 case TBLOCK_SUBBEGIN:
3409                 case TBLOCK_END:
3410                 case TBLOCK_SUBRELEASE:
3411                 case TBLOCK_SUBCOMMIT:
3412                 case TBLOCK_ABORT_END:
3413                 case TBLOCK_SUBABORT_END:
3414                 case TBLOCK_ABORT_PENDING:
3415                 case TBLOCK_SUBABORT_PENDING:
3416                 case TBLOCK_SUBRESTART:
3417                 case TBLOCK_SUBABORT_RESTART:
3418                 case TBLOCK_PREPARE:
3419                         elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3420                                  BlockStateAsString(s->blockState));
3421                         break;
3422         }
3423 }
3424
3425 /*
3426  * DefineSavepoint
3427  *              This executes a SAVEPOINT command.
3428  */
3429 void
3430 DefineSavepoint(char *name)
3431 {
3432         TransactionState s = CurrentTransactionState;
3433
3434         switch (s->blockState)
3435         {
3436                 case TBLOCK_INPROGRESS:
3437                 case TBLOCK_SUBINPROGRESS:
3438                         /* Normal subtransaction start */
3439                         PushTransaction();
3440                         s = CurrentTransactionState;            /* changed by push */
3441
3442                         /*
3443                          * Savepoint names, like the TransactionState block itself, live
3444                          * in TopTransactionContext.
3445                          */
3446                         if (name)
3447                                 s->name = MemoryContextStrdup(TopTransactionContext, name);
3448                         break;
3449
3450                         /* These cases are invalid. */
3451                 case TBLOCK_DEFAULT:
3452                 case TBLOCK_STARTED:
3453                 case TBLOCK_BEGIN:
3454                 case TBLOCK_SUBBEGIN:
3455                 case TBLOCK_END:
3456                 case TBLOCK_SUBRELEASE:
3457                 case TBLOCK_SUBCOMMIT:
3458                 case TBLOCK_ABORT:
3459                 case TBLOCK_SUBABORT:
3460                 case TBLOCK_ABORT_END:
3461                 case TBLOCK_SUBABORT_END:
3462                 case TBLOCK_ABORT_PENDING:
3463                 case TBLOCK_SUBABORT_PENDING:
3464                 case TBLOCK_SUBRESTART:
3465                 case TBLOCK_SUBABORT_RESTART:
3466                 case TBLOCK_PREPARE:
3467                         elog(FATAL, "DefineSavepoint: unexpected state %s",
3468                                  BlockStateAsString(s->blockState));
3469                         break;
3470         }
3471 }
3472
3473 /*
3474  * ReleaseSavepoint
3475  *              This executes a RELEASE command.
3476  *
3477  * As above, we don't actually do anything here except change blockState.
3478  */
3479 void
3480 ReleaseSavepoint(List *options)
3481 {
3482         TransactionState s = CurrentTransactionState;
3483         TransactionState target,
3484                                 xact;
3485         ListCell   *cell;
3486         char       *name = NULL;
3487
3488         switch (s->blockState)
3489         {
3490                         /*
3491                          * We can't rollback to a savepoint if there is no savepoint
3492                          * defined.
3493                          */
3494                 case TBLOCK_INPROGRESS:
3495                         ereport(ERROR,
3496                                         (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3497                                          errmsg("no such savepoint")));
3498                         break;
3499
3500                         /*
3501                          * We are in a non-aborted subtransaction.      This is the only valid
3502                          * case.
3503                          */
3504                 case TBLOCK_SUBINPROGRESS:
3505                         break;
3506
3507                         /* These cases are invalid. */
3508                 case TBLOCK_DEFAULT:
3509                 case TBLOCK_STARTED:
3510                 case TBLOCK_BEGIN:
3511                 case TBLOCK_SUBBEGIN:
3512                 case TBLOCK_END:
3513                 case TBLOCK_SUBRELEASE:
3514                 case TBLOCK_SUBCOMMIT:
3515                 case TBLOCK_ABORT:
3516                 case TBLOCK_SUBABORT:
3517                 case TBLOCK_ABORT_END:
3518                 case TBLOCK_SUBABORT_END:
3519                 case TBLOCK_ABORT_PENDING:
3520                 case TBLOCK_SUBABORT_PENDING:
3521                 case TBLOCK_SUBRESTART:
3522                 case TBLOCK_SUBABORT_RESTART:
3523                 case TBLOCK_PREPARE:
3524                         elog(FATAL, "ReleaseSavepoint: unexpected state %s",
3525                                  BlockStateAsString(s->blockState));
3526                         break;
3527         }
3528
3529         foreach(cell, options)
3530         {
3531                 DefElem    *elem = lfirst(cell);
3532
3533                 if (strcmp(elem->defname, "savepoint_name") == 0)
3534                         name = strVal(elem->arg);
3535         }
3536
3537         Assert(PointerIsValid(name));
3538
3539         for (target = s; PointerIsValid(target); target = target->parent)
3540         {
3541                 if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
3542                         break;
3543         }
3544
3545         if (!PointerIsValid(target))
3546                 ereport(ERROR,
3547                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3548                                  errmsg("no such savepoint")));
3549
3550         /* disallow crossing savepoint level boundaries */
3551         if (target->savepointLevel != s->savepointLevel)
3552                 ereport(ERROR,
3553                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3554                                  errmsg("no such savepoint")));
3555
3556         /*
3557          * Mark "commit pending" all subtransactions up to the target
3558          * subtransaction.      The actual commits will happen when control gets to
3559          * CommitTransactionCommand.
3560          */
3561         xact = CurrentTransactionState;
3562         for (;;)
3563         {
3564                 Assert(xact->blockState == TBLOCK_SUBINPROGRESS);
3565                 xact->blockState = TBLOCK_SUBRELEASE;
3566                 if (xact == target)
3567                         break;
3568                 xact = xact->parent;
3569                 Assert(PointerIsValid(xact));
3570         }
3571 }
3572
3573 /*
3574  * RollbackToSavepoint
3575  *              This executes a ROLLBACK TO <savepoint> command.
3576  *
3577  * As above, we don't actually do anything here except change blockState.
3578  */
3579 void
3580 RollbackToSavepoint(List *options)
3581 {
3582         TransactionState s = CurrentTransactionState;
3583         TransactionState target,
3584                                 xact;
3585         ListCell   *cell;
3586         char       *name = NULL;
3587
3588         switch (s->blockState)
3589         {
3590                         /*
3591                          * We can't rollback to a savepoint if there is no savepoint
3592                          * defined.
3593                          */
3594                 case TBLOCK_INPROGRESS:
3595                 case TBLOCK_ABORT:
3596                         ereport(ERROR,
3597                                         (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3598                                          errmsg("no such savepoint")));
3599                         break;
3600
3601                         /*
3602                          * There is at least one savepoint, so proceed.
3603                          */
3604                 case TBLOCK_SUBINPROGRESS:
3605                 case TBLOCK_SUBABORT:
3606                         break;
3607
3608                         /* These cases are invalid. */
3609                 case TBLOCK_DEFAULT:
3610                 case TBLOCK_STARTED:
3611                 case TBLOCK_BEGIN:
3612                 case TBLOCK_SUBBEGIN:
3613                 case TBLOCK_END:
3614                 case TBLOCK_SUBRELEASE:
3615                 case TBLOCK_SUBCOMMIT:
3616                 case TBLOCK_ABORT_END:
3617                 case TBLOCK_SUBABORT_END:
3618                 case TBLOCK_ABORT_PENDING:
3619                 case TBLOCK_SUBABORT_PENDING:
3620                 case TBLOCK_SUBRESTART:
3621                 case TBLOCK_SUBABORT_RESTART:
3622                 case TBLOCK_PREPARE:
3623                         elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3624                                  BlockStateAsString(s->blockState));
3625                         break;
3626         }
3627
3628         foreach(cell, options)
3629         {
3630                 DefElem    *elem = lfirst(cell);
3631
3632                 if (strcmp(elem->defname, "savepoint_name") == 0)
3633                         name = strVal(elem->arg);
3634         }
3635
3636         Assert(PointerIsValid(name));
3637
3638         for (target = s; PointerIsValid(target); target = target->parent)
3639         {
3640                 if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
3641                         break;
3642         }
3643
3644         if (!PointerIsValid(target))
3645                 ereport(ERROR,
3646                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3647                                  errmsg("no such savepoint")));
3648
3649         /* disallow crossing savepoint level boundaries */
3650         if (target->savepointLevel != s->savepointLevel)
3651                 ereport(ERROR,
3652                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3653                                  errmsg("no such savepoint")));
3654
3655         /*
3656          * Mark "abort pending" all subtransactions up to the target
3657          * subtransaction.      The actual aborts will happen when control gets to
3658          * CommitTransactionCommand.
3659          */
3660         xact = CurrentTransactionState;
3661         for (;;)
3662         {
3663                 if (xact == target)
3664                         break;
3665                 if (xact->blockState == TBLOCK_SUBINPROGRESS)
3666                         xact->blockState = TBLOCK_SUBABORT_PENDING;
3667                 else if (xact->blockState == TBLOCK_SUBABORT)
3668                         xact->blockState = TBLOCK_SUBABORT_END;
3669                 else
3670                         elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3671                                  BlockStateAsString(xact->blockState));
3672                 xact = xact->parent;
3673                 Assert(PointerIsValid(xact));
3674         }
3675
3676         /* And mark the target as "restart pending" */
3677         if (xact->blockState == TBLOCK_SUBINPROGRESS)
3678                 xact->blockState = TBLOCK_SUBRESTART;
3679         else if (xact->blockState == TBLOCK_SUBABORT)
3680                 xact->blockState = TBLOCK_SUBABORT_RESTART;
3681         else
3682                 elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3683                          BlockStateAsString(xact->blockState));
3684 }
3685
3686 /*
3687  * BeginInternalSubTransaction
3688  *              This is the same as DefineSavepoint except it allows TBLOCK_STARTED,
3689  *              TBLOCK_END, and TBLOCK_PREPARE states, and therefore it can safely be
3690  *              used in functions that might be called when not inside a BEGIN block
3691  *              or when running deferred triggers at COMMIT/PREPARE time.  Also, it
3692  *              automatically does CommitTransactionCommand/StartTransactionCommand
3693  *              instead of expecting the caller to do it.
3694  */
3695 void
3696 BeginInternalSubTransaction(char *name)
3697 {
3698         TransactionState s = CurrentTransactionState;
3699
3700         switch (s->blockState)
3701         {
3702                 case TBLOCK_STARTED:
3703                 case TBLOCK_INPROGRESS:
3704                 case TBLOCK_END:
3705                 case TBLOCK_PREPARE:
3706                 case TBLOCK_SUBINPROGRESS:
3707                         /* Normal subtransaction start */
3708                         PushTransaction();
3709                         s = CurrentTransactionState;            /* changed by push */
3710
3711                         /*
3712                          * Savepoint names, like the TransactionState block itself, live
3713                          * in TopTransactionContext.
3714                          */
3715                         if (name)
3716                                 s->name = MemoryContextStrdup(TopTransactionContext, name);
3717                         break;
3718
3719                         /* These cases are invalid. */
3720                 case TBLOCK_DEFAULT:
3721                 case TBLOCK_BEGIN:
3722                 case TBLOCK_SUBBEGIN:
3723                 case TBLOCK_SUBRELEASE:
3724                 case TBLOCK_SUBCOMMIT:
3725                 case TBLOCK_ABORT:
3726                 case TBLOCK_SUBABORT:
3727                 case TBLOCK_ABORT_END:
3728                 case TBLOCK_SUBABORT_END:
3729                 case TBLOCK_ABORT_PENDING:
3730                 case TBLOCK_SUBABORT_PENDING:
3731                 case TBLOCK_SUBRESTART:
3732                 case TBLOCK_SUBABORT_RESTART:
3733                         elog(FATAL, "BeginInternalSubTransaction: unexpected state %s",
3734                                  BlockStateAsString(s->blockState));
3735                         break;
3736         }
3737
3738         CommitTransactionCommand();
3739         StartTransactionCommand();
3740 }
3741
3742 /*
3743  * ReleaseCurrentSubTransaction
3744  *
3745  * RELEASE (ie, commit) the innermost subtransaction, regardless of its
3746  * savepoint name (if any).
3747  * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
3748  */
3749 void
3750 ReleaseCurrentSubTransaction(void)
3751 {
3752         TransactionState s = CurrentTransactionState;
3753
3754         if (s->blockState != TBLOCK_SUBINPROGRESS)
3755                 elog(ERROR, "ReleaseCurrentSubTransaction: unexpected state %s",
3756                          BlockStateAsString(s->blockState));
3757         Assert(s->state == TRANS_INPROGRESS);
3758         MemoryContextSwitchTo(CurTransactionContext);
3759         CommitSubTransaction();
3760         s = CurrentTransactionState;    /* changed by pop */
3761         Assert(s->state == TRANS_INPROGRESS);
3762 }
3763
3764 /*
3765  * RollbackAndReleaseCurrentSubTransaction
3766  *
3767  * ROLLBACK and RELEASE (ie, abort) the innermost subtransaction, regardless
3768  * of its savepoint name (if any).
3769  * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
3770  */
3771 void
3772 RollbackAndReleaseCurrentSubTransaction(void)
3773 {
3774         TransactionState s = CurrentTransactionState;
3775
3776         switch (s->blockState)
3777         {
3778                         /* Must be in a subtransaction */
3779                 case TBLOCK_SUBINPROGRESS:
3780                 case TBLOCK_SUBABORT:
3781                         break;
3782
3783                         /* These cases are invalid. */
3784                 case TBLOCK_DEFAULT:
3785                 case TBLOCK_STARTED:
3786                 case TBLOCK_BEGIN:
3787                 case TBLOCK_SUBBEGIN:
3788                 case TBLOCK_INPROGRESS:
3789                 case TBLOCK_END:
3790                 case TBLOCK_SUBRELEASE:
3791                 case TBLOCK_SUBCOMMIT:
3792                 case TBLOCK_ABORT:
3793                 case TBLOCK_ABORT_END:
3794                 case TBLOCK_SUBABORT_END:
3795                 case TBLOCK_ABORT_PENDING:
3796                 case TBLOCK_SUBABORT_PENDING:
3797                 case TBLOCK_SUBRESTART:
3798                 case TBLOCK_SUBABORT_RESTART:
3799                 case TBLOCK_PREPARE:
3800                         elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s",
3801                                  BlockStateAsString(s->blockState));
3802                         break;
3803         }
3804
3805         /*
3806          * Abort the current subtransaction, if needed.
3807          */
3808         if (s->blockState == TBLOCK_SUBINPROGRESS)
3809                 AbortSubTransaction();
3810
3811         /* And clean it up, too */
3812         CleanupSubTransaction();
3813
3814         s = CurrentTransactionState;    /* changed by pop */
3815         AssertState(s->blockState == TBLOCK_SUBINPROGRESS ||
3816                                 s->blockState == TBLOCK_INPROGRESS ||
3817                                 s->blockState == TBLOCK_STARTED);
3818 }
3819
3820 /*
3821  *      AbortOutOfAnyTransaction
3822  *
3823  *      This routine is provided for error recovery purposes.  It aborts any
3824  *      active transaction or transaction block, leaving the system in a known
3825  *      idle state.
3826  */
3827 void
3828 AbortOutOfAnyTransaction(void)
3829 {
3830         TransactionState s = CurrentTransactionState;
3831
3832         /*
3833          * Get out of any transaction or nested transaction
3834          */
3835         do
3836         {
3837                 switch (s->blockState)
3838                 {
3839                         case TBLOCK_DEFAULT:
3840                                 /* Not in a transaction, do nothing */
3841                                 break;
3842                         case TBLOCK_STARTED:
3843                         case TBLOCK_BEGIN:
3844                         case TBLOCK_INPROGRESS:
3845                         case TBLOCK_END:
3846                         case TBLOCK_ABORT_PENDING:
3847                         case TBLOCK_PREPARE:
3848                                 /* In a transaction, so clean up */
3849                                 AbortTransaction();
3850                                 CleanupTransaction();
3851                                 s->blockState = TBLOCK_DEFAULT;
3852                                 break;
3853                         case TBLOCK_ABORT:
3854                         case TBLOCK_ABORT_END:
3855                                 /* AbortTransaction already done, still need Cleanup */
3856                                 CleanupTransaction();
3857                                 s->blockState = TBLOCK_DEFAULT;
3858                                 break;
3859
3860                                 /*
3861                                  * In a subtransaction, so clean it up and abort parent too
3862                                  */
3863                         case TBLOCK_SUBBEGIN:
3864                         case TBLOCK_SUBINPROGRESS:
3865                         case TBLOCK_SUBRELEASE:
3866                         case TBLOCK_SUBCOMMIT:
3867                         case TBLOCK_SUBABORT_PENDING:
3868                         case TBLOCK_SUBRESTART:
3869                                 AbortSubTransaction();
3870                                 CleanupSubTransaction();
3871                                 s = CurrentTransactionState;    /* changed by pop */
3872                                 break;
3873
3874                         case TBLOCK_SUBABORT:
3875                         case TBLOCK_SUBABORT_END:
3876                         case TBLOCK_SUBABORT_RESTART:
3877                                 /* As above, but AbortSubTransaction already done */
3878                                 CleanupSubTransaction();
3879                                 s = CurrentTransactionState;    /* changed by pop */
3880                                 break;
3881                 }
3882         } while (s->blockState != TBLOCK_DEFAULT);
3883
3884         /* Should be out of all subxacts now */
3885         Assert(s->parent == NULL);
3886 }
3887
3888 /*
3889  * IsTransactionBlock --- are we within a transaction block?
3890  */
3891 bool
3892 IsTransactionBlock(void)
3893 {
3894         TransactionState s = CurrentTransactionState;
3895
3896         if (s->blockState == TBLOCK_DEFAULT || s->blockState == TBLOCK_STARTED)
3897                 return false;
3898
3899         return true;
3900 }
3901
3902 /*
3903  * IsTransactionOrTransactionBlock --- are we within either a transaction
3904  * or a transaction block?      (The backend is only really "idle" when this
3905  * returns false.)
3906  *
3907  * This should match up with IsTransactionBlock and IsTransactionState.
3908  */
3909 bool
3910 IsTransactionOrTransactionBlock(void)
3911 {
3912         TransactionState s = CurrentTransactionState;
3913
3914         if (s->blockState == TBLOCK_DEFAULT)
3915                 return false;
3916
3917         return true;
3918 }
3919
3920 /*
3921  * TransactionBlockStatusCode - return status code to send in ReadyForQuery
3922  */
3923 char
3924 TransactionBlockStatusCode(void)
3925 {
3926         TransactionState s = CurrentTransactionState;
3927
3928         switch (s->blockState)
3929         {
3930                 case TBLOCK_DEFAULT:
3931                 case TBLOCK_STARTED:
3932                         return 'I';                     /* idle --- not in transaction */
3933                 case TBLOCK_BEGIN:
3934                 case TBLOCK_SUBBEGIN:
3935                 case TBLOCK_INPROGRESS:
3936                 case TBLOCK_SUBINPROGRESS:
3937                 case TBLOCK_END:
3938                 case TBLOCK_SUBRELEASE:
3939                 case TBLOCK_SUBCOMMIT:
3940                 case TBLOCK_PREPARE:
3941                         return 'T';                     /* in transaction */
3942                 case TBLOCK_ABORT:
3943                 case TBLOCK_SUBABORT:
3944                 case TBLOCK_ABORT_END:
3945                 case TBLOCK_SUBABORT_END:
3946                 case TBLOCK_ABORT_PENDING:
3947                 case TBLOCK_SUBABORT_PENDING:
3948                 case TBLOCK_SUBRESTART:
3949                 case TBLOCK_SUBABORT_RESTART:
3950                         return 'E';                     /* in failed transaction */
3951         }
3952
3953         /* should never get here */
3954         elog(FATAL, "invalid transaction block state: %s",
3955                  BlockStateAsString(s->blockState));
3956         return 0;                                       /* keep compiler quiet */
3957 }
3958
3959 /*
3960  * IsSubTransaction
3961  */
3962 bool
3963 IsSubTransaction(void)
3964 {
3965         TransactionState s = CurrentTransactionState;
3966
3967         if (s->nestingLevel >= 2)
3968                 return true;
3969
3970         return false;
3971 }
3972
3973 /*
3974  * StartSubTransaction
3975  *
3976  * If you're wondering why this is separate from PushTransaction: it's because
3977  * we can't conveniently do this stuff right inside DefineSavepoint.  The
3978  * SAVEPOINT utility command will be executed inside a Portal, and if we
3979  * muck with CurrentMemoryContext or CurrentResourceOwner then exit from
3980  * the Portal will undo those settings.  So we make DefineSavepoint just
3981  * push a dummy transaction block, and when control returns to the main
3982  * idle loop, CommitTransactionCommand will be called, and we'll come here
3983  * to finish starting the subtransaction.
3984  */
3985 static void
3986 StartSubTransaction(void)
3987 {
3988         TransactionState s = CurrentTransactionState;
3989
3990         if (s->state != TRANS_DEFAULT)
3991                 elog(WARNING, "StartSubTransaction while in %s state",
3992                          TransStateAsString(s->state));
3993
3994         s->state = TRANS_START;
3995
3996         /*
3997          * Initialize subsystems for new subtransaction
3998          *
3999          * must initialize resource-management stuff first
4000          */
4001         AtSubStart_Memory();
4002         AtSubStart_ResourceOwner();
4003         AtSubStart_Inval();
4004         AtSubStart_Notify();
4005         AfterTriggerBeginSubXact();
4006
4007         s->state = TRANS_INPROGRESS;
4008
4009         /*
4010          * Call start-of-subxact callbacks
4011          */
4012         CallSubXactCallbacks(SUBXACT_EVENT_START_SUB, s->subTransactionId,
4013                                                  s->parent->subTransactionId);
4014
4015         ShowTransactionState("StartSubTransaction");
4016 }
4017
4018 /*
4019  * CommitSubTransaction
4020  *
4021  *      The caller has to make sure to always reassign CurrentTransactionState
4022  *      if it has a local pointer to it after calling this function.
4023  */
4024 static void
4025 CommitSubTransaction(void)
4026 {
4027         TransactionState s = CurrentTransactionState;
4028
4029         ShowTransactionState("CommitSubTransaction");
4030
4031         if (s->state != TRANS_INPROGRESS)
4032                 elog(WARNING, "CommitSubTransaction while in %s state",
4033                          TransStateAsString(s->state));
4034
4035         /* Pre-commit processing goes here -- nothing to do at the moment */
4036
4037         s->state = TRANS_COMMIT;
4038
4039         /* Must CCI to ensure commands of subtransaction are seen as done */
4040         CommandCounterIncrement();
4041
4042         /*
4043          * Prior to 8.4 we marked subcommit in clog at this point.      We now only
4044          * perform that step, if required, as part of the atomic update of the
4045          * whole transaction tree at top level commit or abort.
4046          */
4047
4048         /* Post-commit cleanup */
4049         if (TransactionIdIsValid(s->transactionId))
4050                 AtSubCommit_childXids();
4051         AfterTriggerEndSubXact(true);
4052         AtSubCommit_Portals(s->subTransactionId,
4053                                                 s->parent->subTransactionId,
4054                                                 s->parent->curTransactionOwner);
4055         AtEOSubXact_LargeObject(true, s->subTransactionId,
4056                                                         s->parent->subTransactionId);
4057         AtSubCommit_Notify();
4058
4059         CallSubXactCallbacks(SUBXACT_EVENT_COMMIT_SUB, s->subTransactionId,
4060                                                  s->parent->subTransactionId);
4061
4062         ResourceOwnerRelease(s->curTransactionOwner,
4063                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
4064                                                  true, false);
4065         AtEOSubXact_RelationCache(true, s->subTransactionId,
4066                                                           s->parent->subTransactionId);
4067         AtEOSubXact_Inval(true);
4068         AtSubCommit_smgr();
4069
4070         /*
4071          * The only lock we actually release here is the subtransaction XID lock.
4072          */
4073         CurrentResourceOwner = s->curTransactionOwner;
4074         if (TransactionIdIsValid(s->transactionId))
4075                 XactLockTableDelete(s->transactionId);
4076
4077         /*
4078          * Other locks should get transferred to their parent resource owner.
4079          */
4080         ResourceOwnerRelease(s->curTransactionOwner,
4081                                                  RESOURCE_RELEASE_LOCKS,
4082                                                  true, false);
4083         ResourceOwnerRelease(s->curTransactionOwner,
4084                                                  RESOURCE_RELEASE_AFTER_LOCKS,
4085                                                  true, false);
4086
4087         AtEOXact_GUC(true, s->gucNestLevel);
4088         AtEOSubXact_SPI(true, s->subTransactionId);
4089         AtEOSubXact_on_commit_actions(true, s->subTransactionId,
4090                                                                   s->parent->subTransactionId);
4091         AtEOSubXact_Namespace(true, s->subTransactionId,
4092                                                   s->parent->subTransactionId);
4093         AtEOSubXact_Files(true, s->subTransactionId,
4094                                           s->parent->subTransactionId);
4095         AtEOSubXact_HashTables(true, s->nestingLevel);
4096         AtEOSubXact_PgStat(true, s->nestingLevel);
4097         AtSubCommit_Snapshot(s->nestingLevel);
4098
4099         /*
4100          * We need to restore the upper transaction's read-only state, in case the
4101          * upper is read-write while the child is read-only; GUC will incorrectly
4102          * think it should leave the child state in place.
4103          */
4104         XactReadOnly = s->prevXactReadOnly;
4105
4106         CurrentResourceOwner = s->parent->curTransactionOwner;
4107         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4108         ResourceOwnerDelete(s->curTransactionOwner);
4109         s->curTransactionOwner = NULL;
4110
4111         AtSubCommit_Memory();
4112
4113         s->state = TRANS_DEFAULT;
4114
4115         PopTransaction();
4116 }
4117
4118 /*
4119  * AbortSubTransaction
4120  */
4121 static void
4122 AbortSubTransaction(void)
4123 {
4124         TransactionState s = CurrentTransactionState;
4125
4126         /* Prevent cancel/die interrupt while cleaning up */
4127         HOLD_INTERRUPTS();
4128
4129         /* Make sure we have a valid memory context and resource owner */
4130         AtSubAbort_Memory();
4131         AtSubAbort_ResourceOwner();
4132
4133         /*
4134          * Release any LW locks we might be holding as quickly as possible.
4135          * (Regular locks, however, must be held till we finish aborting.)
4136          * Releasing LW locks is critical since we might try to grab them again
4137          * while cleaning up!
4138          *
4139          * FIXME This may be incorrect --- Are there some locks we should keep?
4140          * Buffer locks, for example?  I don't think so but I'm not sure.
4141          */
4142         LWLockReleaseAll();
4143
4144         AbortBufferIO();
4145         UnlockBuffers();
4146
4147         LockWaitCancel();
4148
4149         /*
4150          * check the current transaction state
4151          */
4152         ShowTransactionState("AbortSubTransaction");
4153
4154         if (s->state != TRANS_INPROGRESS)
4155                 elog(WARNING, "AbortSubTransaction while in %s state",
4156                          TransStateAsString(s->state));
4157
4158         s->state = TRANS_ABORT;
4159
4160         /*
4161          * Reset user ID which might have been changed transiently.  (See notes in
4162          * AbortTransaction.)
4163          */
4164         SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
4165
4166         /*
4167          * We can skip all this stuff if the subxact failed before creating a
4168          * ResourceOwner...
4169          */
4170         if (s->curTransactionOwner)
4171         {
4172                 AfterTriggerEndSubXact(false);
4173                 AtSubAbort_Portals(s->subTransactionId,
4174                                                    s->parent->subTransactionId,
4175                                                    s->parent->curTransactionOwner);
4176                 AtEOSubXact_LargeObject(false, s->subTransactionId,
4177                                                                 s->parent->subTransactionId);
4178                 AtSubAbort_Notify();
4179
4180                 /* Advertise the fact that we aborted in pg_clog. */
4181                 (void) RecordTransactionAbort(true);
4182
4183                 /* Post-abort cleanup */
4184                 if (TransactionIdIsValid(s->transactionId))
4185                         AtSubAbort_childXids();
4186
4187                 CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId,
4188                                                          s->parent->subTransactionId);
4189
4190                 ResourceOwnerRelease(s->curTransactionOwner,
4191                                                          RESOURCE_RELEASE_BEFORE_LOCKS,
4192                                                          false, false);
4193                 AtEOSubXact_RelationCache(false, s->subTransactionId,
4194                                                                   s->parent->subTransactionId);
4195                 AtEOSubXact_Inval(false);
4196                 AtSubAbort_smgr();
4197                 ResourceOwnerRelease(s->curTransactionOwner,
4198                                                          RESOURCE_RELEASE_LOCKS,
4199                                                          false, false);
4200                 ResourceOwnerRelease(s->curTransactionOwner,
4201                                                          RESOURCE_RELEASE_AFTER_LOCKS,
4202                                                          false, false);
4203
4204                 AtEOXact_GUC(false, s->gucNestLevel);
4205                 AtEOSubXact_SPI(false, s->subTransactionId);
4206                 AtEOSubXact_on_commit_actions(false, s->subTransactionId,
4207                                                                           s->parent->subTransactionId);
4208                 AtEOSubXact_Namespace(false, s->subTransactionId,
4209                                                           s->parent->subTransactionId);
4210                 AtEOSubXact_Files(false, s->subTransactionId,
4211                                                   s->parent->subTransactionId);
4212                 AtEOSubXact_HashTables(false, s->nestingLevel);
4213                 AtEOSubXact_PgStat(false, s->nestingLevel);
4214                 AtSubAbort_Snapshot(s->nestingLevel);
4215         }
4216
4217         /*
4218          * Restore the upper transaction's read-only state, too.  This should be
4219          * redundant with GUC's cleanup but we may as well do it for consistency
4220          * with the commit case.
4221          */
4222         XactReadOnly = s->prevXactReadOnly;
4223
4224         RESUME_INTERRUPTS();
4225 }
4226
4227 /*
4228  * CleanupSubTransaction
4229  *
4230  *      The caller has to make sure to always reassign CurrentTransactionState
4231  *      if it has a local pointer to it after calling this function.
4232  */
4233 static void
4234 CleanupSubTransaction(void)
4235 {
4236         TransactionState s = CurrentTransactionState;
4237
4238         ShowTransactionState("CleanupSubTransaction");
4239
4240         if (s->state != TRANS_ABORT)
4241                 elog(WARNING, "CleanupSubTransaction while in %s state",
4242                          TransStateAsString(s->state));
4243
4244         AtSubCleanup_Portals(s->subTransactionId);
4245
4246         CurrentResourceOwner = s->parent->curTransactionOwner;
4247         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4248         if (s->curTransactionOwner)
4249                 ResourceOwnerDelete(s->curTransactionOwner);
4250         s->curTransactionOwner = NULL;
4251
4252         AtSubCleanup_Memory();
4253
4254         s->state = TRANS_DEFAULT;
4255
4256         PopTransaction();
4257 }
4258
4259 /*
4260  * PushTransaction
4261  *              Create transaction state stack entry for a subtransaction
4262  *
4263  *      The caller has to make sure to always reassign CurrentTransactionState
4264  *      if it has a local pointer to it after calling this function.
4265  */
4266 static void
4267 PushTransaction(void)
4268 {
4269         TransactionState p = CurrentTransactionState;
4270         TransactionState s;
4271
4272         /*
4273          * We keep subtransaction state nodes in TopTransactionContext.
4274          */
4275         s = (TransactionState)
4276                 MemoryContextAllocZero(TopTransactionContext,
4277                                                            sizeof(TransactionStateData));
4278
4279         /*
4280          * Assign a subtransaction ID, watching out for counter wraparound.
4281          */
4282         currentSubTransactionId += 1;
4283         if (currentSubTransactionId == InvalidSubTransactionId)
4284         {
4285                 currentSubTransactionId -= 1;
4286                 pfree(s);
4287                 ereport(ERROR,
4288                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4289                                  errmsg("cannot have more than 2^32-1 subtransactions in a transaction")));
4290         }
4291
4292         /*
4293          * We can now stack a minimally valid subtransaction without fear of
4294          * failure.
4295          */
4296         s->transactionId = InvalidTransactionId;        /* until assigned */
4297         s->subTransactionId = currentSubTransactionId;
4298         s->parent = p;
4299         s->nestingLevel = p->nestingLevel + 1;
4300         s->gucNestLevel = NewGUCNestLevel();
4301         s->savepointLevel = p->savepointLevel;
4302         s->state = TRANS_DEFAULT;
4303         s->blockState = TBLOCK_SUBBEGIN;
4304         GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
4305         s->prevXactReadOnly = XactReadOnly;
4306
4307         CurrentTransactionState = s;
4308
4309         /*
4310          * AbortSubTransaction and CleanupSubTransaction have to be able to cope
4311          * with the subtransaction from here on out; in particular they should not
4312          * assume that it necessarily has a transaction context, resource owner,
4313          * or XID.
4314          */
4315 }
4316
4317 /*
4318  * PopTransaction
4319  *              Pop back to parent transaction state
4320  *
4321  *      The caller has to make sure to always reassign CurrentTransactionState
4322  *      if it has a local pointer to it after calling this function.
4323  */
4324 static void
4325 PopTransaction(void)
4326 {
4327         TransactionState s = CurrentTransactionState;
4328
4329         if (s->state != TRANS_DEFAULT)
4330                 elog(WARNING, "PopTransaction while in %s state",
4331                          TransStateAsString(s->state));
4332
4333         if (s->parent == NULL)
4334                 elog(FATAL, "PopTransaction with no parent");
4335
4336         CurrentTransactionState = s->parent;
4337
4338         /* Let's just make sure CurTransactionContext is good */
4339         CurTransactionContext = s->parent->curTransactionContext;
4340         MemoryContextSwitchTo(CurTransactionContext);
4341
4342         /* Ditto for ResourceOwner links */
4343         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4344         CurrentResourceOwner = s->parent->curTransactionOwner;
4345
4346         /* Free the old child structure */
4347         if (s->name)
4348                 pfree(s->name);
4349         pfree(s);
4350 }
4351
4352 /*
4353  * ShowTransactionState
4354  *              Debug support
4355  */
4356 static void
4357 ShowTransactionState(const char *str)
4358 {
4359         /* skip work if message will definitely not be printed */
4360         if (log_min_messages <= DEBUG3 || client_min_messages <= DEBUG3)
4361         {
4362                 elog(DEBUG3, "%s", str);
4363                 ShowTransactionStateRec(CurrentTransactionState);
4364         }
4365 }
4366
4367 /*
4368  * ShowTransactionStateRec
4369  *              Recursive subroutine for ShowTransactionState
4370  */
4371 static void
4372 ShowTransactionStateRec(TransactionState s)
4373 {
4374         StringInfoData buf;
4375
4376         initStringInfo(&buf);
4377
4378         if (s->nChildXids > 0)
4379         {
4380                 int                     i;
4381
4382                 appendStringInfo(&buf, "%u", s->childXids[0]);
4383                 for (i = 1; i < s->nChildXids; i++)
4384                         appendStringInfo(&buf, " %u", s->childXids[i]);
4385         }
4386
4387         if (s->parent)
4388                 ShowTransactionStateRec(s->parent);
4389
4390         /* use ereport to suppress computation if msg will not be printed */
4391         ereport(DEBUG3,
4392                         (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u%s, nestlvl: %d, children: %s",
4393                                                          PointerIsValid(s->name) ? s->name : "unnamed",
4394                                                          BlockStateAsString(s->blockState),
4395                                                          TransStateAsString(s->state),
4396                                                          (unsigned int) s->transactionId,
4397                                                          (unsigned int) s->subTransactionId,
4398                                                          (unsigned int) currentCommandId,
4399                                                          currentCommandIdUsed ? " (used)" : "",
4400                                                          s->nestingLevel, buf.data)));
4401
4402         pfree(buf.data);
4403 }
4404
4405 /*
4406  * BlockStateAsString
4407  *              Debug support
4408  */
4409 static const char *
4410 BlockStateAsString(TBlockState blockState)
4411 {
4412         switch (blockState)
4413         {
4414                 case TBLOCK_DEFAULT:
4415                         return "DEFAULT";
4416                 case TBLOCK_STARTED:
4417                         return "STARTED";
4418                 case TBLOCK_BEGIN:
4419                         return "BEGIN";
4420                 case TBLOCK_INPROGRESS:
4421                         return "INPROGRESS";
4422                 case TBLOCK_END:
4423                         return "END";
4424                 case TBLOCK_ABORT:
4425                         return "ABORT";
4426                 case TBLOCK_ABORT_END:
4427                         return "ABORT END";
4428                 case TBLOCK_ABORT_PENDING:
4429                         return "ABORT PEND";
4430                 case TBLOCK_PREPARE:
4431                         return "PREPARE";
4432                 case TBLOCK_SUBBEGIN:
4433                         return "SUB BEGIN";
4434                 case TBLOCK_SUBINPROGRESS:
4435                         return "SUB INPROGRS";
4436                 case TBLOCK_SUBRELEASE:
4437                         return "SUB RELEASE";
4438                 case TBLOCK_SUBCOMMIT:
4439                         return "SUB COMMIT";
4440                 case TBLOCK_SUBABORT:
4441                         return "SUB ABORT";
4442                 case TBLOCK_SUBABORT_END:
4443                         return "SUB ABORT END";
4444                 case TBLOCK_SUBABORT_PENDING:
4445                         return "SUB ABRT PEND";
4446                 case TBLOCK_SUBRESTART:
4447                         return "SUB RESTART";
4448                 case TBLOCK_SUBABORT_RESTART:
4449                         return "SUB AB RESTRT";
4450         }
4451         return "UNRECOGNIZED";
4452 }
4453
4454 /*
4455  * TransStateAsString
4456  *              Debug support
4457  */
4458 static const char *
4459 TransStateAsString(TransState state)
4460 {
4461         switch (state)
4462         {
4463                 case TRANS_DEFAULT:
4464                         return "DEFAULT";
4465                 case TRANS_START:
4466                         return "START";
4467                 case TRANS_INPROGRESS:
4468                         return "INPROGR";
4469                 case TRANS_COMMIT:
4470                         return "COMMIT";
4471                 case TRANS_ABORT:
4472                         return "ABORT";
4473                 case TRANS_PREPARE:
4474                         return "PREPARE";
4475         }
4476         return "UNRECOGNIZED";
4477 }
4478
4479 /*
4480  * xactGetCommittedChildren
4481  *
4482  * Gets the list of committed children of the current transaction.      The return
4483  * value is the number of child transactions.  *ptr is set to point to an
4484  * array of TransactionIds.  The array is allocated in TopTransactionContext;
4485  * the caller should *not* pfree() it (this is a change from pre-8.4 code!).
4486  * If there are no subxacts, *ptr is set to NULL.
4487  */
4488 int
4489 xactGetCommittedChildren(TransactionId **ptr)
4490 {
4491         TransactionState s = CurrentTransactionState;
4492
4493         if (s->nChildXids == 0)
4494                 *ptr = NULL;
4495         else
4496                 *ptr = s->childXids;
4497
4498         return s->nChildXids;
4499 }
4500
4501 /*
4502  *      XLOG support routines
4503  */
4504
4505 /*
4506  * Before 9.0 this was a fairly short function, but now it performs many
4507  * actions for which the order of execution is critical.
4508  */
4509 static void
4510 xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
4511                                         TransactionId *sub_xids, int nsubxacts,
4512                                         SharedInvalidationMessage *inval_msgs, int nmsgs,
4513                                         RelFileNode *xnodes, int nrels,
4514                                         Oid dbId, Oid tsId,
4515                                         uint32 xinfo)
4516 {
4517         TransactionId max_xid;
4518         int                     i;
4519
4520         max_xid = TransactionIdLatest(xid, nsubxacts, sub_xids);
4521
4522         /*
4523          * Make sure nextXid is beyond any XID mentioned in the record.
4524          *
4525          * We don't expect anyone else to modify nextXid, hence we don't need to
4526          * hold a lock while checking this. We still acquire the lock to modify
4527          * it, though.
4528          */
4529         if (TransactionIdFollowsOrEquals(max_xid,
4530                                                                          ShmemVariableCache->nextXid))
4531         {
4532                 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
4533                 ShmemVariableCache->nextXid = max_xid;
4534                 TransactionIdAdvance(ShmemVariableCache->nextXid);
4535                 LWLockRelease(XidGenLock);
4536         }
4537
4538         if (standbyState == STANDBY_DISABLED)
4539         {
4540                 /*
4541                  * Mark the transaction committed in pg_clog.
4542                  */
4543                 TransactionIdCommitTree(xid, nsubxacts, sub_xids);
4544         }
4545         else
4546         {
4547                 /*
4548                  * If a transaction completion record arrives that has as-yet
4549                  * unobserved subtransactions then this will not have been fully
4550                  * handled by the call to RecordKnownAssignedTransactionIds() in the
4551                  * main recovery loop in xlog.c. So we need to do bookkeeping again to
4552                  * cover that case. This is confusing and it is easy to think this
4553                  * call is irrelevant, which has happened three times in development
4554                  * already. Leave it in.
4555                  */
4556                 RecordKnownAssignedTransactionIds(max_xid);
4557
4558                 /*
4559                  * Mark the transaction committed in pg_clog. We use async commit
4560                  * protocol during recovery to provide information on database
4561                  * consistency for when users try to set hint bits. It is important
4562                  * that we do not set hint bits until the minRecoveryPoint is past
4563                  * this commit record. This ensures that if we crash we don't see hint
4564                  * bits set on changes made by transactions that haven't yet
4565                  * recovered. It's unlikely but it's good to be safe.
4566                  */
4567                 TransactionIdAsyncCommitTree(xid, nsubxacts, sub_xids, lsn);
4568
4569                 /*
4570                  * We must mark clog before we update the ProcArray.
4571                  */
4572                 ExpireTreeKnownAssignedTransactionIds(xid, nsubxacts, sub_xids, max_xid);
4573
4574                 /*
4575                  * Send any cache invalidations attached to the commit. We must
4576                  * maintain the same order of invalidation then release locks as
4577                  * occurs in CommitTransaction().
4578                  */
4579                 ProcessCommittedInvalidationMessages(inval_msgs, nmsgs,
4580                                                                   XactCompletionRelcacheInitFileInval(xinfo),
4581                                                                                          dbId, tsId);
4582
4583                 /*
4584                  * Release locks, if any. We do this for both two phase and normal one
4585                  * phase transactions. In effect we are ignoring the prepare phase and
4586                  * just going straight to lock release.
4587                  */
4588                 StandbyReleaseLockTree(xid, nsubxacts, sub_xids);
4589         }
4590
4591         /* Make sure files supposed to be dropped are dropped */
4592         for (i = 0; i < nrels; i++)
4593         {
4594                 SMgrRelation srel = smgropen(xnodes[i], InvalidBackendId);
4595                 ForkNumber      fork;
4596
4597                 for (fork = 0; fork <= MAX_FORKNUM; fork++)
4598                 {
4599                         XLogDropRelation(xnodes[i], fork);
4600                         smgrdounlink(srel, fork, true);
4601                 }
4602                 smgrclose(srel);
4603         }
4604
4605         /*
4606          * We issue an XLogFlush() for the same reason we emit ForceSyncCommit()
4607          * in normal operation. For example, in DROP DATABASE, we delete all the
4608          * files belonging to the database, and then commit the transaction. If we
4609          * crash after all the files have been deleted but before the commit, you
4610          * have an entry in pg_database without any files. To minimize the window
4611          * for that, we use ForceSyncCommit() to rush the commit record to disk as
4612          * quick as possible. We have the same window during recovery, and forcing
4613          * an XLogFlush() (which updates minRecoveryPoint during recovery) helps
4614          * to reduce that problem window, for any user that requested
4615          * ForceSyncCommit().
4616          */
4617         if (XactCompletionForceSyncCommit(xinfo))
4618                 XLogFlush(lsn);
4619
4620 }
4621 /*
4622  * Utility function to call xact_redo_commit_internal after breaking down xlrec
4623  */
4624 static void
4625 xact_redo_commit(xl_xact_commit *xlrec,
4626                                                         TransactionId xid, XLogRecPtr lsn)
4627 {
4628         TransactionId *subxacts;
4629         SharedInvalidationMessage *inval_msgs;
4630
4631         /* subxid array follows relfilenodes */
4632         subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
4633         /* invalidation messages array follows subxids */
4634         inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
4635
4636         xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
4637                                                                 inval_msgs, xlrec->nmsgs,
4638                                                                 xlrec->xnodes, xlrec->nrels,
4639                                                                 xlrec->dbId,
4640                                                                 xlrec->tsId,
4641                                                                 xlrec->xinfo);
4642 }
4643
4644 /*
4645  * Utility function to call xact_redo_commit_internal  for compact form of message.
4646  */
4647 static void
4648 xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
4649                                                         TransactionId xid, XLogRecPtr lsn)
4650 {
4651         xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
4652                                                                 NULL, 0,                /* inval msgs */
4653                                                                 NULL, 0,                /* relfilenodes */
4654                                                                 InvalidOid,             /* dbId */
4655                                                                 InvalidOid,             /* tsId */
4656                                                                 0);                             /* xinfo */
4657 }
4658
4659 /*
4660  * Be careful with the order of execution, as with xact_redo_commit().
4661  * The two functions are similar but differ in key places.
4662  *
4663  * Note also that an abort can be for a subtransaction and its children,
4664  * not just for a top level abort. That means we have to consider
4665  * topxid != xid, whereas in commit we would find topxid == xid always
4666  * because subtransaction commit is never WAL logged.
4667  */
4668 static void
4669 xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
4670 {
4671         TransactionId *sub_xids;
4672         TransactionId max_xid;
4673         int                     i;
4674
4675         sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
4676         max_xid = TransactionIdLatest(xid, xlrec->nsubxacts, sub_xids);
4677
4678         /*
4679          * Make sure nextXid is beyond any XID mentioned in the record.
4680          *
4681          * We don't expect anyone else to modify nextXid, hence we don't need to
4682          * hold a lock while checking this. We still acquire the lock to modify
4683          * it, though.
4684          */
4685         if (TransactionIdFollowsOrEquals(max_xid,
4686                                                                          ShmemVariableCache->nextXid))
4687         {
4688                 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
4689                 ShmemVariableCache->nextXid = max_xid;
4690                 TransactionIdAdvance(ShmemVariableCache->nextXid);
4691                 LWLockRelease(XidGenLock);
4692         }
4693
4694         if (standbyState == STANDBY_DISABLED)
4695         {
4696                 /* Mark the transaction aborted in pg_clog, no need for async stuff */
4697                 TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
4698         }
4699         else
4700         {
4701                 /*
4702                  * If a transaction completion record arrives that has as-yet
4703                  * unobserved subtransactions then this will not have been fully
4704                  * handled by the call to RecordKnownAssignedTransactionIds() in the
4705                  * main recovery loop in xlog.c. So we need to do bookkeeping again to
4706                  * cover that case. This is confusing and it is easy to think this
4707                  * call is irrelevant, which has happened three times in development
4708                  * already. Leave it in.
4709                  */
4710                 RecordKnownAssignedTransactionIds(max_xid);
4711
4712                 /* Mark the transaction aborted in pg_clog, no need for async stuff */
4713                 TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
4714
4715                 /*
4716                  * We must update the ProcArray after we have marked clog.
4717                  */
4718                 ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids, max_xid);
4719
4720                 /*
4721                  * There are no flat files that need updating, nor invalidation
4722                  * messages to send or undo.
4723                  */
4724
4725                 /*
4726                  * Release locks, if any. There are no invalidations to send.
4727                  */
4728                 StandbyReleaseLockTree(xid, xlrec->nsubxacts, sub_xids);
4729         }
4730
4731         /* Make sure files supposed to be dropped are dropped */
4732         for (i = 0; i < xlrec->nrels; i++)
4733         {
4734                 SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
4735                 ForkNumber      fork;
4736
4737                 for (fork = 0; fork <= MAX_FORKNUM; fork++)
4738                 {
4739                         XLogDropRelation(xlrec->xnodes[i], fork);
4740                         smgrdounlink(srel, fork, true);
4741                 }
4742                 smgrclose(srel);
4743         }
4744 }
4745
4746 void
4747 xact_redo(XLogRecPtr lsn, XLogRecord *record)
4748 {
4749         uint8           info = record->xl_info & ~XLR_INFO_MASK;
4750
4751         /* Backup blocks are not used in xact records */
4752         Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
4753
4754         if (info == XLOG_XACT_COMMIT_COMPACT)
4755         {
4756                 xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
4757
4758                 xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
4759         }
4760         else if (info == XLOG_XACT_COMMIT)
4761         {
4762                 xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
4763
4764                 xact_redo_commit(xlrec, record->xl_xid, lsn);
4765         }
4766         else if (info == XLOG_XACT_ABORT)
4767         {
4768                 xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
4769
4770                 xact_redo_abort(xlrec, record->xl_xid);
4771         }
4772         else if (info == XLOG_XACT_PREPARE)
4773         {
4774                 /* the record contents are exactly the 2PC file */
4775                 RecreateTwoPhaseFile(record->xl_xid,
4776                                                          XLogRecGetData(record), record->xl_len);
4777         }
4778         else if (info == XLOG_XACT_COMMIT_PREPARED)
4779         {
4780                 xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
4781
4782                 xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
4783                 RemoveTwoPhaseFile(xlrec->xid, false);
4784         }
4785         else if (info == XLOG_XACT_ABORT_PREPARED)
4786         {
4787                 xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record);
4788
4789                 xact_redo_abort(&xlrec->arec, xlrec->xid);
4790                 RemoveTwoPhaseFile(xlrec->xid, false);
4791         }
4792         else if (info == XLOG_XACT_ASSIGNMENT)
4793         {
4794                 xl_xact_assignment *xlrec = (xl_xact_assignment *) XLogRecGetData(record);
4795
4796                 if (standbyState >= STANDBY_INITIALIZED)
4797                         ProcArrayApplyXidAssignment(xlrec->xtop,
4798                                                                                 xlrec->nsubxacts, xlrec->xsub);
4799         }
4800         else
4801                 elog(PANIC, "xact_redo: unknown op code %u", info);
4802 }
4803
4804 static void
4805 xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
4806 {
4807         int                     i;
4808         TransactionId *subxacts;
4809
4810         subxacts = (TransactionId *) &xlrec->xnodes[xlrec->nrels];
4811
4812         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4813
4814         if (xlrec->nrels > 0)
4815         {
4816                 appendStringInfo(buf, "; rels:");
4817                 for (i = 0; i < xlrec->nrels; i++)
4818                 {
4819                         char       *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
4820
4821                         appendStringInfo(buf, " %s", path);
4822                         pfree(path);
4823                 }
4824         }
4825         if (xlrec->nsubxacts > 0)
4826         {
4827                 appendStringInfo(buf, "; subxacts:");
4828                 for (i = 0; i < xlrec->nsubxacts; i++)
4829                         appendStringInfo(buf, " %u", subxacts[i]);
4830         }
4831         if (xlrec->nmsgs > 0)
4832         {
4833                 SharedInvalidationMessage *msgs;
4834
4835                 msgs = (SharedInvalidationMessage *) &subxacts[xlrec->nsubxacts];
4836
4837                 if (XactCompletionRelcacheInitFileInval(xlrec->xinfo))
4838                         appendStringInfo(buf, "; relcache init file inval dbid %u tsid %u",
4839                                                          xlrec->dbId, xlrec->tsId);
4840
4841                 appendStringInfo(buf, "; inval msgs:");
4842                 for (i = 0; i < xlrec->nmsgs; i++)
4843                 {
4844                         SharedInvalidationMessage *msg = &msgs[i];
4845
4846                         if (msg->id >= 0)
4847                                 appendStringInfo(buf, " catcache %d", msg->id);
4848                         else if (msg->id == SHAREDINVALCATALOG_ID)
4849                                 appendStringInfo(buf, " catalog %u", msg->cat.catId);
4850                         else if (msg->id == SHAREDINVALRELCACHE_ID)
4851                                 appendStringInfo(buf, " relcache %u", msg->rc.relId);
4852                         /* remaining cases not expected, but print something anyway */
4853                         else if (msg->id == SHAREDINVALSMGR_ID)
4854                                 appendStringInfo(buf, " smgr");
4855                         else if (msg->id == SHAREDINVALRELMAP_ID)
4856                                 appendStringInfo(buf, " relmap");
4857                         else
4858                                 appendStringInfo(buf, " unknown id %d", msg->id);
4859                 }
4860         }
4861 }
4862
4863 static void
4864 xact_desc_commit_compact(StringInfo buf, xl_xact_commit_compact *xlrec)
4865 {
4866         int                     i;
4867
4868         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4869
4870         if (xlrec->nsubxacts > 0)
4871         {
4872                 appendStringInfo(buf, "; subxacts:");
4873                 for (i = 0; i < xlrec->nsubxacts; i++)
4874                         appendStringInfo(buf, " %u", xlrec->subxacts[i]);
4875         }
4876 }
4877
4878 static void
4879 xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
4880 {
4881         int                     i;
4882
4883         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4884         if (xlrec->nrels > 0)
4885         {
4886                 appendStringInfo(buf, "; rels:");
4887                 for (i = 0; i < xlrec->nrels; i++)
4888                 {
4889                         char       *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
4890
4891                         appendStringInfo(buf, " %s", path);
4892                         pfree(path);
4893                 }
4894         }
4895         if (xlrec->nsubxacts > 0)
4896         {
4897                 TransactionId *xacts = (TransactionId *)
4898                 &xlrec->xnodes[xlrec->nrels];
4899
4900                 appendStringInfo(buf, "; subxacts:");
4901                 for (i = 0; i < xlrec->nsubxacts; i++)
4902                         appendStringInfo(buf, " %u", xacts[i]);
4903         }
4904 }
4905
4906 static void
4907 xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
4908 {
4909         int                     i;
4910
4911         appendStringInfo(buf, "subxacts:");
4912
4913         for (i = 0; i < xlrec->nsubxacts; i++)
4914                 appendStringInfo(buf, " %u", xlrec->xsub[i]);
4915 }
4916
4917 void
4918 xact_desc(StringInfo buf, uint8 xl_info, char *rec)
4919 {
4920         uint8           info = xl_info & ~XLR_INFO_MASK;
4921
4922         if (info == XLOG_XACT_COMMIT_COMPACT)
4923         {
4924                 xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) rec;
4925
4926                 appendStringInfo(buf, "commit: ");
4927                 xact_desc_commit_compact(buf, xlrec);
4928         }
4929         else if (info == XLOG_XACT_COMMIT)
4930         {
4931                 xl_xact_commit *xlrec = (xl_xact_commit *) rec;
4932
4933                 appendStringInfo(buf, "commit: ");
4934                 xact_desc_commit(buf, xlrec);
4935         }
4936         else if (info == XLOG_XACT_ABORT)
4937         {
4938                 xl_xact_abort *xlrec = (xl_xact_abort *) rec;
4939
4940                 appendStringInfo(buf, "abort: ");
4941                 xact_desc_abort(buf, xlrec);
4942         }
4943         else if (info == XLOG_XACT_PREPARE)
4944         {
4945                 appendStringInfo(buf, "prepare");
4946         }
4947         else if (info == XLOG_XACT_COMMIT_PREPARED)
4948         {
4949                 xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
4950
4951                 appendStringInfo(buf, "commit prepared %u: ", xlrec->xid);
4952                 xact_desc_commit(buf, &xlrec->crec);
4953         }
4954         else if (info == XLOG_XACT_ABORT_PREPARED)
4955         {
4956                 xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
4957
4958                 appendStringInfo(buf, "abort prepared %u: ", xlrec->xid);
4959                 xact_desc_abort(buf, &xlrec->arec);
4960         }
4961         else if (info == XLOG_XACT_ASSIGNMENT)
4962         {
4963                 xl_xact_assignment *xlrec = (xl_xact_assignment *) rec;
4964
4965                 /*
4966                  * Note that we ignore the WAL record's xid, since we're more
4967                  * interested in the top-level xid that issued the record and which
4968                  * xids are being reported here.
4969                  */
4970                 appendStringInfo(buf, "xid assignment xtop %u: ", xlrec->xtop);
4971                 xact_desc_assignment(buf, xlrec);
4972         }
4973         else
4974                 appendStringInfo(buf, "UNKNOWN");
4975 }