granicus.if.org Git - postgresql/blob - src/backend/access/transam/xact.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * xact.c
   4  *        top level transaction system support routines
   5  *
   6  * See src/backend/access/transam/README for more information.
   7  *
   8  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
   9  * Portions Copyright (c) 1994, Regents of the University of California
  10  *
  11  *
  12  * IDENTIFICATION
  13  *        src/backend/access/transam/xact.c
  14  *
  15  *-------------------------------------------------------------------------
  16  */
  17
  18 #include "postgres.h"
  19
  20 #include <time.h>
  21 #include <unistd.h>
  22
  23 #include "access/multixact.h"
  24 #include "access/subtrans.h"
  25 #include "access/transam.h"
  26 #include "access/twophase.h"
  27 #include "access/xact.h"
  28 #include "access/xlogutils.h"
  29 #include "catalog/catalog.h"
  30 #include "catalog/namespace.h"
  31 #include "catalog/storage.h"
  32 #include "commands/async.h"
  33 #include "commands/tablecmds.h"
  34 #include "commands/trigger.h"
  35 #include "executor/spi.h"
  36 #include "libpq/be-fsstubs.h"
  37 #include "miscadmin.h"
  38 #include "pgstat.h"
  39 #include "replication/walsender.h"
  40 #include "replication/syncrep.h"
  41 #include "storage/lmgr.h"
  42 #include "storage/predicate.h"
  43 #include "storage/procarray.h"
  44 #include "storage/sinvaladt.h"
  45 #include "storage/smgr.h"
  46 #include "utils/combocid.h"
  47 #include "utils/guc.h"
  48 #include "utils/inval.h"
  49 #include "utils/memutils.h"
  50 #include "utils/relmapper.h"
  51 #include "utils/snapmgr.h"
  52 #include "pg_trace.h"
  53
  54
/*
 *	User-tweakable parameters
 *
 * Each Default* variable carries a static initializer.  Its un-prefixed
 * partner (XactIsoLevel, XactReadOnly, XactDeferrable) is declared without
 * one; presumably it holds the value in effect for the running transaction
 * and is loaded from the default at transaction start -- confirm in
 * StartTransaction().
 */
int			DefaultXactIsoLevel = XACT_READ_COMMITTED;
int			XactIsoLevel;

bool		DefaultXactReadOnly = false;
bool		XactReadOnly;

bool		DefaultXactDeferrable = false;
bool		XactDeferrable;

/* starts out as SYNCHRONOUS_COMMIT_ON; declared int rather than bool */
int			synchronous_commit = SYNCHRONOUS_COMMIT_ON;

int			CommitDelay = 0;	/* precommit delay in microseconds */
int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
  71
/*
 * MyXactAccessedTempRel is set when a temporary relation is accessed.
 * We don't allow PREPARE TRANSACTION in that case.
 *
 * The flag is deliberately non-static so that code outside this file
 * (heapam.c) can set it.  It starts out false.
 */
bool		MyXactAccessedTempRel = false;
  78
  79
/*
 *	transaction states - transaction state from server perspective
 *
 * This is the "low-level" state stored in TransactionStateData.state.
 * IsTransactionState() treats TRANS_INPROGRESS as the only state in which
 * it is safe to do database access; TRANS_START, TRANS_COMMIT and
 * TRANS_PREPARE are transitional, and TRANS_DEFAULT/TRANS_ABORT are unsafe.
 */
typedef enum TransState
{
	TRANS_DEFAULT,				/* idle */
	TRANS_START,				/* transaction starting */
	TRANS_INPROGRESS,			/* inside a valid transaction */
	TRANS_COMMIT,				/* commit in progress */
	TRANS_ABORT,				/* abort in progress */
	TRANS_PREPARE				/* prepare in progress */
} TransState;
  92
/*
 *	transaction block states - transaction state of client queries
 *
 * This is the "high-level" state stored in TransactionStateData.blockState.
 *
 * Note: the subtransaction states are used only for non-topmost
 * transactions; the others appear only in the topmost transaction.
 *
 * IsAbortedTransactionBlockState() reports true for exactly two of these
 * values: TBLOCK_ABORT and TBLOCK_SUBABORT.
 */
typedef enum TBlockState
{
	/* not-in-transaction-block states */
	TBLOCK_DEFAULT,				/* idle */
	TBLOCK_STARTED,				/* running single-query transaction */

	/* transaction block states */
	TBLOCK_BEGIN,				/* starting transaction block */
	TBLOCK_INPROGRESS,			/* live transaction */
	TBLOCK_END,					/* COMMIT received */
	TBLOCK_ABORT,				/* failed xact, awaiting ROLLBACK */
	TBLOCK_ABORT_END,			/* failed xact, ROLLBACK received */
	TBLOCK_ABORT_PENDING,		/* live xact, ROLLBACK received */
	TBLOCK_PREPARE,				/* live xact, PREPARE received */

	/* subtransaction states */
	TBLOCK_SUBBEGIN,			/* starting a subtransaction */
	TBLOCK_SUBINPROGRESS,		/* live subtransaction */
	TBLOCK_SUBRELEASE,			/* RELEASE received */
	TBLOCK_SUBCOMMIT,			/* COMMIT received while TBLOCK_SUBINPROGRESS */
	TBLOCK_SUBABORT,			/* failed subxact, awaiting ROLLBACK */
	TBLOCK_SUBABORT_END,		/* failed subxact, ROLLBACK received */
	TBLOCK_SUBABORT_PENDING,	/* live subxact, ROLLBACK received */
	TBLOCK_SUBRESTART,			/* live subxact, ROLLBACK TO received */
	TBLOCK_SUBABORT_RESTART		/* failed subxact, ROLLBACK TO received */
} TBlockState;
 125
/*
 *	transaction state structure
 *
 * One of these exists per level of the (sub)transaction stack; levels are
 * chained through "parent", which is NULL for the top-level entry.
 *
 * NB: TopTransactionStateData is set up with a positional initializer, so
 * adding, removing or reordering fields here requires a matching change
 * there.
 */
typedef struct TransactionStateData
{
	TransactionId transactionId;	/* my XID, or Invalid if none */
	SubTransactionId subTransactionId;	/* my subxact ID */
	char	   *name;			/* savepoint name, if any */
	int			savepointLevel; /* savepoint level */
	TransState	state;			/* low-level state */
	TBlockState blockState;		/* high-level state */
	int			nestingLevel;	/* transaction nesting depth */
	int			gucNestLevel;	/* GUC context nesting depth */
	MemoryContext curTransactionContext;		/* my xact-lifetime context */
	ResourceOwner curTransactionOwner;	/* my query resources */
	TransactionId *childXids;	/* subcommitted child XIDs, in XID order */
	int			nChildXids;		/* # of subcommitted child XIDs */
	int			maxChildXids;	/* allocated size of childXids[] */
	Oid			prevUser;		/* previous CurrentUserId setting */
	int			prevSecContext; /* previous SecurityRestrictionContext */
	bool		prevXactReadOnly;		/* entry-time xact r/o state */
	bool		startedInRecovery;		/* did we start in recovery? */
	struct TransactionStateData *parent;		/* back link to parent */
} TransactionStateData;

/* Pointer form; this is what the rest of the file passes around. */
typedef TransactionStateData *TransactionState;
 152
/*
 * TopTransactionStateData is the statically allocated state block for the
 * top-level transaction.  CurrentTransactionState (declared further down)
 * always points to the current transaction state block; it points here
 * when not in a transaction at all, or when in a top-level transaction.
 *
 * The initializer is positional and must list one value per field of
 * TransactionStateData, in declaration order.
 */
static TransactionStateData TopTransactionStateData = {
	0,							/* transaction id */
	0,							/* subtransaction id */
	NULL,						/* savepoint name */
	0,							/* savepoint level */
	TRANS_DEFAULT,				/* transaction state */
	TBLOCK_DEFAULT,				/* transaction block state from the client
								 * perspective */
	0,							/* transaction nesting depth */
	0,							/* GUC context nesting depth */
	NULL,						/* cur transaction context */
	NULL,						/* cur transaction resource owner */
	NULL,						/* subcommitted child Xids */
	0,							/* # of subcommitted child Xids */
	0,							/* allocated size of childXids[] */
	InvalidOid,					/* previous CurrentUserId setting */
	0,							/* previous SecurityRestrictionContext */
	false,						/* entry-time xact r/o state */
	false,						/* startedInRecovery */
	NULL						/* link to parent state block */
};
 179
/*
 * unreportedXids holds XIDs of all subtransactions that have not yet been
 * reported in a XLOG_XACT_ASSIGNMENT record.
 *
 * nUnreportedXids is the number of valid entries.  AssignTransactionId()
 * appends to the array and, once the count reaches
 * PGPROC_MAX_CACHED_SUBXIDS, writes the WAL record and resets the count
 * to zero, so the array never needs more than that many slots.
 */
static int	nUnreportedXids;
static TransactionId unreportedXids[PGPROC_MAX_CACHED_SUBXIDS];

/*
 * Current position in the transaction state stack; starts out pointing at
 * the top-level state block.
 */
static TransactionState CurrentTransactionState = &TopTransactionStateData;
 188
/*
 * The subtransaction ID and command ID assignment counters are global
 * to a whole transaction, so we do not keep them in the state stack.
 *
 * currentCommandIdUsed records whether the current command ID has been
 * handed out for marking tuples (see GetCurrentCommandId); it decides
 * whether CommandCounterIncrement() really needs to advance the counter.
 */
static SubTransactionId currentSubTransactionId;
static CommandId currentCommandId;
static bool currentCommandIdUsed;

/*
 * xactStartTimestamp is the value of transaction_timestamp().
 * stmtStartTimestamp is the value of statement_timestamp().
 * xactStopTimestamp is the time at which we log a commit or abort WAL record.
 * These do not change as we enter and exit subtransactions, so we don't
 * keep them inside the TransactionState stack.
 *
 * A zero xactStopTimestamp means "not set"; see
 * GetCurrentTransactionStopTimestamp().
 */
static TimestampTz xactStartTimestamp;
static TimestampTz stmtStartTimestamp;
static TimestampTz xactStopTimestamp;

/*
 * GID to be used for preparing the current transaction.  This is also
 * global to a whole transaction, so we don't keep it in the state stack.
 */
static char *prepareGID;

/*
 * Some commands want to force synchronous commit.  Set via
 * ForceSyncCommit().
 */
static bool forceSyncCommit = false;

/*
 * Private context for transaction-abort work --- we reserve space for this
 * at startup to ensure that AbortTransaction and AbortSubTransaction can work
 * when we've run out of memory.  Created lazily by AtStart_Memory() the
 * first time it finds this pointer NULL.
 */
static MemoryContext TransactionAbortContext = NULL;
 225
/*
 * List of add-on start- and end-of-xact callbacks
 *
 * Singly linked list node: a callback function pointer, the opaque
 * argument stored alongside it, and a link to the next node.
 */
typedef struct XactCallbackItem
{
	struct XactCallbackItem *next;
	XactCallback callback;
	void	   *arg;
} XactCallbackItem;

/* Head of the xact callback list; NULL means the list is empty. */
static XactCallbackItem *Xact_callbacks = NULL;

/*
 * List of add-on start- and end-of-subxact callbacks
 *
 * Same layout as XactCallbackItem, but holding a SubXactCallback.
 */
typedef struct SubXactCallbackItem
{
	struct SubXactCallbackItem *next;
	SubXactCallback callback;
	void	   *arg;
} SubXactCallbackItem;

/* Head of the subxact callback list; NULL means the list is empty. */
static SubXactCallbackItem *SubXact_callbacks = NULL;
 249
 250
/* local function prototypes */

/* top-level transaction machinery and its per-phase helpers */
static void AssignTransactionId(TransactionState s);
static void AbortTransaction(void);
static void AtAbort_Memory(void);
static void AtCleanup_Memory(void);
static void AtAbort_ResourceOwner(void);
static void AtCCI_LocalCache(void);
static void AtCommit_Memory(void);
static void AtStart_Cache(void);
static void AtStart_Memory(void);
static void AtStart_ResourceOwner(void);
static void CallXactCallbacks(XactEvent event);
static void CallSubXactCallbacks(SubXactEvent event,
					 SubTransactionId mySubid,
					 SubTransactionId parentSubid);
static void CleanupTransaction(void);
static void CommitTransaction(void);
static TransactionId RecordTransactionAbort(bool isSubXact);
static void StartTransaction(void);

/* subtransaction machinery and state-stack push/pop */
static void StartSubTransaction(void);
static void CommitSubTransaction(void);
static void AbortSubTransaction(void);
static void CleanupSubTransaction(void);
static void PushTransaction(void);
static void PopTransaction(void);

/* subtransaction per-phase helpers */
static void AtSubAbort_Memory(void);
static void AtSubCleanup_Memory(void);
static void AtSubAbort_ResourceOwner(void);
static void AtSubCommit_Memory(void);
static void AtSubStart_Memory(void);
static void AtSubStart_ResourceOwner(void);

/* debugging / display support */
static void ShowTransactionState(const char *str);
static void ShowTransactionStateRec(TransactionState state);
static const char *BlockStateAsString(TBlockState blockState);
static const char *TransStateAsString(TransState state);
 289
 290
 291 /* ----------------------------------------------------------------
 292  *      transaction state accessors
 293  * ----------------------------------------------------------------
 294  */
 295
 296 /*
 297  *      IsTransactionState
 298  *
 299  *      This returns true if we are inside a valid transaction; that is,
 300  *      it is safe to initiate database access, take heavyweight locks, etc.
 301  */
 302 bool
 303 IsTransactionState(void)
 304 {
 305         TransactionState s = CurrentTransactionState;
 306
 307         /*
 308          * TRANS_DEFAULT and TRANS_ABORT are obviously unsafe states.  However, we
 309          * also reject the startup/shutdown states TRANS_START, TRANS_COMMIT,
 310          * TRANS_PREPARE since it might be too soon or too late within those
 311          * transition states to do anything interesting.  Hence, the only "valid"
 312          * state is TRANS_INPROGRESS.
 313          */
 314         return (s->state == TRANS_INPROGRESS);
 315 }
 316
 317 /*
 318  *      IsAbortedTransactionBlockState
 319  *
 320  *      This returns true if we are within an aborted transaction block.
 321  */
 322 bool
 323 IsAbortedTransactionBlockState(void)
 324 {
 325         TransactionState s = CurrentTransactionState;
 326
 327         if (s->blockState == TBLOCK_ABORT ||
 328                 s->blockState == TBLOCK_SUBABORT)
 329                 return true;
 330
 331         return false;
 332 }
 333
 334
 335 /*
 336  *      GetTopTransactionId
 337  *
 338  * This will return the XID of the main transaction, assigning one if
 339  * it's not yet set.  Be careful to call this only inside a valid xact.
 340  */
 341 TransactionId
 342 GetTopTransactionId(void)
 343 {
 344         if (!TransactionIdIsValid(TopTransactionStateData.transactionId))
 345                 AssignTransactionId(&TopTransactionStateData);
 346         return TopTransactionStateData.transactionId;
 347 }
 348
 349 /*
 350  *      GetTopTransactionIdIfAny
 351  *
 352  * This will return the XID of the main transaction, if one is assigned.
 353  * It will return InvalidTransactionId if we are not currently inside a
 354  * transaction, or inside a transaction that hasn't yet been assigned an XID.
 355  */
 356 TransactionId
 357 GetTopTransactionIdIfAny(void)
 358 {
 359         return TopTransactionStateData.transactionId;
 360 }
 361
 362 /*
 363  *      GetCurrentTransactionId
 364  *
 365  * This will return the XID of the current transaction (main or sub
 366  * transaction), assigning one if it's not yet set.  Be careful to call this
 367  * only inside a valid xact.
 368  */
 369 TransactionId
 370 GetCurrentTransactionId(void)
 371 {
 372         TransactionState s = CurrentTransactionState;
 373
 374         if (!TransactionIdIsValid(s->transactionId))
 375                 AssignTransactionId(s);
 376         return s->transactionId;
 377 }
 378
 379 /*
 380  *      GetCurrentTransactionIdIfAny
 381  *
 382  * This will return the XID of the current sub xact, if one is assigned.
 383  * It will return InvalidTransactionId if we are not currently inside a
 384  * transaction, or inside a transaction that hasn't been assigned an XID yet.
 385  */
 386 TransactionId
 387 GetCurrentTransactionIdIfAny(void)
 388 {
 389         return CurrentTransactionState->transactionId;
 390 }
 391
 392
/*
 * AssignTransactionId
 *
 * Assigns a new permanent XID to the given TransactionState.
 * We do not assign XIDs to transactions until/unless this is called.
 * Also, any parent TransactionStates that don't yet have XIDs are assigned
 * one; this maintains the invariant that a child transaction has an XID
 * following its parent's.
 *
 * s must not already have an XID and must be in TRANS_INPROGRESS state
 * (both are asserted).
 *
 * Order of operations: (1) give XIDs to any XID-less ancestors, outermost
 * first; (2) obtain the new XID; (3) for a subtransaction, record its
 * parent in pg_subtrans, otherwise register the XID with the predicate
 * locking system; (4) take the XID lock under s's own resource owner;
 * (5) for a subtransaction, when XLogStandbyInfoActive(), queue the XID
 * for an XLOG_XACT_ASSIGNMENT record and emit the record once enough have
 * accumulated.
 */
static void
AssignTransactionId(TransactionState s)
{
	/* only the top-level state block has a NULL parent link */
	bool		isSubXact = (s->parent != NULL);
	ResourceOwner currentOwner;

	/* Assert that caller didn't screw up */
	Assert(!TransactionIdIsValid(s->transactionId));
	Assert(s->state == TRANS_INPROGRESS);

	/*
	 * Ensure parent(s) have XIDs, so that a child always has an XID later
	 * than its parent.  Mustn't recurse here, or we might get a stack
	 * overflow if we're at the bottom of a huge stack of subtransactions
	 * none of which have XIDs yet.
	 */
	if (isSubXact && !TransactionIdIsValid(s->parent->transactionId))
	{
		TransactionState p = s->parent;
		TransactionState *parents;
		size_t		parentOffset = 0;

		/*
		 * Collect the chain of XID-less ancestors, nearest first.  The
		 * array is sized by s->nestingLevel, which is presumably an upper
		 * bound on the number of ancestors (assumes the top level has
		 * nestingLevel 1 -- see GetCurrentTransactionNestLevel).
		 */
		parents = palloc(sizeof(TransactionState) * s->nestingLevel);
		while (p != NULL && !TransactionIdIsValid(p->transactionId))
		{
			parents[parentOffset++] = p;
			p = p->parent;
		}

		/*
		 * Walk the array backwards so that the outermost ancestor gets its
		 * XID first.  This is technically a recursive call, but the
		 * recursion will never be more than one layer deep: by the time
		 * each entry is processed, its own parent already has an XID (or
		 * it has no parent), so the block we are in is skipped.
		 */
		while (parentOffset != 0)
			AssignTransactionId(parents[--parentOffset]);

		pfree(parents);
	}

	/*
	 * Generate a new Xid and record it in PG_PROC and pg_subtrans.
	 *
	 * NB: we must make the subtrans entry BEFORE the Xid appears anywhere in
	 * shared storage other than PG_PROC; because if there's no room for it in
	 * PG_PROC, the subtrans entry is needed to ensure that other backends see
	 * the Xid as "running".  See GetNewTransactionId.
	 */
	s->transactionId = GetNewTransactionId(isSubXact);

	if (isSubXact)
		SubTransSetParent(s->transactionId, s->parent->transactionId, false);

	/*
	 * If it's a top-level transaction, the predicate locking system needs to
	 * be told about it too.
	 */
	if (!isSubXact)
		RegisterPredicateLockingXid(s->transactionId);

	/*
	 * Acquire lock on the transaction XID.  (We assume this cannot block.) We
	 * have to ensure that the lock is assigned to the transaction's own
	 * ResourceOwner, so CurrentResourceOwner is switched temporarily and put
	 * back on both the normal and the error path.
	 */
	currentOwner = CurrentResourceOwner;
	PG_TRY();
	{
		CurrentResourceOwner = s->curTransactionOwner;
		XactLockTableInsert(s->transactionId);
	}
	PG_CATCH();
	{
		/* Ensure CurrentResourceOwner is restored on error */
		CurrentResourceOwner = currentOwner;
		PG_RE_THROW();
	}
	PG_END_TRY();
	CurrentResourceOwner = currentOwner;

	/*
	 * Every PGPROC_MAX_CACHED_SUBXIDS assigned transaction ids within each
	 * top-level transaction we issue a WAL record for the assignment. We
	 * include the top-level xid and all the subxids that have not yet been
	 * reported using XLOG_XACT_ASSIGNMENT records.
	 *
	 * This is required to limit the amount of shared memory required in a hot
	 * standby server to keep track of in-progress XIDs. See notes for
	 * RecordKnownAssignedTransactionIds().
	 *
	 * We don't keep track of the immediate parent of each subxid, only the
	 * top-level transaction that each subxact belongs to. This is correct in
	 * recovery only because aborted subtransactions are separately WAL
	 * logged.
	 */
	if (isSubXact && XLogStandbyInfoActive())
	{
		unreportedXids[nUnreportedXids] = s->transactionId;
		nUnreportedXids++;

		/*
		 * ensure this test matches similar one in
		 * RecoverPreparedTransactions()
		 */
		if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS)
		{
			XLogRecData rdata[2];
			xl_xact_assignment xlrec;

			/*
			 * xtop is always set by now because we walk up the transaction
			 * stack to the highest unassigned xid and then come back down,
			 * so this call does not assign a new XID.
			 */
			xlrec.xtop = GetTopTransactionId();
			Assert(TransactionIdIsValid(xlrec.xtop));
			xlrec.nsubxacts = nUnreportedXids;

			/* first chunk: the fixed-size record header */
			rdata[0].data = (char *) &xlrec;
			rdata[0].len = MinSizeOfXactAssignment;
			rdata[0].buffer = InvalidBuffer;
			rdata[0].next = &rdata[1];

			/*
			 * second chunk: the whole unreportedXids array.  The length is
			 * the full array size; the array is exactly full at this point
			 * because the count grows by one per call and is reset below as
			 * soon as it reaches the limit.
			 */
			rdata[1].data = (char *) unreportedXids;
			rdata[1].len = PGPROC_MAX_CACHED_SUBXIDS * sizeof(TransactionId);
			rdata[1].buffer = InvalidBuffer;
			rdata[1].next = NULL;

			(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);

			/* everything queued so far has now been reported */
			nUnreportedXids = 0;
		}
	}
}
 534
 535 /*
 536  *      GetCurrentSubTransactionId
 537  */
 538 SubTransactionId
 539 GetCurrentSubTransactionId(void)
 540 {
 541         TransactionState s = CurrentTransactionState;
 542
 543         return s->subTransactionId;
 544 }
 545
 546
 547 /*
 548  *      GetCurrentCommandId
 549  *
 550  * "used" must be TRUE if the caller intends to use the command ID to mark
 551  * inserted/updated/deleted tuples.  FALSE means the ID is being fetched
 552  * for read-only purposes (ie, as a snapshot validity cutoff).  See
 553  * CommandCounterIncrement() for discussion.
 554  */
 555 CommandId
 556 GetCurrentCommandId(bool used)
 557 {
 558         /* this is global to a transaction, not subtransaction-local */
 559         if (used)
 560                 currentCommandIdUsed = true;
 561         return currentCommandId;
 562 }
 563
 564 /*
 565  *      GetCurrentTransactionStartTimestamp
 566  */
 567 TimestampTz
 568 GetCurrentTransactionStartTimestamp(void)
 569 {
 570         return xactStartTimestamp;
 571 }
 572
 573 /*
 574  *      GetCurrentStatementStartTimestamp
 575  */
 576 TimestampTz
 577 GetCurrentStatementStartTimestamp(void)
 578 {
 579         return stmtStartTimestamp;
 580 }
 581
 582 /*
 583  *      GetCurrentTransactionStopTimestamp
 584  *
 585  * We return current time if the transaction stop time hasn't been set
 586  * (which can happen if we decide we don't need to log an XLOG record).
 587  */
 588 TimestampTz
 589 GetCurrentTransactionStopTimestamp(void)
 590 {
 591         if (xactStopTimestamp != 0)
 592                 return xactStopTimestamp;
 593         return GetCurrentTimestamp();
 594 }
 595
 596 /*
 597  *      SetCurrentStatementStartTimestamp
 598  */
 599 void
 600 SetCurrentStatementStartTimestamp(void)
 601 {
 602         stmtStartTimestamp = GetCurrentTimestamp();
 603 }
 604
 605 /*
 606  *      SetCurrentTransactionStopTimestamp
 607  */
 608 static inline void
 609 SetCurrentTransactionStopTimestamp(void)
 610 {
 611         xactStopTimestamp = GetCurrentTimestamp();
 612 }
 613
 614 /*
 615  *      GetCurrentTransactionNestLevel
 616  *
 617  * Note: this will return zero when not inside any transaction, one when
 618  * inside a top-level transaction, etc.
 619  */
 620 int
 621 GetCurrentTransactionNestLevel(void)
 622 {
 623         TransactionState s = CurrentTransactionState;
 624
 625         return s->nestingLevel;
 626 }
 627
 628
 629 /*
 630  *      TransactionIdIsCurrentTransactionId
 631  */
 632 bool
 633 TransactionIdIsCurrentTransactionId(TransactionId xid)
 634 {
 635         TransactionState s;
 636
 637         /*
 638          * We always say that BootstrapTransactionId is "not my transaction ID"
 639          * even when it is (ie, during bootstrap).      Along with the fact that
 640          * transam.c always treats BootstrapTransactionId as already committed,
 641          * this causes the tqual.c routines to see all tuples as committed, which
 642          * is what we need during bootstrap.  (Bootstrap mode only inserts tuples,
 643          * it never updates or deletes them, so all tuples can be presumed good
 644          * immediately.)
 645          *
 646          * Likewise, InvalidTransactionId and FrozenTransactionId are certainly
 647          * not my transaction ID, so we can just return "false" immediately for
 648          * any non-normal XID.
 649          */
 650         if (!TransactionIdIsNormal(xid))
 651                 return false;
 652
 653         /*
 654          * We will return true for the Xid of the current subtransaction, any of
 655          * its subcommitted children, any of its parents, or any of their
 656          * previously subcommitted children.  However, a transaction being aborted
 657          * is no longer "current", even though it may still have an entry on the
 658          * state stack.
 659          */
 660         for (s = CurrentTransactionState; s != NULL; s = s->parent)
 661         {
 662                 int                     low,
 663                                         high;
 664
 665                 if (s->state == TRANS_ABORT)
 666                         continue;
 667                 if (!TransactionIdIsValid(s->transactionId))
 668                         continue;                       /* it can't have any child XIDs either */
 669                 if (TransactionIdEquals(xid, s->transactionId))
 670                         return true;
 671                 /* As the childXids array is ordered, we can use binary search */
 672                 low = 0;
 673                 high = s->nChildXids - 1;
 674                 while (low <= high)
 675                 {
 676                         int                     middle;
 677                         TransactionId probe;
 678
 679                         middle = low + (high - low) / 2;
 680                         probe = s->childXids[middle];
 681                         if (TransactionIdEquals(probe, xid))
 682                                 return true;
 683                         else if (TransactionIdPrecedes(probe, xid))
 684                                 low = middle + 1;
 685                         else
 686                                 high = middle - 1;
 687                 }
 688         }
 689
 690         return false;
 691 }
 692
 693 /*
 694  *      TransactionStartedDuringRecovery
 695  *
 696  * Returns true if the current transaction started while recovery was still
 697  * in progress. Recovery might have ended since so RecoveryInProgress() might
 698  * return false already.
 699  */
 700 bool
 701 TransactionStartedDuringRecovery(void)
 702 {
 703         return CurrentTransactionState->startedInRecovery;
 704 }
 705
 706 /*
 707  *      CommandCounterIncrement
 708  */
 709 void
 710 CommandCounterIncrement(void)
 711 {
 712         /*
 713          * If the current value of the command counter hasn't been "used" to mark
 714          * tuples, we need not increment it, since there's no need to distinguish
 715          * a read-only command from others.  This helps postpone command counter
 716          * overflow, and keeps no-op CommandCounterIncrement operations cheap.
 717          */
 718         if (currentCommandIdUsed)
 719         {
 720                 currentCommandId += 1;
 721                 if (currentCommandId == FirstCommandId) /* check for overflow */
 722                 {
 723                         currentCommandId -= 1;
 724                         ereport(ERROR,
 725                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 726                                          errmsg("cannot have more than 2^32-1 commands in a transaction")));
 727                 }
 728                 currentCommandIdUsed = false;
 729
 730                 /* Propagate new command ID into static snapshots */
 731                 SnapshotSetCommandId(currentCommandId);
 732
 733                 /*
 734                  * Make any catalog changes done by the just-completed command visible
 735                  * in the local syscache.  We obviously don't need to do this after a
 736                  * read-only command.  (But see hacks in inval.c to make real sure we
 737                  * don't think a command that queued inval messages was read-only.)
 738                  */
 739                 AtCCI_LocalCache();
 740         }
 741 }
 742
/*
 * ForceSyncCommit
 *
 * Interface routine to allow commands to force a synchronous commit of the
 * current top-level transaction.
 *
 * All this does is raise the file-level forceSyncCommit flag; the code that
 * consumes (and presumably resets) the flag is outside this section of the
 * file.
 */
void
ForceSyncCommit(void)
{
	forceSyncCommit = true;
}
 754
 755
 756 /* ----------------------------------------------------------------
 757  *                                              StartTransaction stuff
 758  * ----------------------------------------------------------------
 759  */
 760
/*
 *	AtStart_Cache
 *
 * Transaction-start hook for the caches: processes any pending
 * invalidation messages by calling AcceptInvalidationMessages().
 */
static void
AtStart_Cache(void)
{
	AcceptInvalidationMessages();
}
 769
 770 /*
 771  *      AtStart_Memory
 772  */
 773 static void
 774 AtStart_Memory(void)
 775 {
 776         TransactionState s = CurrentTransactionState;
 777
 778         /*
 779          * If this is the first time through, create a private context for
 780          * AbortTransaction to work in.  By reserving some space now, we can
 781          * insulate AbortTransaction from out-of-memory scenarios.      Like
 782          * ErrorContext, we set it up with slow growth rate and a nonzero minimum
 783          * size, so that space will be reserved immediately.
 784          */
 785         if (TransactionAbortContext == NULL)
 786                 TransactionAbortContext =
 787                         AllocSetContextCreate(TopMemoryContext,
 788                                                                   "TransactionAbortContext",
 789                                                                   32 * 1024,
 790                                                                   32 * 1024,
 791                                                                   32 * 1024);
 792
 793         /*
 794          * We shouldn't have a transaction context already.
 795          */
 796         Assert(TopTransactionContext == NULL);
 797
 798         /*
 799          * Create a toplevel context for the transaction.
 800          */
 801         TopTransactionContext =
 802                 AllocSetContextCreate(TopMemoryContext,
 803                                                           "TopTransactionContext",
 804                                                           ALLOCSET_DEFAULT_MINSIZE,
 805                                                           ALLOCSET_DEFAULT_INITSIZE,
 806                                                           ALLOCSET_DEFAULT_MAXSIZE);
 807
 808         /*
 809          * In a top-level transaction, CurTransactionContext is the same as
 810          * TopTransactionContext.
 811          */
 812         CurTransactionContext = TopTransactionContext;
 813         s->curTransactionContext = CurTransactionContext;
 814
 815         /* Make the CurTransactionContext active. */
 816         MemoryContextSwitchTo(CurTransactionContext);
 817 }
 818
 819 /*
 820  *      AtStart_ResourceOwner
 821  */
 822 static void
 823 AtStart_ResourceOwner(void)
 824 {
 825         TransactionState s = CurrentTransactionState;
 826
 827         /*
 828          * We shouldn't have a transaction resource owner already.
 829          */
 830         Assert(TopTransactionResourceOwner == NULL);
 831
 832         /*
 833          * Create a toplevel resource owner for the transaction.
 834          */
 835         s->curTransactionOwner = ResourceOwnerCreate(NULL, "TopTransaction");
 836
 837         TopTransactionResourceOwner = s->curTransactionOwner;
 838         CurTransactionResourceOwner = s->curTransactionOwner;
 839         CurrentResourceOwner = s->curTransactionOwner;
 840 }
 841
 842 /* ----------------------------------------------------------------
 843  *                                              StartSubTransaction stuff
 844  * ----------------------------------------------------------------
 845  */
 846
 847 /*
 848  * AtSubStart_Memory
 849  */
 850 static void
 851 AtSubStart_Memory(void)
 852 {
 853         TransactionState s = CurrentTransactionState;
 854
 855         Assert(CurTransactionContext != NULL);
 856
 857         /*
 858          * Create a CurTransactionContext, which will be used to hold data that
 859          * survives subtransaction commit but disappears on subtransaction abort.
 860          * We make it a child of the immediate parent's CurTransactionContext.
 861          */
 862         CurTransactionContext = AllocSetContextCreate(CurTransactionContext,
 863                                                                                                   "CurTransactionContext",
 864                                                                                                   ALLOCSET_DEFAULT_MINSIZE,
 865                                                                                                   ALLOCSET_DEFAULT_INITSIZE,
 866                                                                                                   ALLOCSET_DEFAULT_MAXSIZE);
 867         s->curTransactionContext = CurTransactionContext;
 868
 869         /* Make the CurTransactionContext active. */
 870         MemoryContextSwitchTo(CurTransactionContext);
 871 }
 872
 873 /*
 874  * AtSubStart_ResourceOwner
 875  */
 876 static void
 877 AtSubStart_ResourceOwner(void)
 878 {
 879         TransactionState s = CurrentTransactionState;
 880
 881         Assert(s->parent != NULL);
 882
 883         /*
 884          * Create a resource owner for the subtransaction.      We make it a child of
 885          * the immediate parent's resource owner.
 886          */
 887         s->curTransactionOwner =
 888                 ResourceOwnerCreate(s->parent->curTransactionOwner,
 889                                                         "SubTransaction");
 890
 891         CurTransactionResourceOwner = s->curTransactionOwner;
 892         CurrentResourceOwner = s->curTransactionOwner;
 893 }
 894
 895 /* ----------------------------------------------------------------
 896  *                                              CommitTransaction stuff
 897  * ----------------------------------------------------------------
 898  */
 899
 900 /*
 901  *      RecordTransactionCommit
 902  *
 903  * Returns latest XID among xact and its children, or InvalidTransactionId
 904  * if the xact has no XID.      (We compute that here just because it's easier.)
 905  */
 906 static TransactionId
 907 RecordTransactionCommit(void)
 908 {
 909         TransactionId xid = GetTopTransactionIdIfAny();
 910         bool            markXidCommitted = TransactionIdIsValid(xid);
 911         TransactionId latestXid = InvalidTransactionId;
 912         int                     nrels;
 913         RelFileNode *rels;
 914         int                     nchildren;
 915         TransactionId *children;
 916         int                     nmsgs = 0;
 917         SharedInvalidationMessage *invalMessages = NULL;
 918         bool            RelcacheInitFileInval = false;
 919         bool            wrote_xlog;
 920
 921         /* Get data needed for commit record */
 922         nrels = smgrGetPendingDeletes(true, &rels);
 923         nchildren = xactGetCommittedChildren(&children);
 924         if (XLogStandbyInfoActive())
 925                 nmsgs = xactGetCommittedInvalidationMessages(&invalMessages,
 926                                                                                                          &RelcacheInitFileInval);
 927         wrote_xlog = (XactLastRecEnd.xrecoff != 0);
 928
 929         /*
 930          * If we haven't been assigned an XID yet, we neither can, nor do we want
 931          * to write a COMMIT record.
 932          */
 933         if (!markXidCommitted)
 934         {
 935                 /*
 936                  * We expect that every smgrscheduleunlink is followed by a catalog
 937                  * update, and hence XID assignment, so we shouldn't get here with any
 938                  * pending deletes.  Use a real test not just an Assert to check this,
 939                  * since it's a bit fragile.
 940                  */
 941                 if (nrels != 0)
 942                         elog(ERROR, "cannot commit a transaction that deleted files but has no xid");
 943
 944                 /* Can't have child XIDs either; AssignTransactionId enforces this */
 945                 Assert(nchildren == 0);
 946
 947                 /*
 948                  * If we didn't create XLOG entries, we're done here; otherwise we
 949                  * should flush those entries the same as a commit record.      (An
 950                  * example of a possible record that wouldn't cause an XID to be
 951                  * assigned is a sequence advance record due to nextval() --- we want
 952                  * to flush that to disk before reporting commit.)
 953                  */
 954                 if (!wrote_xlog)
 955                         goto cleanup;
 956         }
 957         else
 958         {
 959                 /*
 960                  * Begin commit critical section and insert the commit XLOG record.
 961                  */
 962                 /* Tell bufmgr and smgr to prepare for commit */
 963                 BufmgrCommit();
 964
 965                 /*
 966                  * Mark ourselves as within our "commit critical section".      This
 967                  * forces any concurrent checkpoint to wait until we've updated
 968                  * pg_clog.  Without this, it is possible for the checkpoint to set
 969                  * REDO after the XLOG record but fail to flush the pg_clog update to
 970                  * disk, leading to loss of the transaction commit if the system
 971                  * crashes a little later.
 972                  *
 973                  * Note: we could, but don't bother to, set this flag in
 974                  * RecordTransactionAbort.      That's because loss of a transaction abort
 975                  * is noncritical; the presumption would be that it aborted, anyway.
 976                  *
 977                  * It's safe to change the inCommit flag of our own backend without
 978                  * holding the ProcArrayLock, since we're the only one modifying it.
 979                  * This makes checkpoint's determination of which xacts are inCommit a
 980                  * bit fuzzy, but it doesn't matter.
 981                  */
 982                 START_CRIT_SECTION();
 983                 MyProc->inCommit = true;
 984
 985                 SetCurrentTransactionStopTimestamp();
 986
 987                 /*
 988                  * Do we need the long commit record? If not, use the compact format.
 989                  */
 990                 if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit)
 991                 {
 992                         XLogRecData rdata[4];
 993                         int                     lastrdata = 0;
 994                         xl_xact_commit xlrec;
 995                         /*
 996                          * Set flags required for recovery processing of commits.
 997                          */
 998                         xlrec.xinfo = 0;
 999                         if (RelcacheInitFileInval)
1000                                 xlrec.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE;
1001                         if (forceSyncCommit)
1002                                 xlrec.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT;
1003
1004                         xlrec.dbId = MyDatabaseId;
1005                         xlrec.tsId = MyDatabaseTableSpace;
1006
1007                         xlrec.xact_time = xactStopTimestamp;
1008                         xlrec.nrels = nrels;
1009                         xlrec.nsubxacts = nchildren;
1010                         xlrec.nmsgs = nmsgs;
1011                         rdata[0].data = (char *) (&xlrec);
1012                         rdata[0].len = MinSizeOfXactCommit;
1013                         rdata[0].buffer = InvalidBuffer;
1014                         /* dump rels to delete */
1015                         if (nrels > 0)
1016                         {
1017                                 rdata[0].next = &(rdata[1]);
1018                                 rdata[1].data = (char *) rels;
1019                                 rdata[1].len = nrels * sizeof(RelFileNode);
1020                                 rdata[1].buffer = InvalidBuffer;
1021                                 lastrdata = 1;
1022                         }
1023                         /* dump committed child Xids */
1024                         if (nchildren > 0)
1025                         {
1026                                 rdata[lastrdata].next = &(rdata[2]);
1027                                 rdata[2].data = (char *) children;
1028                                 rdata[2].len = nchildren * sizeof(TransactionId);
1029                                 rdata[2].buffer = InvalidBuffer;
1030                                 lastrdata = 2;
1031                         }
1032                         /* dump shared cache invalidation messages */
1033                         if (nmsgs > 0)
1034                         {
1035                                 rdata[lastrdata].next = &(rdata[3]);
1036                                 rdata[3].data = (char *) invalMessages;
1037                                 rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
1038                                 rdata[3].buffer = InvalidBuffer;
1039                                 lastrdata = 3;
1040                         }
1041                         rdata[lastrdata].next = NULL;
1042
1043                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
1044                 }
1045                 else
1046                 {
1047                         XLogRecData rdata[2];
1048                         int                     lastrdata = 0;
1049                         xl_xact_commit_compact  xlrec;
1050                         xlrec.xact_time = xactStopTimestamp;
1051                         xlrec.nsubxacts = nchildren;
1052                         rdata[0].data = (char *) (&xlrec);
1053                         rdata[0].len = MinSizeOfXactCommitCompact;
1054                         rdata[0].buffer = InvalidBuffer;
1055                         /* dump committed child Xids */
1056                         if (nchildren > 0)
1057                         {
1058                                 rdata[0].next = &(rdata[1]);
1059                                 rdata[1].data = (char *) children;
1060                                 rdata[1].len = nchildren * sizeof(TransactionId);
1061                                 rdata[1].buffer = InvalidBuffer;
1062                                 lastrdata = 1;
1063                         }
1064                         rdata[lastrdata].next = NULL;
1065
1066                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
1067                 }
1068         }
1069
1070         /*
1071          * Check if we want to commit asynchronously.  We can allow the XLOG flush
1072          * to happen asynchronously if synchronous_commit=off, or if the current
1073          * transaction has not performed any WAL-logged operation.      The latter
1074          * case can arise if the current transaction wrote only to temporary
1075          * and/or unlogged tables.      In case of a crash, the loss of such a
1076          * transaction will be irrelevant since temp tables will be lost anyway,
1077          * and unlogged tables will be truncated.  (Given the foregoing, you might
1078          * think that it would be unnecessary to emit the XLOG record at all in
1079          * this case, but we don't currently try to do that.  It would certainly
1080          * cause problems at least in Hot Standby mode, where the
1081          * KnownAssignedXids machinery requires tracking every XID assignment.  It
1082          * might be OK to skip it only when wal_level < hot_standby, but for now
1083          * we don't.)
1084          *
1085          * However, if we're doing cleanup of any non-temp rels or committing any
1086          * command that wanted to force sync commit, then we must flush XLOG
1087          * immediately.  (We must not allow asynchronous commit if there are any
1088          * non-temp tables to be deleted, because we might delete the files before
1089          * the COMMIT record is flushed to disk.  We do allow asynchronous commit
1090          * if all to-be-deleted tables are temporary though, since they are lost
1091          * anyway if we crash.)
1092          */
1093         if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) ||
1094                 forceSyncCommit || nrels > 0)
1095         {
1096                 /*
1097                  * Synchronous commit case:
1098                  *
1099                  * Sleep before flush! So we can flush more than one commit records
1100                  * per single fsync.  (The idea is some other backend may do the
1101                  * XLogFlush while we're sleeping.  This needs work still, because on
1102                  * most Unixen, the minimum select() delay is 10msec or more, which is
1103                  * way too long.)
1104                  *
1105                  * We do not sleep if enableFsync is not turned on, nor if there are
1106                  * fewer than CommitSiblings other backends with active transactions.
1107                  */
1108                 if (CommitDelay > 0 && enableFsync &&
1109                         MinimumActiveBackends(CommitSiblings))
1110                         pg_usleep(CommitDelay);
1111
1112                 XLogFlush(XactLastRecEnd);
1113
1114                 /*
1115                  * Wake up all walsenders to send WAL up to the COMMIT record
1116                  * immediately if replication is enabled
1117                  */
1118                 if (max_wal_senders > 0)
1119                         WalSndWakeup();
1120
1121                 /*
1122                  * Now we may update the CLOG, if we wrote a COMMIT record above
1123                  */
1124                 if (markXidCommitted)
1125                         TransactionIdCommitTree(xid, nchildren, children);
1126         }
1127         else
1128         {
1129                 /*
1130                  * Asynchronous commit case:
1131                  *
1132                  * This enables possible committed transaction loss in the case of a
1133                  * postmaster crash because WAL buffers are left unwritten. Ideally we
1134                  * could issue the WAL write without the fsync, but some
1135                  * wal_sync_methods do not allow separate write/fsync.
1136                  *
1137                  * Report the latest async commit LSN, so that the WAL writer knows to
1138                  * flush this commit.
1139                  */
1140                 XLogSetAsyncXactLSN(XactLastRecEnd);
1141
1142                 /*
1143                  * We must not immediately update the CLOG, since we didn't flush the
1144                  * XLOG. Instead, we store the LSN up to which the XLOG must be
1145                  * flushed before the CLOG may be updated.
1146                  */
1147                 if (markXidCommitted)
1148                         TransactionIdAsyncCommitTree(xid, nchildren, children, XactLastRecEnd);
1149         }
1150
1151         /*
1152          * If we entered a commit critical section, leave it now, and let
1153          * checkpoints proceed.
1154          */
1155         if (markXidCommitted)
1156         {
1157                 MyProc->inCommit = false;
1158                 END_CRIT_SECTION();
1159         }
1160
1161         /* Compute latestXid while we have the child XIDs handy */
1162         latestXid = TransactionIdLatest(xid, nchildren, children);
1163
1164         /*
1165          * Wait for synchronous replication, if required.
1166          *
1167          * Note that at this stage we have marked clog, but still show as running
1168          * in the procarray and continue to hold locks.
1169          */
1170         SyncRepWaitForLSN(XactLastRecEnd);
1171
1172         /* Reset XactLastRecEnd until the next transaction writes something */
1173         XactLastRecEnd.xrecoff = 0;
1174
1175 cleanup:
1176         /* Clean up local data */
1177         if (rels)
1178                 pfree(rels);
1179
1180         return latestXid;
1181 }
1182
1183
/*
 *	AtCCI_LocalCache
 *
 * Local cache maintenance, done in two ordered steps.
 */
static void
AtCCI_LocalCache(void)
{
	/*
	 * Step one: expose pending relation map changes.  This has to precede
	 * the processing of local sinval messages, so that relcache invals
	 * pick up the new map when they are handled.
	 */
	AtCCI_RelationMap();

	/*
	 * Step two: let the next command in this backend see the catalog
	 * changes made so far.
	 */
	CommandEndInvalidationMessages();
}
1202
1203 /*
1204  *      AtCommit_Memory
1205  */
1206 static void
1207 AtCommit_Memory(void)
1208 {
1209         /*
1210          * Now that we're "out" of a transaction, have the system allocate things
1211          * in the top memory context instead of per-transaction contexts.
1212          */
1213         MemoryContextSwitchTo(TopMemoryContext);
1214
1215         /*
1216          * Release all transaction-local memory.
1217          */
1218         Assert(TopTransactionContext != NULL);
1219         MemoryContextDelete(TopTransactionContext);
1220         TopTransactionContext = NULL;
1221         CurTransactionContext = NULL;
1222         CurrentTransactionState->curTransactionContext = NULL;
1223 }
1224
1225 /* ----------------------------------------------------------------
1226  *                                              CommitSubTransaction stuff
1227  * ----------------------------------------------------------------
1228  */
1229
1230 /*
1231  * AtSubCommit_Memory
1232  */
1233 static void
1234 AtSubCommit_Memory(void)
1235 {
1236         TransactionState s = CurrentTransactionState;
1237
1238         Assert(s->parent != NULL);
1239
1240         /* Return to parent transaction level's memory context. */
1241         CurTransactionContext = s->parent->curTransactionContext;
1242         MemoryContextSwitchTo(CurTransactionContext);
1243
1244         /*
1245          * Ordinarily we cannot throw away the child's CurTransactionContext,
1246          * since the data it contains will be needed at upper commit.  However, if
1247          * there isn't actually anything in it, we can throw it away.  This avoids
1248          * a small memory leak in the common case of "trivial" subxacts.
1249          */
1250         if (MemoryContextIsEmpty(s->curTransactionContext))
1251         {
1252                 MemoryContextDelete(s->curTransactionContext);
1253                 s->curTransactionContext = NULL;
1254         }
1255 }
1256
1257 /*
1258  * AtSubCommit_childXids
1259  *
1260  * Pass my own XID and my child XIDs up to my parent as committed children.
1261  */
1262 static void
1263 AtSubCommit_childXids(void)
1264 {
1265         TransactionState s = CurrentTransactionState;
1266         int                     new_nChildXids;
1267
1268         Assert(s->parent != NULL);
1269
1270         /*
1271          * The parent childXids array will need to hold my XID and all my
1272          * childXids, in addition to the XIDs already there.
1273          */
1274         new_nChildXids = s->parent->nChildXids + s->nChildXids + 1;
1275
1276         /* Allocate or enlarge the parent array if necessary */
1277         if (s->parent->maxChildXids < new_nChildXids)
1278         {
1279                 int                     new_maxChildXids;
1280                 TransactionId *new_childXids;
1281
1282                 /*
1283                  * Make it 2x what's needed right now, to avoid having to enlarge it
1284                  * repeatedly. But we can't go above MaxAllocSize.  (The latter limit
1285                  * is what ensures that we don't need to worry about integer overflow
1286                  * here or in the calculation of new_nChildXids.)
1287                  */
1288                 new_maxChildXids = Min(new_nChildXids * 2,
1289                                                            (int) (MaxAllocSize / sizeof(TransactionId)));
1290
1291                 if (new_maxChildXids < new_nChildXids)
1292                         ereport(ERROR,
1293                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1294                                          errmsg("maximum number of committed subtransactions (%d) exceeded",
1295                                                         (int) (MaxAllocSize / sizeof(TransactionId)))));
1296
1297                 /*
1298                  * We keep the child-XID arrays in TopTransactionContext; this avoids
1299                  * setting up child-transaction contexts for what might be just a few
1300                  * bytes of grandchild XIDs.
1301                  */
1302                 if (s->parent->childXids == NULL)
1303                         new_childXids =
1304                                 MemoryContextAlloc(TopTransactionContext,
1305                                                                    new_maxChildXids * sizeof(TransactionId));
1306                 else
1307                         new_childXids = repalloc(s->parent->childXids,
1308                                                                    new_maxChildXids * sizeof(TransactionId));
1309
1310                 s->parent->childXids = new_childXids;
1311                 s->parent->maxChildXids = new_maxChildXids;
1312         }
1313
1314         /*
1315          * Copy all my XIDs to parent's array.
1316          *
1317          * Note: We rely on the fact that the XID of a child always follows that
1318          * of its parent.  By copying the XID of this subtransaction before the
1319          * XIDs of its children, we ensure that the array stays ordered. Likewise,
1320          * all XIDs already in the array belong to subtransactions started and
1321          * subcommitted before us, so their XIDs must precede ours.
1322          */
1323         s->parent->childXids[s->parent->nChildXids] = s->transactionId;
1324
1325         if (s->nChildXids > 0)
1326                 memcpy(&s->parent->childXids[s->parent->nChildXids + 1],
1327                            s->childXids,
1328                            s->nChildXids * sizeof(TransactionId));
1329
1330         s->parent->nChildXids = new_nChildXids;
1331
1332         /* Release child's array to avoid leakage */
1333         if (s->childXids != NULL)
1334                 pfree(s->childXids);
1335         /* We must reset these to avoid double-free if fail later in commit */
1336         s->childXids = NULL;
1337         s->nChildXids = 0;
1338         s->maxChildXids = 0;
1339 }
1340
1341 /* ----------------------------------------------------------------
1342  *                                              AbortTransaction stuff
1343  * ----------------------------------------------------------------
1344  */
1345
1346 /*
1347  *      RecordTransactionAbort
1348  *
1349  * Returns latest XID among xact and its children, or InvalidTransactionId
1350  * if the xact has no XID.      (We compute that here just because it's easier.)
1351  */
1352 static TransactionId
1353 RecordTransactionAbort(bool isSubXact)
1354 {
1355         TransactionId xid = GetCurrentTransactionIdIfAny();
1356         TransactionId latestXid;
1357         int                     nrels;
1358         RelFileNode *rels;
1359         int                     nchildren;
1360         TransactionId *children;
1361         XLogRecData rdata[3];
1362         int                     lastrdata = 0;
1363         xl_xact_abort xlrec;
1364
1365         /*
1366          * If we haven't been assigned an XID, nobody will care whether we aborted
1367          * or not.      Hence, we're done in that case.  It does not matter if we have
1368          * rels to delete (note that this routine is not responsible for actually
1369          * deleting 'em).  We cannot have any child XIDs, either.
1370          */
1371         if (!TransactionIdIsValid(xid))
1372         {
1373                 /* Reset XactLastRecEnd until the next transaction writes something */
1374                 if (!isSubXact)
1375                         XactLastRecEnd.xrecoff = 0;
1376                 return InvalidTransactionId;
1377         }
1378
1379         /*
1380          * We have a valid XID, so we should write an ABORT record for it.
1381          *
1382          * We do not flush XLOG to disk here, since the default assumption after a
1383          * crash would be that we aborted, anyway.      For the same reason, we don't
1384          * need to worry about interlocking against checkpoint start.
1385          */
1386
1387         /*
1388          * Check that we haven't aborted halfway through RecordTransactionCommit.
1389          */
1390         if (TransactionIdDidCommit(xid))
1391                 elog(PANIC, "cannot abort transaction %u, it was already committed",
1392                          xid);
1393
1394         /* Fetch the data we need for the abort record */
1395         nrels = smgrGetPendingDeletes(false, &rels);
1396         nchildren = xactGetCommittedChildren(&children);
1397
1398         /* XXX do we really need a critical section here? */
1399         START_CRIT_SECTION();
1400
1401         /* Write the ABORT record */
1402         if (isSubXact)
1403                 xlrec.xact_time = GetCurrentTimestamp();
1404         else
1405         {
1406                 SetCurrentTransactionStopTimestamp();
1407                 xlrec.xact_time = xactStopTimestamp;
1408         }
1409         xlrec.nrels = nrels;
1410         xlrec.nsubxacts = nchildren;
1411         rdata[0].data = (char *) (&xlrec);
1412         rdata[0].len = MinSizeOfXactAbort;
1413         rdata[0].buffer = InvalidBuffer;
1414         /* dump rels to delete */
1415         if (nrels > 0)
1416         {
1417                 rdata[0].next = &(rdata[1]);
1418                 rdata[1].data = (char *) rels;
1419                 rdata[1].len = nrels * sizeof(RelFileNode);
1420                 rdata[1].buffer = InvalidBuffer;
1421                 lastrdata = 1;
1422         }
1423         /* dump committed child Xids */
1424         if (nchildren > 0)
1425         {
1426                 rdata[lastrdata].next = &(rdata[2]);
1427                 rdata[2].data = (char *) children;
1428                 rdata[2].len = nchildren * sizeof(TransactionId);
1429                 rdata[2].buffer = InvalidBuffer;
1430                 lastrdata = 2;
1431         }
1432         rdata[lastrdata].next = NULL;
1433
1434         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
1435
1436         /*
1437          * Report the latest async abort LSN, so that the WAL writer knows to
1438          * flush this abort. There's nothing to be gained by delaying this, since
1439          * WALWriter may as well do this when it can. This is important with
1440          * streaming replication because if we don't flush WAL regularly we will
1441          * find that large aborts leave us with a long backlog for when commits
1442          * occur after the abort, increasing our window of data loss should
1443          * problems occur at that point.
1444          */
1445         if (!isSubXact)
1446                 XLogSetAsyncXactLSN(XactLastRecEnd);
1447
1448         /*
1449          * Mark the transaction aborted in clog.  This is not absolutely necessary
1450          * but we may as well do it while we are here; also, in the subxact case
1451          * it is helpful because XactLockTableWait makes use of it to avoid
1452          * waiting for already-aborted subtransactions.  It is OK to do it without
1453          * having flushed the ABORT record to disk, because in event of a crash
1454          * we'd be assumed to have aborted anyway.
1455          */
1456         TransactionIdAbortTree(xid, nchildren, children);
1457
1458         END_CRIT_SECTION();
1459
1460         /* Compute latestXid while we have the child XIDs handy */
1461         latestXid = TransactionIdLatest(xid, nchildren, children);
1462
1463         /*
1464          * If we're aborting a subtransaction, we can immediately remove failed
1465          * XIDs from PGPROC's cache of running child XIDs.  We do that here for
1466          * subxacts, because we already have the child XID array at hand.  For
1467          * main xacts, the equivalent happens just after this function returns.
1468          */
1469         if (isSubXact)
1470                 XidCacheRemoveRunningXids(xid, nchildren, children, latestXid);
1471
1472         /* Reset XactLastRecEnd until the next transaction writes something */
1473         if (!isSubXact)
1474                 XactLastRecEnd.xrecoff = 0;
1475
1476         /* And clean up local data */
1477         if (rels)
1478                 pfree(rels);
1479
1480         return latestXid;
1481 }
1482
1483 /*
1484  *      AtAbort_Memory
1485  */
1486 static void
1487 AtAbort_Memory(void)
1488 {
1489         /*
1490          * Switch into TransactionAbortContext, which should have some free space
1491          * even if nothing else does.  We'll work in this context until we've
1492          * finished cleaning up.
1493          *
1494          * It is barely possible to get here when we've not been able to create
1495          * TransactionAbortContext yet; if so use TopMemoryContext.
1496          */
1497         if (TransactionAbortContext != NULL)
1498                 MemoryContextSwitchTo(TransactionAbortContext);
1499         else
1500                 MemoryContextSwitchTo(TopMemoryContext);
1501 }
1502
1503 /*
1504  * AtSubAbort_Memory
1505  */
1506 static void
1507 AtSubAbort_Memory(void)
1508 {
1509         Assert(TransactionAbortContext != NULL);
1510
1511         MemoryContextSwitchTo(TransactionAbortContext);
1512 }
1513
1514
1515 /*
1516  *      AtAbort_ResourceOwner
1517  */
1518 static void
1519 AtAbort_ResourceOwner(void)
1520 {
1521         /*
1522          * Make sure we have a valid ResourceOwner, if possible (else it will be
1523          * NULL, which is OK)
1524          */
1525         CurrentResourceOwner = TopTransactionResourceOwner;
1526 }
1527
1528 /*
1529  * AtSubAbort_ResourceOwner
1530  */
1531 static void
1532 AtSubAbort_ResourceOwner(void)
1533 {
1534         TransactionState s = CurrentTransactionState;
1535
1536         /* Make sure we have a valid ResourceOwner */
1537         CurrentResourceOwner = s->curTransactionOwner;
1538 }
1539
1540
1541 /*
1542  * AtSubAbort_childXids
1543  */
1544 static void
1545 AtSubAbort_childXids(void)
1546 {
1547         TransactionState s = CurrentTransactionState;
1548
1549         /*
1550          * We keep the child-XID arrays in TopTransactionContext (see
1551          * AtSubCommit_childXids).      This means we'd better free the array
1552          * explicitly at abort to avoid leakage.
1553          */
1554         if (s->childXids != NULL)
1555                 pfree(s->childXids);
1556         s->childXids = NULL;
1557         s->nChildXids = 0;
1558         s->maxChildXids = 0;
1559
1560         /*
1561          * We could prune the unreportedXids array here. But we don't bother. That
1562          * would potentially reduce number of XLOG_XACT_ASSIGNMENT records but it
1563          * would likely introduce more CPU time into the more common paths, so we
1564          * choose not to do that.
1565          */
1566 }
1567
1568 /* ----------------------------------------------------------------
1569  *                                              CleanupTransaction stuff
1570  * ----------------------------------------------------------------
1571  */
1572
1573 /*
1574  *      AtCleanup_Memory
1575  */
1576 static void
1577 AtCleanup_Memory(void)
1578 {
1579         Assert(CurrentTransactionState->parent == NULL);
1580
1581         /*
1582          * Now that we're "out" of a transaction, have the system allocate things
1583          * in the top memory context instead of per-transaction contexts.
1584          */
1585         MemoryContextSwitchTo(TopMemoryContext);
1586
1587         /*
1588          * Clear the special abort context for next time.
1589          */
1590         if (TransactionAbortContext != NULL)
1591                 MemoryContextResetAndDeleteChildren(TransactionAbortContext);
1592
1593         /*
1594          * Release all transaction-local memory.
1595          */
1596         if (TopTransactionContext != NULL)
1597                 MemoryContextDelete(TopTransactionContext);
1598         TopTransactionContext = NULL;
1599         CurTransactionContext = NULL;
1600         CurrentTransactionState->curTransactionContext = NULL;
1601 }
1602
1603
1604 /* ----------------------------------------------------------------
1605  *                                              CleanupSubTransaction stuff
1606  * ----------------------------------------------------------------
1607  */
1608
1609 /*
1610  * AtSubCleanup_Memory
1611  */
1612 static void
1613 AtSubCleanup_Memory(void)
1614 {
1615         TransactionState s = CurrentTransactionState;
1616
1617         Assert(s->parent != NULL);
1618
1619         /* Make sure we're not in an about-to-be-deleted context */
1620         MemoryContextSwitchTo(s->parent->curTransactionContext);
1621         CurTransactionContext = s->parent->curTransactionContext;
1622
1623         /*
1624          * Clear the special abort context for next time.
1625          */
1626         if (TransactionAbortContext != NULL)
1627                 MemoryContextResetAndDeleteChildren(TransactionAbortContext);
1628
1629         /*
1630          * Delete the subxact local memory contexts. Its CurTransactionContext can
1631          * go too (note this also kills CurTransactionContexts from any children
1632          * of the subxact).
1633          */
1634         if (s->curTransactionContext)
1635                 MemoryContextDelete(s->curTransactionContext);
1636         s->curTransactionContext = NULL;
1637 }
1638
1639 /* ----------------------------------------------------------------
1640  *                                              interface routines
1641  * ----------------------------------------------------------------
1642  */
1643
1644 /*
1645  *      StartTransaction
1646  */
1647 static void
1648 StartTransaction(void)
1649 {
1650         TransactionState s;
1651         VirtualTransactionId vxid;
1652
1653         /*
1654          * Let's just make sure the state stack is empty
1655          */
1656         s = &TopTransactionStateData;
1657         CurrentTransactionState = s;
1658
1659         /*
1660          * check the current transaction state
1661          */
1662         if (s->state != TRANS_DEFAULT)
1663                 elog(WARNING, "StartTransaction while in %s state",
1664                          TransStateAsString(s->state));
1665
1666         /*
1667          * set the current transaction state information appropriately during
1668          * start processing
1669          */
1670         s->state = TRANS_START;
1671         s->transactionId = InvalidTransactionId;        /* until assigned */
1672
1673         /*
1674          * Make sure we've reset xact state variables
1675          *
1676          * If recovery is still in progress, mark this transaction as read-only.
1677          * We have lower level defences in XLogInsert and elsewhere to stop us
1678          * from modifying data during recovery, but this gives the normal
1679          * indication to the user that the transaction is read-only.
1680          */
1681         if (RecoveryInProgress())
1682         {
1683                 s->startedInRecovery = true;
1684                 XactReadOnly = true;
1685         }
1686         else
1687         {
1688                 s->startedInRecovery = false;
1689                 XactReadOnly = DefaultXactReadOnly;
1690         }
1691         XactDeferrable = DefaultXactDeferrable;
1692         XactIsoLevel = DefaultXactIsoLevel;
1693         forceSyncCommit = false;
1694         MyXactAccessedTempRel = false;
1695
1696         /*
1697          * reinitialize within-transaction counters
1698          */
1699         s->subTransactionId = TopSubTransactionId;
1700         currentSubTransactionId = TopSubTransactionId;
1701         currentCommandId = FirstCommandId;
1702         currentCommandIdUsed = false;
1703
1704         /*
1705          * initialize reported xid accounting
1706          */
1707         nUnreportedXids = 0;
1708
1709         /*
1710          * must initialize resource-management stuff first
1711          */
1712         AtStart_Memory();
1713         AtStart_ResourceOwner();
1714
1715         /*
1716          * Assign a new LocalTransactionId, and combine it with the backendId to
1717          * form a virtual transaction id.
1718          */
1719         vxid.backendId = MyBackendId;
1720         vxid.localTransactionId = GetNextLocalTransactionId();
1721
1722         /*
1723          * Lock the virtual transaction id before we announce it in the proc array
1724          */
1725         VirtualXactLockTableInsert(vxid);
1726
1727         /*
1728          * Advertise it in the proc array.      We assume assignment of
1729          * LocalTransactionID is atomic, and the backendId should be set already.
1730          */
1731         Assert(MyProc->backendId == vxid.backendId);
1732         MyProc->lxid = vxid.localTransactionId;
1733
1734         TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
1735
1736         /*
1737          * set transaction_timestamp() (a/k/a now()).  We want this to be the same
1738          * as the first command's statement_timestamp(), so don't do a fresh
1739          * GetCurrentTimestamp() call (which'd be expensive anyway).  Also, mark
1740          * xactStopTimestamp as unset.
1741          */
1742         xactStartTimestamp = stmtStartTimestamp;
1743         xactStopTimestamp = 0;
1744         pgstat_report_xact_timestamp(xactStartTimestamp);
1745
1746         /*
1747          * initialize current transaction state fields
1748          *
1749          * note: prevXactReadOnly is not used at the outermost level
1750          */
1751         s->nestingLevel = 1;
1752         s->gucNestLevel = 1;
1753         s->childXids = NULL;
1754         s->nChildXids = 0;
1755         s->maxChildXids = 0;
1756         GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
1757         /* SecurityRestrictionContext should never be set outside a transaction */
1758         Assert(s->prevSecContext == 0);
1759
1760         /*
1761          * initialize other subsystems for new transaction
1762          */
1763         AtStart_GUC();
1764         AtStart_Inval();
1765         AtStart_Cache();
1766         AfterTriggerBeginXact();
1767
1768         /*
1769          * done with start processing, set current transaction state to "in
1770          * progress"
1771          */
1772         s->state = TRANS_INPROGRESS;
1773
1774         ShowTransactionState("StartTransaction");
1775 }
1776
1777
1778 /*
1779  *      CommitTransaction
1780  *
1781  * NB: if you change this routine, better look at PrepareTransaction too!
1782  */
1783 static void
1784 CommitTransaction(void)
1785 {
1786         TransactionState s = CurrentTransactionState;
1787         TransactionId latestXid;
1788
1789         ShowTransactionState("CommitTransaction");
1790
1791         /*
1792          * check the current transaction state
1793          */
1794         if (s->state != TRANS_INPROGRESS)
1795                 elog(WARNING, "CommitTransaction while in %s state",
1796                          TransStateAsString(s->state));
1797         Assert(s->parent == NULL);
1798
1799         /*
1800          * Do pre-commit processing that involves calling user-defined code, such
1801          * as triggers.  Since closing cursors could queue trigger actions,
1802          * triggers could open cursors, etc, we have to keep looping until there's
1803          * nothing left to do.
1804          */
1805         for (;;)
1806         {
1807                 /*
1808                  * Fire all currently pending deferred triggers.
1809                  */
1810                 AfterTriggerFireDeferred();
1811
1812                 /*
1813                  * Close open portals (converting holdable ones into static portals).
1814                  * If there weren't any, we are done ... otherwise loop back to check
1815                  * if they queued deferred triggers.  Lather, rinse, repeat.
1816                  */
1817                 if (!PreCommit_Portals(false))
1818                         break;
1819         }
1820
1821         /*
1822          * The remaining actions cannot call any user-defined code, so it's safe
1823          * to start shutting down within-transaction services.  But note that most
1824          * of this stuff could still throw an error, which would switch us into
1825          * the transaction-abort path.
1826          */
1827
1828         /* Shut down the deferred-trigger manager */
1829         AfterTriggerEndXact(true);
1830
1831         /*
1832          * Let ON COMMIT management do its thing (must happen after closing
1833          * cursors, to avoid dangling-reference problems)
1834          */
1835         PreCommit_on_commit_actions();
1836
1837         /* close large objects before lower-level cleanup */
1838         AtEOXact_LargeObject(true);
1839
1840         /*
1841          * Mark serializable transaction as complete for predicate locking
1842          * purposes.  This should be done as late as we can put it and still allow
1843          * errors to be raised for failure patterns found at commit.
1844          */
1845         PreCommit_CheckForSerializationFailure();
1846
1847         /*
1848          * Insert notifications sent by NOTIFY commands into the queue.  This
1849          * should be late in the pre-commit sequence to minimize time spent
1850          * holding the notify-insertion lock.
1851          */
1852         PreCommit_Notify();
1853
1854         /* Prevent cancel/die interrupt while cleaning up */
1855         HOLD_INTERRUPTS();
1856
1857         /* Commit updates to the relation map --- do this as late as possible */
1858         AtEOXact_RelationMap(true);
1859
1860         /*
1861          * set the current transaction state information appropriately during
1862          * commit processing
1863          */
1864         s->state = TRANS_COMMIT;
1865
1866         /*
1867          * Here is where we really truly commit.
1868          */
1869         latestXid = RecordTransactionCommit();
1870
1871         TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid);
1872
1873         /*
1874          * Let others know about no transaction in progress by me. Note that this
1875          * must be done _before_ releasing locks we hold and _after_
1876          * RecordTransactionCommit.
1877          */
1878         ProcArrayEndTransaction(MyProc, latestXid);
1879
1880         /*
1881          * This is all post-commit cleanup.  Note that if an error is raised here,
1882          * it's too late to abort the transaction.  This should be just
1883          * noncritical resource releasing.
1884          *
1885          * The ordering of operations is not entirely random.  The idea is:
1886          * release resources visible to other backends (eg, files, buffer pins);
1887          * then release locks; then release backend-local resources. We want to
1888          * release locks at the point where any backend waiting for us will see
1889          * our transaction as being fully cleaned up.
1890          *
1891          * Resources that can be associated with individual queries are handled by
1892          * the ResourceOwner mechanism.  The other calls here are for backend-wide
1893          * state.
1894          */
1895
1896         CallXactCallbacks(XACT_EVENT_COMMIT);
1897
1898         ResourceOwnerRelease(TopTransactionResourceOwner,
1899                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
1900                                                  true, true);
1901
1902         /* Check we've released all buffer pins */
1903         AtEOXact_Buffers(true);
1904
1905         /* Clean up the relation cache */
1906         AtEOXact_RelationCache(true);
1907
1908         /* Clean up the snapshot manager */
1909         AtEarlyCommit_Snapshot();
1910
1911         /*
1912          * Make catalog changes visible to all backends.  This has to happen after
1913          * relcache references are dropped (see comments for
1914          * AtEOXact_RelationCache), but before locks are released (if anyone is
1915          * waiting for lock on a relation we've modified, we want them to know
1916          * about the catalog change before they start using the relation).
1917          */
1918         AtEOXact_Inval(true);
1919
1920         /*
1921          * Likewise, dropping of files deleted during the transaction is best done
1922          * after releasing relcache and buffer pins.  (This is not strictly
1923          * necessary during commit, since such pins should have been released
1924          * already, but this ordering is definitely critical during abort.)
1925          */
1926         smgrDoPendingDeletes(true);
1927
1928         AtEOXact_MultiXact();
1929
1930         ResourceOwnerRelease(TopTransactionResourceOwner,
1931                                                  RESOURCE_RELEASE_LOCKS,
1932                                                  true, true);
1933         ResourceOwnerRelease(TopTransactionResourceOwner,
1934                                                  RESOURCE_RELEASE_AFTER_LOCKS,
1935                                                  true, true);
1936
1937         /* Check we've released all catcache entries */
1938         AtEOXact_CatCache(true);
1939
1940         AtCommit_Notify();
1941         AtEOXact_GUC(true, 1);
1942         AtEOXact_SPI(true);
1943         AtEOXact_on_commit_actions(true);
1944         AtEOXact_Namespace(true);
1945         /* smgrcommit already done */
1946         AtEOXact_Files();
1947         AtEOXact_ComboCid();
1948         AtEOXact_HashTables(true);
1949         AtEOXact_PgStat(true);
1950         AtEOXact_Snapshot(true);
1951         pgstat_report_xact_timestamp(0);
1952
1953         CurrentResourceOwner = NULL;
1954         ResourceOwnerDelete(TopTransactionResourceOwner);
1955         s->curTransactionOwner = NULL;
1956         CurTransactionResourceOwner = NULL;
1957         TopTransactionResourceOwner = NULL;
1958
1959         AtCommit_Memory();
1960
1961         s->transactionId = InvalidTransactionId;
1962         s->subTransactionId = InvalidSubTransactionId;
1963         s->nestingLevel = 0;
1964         s->gucNestLevel = 0;
1965         s->childXids = NULL;
1966         s->nChildXids = 0;
1967         s->maxChildXids = 0;
1968
1969         /*
1970          * done with commit processing, set current transaction state back to
1971          * default
1972          */
1973         s->state = TRANS_DEFAULT;
1974
1975         RESUME_INTERRUPTS();
1976 }
1977
1978
1979 /*
1980  *      PrepareTransaction
1981  *
1982  * NB: if you change this routine, better look at CommitTransaction too!
1983  */
1984 static void
1985 PrepareTransaction(void)
1986 {
1987         TransactionState s = CurrentTransactionState;
1988         TransactionId xid = GetCurrentTransactionId();
1989         GlobalTransaction gxact;
1990         TimestampTz prepared_at;
1991
1992         ShowTransactionState("PrepareTransaction");
1993
1994         /*
1995          * check the current transaction state
1996          */
1997         if (s->state != TRANS_INPROGRESS)
1998                 elog(WARNING, "PrepareTransaction while in %s state",
1999                          TransStateAsString(s->state));
2000         Assert(s->parent == NULL);
2001
2002         /*
2003          * Do pre-commit processing that involves calling user-defined code, such
2004          * as triggers.  Since closing cursors could queue trigger actions,
2005          * triggers could open cursors, etc, we have to keep looping until there's
2006          * nothing left to do.
2007          */
2008         for (;;)
2009         {
2010                 /*
2011                  * Fire all currently pending deferred triggers.
2012                  */
2013                 AfterTriggerFireDeferred();
2014
2015                 /*
2016                  * Close open portals (converting holdable ones into static portals).
2017                  * If there weren't any, we are done ... otherwise loop back to check
2018                  * if they queued deferred triggers.  Lather, rinse, repeat.
2019                  */
2020                 if (!PreCommit_Portals(true))
2021                         break;
2022         }
2023
2024         /*
2025          * The remaining actions cannot call any user-defined code, so it's safe
2026          * to start shutting down within-transaction services.  But note that most
2027          * of this stuff could still throw an error, which would switch us into
2028          * the transaction-abort path.
2029          */
2030
2031         /* Shut down the deferred-trigger manager */
2032         AfterTriggerEndXact(true);
2033
2034         /*
2035          * Let ON COMMIT management do its thing (must happen after closing
2036          * cursors, to avoid dangling-reference problems)
2037          */
2038         PreCommit_on_commit_actions();
2039
2040         /* close large objects before lower-level cleanup */
2041         AtEOXact_LargeObject(true);
2042
2043         /*
2044          * Mark serializable transaction as complete for predicate locking
2045          * purposes.  This should be done as late as we can put it and still allow
2046          * errors to be raised for failure patterns found at commit.
2047          */
2048         PreCommit_CheckForSerializationFailure();
2049
2050         /* NOTIFY will be handled below */
2051
2052         /*
2053          * Don't allow PREPARE TRANSACTION if we've accessed a temporary table in
2054          * this transaction.  Having the prepared xact hold locks on another
2055          * backend's temp table seems a bad idea --- for instance it would prevent
2056          * the backend from exiting.  There are other problems too, such as how to
2057          * clean up the source backend's local buffers and ON COMMIT state if the
2058          * prepared xact includes a DROP of a temp table.
2059          *
2060          * We must check this after executing any ON COMMIT actions, because they
2061          * might still access a temp relation.
2062          *
2063          * XXX In principle this could be relaxed to allow some useful special
2064          * cases, such as a temp table created and dropped all within the
2065          * transaction.  That seems to require much more bookkeeping though.
2066          */
2067         if (MyXactAccessedTempRel)
2068                 ereport(ERROR,
2069                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2070                                  errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
2071
2072         /* Prevent cancel/die interrupt while cleaning up */
2073         HOLD_INTERRUPTS();
2074
2075         /*
2076          * set the current transaction state information appropriately during
2077          * prepare processing
2078          */
2079         s->state = TRANS_PREPARE;
2080
2081         prepared_at = GetCurrentTimestamp();
2082
2083         /* Tell bufmgr and smgr to prepare for commit */
2084         BufmgrCommit();
2085
2086         /*
2087          * Reserve the GID for this transaction. This could fail if the requested
2088          * GID is invalid or already in use.
2089          */
2090         gxact = MarkAsPreparing(xid, prepareGID, prepared_at,
2091                                                         GetUserId(), MyDatabaseId);
2092         prepareGID = NULL;
2093
2094         /*
2095          * Collect data for the 2PC state file.  Note that in general, no actual
2096          * state change should happen in the called modules during this step,
2097          * since it's still possible to fail before commit, and in that case we
2098          * want transaction abort to be able to clean up.  (In particular, the
2099          * AtPrepare routines may error out if they find cases they cannot
2100          * handle.)  State cleanup should happen in the PostPrepare routines
2101          * below.  However, some modules can go ahead and clear state here because
2102          * they wouldn't do anything with it during abort anyway.
2103          *
2104          * Note: because the 2PC state file records will be replayed in the same
2105          * order they are made, the order of these calls has to match the order in
2106          * which we want things to happen during COMMIT PREPARED or ROLLBACK
2107          * PREPARED; in particular, pay attention to whether things should happen
2108          * before or after releasing the transaction's locks.
2109          */
2110         StartPrepare(gxact);
2111
2112         AtPrepare_Notify();
2113         AtPrepare_Locks();
2114         AtPrepare_PredicateLocks();
2115         AtPrepare_PgStat();
2116         AtPrepare_MultiXact();
2117         AtPrepare_RelationMap();
2118
2119         /*
2120          * Here is where we really truly prepare.
2121          *
2122          * We have to record transaction prepares even if we didn't make any
2123          * updates, because the transaction manager might get confused if we lose
2124          * a global transaction.
2125          */
2126         EndPrepare(gxact);
2127
2128         /*
2129          * Now we clean up backend-internal state and release internal resources.
2130          */
2131
2132         /* Reset XactLastRecEnd until the next transaction writes something */
2133         XactLastRecEnd.xrecoff = 0;
2134
2135         /*
2136          * Let others know about no transaction in progress by me.      This has to be
2137          * done *after* the prepared transaction has been marked valid, else
2138          * someone may think it is unlocked and recyclable.
2139          */
2140         ProcArrayClearTransaction(MyProc);
2141
2142         /*
2143          * This is all post-transaction cleanup.  Note that if an error is raised
2144          * here, it's too late to abort the transaction.  This should be just
2145          * noncritical resource releasing.      See notes in CommitTransaction.
2146          */
2147
2148         CallXactCallbacks(XACT_EVENT_PREPARE);
2149
2150         ResourceOwnerRelease(TopTransactionResourceOwner,
2151                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
2152                                                  true, true);
2153
2154         /* Check we've released all buffer pins */
2155         AtEOXact_Buffers(true);
2156
2157         /* Clean up the relation cache */
2158         AtEOXact_RelationCache(true);
2159
2160         /* Clean up the snapshot manager */
2161         AtEarlyCommit_Snapshot();
2162
2163         /* notify doesn't need a postprepare call */
2164
2165         PostPrepare_PgStat();
2166
2167         PostPrepare_Inval();
2168
2169         PostPrepare_smgr();
2170
2171         PostPrepare_MultiXact(xid);
2172
2173         PostPrepare_Locks(xid);
2174         PostPrepare_PredicateLocks(xid);
2175
2176         ResourceOwnerRelease(TopTransactionResourceOwner,
2177                                                  RESOURCE_RELEASE_LOCKS,
2178                                                  true, true);
2179         ResourceOwnerRelease(TopTransactionResourceOwner,
2180                                                  RESOURCE_RELEASE_AFTER_LOCKS,
2181                                                  true, true);
2182
2183         /* Check we've released all catcache entries */
2184         AtEOXact_CatCache(true);
2185
2186         /* PREPARE acts the same as COMMIT as far as GUC is concerned */
2187         AtEOXact_GUC(true, 1);
2188         AtEOXact_SPI(true);
2189         AtEOXact_on_commit_actions(true);
2190         AtEOXact_Namespace(true);
2191         /* smgrcommit already done */
2192         AtEOXact_Files();
2193         AtEOXact_ComboCid();
2194         AtEOXact_HashTables(true);
2195         /* don't call AtEOXact_PgStat here */
2196         AtEOXact_Snapshot(true);
2197
2198         CurrentResourceOwner = NULL;
2199         ResourceOwnerDelete(TopTransactionResourceOwner);
2200         s->curTransactionOwner = NULL;
2201         CurTransactionResourceOwner = NULL;
2202         TopTransactionResourceOwner = NULL;
2203
2204         AtCommit_Memory();
2205
2206         s->transactionId = InvalidTransactionId;
2207         s->subTransactionId = InvalidSubTransactionId;
2208         s->nestingLevel = 0;
2209         s->gucNestLevel = 0;
2210         s->childXids = NULL;
2211         s->nChildXids = 0;
2212         s->maxChildXids = 0;
2213
2214         /*
2215          * done with 1st phase commit processing, set current transaction state
2216          * back to default
2217          */
2218         s->state = TRANS_DEFAULT;
2219
2220         RESUME_INTERRUPTS();
2221 }
2222
2223
2224 /*
2225  *      AbortTransaction
2226  */
2227 static void
2228 AbortTransaction(void)
2229 {
2230         TransactionState s = CurrentTransactionState;
2231         TransactionId latestXid;
2232
2233         /* Prevent cancel/die interrupt while cleaning up */
2234         HOLD_INTERRUPTS();
2235
2236         /* Make sure we have a valid memory context and resource owner */
2237         AtAbort_Memory();
2238         AtAbort_ResourceOwner();
2239
2240         /*
2241          * Release any LW locks we might be holding as quickly as possible.
2242          * (Regular locks, however, must be held till we finish aborting.)
2243          * Releasing LW locks is critical since we might try to grab them again
2244          * while cleaning up!
2245          */
2246         LWLockReleaseAll();
2247
2248         /* Clean up buffer I/O and buffer context locks, too */
2249         AbortBufferIO();
2250         UnlockBuffers();
2251
2252         /*
2253          * Also clean up any open wait for lock, since the lock manager will choke
2254          * if we try to wait for another lock before doing this.
2255          */
2256         LockWaitCancel();
2257
2258         /*
2259          * check the current transaction state
2260          */
2261         if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
2262                 elog(WARNING, "AbortTransaction while in %s state",
2263                          TransStateAsString(s->state));
2264         Assert(s->parent == NULL);
2265
2266         /*
2267          * set the current transaction state information appropriately during the
2268          * abort processing
2269          */
2270         s->state = TRANS_ABORT;
2271
2272         /*
2273          * Reset user ID which might have been changed transiently.  We need this
2274          * to clean up in case control escaped out of a SECURITY DEFINER function
2275          * or other local change of CurrentUserId; therefore, the prior value of
2276          * SecurityRestrictionContext also needs to be restored.
2277          *
2278          * (Note: it is not necessary to restore session authorization or role
2279          * settings here because those can only be changed via GUC, and GUC will
2280          * take care of rolling them back if need be.)
2281          */
2282         SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
2283
2284         /*
2285          * do abort processing
2286          */
2287         AfterTriggerEndXact(false); /* 'false' means it's abort */
2288         AtAbort_Portals();
2289         AtEOXact_LargeObject(false);
2290         AtAbort_Notify();
2291         AtEOXact_RelationMap(false);
2292
2293         /*
2294          * Advertise the fact that we aborted in pg_clog (assuming that we got as
2295          * far as assigning an XID to advertise).
2296          */
2297         latestXid = RecordTransactionAbort(false);
2298
2299         TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid);
2300
2301         /*
2302          * Let others know about no transaction in progress by me. Note that this
2303          * must be done _before_ releasing locks we hold and _after_
2304          * RecordTransactionAbort.
2305          */
2306         ProcArrayEndTransaction(MyProc, latestXid);
2307
2308         /*
2309          * Post-abort cleanup.  See notes in CommitTransaction() concerning
2310          * ordering.  We can skip all of it if the transaction failed before
2311          * creating a resource owner.
2312          */
2313         if (TopTransactionResourceOwner != NULL)
2314         {
2315                 CallXactCallbacks(XACT_EVENT_ABORT);
2316
2317                 ResourceOwnerRelease(TopTransactionResourceOwner,
2318                                                          RESOURCE_RELEASE_BEFORE_LOCKS,
2319                                                          false, true);
2320                 AtEOXact_Buffers(false);
2321                 AtEOXact_RelationCache(false);
2322                 AtEOXact_Inval(false);
2323                 smgrDoPendingDeletes(false);
2324                 AtEOXact_MultiXact();
2325                 ResourceOwnerRelease(TopTransactionResourceOwner,
2326                                                          RESOURCE_RELEASE_LOCKS,
2327                                                          false, true);
2328                 ResourceOwnerRelease(TopTransactionResourceOwner,
2329                                                          RESOURCE_RELEASE_AFTER_LOCKS,
2330                                                          false, true);
2331                 AtEOXact_CatCache(false);
2332
2333                 AtEOXact_GUC(false, 1);
2334                 AtEOXact_SPI(false);
2335                 AtEOXact_on_commit_actions(false);
2336                 AtEOXact_Namespace(false);
2337                 AtEOXact_Files();
2338                 AtEOXact_ComboCid();
2339                 AtEOXact_HashTables(false);
2340                 AtEOXact_PgStat(false);
2341                 AtEOXact_Snapshot(false);
2342                 pgstat_report_xact_timestamp(0);
2343         }
2344
2345         /*
2346          * State remains TRANS_ABORT until CleanupTransaction().
2347          */
2348         RESUME_INTERRUPTS();
2349 }
2350
2351 /*
2352  *      CleanupTransaction
2353  */
2354 static void
2355 CleanupTransaction(void)
2356 {
2357         TransactionState s = CurrentTransactionState;
2358
2359         /*
2360          * State should still be TRANS_ABORT from AbortTransaction().
2361          */
2362         if (s->state != TRANS_ABORT)
2363                 elog(FATAL, "CleanupTransaction: unexpected state %s",
2364                          TransStateAsString(s->state));
2365
2366         /*
2367          * do abort cleanup processing
2368          */
2369         AtCleanup_Portals();            /* now safe to release portal memory */
2370
2371         CurrentResourceOwner = NULL;    /* and resource owner */
2372         if (TopTransactionResourceOwner)
2373                 ResourceOwnerDelete(TopTransactionResourceOwner);
2374         s->curTransactionOwner = NULL;
2375         CurTransactionResourceOwner = NULL;
2376         TopTransactionResourceOwner = NULL;
2377
2378         AtCleanup_Memory();                     /* and transaction memory */
2379
2380         s->transactionId = InvalidTransactionId;
2381         s->subTransactionId = InvalidSubTransactionId;
2382         s->nestingLevel = 0;
2383         s->gucNestLevel = 0;
2384         s->childXids = NULL;
2385         s->nChildXids = 0;
2386         s->maxChildXids = 0;
2387
2388         /*
2389          * done with abort processing, set current transaction state back to
2390          * default
2391          */
2392         s->state = TRANS_DEFAULT;
2393 }
2394
2395 /*
2396  *      StartTransactionCommand
2397  */
2398 void
2399 StartTransactionCommand(void)
2400 {
2401         TransactionState s = CurrentTransactionState;
2402
2403         switch (s->blockState)
2404         {
2405                         /*
2406                          * if we aren't in a transaction block, we just do our usual start
2407                          * transaction.
2408                          */
2409                 case TBLOCK_DEFAULT:
2410                         StartTransaction();
2411                         s->blockState = TBLOCK_STARTED;
2412                         break;
2413
2414                         /*
2415                          * We are somewhere in a transaction block or subtransaction and
2416                          * about to start a new command.  For now we do nothing, but
2417                          * someday we may do command-local resource initialization. (Note
2418                          * that any needed CommandCounterIncrement was done by the
2419                          * previous CommitTransactionCommand.)
2420                          */
2421                 case TBLOCK_INPROGRESS:
2422                 case TBLOCK_SUBINPROGRESS:
2423                         break;
2424
2425                         /*
2426                          * Here we are in a failed transaction block (one of the commands
2427                          * caused an abort) so we do nothing but remain in the abort
2428                          * state.  Eventually we will get a ROLLBACK command which will
2429                          * get us out of this state.  (It is up to other code to ensure
2430                          * that no commands other than ROLLBACK will be processed in these
2431                          * states.)
2432                          */
2433                 case TBLOCK_ABORT:
2434                 case TBLOCK_SUBABORT:
2435                         break;
2436
2437                         /* These cases are invalid. */
2438                 case TBLOCK_STARTED:
2439                 case TBLOCK_BEGIN:
2440                 case TBLOCK_SUBBEGIN:
2441                 case TBLOCK_END:
2442                 case TBLOCK_SUBRELEASE:
2443                 case TBLOCK_SUBCOMMIT:
2444                 case TBLOCK_ABORT_END:
2445                 case TBLOCK_SUBABORT_END:
2446                 case TBLOCK_ABORT_PENDING:
2447                 case TBLOCK_SUBABORT_PENDING:
2448                 case TBLOCK_SUBRESTART:
2449                 case TBLOCK_SUBABORT_RESTART:
2450                 case TBLOCK_PREPARE:
2451                         elog(ERROR, "StartTransactionCommand: unexpected state %s",
2452                                  BlockStateAsString(s->blockState));
2453                         break;
2454         }
2455
2456         /*
2457          * We must switch to CurTransactionContext before returning. This is
2458          * already done if we called StartTransaction, otherwise not.
2459          */
2460         Assert(CurTransactionContext != NULL);
2461         MemoryContextSwitchTo(CurTransactionContext);
2462 }
2463
2464 /*
2465  *      CommitTransactionCommand
2466  */
2467 void
2468 CommitTransactionCommand(void)
2469 {
2470         TransactionState s = CurrentTransactionState;
2471
2472         switch (s->blockState)
2473         {
2474                         /*
2475                          * This shouldn't happen, because it means the previous
2476                          * StartTransactionCommand didn't set the STARTED state
2477                          * appropriately.
2478                          */
2479                 case TBLOCK_DEFAULT:
2480                         elog(FATAL, "CommitTransactionCommand: unexpected state %s",
2481                                  BlockStateAsString(s->blockState));
2482                         break;
2483
2484                         /*
2485                          * If we aren't in a transaction block, just do our usual
2486                          * transaction commit, and return to the idle state.
2487                          */
2488                 case TBLOCK_STARTED:
2489                         CommitTransaction();
2490                         s->blockState = TBLOCK_DEFAULT;
2491                         break;
2492
2493                         /*
2494                          * We are completing a "BEGIN TRANSACTION" command, so we change
2495                          * to the "transaction block in progress" state and return.  (We
2496                          * assume the BEGIN did nothing to the database, so we need no
2497                          * CommandCounterIncrement.)
2498                          */
2499                 case TBLOCK_BEGIN:
2500                         s->blockState = TBLOCK_INPROGRESS;
2501                         break;
2502
2503                         /*
2504                          * This is the case when we have finished executing a command
2505                          * someplace within a transaction block.  We increment the command
2506                          * counter and return.
2507                          */
2508                 case TBLOCK_INPROGRESS:
2509                 case TBLOCK_SUBINPROGRESS:
2510                         CommandCounterIncrement();
2511                         break;
2512
2513                         /*
2514                          * We are completing a "COMMIT" command.  Do it and return to the
2515                          * idle state.
2516                          */
2517                 case TBLOCK_END:
2518                         CommitTransaction();
2519                         s->blockState = TBLOCK_DEFAULT;
2520                         break;
2521
2522                         /*
2523                          * Here we are in the middle of a transaction block but one of the
2524                          * commands caused an abort so we do nothing but remain in the
2525                          * abort state.  Eventually we will get a ROLLBACK comand.
2526                          */
2527                 case TBLOCK_ABORT:
2528                 case TBLOCK_SUBABORT:
2529                         break;
2530
2531                         /*
2532                          * Here we were in an aborted transaction block and we just got
2533                          * the ROLLBACK command from the user, so clean up the
2534                          * already-aborted transaction and return to the idle state.
2535                          */
2536                 case TBLOCK_ABORT_END:
2537                         CleanupTransaction();
2538                         s->blockState = TBLOCK_DEFAULT;
2539                         break;
2540
2541                         /*
2542                          * Here we were in a perfectly good transaction block but the user
2543                          * told us to ROLLBACK anyway.  We have to abort the transaction
2544                          * and then clean up.
2545                          */
2546                 case TBLOCK_ABORT_PENDING:
2547                         AbortTransaction();
2548                         CleanupTransaction();
2549                         s->blockState = TBLOCK_DEFAULT;
2550                         break;
2551
2552                         /*
2553                          * We are completing a "PREPARE TRANSACTION" command.  Do it and
2554                          * return to the idle state.
2555                          */
2556                 case TBLOCK_PREPARE:
2557                         PrepareTransaction();
2558                         s->blockState = TBLOCK_DEFAULT;
2559                         break;
2560
2561                         /*
2562                          * We were just issued a SAVEPOINT inside a transaction block.
2563                          * Start a subtransaction.      (DefineSavepoint already did
2564                          * PushTransaction, so as to have someplace to put the SUBBEGIN
2565                          * state.)
2566                          */
2567                 case TBLOCK_SUBBEGIN:
2568                         StartSubTransaction();
2569                         s->blockState = TBLOCK_SUBINPROGRESS;
2570                         break;
2571
2572                         /*
2573                          * We were issued a RELEASE command, so we end the
2574                          * current subtransaction and return to the parent transaction.
2575                          * The parent might be ended too, so repeat till we find an
2576                          * INPROGRESS transaction or subtransaction.
2577                          */
2578                 case TBLOCK_SUBRELEASE:
2579                         do
2580                         {
2581                                 CommitSubTransaction();
2582                                 s = CurrentTransactionState;    /* changed by pop */
2583                         } while (s->blockState == TBLOCK_SUBRELEASE);
2584
2585                         Assert(s->blockState == TBLOCK_INPROGRESS ||
2586                                    s->blockState == TBLOCK_SUBINPROGRESS);
2587                         break;
2588
2589                         /*
2590                          * We were issued a COMMIT, so we end the current subtransaction
2591                          * hierarchy and perform final commit. We do this by rolling up
2592                          * any subtransactions into their parent, which leads to O(N^2)
2593                          * operations with respect to resource owners - this isn't that
2594                          * bad until we approach a thousands of savepoints but is necessary
2595                          * for correctness should after triggers create new resource
2596                          * owners.
2597                          */
2598                 case TBLOCK_SUBCOMMIT:
2599                         do
2600                         {
2601                                 CommitSubTransaction();
2602                                 s = CurrentTransactionState;    /* changed by pop */
2603                         } while (s->blockState == TBLOCK_SUBCOMMIT);
2604                         /* If we had a COMMIT command, finish off the main xact too */
2605                         if (s->blockState == TBLOCK_END)
2606                         {
2607                                 Assert(s->parent == NULL);
2608                                 CommitTransaction();
2609                                 s->blockState = TBLOCK_DEFAULT;
2610                         }
2611                         else if (s->blockState == TBLOCK_PREPARE)
2612                         {
2613                                 Assert(s->parent == NULL);
2614                                 PrepareTransaction();
2615                                 s->blockState = TBLOCK_DEFAULT;
2616                         }
2617                         else
2618                                 elog(ERROR, "CommitTransactionCommand: unexpected state %s",
2619                                          BlockStateAsString(s->blockState));
2620                         break;
2621
2622                         /*
2623                          * The current already-failed subtransaction is ending due to a
2624                          * ROLLBACK or ROLLBACK TO command, so pop it and recursively
2625                          * examine the parent (which could be in any of several states).
2626                          */
2627                 case TBLOCK_SUBABORT_END:
2628                         CleanupSubTransaction();
2629                         CommitTransactionCommand();
2630                         break;
2631
2632                         /*
2633                          * As above, but it's not dead yet, so abort first.
2634                          */
2635                 case TBLOCK_SUBABORT_PENDING:
2636                         AbortSubTransaction();
2637                         CleanupSubTransaction();
2638                         CommitTransactionCommand();
2639                         break;
2640
2641                         /*
2642                          * The current subtransaction is the target of a ROLLBACK TO
2643                          * command.  Abort and pop it, then start a new subtransaction
2644                          * with the same name.
2645                          */
2646                 case TBLOCK_SUBRESTART:
2647                         {
2648                                 char       *name;
2649                                 int                     savepointLevel;
2650
2651                                 /* save name and keep Cleanup from freeing it */
2652                                 name = s->name;
2653                                 s->name = NULL;
2654                                 savepointLevel = s->savepointLevel;
2655
2656                                 AbortSubTransaction();
2657                                 CleanupSubTransaction();
2658
2659                                 DefineSavepoint(NULL);
2660                                 s = CurrentTransactionState;    /* changed by push */
2661                                 s->name = name;
2662                                 s->savepointLevel = savepointLevel;
2663
2664                                 /* This is the same as TBLOCK_SUBBEGIN case */
2665                                 AssertState(s->blockState == TBLOCK_SUBBEGIN);
2666                                 StartSubTransaction();
2667                                 s->blockState = TBLOCK_SUBINPROGRESS;
2668                         }
2669                         break;
2670
2671                         /*
2672                          * Same as above, but the subtransaction had already failed, so we
2673                          * don't need AbortSubTransaction.
2674                          */
2675                 case TBLOCK_SUBABORT_RESTART:
2676                         {
2677                                 char       *name;
2678                                 int                     savepointLevel;
2679
2680                                 /* save name and keep Cleanup from freeing it */
2681                                 name = s->name;
2682                                 s->name = NULL;
2683                                 savepointLevel = s->savepointLevel;
2684
2685                                 CleanupSubTransaction();
2686
2687                                 DefineSavepoint(NULL);
2688                                 s = CurrentTransactionState;    /* changed by push */
2689                                 s->name = name;
2690                                 s->savepointLevel = savepointLevel;
2691
2692                                 /* This is the same as TBLOCK_SUBBEGIN case */
2693                                 AssertState(s->blockState == TBLOCK_SUBBEGIN);
2694                                 StartSubTransaction();
2695                                 s->blockState = TBLOCK_SUBINPROGRESS;
2696                         }
2697                         break;
2698         }
2699 }
2700
2701 /*
2702  *      AbortCurrentTransaction
2703  */
2704 void
2705 AbortCurrentTransaction(void)
2706 {
2707         TransactionState s = CurrentTransactionState;
2708
2709         switch (s->blockState)
2710         {
2711                 case TBLOCK_DEFAULT:
2712                         if (s->state == TRANS_DEFAULT)
2713                         {
2714                                 /* we are idle, so nothing to do */
2715                         }
2716                         else
2717                         {
2718                                 /*
2719                                  * We can get here after an error during transaction start
2720                                  * (state will be TRANS_START).  Need to clean up the
2721                                  * incompletely started transaction.  First, adjust the
2722                                  * low-level state to suppress warning message from
2723                                  * AbortTransaction.
2724                                  */
2725                                 if (s->state == TRANS_START)
2726                                         s->state = TRANS_INPROGRESS;
2727                                 AbortTransaction();
2728                                 CleanupTransaction();
2729                         }
2730                         break;
2731
2732                         /*
2733                          * if we aren't in a transaction block, we just do the basic abort
2734                          * & cleanup transaction.
2735                          */
2736                 case TBLOCK_STARTED:
2737                         AbortTransaction();
2738                         CleanupTransaction();
2739                         s->blockState = TBLOCK_DEFAULT;
2740                         break;
2741
2742                         /*
2743                          * If we are in TBLOCK_BEGIN it means something screwed up right
2744                          * after reading "BEGIN TRANSACTION".  We assume that the user
2745                          * will interpret the error as meaning the BEGIN failed to get him
2746                          * into a transaction block, so we should abort and return to idle
2747                          * state.
2748                          */
2749                 case TBLOCK_BEGIN:
2750                         AbortTransaction();
2751                         CleanupTransaction();
2752                         s->blockState = TBLOCK_DEFAULT;
2753                         break;
2754
2755                         /*
2756                          * We are somewhere in a transaction block and we've gotten a
2757                          * failure, so we abort the transaction and set up the persistent
2758                          * ABORT state.  We will stay in ABORT until we get a ROLLBACK.
2759                          */
2760                 case TBLOCK_INPROGRESS:
2761                         AbortTransaction();
2762                         s->blockState = TBLOCK_ABORT;
2763                         /* CleanupTransaction happens when we exit TBLOCK_ABORT_END */
2764                         break;
2765
2766                         /*
2767                          * Here, we failed while trying to COMMIT.      Clean up the
2768                          * transaction and return to idle state (we do not want to stay in
2769                          * the transaction).
2770                          */
2771                 case TBLOCK_END:
2772                         AbortTransaction();
2773                         CleanupTransaction();
2774                         s->blockState = TBLOCK_DEFAULT;
2775                         break;
2776
2777                         /*
2778                          * Here, we are already in an aborted transaction state and are
2779                          * waiting for a ROLLBACK, but for some reason we failed again! So
2780                          * we just remain in the abort state.
2781                          */
2782                 case TBLOCK_ABORT:
2783                 case TBLOCK_SUBABORT:
2784                         break;
2785
2786                         /*
2787                          * We are in a failed transaction and we got the ROLLBACK command.
2788                          * We have already aborted, we just need to cleanup and go to idle
2789                          * state.
2790                          */
2791                 case TBLOCK_ABORT_END:
2792                         CleanupTransaction();
2793                         s->blockState = TBLOCK_DEFAULT;
2794                         break;
2795
2796                         /*
2797                          * We are in a live transaction and we got a ROLLBACK command.
2798                          * Abort, cleanup, go to idle state.
2799                          */
2800                 case TBLOCK_ABORT_PENDING:
2801                         AbortTransaction();
2802                         CleanupTransaction();
2803                         s->blockState = TBLOCK_DEFAULT;
2804                         break;
2805
2806                         /*
2807                          * Here, we failed while trying to PREPARE.  Clean up the
2808                          * transaction and return to idle state (we do not want to stay in
2809                          * the transaction).
2810                          */
2811                 case TBLOCK_PREPARE:
2812                         AbortTransaction();
2813                         CleanupTransaction();
2814                         s->blockState = TBLOCK_DEFAULT;
2815                         break;
2816
2817                         /*
2818                          * We got an error inside a subtransaction.  Abort just the
2819                          * subtransaction, and go to the persistent SUBABORT state until
2820                          * we get ROLLBACK.
2821                          */
2822                 case TBLOCK_SUBINPROGRESS:
2823                         AbortSubTransaction();
2824                         s->blockState = TBLOCK_SUBABORT;
2825                         break;
2826
2827                         /*
2828                          * If we failed while trying to create a subtransaction, clean up
2829                          * the broken subtransaction and abort the parent.      The same
2830                          * applies if we get a failure while ending a subtransaction.
2831                          */
2832                 case TBLOCK_SUBBEGIN:
2833                 case TBLOCK_SUBRELEASE:
2834                 case TBLOCK_SUBCOMMIT:
2835                 case TBLOCK_SUBABORT_PENDING:
2836                 case TBLOCK_SUBRESTART:
2837                         AbortSubTransaction();
2838                         CleanupSubTransaction();
2839                         AbortCurrentTransaction();
2840                         break;
2841
2842                         /*
2843                          * Same as above, except the Abort() was already done.
2844                          */
2845                 case TBLOCK_SUBABORT_END:
2846                 case TBLOCK_SUBABORT_RESTART:
2847                         CleanupSubTransaction();
2848                         AbortCurrentTransaction();
2849                         break;
2850         }
2851 }
2852
2853 /*
2854  *      PreventTransactionChain
2855  *
2856  *      This routine is to be called by statements that must not run inside
2857  *      a transaction block, typically because they have non-rollback-able
2858  *      side effects or do internal commits.
2859  *
2860  *      If we have already started a transaction block, issue an error; also issue
2861  *      an error if we appear to be running inside a user-defined function (which
2862  *      could issue more commands and possibly cause a failure after the statement
2863  *      completes).  Subtransactions are verboten too.
2864  *
2865  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2866  *      inside a function or multi-query querystring.  (We will always fail if
2867  *      this is false, but it's convenient to centralize the check here instead of
2868  *      making callers do it.)
2869  *      stmtType: statement type name, for error messages.
2870  */
2871 void
2872 PreventTransactionChain(bool isTopLevel, const char *stmtType)
2873 {
2874         /*
2875          * xact block already started?
2876          */
2877         if (IsTransactionBlock())
2878                 ereport(ERROR,
2879                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2880                 /* translator: %s represents an SQL statement name */
2881                                  errmsg("%s cannot run inside a transaction block",
2882                                                 stmtType)));
2883
2884         /*
2885          * subtransaction?
2886          */
2887         if (IsSubTransaction())
2888                 ereport(ERROR,
2889                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2890                 /* translator: %s represents an SQL statement name */
2891                                  errmsg("%s cannot run inside a subtransaction",
2892                                                 stmtType)));
2893
2894         /*
2895          * inside a function call?
2896          */
2897         if (!isTopLevel)
2898                 ereport(ERROR,
2899                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2900                 /* translator: %s represents an SQL statement name */
2901                                  errmsg("%s cannot be executed from a function or multi-command string",
2902                                                 stmtType)));
2903
2904         /* If we got past IsTransactionBlock test, should be in default state */
2905         if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
2906                 CurrentTransactionState->blockState != TBLOCK_STARTED)
2907                 elog(FATAL, "cannot prevent transaction chain");
2908         /* all okay */
2909 }
2910
2911 /*
2912  *      RequireTransactionChain
2913  *
2914  *      This routine is to be called by statements that must run inside
2915  *      a transaction block, because they have no effects that persist past
2916  *      transaction end (and so calling them outside a transaction block
2917  *      is presumably an error).  DECLARE CURSOR is an example.
2918  *
2919  *      If we appear to be running inside a user-defined function, we do not
2920  *      issue an error, since the function could issue more commands that make
2921  *      use of the current statement's results.  Likewise subtransactions.
2922  *      Thus this is an inverse for PreventTransactionChain.
2923  *
2924  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2925  *      inside a function.
2926  *      stmtType: statement type name, for error messages.
2927  */
2928 void
2929 RequireTransactionChain(bool isTopLevel, const char *stmtType)
2930 {
2931         /*
2932          * xact block already started?
2933          */
2934         if (IsTransactionBlock())
2935                 return;
2936
2937         /*
2938          * subtransaction?
2939          */
2940         if (IsSubTransaction())
2941                 return;
2942
2943         /*
2944          * inside a function call?
2945          */
2946         if (!isTopLevel)
2947                 return;
2948
2949         ereport(ERROR,
2950                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
2951         /* translator: %s represents an SQL statement name */
2952                          errmsg("%s can only be used in transaction blocks",
2953                                         stmtType)));
2954 }
2955
2956 /*
2957  *      IsInTransactionChain
2958  *
2959  *      This routine is for statements that need to behave differently inside
2960  *      a transaction block than when running as single commands.  ANALYZE is
2961  *      currently the only example.
2962  *
2963  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2964  *      inside a function.
2965  */
2966 bool
2967 IsInTransactionChain(bool isTopLevel)
2968 {
2969         /*
2970          * Return true on same conditions that would make PreventTransactionChain
2971          * error out
2972          */
2973         if (IsTransactionBlock())
2974                 return true;
2975
2976         if (IsSubTransaction())
2977                 return true;
2978
2979         if (!isTopLevel)
2980                 return true;
2981
2982         if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
2983                 CurrentTransactionState->blockState != TBLOCK_STARTED)
2984                 return true;
2985
2986         return false;
2987 }
2988
2989
2990 /*
2991  * Register or deregister callback functions for start- and end-of-xact
2992  * operations.
2993  *
2994  * These functions are intended for use by dynamically loaded modules.
2995  * For built-in modules we generally just hardwire the appropriate calls
2996  * (mainly because it's easier to control the order that way, where needed).
2997  *
2998  * At transaction end, the callback occurs post-commit or post-abort, so the
2999  * callback functions can only do noncritical cleanup.
3000  */
3001 void
3002 RegisterXactCallback(XactCallback callback, void *arg)
3003 {
3004         XactCallbackItem *item;
3005
3006         item = (XactCallbackItem *)
3007                 MemoryContextAlloc(TopMemoryContext, sizeof(XactCallbackItem));
3008         item->callback = callback;
3009         item->arg = arg;
3010         item->next = Xact_callbacks;
3011         Xact_callbacks = item;
3012 }
3013
3014 void
3015 UnregisterXactCallback(XactCallback callback, void *arg)
3016 {
3017         XactCallbackItem *item;
3018         XactCallbackItem *prev;
3019
3020         prev = NULL;
3021         for (item = Xact_callbacks; item; prev = item, item = item->next)
3022         {
3023                 if (item->callback == callback && item->arg == arg)
3024                 {
3025                         if (prev)
3026                                 prev->next = item->next;
3027                         else
3028                                 Xact_callbacks = item->next;
3029                         pfree(item);
3030                         break;
3031                 }
3032         }
3033 }
3034
3035 static void
3036 CallXactCallbacks(XactEvent event)
3037 {
3038         XactCallbackItem *item;
3039
3040         for (item = Xact_callbacks; item; item = item->next)
3041                 (*item->callback) (event, item->arg);
3042 }
3043
3044
3045 /*
3046  * Register or deregister callback functions for start- and end-of-subxact
3047  * operations.
3048  *
3049  * Pretty much same as above, but for subtransaction events.
3050  *
3051  * At subtransaction end, the callback occurs post-subcommit or post-subabort,
3052  * so the callback functions can only do noncritical cleanup.  At
3053  * subtransaction start, the callback is called when the subtransaction has
3054  * finished initializing.
3055  */
3056 void
3057 RegisterSubXactCallback(SubXactCallback callback, void *arg)
3058 {
3059         SubXactCallbackItem *item;
3060
3061         item = (SubXactCallbackItem *)
3062                 MemoryContextAlloc(TopMemoryContext, sizeof(SubXactCallbackItem));
3063         item->callback = callback;
3064         item->arg = arg;
3065         item->next = SubXact_callbacks;
3066         SubXact_callbacks = item;
3067 }
3068
3069 void
3070 UnregisterSubXactCallback(SubXactCallback callback, void *arg)
3071 {
3072         SubXactCallbackItem *item;
3073         SubXactCallbackItem *prev;
3074
3075         prev = NULL;
3076         for (item = SubXact_callbacks; item; prev = item, item = item->next)
3077         {
3078                 if (item->callback == callback && item->arg == arg)
3079                 {
3080                         if (prev)
3081                                 prev->next = item->next;
3082                         else
3083                                 SubXact_callbacks = item->next;
3084                         pfree(item);
3085                         break;
3086                 }
3087         }
3088 }
3089
3090 static void
3091 CallSubXactCallbacks(SubXactEvent event,
3092                                          SubTransactionId mySubid,
3093                                          SubTransactionId parentSubid)
3094 {
3095         SubXactCallbackItem *item;
3096
3097         for (item = SubXact_callbacks; item; item = item->next)
3098                 (*item->callback) (event, mySubid, parentSubid, item->arg);
3099 }
3100
3101
3102 /* ----------------------------------------------------------------
3103  *                                         transaction block support
3104  * ----------------------------------------------------------------
3105  */
3106
3107 /*
3108  *      BeginTransactionBlock
3109  *              This executes a BEGIN command.
3110  */
3111 void
3112 BeginTransactionBlock(void)
3113 {
3114         TransactionState s = CurrentTransactionState;
3115
3116         switch (s->blockState)
3117         {
3118                         /*
3119                          * We are not inside a transaction block, so allow one to begin.
3120                          */
3121                 case TBLOCK_STARTED:
3122                         s->blockState = TBLOCK_BEGIN;
3123                         break;
3124
3125                         /*
3126                          * Already a transaction block in progress.
3127                          */
3128                 case TBLOCK_INPROGRESS:
3129                 case TBLOCK_SUBINPROGRESS:
3130                 case TBLOCK_ABORT:
3131                 case TBLOCK_SUBABORT:
3132                         ereport(WARNING,
3133                                         (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
3134                                          errmsg("there is already a transaction in progress")));
3135                         break;
3136
3137                         /* These cases are invalid. */
3138                 case TBLOCK_DEFAULT:
3139                 case TBLOCK_BEGIN:
3140                 case TBLOCK_SUBBEGIN:
3141                 case TBLOCK_END:
3142                 case TBLOCK_SUBRELEASE:
3143                 case TBLOCK_SUBCOMMIT:
3144                 case TBLOCK_ABORT_END:
3145                 case TBLOCK_SUBABORT_END:
3146                 case TBLOCK_ABORT_PENDING:
3147                 case TBLOCK_SUBABORT_PENDING:
3148                 case TBLOCK_SUBRESTART:
3149                 case TBLOCK_SUBABORT_RESTART:
3150                 case TBLOCK_PREPARE:
3151                         elog(FATAL, "BeginTransactionBlock: unexpected state %s",
3152                                  BlockStateAsString(s->blockState));
3153                         break;
3154         }
3155 }
3156
3157 /*
3158  *      PrepareTransactionBlock
3159  *              This executes a PREPARE command.
3160  *
3161  * Since PREPARE may actually do a ROLLBACK, the result indicates what
3162  * happened: TRUE for PREPARE, FALSE for ROLLBACK.
3163  *
3164  * Note that we don't actually do anything here except change blockState.
3165  * The real work will be done in the upcoming PrepareTransaction().
3166  * We do it this way because it's not convenient to change memory context,
3167  * resource owner, etc while executing inside a Portal.
3168  */
3169 bool
3170 PrepareTransactionBlock(char *gid)
3171 {
3172         TransactionState s;
3173         bool            result;
3174
3175         /* Set up to commit the current transaction */
3176         result = EndTransactionBlock();
3177
3178         /* If successful, change outer tblock state to PREPARE */
3179         if (result)
3180         {
3181                 s = CurrentTransactionState;
3182
3183                 while (s->parent != NULL)
3184                         s = s->parent;
3185
3186                 if (s->blockState == TBLOCK_END)
3187                 {
3188                         /* Save GID where PrepareTransaction can find it again */
3189                         prepareGID = MemoryContextStrdup(TopTransactionContext, gid);
3190
3191                         s->blockState = TBLOCK_PREPARE;
3192                 }
3193                 else
3194                 {
3195                         /*
3196                          * ignore case where we are not in a transaction;
3197                          * EndTransactionBlock already issued a warning.
3198                          */
3199                         Assert(s->blockState == TBLOCK_STARTED);
3200                         /* Don't send back a PREPARE result tag... */
3201                         result = false;
3202                 }
3203         }
3204
3205         return result;
3206 }
3207
3208 /*
3209  *      EndTransactionBlock
3210  *              This executes a COMMIT command.
3211  *
3212  * Since COMMIT may actually do a ROLLBACK, the result indicates what
3213  * happened: TRUE for COMMIT, FALSE for ROLLBACK.
3214  *
3215  * Note that we don't actually do anything here except change blockState.
3216  * The real work will be done in the upcoming CommitTransactionCommand().
3217  * We do it this way because it's not convenient to change memory context,
3218  * resource owner, etc while executing inside a Portal.
3219  */
3220 bool
3221 EndTransactionBlock(void)
3222 {
3223         TransactionState s = CurrentTransactionState;
3224         bool            result = false;
3225
3226         switch (s->blockState)
3227         {
3228                         /*
3229                          * We are in a transaction block, so tell CommitTransactionCommand
3230                          * to COMMIT.
3231                          */
3232                 case TBLOCK_INPROGRESS:
3233                         s->blockState = TBLOCK_END;
3234                         result = true;
3235                         break;
3236
3237                         /*
3238                          * We are in a failed transaction block.  Tell
3239                          * CommitTransactionCommand it's time to exit the block.
3240                          */
3241                 case TBLOCK_ABORT:
3242                         s->blockState = TBLOCK_ABORT_END;
3243                         break;
3244
3245                         /*
3246                          * We are in a live subtransaction block.  Set up to subcommit all
3247                          * open subtransactions and then commit the main transaction.
3248                          */
3249                 case TBLOCK_SUBINPROGRESS:
3250                         while (s->parent != NULL)
3251                         {
3252                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3253                                         s->blockState = TBLOCK_SUBCOMMIT;
3254                                 else
3255                                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3256                                                  BlockStateAsString(s->blockState));
3257                                 s = s->parent;
3258                         }
3259                         if (s->blockState == TBLOCK_INPROGRESS)
3260                                 s->blockState = TBLOCK_END;
3261                         else
3262                                 elog(FATAL, "EndTransactionBlock: unexpected state %s",
3263                                          BlockStateAsString(s->blockState));
3264                         result = true;
3265                         break;
3266
3267                         /*
3268                          * Here we are inside an aborted subtransaction.  Treat the COMMIT
3269                          * as ROLLBACK: set up to abort everything and exit the main
3270                          * transaction.
3271                          */
3272                 case TBLOCK_SUBABORT:
3273                         while (s->parent != NULL)
3274                         {
3275                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3276                                         s->blockState = TBLOCK_SUBABORT_PENDING;
3277                                 else if (s->blockState == TBLOCK_SUBABORT)
3278                                         s->blockState = TBLOCK_SUBABORT_END;
3279                                 else
3280                                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3281                                                  BlockStateAsString(s->blockState));
3282                                 s = s->parent;
3283                         }
3284                         if (s->blockState == TBLOCK_INPROGRESS)
3285                                 s->blockState = TBLOCK_ABORT_PENDING;
3286                         else if (s->blockState == TBLOCK_ABORT)
3287                                 s->blockState = TBLOCK_ABORT_END;
3288                         else
3289                                 elog(FATAL, "EndTransactionBlock: unexpected state %s",
3290                                          BlockStateAsString(s->blockState));
3291                         break;
3292
3293                         /*
3294                          * The user issued COMMIT when not inside a transaction.  Issue a
3295                          * WARNING, staying in TBLOCK_STARTED state.  The upcoming call to
3296                          * CommitTransactionCommand() will then close the transaction and
3297                          * put us back into the default state.
3298                          */
3299                 case TBLOCK_STARTED:
3300                         ereport(WARNING,
3301                                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
3302                                          errmsg("there is no transaction in progress")));
3303                         result = true;
3304                         break;
3305
3306                         /* These cases are invalid. */
3307                 case TBLOCK_DEFAULT:
3308                 case TBLOCK_BEGIN:
3309                 case TBLOCK_SUBBEGIN:
3310                 case TBLOCK_END:
3311                 case TBLOCK_SUBRELEASE:
3312                 case TBLOCK_SUBCOMMIT:
3313                 case TBLOCK_ABORT_END:
3314                 case TBLOCK_SUBABORT_END:
3315                 case TBLOCK_ABORT_PENDING:
3316                 case TBLOCK_SUBABORT_PENDING:
3317                 case TBLOCK_SUBRESTART:
3318                 case TBLOCK_SUBABORT_RESTART:
3319                 case TBLOCK_PREPARE:
3320                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3321                                  BlockStateAsString(s->blockState));
3322                         break;
3323         }
3324
3325         return result;
3326 }
3327
3328 /*
3329  *      UserAbortTransactionBlock
3330  *              This executes a ROLLBACK command.
3331  *
3332  * As above, we don't actually do anything here except change blockState.
3333  */
3334 void
3335 UserAbortTransactionBlock(void)
3336 {
3337         TransactionState s = CurrentTransactionState;
3338
3339         switch (s->blockState)
3340         {
3341                         /*
3342                          * We are inside a transaction block and we got a ROLLBACK command
3343                          * from the user, so tell CommitTransactionCommand to abort and
3344                          * exit the transaction block.
3345                          */
3346                 case TBLOCK_INPROGRESS:
3347                         s->blockState = TBLOCK_ABORT_PENDING;
3348                         break;
3349
3350                         /*
3351                          * We are inside a failed transaction block and we got a ROLLBACK
3352                          * command from the user.  Abort processing is already done, so
3353                          * CommitTransactionCommand just has to cleanup and go back to
3354                          * idle state.
3355                          */
3356                 case TBLOCK_ABORT:
3357                         s->blockState = TBLOCK_ABORT_END;
3358                         break;
3359
3360                         /*
3361                          * We are inside a subtransaction.      Mark everything up to top
3362                          * level as exitable.
3363                          */
3364                 case TBLOCK_SUBINPROGRESS:
3365                 case TBLOCK_SUBABORT:
3366                         while (s->parent != NULL)
3367                         {
3368                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3369                                         s->blockState = TBLOCK_SUBABORT_PENDING;
3370                                 else if (s->blockState == TBLOCK_SUBABORT)
3371                                         s->blockState = TBLOCK_SUBABORT_END;
3372                                 else
3373                                         elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3374                                                  BlockStateAsString(s->blockState));
3375                                 s = s->parent;
3376                         }
3377                         if (s->blockState == TBLOCK_INPROGRESS)
3378                                 s->blockState = TBLOCK_ABORT_PENDING;
3379                         else if (s->blockState == TBLOCK_ABORT)
3380                                 s->blockState = TBLOCK_ABORT_END;
3381                         else
3382                                 elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3383                                          BlockStateAsString(s->blockState));
3384                         break;
3385
3386                         /*
3387                          * The user issued ABORT when not inside a transaction. Issue a
3388                          * WARNING and go to abort state.  The upcoming call to
3389                          * CommitTransactionCommand() will then put us back into the
3390                          * default state.
3391                          */
3392                 case TBLOCK_STARTED:
3393                         ereport(NOTICE,
3394                                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
3395                                          errmsg("there is no transaction in progress")));
3396                         s->blockState = TBLOCK_ABORT_PENDING;
3397                         break;
3398
3399                         /* These cases are invalid. */
3400                 case TBLOCK_DEFAULT:
3401                 case TBLOCK_BEGIN:
3402                 case TBLOCK_SUBBEGIN:
3403                 case TBLOCK_END:
3404                 case TBLOCK_SUBRELEASE:
3405                 case TBLOCK_SUBCOMMIT:
3406                 case TBLOCK_ABORT_END:
3407                 case TBLOCK_SUBABORT_END:
3408                 case TBLOCK_ABORT_PENDING:
3409                 case TBLOCK_SUBABORT_PENDING:
3410                 case TBLOCK_SUBRESTART:
3411                 case TBLOCK_SUBABORT_RESTART:
3412                 case TBLOCK_PREPARE:
3413                         elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3414                                  BlockStateAsString(s->blockState));
3415                         break;
3416         }
3417 }
3418
3419 /*
3420  * DefineSavepoint
3421  *              This executes a SAVEPOINT command.
3422  */
3423 void
3424 DefineSavepoint(char *name)
3425 {
3426         TransactionState s = CurrentTransactionState;
3427
3428         switch (s->blockState)
3429         {
3430                 case TBLOCK_INPROGRESS:
3431                 case TBLOCK_SUBINPROGRESS:
3432                         /* Normal subtransaction start */
3433                         PushTransaction();
3434                         s = CurrentTransactionState;            /* changed by push */
3435
3436                         /*
3437                          * Savepoint names, like the TransactionState block itself, live
3438                          * in TopTransactionContext.
3439                          */
3440                         if (name)
3441                                 s->name = MemoryContextStrdup(TopTransactionContext, name);
3442                         break;
3443
3444                         /* These cases are invalid. */
3445                 case TBLOCK_DEFAULT:
3446                 case TBLOCK_STARTED:
3447                 case TBLOCK_BEGIN:
3448                 case TBLOCK_SUBBEGIN:
3449                 case TBLOCK_END:
3450                 case TBLOCK_SUBRELEASE:
3451                 case TBLOCK_SUBCOMMIT:
3452                 case TBLOCK_ABORT:
3453                 case TBLOCK_SUBABORT:
3454                 case TBLOCK_ABORT_END:
3455                 case TBLOCK_SUBABORT_END:
3456                 case TBLOCK_ABORT_PENDING:
3457                 case TBLOCK_SUBABORT_PENDING:
3458                 case TBLOCK_SUBRESTART:
3459                 case TBLOCK_SUBABORT_RESTART:
3460                 case TBLOCK_PREPARE:
3461                         elog(FATAL, "DefineSavepoint: unexpected state %s",
3462                                  BlockStateAsString(s->blockState));
3463                         break;
3464         }
3465 }
3466
3467 /*
3468  * ReleaseSavepoint
3469  *              This executes a RELEASE command.
3470  *
3471  * As above, we don't actually do anything here except change blockState.
3472  */
3473 void
3474 ReleaseSavepoint(List *options)
3475 {
3476         TransactionState s = CurrentTransactionState;
3477         TransactionState target,
3478                                 xact;
3479         ListCell   *cell;
3480         char       *name = NULL;
3481
3482         switch (s->blockState)
3483         {
3484                         /*
3485                          * We can't rollback to a savepoint if there is no savepoint
3486                          * defined.
3487                          */
3488                 case TBLOCK_INPROGRESS:
3489                         ereport(ERROR,
3490                                         (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3491                                          errmsg("no such savepoint")));
3492                         break;
3493
3494                         /*
3495                          * We are in a non-aborted subtransaction.      This is the only valid
3496                          * case.
3497                          */
3498                 case TBLOCK_SUBINPROGRESS:
3499                         break;
3500
3501                         /* These cases are invalid. */
3502                 case TBLOCK_DEFAULT:
3503                 case TBLOCK_STARTED:
3504                 case TBLOCK_BEGIN:
3505                 case TBLOCK_SUBBEGIN:
3506                 case TBLOCK_END:
3507                 case TBLOCK_SUBRELEASE:
3508                 case TBLOCK_SUBCOMMIT:
3509                 case TBLOCK_ABORT:
3510                 case TBLOCK_SUBABORT:
3511                 case TBLOCK_ABORT_END:
3512                 case TBLOCK_SUBABORT_END:
3513                 case TBLOCK_ABORT_PENDING:
3514                 case TBLOCK_SUBABORT_PENDING:
3515                 case TBLOCK_SUBRESTART:
3516                 case TBLOCK_SUBABORT_RESTART:
3517                 case TBLOCK_PREPARE:
3518                         elog(FATAL, "ReleaseSavepoint: unexpected state %s",
3519                                  BlockStateAsString(s->blockState));
3520                         break;
3521         }
3522
3523         foreach(cell, options)
3524         {
3525                 DefElem    *elem = lfirst(cell);
3526
3527                 if (strcmp(elem->defname, "savepoint_name") == 0)
3528                         name = strVal(elem->arg);
3529         }
3530
3531         Assert(PointerIsValid(name));
3532
3533         for (target = s; PointerIsValid(target); target = target->parent)
3534         {
3535                 if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
3536                         break;
3537         }
3538
3539         if (!PointerIsValid(target))
3540                 ereport(ERROR,
3541                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3542                                  errmsg("no such savepoint")));
3543
3544         /* disallow crossing savepoint level boundaries */
3545         if (target->savepointLevel != s->savepointLevel)
3546                 ereport(ERROR,
3547                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3548                                  errmsg("no such savepoint")));
3549
3550         /*
3551          * Mark "commit pending" all subtransactions up to the target
3552          * subtransaction.      The actual commits will happen when control gets to
3553          * CommitTransactionCommand.
3554          */
3555         xact = CurrentTransactionState;
3556         for (;;)
3557         {
3558                 Assert(xact->blockState == TBLOCK_SUBINPROGRESS);
3559                 xact->blockState = TBLOCK_SUBRELEASE;
3560                 if (xact == target)
3561                         break;
3562                 xact = xact->parent;
3563                 Assert(PointerIsValid(xact));
3564         }
3565 }
3566
3567 /*
3568  * RollbackToSavepoint
3569  *              This executes a ROLLBACK TO <savepoint> command.
3570  *
3571  * As above, we don't actually do anything here except change blockState.
3572  */
3573 void
3574 RollbackToSavepoint(List *options)
3575 {
3576         TransactionState s = CurrentTransactionState;
3577         TransactionState target,
3578                                 xact;
3579         ListCell   *cell;
3580         char       *name = NULL;
3581
3582         switch (s->blockState)
3583         {
3584                         /*
3585                          * We can't rollback to a savepoint if there is no savepoint
3586                          * defined.
3587                          */
3588                 case TBLOCK_INPROGRESS:
3589                 case TBLOCK_ABORT:
3590                         ereport(ERROR,
3591                                         (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3592                                          errmsg("no such savepoint")));
3593                         break;
3594
3595                         /*
3596                          * There is at least one savepoint, so proceed.
3597                          */
3598                 case TBLOCK_SUBINPROGRESS:
3599                 case TBLOCK_SUBABORT:
3600                         break;
3601
3602                         /* These cases are invalid. */
3603                 case TBLOCK_DEFAULT:
3604                 case TBLOCK_STARTED:
3605                 case TBLOCK_BEGIN:
3606                 case TBLOCK_SUBBEGIN:
3607                 case TBLOCK_END:
3608                 case TBLOCK_SUBRELEASE:
3609                 case TBLOCK_SUBCOMMIT:
3610                 case TBLOCK_ABORT_END:
3611                 case TBLOCK_SUBABORT_END:
3612                 case TBLOCK_ABORT_PENDING:
3613                 case TBLOCK_SUBABORT_PENDING:
3614                 case TBLOCK_SUBRESTART:
3615                 case TBLOCK_SUBABORT_RESTART:
3616                 case TBLOCK_PREPARE:
3617                         elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3618                                  BlockStateAsString(s->blockState));
3619                         break;
3620         }
3621
3622         foreach(cell, options)
3623         {
3624                 DefElem    *elem = lfirst(cell);
3625
3626                 if (strcmp(elem->defname, "savepoint_name") == 0)
3627                         name = strVal(elem->arg);
3628         }
3629
3630         Assert(PointerIsValid(name));
3631
3632         for (target = s; PointerIsValid(target); target = target->parent)
3633         {
3634                 if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
3635                         break;
3636         }
3637
3638         if (!PointerIsValid(target))
3639                 ereport(ERROR,
3640                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3641                                  errmsg("no such savepoint")));
3642
3643         /* disallow crossing savepoint level boundaries */
3644         if (target->savepointLevel != s->savepointLevel)
3645                 ereport(ERROR,
3646                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3647                                  errmsg("no such savepoint")));
3648
3649         /*
3650          * Mark "abort pending" all subtransactions up to the target
3651          * subtransaction.      The actual aborts will happen when control gets to
3652          * CommitTransactionCommand.
3653          */
3654         xact = CurrentTransactionState;
3655         for (;;)
3656         {
3657                 if (xact == target)
3658                         break;
3659                 if (xact->blockState == TBLOCK_SUBINPROGRESS)
3660                         xact->blockState = TBLOCK_SUBABORT_PENDING;
3661                 else if (xact->blockState == TBLOCK_SUBABORT)
3662                         xact->blockState = TBLOCK_SUBABORT_END;
3663                 else
3664                         elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3665                                  BlockStateAsString(xact->blockState));
3666                 xact = xact->parent;
3667                 Assert(PointerIsValid(xact));
3668         }
3669
3670         /* And mark the target as "restart pending" */
3671         if (xact->blockState == TBLOCK_SUBINPROGRESS)
3672                 xact->blockState = TBLOCK_SUBRESTART;
3673         else if (xact->blockState == TBLOCK_SUBABORT)
3674                 xact->blockState = TBLOCK_SUBABORT_RESTART;
3675         else
3676                 elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3677                          BlockStateAsString(xact->blockState));
3678 }
3679
3680 /*
3681  * BeginInternalSubTransaction
3682  *              This is the same as DefineSavepoint except it allows TBLOCK_STARTED,
3683  *              TBLOCK_END, and TBLOCK_PREPARE states, and therefore it can safely be
3684  *              used in functions that might be called when not inside a BEGIN block
3685  *              or when running deferred triggers at COMMIT/PREPARE time.  Also, it
3686  *              automatically does CommitTransactionCommand/StartTransactionCommand
3687  *              instead of expecting the caller to do it.
3688  */
3689 void
3690 BeginInternalSubTransaction(char *name)
3691 {
3692         TransactionState s = CurrentTransactionState;
3693
3694         switch (s->blockState)
3695         {
3696                 case TBLOCK_STARTED:
3697                 case TBLOCK_INPROGRESS:
3698                 case TBLOCK_END:
3699                 case TBLOCK_PREPARE:
3700                 case TBLOCK_SUBINPROGRESS:
3701                         /* Normal subtransaction start */
3702                         PushTransaction();
3703                         s = CurrentTransactionState;            /* changed by push */
3704
3705                         /*
3706                          * Savepoint names, like the TransactionState block itself, live
3707                          * in TopTransactionContext.
3708                          */
3709                         if (name)
3710                                 s->name = MemoryContextStrdup(TopTransactionContext, name);
3711                         break;
3712
3713                         /* These cases are invalid. */
3714                 case TBLOCK_DEFAULT:
3715                 case TBLOCK_BEGIN:
3716                 case TBLOCK_SUBBEGIN:
3717                 case TBLOCK_SUBRELEASE:
3718                 case TBLOCK_SUBCOMMIT:
3719                 case TBLOCK_ABORT:
3720                 case TBLOCK_SUBABORT:
3721                 case TBLOCK_ABORT_END:
3722                 case TBLOCK_SUBABORT_END:
3723                 case TBLOCK_ABORT_PENDING:
3724                 case TBLOCK_SUBABORT_PENDING:
3725                 case TBLOCK_SUBRESTART:
3726                 case TBLOCK_SUBABORT_RESTART:
3727                         elog(FATAL, "BeginInternalSubTransaction: unexpected state %s",
3728                                  BlockStateAsString(s->blockState));
3729                         break;
3730         }
3731
3732         CommitTransactionCommand();
3733         StartTransactionCommand();
3734 }
3735
3736 /*
3737  * ReleaseCurrentSubTransaction
3738  *
3739  * RELEASE (ie, commit) the innermost subtransaction, regardless of its
3740  * savepoint name (if any).
3741  * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
3742  */
3743 void
3744 ReleaseCurrentSubTransaction(void)
3745 {
3746         TransactionState s = CurrentTransactionState;
3747
3748         if (s->blockState != TBLOCK_SUBINPROGRESS)
3749                 elog(ERROR, "ReleaseCurrentSubTransaction: unexpected state %s",
3750                          BlockStateAsString(s->blockState));
3751         Assert(s->state == TRANS_INPROGRESS);
3752         MemoryContextSwitchTo(CurTransactionContext);
3753         CommitSubTransaction();
3754         s = CurrentTransactionState;    /* changed by pop */
3755         Assert(s->state == TRANS_INPROGRESS);
3756 }
3757
3758 /*
3759  * RollbackAndReleaseCurrentSubTransaction
3760  *
3761  * ROLLBACK and RELEASE (ie, abort) the innermost subtransaction, regardless
3762  * of its savepoint name (if any).
3763  * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
3764  */
3765 void
3766 RollbackAndReleaseCurrentSubTransaction(void)
3767 {
3768         TransactionState s = CurrentTransactionState;
3769
3770         switch (s->blockState)
3771         {
3772                         /* Must be in a subtransaction */
3773                 case TBLOCK_SUBINPROGRESS:
3774                 case TBLOCK_SUBABORT:
3775                         break;
3776
3777                         /* These cases are invalid. */
3778                 case TBLOCK_DEFAULT:
3779                 case TBLOCK_STARTED:
3780                 case TBLOCK_BEGIN:
3781                 case TBLOCK_SUBBEGIN:
3782                 case TBLOCK_INPROGRESS:
3783                 case TBLOCK_END:
3784                 case TBLOCK_SUBRELEASE:
3785                 case TBLOCK_SUBCOMMIT:
3786                 case TBLOCK_ABORT:
3787                 case TBLOCK_ABORT_END:
3788                 case TBLOCK_SUBABORT_END:
3789                 case TBLOCK_ABORT_PENDING:
3790                 case TBLOCK_SUBABORT_PENDING:
3791                 case TBLOCK_SUBRESTART:
3792                 case TBLOCK_SUBABORT_RESTART:
3793                 case TBLOCK_PREPARE:
3794                         elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s",
3795                                  BlockStateAsString(s->blockState));
3796                         break;
3797         }
3798
3799         /*
3800          * Abort the current subtransaction, if needed.
3801          */
3802         if (s->blockState == TBLOCK_SUBINPROGRESS)
3803                 AbortSubTransaction();
3804
3805         /* And clean it up, too */
3806         CleanupSubTransaction();
3807
3808         s = CurrentTransactionState;    /* changed by pop */
3809         AssertState(s->blockState == TBLOCK_SUBINPROGRESS ||
3810                                 s->blockState == TBLOCK_INPROGRESS ||
3811                                 s->blockState == TBLOCK_STARTED);
3812 }
3813
/*
 *	AbortOutOfAnyTransaction
 *
 *	This routine is provided for error recovery purposes.  It aborts any
 *	active transaction or transaction block, leaving the system in a known
 *	idle state.
 *
 *	Each pass of the loop deals with the current (innermost) transaction
 *	state entry.  A subtransaction entry is aborted if necessary, cleaned up
 *	and popped, so the next pass sees its parent.  A top-level entry is
 *	aborted if necessary, cleaned up, and has its blockState reset to
 *	TBLOCK_DEFAULT, which terminates the loop.
 */
void
AbortOutOfAnyTransaction(void)
{
	TransactionState s = CurrentTransactionState;

	/*
	 * Get out of any transaction or nested transaction
	 */
	do
	{
		switch (s->blockState)
		{
			case TBLOCK_DEFAULT:
				/* Not in a transaction, do nothing */
				break;
			case TBLOCK_STARTED:
			case TBLOCK_BEGIN:
			case TBLOCK_INPROGRESS:
			case TBLOCK_END:
			case TBLOCK_ABORT_PENDING:
			case TBLOCK_PREPARE:
				/* In a transaction, so clean up */
				AbortTransaction();
				CleanupTransaction();
				s->blockState = TBLOCK_DEFAULT;
				break;
			case TBLOCK_ABORT:
			case TBLOCK_ABORT_END:
				/* AbortTransaction already done, still need Cleanup */
				CleanupTransaction();
				s->blockState = TBLOCK_DEFAULT;
				break;

				/*
				 * In a subtransaction, so clean it up and abort parent too
				 * (the parent is handled by the next loop iteration)
				 */
			case TBLOCK_SUBBEGIN:
			case TBLOCK_SUBINPROGRESS:
			case TBLOCK_SUBRELEASE:
			case TBLOCK_SUBCOMMIT:
			case TBLOCK_SUBABORT_PENDING:
			case TBLOCK_SUBRESTART:
				AbortSubTransaction();
				CleanupSubTransaction();
				s = CurrentTransactionState;	/* changed by pop */
				break;

			case TBLOCK_SUBABORT:
			case TBLOCK_SUBABORT_END:
			case TBLOCK_SUBABORT_RESTART:
				/* As above, but AbortSubTransaction already done */
				CleanupSubTransaction();
				s = CurrentTransactionState;	/* changed by pop */
				break;
		}
	} while (s->blockState != TBLOCK_DEFAULT);

	/* Should be out of all subxacts now */
	Assert(s->parent == NULL);
}
3881
3882 /*
3883  * IsTransactionBlock --- are we within a transaction block?
3884  */
3885 bool
3886 IsTransactionBlock(void)
3887 {
3888         TransactionState s = CurrentTransactionState;
3889
3890         if (s->blockState == TBLOCK_DEFAULT || s->blockState == TBLOCK_STARTED)
3891                 return false;
3892
3893         return true;
3894 }
3895
3896 /*
3897  * IsTransactionOrTransactionBlock --- are we within either a transaction
3898  * or a transaction block?      (The backend is only really "idle" when this
3899  * returns false.)
3900  *
3901  * This should match up with IsTransactionBlock and IsTransactionState.
3902  */
3903 bool
3904 IsTransactionOrTransactionBlock(void)
3905 {
3906         TransactionState s = CurrentTransactionState;
3907
3908         if (s->blockState == TBLOCK_DEFAULT)
3909                 return false;
3910
3911         return true;
3912 }
3913
3914 /*
3915  * TransactionBlockStatusCode - return status code to send in ReadyForQuery
3916  */
3917 char
3918 TransactionBlockStatusCode(void)
3919 {
3920         TransactionState s = CurrentTransactionState;
3921
3922         switch (s->blockState)
3923         {
3924                 case TBLOCK_DEFAULT:
3925                 case TBLOCK_STARTED:
3926                         return 'I';                     /* idle --- not in transaction */
3927                 case TBLOCK_BEGIN:
3928                 case TBLOCK_SUBBEGIN:
3929                 case TBLOCK_INPROGRESS:
3930                 case TBLOCK_SUBINPROGRESS:
3931                 case TBLOCK_END:
3932                 case TBLOCK_SUBRELEASE:
3933                 case TBLOCK_SUBCOMMIT:
3934                 case TBLOCK_PREPARE:
3935                         return 'T';                     /* in transaction */
3936                 case TBLOCK_ABORT:
3937                 case TBLOCK_SUBABORT:
3938                 case TBLOCK_ABORT_END:
3939                 case TBLOCK_SUBABORT_END:
3940                 case TBLOCK_ABORT_PENDING:
3941                 case TBLOCK_SUBABORT_PENDING:
3942                 case TBLOCK_SUBRESTART:
3943                 case TBLOCK_SUBABORT_RESTART:
3944                         return 'E';                     /* in failed transaction */
3945         }
3946
3947         /* should never get here */
3948         elog(FATAL, "invalid transaction block state: %s",
3949                  BlockStateAsString(s->blockState));
3950         return 0;                                       /* keep compiler quiet */
3951 }
3952
3953 /*
3954  * IsSubTransaction
3955  */
3956 bool
3957 IsSubTransaction(void)
3958 {
3959         TransactionState s = CurrentTransactionState;
3960
3961         if (s->nestingLevel >= 2)
3962                 return true;
3963
3964         return false;
3965 }
3966
/*
 * StartSubTransaction
 *
 * Finish starting the subtransaction whose state entry is already on top of
 * the transaction state stack: run the per-subsystem start-of-subxact hooks,
 * move the entry to TRANS_INPROGRESS, and fire the start-of-subxact
 * callbacks.
 *
 * If you're wondering why this is separate from PushTransaction: it's because
 * we can't conveniently do this stuff right inside DefineSavepoint.  The
 * SAVEPOINT utility command will be executed inside a Portal, and if we
 * muck with CurrentMemoryContext or CurrentResourceOwner then exit from
 * the Portal will undo those settings.  So we make DefineSavepoint just
 * push a dummy transaction block, and when control returns to the main
 * idle loop, CommitTransactionCommand will be called, and we'll come here
 * to finish starting the subtransaction.
 */
static void
StartSubTransaction(void)
{
	TransactionState s = CurrentTransactionState;

	/* An unexpected starting state is only warned about; we carry on */
	if (s->state != TRANS_DEFAULT)
		elog(WARNING, "StartSubTransaction while in %s state",
			 TransStateAsString(s->state));

	s->state = TRANS_START;

	/*
	 * Initialize subsystems for new subtransaction
	 *
	 * must initialize resource-management stuff first
	 */
	AtSubStart_Memory();
	AtSubStart_ResourceOwner();
	AtSubStart_Inval();
	AtSubStart_Notify();
	AfterTriggerBeginSubXact();

	s->state = TRANS_INPROGRESS;

	/*
	 * Call start-of-subxact callbacks, passing this subtransaction's ID and
	 * its parent's
	 */
	CallSubXactCallbacks(SUBXACT_EVENT_START_SUB, s->subTransactionId,
						 s->parent->subTransactionId);

	ShowTransactionState("StartSubTransaction");
}
4011
/*
 * CommitSubTransaction
 *
 *	Commit the current (innermost) subtransaction: run the subcommit hooks
 *	of the various subsystems, release or hand over its resources, and pop
 *	back to the parent's transaction state entry.
 *
 *	The caller has to make sure to always reassign CurrentTransactionState
 *	if it has a local pointer to it after calling this function.
 */
static void
CommitSubTransaction(void)
{
	TransactionState s = CurrentTransactionState;

	ShowTransactionState("CommitSubTransaction");

	/* An unexpected state is only warned about; we carry on regardless */
	if (s->state != TRANS_INPROGRESS)
		elog(WARNING, "CommitSubTransaction while in %s state",
			 TransStateAsString(s->state));

	/* Pre-commit processing goes here -- nothing to do at the moment */

	s->state = TRANS_COMMIT;

	/* Must CCI to ensure commands of subtransaction are seen as done */
	CommandCounterIncrement();

	/*
	 * Prior to 8.4 we marked subcommit in clog at this point.  We now only
	 * perform that step, if required, as part of the atomic update of the
	 * whole transaction tree at top level commit or abort.
	 */

	/* Post-commit cleanup */
	if (TransactionIdIsValid(s->transactionId))
		AtSubCommit_childXids();
	AfterTriggerEndSubXact(true);
	AtSubCommit_Portals(s->subTransactionId,
						s->parent->subTransactionId,
						s->parent->curTransactionOwner);
	AtEOSubXact_LargeObject(true, s->subTransactionId,
							s->parent->subTransactionId);
	AtSubCommit_Notify();

	CallSubXactCallbacks(SUBXACT_EVENT_COMMIT_SUB, s->subTransactionId,
						 s->parent->subTransactionId);

	ResourceOwnerRelease(s->curTransactionOwner,
						 RESOURCE_RELEASE_BEFORE_LOCKS,
						 true, false);
	AtEOSubXact_RelationCache(true, s->subTransactionId,
							  s->parent->subTransactionId);
	AtEOSubXact_Inval(true);
	AtSubCommit_smgr();

	/*
	 * The only lock we actually release here is the subtransaction XID lock.
	 * CurrentResourceOwner is pointed at this subtransaction's owner before
	 * the XID lock is deleted.
	 */
	CurrentResourceOwner = s->curTransactionOwner;
	if (TransactionIdIsValid(s->transactionId))
		XactLockTableDelete(s->transactionId);

	/*
	 * Other locks should get transferred to their parent resource owner.
	 */
	ResourceOwnerRelease(s->curTransactionOwner,
						 RESOURCE_RELEASE_LOCKS,
						 true, false);
	ResourceOwnerRelease(s->curTransactionOwner,
						 RESOURCE_RELEASE_AFTER_LOCKS,
						 true, false);

	AtEOXact_GUC(true, s->gucNestLevel);
	AtEOSubXact_SPI(true, s->subTransactionId);
	AtEOSubXact_on_commit_actions(true, s->subTransactionId,
								  s->parent->subTransactionId);
	AtEOSubXact_Namespace(true, s->subTransactionId,
						  s->parent->subTransactionId);
	AtEOSubXact_Files(true, s->subTransactionId,
					  s->parent->subTransactionId);
	AtEOSubXact_HashTables(true, s->nestingLevel);
	AtEOSubXact_PgStat(true, s->nestingLevel);
	AtSubCommit_Snapshot(s->nestingLevel);

	/*
	 * We need to restore the upper transaction's read-only state, in case the
	 * upper is read-write while the child is read-only; GUC will incorrectly
	 * think it should leave the child state in place.
	 */
	XactReadOnly = s->prevXactReadOnly;

	/*
	 * Point the resource owner globals at the parent's owner before deleting
	 * this subtransaction's own owner.
	 */
	CurrentResourceOwner = s->parent->curTransactionOwner;
	CurTransactionResourceOwner = s->parent->curTransactionOwner;
	ResourceOwnerDelete(s->curTransactionOwner);
	s->curTransactionOwner = NULL;

	AtSubCommit_Memory();

	/* PopTransaction expects the entry to be back in TRANS_DEFAULT */
	s->state = TRANS_DEFAULT;

	/* This frees the entry "s" points at; it must not be used afterwards */
	PopTransaction();
}
4111
/*
 * AbortSubTransaction
 *
 *	Perform abort processing for the current (innermost) subtransaction.
 *	Interrupts are held off for the duration.  The transaction state stack
 *	is not popped here; the callers visible in this part of the file follow
 *	up with CleanupSubTransaction, which does the pop.
 */
static void
AbortSubTransaction(void)
{
	TransactionState s = CurrentTransactionState;

	/* Prevent cancel/die interrupt while cleaning up */
	HOLD_INTERRUPTS();

	/* Make sure we have a valid memory context and resource owner */
	AtSubAbort_Memory();
	AtSubAbort_ResourceOwner();

	/*
	 * Release any LW locks we might be holding as quickly as possible.
	 * (Regular locks, however, must be held till we finish aborting.)
	 * Releasing LW locks is critical since we might try to grab them again
	 * while cleaning up!
	 *
	 * FIXME This may be incorrect --- Are there some locks we should keep?
	 * Buffer locks, for example?  I don't think so but I'm not sure.
	 */
	LWLockReleaseAll();

	AbortBufferIO();
	UnlockBuffers();

	LockWaitCancel();

	/*
	 * check the current transaction state
	 *
	 * An unexpected state is only warned about; abort processing continues.
	 */
	ShowTransactionState("AbortSubTransaction");

	if (s->state != TRANS_INPROGRESS)
		elog(WARNING, "AbortSubTransaction while in %s state",
			 TransStateAsString(s->state));

	s->state = TRANS_ABORT;

	/*
	 * Reset user ID which might have been changed transiently.  (See notes in
	 * AbortTransaction.)
	 */
	SetUserIdAndSecContext(s->prevUser, s->prevSecContext);

	/*
	 * We can skip all this stuff if the subxact failed before creating a
	 * ResourceOwner...
	 */
	if (s->curTransactionOwner)
	{
		AfterTriggerEndSubXact(false);
		AtSubAbort_Portals(s->subTransactionId,
						   s->parent->subTransactionId,
						   s->parent->curTransactionOwner);
		AtEOSubXact_LargeObject(false, s->subTransactionId,
								s->parent->subTransactionId);
		AtSubAbort_Notify();

		/* Advertise the fact that we aborted in pg_clog. */
		(void) RecordTransactionAbort(true);

		/* Post-abort cleanup */
		if (TransactionIdIsValid(s->transactionId))
			AtSubAbort_childXids();

		CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId,
							 s->parent->subTransactionId);

		/* Release resources in three phases: before, at, and after locks */
		ResourceOwnerRelease(s->curTransactionOwner,
							 RESOURCE_RELEASE_BEFORE_LOCKS,
							 false, false);
		AtEOSubXact_RelationCache(false, s->subTransactionId,
								  s->parent->subTransactionId);
		AtEOSubXact_Inval(false);
		AtSubAbort_smgr();
		ResourceOwnerRelease(s->curTransactionOwner,
							 RESOURCE_RELEASE_LOCKS,
							 false, false);
		ResourceOwnerRelease(s->curTransactionOwner,
							 RESOURCE_RELEASE_AFTER_LOCKS,
							 false, false);

		AtEOXact_GUC(false, s->gucNestLevel);
		AtEOSubXact_SPI(false, s->subTransactionId);
		AtEOSubXact_on_commit_actions(false, s->subTransactionId,
									  s->parent->subTransactionId);
		AtEOSubXact_Namespace(false, s->subTransactionId,
							  s->parent->subTransactionId);
		AtEOSubXact_Files(false, s->subTransactionId,
						  s->parent->subTransactionId);
		AtEOSubXact_HashTables(false, s->nestingLevel);
		AtEOSubXact_PgStat(false, s->nestingLevel);
		AtSubAbort_Snapshot(s->nestingLevel);
	}

	/*
	 * Restore the upper transaction's read-only state, too.  This should be
	 * redundant with GUC's cleanup but we may as well do it for consistency
	 * with the commit case.
	 */
	XactReadOnly = s->prevXactReadOnly;

	/* Matches the HOLD_INTERRUPTS() at the top of the function */
	RESUME_INTERRUPTS();
}
4220
4221 /*
4222  * CleanupSubTransaction
4223  *
4224  *      The caller has to make sure to always reassign CurrentTransactionState
4225  *      if it has a local pointer to it after calling this function.
4226  */
4227 static void
4228 CleanupSubTransaction(void)
4229 {
4230         TransactionState s = CurrentTransactionState;
4231
4232         ShowTransactionState("CleanupSubTransaction");
4233
4234         if (s->state != TRANS_ABORT)
4235                 elog(WARNING, "CleanupSubTransaction while in %s state",
4236                          TransStateAsString(s->state));
4237
4238         AtSubCleanup_Portals(s->subTransactionId);
4239
4240         CurrentResourceOwner = s->parent->curTransactionOwner;
4241         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4242         if (s->curTransactionOwner)
4243                 ResourceOwnerDelete(s->curTransactionOwner);
4244         s->curTransactionOwner = NULL;
4245
4246         AtSubCleanup_Memory();
4247
4248         s->state = TRANS_DEFAULT;
4249
4250         PopTransaction();
4251 }
4252
4253 /*
4254  * PushTransaction
4255  *              Create transaction state stack entry for a subtransaction
4256  *
4257  *      The caller has to make sure to always reassign CurrentTransactionState
4258  *      if it has a local pointer to it after calling this function.
4259  */
4260 static void
4261 PushTransaction(void)
4262 {
4263         TransactionState p = CurrentTransactionState;
4264         TransactionState s;
4265
4266         /*
4267          * We keep subtransaction state nodes in TopTransactionContext.
4268          */
4269         s = (TransactionState)
4270                 MemoryContextAllocZero(TopTransactionContext,
4271                                                            sizeof(TransactionStateData));
4272
4273         /*
4274          * Assign a subtransaction ID, watching out for counter wraparound.
4275          */
4276         currentSubTransactionId += 1;
4277         if (currentSubTransactionId == InvalidSubTransactionId)
4278         {
4279                 currentSubTransactionId -= 1;
4280                 pfree(s);
4281                 ereport(ERROR,
4282                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4283                                  errmsg("cannot have more than 2^32-1 subtransactions in a transaction")));
4284         }
4285
4286         /*
4287          * We can now stack a minimally valid subtransaction without fear of
4288          * failure.
4289          */
4290         s->transactionId = InvalidTransactionId;        /* until assigned */
4291         s->subTransactionId = currentSubTransactionId;
4292         s->parent = p;
4293         s->nestingLevel = p->nestingLevel + 1;
4294         s->gucNestLevel = NewGUCNestLevel();
4295         s->savepointLevel = p->savepointLevel;
4296         s->state = TRANS_DEFAULT;
4297         s->blockState = TBLOCK_SUBBEGIN;
4298         GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
4299         s->prevXactReadOnly = XactReadOnly;
4300
4301         CurrentTransactionState = s;
4302
4303         /*
4304          * AbortSubTransaction and CleanupSubTransaction have to be able to cope
4305          * with the subtransaction from here on out; in particular they should not
4306          * assume that it necessarily has a transaction context, resource owner,
4307          * or XID.
4308          */
4309 }
4310
4311 /*
4312  * PopTransaction
4313  *              Pop back to parent transaction state
4314  *
4315  *      The caller has to make sure to always reassign CurrentTransactionState
4316  *      if it has a local pointer to it after calling this function.
4317  */
4318 static void
4319 PopTransaction(void)
4320 {
4321         TransactionState s = CurrentTransactionState;
4322
4323         if (s->state != TRANS_DEFAULT)
4324                 elog(WARNING, "PopTransaction while in %s state",
4325                          TransStateAsString(s->state));
4326
4327         if (s->parent == NULL)
4328                 elog(FATAL, "PopTransaction with no parent");
4329
4330         CurrentTransactionState = s->parent;
4331
4332         /* Let's just make sure CurTransactionContext is good */
4333         CurTransactionContext = s->parent->curTransactionContext;
4334         MemoryContextSwitchTo(CurTransactionContext);
4335
4336         /* Ditto for ResourceOwner links */
4337         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4338         CurrentResourceOwner = s->parent->curTransactionOwner;
4339
4340         /* Free the old child structure */
4341         if (s->name)
4342                 pfree(s->name);
4343         pfree(s);
4344 }
4345
4346 /*
4347  * ShowTransactionState
4348  *              Debug support
4349  */
4350 static void
4351 ShowTransactionState(const char *str)
4352 {
4353         /* skip work if message will definitely not be printed */
4354         if (log_min_messages <= DEBUG3 || client_min_messages <= DEBUG3)
4355         {
4356                 elog(DEBUG3, "%s", str);
4357                 ShowTransactionStateRec(CurrentTransactionState);
4358         }
4359 }
4360
4361 /*
4362  * ShowTransactionStateRec
4363  *              Recursive subroutine for ShowTransactionState
4364  */
4365 static void
4366 ShowTransactionStateRec(TransactionState s)
4367 {
4368         StringInfoData buf;
4369
4370         initStringInfo(&buf);
4371
4372         if (s->nChildXids > 0)
4373         {
4374                 int                     i;
4375
4376                 appendStringInfo(&buf, "%u", s->childXids[0]);
4377                 for (i = 1; i < s->nChildXids; i++)
4378                         appendStringInfo(&buf, " %u", s->childXids[i]);
4379         }
4380
4381         if (s->parent)
4382                 ShowTransactionStateRec(s->parent);
4383
4384         /* use ereport to suppress computation if msg will not be printed */
4385         ereport(DEBUG3,
4386                         (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u%s, nestlvl: %d, children: %s",
4387                                                          PointerIsValid(s->name) ? s->name : "unnamed",
4388                                                          BlockStateAsString(s->blockState),
4389                                                          TransStateAsString(s->state),
4390                                                          (unsigned int) s->transactionId,
4391                                                          (unsigned int) s->subTransactionId,
4392                                                          (unsigned int) currentCommandId,
4393                                                          currentCommandIdUsed ? " (used)" : "",
4394                                                          s->nestingLevel, buf.data)));
4395
4396         pfree(buf.data);
4397 }
4398
4399 /*
4400  * BlockStateAsString
4401  *              Debug support
4402  */
4403 static const char *
4404 BlockStateAsString(TBlockState blockState)
4405 {
4406         switch (blockState)
4407         {
4408                 case TBLOCK_DEFAULT:
4409                         return "DEFAULT";
4410                 case TBLOCK_STARTED:
4411                         return "STARTED";
4412                 case TBLOCK_BEGIN:
4413                         return "BEGIN";
4414                 case TBLOCK_INPROGRESS:
4415                         return "INPROGRESS";
4416                 case TBLOCK_END:
4417                         return "END";
4418                 case TBLOCK_ABORT:
4419                         return "ABORT";
4420                 case TBLOCK_ABORT_END:
4421                         return "ABORT END";
4422                 case TBLOCK_ABORT_PENDING:
4423                         return "ABORT PEND";
4424                 case TBLOCK_PREPARE:
4425                         return "PREPARE";
4426                 case TBLOCK_SUBBEGIN:
4427                         return "SUB BEGIN";
4428                 case TBLOCK_SUBINPROGRESS:
4429                         return "SUB INPROGRS";
4430                 case TBLOCK_SUBRELEASE:
4431                         return "SUB RELEASE";
4432                 case TBLOCK_SUBCOMMIT:
4433                         return "SUB COMMIT";
4434                 case TBLOCK_SUBABORT:
4435                         return "SUB ABORT";
4436                 case TBLOCK_SUBABORT_END:
4437                         return "SUB ABORT END";
4438                 case TBLOCK_SUBABORT_PENDING:
4439                         return "SUB ABRT PEND";
4440                 case TBLOCK_SUBRESTART:
4441                         return "SUB RESTART";
4442                 case TBLOCK_SUBABORT_RESTART:
4443                         return "SUB AB RESTRT";
4444         }
4445         return "UNRECOGNIZED";
4446 }
4447
4448 /*
4449  * TransStateAsString
4450  *              Debug support
4451  */
4452 static const char *
4453 TransStateAsString(TransState state)
4454 {
4455         switch (state)
4456         {
4457                 case TRANS_DEFAULT:
4458                         return "DEFAULT";
4459                 case TRANS_START:
4460                         return "START";
4461                 case TRANS_INPROGRESS:
4462                         return "INPROGR";
4463                 case TRANS_COMMIT:
4464                         return "COMMIT";
4465                 case TRANS_ABORT:
4466                         return "ABORT";
4467                 case TRANS_PREPARE:
4468                         return "PREPARE";
4469         }
4470         return "UNRECOGNIZED";
4471 }
4472
4473 /*
4474  * xactGetCommittedChildren
4475  *
4476  * Gets the list of committed children of the current transaction.      The return
4477  * value is the number of child transactions.  *ptr is set to point to an
4478  * array of TransactionIds.  The array is allocated in TopTransactionContext;
4479  * the caller should *not* pfree() it (this is a change from pre-8.4 code!).
4480  * If there are no subxacts, *ptr is set to NULL.
4481  */
4482 int
4483 xactGetCommittedChildren(TransactionId **ptr)
4484 {
4485         TransactionState s = CurrentTransactionState;
4486
4487         if (s->nChildXids == 0)
4488                 *ptr = NULL;
4489         else
4490                 *ptr = s->childXids;
4491
4492         return s->nChildXids;
4493 }
4494
4495 /*
4496  *      XLOG support routines
4497  */
4498
4499 /*
4500  * Before 9.0 this was a fairly short function, but now it performs many
4501  * actions for which the order of execution is critical.
4502  */
4503 static void
4504 xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
4505                                         TransactionId *sub_xids, int nsubxacts,
4506                                         SharedInvalidationMessage *inval_msgs, int nmsgs,
4507                                         RelFileNode *xnodes, int nrels,
4508                                         Oid dbId, Oid tsId,
4509                                         uint32 xinfo)
4510 {
4511         TransactionId max_xid;
4512         int                     i;
4513
4514         max_xid = TransactionIdLatest(xid, nsubxacts, sub_xids);
4515
4516         /*
4517          * Make sure nextXid is beyond any XID mentioned in the record.
4518          *
4519          * We don't expect anyone else to modify nextXid, hence we don't need to
4520          * hold a lock while checking this. We still acquire the lock to modify
4521          * it, though.
4522          */
4523         if (TransactionIdFollowsOrEquals(max_xid,
4524                                                                          ShmemVariableCache->nextXid))
4525         {
4526                 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
4527                 ShmemVariableCache->nextXid = max_xid;
4528                 TransactionIdAdvance(ShmemVariableCache->nextXid);
4529                 LWLockRelease(XidGenLock);
4530         }
4531
4532         if (standbyState == STANDBY_DISABLED)
4533         {
4534                 /*
4535                  * Mark the transaction committed in pg_clog.
4536                  */
4537                 TransactionIdCommitTree(xid, nsubxacts, sub_xids);
4538         }
4539         else
4540         {
4541                 /*
4542                  * If a transaction completion record arrives that has as-yet
4543                  * unobserved subtransactions then this will not have been fully
4544                  * handled by the call to RecordKnownAssignedTransactionIds() in the
4545                  * main recovery loop in xlog.c. So we need to do bookkeeping again to
4546                  * cover that case. This is confusing and it is easy to think this
4547                  * call is irrelevant, which has happened three times in development
4548                  * already. Leave it in.
4549                  */
4550                 RecordKnownAssignedTransactionIds(max_xid);
4551
4552                 /*
4553                  * Mark the transaction committed in pg_clog. We use async commit
4554                  * protocol during recovery to provide information on database
4555                  * consistency for when users try to set hint bits. It is important
4556                  * that we do not set hint bits until the minRecoveryPoint is past
4557                  * this commit record. This ensures that if we crash we don't see hint
4558                  * bits set on changes made by transactions that haven't yet
4559                  * recovered. It's unlikely but it's good to be safe.
4560                  */
4561                 TransactionIdAsyncCommitTree(xid, nsubxacts, sub_xids, lsn);
4562
4563                 /*
4564                  * We must mark clog before we update the ProcArray.
4565                  */
4566                 ExpireTreeKnownAssignedTransactionIds(xid, nsubxacts, sub_xids, max_xid);
4567
4568                 /*
4569                  * Send any cache invalidations attached to the commit. We must
4570                  * maintain the same order of invalidation then release locks as
4571                  * occurs in CommitTransaction().
4572                  */
4573                 ProcessCommittedInvalidationMessages(inval_msgs, nmsgs,
4574                                                                   XactCompletionRelcacheInitFileInval(xinfo),
4575                                                                                          dbId, tsId);
4576
4577                 /*
4578                  * Release locks, if any. We do this for both two phase and normal one
4579                  * phase transactions. In effect we are ignoring the prepare phase and
4580                  * just going straight to lock release.
4581                  */
4582                 StandbyReleaseLockTree(xid, nsubxacts, sub_xids);
4583         }
4584
4585         /* Make sure files supposed to be dropped are dropped */
4586         for (i = 0; i < nrels; i++)
4587         {
4588                 SMgrRelation srel = smgropen(xnodes[i], InvalidBackendId);
4589                 ForkNumber      fork;
4590
4591                 for (fork = 0; fork <= MAX_FORKNUM; fork++)
4592                 {
4593                         if (smgrexists(srel, fork))
4594                         {
4595                                 XLogDropRelation(xnodes[i], fork);
4596                                 smgrdounlink(srel, fork, true);
4597                         }
4598                 }
4599                 smgrclose(srel);
4600         }
4601
4602         /*
4603          * We issue an XLogFlush() for the same reason we emit ForceSyncCommit()
4604          * in normal operation. For example, in DROP DATABASE, we delete all the
4605          * files belonging to the database, and then commit the transaction. If we
4606          * crash after all the files have been deleted but before the commit, you
4607          * have an entry in pg_database without any files. To minimize the window
4608          * for that, we use ForceSyncCommit() to rush the commit record to disk as
4609          * quick as possible. We have the same window during recovery, and forcing
4610          * an XLogFlush() (which updates minRecoveryPoint during recovery) helps
4611          * to reduce that problem window, for any user that requested
4612          * ForceSyncCommit().
4613          */
4614         if (XactCompletionForceSyncCommit(xinfo))
4615                 XLogFlush(lsn);
4616
4617 }
4618 /*
4619  * Utility function to call xact_redo_commit_internal after breaking down xlrec
4620  */
4621 static void
4622 xact_redo_commit(xl_xact_commit *xlrec,
4623                                                         TransactionId xid, XLogRecPtr lsn)
4624 {
4625         TransactionId *subxacts;
4626         SharedInvalidationMessage *inval_msgs;
4627
4628         /* subxid array follows relfilenodes */
4629         subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
4630         /* invalidation messages array follows subxids */
4631         inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
4632
4633         xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
4634                                                                 inval_msgs, xlrec->nmsgs,
4635                                                                 xlrec->xnodes, xlrec->nrels,
4636                                                                 xlrec->dbId,
4637                                                                 xlrec->tsId,
4638                                                                 xlrec->xinfo);
4639 }
4640
4641 /*
4642  * Utility function to call xact_redo_commit_internal  for compact form of message.
4643  */
4644 static void
4645 xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
4646                                                         TransactionId xid, XLogRecPtr lsn)
4647 {
4648         xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
4649                                                                 NULL, 0,                /* inval msgs */
4650                                                                 NULL, 0,                /* relfilenodes */
4651                                                                 InvalidOid,             /* dbId */
4652                                                                 InvalidOid,             /* tsId */
4653                                                                 0);                             /* xinfo */
4654 }
4655
4656 /*
4657  * Be careful with the order of execution, as with xact_redo_commit().
4658  * The two functions are similar but differ in key places.
4659  *
4660  * Note also that an abort can be for a subtransaction and its children,
4661  * not just for a top level abort. That means we have to consider
4662  * topxid != xid, whereas in commit we would find topxid == xid always
4663  * because subtransaction commit is never WAL logged.
4664  */
4665 static void
4666 xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
4667 {
4668         TransactionId *sub_xids;
4669         TransactionId max_xid;
4670         int                     i;
4671
4672         sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
4673         max_xid = TransactionIdLatest(xid, xlrec->nsubxacts, sub_xids);
4674
4675         /*
4676          * Make sure nextXid is beyond any XID mentioned in the record.
4677          *
4678          * We don't expect anyone else to modify nextXid, hence we don't need to
4679          * hold a lock while checking this. We still acquire the lock to modify
4680          * it, though.
4681          */
4682         if (TransactionIdFollowsOrEquals(max_xid,
4683                                                                          ShmemVariableCache->nextXid))
4684         {
4685                 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
4686                 ShmemVariableCache->nextXid = max_xid;
4687                 TransactionIdAdvance(ShmemVariableCache->nextXid);
4688                 LWLockRelease(XidGenLock);
4689         }
4690
4691         if (standbyState == STANDBY_DISABLED)
4692         {
4693                 /* Mark the transaction aborted in pg_clog, no need for async stuff */
4694                 TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
4695         }
4696         else
4697         {
4698                 /*
4699                  * If a transaction completion record arrives that has as-yet
4700                  * unobserved subtransactions then this will not have been fully
4701                  * handled by the call to RecordKnownAssignedTransactionIds() in the
4702                  * main recovery loop in xlog.c. So we need to do bookkeeping again to
4703                  * cover that case. This is confusing and it is easy to think this
4704                  * call is irrelevant, which has happened three times in development
4705                  * already. Leave it in.
4706                  */
4707                 RecordKnownAssignedTransactionIds(max_xid);
4708
4709                 /* Mark the transaction aborted in pg_clog, no need for async stuff */
4710                 TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
4711
4712                 /*
4713                  * We must update the ProcArray after we have marked clog.
4714                  */
4715                 ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids, max_xid);
4716
4717                 /*
4718                  * There are no flat files that need updating, nor invalidation
4719                  * messages to send or undo.
4720                  */
4721
4722                 /*
4723                  * Release locks, if any. There are no invalidations to send.
4724                  */
4725                 StandbyReleaseLockTree(xid, xlrec->nsubxacts, sub_xids);
4726         }
4727
4728         /* Make sure files supposed to be dropped are dropped */
4729         for (i = 0; i < xlrec->nrels; i++)
4730         {
4731                 SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
4732                 ForkNumber      fork;
4733
4734                 for (fork = 0; fork <= MAX_FORKNUM; fork++)
4735                 {
4736                         if (smgrexists(srel, fork))
4737                         {
4738                                 XLogDropRelation(xlrec->xnodes[i], fork);
4739                                 smgrdounlink(srel, fork, true);
4740                         }
4741                 }
4742                 smgrclose(srel);
4743         }
4744 }
4745
4746 void
4747 xact_redo(XLogRecPtr lsn, XLogRecord *record)
4748 {
4749         uint8           info = record->xl_info & ~XLR_INFO_MASK;
4750
4751         /* Backup blocks are not used in xact records */
4752         Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
4753
4754         if (info == XLOG_XACT_COMMIT_COMPACT)
4755         {
4756                 xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
4757
4758                 xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
4759         }
4760         else if (info == XLOG_XACT_COMMIT)
4761         {
4762                 xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
4763
4764                 xact_redo_commit(xlrec, record->xl_xid, lsn);
4765         }
4766         else if (info == XLOG_XACT_ABORT)
4767         {
4768                 xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
4769
4770                 xact_redo_abort(xlrec, record->xl_xid);
4771         }
4772         else if (info == XLOG_XACT_PREPARE)
4773         {
4774                 /* the record contents are exactly the 2PC file */
4775                 RecreateTwoPhaseFile(record->xl_xid,
4776                                                          XLogRecGetData(record), record->xl_len);
4777         }
4778         else if (info == XLOG_XACT_COMMIT_PREPARED)
4779         {
4780                 xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
4781
4782                 xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
4783                 RemoveTwoPhaseFile(xlrec->xid, false);
4784         }
4785         else if (info == XLOG_XACT_ABORT_PREPARED)
4786         {
4787                 xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record);
4788
4789                 xact_redo_abort(&xlrec->arec, xlrec->xid);
4790                 RemoveTwoPhaseFile(xlrec->xid, false);
4791         }
4792         else if (info == XLOG_XACT_ASSIGNMENT)
4793         {
4794                 xl_xact_assignment *xlrec = (xl_xact_assignment *) XLogRecGetData(record);
4795
4796                 if (standbyState >= STANDBY_INITIALIZED)
4797                         ProcArrayApplyXidAssignment(xlrec->xtop,
4798                                                                                 xlrec->nsubxacts, xlrec->xsub);
4799         }
4800         else
4801                 elog(PANIC, "xact_redo: unknown op code %u", info);
4802 }
4803
4804 static void
4805 xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
4806 {
4807         int                     i;
4808         TransactionId *subxacts;
4809
4810         subxacts = (TransactionId *) &xlrec->xnodes[xlrec->nrels];
4811
4812         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4813
4814         if (xlrec->nrels > 0)
4815         {
4816                 appendStringInfo(buf, "; rels:");
4817                 for (i = 0; i < xlrec->nrels; i++)
4818                 {
4819                         char       *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
4820
4821                         appendStringInfo(buf, " %s", path);
4822                         pfree(path);
4823                 }
4824         }
4825         if (xlrec->nsubxacts > 0)
4826         {
4827                 appendStringInfo(buf, "; subxacts:");
4828                 for (i = 0; i < xlrec->nsubxacts; i++)
4829                         appendStringInfo(buf, " %u", subxacts[i]);
4830         }
4831         if (xlrec->nmsgs > 0)
4832         {
4833                 SharedInvalidationMessage *msgs;
4834
4835                 msgs = (SharedInvalidationMessage *) &subxacts[xlrec->nsubxacts];
4836
4837                 if (XactCompletionRelcacheInitFileInval(xlrec->xinfo))
4838                         appendStringInfo(buf, "; relcache init file inval dbid %u tsid %u",
4839                                                          xlrec->dbId, xlrec->tsId);
4840
4841                 appendStringInfo(buf, "; inval msgs:");
4842                 for (i = 0; i < xlrec->nmsgs; i++)
4843                 {
4844                         SharedInvalidationMessage *msg = &msgs[i];
4845
4846                         if (msg->id >= 0)
4847                                 appendStringInfo(buf, " catcache %d", msg->id);
4848                         else if (msg->id == SHAREDINVALCATALOG_ID)
4849                                 appendStringInfo(buf, " catalog %u", msg->cat.catId);
4850                         else if (msg->id == SHAREDINVALRELCACHE_ID)
4851                                 appendStringInfo(buf, " relcache %u", msg->rc.relId);
4852                         /* remaining cases not expected, but print something anyway */
4853                         else if (msg->id == SHAREDINVALSMGR_ID)
4854                                 appendStringInfo(buf, " smgr");
4855                         else if (msg->id == SHAREDINVALRELMAP_ID)
4856                                 appendStringInfo(buf, " relmap");
4857                         else
4858                                 appendStringInfo(buf, " unknown id %d", msg->id);
4859                 }
4860         }
4861 }
4862
4863 static void
4864 xact_desc_commit_compact(StringInfo buf, xl_xact_commit_compact *xlrec)
4865 {
4866         int                     i;
4867
4868         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4869
4870         if (xlrec->nsubxacts > 0)
4871         {
4872                 appendStringInfo(buf, "; subxacts:");
4873                 for (i = 0; i < xlrec->nsubxacts; i++)
4874                         appendStringInfo(buf, " %u", xlrec->subxacts[i]);
4875         }
4876 }
4877
4878 static void
4879 xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
4880 {
4881         int                     i;
4882
4883         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4884         if (xlrec->nrels > 0)
4885         {
4886                 appendStringInfo(buf, "; rels:");
4887                 for (i = 0; i < xlrec->nrels; i++)
4888                 {
4889                         char       *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
4890
4891                         appendStringInfo(buf, " %s", path);
4892                         pfree(path);
4893                 }
4894         }
4895         if (xlrec->nsubxacts > 0)
4896         {
4897                 TransactionId *xacts = (TransactionId *)
4898                 &xlrec->xnodes[xlrec->nrels];
4899
4900                 appendStringInfo(buf, "; subxacts:");
4901                 for (i = 0; i < xlrec->nsubxacts; i++)
4902                         appendStringInfo(buf, " %u", xacts[i]);
4903         }
4904 }
4905
4906 static void
4907 xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
4908 {
4909         int                     i;
4910
4911         appendStringInfo(buf, "subxacts:");
4912
4913         for (i = 0; i < xlrec->nsubxacts; i++)
4914                 appendStringInfo(buf, " %u", xlrec->xsub[i]);
4915 }
4916
4917 void
4918 xact_desc(StringInfo buf, uint8 xl_info, char *rec)
4919 {
4920         uint8           info = xl_info & ~XLR_INFO_MASK;
4921
4922         if (info == XLOG_XACT_COMMIT_COMPACT)
4923         {
4924                 xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) rec;
4925
4926                 appendStringInfo(buf, "commit: ");
4927                 xact_desc_commit_compact(buf, xlrec);
4928         }
4929         else if (info == XLOG_XACT_COMMIT)
4930         {
4931                 xl_xact_commit *xlrec = (xl_xact_commit *) rec;
4932
4933                 appendStringInfo(buf, "commit: ");
4934                 xact_desc_commit(buf, xlrec);
4935         }
4936         else if (info == XLOG_XACT_ABORT)
4937         {
4938                 xl_xact_abort *xlrec = (xl_xact_abort *) rec;
4939
4940                 appendStringInfo(buf, "abort: ");
4941                 xact_desc_abort(buf, xlrec);
4942         }
4943         else if (info == XLOG_XACT_PREPARE)
4944         {
4945                 appendStringInfo(buf, "prepare");
4946         }
4947         else if (info == XLOG_XACT_COMMIT_PREPARED)
4948         {
4949                 xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
4950
4951                 appendStringInfo(buf, "commit prepared %u: ", xlrec->xid);
4952                 xact_desc_commit(buf, &xlrec->crec);
4953         }
4954         else if (info == XLOG_XACT_ABORT_PREPARED)
4955         {
4956                 xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
4957
4958                 appendStringInfo(buf, "abort prepared %u: ", xlrec->xid);
4959                 xact_desc_abort(buf, &xlrec->arec);
4960         }
4961         else if (info == XLOG_XACT_ASSIGNMENT)
4962         {
4963                 xl_xact_assignment *xlrec = (xl_xact_assignment *) rec;
4964
4965                 /*
4966                  * Note that we ignore the WAL record's xid, since we're more
4967                  * interested in the top-level xid that issued the record and which
4968                  * xids are being reported here.
4969                  */
4970                 appendStringInfo(buf, "xid assignment xtop %u: ", xlrec->xtop);
4971                 xact_desc_assignment(buf, xlrec);
4972         }
4973         else
4974                 appendStringInfo(buf, "UNKNOWN");
4975 }