granicus.if.org Git - postgresql/blob - src/backend/access/transam/xact.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * xact.c
   4  *        top level transaction system support routines
   5  *
   6  * See src/backend/access/transam/README for more information.
   7  *
   8  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   9  * Portions Copyright (c) 1994, Regents of the University of California
  10  *
  11  *
  12  * IDENTIFICATION
  13  *        src/backend/access/transam/xact.c
  14  *
  15  *-------------------------------------------------------------------------
  16  */
  17
  18 #include "postgres.h"
  19
  20 #include <time.h>
  21 #include <unistd.h>
  22
  23 #include "access/multixact.h"
  24 #include "access/subtrans.h"
  25 #include "access/transam.h"
  26 #include "access/twophase.h"
  27 #include "access/xact.h"
  28 #include "access/xlogutils.h"
  29 #include "catalog/catalog.h"
  30 #include "catalog/namespace.h"
  31 #include "catalog/storage.h"
  32 #include "commands/async.h"
  33 #include "commands/tablecmds.h"
  34 #include "commands/trigger.h"
  35 #include "executor/spi.h"
  36 #include "libpq/be-fsstubs.h"
  37 #include "miscadmin.h"
  38 #include "pgstat.h"
  39 #include "replication/walsender.h"
  40 #include "replication/syncrep.h"
  41 #include "storage/lmgr.h"
  42 #include "storage/predicate.h"
  43 #include "storage/procarray.h"
  44 #include "storage/sinvaladt.h"
  45 #include "storage/smgr.h"
  46 #include "utils/combocid.h"
  47 #include "utils/guc.h"
  48 #include "utils/inval.h"
  49 #include "utils/memutils.h"
  50 #include "utils/relmapper.h"
  51 #include "utils/snapmgr.h"
  52 #include "utils/timestamp.h"
  53 #include "pg_trace.h"
  54
  55
/*
 *      User-tweakable parameters
 *
 * NOTE(review): each Default* variable is followed by a like-named variable
 * without the prefix; presumably the former is the configured default and
 * the latter the value in effect for the current transaction.  The code
 * that copies one into the other is not in this part of the file -- confirm
 * there before relying on this.
 */
int                     DefaultXactIsoLevel = XACT_READ_COMMITTED;
int                     XactIsoLevel;

bool            DefaultXactReadOnly = false;
bool            XactReadOnly;

bool            DefaultXactDeferrable = false;
bool            XactDeferrable;

int                     synchronous_commit = SYNCHRONOUS_COMMIT_ON;

int                     CommitDelay = 0;        /* precommit delay in microseconds */
int                     CommitSiblings = 5; /* # concurrent xacts needed to sleep */

/*
 * MyXactAccessedTempRel is set when a temporary relation is accessed.
 * We don't allow PREPARE TRANSACTION in that case.  (This is global
 * so that it can be set from heapam.c.)
 */
bool            MyXactAccessedTempRel = false;
  79
  80
/*
 *      transaction states - transaction state from server perspective
 *
 * This is the "low-level" state stored in TransactionStateData.state.
 * IsTransactionState() treats TRANS_INPROGRESS as the only state in which
 * it is safe to do real work, and AssignTransactionId() asserts that state
 * before handing out an XID.
 */
typedef enum TransState
{
        TRANS_DEFAULT,                          /* idle */
        TRANS_START,                            /* transaction starting */
        TRANS_INPROGRESS,                       /* inside a valid transaction */
        TRANS_COMMIT,                           /* commit in progress */
        TRANS_ABORT,                            /* abort in progress */
        TRANS_PREPARE                           /* prepare in progress */
} TransState;
  93
/*
 *      transaction block states - transaction state of client queries
 *
 * This is the "high-level" state stored in TransactionStateData.blockState.
 *
 * Note: the subtransaction states are used only for non-topmost
 * transactions; the others appear only in the topmost transaction.
 *
 * IsAbortedTransactionBlockState() reports true for exactly two of these
 * values: TBLOCK_ABORT and TBLOCK_SUBABORT.
 */
typedef enum TBlockState
{
        /* not-in-transaction-block states */
        TBLOCK_DEFAULT,                         /* idle */
        TBLOCK_STARTED,                         /* running single-query transaction */

        /* transaction block states */
        TBLOCK_BEGIN,                           /* starting transaction block */
        TBLOCK_INPROGRESS,                      /* live transaction */
        TBLOCK_END,                                     /* COMMIT received */
        TBLOCK_ABORT,                           /* failed xact, awaiting ROLLBACK */
        TBLOCK_ABORT_END,                       /* failed xact, ROLLBACK received */
        TBLOCK_ABORT_PENDING,           /* live xact, ROLLBACK received */
        TBLOCK_PREPARE,                         /* live xact, PREPARE received */

        /* subtransaction states */
        TBLOCK_SUBBEGIN,                        /* starting a subtransaction */
        TBLOCK_SUBINPROGRESS,           /* live subtransaction */
        TBLOCK_SUBRELEASE,                      /* RELEASE received */
        TBLOCK_SUBCOMMIT,                       /* COMMIT received while TBLOCK_SUBINPROGRESS */
        TBLOCK_SUBABORT,                        /* failed subxact, awaiting ROLLBACK */
        TBLOCK_SUBABORT_END,            /* failed subxact, ROLLBACK received */
        TBLOCK_SUBABORT_PENDING,        /* live subxact, ROLLBACK received */
        TBLOCK_SUBRESTART,                      /* live subxact, ROLLBACK TO received */
        TBLOCK_SUBABORT_RESTART         /* failed subxact, ROLLBACK TO received */
} TBlockState;
 126
/*
 *      transaction state structure
 *
 * One of these exists per level of the transaction stack; levels are chained
 * through "parent".  AssignTransactionId() treats a NULL parent as meaning
 * "top-level transaction", and TransactionIdIsCurrentTransactionId() walks
 * the chain until it reaches NULL.
 *
 * childXids[] is binary-searched by TransactionIdIsCurrentTransactionId()
 * using TransactionIdPrecedes(), so it must be kept in XID order.
 *
 * The field order here must match the positional initializer of
 * TopTransactionStateData.
 */
typedef struct TransactionStateData
{
        TransactionId transactionId;    /* my XID, or Invalid if none */
        SubTransactionId subTransactionId;      /* my subxact ID */
        char       *name;                       /* savepoint name, if any */
        int                     savepointLevel; /* savepoint level */
        TransState      state;                  /* low-level state */
        TBlockState blockState;         /* high-level state */
        int                     nestingLevel;   /* transaction nesting depth */
        int                     gucNestLevel;   /* GUC context nesting depth */
        MemoryContext curTransactionContext;            /* my xact-lifetime context */
        ResourceOwner curTransactionOwner;      /* my query resources */
        TransactionId *childXids;       /* subcommitted child XIDs, in XID order */
        int                     nChildXids;             /* # of subcommitted child XIDs */
        int                     maxChildXids;   /* allocated size of childXids[] */
        Oid                     prevUser;               /* previous CurrentUserId setting */
        int                     prevSecContext; /* previous SecurityRestrictionContext */
        bool            prevXactReadOnly;               /* entry-time xact r/o state */
        bool            startedInRecovery;              /* did we start in recovery? */
        struct TransactionStateData *parent;            /* back link to parent */
} TransactionStateData;

/* Pointer form used throughout this file to refer to one stack level. */
typedef TransactionStateData *TransactionState;
 153
/*
 * CurrentTransactionState always points to the current transaction state
 * block.  It will point to TopTransactionStateData when not in a
 * transaction at all, or when in a top-level transaction.
 *
 * TopTransactionStateData is the statically allocated state block for the
 * top level.  The initializer below is positional, so its entries must stay
 * in the same order as the fields of TransactionStateData.  Its parent link
 * is NULL, which is what AssignTransactionId() uses to recognize a
 * top-level transaction.
 */
static TransactionStateData TopTransactionStateData = {
        0,                                                      /* transaction id */
        0,                                                      /* subtransaction id */
        NULL,                                           /* savepoint name */
        0,                                                      /* savepoint level */
        TRANS_DEFAULT,                          /* transaction state */
        TBLOCK_DEFAULT,                         /* transaction block state from the client
                                                                 * perspective */
        0,                                                      /* transaction nesting depth */
        0,                                                      /* GUC context nesting depth */
        NULL,                                           /* cur transaction context */
        NULL,                                           /* cur transaction resource owner */
        NULL,                                           /* subcommitted child Xids */
        0,                                                      /* # of subcommitted child Xids */
        0,                                                      /* allocated size of childXids[] */
        InvalidOid,                                     /* previous CurrentUserId setting */
        0,                                                      /* previous SecurityRestrictionContext */
        false,                                          /* entry-time xact r/o state */
        false,                                          /* startedInRecovery */
        NULL                                            /* link to parent state block */
};
 180
/*
 * unreportedXids holds XIDs of all subtransactions that have not yet been
 * reported in a XLOG_XACT_ASSIGNMENT record.
 *
 * AssignTransactionId() appends to this array and, once nUnreportedXids
 * reaches PGPROC_MAX_CACHED_SUBXIDS, writes the record and resets the
 * counter to zero.
 */
static int      nUnreportedXids;
static TransactionId unreportedXids[PGPROC_MAX_CACHED_SUBXIDS];

/* Current level of the transaction stack; starts out at the top-level block. */
static TransactionState CurrentTransactionState = &TopTransactionStateData;

/*
 * The subtransaction ID and command ID assignment counters are global
 * to a whole transaction, so we do not keep them in the state stack.
 *
 * currentCommandIdUsed is set by GetCurrentCommandId(true) and cleared by
 * CommandCounterIncrement() when it advances currentCommandId.
 */
static SubTransactionId currentSubTransactionId;
static CommandId currentCommandId;
static bool currentCommandIdUsed;

/*
 * xactStartTimestamp is the value of transaction_timestamp().
 * stmtStartTimestamp is the value of statement_timestamp().
 * xactStopTimestamp is the time at which we log a commit or abort WAL record.
 * These do not change as we enter and exit subtransactions, so we don't
 * keep them inside the TransactionState stack.
 *
 * GetCurrentTransactionStopTimestamp() treats xactStopTimestamp == 0 as
 * "not set".
 */
static TimestampTz xactStartTimestamp;
static TimestampTz stmtStartTimestamp;
static TimestampTz xactStopTimestamp;

/*
 * GID to be used for preparing the current transaction.  This is also
 * global to a whole transaction, so we don't keep it in the state stack.
 */
static char *prepareGID;

/*
 * Some commands want to force synchronous commit.
 * (Set via ForceSyncCommit().)
 */
static bool forceSyncCommit = false;

/*
 * Private context for transaction-abort work --- we reserve space for this
 * at startup to ensure that AbortTransaction and AbortSubTransaction can work
 * when we've run out of memory.
 *
 * Created lazily by AtStart_Memory() the first time it finds this NULL.
 */
static MemoryContext TransactionAbortContext = NULL;
 226
/*
 * List of add-on start- and end-of-xact callbacks
 *
 * Each item is a node of a singly linked list (via "next") holding a
 * callback pointer and the opaque argument stored with it.  Xact_callbacks
 * is the list head; NULL means the list is empty.
 */
typedef struct XactCallbackItem
{
        struct XactCallbackItem *next;
        XactCallback callback;
        void       *arg;
} XactCallbackItem;

static XactCallbackItem *Xact_callbacks = NULL;

/*
 * List of add-on start- and end-of-subxact callbacks
 *
 * Same layout as XactCallbackItem, but holding a SubXactCallback.
 * SubXact_callbacks is the list head; NULL means the list is empty.
 */
typedef struct SubXactCallbackItem
{
        struct SubXactCallbackItem *next;
        SubXactCallback callback;
        void       *arg;
} SubXactCallbackItem;

static SubXactCallbackItem *SubXact_callbacks = NULL;
 250
 251
/*
 * local function prototypes
 *
 * All of these are static, i.e. private to this file.
 */

/* top-level transaction routines */
static void AssignTransactionId(TransactionState s);
static void AbortTransaction(void);
static void AtAbort_Memory(void);
static void AtCleanup_Memory(void);
static void AtAbort_ResourceOwner(void);
static void AtCCI_LocalCache(void);
static void AtCommit_Memory(void);
static void AtStart_Cache(void);
static void AtStart_Memory(void);
static void AtStart_ResourceOwner(void);
static void CallXactCallbacks(XactEvent event);
static void CallSubXactCallbacks(SubXactEvent event,
                                         SubTransactionId mySubid,
                                         SubTransactionId parentSubid);
static void CleanupTransaction(void);
static void CommitTransaction(void);
static TransactionId RecordTransactionAbort(bool isSubXact);
static void StartTransaction(void);

/* subtransaction routines and state-stack push/pop */
static void StartSubTransaction(void);
static void CommitSubTransaction(void);
static void AbortSubTransaction(void);
static void CleanupSubTransaction(void);
static void PushTransaction(void);
static void PopTransaction(void);

/* subtransaction memory / resource-owner hooks */
static void AtSubAbort_Memory(void);
static void AtSubCleanup_Memory(void);
static void AtSubAbort_ResourceOwner(void);
static void AtSubCommit_Memory(void);
static void AtSubStart_Memory(void);
static void AtSubStart_ResourceOwner(void);

/* state display / enum-to-string helpers */
static void ShowTransactionState(const char *str);
static void ShowTransactionStateRec(TransactionState state);
static const char *BlockStateAsString(TBlockState blockState);
static const char *TransStateAsString(TransState state);
 290
 291
 292 /* ----------------------------------------------------------------
 293  *      transaction state accessors
 294  * ----------------------------------------------------------------
 295  */
 296
 297 /*
 298  *      IsTransactionState
 299  *
 300  *      This returns true if we are inside a valid transaction; that is,
 301  *      it is safe to initiate database access, take heavyweight locks, etc.
 302  */
 303 bool
 304 IsTransactionState(void)
 305 {
 306         TransactionState s = CurrentTransactionState;
 307
 308         /*
 309          * TRANS_DEFAULT and TRANS_ABORT are obviously unsafe states.  However, we
 310          * also reject the startup/shutdown states TRANS_START, TRANS_COMMIT,
 311          * TRANS_PREPARE since it might be too soon or too late within those
 312          * transition states to do anything interesting.  Hence, the only "valid"
 313          * state is TRANS_INPROGRESS.
 314          */
 315         return (s->state == TRANS_INPROGRESS);
 316 }
 317
 318 /*
 319  *      IsAbortedTransactionBlockState
 320  *
 321  *      This returns true if we are within an aborted transaction block.
 322  */
 323 bool
 324 IsAbortedTransactionBlockState(void)
 325 {
 326         TransactionState s = CurrentTransactionState;
 327
 328         if (s->blockState == TBLOCK_ABORT ||
 329                 s->blockState == TBLOCK_SUBABORT)
 330                 return true;
 331
 332         return false;
 333 }
 334
 335
 336 /*
 337  *      GetTopTransactionId
 338  *
 339  * This will return the XID of the main transaction, assigning one if
 340  * it's not yet set.  Be careful to call this only inside a valid xact.
 341  */
 342 TransactionId
 343 GetTopTransactionId(void)
 344 {
 345         if (!TransactionIdIsValid(TopTransactionStateData.transactionId))
 346                 AssignTransactionId(&TopTransactionStateData);
 347         return TopTransactionStateData.transactionId;
 348 }
 349
 350 /*
 351  *      GetTopTransactionIdIfAny
 352  *
 353  * This will return the XID of the main transaction, if one is assigned.
 354  * It will return InvalidTransactionId if we are not currently inside a
 355  * transaction, or inside a transaction that hasn't yet been assigned an XID.
 356  */
 357 TransactionId
 358 GetTopTransactionIdIfAny(void)
 359 {
 360         return TopTransactionStateData.transactionId;
 361 }
 362
 363 /*
 364  *      GetCurrentTransactionId
 365  *
 366  * This will return the XID of the current transaction (main or sub
 367  * transaction), assigning one if it's not yet set.  Be careful to call this
 368  * only inside a valid xact.
 369  */
 370 TransactionId
 371 GetCurrentTransactionId(void)
 372 {
 373         TransactionState s = CurrentTransactionState;
 374
 375         if (!TransactionIdIsValid(s->transactionId))
 376                 AssignTransactionId(s);
 377         return s->transactionId;
 378 }
 379
 380 /*
 381  *      GetCurrentTransactionIdIfAny
 382  *
 383  * This will return the XID of the current sub xact, if one is assigned.
 384  * It will return InvalidTransactionId if we are not currently inside a
 385  * transaction, or inside a transaction that hasn't been assigned an XID yet.
 386  */
 387 TransactionId
 388 GetCurrentTransactionIdIfAny(void)
 389 {
 390         return CurrentTransactionState->transactionId;
 391 }
 392
 393
 394 /*
 395  *      GetStableLatestTransactionIdIfAny
 396  *
 397  * Get the latest XID once and then return same value for rest of transaction.
 398  * Acts as a useful reference point for maintenance tasks.
 399  */
 400 TransactionId
 401 GetStableLatestTransactionId(void)
 402 {
 403         static LocalTransactionId lxid = InvalidLocalTransactionId;
 404         static TransactionId stablexid = InvalidTransactionId;
 405
 406         if (lxid != MyProc->lxid ||
 407                 !TransactionIdIsValid(stablexid))
 408         {
 409                 lxid = MyProc->lxid;
 410                 stablexid = ReadNewTransactionId();
 411         }
 412
 413         return stablexid;
 414 }
 415
 416 /*
 417  * AssignTransactionId
 418  *
 419  * Assigns a new permanent XID to the given TransactionState.
 420  * We do not assign XIDs to transactions until/unless this is called.
 421  * Also, any parent TransactionStates that don't yet have XIDs are assigned
 422  * one; this maintains the invariant that a child transaction has an XID
 423  * following its parent's.
 424  */
 425 static void
 426 AssignTransactionId(TransactionState s)
 427 {
 428         bool            isSubXact = (s->parent != NULL);
 429         ResourceOwner currentOwner;
 430
 431         /* Assert that caller didn't screw up */
 432         Assert(!TransactionIdIsValid(s->transactionId));
 433         Assert(s->state == TRANS_INPROGRESS);
 434
 435         /*
 436          * Ensure parent(s) have XIDs, so that a child always has an XID later
 437          * than its parent.  Musn't recurse here, or we might get a stack overflow
 438          * if we're at the bottom of a huge stack of subtransactions none of which
 439          * have XIDs yet.
 440          */
 441         if (isSubXact && !TransactionIdIsValid(s->parent->transactionId))
 442         {
 443                 TransactionState p = s->parent;
 444                 TransactionState *parents;
 445                 size_t          parentOffset = 0;
 446
 447                 parents = palloc(sizeof(TransactionState) * s->nestingLevel);
 448                 while (p != NULL && !TransactionIdIsValid(p->transactionId))
 449                 {
 450                         parents[parentOffset++] = p;
 451                         p = p->parent;
 452                 }
 453
 454                 /*
 455                  * This is technically a recursive call, but the recursion will never
 456                  * be more than one layer deep.
 457                  */
 458                 while (parentOffset != 0)
 459                         AssignTransactionId(parents[--parentOffset]);
 460
 461                 pfree(parents);
 462         }
 463
 464         /*
 465          * Generate a new Xid and record it in PG_PROC and pg_subtrans.
 466          *
 467          * NB: we must make the subtrans entry BEFORE the Xid appears anywhere in
 468          * shared storage other than PG_PROC; because if there's no room for it in
 469          * PG_PROC, the subtrans entry is needed to ensure that other backends see
 470          * the Xid as "running".  See GetNewTransactionId.
 471          */
 472         s->transactionId = GetNewTransactionId(isSubXact);
 473
 474         if (isSubXact)
 475                 SubTransSetParent(s->transactionId, s->parent->transactionId, false);
 476
 477         /*
 478          * If it's a top-level transaction, the predicate locking system needs to
 479          * be told about it too.
 480          */
 481         if (!isSubXact)
 482                 RegisterPredicateLockingXid(s->transactionId);
 483
 484         /*
 485          * Acquire lock on the transaction XID.  (We assume this cannot block.) We
 486          * have to ensure that the lock is assigned to the transaction's own
 487          * ResourceOwner.
 488          */
 489         currentOwner = CurrentResourceOwner;
 490         PG_TRY();
 491         {
 492                 CurrentResourceOwner = s->curTransactionOwner;
 493                 XactLockTableInsert(s->transactionId);
 494         }
 495         PG_CATCH();
 496         {
 497                 /* Ensure CurrentResourceOwner is restored on error */
 498                 CurrentResourceOwner = currentOwner;
 499                 PG_RE_THROW();
 500         }
 501         PG_END_TRY();
 502         CurrentResourceOwner = currentOwner;
 503
 504         /*
 505          * Every PGPROC_MAX_CACHED_SUBXIDS assigned transaction ids within each
 506          * top-level transaction we issue a WAL record for the assignment. We
 507          * include the top-level xid and all the subxids that have not yet been
 508          * reported using XLOG_XACT_ASSIGNMENT records.
 509          *
 510          * This is required to limit the amount of shared memory required in a hot
 511          * standby server to keep track of in-progress XIDs. See notes for
 512          * RecordKnownAssignedTransactionIds().
 513          *
 514          * We don't keep track of the immediate parent of each subxid, only the
 515          * top-level transaction that each subxact belongs to. This is correct in
 516          * recovery only because aborted subtransactions are separately WAL
 517          * logged.
 518          */
 519         if (isSubXact && XLogStandbyInfoActive())
 520         {
 521                 unreportedXids[nUnreportedXids] = s->transactionId;
 522                 nUnreportedXids++;
 523
 524                 /*
 525                  * ensure this test matches similar one in
 526                  * RecoverPreparedTransactions()
 527                  */
 528                 if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS)
 529                 {
 530                         XLogRecData rdata[2];
 531                         xl_xact_assignment xlrec;
 532
 533                         /*
 534                          * xtop is always set by now because we recurse up transaction
 535                          * stack to the highest unassigned xid and then come back down
 536                          */
 537                         xlrec.xtop = GetTopTransactionId();
 538                         Assert(TransactionIdIsValid(xlrec.xtop));
 539                         xlrec.nsubxacts = nUnreportedXids;
 540
 541                         rdata[0].data = (char *) &xlrec;
 542                         rdata[0].len = MinSizeOfXactAssignment;
 543                         rdata[0].buffer = InvalidBuffer;
 544                         rdata[0].next = &rdata[1];
 545
 546                         rdata[1].data = (char *) unreportedXids;
 547                         rdata[1].len = PGPROC_MAX_CACHED_SUBXIDS * sizeof(TransactionId);
 548                         rdata[1].buffer = InvalidBuffer;
 549                         rdata[1].next = NULL;
 550
 551                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
 552
 553                         nUnreportedXids = 0;
 554                 }
 555         }
 556 }
 557
 558 /*
 559  *      GetCurrentSubTransactionId
 560  */
 561 SubTransactionId
 562 GetCurrentSubTransactionId(void)
 563 {
 564         TransactionState s = CurrentTransactionState;
 565
 566         return s->subTransactionId;
 567 }
 568
 569
 570 /*
 571  *      GetCurrentCommandId
 572  *
 573  * "used" must be TRUE if the caller intends to use the command ID to mark
 574  * inserted/updated/deleted tuples.  FALSE means the ID is being fetched
 575  * for read-only purposes (ie, as a snapshot validity cutoff).  See
 576  * CommandCounterIncrement() for discussion.
 577  */
 578 CommandId
 579 GetCurrentCommandId(bool used)
 580 {
 581         /* this is global to a transaction, not subtransaction-local */
 582         if (used)
 583                 currentCommandIdUsed = true;
 584         return currentCommandId;
 585 }
 586
 587 /*
 588  *      GetCurrentTransactionStartTimestamp
 589  */
 590 TimestampTz
 591 GetCurrentTransactionStartTimestamp(void)
 592 {
 593         return xactStartTimestamp;
 594 }
 595
 596 /*
 597  *      GetCurrentStatementStartTimestamp
 598  */
 599 TimestampTz
 600 GetCurrentStatementStartTimestamp(void)
 601 {
 602         return stmtStartTimestamp;
 603 }
 604
 605 /*
 606  *      GetCurrentTransactionStopTimestamp
 607  *
 608  * We return current time if the transaction stop time hasn't been set
 609  * (which can happen if we decide we don't need to log an XLOG record).
 610  */
 611 TimestampTz
 612 GetCurrentTransactionStopTimestamp(void)
 613 {
 614         if (xactStopTimestamp != 0)
 615                 return xactStopTimestamp;
 616         return GetCurrentTimestamp();
 617 }
 618
 619 /*
 620  *      SetCurrentStatementStartTimestamp
 621  */
 622 void
 623 SetCurrentStatementStartTimestamp(void)
 624 {
 625         stmtStartTimestamp = GetCurrentTimestamp();
 626 }
 627
 628 /*
 629  *      SetCurrentTransactionStopTimestamp
 630  */
 631 static inline void
 632 SetCurrentTransactionStopTimestamp(void)
 633 {
 634         xactStopTimestamp = GetCurrentTimestamp();
 635 }
 636
 637 /*
 638  *      GetCurrentTransactionNestLevel
 639  *
 640  * Note: this will return zero when not inside any transaction, one when
 641  * inside a top-level transaction, etc.
 642  */
 643 int
 644 GetCurrentTransactionNestLevel(void)
 645 {
 646         TransactionState s = CurrentTransactionState;
 647
 648         return s->nestingLevel;
 649 }
 650
 651
 652 /*
 653  *      TransactionIdIsCurrentTransactionId
 654  */
 655 bool
 656 TransactionIdIsCurrentTransactionId(TransactionId xid)
 657 {
 658         TransactionState s;
 659
 660         /*
 661          * We always say that BootstrapTransactionId is "not my transaction ID"
 662          * even when it is (ie, during bootstrap).      Along with the fact that
 663          * transam.c always treats BootstrapTransactionId as already committed,
 664          * this causes the tqual.c routines to see all tuples as committed, which
 665          * is what we need during bootstrap.  (Bootstrap mode only inserts tuples,
 666          * it never updates or deletes them, so all tuples can be presumed good
 667          * immediately.)
 668          *
 669          * Likewise, InvalidTransactionId and FrozenTransactionId are certainly
 670          * not my transaction ID, so we can just return "false" immediately for
 671          * any non-normal XID.
 672          */
 673         if (!TransactionIdIsNormal(xid))
 674                 return false;
 675
 676         /*
 677          * We will return true for the Xid of the current subtransaction, any of
 678          * its subcommitted children, any of its parents, or any of their
 679          * previously subcommitted children.  However, a transaction being aborted
 680          * is no longer "current", even though it may still have an entry on the
 681          * state stack.
 682          */
 683         for (s = CurrentTransactionState; s != NULL; s = s->parent)
 684         {
 685                 int                     low,
 686                                         high;
 687
 688                 if (s->state == TRANS_ABORT)
 689                         continue;
 690                 if (!TransactionIdIsValid(s->transactionId))
 691                         continue;                       /* it can't have any child XIDs either */
 692                 if (TransactionIdEquals(xid, s->transactionId))
 693                         return true;
 694                 /* As the childXids array is ordered, we can use binary search */
 695                 low = 0;
 696                 high = s->nChildXids - 1;
 697                 while (low <= high)
 698                 {
 699                         int                     middle;
 700                         TransactionId probe;
 701
 702                         middle = low + (high - low) / 2;
 703                         probe = s->childXids[middle];
 704                         if (TransactionIdEquals(probe, xid))
 705                                 return true;
 706                         else if (TransactionIdPrecedes(probe, xid))
 707                                 low = middle + 1;
 708                         else
 709                                 high = middle - 1;
 710                 }
 711         }
 712
 713         return false;
 714 }
 715
 716 /*
 717  *      TransactionStartedDuringRecovery
 718  *
 719  * Returns true if the current transaction started while recovery was still
 720  * in progress. Recovery might have ended since so RecoveryInProgress() might
 721  * return false already.
 722  */
 723 bool
 724 TransactionStartedDuringRecovery(void)
 725 {
 726         return CurrentTransactionState->startedInRecovery;
 727 }
 728
 729 /*
 730  *      CommandCounterIncrement
 731  */
 732 void
 733 CommandCounterIncrement(void)
 734 {
 735         /*
 736          * If the current value of the command counter hasn't been "used" to mark
 737          * tuples, we need not increment it, since there's no need to distinguish
 738          * a read-only command from others.  This helps postpone command counter
 739          * overflow, and keeps no-op CommandCounterIncrement operations cheap.
 740          */
 741         if (currentCommandIdUsed)
 742         {
 743                 currentCommandId += 1;
 744                 if (currentCommandId == FirstCommandId) /* check for overflow */
 745                 {
 746                         currentCommandId -= 1;
 747                         ereport(ERROR,
 748                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 749                                          errmsg("cannot have more than 2^32-1 commands in a transaction")));
 750                 }
 751                 currentCommandIdUsed = false;
 752
 753                 /* Propagate new command ID into static snapshots */
 754                 SnapshotSetCommandId(currentCommandId);
 755
 756                 /*
 757                  * Make any catalog changes done by the just-completed command visible
 758                  * in the local syscache.  We obviously don't need to do this after a
 759                  * read-only command.  (But see hacks in inval.c to make real sure we
 760                  * don't think a command that queued inval messages was read-only.)
 761                  */
 762                 AtCCI_LocalCache();
 763         }
 764 }
 765
 766 /*
 767  * ForceSyncCommit
 768  *
 769  * Interface routine to allow commands to force a synchronous commit of the
 770  * current top-level transaction
 771  */
 772 void
 773 ForceSyncCommit(void)
 774 {
 775         forceSyncCommit = true;
 776 }
 777
 778
 779 /* ----------------------------------------------------------------
 780  *                                              StartTransaction stuff
 781  * ----------------------------------------------------------------
 782  */
 783
 784 /*
 785  *      AtStart_Cache
 786  */
 787 static void
 788 AtStart_Cache(void)
 789 {
 790         AcceptInvalidationMessages();
 791 }
 792
 793 /*
 794  *      AtStart_Memory
 795  */
 796 static void
 797 AtStart_Memory(void)
 798 {
 799         TransactionState s = CurrentTransactionState;
 800
 801         /*
 802          * If this is the first time through, create a private context for
 803          * AbortTransaction to work in.  By reserving some space now, we can
 804          * insulate AbortTransaction from out-of-memory scenarios.      Like
 805          * ErrorContext, we set it up with slow growth rate and a nonzero minimum
 806          * size, so that space will be reserved immediately.
 807          */
 808         if (TransactionAbortContext == NULL)
 809                 TransactionAbortContext =
 810                         AllocSetContextCreate(TopMemoryContext,
 811                                                                   "TransactionAbortContext",
 812                                                                   32 * 1024,
 813                                                                   32 * 1024,
 814                                                                   32 * 1024);
 815
 816         /*
 817          * We shouldn't have a transaction context already.
 818          */
 819         Assert(TopTransactionContext == NULL);
 820
 821         /*
 822          * Create a toplevel context for the transaction.
 823          */
 824         TopTransactionContext =
 825                 AllocSetContextCreate(TopMemoryContext,
 826                                                           "TopTransactionContext",
 827                                                           ALLOCSET_DEFAULT_MINSIZE,
 828                                                           ALLOCSET_DEFAULT_INITSIZE,
 829                                                           ALLOCSET_DEFAULT_MAXSIZE);
 830
 831         /*
 832          * In a top-level transaction, CurTransactionContext is the same as
 833          * TopTransactionContext.
 834          */
 835         CurTransactionContext = TopTransactionContext;
 836         s->curTransactionContext = CurTransactionContext;
 837
 838         /* Make the CurTransactionContext active. */
 839         MemoryContextSwitchTo(CurTransactionContext);
 840 }
 841
 842 /*
 843  *      AtStart_ResourceOwner
 844  */
 845 static void
 846 AtStart_ResourceOwner(void)
 847 {
 848         TransactionState s = CurrentTransactionState;
 849
 850         /*
 851          * We shouldn't have a transaction resource owner already.
 852          */
 853         Assert(TopTransactionResourceOwner == NULL);
 854
 855         /*
 856          * Create a toplevel resource owner for the transaction.
 857          */
 858         s->curTransactionOwner = ResourceOwnerCreate(NULL, "TopTransaction");
 859
 860         TopTransactionResourceOwner = s->curTransactionOwner;
 861         CurTransactionResourceOwner = s->curTransactionOwner;
 862         CurrentResourceOwner = s->curTransactionOwner;
 863 }
 864
 865 /* ----------------------------------------------------------------
 866  *                                              StartSubTransaction stuff
 867  * ----------------------------------------------------------------
 868  */
 869
 870 /*
 871  * AtSubStart_Memory
 872  */
 873 static void
 874 AtSubStart_Memory(void)
 875 {
 876         TransactionState s = CurrentTransactionState;
 877
 878         Assert(CurTransactionContext != NULL);
 879
 880         /*
 881          * Create a CurTransactionContext, which will be used to hold data that
 882          * survives subtransaction commit but disappears on subtransaction abort.
 883          * We make it a child of the immediate parent's CurTransactionContext.
 884          */
 885         CurTransactionContext = AllocSetContextCreate(CurTransactionContext,
 886                                                                                                   "CurTransactionContext",
 887                                                                                                   ALLOCSET_DEFAULT_MINSIZE,
 888                                                                                                   ALLOCSET_DEFAULT_INITSIZE,
 889                                                                                                   ALLOCSET_DEFAULT_MAXSIZE);
 890         s->curTransactionContext = CurTransactionContext;
 891
 892         /* Make the CurTransactionContext active. */
 893         MemoryContextSwitchTo(CurTransactionContext);
 894 }
 895
 896 /*
 897  * AtSubStart_ResourceOwner
 898  */
 899 static void
 900 AtSubStart_ResourceOwner(void)
 901 {
 902         TransactionState s = CurrentTransactionState;
 903
 904         Assert(s->parent != NULL);
 905
 906         /*
 907          * Create a resource owner for the subtransaction.      We make it a child of
 908          * the immediate parent's resource owner.
 909          */
 910         s->curTransactionOwner =
 911                 ResourceOwnerCreate(s->parent->curTransactionOwner,
 912                                                         "SubTransaction");
 913
 914         CurTransactionResourceOwner = s->curTransactionOwner;
 915         CurrentResourceOwner = s->curTransactionOwner;
 916 }
 917
 918 /* ----------------------------------------------------------------
 919  *                                              CommitTransaction stuff
 920  * ----------------------------------------------------------------
 921  */
 922
 923 /*
 924  *      RecordTransactionCommit
 925  *
 926  * Returns latest XID among xact and its children, or InvalidTransactionId
 927  * if the xact has no XID.      (We compute that here just because it's easier.)
 928  */
 929 static TransactionId
 930 RecordTransactionCommit(void)
 931 {
 932         TransactionId xid = GetTopTransactionIdIfAny();
 933         bool            markXidCommitted = TransactionIdIsValid(xid);
 934         TransactionId latestXid = InvalidTransactionId;
 935         int                     nrels;
 936         RelFileNode *rels;
 937         int                     nchildren;
 938         TransactionId *children;
 939         int                     nmsgs = 0;
 940         SharedInvalidationMessage *invalMessages = NULL;
 941         bool            RelcacheInitFileInval = false;
 942         bool            wrote_xlog;
 943
 944         /* Get data needed for commit record */
 945         nrels = smgrGetPendingDeletes(true, &rels);
 946         nchildren = xactGetCommittedChildren(&children);
 947         if (XLogStandbyInfoActive())
 948                 nmsgs = xactGetCommittedInvalidationMessages(&invalMessages,
 949                                                                                                          &RelcacheInitFileInval);
 950         wrote_xlog = (XactLastRecEnd.xrecoff != 0);
 951
 952         /*
 953          * If we haven't been assigned an XID yet, we neither can, nor do we want
 954          * to write a COMMIT record.
 955          */
 956         if (!markXidCommitted)
 957         {
 958                 /*
 959                  * We expect that every smgrscheduleunlink is followed by a catalog
 960                  * update, and hence XID assignment, so we shouldn't get here with any
 961                  * pending deletes.  Use a real test not just an Assert to check this,
 962                  * since it's a bit fragile.
 963                  */
 964                 if (nrels != 0)
 965                         elog(ERROR, "cannot commit a transaction that deleted files but has no xid");
 966
 967                 /* Can't have child XIDs either; AssignTransactionId enforces this */
 968                 Assert(nchildren == 0);
 969
 970                 /*
 971                  * If we didn't create XLOG entries, we're done here; otherwise we
 972                  * should flush those entries the same as a commit record.      (An
 973                  * example of a possible record that wouldn't cause an XID to be
 974                  * assigned is a sequence advance record due to nextval() --- we want
 975                  * to flush that to disk before reporting commit.)
 976                  */
 977                 if (!wrote_xlog)
 978                         goto cleanup;
 979         }
 980         else
 981         {
 982                 /*
 983                  * Begin commit critical section and insert the commit XLOG record.
 984                  */
 985                 /* Tell bufmgr and smgr to prepare for commit */
 986                 BufmgrCommit();
 987
 988                 /*
 989                  * Mark ourselves as within our "commit critical section".      This
 990                  * forces any concurrent checkpoint to wait until we've updated
 991                  * pg_clog.  Without this, it is possible for the checkpoint to set
 992                  * REDO after the XLOG record but fail to flush the pg_clog update to
 993                  * disk, leading to loss of the transaction commit if the system
 994                  * crashes a little later.
 995                  *
 996                  * Note: we could, but don't bother to, set this flag in
 997                  * RecordTransactionAbort.      That's because loss of a transaction abort
 998                  * is noncritical; the presumption would be that it aborted, anyway.
 999                  *
1000                  * It's safe to change the inCommit flag of our own backend without
1001                  * holding the ProcArrayLock, since we're the only one modifying it.
1002                  * This makes checkpoint's determination of which xacts are inCommit a
1003                  * bit fuzzy, but it doesn't matter.
1004                  */
1005                 START_CRIT_SECTION();
1006                 MyPgXact->inCommit = true;
1007
1008                 SetCurrentTransactionStopTimestamp();
1009
1010                 /*
1011                  * Do we need the long commit record? If not, use the compact format.
1012                  */
1013                 if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit)
1014                 {
1015                         XLogRecData rdata[4];
1016                         int                     lastrdata = 0;
1017                         xl_xact_commit xlrec;
1018                         /*
1019                          * Set flags required for recovery processing of commits.
1020                          */
1021                         xlrec.xinfo = 0;
1022                         if (RelcacheInitFileInval)
1023                                 xlrec.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE;
1024                         if (forceSyncCommit)
1025                                 xlrec.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT;
1026
1027                         xlrec.dbId = MyDatabaseId;
1028                         xlrec.tsId = MyDatabaseTableSpace;
1029
1030                         xlrec.xact_time = xactStopTimestamp;
1031                         xlrec.nrels = nrels;
1032                         xlrec.nsubxacts = nchildren;
1033                         xlrec.nmsgs = nmsgs;
1034                         rdata[0].data = (char *) (&xlrec);
1035                         rdata[0].len = MinSizeOfXactCommit;
1036                         rdata[0].buffer = InvalidBuffer;
1037                         /* dump rels to delete */
1038                         if (nrels > 0)
1039                         {
1040                                 rdata[0].next = &(rdata[1]);
1041                                 rdata[1].data = (char *) rels;
1042                                 rdata[1].len = nrels * sizeof(RelFileNode);
1043                                 rdata[1].buffer = InvalidBuffer;
1044                                 lastrdata = 1;
1045                         }
1046                         /* dump committed child Xids */
1047                         if (nchildren > 0)
1048                         {
1049                                 rdata[lastrdata].next = &(rdata[2]);
1050                                 rdata[2].data = (char *) children;
1051                                 rdata[2].len = nchildren * sizeof(TransactionId);
1052                                 rdata[2].buffer = InvalidBuffer;
1053                                 lastrdata = 2;
1054                         }
1055                         /* dump shared cache invalidation messages */
1056                         if (nmsgs > 0)
1057                         {
1058                                 rdata[lastrdata].next = &(rdata[3]);
1059                                 rdata[3].data = (char *) invalMessages;
1060                                 rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
1061                                 rdata[3].buffer = InvalidBuffer;
1062                                 lastrdata = 3;
1063                         }
1064                         rdata[lastrdata].next = NULL;
1065
1066                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
1067                 }
1068                 else
1069                 {
1070                         XLogRecData rdata[2];
1071                         int                     lastrdata = 0;
1072                         xl_xact_commit_compact  xlrec;
1073                         xlrec.xact_time = xactStopTimestamp;
1074                         xlrec.nsubxacts = nchildren;
1075                         rdata[0].data = (char *) (&xlrec);
1076                         rdata[0].len = MinSizeOfXactCommitCompact;
1077                         rdata[0].buffer = InvalidBuffer;
1078                         /* dump committed child Xids */
1079                         if (nchildren > 0)
1080                         {
1081                                 rdata[0].next = &(rdata[1]);
1082                                 rdata[1].data = (char *) children;
1083                                 rdata[1].len = nchildren * sizeof(TransactionId);
1084                                 rdata[1].buffer = InvalidBuffer;
1085                                 lastrdata = 1;
1086                         }
1087                         rdata[lastrdata].next = NULL;
1088
1089                         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
1090                 }
1091         }
1092
1093         /*
1094          * Check if we want to commit asynchronously.  We can allow the XLOG flush
1095          * to happen asynchronously if synchronous_commit=off, or if the current
1096          * transaction has not performed any WAL-logged operation.      The latter
1097          * case can arise if the current transaction wrote only to temporary
1098          * and/or unlogged tables.      In case of a crash, the loss of such a
1099          * transaction will be irrelevant since temp tables will be lost anyway,
1100          * and unlogged tables will be truncated.  (Given the foregoing, you might
1101          * think that it would be unnecessary to emit the XLOG record at all in
1102          * this case, but we don't currently try to do that.  It would certainly
1103          * cause problems at least in Hot Standby mode, where the
1104          * KnownAssignedXids machinery requires tracking every XID assignment.  It
1105          * might be OK to skip it only when wal_level < hot_standby, but for now
1106          * we don't.)
1107          *
1108          * However, if we're doing cleanup of any non-temp rels or committing any
1109          * command that wanted to force sync commit, then we must flush XLOG
1110          * immediately.  (We must not allow asynchronous commit if there are any
1111          * non-temp tables to be deleted, because we might delete the files before
1112          * the COMMIT record is flushed to disk.  We do allow asynchronous commit
1113          * if all to-be-deleted tables are temporary though, since they are lost
1114          * anyway if we crash.)
1115          */
1116         if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) ||
1117                 forceSyncCommit || nrels > 0)
1118         {
1119                 /*
1120                  * Synchronous commit case:
1121                  *
1122                  * Sleep before flush! So we can flush more than one commit records
1123                  * per single fsync.  (The idea is some other backend may do the
1124                  * XLogFlush while we're sleeping.  This needs work still, because on
1125                  * most Unixen, the minimum select() delay is 10msec or more, which is
1126                  * way too long.)
1127                  *
1128                  * We do not sleep if enableFsync is not turned on, nor if there are
1129                  * fewer than CommitSiblings other backends with active transactions.
1130                  */
1131                 if (CommitDelay > 0 && enableFsync &&
1132                         MinimumActiveBackends(CommitSiblings))
1133                         pg_usleep(CommitDelay);
1134
1135                 XLogFlush(XactLastRecEnd);
1136
1137                 /*
1138                  * Wake up all walsenders to send WAL up to the COMMIT record
1139                  * immediately if replication is enabled
1140                  */
1141                 if (max_wal_senders > 0)
1142                         WalSndWakeup();
1143
1144                 /*
1145                  * Now we may update the CLOG, if we wrote a COMMIT record above
1146                  */
1147                 if (markXidCommitted)
1148                         TransactionIdCommitTree(xid, nchildren, children);
1149         }
1150         else
1151         {
1152                 /*
1153                  * Asynchronous commit case:
1154                  *
1155                  * This enables possible committed transaction loss in the case of a
1156                  * postmaster crash because WAL buffers are left unwritten. Ideally we
1157                  * could issue the WAL write without the fsync, but some
1158                  * wal_sync_methods do not allow separate write/fsync.
1159                  *
1160                  * Report the latest async commit LSN, so that the WAL writer knows to
1161                  * flush this commit.
1162                  */
1163                 XLogSetAsyncXactLSN(XactLastRecEnd);
1164
1165                 /*
1166                  * We must not immediately update the CLOG, since we didn't flush the
1167                  * XLOG. Instead, we store the LSN up to which the XLOG must be
1168                  * flushed before the CLOG may be updated.
1169                  */
1170                 if (markXidCommitted)
1171                         TransactionIdAsyncCommitTree(xid, nchildren, children, XactLastRecEnd);
1172         }
1173
1174         /*
1175          * If we entered a commit critical section, leave it now, and let
1176          * checkpoints proceed.
1177          */
1178         if (markXidCommitted)
1179         {
1180                 MyPgXact->inCommit = false;
1181                 END_CRIT_SECTION();
1182         }
1183
1184         /* Compute latestXid while we have the child XIDs handy */
1185         latestXid = TransactionIdLatest(xid, nchildren, children);
1186
1187         /*
1188          * Wait for synchronous replication, if required.
1189          *
1190          * Note that at this stage we have marked clog, but still show as running
1191          * in the procarray and continue to hold locks.
1192          */
1193         if (wrote_xlog)
1194                 SyncRepWaitForLSN(XactLastRecEnd);
1195
1196         /* Reset XactLastRecEnd until the next transaction writes something */
1197         XactLastRecEnd.xrecoff = 0;
1198
1199 cleanup:
1200         /* Clean up local data */
1201         if (rels)
1202                 pfree(rels);
1203
1204         return latestXid;
1205 }
1206
1207
/*
 *  AtCCI_LocalCache
 *
 * Local cache maintenance performed at command-counter increment.
 */
static void
AtCCI_LocalCache(void)
{
    /*
     * Pending relation map changes come first.  They have to be visible
     * before local sinval messages are processed, so that relcache inval
     * processing picks the map changes up.
     */
    AtCCI_RelationMap();

    /* Then expose catalog changes to the next command in this backend. */
    CommandEndInvalidationMessages();
}
1226
1227 /*
1228  *      AtCommit_Memory
1229  */
1230 static void
1231 AtCommit_Memory(void)
1232 {
1233         /*
1234          * Now that we're "out" of a transaction, have the system allocate things
1235          * in the top memory context instead of per-transaction contexts.
1236          */
1237         MemoryContextSwitchTo(TopMemoryContext);
1238
1239         /*
1240          * Release all transaction-local memory.
1241          */
1242         Assert(TopTransactionContext != NULL);
1243         MemoryContextDelete(TopTransactionContext);
1244         TopTransactionContext = NULL;
1245         CurTransactionContext = NULL;
1246         CurrentTransactionState->curTransactionContext = NULL;
1247 }
1248
1249 /* ----------------------------------------------------------------
1250  *                                              CommitSubTransaction stuff
1251  * ----------------------------------------------------------------
1252  */
1253
1254 /*
1255  * AtSubCommit_Memory
1256  */
1257 static void
1258 AtSubCommit_Memory(void)
1259 {
1260         TransactionState s = CurrentTransactionState;
1261
1262         Assert(s->parent != NULL);
1263
1264         /* Return to parent transaction level's memory context. */
1265         CurTransactionContext = s->parent->curTransactionContext;
1266         MemoryContextSwitchTo(CurTransactionContext);
1267
1268         /*
1269          * Ordinarily we cannot throw away the child's CurTransactionContext,
1270          * since the data it contains will be needed at upper commit.  However, if
1271          * there isn't actually anything in it, we can throw it away.  This avoids
1272          * a small memory leak in the common case of "trivial" subxacts.
1273          */
1274         if (MemoryContextIsEmpty(s->curTransactionContext))
1275         {
1276                 MemoryContextDelete(s->curTransactionContext);
1277                 s->curTransactionContext = NULL;
1278         }
1279 }
1280
1281 /*
1282  * AtSubCommit_childXids
1283  *
1284  * Pass my own XID and my child XIDs up to my parent as committed children.
1285  */
1286 static void
1287 AtSubCommit_childXids(void)
1288 {
1289         TransactionState s = CurrentTransactionState;
1290         int                     new_nChildXids;
1291
1292         Assert(s->parent != NULL);
1293
1294         /*
1295          * The parent childXids array will need to hold my XID and all my
1296          * childXids, in addition to the XIDs already there.
1297          */
1298         new_nChildXids = s->parent->nChildXids + s->nChildXids + 1;
1299
1300         /* Allocate or enlarge the parent array if necessary */
1301         if (s->parent->maxChildXids < new_nChildXids)
1302         {
1303                 int                     new_maxChildXids;
1304                 TransactionId *new_childXids;
1305
1306                 /*
1307                  * Make it 2x what's needed right now, to avoid having to enlarge it
1308                  * repeatedly. But we can't go above MaxAllocSize.  (The latter limit
1309                  * is what ensures that we don't need to worry about integer overflow
1310                  * here or in the calculation of new_nChildXids.)
1311                  */
1312                 new_maxChildXids = Min(new_nChildXids * 2,
1313                                                            (int) (MaxAllocSize / sizeof(TransactionId)));
1314
1315                 if (new_maxChildXids < new_nChildXids)
1316                         ereport(ERROR,
1317                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1318                                          errmsg("maximum number of committed subtransactions (%d) exceeded",
1319                                                         (int) (MaxAllocSize / sizeof(TransactionId)))));
1320
1321                 /*
1322                  * We keep the child-XID arrays in TopTransactionContext; this avoids
1323                  * setting up child-transaction contexts for what might be just a few
1324                  * bytes of grandchild XIDs.
1325                  */
1326                 if (s->parent->childXids == NULL)
1327                         new_childXids =
1328                                 MemoryContextAlloc(TopTransactionContext,
1329                                                                    new_maxChildXids * sizeof(TransactionId));
1330                 else
1331                         new_childXids = repalloc(s->parent->childXids,
1332                                                                    new_maxChildXids * sizeof(TransactionId));
1333
1334                 s->parent->childXids = new_childXids;
1335                 s->parent->maxChildXids = new_maxChildXids;
1336         }
1337
1338         /*
1339          * Copy all my XIDs to parent's array.
1340          *
1341          * Note: We rely on the fact that the XID of a child always follows that
1342          * of its parent.  By copying the XID of this subtransaction before the
1343          * XIDs of its children, we ensure that the array stays ordered. Likewise,
1344          * all XIDs already in the array belong to subtransactions started and
1345          * subcommitted before us, so their XIDs must precede ours.
1346          */
1347         s->parent->childXids[s->parent->nChildXids] = s->transactionId;
1348
1349         if (s->nChildXids > 0)
1350                 memcpy(&s->parent->childXids[s->parent->nChildXids + 1],
1351                            s->childXids,
1352                            s->nChildXids * sizeof(TransactionId));
1353
1354         s->parent->nChildXids = new_nChildXids;
1355
1356         /* Release child's array to avoid leakage */
1357         if (s->childXids != NULL)
1358                 pfree(s->childXids);
1359         /* We must reset these to avoid double-free if fail later in commit */
1360         s->childXids = NULL;
1361         s->nChildXids = 0;
1362         s->maxChildXids = 0;
1363 }
1364
1365 /* ----------------------------------------------------------------
1366  *                                              AbortTransaction stuff
1367  * ----------------------------------------------------------------
1368  */
1369
1370 /*
1371  *      RecordTransactionAbort
1372  *
1373  * Returns latest XID among xact and its children, or InvalidTransactionId
1374  * if the xact has no XID.      (We compute that here just because it's easier.)
1375  */
1376 static TransactionId
1377 RecordTransactionAbort(bool isSubXact)
1378 {
1379         TransactionId xid = GetCurrentTransactionIdIfAny();
1380         TransactionId latestXid;
1381         int                     nrels;
1382         RelFileNode *rels;
1383         int                     nchildren;
1384         TransactionId *children;
1385         XLogRecData rdata[3];
1386         int                     lastrdata = 0;
1387         xl_xact_abort xlrec;
1388
1389         /*
1390          * If we haven't been assigned an XID, nobody will care whether we aborted
1391          * or not.      Hence, we're done in that case.  It does not matter if we have
1392          * rels to delete (note that this routine is not responsible for actually
1393          * deleting 'em).  We cannot have any child XIDs, either.
1394          */
1395         if (!TransactionIdIsValid(xid))
1396         {
1397                 /* Reset XactLastRecEnd until the next transaction writes something */
1398                 if (!isSubXact)
1399                         XactLastRecEnd.xrecoff = 0;
1400                 return InvalidTransactionId;
1401         }
1402
1403         /*
1404          * We have a valid XID, so we should write an ABORT record for it.
1405          *
1406          * We do not flush XLOG to disk here, since the default assumption after a
1407          * crash would be that we aborted, anyway.      For the same reason, we don't
1408          * need to worry about interlocking against checkpoint start.
1409          */
1410
1411         /*
1412          * Check that we haven't aborted halfway through RecordTransactionCommit.
1413          */
1414         if (TransactionIdDidCommit(xid))
1415                 elog(PANIC, "cannot abort transaction %u, it was already committed",
1416                          xid);
1417
1418         /* Fetch the data we need for the abort record */
1419         nrels = smgrGetPendingDeletes(false, &rels);
1420         nchildren = xactGetCommittedChildren(&children);
1421
1422         /* XXX do we really need a critical section here? */
1423         START_CRIT_SECTION();
1424
1425         /* Write the ABORT record */
1426         if (isSubXact)
1427                 xlrec.xact_time = GetCurrentTimestamp();
1428         else
1429         {
1430                 SetCurrentTransactionStopTimestamp();
1431                 xlrec.xact_time = xactStopTimestamp;
1432         }
1433         xlrec.nrels = nrels;
1434         xlrec.nsubxacts = nchildren;
1435         rdata[0].data = (char *) (&xlrec);
1436         rdata[0].len = MinSizeOfXactAbort;
1437         rdata[0].buffer = InvalidBuffer;
1438         /* dump rels to delete */
1439         if (nrels > 0)
1440         {
1441                 rdata[0].next = &(rdata[1]);
1442                 rdata[1].data = (char *) rels;
1443                 rdata[1].len = nrels * sizeof(RelFileNode);
1444                 rdata[1].buffer = InvalidBuffer;
1445                 lastrdata = 1;
1446         }
1447         /* dump committed child Xids */
1448         if (nchildren > 0)
1449         {
1450                 rdata[lastrdata].next = &(rdata[2]);
1451                 rdata[2].data = (char *) children;
1452                 rdata[2].len = nchildren * sizeof(TransactionId);
1453                 rdata[2].buffer = InvalidBuffer;
1454                 lastrdata = 2;
1455         }
1456         rdata[lastrdata].next = NULL;
1457
1458         (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
1459
1460         /*
1461          * Report the latest async abort LSN, so that the WAL writer knows to
1462          * flush this abort. There's nothing to be gained by delaying this, since
1463          * WALWriter may as well do this when it can. This is important with
1464          * streaming replication because if we don't flush WAL regularly we will
1465          * find that large aborts leave us with a long backlog for when commits
1466          * occur after the abort, increasing our window of data loss should
1467          * problems occur at that point.
1468          */
1469         if (!isSubXact)
1470                 XLogSetAsyncXactLSN(XactLastRecEnd);
1471
1472         /*
1473          * Mark the transaction aborted in clog.  This is not absolutely necessary
1474          * but we may as well do it while we are here; also, in the subxact case
1475          * it is helpful because XactLockTableWait makes use of it to avoid
1476          * waiting for already-aborted subtransactions.  It is OK to do it without
1477          * having flushed the ABORT record to disk, because in event of a crash
1478          * we'd be assumed to have aborted anyway.
1479          */
1480         TransactionIdAbortTree(xid, nchildren, children);
1481
1482         END_CRIT_SECTION();
1483
1484         /* Compute latestXid while we have the child XIDs handy */
1485         latestXid = TransactionIdLatest(xid, nchildren, children);
1486
1487         /*
1488          * If we're aborting a subtransaction, we can immediately remove failed
1489          * XIDs from PGPROC's cache of running child XIDs.  We do that here for
1490          * subxacts, because we already have the child XID array at hand.  For
1491          * main xacts, the equivalent happens just after this function returns.
1492          */
1493         if (isSubXact)
1494                 XidCacheRemoveRunningXids(xid, nchildren, children, latestXid);
1495
1496         /* Reset XactLastRecEnd until the next transaction writes something */
1497         if (!isSubXact)
1498                 XactLastRecEnd.xrecoff = 0;
1499
1500         /* And clean up local data */
1501         if (rels)
1502                 pfree(rels);
1503
1504         return latestXid;
1505 }
1506
1507 /*
1508  *      AtAbort_Memory
1509  */
1510 static void
1511 AtAbort_Memory(void)
1512 {
1513         /*
1514          * Switch into TransactionAbortContext, which should have some free space
1515          * even if nothing else does.  We'll work in this context until we've
1516          * finished cleaning up.
1517          *
1518          * It is barely possible to get here when we've not been able to create
1519          * TransactionAbortContext yet; if so use TopMemoryContext.
1520          */
1521         if (TransactionAbortContext != NULL)
1522                 MemoryContextSwitchTo(TransactionAbortContext);
1523         else
1524                 MemoryContextSwitchTo(TopMemoryContext);
1525 }
1526
1527 /*
1528  * AtSubAbort_Memory
1529  */
1530 static void
1531 AtSubAbort_Memory(void)
1532 {
1533         Assert(TransactionAbortContext != NULL);
1534
1535         MemoryContextSwitchTo(TransactionAbortContext);
1536 }
1537
1538
1539 /*
1540  *      AtAbort_ResourceOwner
1541  */
1542 static void
1543 AtAbort_ResourceOwner(void)
1544 {
1545         /*
1546          * Make sure we have a valid ResourceOwner, if possible (else it will be
1547          * NULL, which is OK)
1548          */
1549         CurrentResourceOwner = TopTransactionResourceOwner;
1550 }
1551
1552 /*
1553  * AtSubAbort_ResourceOwner
1554  */
1555 static void
1556 AtSubAbort_ResourceOwner(void)
1557 {
1558         TransactionState s = CurrentTransactionState;
1559
1560         /* Make sure we have a valid ResourceOwner */
1561         CurrentResourceOwner = s->curTransactionOwner;
1562 }
1563
1564
1565 /*
1566  * AtSubAbort_childXids
1567  */
1568 static void
1569 AtSubAbort_childXids(void)
1570 {
1571         TransactionState s = CurrentTransactionState;
1572
1573         /*
1574          * We keep the child-XID arrays in TopTransactionContext (see
1575          * AtSubCommit_childXids).      This means we'd better free the array
1576          * explicitly at abort to avoid leakage.
1577          */
1578         if (s->childXids != NULL)
1579                 pfree(s->childXids);
1580         s->childXids = NULL;
1581         s->nChildXids = 0;
1582         s->maxChildXids = 0;
1583
1584         /*
1585          * We could prune the unreportedXids array here. But we don't bother. That
1586          * would potentially reduce number of XLOG_XACT_ASSIGNMENT records but it
1587          * would likely introduce more CPU time into the more common paths, so we
1588          * choose not to do that.
1589          */
1590 }
1591
1592 /* ----------------------------------------------------------------
1593  *                                              CleanupTransaction stuff
1594  * ----------------------------------------------------------------
1595  */
1596
1597 /*
1598  *      AtCleanup_Memory
1599  */
1600 static void
1601 AtCleanup_Memory(void)
1602 {
1603         Assert(CurrentTransactionState->parent == NULL);
1604
1605         /*
1606          * Now that we're "out" of a transaction, have the system allocate things
1607          * in the top memory context instead of per-transaction contexts.
1608          */
1609         MemoryContextSwitchTo(TopMemoryContext);
1610
1611         /*
1612          * Clear the special abort context for next time.
1613          */
1614         if (TransactionAbortContext != NULL)
1615                 MemoryContextResetAndDeleteChildren(TransactionAbortContext);
1616
1617         /*
1618          * Release all transaction-local memory.
1619          */
1620         if (TopTransactionContext != NULL)
1621                 MemoryContextDelete(TopTransactionContext);
1622         TopTransactionContext = NULL;
1623         CurTransactionContext = NULL;
1624         CurrentTransactionState->curTransactionContext = NULL;
1625 }
1626
1627
1628 /* ----------------------------------------------------------------
1629  *                                              CleanupSubTransaction stuff
1630  * ----------------------------------------------------------------
1631  */
1632
1633 /*
1634  * AtSubCleanup_Memory
1635  */
1636 static void
1637 AtSubCleanup_Memory(void)
1638 {
1639         TransactionState s = CurrentTransactionState;
1640
1641         Assert(s->parent != NULL);
1642
1643         /* Make sure we're not in an about-to-be-deleted context */
1644         MemoryContextSwitchTo(s->parent->curTransactionContext);
1645         CurTransactionContext = s->parent->curTransactionContext;
1646
1647         /*
1648          * Clear the special abort context for next time.
1649          */
1650         if (TransactionAbortContext != NULL)
1651                 MemoryContextResetAndDeleteChildren(TransactionAbortContext);
1652
1653         /*
1654          * Delete the subxact local memory contexts. Its CurTransactionContext can
1655          * go too (note this also kills CurTransactionContexts from any children
1656          * of the subxact).
1657          */
1658         if (s->curTransactionContext)
1659                 MemoryContextDelete(s->curTransactionContext);
1660         s->curTransactionContext = NULL;
1661 }
1662
1663 /* ----------------------------------------------------------------
1664  *                                              interface routines
1665  * ----------------------------------------------------------------
1666  */
1667
1668 /*
1669  *      StartTransaction
1670  */
1671 static void
1672 StartTransaction(void)
1673 {
1674         TransactionState s;
1675         VirtualTransactionId vxid;
1676
1677         /*
1678          * Let's just make sure the state stack is empty
1679          */
1680         s = &TopTransactionStateData;
1681         CurrentTransactionState = s;
1682
1683         /*
1684          * check the current transaction state
1685          */
1686         if (s->state != TRANS_DEFAULT)
1687                 elog(WARNING, "StartTransaction while in %s state",
1688                          TransStateAsString(s->state));
1689
1690         /*
1691          * set the current transaction state information appropriately during
1692          * start processing
1693          */
1694         s->state = TRANS_START;
1695         s->transactionId = InvalidTransactionId;        /* until assigned */
1696
1697         /*
1698          * Make sure we've reset xact state variables
1699          *
1700          * If recovery is still in progress, mark this transaction as read-only.
1701          * We have lower level defences in XLogInsert and elsewhere to stop us
1702          * from modifying data during recovery, but this gives the normal
1703          * indication to the user that the transaction is read-only.
1704          */
1705         if (RecoveryInProgress())
1706         {
1707                 s->startedInRecovery = true;
1708                 XactReadOnly = true;
1709         }
1710         else
1711         {
1712                 s->startedInRecovery = false;
1713                 XactReadOnly = DefaultXactReadOnly;
1714         }
1715         XactDeferrable = DefaultXactDeferrable;
1716         XactIsoLevel = DefaultXactIsoLevel;
1717         forceSyncCommit = false;
1718         MyXactAccessedTempRel = false;
1719
1720         /*
1721          * reinitialize within-transaction counters
1722          */
1723         s->subTransactionId = TopSubTransactionId;
1724         currentSubTransactionId = TopSubTransactionId;
1725         currentCommandId = FirstCommandId;
1726         currentCommandIdUsed = false;
1727
1728         /*
1729          * initialize reported xid accounting
1730          */
1731         nUnreportedXids = 0;
1732
1733         /*
1734          * must initialize resource-management stuff first
1735          */
1736         AtStart_Memory();
1737         AtStart_ResourceOwner();
1738
1739         /*
1740          * Assign a new LocalTransactionId, and combine it with the backendId to
1741          * form a virtual transaction id.
1742          */
1743         vxid.backendId = MyBackendId;
1744         vxid.localTransactionId = GetNextLocalTransactionId();
1745
1746         /*
1747          * Lock the virtual transaction id before we announce it in the proc array
1748          */
1749         VirtualXactLockTableInsert(vxid);
1750
1751         /*
1752          * Advertise it in the proc array.      We assume assignment of
1753          * LocalTransactionID is atomic, and the backendId should be set already.
1754          */
1755         Assert(MyProc->backendId == vxid.backendId);
1756         MyProc->lxid = vxid.localTransactionId;
1757
1758         TRACE_POSTGRESQL_TRANSACTION_START(vxid.localTransactionId);
1759
1760         /*
1761          * set transaction_timestamp() (a/k/a now()).  We want this to be the same
1762          * as the first command's statement_timestamp(), so don't do a fresh
1763          * GetCurrentTimestamp() call (which'd be expensive anyway).  Also, mark
1764          * xactStopTimestamp as unset.
1765          */
1766         xactStartTimestamp = stmtStartTimestamp;
1767         xactStopTimestamp = 0;
1768         pgstat_report_xact_timestamp(xactStartTimestamp);
1769
1770         /*
1771          * initialize current transaction state fields
1772          *
1773          * note: prevXactReadOnly is not used at the outermost level
1774          */
1775         s->nestingLevel = 1;
1776         s->gucNestLevel = 1;
1777         s->childXids = NULL;
1778         s->nChildXids = 0;
1779         s->maxChildXids = 0;
1780         GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
1781         /* SecurityRestrictionContext should never be set outside a transaction */
1782         Assert(s->prevSecContext == 0);
1783
1784         /*
1785          * initialize other subsystems for new transaction
1786          */
1787         AtStart_GUC();
1788         AtStart_Inval();
1789         AtStart_Cache();
1790         AfterTriggerBeginXact();
1791
1792         /*
1793          * done with start processing, set current transaction state to "in
1794          * progress"
1795          */
1796         s->state = TRANS_INPROGRESS;
1797
1798         ShowTransactionState("StartTransaction");
1799 }
1800
1801
1802 /*
1803  *      CommitTransaction
1804  *
1805  * NB: if you change this routine, better look at PrepareTransaction too!
1806  */
1807 static void
1808 CommitTransaction(void)
1809 {
1810         TransactionState s = CurrentTransactionState;
1811         TransactionId latestXid;
1812
1813         ShowTransactionState("CommitTransaction");
1814
1815         /*
1816          * check the current transaction state
1817          */
1818         if (s->state != TRANS_INPROGRESS)
1819                 elog(WARNING, "CommitTransaction while in %s state",
1820                          TransStateAsString(s->state));
1821         Assert(s->parent == NULL);
1822
1823         /*
1824          * Do pre-commit processing that involves calling user-defined code, such
1825          * as triggers.  Since closing cursors could queue trigger actions,
1826          * triggers could open cursors, etc, we have to keep looping until there's
1827          * nothing left to do.
1828          */
1829         for (;;)
1830         {
1831                 /*
1832                  * Fire all currently pending deferred triggers.
1833                  */
1834                 AfterTriggerFireDeferred();
1835
1836                 /*
1837                  * Close open portals (converting holdable ones into static portals).
1838                  * If there weren't any, we are done ... otherwise loop back to check
1839                  * if they queued deferred triggers.  Lather, rinse, repeat.
1840                  */
1841                 if (!PreCommit_Portals(false))
1842                         break;
1843         }
1844
1845         /*
1846          * The remaining actions cannot call any user-defined code, so it's safe
1847          * to start shutting down within-transaction services.  But note that most
1848          * of this stuff could still throw an error, which would switch us into
1849          * the transaction-abort path.
1850          */
1851
1852         /* Shut down the deferred-trigger manager */
1853         AfterTriggerEndXact(true);
1854
1855         /*
1856          * Let ON COMMIT management do its thing (must happen after closing
1857          * cursors, to avoid dangling-reference problems)
1858          */
1859         PreCommit_on_commit_actions();
1860
1861         /* close large objects before lower-level cleanup */
1862         AtEOXact_LargeObject(true);
1863
1864         /*
1865          * Mark serializable transaction as complete for predicate locking
1866          * purposes.  This should be done as late as we can put it and still allow
1867          * errors to be raised for failure patterns found at commit.
1868          */
1869         PreCommit_CheckForSerializationFailure();
1870
1871         /*
1872          * Insert notifications sent by NOTIFY commands into the queue.  This
1873          * should be late in the pre-commit sequence to minimize time spent
1874          * holding the notify-insertion lock.
1875          */
1876         PreCommit_Notify();
1877
1878         /* Prevent cancel/die interrupt while cleaning up */
1879         HOLD_INTERRUPTS();
1880
1881         /* Commit updates to the relation map --- do this as late as possible */
1882         AtEOXact_RelationMap(true);
1883
1884         /*
1885          * set the current transaction state information appropriately during
1886          * commit processing
1887          */
1888         s->state = TRANS_COMMIT;
1889
1890         /*
1891          * Here is where we really truly commit.
1892          */
1893         latestXid = RecordTransactionCommit();
1894
1895         TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid);
1896
1897         /*
1898          * Let others know about no transaction in progress by me. Note that this
1899          * must be done _before_ releasing locks we hold and _after_
1900          * RecordTransactionCommit.
1901          */
1902         ProcArrayEndTransaction(MyProc, latestXid);
1903
1904         /*
1905          * This is all post-commit cleanup.  Note that if an error is raised here,
1906          * it's too late to abort the transaction.  This should be just
1907          * noncritical resource releasing.
1908          *
1909          * The ordering of operations is not entirely random.  The idea is:
1910          * release resources visible to other backends (eg, files, buffer pins);
1911          * then release locks; then release backend-local resources. We want to
1912          * release locks at the point where any backend waiting for us will see
1913          * our transaction as being fully cleaned up.
1914          *
1915          * Resources that can be associated with individual queries are handled by
1916          * the ResourceOwner mechanism.  The other calls here are for backend-wide
1917          * state.
1918          */
1919
1920         CallXactCallbacks(XACT_EVENT_COMMIT);
1921
1922         ResourceOwnerRelease(TopTransactionResourceOwner,
1923                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
1924                                                  true, true);
1925
1926         /* Check we've released all buffer pins */
1927         AtEOXact_Buffers(true);
1928
1929         /* Clean up the relation cache */
1930         AtEOXact_RelationCache(true);
1931
1932         /*
1933          * Make catalog changes visible to all backends.  This has to happen after
1934          * relcache references are dropped (see comments for
1935          * AtEOXact_RelationCache), but before locks are released (if anyone is
1936          * waiting for lock on a relation we've modified, we want them to know
1937          * about the catalog change before they start using the relation).
1938          */
1939         AtEOXact_Inval(true);
1940
1941         /*
1942          * Likewise, dropping of files deleted during the transaction is best done
1943          * after releasing relcache and buffer pins.  (This is not strictly
1944          * necessary during commit, since such pins should have been released
1945          * already, but this ordering is definitely critical during abort.)
1946          */
1947         smgrDoPendingDeletes(true);
1948
1949         AtEOXact_MultiXact();
1950
1951         ResourceOwnerRelease(TopTransactionResourceOwner,
1952                                                  RESOURCE_RELEASE_LOCKS,
1953                                                  true, true);
1954         ResourceOwnerRelease(TopTransactionResourceOwner,
1955                                                  RESOURCE_RELEASE_AFTER_LOCKS,
1956                                                  true, true);
1957
1958         /* Check we've released all catcache entries */
1959         AtEOXact_CatCache(true);
1960
1961         AtCommit_Notify();
1962         AtEOXact_GUC(true, 1);
1963         AtEOXact_SPI(true);
1964         AtEOXact_on_commit_actions(true);
1965         AtEOXact_Namespace(true);
1966         /* smgrcommit already done */
1967         AtEOXact_Files();
1968         AtEOXact_ComboCid();
1969         AtEOXact_HashTables(true);
1970         AtEOXact_PgStat(true);
1971         AtEOXact_Snapshot(true);
1972         pgstat_report_xact_timestamp(0);
1973
1974         CurrentResourceOwner = NULL;
1975         ResourceOwnerDelete(TopTransactionResourceOwner);
1976         s->curTransactionOwner = NULL;
1977         CurTransactionResourceOwner = NULL;
1978         TopTransactionResourceOwner = NULL;
1979
1980         AtCommit_Memory();
1981
1982         s->transactionId = InvalidTransactionId;
1983         s->subTransactionId = InvalidSubTransactionId;
1984         s->nestingLevel = 0;
1985         s->gucNestLevel = 0;
1986         s->childXids = NULL;
1987         s->nChildXids = 0;
1988         s->maxChildXids = 0;
1989
1990         /*
1991          * done with commit processing, set current transaction state back to
1992          * default
1993          */
1994         s->state = TRANS_DEFAULT;
1995
1996         RESUME_INTERRUPTS();
1997 }
1998
1999
2000 /*
2001  *      PrepareTransaction
2002  *
2003  * NB: if you change this routine, better look at CommitTransaction too!
2004  */
2005 static void
2006 PrepareTransaction(void)
2007 {
2008         TransactionState s = CurrentTransactionState;
2009         TransactionId xid = GetCurrentTransactionId();
2010         GlobalTransaction gxact;
2011         TimestampTz prepared_at;
2012
2013         ShowTransactionState("PrepareTransaction");
2014
2015         /*
2016          * check the current transaction state
2017          */
2018         if (s->state != TRANS_INPROGRESS)
2019                 elog(WARNING, "PrepareTransaction while in %s state",
2020                          TransStateAsString(s->state));
2021         Assert(s->parent == NULL);
2022
2023         /*
2024          * Do pre-commit processing that involves calling user-defined code, such
2025          * as triggers.  Since closing cursors could queue trigger actions,
2026          * triggers could open cursors, etc, we have to keep looping until there's
2027          * nothing left to do.
2028          */
2029         for (;;)
2030         {
2031                 /*
2032                  * Fire all currently pending deferred triggers.
2033                  */
2034                 AfterTriggerFireDeferred();
2035
2036                 /*
2037                  * Close open portals (converting holdable ones into static portals).
2038                  * If there weren't any, we are done ... otherwise loop back to check
2039                  * if they queued deferred triggers.  Lather, rinse, repeat.
2040                  */
2041                 if (!PreCommit_Portals(true))
2042                         break;
2043         }
2044
2045         /*
2046          * The remaining actions cannot call any user-defined code, so it's safe
2047          * to start shutting down within-transaction services.  But note that most
2048          * of this stuff could still throw an error, which would switch us into
2049          * the transaction-abort path.
2050          */
2051
2052         /* Shut down the deferred-trigger manager */
2053         AfterTriggerEndXact(true);
2054
2055         /*
2056          * Let ON COMMIT management do its thing (must happen after closing
2057          * cursors, to avoid dangling-reference problems)
2058          */
2059         PreCommit_on_commit_actions();
2060
2061         /* close large objects before lower-level cleanup */
2062         AtEOXact_LargeObject(true);
2063
2064         /*
2065          * Mark serializable transaction as complete for predicate locking
2066          * purposes.  This should be done as late as we can put it and still allow
2067          * errors to be raised for failure patterns found at commit.
2068          */
2069         PreCommit_CheckForSerializationFailure();
2070
2071         /* NOTIFY will be handled below */
2072
2073         /*
2074          * Don't allow PREPARE TRANSACTION if we've accessed a temporary table in
2075          * this transaction.  Having the prepared xact hold locks on another
2076          * backend's temp table seems a bad idea --- for instance it would prevent
2077          * the backend from exiting.  There are other problems too, such as how to
2078          * clean up the source backend's local buffers and ON COMMIT state if the
2079          * prepared xact includes a DROP of a temp table.
2080          *
2081          * We must check this after executing any ON COMMIT actions, because they
2082          * might still access a temp relation.
2083          *
2084          * XXX In principle this could be relaxed to allow some useful special
2085          * cases, such as a temp table created and dropped all within the
2086          * transaction.  That seems to require much more bookkeeping though.
2087          */
2088         if (MyXactAccessedTempRel)
2089                 ereport(ERROR,
2090                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2091                                  errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
2092
2093         /*
2094          * Likewise, don't allow PREPARE after pg_export_snapshot.  This could be
2095          * supported if we added cleanup logic to twophase.c, but for now it
2096          * doesn't seem worth the trouble.
2097          */
2098         if (XactHasExportedSnapshots())
2099                 ereport(ERROR,
2100                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2101                                  errmsg("cannot PREPARE a transaction that has exported snapshots")));
2102
2103         /* Prevent cancel/die interrupt while cleaning up */
2104         HOLD_INTERRUPTS();
2105
2106         /*
2107          * set the current transaction state information appropriately during
2108          * prepare processing
2109          */
2110         s->state = TRANS_PREPARE;
2111
2112         prepared_at = GetCurrentTimestamp();
2113
2114         /* Tell bufmgr and smgr to prepare for commit */
2115         BufmgrCommit();
2116
2117         /*
2118          * Reserve the GID for this transaction. This could fail if the requested
2119          * GID is invalid or already in use.
2120          */
2121         gxact = MarkAsPreparing(xid, prepareGID, prepared_at,
2122                                                         GetUserId(), MyDatabaseId);
2123         prepareGID = NULL;
2124
2125         /*
2126          * Collect data for the 2PC state file.  Note that in general, no actual
2127          * state change should happen in the called modules during this step,
2128          * since it's still possible to fail before commit, and in that case we
2129          * want transaction abort to be able to clean up.  (In particular, the
2130          * AtPrepare routines may error out if they find cases they cannot
2131          * handle.)  State cleanup should happen in the PostPrepare routines
2132          * below.  However, some modules can go ahead and clear state here because
2133          * they wouldn't do anything with it during abort anyway.
2134          *
2135          * Note: because the 2PC state file records will be replayed in the same
2136          * order they are made, the order of these calls has to match the order in
2137          * which we want things to happen during COMMIT PREPARED or ROLLBACK
2138          * PREPARED; in particular, pay attention to whether things should happen
2139          * before or after releasing the transaction's locks.
2140          */
2141         StartPrepare(gxact);
2142
2143         AtPrepare_Notify();
2144         AtPrepare_Locks();
2145         AtPrepare_PredicateLocks();
2146         AtPrepare_PgStat();
2147         AtPrepare_MultiXact();
2148         AtPrepare_RelationMap();
2149
2150         /*
2151          * Here is where we really truly prepare.
2152          *
2153          * We have to record transaction prepares even if we didn't make any
2154          * updates, because the transaction manager might get confused if we lose
2155          * a global transaction.
2156          */
2157         EndPrepare(gxact);
2158
2159         /*
2160          * Now we clean up backend-internal state and release internal resources.
2161          */
2162
2163         /* Reset XactLastRecEnd until the next transaction writes something */
2164         XactLastRecEnd.xrecoff = 0;
2165
2166         /*
2167          * Let others know about no transaction in progress by me.      This has to be
2168          * done *after* the prepared transaction has been marked valid, else
2169          * someone may think it is unlocked and recyclable.
2170          */
2171         ProcArrayClearTransaction(MyProc);
2172
2173         /*
2174          * This is all post-transaction cleanup.  Note that if an error is raised
2175          * here, it's too late to abort the transaction.  This should be just
2176          * noncritical resource releasing.      See notes in CommitTransaction.
2177          */
2178
2179         CallXactCallbacks(XACT_EVENT_PREPARE);
2180
2181         ResourceOwnerRelease(TopTransactionResourceOwner,
2182                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
2183                                                  true, true);
2184
2185         /* Check we've released all buffer pins */
2186         AtEOXact_Buffers(true);
2187
2188         /* Clean up the relation cache */
2189         AtEOXact_RelationCache(true);
2190
2191         /* notify doesn't need a postprepare call */
2192
2193         PostPrepare_PgStat();
2194
2195         PostPrepare_Inval();
2196
2197         PostPrepare_smgr();
2198
2199         PostPrepare_MultiXact(xid);
2200
2201         PostPrepare_Locks(xid);
2202         PostPrepare_PredicateLocks(xid);
2203
2204         ResourceOwnerRelease(TopTransactionResourceOwner,
2205                                                  RESOURCE_RELEASE_LOCKS,
2206                                                  true, true);
2207         ResourceOwnerRelease(TopTransactionResourceOwner,
2208                                                  RESOURCE_RELEASE_AFTER_LOCKS,
2209                                                  true, true);
2210
2211         /* Check we've released all catcache entries */
2212         AtEOXact_CatCache(true);
2213
2214         /* PREPARE acts the same as COMMIT as far as GUC is concerned */
2215         AtEOXact_GUC(true, 1);
2216         AtEOXact_SPI(true);
2217         AtEOXact_on_commit_actions(true);
2218         AtEOXact_Namespace(true);
2219         /* smgrcommit already done */
2220         AtEOXact_Files();
2221         AtEOXact_ComboCid();
2222         AtEOXact_HashTables(true);
2223         /* don't call AtEOXact_PgStat here */
2224         AtEOXact_Snapshot(true);
2225
2226         CurrentResourceOwner = NULL;
2227         ResourceOwnerDelete(TopTransactionResourceOwner);
2228         s->curTransactionOwner = NULL;
2229         CurTransactionResourceOwner = NULL;
2230         TopTransactionResourceOwner = NULL;
2231
2232         AtCommit_Memory();
2233
2234         s->transactionId = InvalidTransactionId;
2235         s->subTransactionId = InvalidSubTransactionId;
2236         s->nestingLevel = 0;
2237         s->gucNestLevel = 0;
2238         s->childXids = NULL;
2239         s->nChildXids = 0;
2240         s->maxChildXids = 0;
2241
2242         /*
2243          * done with 1st phase commit processing, set current transaction state
2244          * back to default
2245          */
2246         s->state = TRANS_DEFAULT;
2247
2248         RESUME_INTERRUPTS();
2249 }
2250
2251
2252 /*
2253  *      AbortTransaction
2254  */
2255 static void
2256 AbortTransaction(void)
2257 {
2258         TransactionState s = CurrentTransactionState;
2259         TransactionId latestXid;
2260
2261         /* Prevent cancel/die interrupt while cleaning up */
2262         HOLD_INTERRUPTS();
2263
2264         /* Make sure we have a valid memory context and resource owner */
2265         AtAbort_Memory();
2266         AtAbort_ResourceOwner();
2267
2268         /*
2269          * Release any LW locks we might be holding as quickly as possible.
2270          * (Regular locks, however, must be held till we finish aborting.)
2271          * Releasing LW locks is critical since we might try to grab them again
2272          * while cleaning up!
2273          */
2274         LWLockReleaseAll();
2275
2276         /* Clean up buffer I/O and buffer context locks, too */
2277         AbortBufferIO();
2278         UnlockBuffers();
2279
2280         /*
2281          * Also clean up any open wait for lock, since the lock manager will choke
2282          * if we try to wait for another lock before doing this.
2283          */
2284         LockErrorCleanup();
2285
2286         /*
2287          * check the current transaction state
2288          */
2289         if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
2290                 elog(WARNING, "AbortTransaction while in %s state",
2291                          TransStateAsString(s->state));
2292         Assert(s->parent == NULL);
2293
2294         /*
2295          * set the current transaction state information appropriately during the
2296          * abort processing
2297          */
2298         s->state = TRANS_ABORT;
2299
2300         /*
2301          * Reset user ID which might have been changed transiently.  We need this
2302          * to clean up in case control escaped out of a SECURITY DEFINER function
2303          * or other local change of CurrentUserId; therefore, the prior value of
2304          * SecurityRestrictionContext also needs to be restored.
2305          *
2306          * (Note: it is not necessary to restore session authorization or role
2307          * settings here because those can only be changed via GUC, and GUC will
2308          * take care of rolling them back if need be.)
2309          */
2310         SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
2311
2312         /*
2313          * do abort processing
2314          */
2315         AfterTriggerEndXact(false); /* 'false' means it's abort */
2316         AtAbort_Portals();
2317         AtEOXact_LargeObject(false);
2318         AtAbort_Notify();
2319         AtEOXact_RelationMap(false);
2320
2321         /*
2322          * Advertise the fact that we aborted in pg_clog (assuming that we got as
2323          * far as assigning an XID to advertise).
2324          */
2325         latestXid = RecordTransactionAbort(false);
2326
2327         TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid);
2328
2329         /*
2330          * Let others know about no transaction in progress by me. Note that this
2331          * must be done _before_ releasing locks we hold and _after_
2332          * RecordTransactionAbort.
2333          */
2334         ProcArrayEndTransaction(MyProc, latestXid);
2335
2336         /*
2337          * Post-abort cleanup.  See notes in CommitTransaction() concerning
2338          * ordering.  We can skip all of it if the transaction failed before
2339          * creating a resource owner.
2340          */
2341         if (TopTransactionResourceOwner != NULL)
2342         {
2343                 CallXactCallbacks(XACT_EVENT_ABORT);
2344
2345                 ResourceOwnerRelease(TopTransactionResourceOwner,
2346                                                          RESOURCE_RELEASE_BEFORE_LOCKS,
2347                                                          false, true);
2348                 AtEOXact_Buffers(false);
2349                 AtEOXact_RelationCache(false);
2350                 AtEOXact_Inval(false);
2351                 smgrDoPendingDeletes(false);
2352                 AtEOXact_MultiXact();
2353                 ResourceOwnerRelease(TopTransactionResourceOwner,
2354                                                          RESOURCE_RELEASE_LOCKS,
2355                                                          false, true);
2356                 ResourceOwnerRelease(TopTransactionResourceOwner,
2357                                                          RESOURCE_RELEASE_AFTER_LOCKS,
2358                                                          false, true);
2359                 AtEOXact_CatCache(false);
2360
2361                 AtEOXact_GUC(false, 1);
2362                 AtEOXact_SPI(false);
2363                 AtEOXact_on_commit_actions(false);
2364                 AtEOXact_Namespace(false);
2365                 AtEOXact_Files();
2366                 AtEOXact_ComboCid();
2367                 AtEOXact_HashTables(false);
2368                 AtEOXact_PgStat(false);
2369                 pgstat_report_xact_timestamp(0);
2370         }
2371
2372         /*
2373          * State remains TRANS_ABORT until CleanupTransaction().
2374          */
2375         RESUME_INTERRUPTS();
2376 }
2377
2378 /*
2379  *      CleanupTransaction
2380  */
2381 static void
2382 CleanupTransaction(void)
2383 {
2384         TransactionState s = CurrentTransactionState;
2385
2386         /*
2387          * State should still be TRANS_ABORT from AbortTransaction().
2388          */
2389         if (s->state != TRANS_ABORT)
2390                 elog(FATAL, "CleanupTransaction: unexpected state %s",
2391                          TransStateAsString(s->state));
2392
2393         /*
2394          * do abort cleanup processing
2395          */
2396         AtCleanup_Portals();            /* now safe to release portal memory */
2397         AtEOXact_Snapshot(false);       /* and release the transaction's snapshots */
2398
2399         CurrentResourceOwner = NULL;    /* and resource owner */
2400         if (TopTransactionResourceOwner)
2401                 ResourceOwnerDelete(TopTransactionResourceOwner);
2402         s->curTransactionOwner = NULL;
2403         CurTransactionResourceOwner = NULL;
2404         TopTransactionResourceOwner = NULL;
2405
2406         AtCleanup_Memory();                     /* and transaction memory */
2407
2408         s->transactionId = InvalidTransactionId;
2409         s->subTransactionId = InvalidSubTransactionId;
2410         s->nestingLevel = 0;
2411         s->gucNestLevel = 0;
2412         s->childXids = NULL;
2413         s->nChildXids = 0;
2414         s->maxChildXids = 0;
2415
2416         /*
2417          * done with abort processing, set current transaction state back to
2418          * default
2419          */
2420         s->state = TRANS_DEFAULT;
2421 }
2422
2423 /*
2424  *      StartTransactionCommand
2425  */
2426 void
2427 StartTransactionCommand(void)
2428 {
2429         TransactionState s = CurrentTransactionState;
2430
2431         switch (s->blockState)
2432         {
2433                         /*
2434                          * if we aren't in a transaction block, we just do our usual start
2435                          * transaction.
2436                          */
2437                 case TBLOCK_DEFAULT:
2438                         StartTransaction();
2439                         s->blockState = TBLOCK_STARTED;
2440                         break;
2441
2442                         /*
2443                          * We are somewhere in a transaction block or subtransaction and
2444                          * about to start a new command.  For now we do nothing, but
2445                          * someday we may do command-local resource initialization. (Note
2446                          * that any needed CommandCounterIncrement was done by the
2447                          * previous CommitTransactionCommand.)
2448                          */
2449                 case TBLOCK_INPROGRESS:
2450                 case TBLOCK_SUBINPROGRESS:
2451                         break;
2452
2453                         /*
2454                          * Here we are in a failed transaction block (one of the commands
2455                          * caused an abort) so we do nothing but remain in the abort
2456                          * state.  Eventually we will get a ROLLBACK command which will
2457                          * get us out of this state.  (It is up to other code to ensure
2458                          * that no commands other than ROLLBACK will be processed in these
2459                          * states.)
2460                          */
2461                 case TBLOCK_ABORT:
2462                 case TBLOCK_SUBABORT:
2463                         break;
2464
2465                         /* These cases are invalid. */
2466                 case TBLOCK_STARTED:
2467                 case TBLOCK_BEGIN:
2468                 case TBLOCK_SUBBEGIN:
2469                 case TBLOCK_END:
2470                 case TBLOCK_SUBRELEASE:
2471                 case TBLOCK_SUBCOMMIT:
2472                 case TBLOCK_ABORT_END:
2473                 case TBLOCK_SUBABORT_END:
2474                 case TBLOCK_ABORT_PENDING:
2475                 case TBLOCK_SUBABORT_PENDING:
2476                 case TBLOCK_SUBRESTART:
2477                 case TBLOCK_SUBABORT_RESTART:
2478                 case TBLOCK_PREPARE:
2479                         elog(ERROR, "StartTransactionCommand: unexpected state %s",
2480                                  BlockStateAsString(s->blockState));
2481                         break;
2482         }
2483
2484         /*
2485          * We must switch to CurTransactionContext before returning. This is
2486          * already done if we called StartTransaction, otherwise not.
2487          */
2488         Assert(CurTransactionContext != NULL);
2489         MemoryContextSwitchTo(CurTransactionContext);
2490 }
2491
2492 /*
2493  *      CommitTransactionCommand
2494  */
2495 void
2496 CommitTransactionCommand(void)
2497 {
2498         TransactionState s = CurrentTransactionState;
2499
2500         switch (s->blockState)
2501         {
2502                         /*
2503                          * This shouldn't happen, because it means the previous
2504                          * StartTransactionCommand didn't set the STARTED state
2505                          * appropriately.
2506                          */
2507                 case TBLOCK_DEFAULT:
2508                         elog(FATAL, "CommitTransactionCommand: unexpected state %s",
2509                                  BlockStateAsString(s->blockState));
2510                         break;
2511
2512                         /*
2513                          * If we aren't in a transaction block, just do our usual
2514                          * transaction commit, and return to the idle state.
2515                          */
2516                 case TBLOCK_STARTED:
2517                         CommitTransaction();
2518                         s->blockState = TBLOCK_DEFAULT;
2519                         break;
2520
2521                         /*
2522                          * We are completing a "BEGIN TRANSACTION" command, so we change
2523                          * to the "transaction block in progress" state and return.  (We
2524                          * assume the BEGIN did nothing to the database, so we need no
2525                          * CommandCounterIncrement.)
2526                          */
2527                 case TBLOCK_BEGIN:
2528                         s->blockState = TBLOCK_INPROGRESS;
2529                         break;
2530
2531                         /*
2532                          * This is the case when we have finished executing a command
2533                          * someplace within a transaction block.  We increment the command
2534                          * counter and return.
2535                          */
2536                 case TBLOCK_INPROGRESS:
2537                 case TBLOCK_SUBINPROGRESS:
2538                         CommandCounterIncrement();
2539                         break;
2540
2541                         /*
2542                          * We are completing a "COMMIT" command.  Do it and return to the
2543                          * idle state.
2544                          */
2545                 case TBLOCK_END:
2546                         CommitTransaction();
2547                         s->blockState = TBLOCK_DEFAULT;
2548                         break;
2549
2550                         /*
2551                          * Here we are in the middle of a transaction block but one of the
2552                          * commands caused an abort so we do nothing but remain in the
2553                          * abort state.  Eventually we will get a ROLLBACK comand.
2554                          */
2555                 case TBLOCK_ABORT:
2556                 case TBLOCK_SUBABORT:
2557                         break;
2558
2559                         /*
2560                          * Here we were in an aborted transaction block and we just got
2561                          * the ROLLBACK command from the user, so clean up the
2562                          * already-aborted transaction and return to the idle state.
2563                          */
2564                 case TBLOCK_ABORT_END:
2565                         CleanupTransaction();
2566                         s->blockState = TBLOCK_DEFAULT;
2567                         break;
2568
2569                         /*
2570                          * Here we were in a perfectly good transaction block but the user
2571                          * told us to ROLLBACK anyway.  We have to abort the transaction
2572                          * and then clean up.
2573                          */
2574                 case TBLOCK_ABORT_PENDING:
2575                         AbortTransaction();
2576                         CleanupTransaction();
2577                         s->blockState = TBLOCK_DEFAULT;
2578                         break;
2579
2580                         /*
2581                          * We are completing a "PREPARE TRANSACTION" command.  Do it and
2582                          * return to the idle state.
2583                          */
2584                 case TBLOCK_PREPARE:
2585                         PrepareTransaction();
2586                         s->blockState = TBLOCK_DEFAULT;
2587                         break;
2588
2589                         /*
2590                          * We were just issued a SAVEPOINT inside a transaction block.
2591                          * Start a subtransaction.      (DefineSavepoint already did
2592                          * PushTransaction, so as to have someplace to put the SUBBEGIN
2593                          * state.)
2594                          */
2595                 case TBLOCK_SUBBEGIN:
2596                         StartSubTransaction();
2597                         s->blockState = TBLOCK_SUBINPROGRESS;
2598                         break;
2599
2600                         /*
2601                          * We were issued a RELEASE command, so we end the
2602                          * current subtransaction and return to the parent transaction.
2603                          * The parent might be ended too, so repeat till we find an
2604                          * INPROGRESS transaction or subtransaction.
2605                          */
2606                 case TBLOCK_SUBRELEASE:
2607                         do
2608                         {
2609                                 CommitSubTransaction();
2610                                 s = CurrentTransactionState;    /* changed by pop */
2611                         } while (s->blockState == TBLOCK_SUBRELEASE);
2612
2613                         Assert(s->blockState == TBLOCK_INPROGRESS ||
2614                                    s->blockState == TBLOCK_SUBINPROGRESS);
2615                         break;
2616
2617                         /*
2618                          * We were issued a COMMIT, so we end the current subtransaction
2619                          * hierarchy and perform final commit. We do this by rolling up
2620                          * any subtransactions into their parent, which leads to O(N^2)
2621                          * operations with respect to resource owners - this isn't that
2622                          * bad until we approach a thousands of savepoints but is necessary
2623                          * for correctness should after triggers create new resource
2624                          * owners.
2625                          */
2626                 case TBLOCK_SUBCOMMIT:
2627                         do
2628                         {
2629                                 CommitSubTransaction();
2630                                 s = CurrentTransactionState;    /* changed by pop */
2631                         } while (s->blockState == TBLOCK_SUBCOMMIT);
2632                         /* If we had a COMMIT command, finish off the main xact too */
2633                         if (s->blockState == TBLOCK_END)
2634                         {
2635                                 Assert(s->parent == NULL);
2636                                 CommitTransaction();
2637                                 s->blockState = TBLOCK_DEFAULT;
2638                         }
2639                         else if (s->blockState == TBLOCK_PREPARE)
2640                         {
2641                                 Assert(s->parent == NULL);
2642                                 PrepareTransaction();
2643                                 s->blockState = TBLOCK_DEFAULT;
2644                         }
2645                         else
2646                                 elog(ERROR, "CommitTransactionCommand: unexpected state %s",
2647                                          BlockStateAsString(s->blockState));
2648                         break;
2649
2650                         /*
2651                          * The current already-failed subtransaction is ending due to a
2652                          * ROLLBACK or ROLLBACK TO command, so pop it and recursively
2653                          * examine the parent (which could be in any of several states).
2654                          */
2655                 case TBLOCK_SUBABORT_END:
2656                         CleanupSubTransaction();
2657                         CommitTransactionCommand();
2658                         break;
2659
2660                         /*
2661                          * As above, but it's not dead yet, so abort first.
2662                          */
2663                 case TBLOCK_SUBABORT_PENDING:
2664                         AbortSubTransaction();
2665                         CleanupSubTransaction();
2666                         CommitTransactionCommand();
2667                         break;
2668
2669                         /*
2670                          * The current subtransaction is the target of a ROLLBACK TO
2671                          * command.  Abort and pop it, then start a new subtransaction
2672                          * with the same name.
2673                          */
2674                 case TBLOCK_SUBRESTART:
2675                         {
2676                                 char       *name;
2677                                 int                     savepointLevel;
2678
2679                                 /* save name and keep Cleanup from freeing it */
2680                                 name = s->name;
2681                                 s->name = NULL;
2682                                 savepointLevel = s->savepointLevel;
2683
2684                                 AbortSubTransaction();
2685                                 CleanupSubTransaction();
2686
2687                                 DefineSavepoint(NULL);
2688                                 s = CurrentTransactionState;    /* changed by push */
2689                                 s->name = name;
2690                                 s->savepointLevel = savepointLevel;
2691
2692                                 /* This is the same as TBLOCK_SUBBEGIN case */
2693                                 AssertState(s->blockState == TBLOCK_SUBBEGIN);
2694                                 StartSubTransaction();
2695                                 s->blockState = TBLOCK_SUBINPROGRESS;
2696                         }
2697                         break;
2698
2699                         /*
2700                          * Same as above, but the subtransaction had already failed, so we
2701                          * don't need AbortSubTransaction.
2702                          */
2703                 case TBLOCK_SUBABORT_RESTART:
2704                         {
2705                                 char       *name;
2706                                 int                     savepointLevel;
2707
2708                                 /* save name and keep Cleanup from freeing it */
2709                                 name = s->name;
2710                                 s->name = NULL;
2711                                 savepointLevel = s->savepointLevel;
2712
2713                                 CleanupSubTransaction();
2714
2715                                 DefineSavepoint(NULL);
2716                                 s = CurrentTransactionState;    /* changed by push */
2717                                 s->name = name;
2718                                 s->savepointLevel = savepointLevel;
2719
2720                                 /* This is the same as TBLOCK_SUBBEGIN case */
2721                                 AssertState(s->blockState == TBLOCK_SUBBEGIN);
2722                                 StartSubTransaction();
2723                                 s->blockState = TBLOCK_SUBINPROGRESS;
2724                         }
2725                         break;
2726         }
2727 }
2728
2729 /*
2730  *      AbortCurrentTransaction
2731  */
2732 void
2733 AbortCurrentTransaction(void)
2734 {
2735         TransactionState s = CurrentTransactionState;
2736
2737         switch (s->blockState)
2738         {
2739                 case TBLOCK_DEFAULT:
2740                         if (s->state == TRANS_DEFAULT)
2741                         {
2742                                 /* we are idle, so nothing to do */
2743                         }
2744                         else
2745                         {
2746                                 /*
2747                                  * We can get here after an error during transaction start
2748                                  * (state will be TRANS_START).  Need to clean up the
2749                                  * incompletely started transaction.  First, adjust the
2750                                  * low-level state to suppress warning message from
2751                                  * AbortTransaction.
2752                                  */
2753                                 if (s->state == TRANS_START)
2754                                         s->state = TRANS_INPROGRESS;
2755                                 AbortTransaction();
2756                                 CleanupTransaction();
2757                         }
2758                         break;
2759
2760                         /*
2761                          * if we aren't in a transaction block, we just do the basic abort
2762                          * & cleanup transaction.
2763                          */
2764                 case TBLOCK_STARTED:
2765                         AbortTransaction();
2766                         CleanupTransaction();
2767                         s->blockState = TBLOCK_DEFAULT;
2768                         break;
2769
2770                         /*
2771                          * If we are in TBLOCK_BEGIN it means something screwed up right
2772                          * after reading "BEGIN TRANSACTION".  We assume that the user
2773                          * will interpret the error as meaning the BEGIN failed to get him
2774                          * into a transaction block, so we should abort and return to idle
2775                          * state.
2776                          */
2777                 case TBLOCK_BEGIN:
2778                         AbortTransaction();
2779                         CleanupTransaction();
2780                         s->blockState = TBLOCK_DEFAULT;
2781                         break;
2782
2783                         /*
2784                          * We are somewhere in a transaction block and we've gotten a
2785                          * failure, so we abort the transaction and set up the persistent
2786                          * ABORT state.  We will stay in ABORT until we get a ROLLBACK.
2787                          */
2788                 case TBLOCK_INPROGRESS:
2789                         AbortTransaction();
2790                         s->blockState = TBLOCK_ABORT;
2791                         /* CleanupTransaction happens when we exit TBLOCK_ABORT_END */
2792                         break;
2793
2794                         /*
2795                          * Here, we failed while trying to COMMIT.      Clean up the
2796                          * transaction and return to idle state (we do not want to stay in
2797                          * the transaction).
2798                          */
2799                 case TBLOCK_END:
2800                         AbortTransaction();
2801                         CleanupTransaction();
2802                         s->blockState = TBLOCK_DEFAULT;
2803                         break;
2804
2805                         /*
2806                          * Here, we are already in an aborted transaction state and are
2807                          * waiting for a ROLLBACK, but for some reason we failed again! So
2808                          * we just remain in the abort state.
2809                          */
2810                 case TBLOCK_ABORT:
2811                 case TBLOCK_SUBABORT:
2812                         break;
2813
2814                         /*
2815                          * We are in a failed transaction and we got the ROLLBACK command.
2816                          * We have already aborted, we just need to cleanup and go to idle
2817                          * state.
2818                          */
2819                 case TBLOCK_ABORT_END:
2820                         CleanupTransaction();
2821                         s->blockState = TBLOCK_DEFAULT;
2822                         break;
2823
2824                         /*
2825                          * We are in a live transaction and we got a ROLLBACK command.
2826                          * Abort, cleanup, go to idle state.
2827                          */
2828                 case TBLOCK_ABORT_PENDING:
2829                         AbortTransaction();
2830                         CleanupTransaction();
2831                         s->blockState = TBLOCK_DEFAULT;
2832                         break;
2833
2834                         /*
2835                          * Here, we failed while trying to PREPARE.  Clean up the
2836                          * transaction and return to idle state (we do not want to stay in
2837                          * the transaction).
2838                          */
2839                 case TBLOCK_PREPARE:
2840                         AbortTransaction();
2841                         CleanupTransaction();
2842                         s->blockState = TBLOCK_DEFAULT;
2843                         break;
2844
2845                         /*
2846                          * We got an error inside a subtransaction.  Abort just the
2847                          * subtransaction, and go to the persistent SUBABORT state until
2848                          * we get ROLLBACK.
2849                          */
2850                 case TBLOCK_SUBINPROGRESS:
2851                         AbortSubTransaction();
2852                         s->blockState = TBLOCK_SUBABORT;
2853                         break;
2854
2855                         /*
2856                          * If we failed while trying to create a subtransaction, clean up
2857                          * the broken subtransaction and abort the parent.      The same
2858                          * applies if we get a failure while ending a subtransaction.
2859                          */
2860                 case TBLOCK_SUBBEGIN:
2861                 case TBLOCK_SUBRELEASE:
2862                 case TBLOCK_SUBCOMMIT:
2863                 case TBLOCK_SUBABORT_PENDING:
2864                 case TBLOCK_SUBRESTART:
2865                         AbortSubTransaction();
2866                         CleanupSubTransaction();
2867                         AbortCurrentTransaction();
2868                         break;
2869
2870                         /*
2871                          * Same as above, except the Abort() was already done.
2872                          */
2873                 case TBLOCK_SUBABORT_END:
2874                 case TBLOCK_SUBABORT_RESTART:
2875                         CleanupSubTransaction();
2876                         AbortCurrentTransaction();
2877                         break;
2878         }
2879 }
2880
2881 /*
2882  *      PreventTransactionChain
2883  *
2884  *      This routine is to be called by statements that must not run inside
2885  *      a transaction block, typically because they have non-rollback-able
2886  *      side effects or do internal commits.
2887  *
2888  *      If we have already started a transaction block, issue an error; also issue
2889  *      an error if we appear to be running inside a user-defined function (which
2890  *      could issue more commands and possibly cause a failure after the statement
2891  *      completes).  Subtransactions are verboten too.
2892  *
2893  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2894  *      inside a function or multi-query querystring.  (We will always fail if
2895  *      this is false, but it's convenient to centralize the check here instead of
2896  *      making callers do it.)
2897  *      stmtType: statement type name, for error messages.
2898  */
2899 void
2900 PreventTransactionChain(bool isTopLevel, const char *stmtType)
2901 {
2902         /*
2903          * xact block already started?
2904          */
2905         if (IsTransactionBlock())
2906                 ereport(ERROR,
2907                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2908                 /* translator: %s represents an SQL statement name */
2909                                  errmsg("%s cannot run inside a transaction block",
2910                                                 stmtType)));
2911
2912         /*
2913          * subtransaction?
2914          */
2915         if (IsSubTransaction())
2916                 ereport(ERROR,
2917                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2918                 /* translator: %s represents an SQL statement name */
2919                                  errmsg("%s cannot run inside a subtransaction",
2920                                                 stmtType)));
2921
2922         /*
2923          * inside a function call?
2924          */
2925         if (!isTopLevel)
2926                 ereport(ERROR,
2927                                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2928                 /* translator: %s represents an SQL statement name */
2929                                  errmsg("%s cannot be executed from a function or multi-command string",
2930                                                 stmtType)));
2931
2932         /* If we got past IsTransactionBlock test, should be in default state */
2933         if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
2934                 CurrentTransactionState->blockState != TBLOCK_STARTED)
2935                 elog(FATAL, "cannot prevent transaction chain");
2936         /* all okay */
2937 }
2938
2939 /*
2940  *      RequireTransactionChain
2941  *
2942  *      This routine is to be called by statements that must run inside
2943  *      a transaction block, because they have no effects that persist past
2944  *      transaction end (and so calling them outside a transaction block
2945  *      is presumably an error).  DECLARE CURSOR is an example.
2946  *
2947  *      If we appear to be running inside a user-defined function, we do not
2948  *      issue an error, since the function could issue more commands that make
2949  *      use of the current statement's results.  Likewise subtransactions.
2950  *      Thus this is an inverse for PreventTransactionChain.
2951  *
2952  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2953  *      inside a function.
2954  *      stmtType: statement type name, for error messages.
2955  */
2956 void
2957 RequireTransactionChain(bool isTopLevel, const char *stmtType)
2958 {
2959         /*
2960          * xact block already started?
2961          */
2962         if (IsTransactionBlock())
2963                 return;
2964
2965         /*
2966          * subtransaction?
2967          */
2968         if (IsSubTransaction())
2969                 return;
2970
2971         /*
2972          * inside a function call?
2973          */
2974         if (!isTopLevel)
2975                 return;
2976
2977         ereport(ERROR,
2978                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
2979         /* translator: %s represents an SQL statement name */
2980                          errmsg("%s can only be used in transaction blocks",
2981                                         stmtType)));
2982 }
2983
2984 /*
2985  *      IsInTransactionChain
2986  *
2987  *      This routine is for statements that need to behave differently inside
2988  *      a transaction block than when running as single commands.  ANALYZE is
2989  *      currently the only example.
2990  *
2991  *      isTopLevel: passed down from ProcessUtility to determine whether we are
2992  *      inside a function.
2993  */
2994 bool
2995 IsInTransactionChain(bool isTopLevel)
2996 {
2997         /*
2998          * Return true on same conditions that would make PreventTransactionChain
2999          * error out
3000          */
3001         if (IsTransactionBlock())
3002                 return true;
3003
3004         if (IsSubTransaction())
3005                 return true;
3006
3007         if (!isTopLevel)
3008                 return true;
3009
3010         if (CurrentTransactionState->blockState != TBLOCK_DEFAULT &&
3011                 CurrentTransactionState->blockState != TBLOCK_STARTED)
3012                 return true;
3013
3014         return false;
3015 }
3016
3017
3018 /*
3019  * Register or deregister callback functions for start- and end-of-xact
3020  * operations.
3021  *
3022  * These functions are intended for use by dynamically loaded modules.
3023  * For built-in modules we generally just hardwire the appropriate calls
3024  * (mainly because it's easier to control the order that way, where needed).
3025  *
3026  * At transaction end, the callback occurs post-commit or post-abort, so the
3027  * callback functions can only do noncritical cleanup.
3028  */
3029 void
3030 RegisterXactCallback(XactCallback callback, void *arg)
3031 {
3032         XactCallbackItem *item;
3033
3034         item = (XactCallbackItem *)
3035                 MemoryContextAlloc(TopMemoryContext, sizeof(XactCallbackItem));
3036         item->callback = callback;
3037         item->arg = arg;
3038         item->next = Xact_callbacks;
3039         Xact_callbacks = item;
3040 }
3041
3042 void
3043 UnregisterXactCallback(XactCallback callback, void *arg)
3044 {
3045         XactCallbackItem *item;
3046         XactCallbackItem *prev;
3047
3048         prev = NULL;
3049         for (item = Xact_callbacks; item; prev = item, item = item->next)
3050         {
3051                 if (item->callback == callback && item->arg == arg)
3052                 {
3053                         if (prev)
3054                                 prev->next = item->next;
3055                         else
3056                                 Xact_callbacks = item->next;
3057                         pfree(item);
3058                         break;
3059                 }
3060         }
3061 }
3062
3063 static void
3064 CallXactCallbacks(XactEvent event)
3065 {
3066         XactCallbackItem *item;
3067
3068         for (item = Xact_callbacks; item; item = item->next)
3069                 (*item->callback) (event, item->arg);
3070 }
3071
3072
3073 /*
3074  * Register or deregister callback functions for start- and end-of-subxact
3075  * operations.
3076  *
3077  * Pretty much same as above, but for subtransaction events.
3078  *
3079  * At subtransaction end, the callback occurs post-subcommit or post-subabort,
3080  * so the callback functions can only do noncritical cleanup.  At
3081  * subtransaction start, the callback is called when the subtransaction has
3082  * finished initializing.
3083  */
3084 void
3085 RegisterSubXactCallback(SubXactCallback callback, void *arg)
3086 {
3087         SubXactCallbackItem *item;
3088
3089         item = (SubXactCallbackItem *)
3090                 MemoryContextAlloc(TopMemoryContext, sizeof(SubXactCallbackItem));
3091         item->callback = callback;
3092         item->arg = arg;
3093         item->next = SubXact_callbacks;
3094         SubXact_callbacks = item;
3095 }
3096
3097 void
3098 UnregisterSubXactCallback(SubXactCallback callback, void *arg)
3099 {
3100         SubXactCallbackItem *item;
3101         SubXactCallbackItem *prev;
3102
3103         prev = NULL;
3104         for (item = SubXact_callbacks; item; prev = item, item = item->next)
3105         {
3106                 if (item->callback == callback && item->arg == arg)
3107                 {
3108                         if (prev)
3109                                 prev->next = item->next;
3110                         else
3111                                 SubXact_callbacks = item->next;
3112                         pfree(item);
3113                         break;
3114                 }
3115         }
3116 }
3117
3118 static void
3119 CallSubXactCallbacks(SubXactEvent event,
3120                                          SubTransactionId mySubid,
3121                                          SubTransactionId parentSubid)
3122 {
3123         SubXactCallbackItem *item;
3124
3125         for (item = SubXact_callbacks; item; item = item->next)
3126                 (*item->callback) (event, mySubid, parentSubid, item->arg);
3127 }
3128
3129
3130 /* ----------------------------------------------------------------
3131  *                                         transaction block support
3132  * ----------------------------------------------------------------
3133  */
3134
3135 /*
3136  *      BeginTransactionBlock
3137  *              This executes a BEGIN command.
3138  */
3139 void
3140 BeginTransactionBlock(void)
3141 {
3142         TransactionState s = CurrentTransactionState;
3143
3144         switch (s->blockState)
3145         {
3146                         /*
3147                          * We are not inside a transaction block, so allow one to begin.
3148                          */
3149                 case TBLOCK_STARTED:
3150                         s->blockState = TBLOCK_BEGIN;
3151                         break;
3152
3153                         /*
3154                          * Already a transaction block in progress.
3155                          */
3156                 case TBLOCK_INPROGRESS:
3157                 case TBLOCK_SUBINPROGRESS:
3158                 case TBLOCK_ABORT:
3159                 case TBLOCK_SUBABORT:
3160                         ereport(WARNING,
3161                                         (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
3162                                          errmsg("there is already a transaction in progress")));
3163                         break;
3164
3165                         /* These cases are invalid. */
3166                 case TBLOCK_DEFAULT:
3167                 case TBLOCK_BEGIN:
3168                 case TBLOCK_SUBBEGIN:
3169                 case TBLOCK_END:
3170                 case TBLOCK_SUBRELEASE:
3171                 case TBLOCK_SUBCOMMIT:
3172                 case TBLOCK_ABORT_END:
3173                 case TBLOCK_SUBABORT_END:
3174                 case TBLOCK_ABORT_PENDING:
3175                 case TBLOCK_SUBABORT_PENDING:
3176                 case TBLOCK_SUBRESTART:
3177                 case TBLOCK_SUBABORT_RESTART:
3178                 case TBLOCK_PREPARE:
3179                         elog(FATAL, "BeginTransactionBlock: unexpected state %s",
3180                                  BlockStateAsString(s->blockState));
3181                         break;
3182         }
3183 }
3184
3185 /*
3186  *      PrepareTransactionBlock
3187  *              This executes a PREPARE command.
3188  *
3189  * Since PREPARE may actually do a ROLLBACK, the result indicates what
3190  * happened: TRUE for PREPARE, FALSE for ROLLBACK.
3191  *
3192  * Note that we don't actually do anything here except change blockState.
3193  * The real work will be done in the upcoming PrepareTransaction().
3194  * We do it this way because it's not convenient to change memory context,
3195  * resource owner, etc while executing inside a Portal.
3196  */
3197 bool
3198 PrepareTransactionBlock(char *gid)
3199 {
3200         TransactionState s;
3201         bool            result;
3202
3203         /* Set up to commit the current transaction */
3204         result = EndTransactionBlock();
3205
3206         /* If successful, change outer tblock state to PREPARE */
3207         if (result)
3208         {
3209                 s = CurrentTransactionState;
3210
3211                 while (s->parent != NULL)
3212                         s = s->parent;
3213
3214                 if (s->blockState == TBLOCK_END)
3215                 {
3216                         /* Save GID where PrepareTransaction can find it again */
3217                         prepareGID = MemoryContextStrdup(TopTransactionContext, gid);
3218
3219                         s->blockState = TBLOCK_PREPARE;
3220                 }
3221                 else
3222                 {
3223                         /*
3224                          * ignore case where we are not in a transaction;
3225                          * EndTransactionBlock already issued a warning.
3226                          */
3227                         Assert(s->blockState == TBLOCK_STARTED);
3228                         /* Don't send back a PREPARE result tag... */
3229                         result = false;
3230                 }
3231         }
3232
3233         return result;
3234 }
3235
3236 /*
3237  *      EndTransactionBlock
3238  *              This executes a COMMIT command.
3239  *
3240  * Since COMMIT may actually do a ROLLBACK, the result indicates what
3241  * happened: TRUE for COMMIT, FALSE for ROLLBACK.
3242  *
3243  * Note that we don't actually do anything here except change blockState.
3244  * The real work will be done in the upcoming CommitTransactionCommand().
3245  * We do it this way because it's not convenient to change memory context,
3246  * resource owner, etc while executing inside a Portal.
3247  */
3248 bool
3249 EndTransactionBlock(void)
3250 {
3251         TransactionState s = CurrentTransactionState;
3252         bool            result = false;
3253
3254         switch (s->blockState)
3255         {
3256                         /*
3257                          * We are in a transaction block, so tell CommitTransactionCommand
3258                          * to COMMIT.
3259                          */
3260                 case TBLOCK_INPROGRESS:
3261                         s->blockState = TBLOCK_END;
3262                         result = true;
3263                         break;
3264
3265                         /*
3266                          * We are in a failed transaction block.  Tell
3267                          * CommitTransactionCommand it's time to exit the block.
3268                          */
3269                 case TBLOCK_ABORT:
3270                         s->blockState = TBLOCK_ABORT_END;
3271                         break;
3272
3273                         /*
3274                          * We are in a live subtransaction block.  Set up to subcommit all
3275                          * open subtransactions and then commit the main transaction.
3276                          */
3277                 case TBLOCK_SUBINPROGRESS:
3278                         while (s->parent != NULL)
3279                         {
3280                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3281                                         s->blockState = TBLOCK_SUBCOMMIT;
3282                                 else
3283                                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3284                                                  BlockStateAsString(s->blockState));
3285                                 s = s->parent;
3286                         }
3287                         if (s->blockState == TBLOCK_INPROGRESS)
3288                                 s->blockState = TBLOCK_END;
3289                         else
3290                                 elog(FATAL, "EndTransactionBlock: unexpected state %s",
3291                                          BlockStateAsString(s->blockState));
3292                         result = true;
3293                         break;
3294
3295                         /*
3296                          * Here we are inside an aborted subtransaction.  Treat the COMMIT
3297                          * as ROLLBACK: set up to abort everything and exit the main
3298                          * transaction.
3299                          */
3300                 case TBLOCK_SUBABORT:
3301                         while (s->parent != NULL)
3302                         {
3303                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3304                                         s->blockState = TBLOCK_SUBABORT_PENDING;
3305                                 else if (s->blockState == TBLOCK_SUBABORT)
3306                                         s->blockState = TBLOCK_SUBABORT_END;
3307                                 else
3308                                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3309                                                  BlockStateAsString(s->blockState));
3310                                 s = s->parent;
3311                         }
3312                         if (s->blockState == TBLOCK_INPROGRESS)
3313                                 s->blockState = TBLOCK_ABORT_PENDING;
3314                         else if (s->blockState == TBLOCK_ABORT)
3315                                 s->blockState = TBLOCK_ABORT_END;
3316                         else
3317                                 elog(FATAL, "EndTransactionBlock: unexpected state %s",
3318                                          BlockStateAsString(s->blockState));
3319                         break;
3320
3321                         /*
3322                          * The user issued COMMIT when not inside a transaction.  Issue a
3323                          * WARNING, staying in TBLOCK_STARTED state.  The upcoming call to
3324                          * CommitTransactionCommand() will then close the transaction and
3325                          * put us back into the default state.
3326                          */
3327                 case TBLOCK_STARTED:
3328                         ereport(WARNING,
3329                                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
3330                                          errmsg("there is no transaction in progress")));
3331                         result = true;
3332                         break;
3333
3334                         /* These cases are invalid. */
3335                 case TBLOCK_DEFAULT:
3336                 case TBLOCK_BEGIN:
3337                 case TBLOCK_SUBBEGIN:
3338                 case TBLOCK_END:
3339                 case TBLOCK_SUBRELEASE:
3340                 case TBLOCK_SUBCOMMIT:
3341                 case TBLOCK_ABORT_END:
3342                 case TBLOCK_SUBABORT_END:
3343                 case TBLOCK_ABORT_PENDING:
3344                 case TBLOCK_SUBABORT_PENDING:
3345                 case TBLOCK_SUBRESTART:
3346                 case TBLOCK_SUBABORT_RESTART:
3347                 case TBLOCK_PREPARE:
3348                         elog(FATAL, "EndTransactionBlock: unexpected state %s",
3349                                  BlockStateAsString(s->blockState));
3350                         break;
3351         }
3352
3353         return result;
3354 }
3355
3356 /*
3357  *      UserAbortTransactionBlock
3358  *              This executes a ROLLBACK command.
3359  *
3360  * As above, we don't actually do anything here except change blockState.
3361  */
3362 void
3363 UserAbortTransactionBlock(void)
3364 {
3365         TransactionState s = CurrentTransactionState;
3366
3367         switch (s->blockState)
3368         {
3369                         /*
3370                          * We are inside a transaction block and we got a ROLLBACK command
3371                          * from the user, so tell CommitTransactionCommand to abort and
3372                          * exit the transaction block.
3373                          */
3374                 case TBLOCK_INPROGRESS:
3375                         s->blockState = TBLOCK_ABORT_PENDING;
3376                         break;
3377
3378                         /*
3379                          * We are inside a failed transaction block and we got a ROLLBACK
3380                          * command from the user.  Abort processing is already done, so
3381                          * CommitTransactionCommand just has to cleanup and go back to
3382                          * idle state.
3383                          */
3384                 case TBLOCK_ABORT:
3385                         s->blockState = TBLOCK_ABORT_END;
3386                         break;
3387
3388                         /*
3389                          * We are inside a subtransaction.      Mark everything up to top
3390                          * level as exitable.
3391                          */
3392                 case TBLOCK_SUBINPROGRESS:
3393                 case TBLOCK_SUBABORT:
3394                         while (s->parent != NULL)
3395                         {
3396                                 if (s->blockState == TBLOCK_SUBINPROGRESS)
3397                                         s->blockState = TBLOCK_SUBABORT_PENDING;
3398                                 else if (s->blockState == TBLOCK_SUBABORT)
3399                                         s->blockState = TBLOCK_SUBABORT_END;
3400                                 else
3401                                         elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3402                                                  BlockStateAsString(s->blockState));
3403                                 s = s->parent;
3404                         }
3405                         if (s->blockState == TBLOCK_INPROGRESS)
3406                                 s->blockState = TBLOCK_ABORT_PENDING;
3407                         else if (s->blockState == TBLOCK_ABORT)
3408                                 s->blockState = TBLOCK_ABORT_END;
3409                         else
3410                                 elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3411                                          BlockStateAsString(s->blockState));
3412                         break;
3413
3414                         /*
3415                          * The user issued ABORT when not inside a transaction. Issue a
3416                          * WARNING and go to abort state.  The upcoming call to
3417                          * CommitTransactionCommand() will then put us back into the
3418                          * default state.
3419                          */
3420                 case TBLOCK_STARTED:
3421                         ereport(NOTICE,
3422                                         (errcode(ERRCODE_NO_ACTIVE_SQL_TRANSACTION),
3423                                          errmsg("there is no transaction in progress")));
3424                         s->blockState = TBLOCK_ABORT_PENDING;
3425                         break;
3426
3427                         /* These cases are invalid. */
3428                 case TBLOCK_DEFAULT:
3429                 case TBLOCK_BEGIN:
3430                 case TBLOCK_SUBBEGIN:
3431                 case TBLOCK_END:
3432                 case TBLOCK_SUBRELEASE:
3433                 case TBLOCK_SUBCOMMIT:
3434                 case TBLOCK_ABORT_END:
3435                 case TBLOCK_SUBABORT_END:
3436                 case TBLOCK_ABORT_PENDING:
3437                 case TBLOCK_SUBABORT_PENDING:
3438                 case TBLOCK_SUBRESTART:
3439                 case TBLOCK_SUBABORT_RESTART:
3440                 case TBLOCK_PREPARE:
3441                         elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
3442                                  BlockStateAsString(s->blockState));
3443                         break;
3444         }
3445 }
3446
3447 /*
3448  * DefineSavepoint
3449  *              This executes a SAVEPOINT command.
3450  */
3451 void
3452 DefineSavepoint(char *name)
3453 {
3454         TransactionState s = CurrentTransactionState;
3455
3456         switch (s->blockState)
3457         {
3458                 case TBLOCK_INPROGRESS:
3459                 case TBLOCK_SUBINPROGRESS:
3460                         /* Normal subtransaction start */
3461                         PushTransaction();
3462                         s = CurrentTransactionState;            /* changed by push */
3463
3464                         /*
3465                          * Savepoint names, like the TransactionState block itself, live
3466                          * in TopTransactionContext.
3467                          */
3468                         if (name)
3469                                 s->name = MemoryContextStrdup(TopTransactionContext, name);
3470                         break;
3471
3472                         /* These cases are invalid. */
3473                 case TBLOCK_DEFAULT:
3474                 case TBLOCK_STARTED:
3475                 case TBLOCK_BEGIN:
3476                 case TBLOCK_SUBBEGIN:
3477                 case TBLOCK_END:
3478                 case TBLOCK_SUBRELEASE:
3479                 case TBLOCK_SUBCOMMIT:
3480                 case TBLOCK_ABORT:
3481                 case TBLOCK_SUBABORT:
3482                 case TBLOCK_ABORT_END:
3483                 case TBLOCK_SUBABORT_END:
3484                 case TBLOCK_ABORT_PENDING:
3485                 case TBLOCK_SUBABORT_PENDING:
3486                 case TBLOCK_SUBRESTART:
3487                 case TBLOCK_SUBABORT_RESTART:
3488                 case TBLOCK_PREPARE:
3489                         elog(FATAL, "DefineSavepoint: unexpected state %s",
3490                                  BlockStateAsString(s->blockState));
3491                         break;
3492         }
3493 }
3494
3495 /*
3496  * ReleaseSavepoint
3497  *              This executes a RELEASE command.
3498  *
3499  * As above, we don't actually do anything here except change blockState.
3500  */
3501 void
3502 ReleaseSavepoint(List *options)
3503 {
3504         TransactionState s = CurrentTransactionState;
3505         TransactionState target,
3506                                 xact;
3507         ListCell   *cell;
3508         char       *name = NULL;
3509
3510         switch (s->blockState)
3511         {
3512                         /*
3513                          * We can't rollback to a savepoint if there is no savepoint
3514                          * defined.
3515                          */
3516                 case TBLOCK_INPROGRESS:
3517                         ereport(ERROR,
3518                                         (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3519                                          errmsg("no such savepoint")));
3520                         break;
3521
3522                         /*
3523                          * We are in a non-aborted subtransaction.      This is the only valid
3524                          * case.
3525                          */
3526                 case TBLOCK_SUBINPROGRESS:
3527                         break;
3528
3529                         /* These cases are invalid. */
3530                 case TBLOCK_DEFAULT:
3531                 case TBLOCK_STARTED:
3532                 case TBLOCK_BEGIN:
3533                 case TBLOCK_SUBBEGIN:
3534                 case TBLOCK_END:
3535                 case TBLOCK_SUBRELEASE:
3536                 case TBLOCK_SUBCOMMIT:
3537                 case TBLOCK_ABORT:
3538                 case TBLOCK_SUBABORT:
3539                 case TBLOCK_ABORT_END:
3540                 case TBLOCK_SUBABORT_END:
3541                 case TBLOCK_ABORT_PENDING:
3542                 case TBLOCK_SUBABORT_PENDING:
3543                 case TBLOCK_SUBRESTART:
3544                 case TBLOCK_SUBABORT_RESTART:
3545                 case TBLOCK_PREPARE:
3546                         elog(FATAL, "ReleaseSavepoint: unexpected state %s",
3547                                  BlockStateAsString(s->blockState));
3548                         break;
3549         }
3550
3551         foreach(cell, options)
3552         {
3553                 DefElem    *elem = lfirst(cell);
3554
3555                 if (strcmp(elem->defname, "savepoint_name") == 0)
3556                         name = strVal(elem->arg);
3557         }
3558
3559         Assert(PointerIsValid(name));
3560
3561         for (target = s; PointerIsValid(target); target = target->parent)
3562         {
3563                 if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
3564                         break;
3565         }
3566
3567         if (!PointerIsValid(target))
3568                 ereport(ERROR,
3569                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3570                                  errmsg("no such savepoint")));
3571
3572         /* disallow crossing savepoint level boundaries */
3573         if (target->savepointLevel != s->savepointLevel)
3574                 ereport(ERROR,
3575                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3576                                  errmsg("no such savepoint")));
3577
3578         /*
3579          * Mark "commit pending" all subtransactions up to the target
3580          * subtransaction.      The actual commits will happen when control gets to
3581          * CommitTransactionCommand.
3582          */
3583         xact = CurrentTransactionState;
3584         for (;;)
3585         {
3586                 Assert(xact->blockState == TBLOCK_SUBINPROGRESS);
3587                 xact->blockState = TBLOCK_SUBRELEASE;
3588                 if (xact == target)
3589                         break;
3590                 xact = xact->parent;
3591                 Assert(PointerIsValid(xact));
3592         }
3593 }
3594
3595 /*
3596  * RollbackToSavepoint
3597  *              This executes a ROLLBACK TO <savepoint> command.
3598  *
3599  * As above, we don't actually do anything here except change blockState.
3600  */
3601 void
3602 RollbackToSavepoint(List *options)
3603 {
3604         TransactionState s = CurrentTransactionState;
3605         TransactionState target,
3606                                 xact;
3607         ListCell   *cell;
3608         char       *name = NULL;
3609
3610         switch (s->blockState)
3611         {
3612                         /*
3613                          * We can't rollback to a savepoint if there is no savepoint
3614                          * defined.
3615                          */
3616                 case TBLOCK_INPROGRESS:
3617                 case TBLOCK_ABORT:
3618                         ereport(ERROR,
3619                                         (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3620                                          errmsg("no such savepoint")));
3621                         break;
3622
3623                         /*
3624                          * There is at least one savepoint, so proceed.
3625                          */
3626                 case TBLOCK_SUBINPROGRESS:
3627                 case TBLOCK_SUBABORT:
3628                         break;
3629
3630                         /* These cases are invalid. */
3631                 case TBLOCK_DEFAULT:
3632                 case TBLOCK_STARTED:
3633                 case TBLOCK_BEGIN:
3634                 case TBLOCK_SUBBEGIN:
3635                 case TBLOCK_END:
3636                 case TBLOCK_SUBRELEASE:
3637                 case TBLOCK_SUBCOMMIT:
3638                 case TBLOCK_ABORT_END:
3639                 case TBLOCK_SUBABORT_END:
3640                 case TBLOCK_ABORT_PENDING:
3641                 case TBLOCK_SUBABORT_PENDING:
3642                 case TBLOCK_SUBRESTART:
3643                 case TBLOCK_SUBABORT_RESTART:
3644                 case TBLOCK_PREPARE:
3645                         elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3646                                  BlockStateAsString(s->blockState));
3647                         break;
3648         }
3649
3650         foreach(cell, options)
3651         {
3652                 DefElem    *elem = lfirst(cell);
3653
3654                 if (strcmp(elem->defname, "savepoint_name") == 0)
3655                         name = strVal(elem->arg);
3656         }
3657
3658         Assert(PointerIsValid(name));
3659
3660         for (target = s; PointerIsValid(target); target = target->parent)
3661         {
3662                 if (PointerIsValid(target->name) && strcmp(target->name, name) == 0)
3663                         break;
3664         }
3665
3666         if (!PointerIsValid(target))
3667                 ereport(ERROR,
3668                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3669                                  errmsg("no such savepoint")));
3670
3671         /* disallow crossing savepoint level boundaries */
3672         if (target->savepointLevel != s->savepointLevel)
3673                 ereport(ERROR,
3674                                 (errcode(ERRCODE_S_E_INVALID_SPECIFICATION),
3675                                  errmsg("no such savepoint")));
3676
3677         /*
3678          * Mark "abort pending" all subtransactions up to the target
3679          * subtransaction.      The actual aborts will happen when control gets to
3680          * CommitTransactionCommand.
3681          */
3682         xact = CurrentTransactionState;
3683         for (;;)
3684         {
3685                 if (xact == target)
3686                         break;
3687                 if (xact->blockState == TBLOCK_SUBINPROGRESS)
3688                         xact->blockState = TBLOCK_SUBABORT_PENDING;
3689                 else if (xact->blockState == TBLOCK_SUBABORT)
3690                         xact->blockState = TBLOCK_SUBABORT_END;
3691                 else
3692                         elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3693                                  BlockStateAsString(xact->blockState));
3694                 xact = xact->parent;
3695                 Assert(PointerIsValid(xact));
3696         }
3697
3698         /* And mark the target as "restart pending" */
3699         if (xact->blockState == TBLOCK_SUBINPROGRESS)
3700                 xact->blockState = TBLOCK_SUBRESTART;
3701         else if (xact->blockState == TBLOCK_SUBABORT)
3702                 xact->blockState = TBLOCK_SUBABORT_RESTART;
3703         else
3704                 elog(FATAL, "RollbackToSavepoint: unexpected state %s",
3705                          BlockStateAsString(xact->blockState));
3706 }
3707
3708 /*
3709  * BeginInternalSubTransaction
3710  *              This is the same as DefineSavepoint except it allows TBLOCK_STARTED,
3711  *              TBLOCK_END, and TBLOCK_PREPARE states, and therefore it can safely be
3712  *              used in functions that might be called when not inside a BEGIN block
3713  *              or when running deferred triggers at COMMIT/PREPARE time.  Also, it
3714  *              automatically does CommitTransactionCommand/StartTransactionCommand
3715  *              instead of expecting the caller to do it.
3716  */
3717 void
3718 BeginInternalSubTransaction(char *name)
3719 {
3720         TransactionState s = CurrentTransactionState;
3721
3722         switch (s->blockState)
3723         {
3724                 case TBLOCK_STARTED:
3725                 case TBLOCK_INPROGRESS:
3726                 case TBLOCK_END:
3727                 case TBLOCK_PREPARE:
3728                 case TBLOCK_SUBINPROGRESS:
3729                         /* Normal subtransaction start */
3730                         PushTransaction();
3731                         s = CurrentTransactionState;            /* changed by push */
3732
3733                         /*
3734                          * Savepoint names, like the TransactionState block itself, live
3735                          * in TopTransactionContext.
3736                          */
3737                         if (name)
3738                                 s->name = MemoryContextStrdup(TopTransactionContext, name);
3739                         break;
3740
3741                         /* These cases are invalid. */
3742                 case TBLOCK_DEFAULT:
3743                 case TBLOCK_BEGIN:
3744                 case TBLOCK_SUBBEGIN:
3745                 case TBLOCK_SUBRELEASE:
3746                 case TBLOCK_SUBCOMMIT:
3747                 case TBLOCK_ABORT:
3748                 case TBLOCK_SUBABORT:
3749                 case TBLOCK_ABORT_END:
3750                 case TBLOCK_SUBABORT_END:
3751                 case TBLOCK_ABORT_PENDING:
3752                 case TBLOCK_SUBABORT_PENDING:
3753                 case TBLOCK_SUBRESTART:
3754                 case TBLOCK_SUBABORT_RESTART:
3755                         elog(FATAL, "BeginInternalSubTransaction: unexpected state %s",
3756                                  BlockStateAsString(s->blockState));
3757                         break;
3758         }
3759
3760         CommitTransactionCommand();
3761         StartTransactionCommand();
3762 }
3763
3764 /*
3765  * ReleaseCurrentSubTransaction
3766  *
3767  * RELEASE (ie, commit) the innermost subtransaction, regardless of its
3768  * savepoint name (if any).
3769  * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
3770  */
3771 void
3772 ReleaseCurrentSubTransaction(void)
3773 {
3774         TransactionState s = CurrentTransactionState;
3775
3776         if (s->blockState != TBLOCK_SUBINPROGRESS)
3777                 elog(ERROR, "ReleaseCurrentSubTransaction: unexpected state %s",
3778                          BlockStateAsString(s->blockState));
3779         Assert(s->state == TRANS_INPROGRESS);
3780         MemoryContextSwitchTo(CurTransactionContext);
3781         CommitSubTransaction();
3782         s = CurrentTransactionState;    /* changed by pop */
3783         Assert(s->state == TRANS_INPROGRESS);
3784 }
3785
3786 /*
3787  * RollbackAndReleaseCurrentSubTransaction
3788  *
3789  * ROLLBACK and RELEASE (ie, abort) the innermost subtransaction, regardless
3790  * of its savepoint name (if any).
3791  * NB: do NOT use CommitTransactionCommand/StartTransactionCommand with this.
3792  */
3793 void
3794 RollbackAndReleaseCurrentSubTransaction(void)
3795 {
3796         TransactionState s = CurrentTransactionState;
3797
3798         switch (s->blockState)
3799         {
3800                         /* Must be in a subtransaction */
3801                 case TBLOCK_SUBINPROGRESS:
3802                 case TBLOCK_SUBABORT:
3803                         break;
3804
3805                         /* These cases are invalid. */
3806                 case TBLOCK_DEFAULT:
3807                 case TBLOCK_STARTED:
3808                 case TBLOCK_BEGIN:
3809                 case TBLOCK_SUBBEGIN:
3810                 case TBLOCK_INPROGRESS:
3811                 case TBLOCK_END:
3812                 case TBLOCK_SUBRELEASE:
3813                 case TBLOCK_SUBCOMMIT:
3814                 case TBLOCK_ABORT:
3815                 case TBLOCK_ABORT_END:
3816                 case TBLOCK_SUBABORT_END:
3817                 case TBLOCK_ABORT_PENDING:
3818                 case TBLOCK_SUBABORT_PENDING:
3819                 case TBLOCK_SUBRESTART:
3820                 case TBLOCK_SUBABORT_RESTART:
3821                 case TBLOCK_PREPARE:
3822                         elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s",
3823                                  BlockStateAsString(s->blockState));
3824                         break;
3825         }
3826
3827         /*
3828          * Abort the current subtransaction, if needed.
3829          */
3830         if (s->blockState == TBLOCK_SUBINPROGRESS)
3831                 AbortSubTransaction();
3832
3833         /* And clean it up, too */
3834         CleanupSubTransaction();
3835
3836         s = CurrentTransactionState;    /* changed by pop */
3837         AssertState(s->blockState == TBLOCK_SUBINPROGRESS ||
3838                                 s->blockState == TBLOCK_INPROGRESS ||
3839                                 s->blockState == TBLOCK_STARTED);
3840 }
3841
3842 /*
3843  *      AbortOutOfAnyTransaction
3844  *
3845  *      This routine is provided for error recovery purposes.  It aborts any
3846  *      active transaction or transaction block, leaving the system in a known
3847  *      idle state.
3848  */
3849 void
3850 AbortOutOfAnyTransaction(void)
3851 {
3852         TransactionState s = CurrentTransactionState;
3853
3854         /*
3855          * Get out of any transaction or nested transaction
3856          */
3857         do
3858         {
3859                 switch (s->blockState)
3860                 {
3861                         case TBLOCK_DEFAULT:
3862                                 /* Not in a transaction, do nothing */
3863                                 break;
3864                         case TBLOCK_STARTED:
3865                         case TBLOCK_BEGIN:
3866                         case TBLOCK_INPROGRESS:
3867                         case TBLOCK_END:
3868                         case TBLOCK_ABORT_PENDING:
3869                         case TBLOCK_PREPARE:
3870                                 /* In a transaction, so clean up */
3871                                 AbortTransaction();
3872                                 CleanupTransaction();
3873                                 s->blockState = TBLOCK_DEFAULT;
3874                                 break;
3875                         case TBLOCK_ABORT:
3876                         case TBLOCK_ABORT_END:
3877                                 /* AbortTransaction already done, still need Cleanup */
3878                                 CleanupTransaction();
3879                                 s->blockState = TBLOCK_DEFAULT;
3880                                 break;
3881
3882                                 /*
3883                                  * In a subtransaction, so clean it up and abort parent too
3884                                  */
3885                         case TBLOCK_SUBBEGIN:
3886                         case TBLOCK_SUBINPROGRESS:
3887                         case TBLOCK_SUBRELEASE:
3888                         case TBLOCK_SUBCOMMIT:
3889                         case TBLOCK_SUBABORT_PENDING:
3890                         case TBLOCK_SUBRESTART:
3891                                 AbortSubTransaction();
3892                                 CleanupSubTransaction();
3893                                 s = CurrentTransactionState;    /* changed by pop */
3894                                 break;
3895
3896                         case TBLOCK_SUBABORT:
3897                         case TBLOCK_SUBABORT_END:
3898                         case TBLOCK_SUBABORT_RESTART:
3899                                 /* As above, but AbortSubTransaction already done */
3900                                 CleanupSubTransaction();
3901                                 s = CurrentTransactionState;    /* changed by pop */
3902                                 break;
3903                 }
3904         } while (s->blockState != TBLOCK_DEFAULT);
3905
3906         /* Should be out of all subxacts now */
3907         Assert(s->parent == NULL);
3908 }
3909
3910 /*
3911  * IsTransactionBlock --- are we within a transaction block?
3912  */
3913 bool
3914 IsTransactionBlock(void)
3915 {
3916         TransactionState s = CurrentTransactionState;
3917
3918         if (s->blockState == TBLOCK_DEFAULT || s->blockState == TBLOCK_STARTED)
3919                 return false;
3920
3921         return true;
3922 }
3923
3924 /*
3925  * IsTransactionOrTransactionBlock --- are we within either a transaction
3926  * or a transaction block?      (The backend is only really "idle" when this
3927  * returns false.)
3928  *
3929  * This should match up with IsTransactionBlock and IsTransactionState.
3930  */
3931 bool
3932 IsTransactionOrTransactionBlock(void)
3933 {
3934         TransactionState s = CurrentTransactionState;
3935
3936         if (s->blockState == TBLOCK_DEFAULT)
3937                 return false;
3938
3939         return true;
3940 }
3941
3942 /*
3943  * TransactionBlockStatusCode - return status code to send in ReadyForQuery
3944  */
3945 char
3946 TransactionBlockStatusCode(void)
3947 {
3948         TransactionState s = CurrentTransactionState;
3949
3950         switch (s->blockState)
3951         {
3952                 case TBLOCK_DEFAULT:
3953                 case TBLOCK_STARTED:
3954                         return 'I';                     /* idle --- not in transaction */
3955                 case TBLOCK_BEGIN:
3956                 case TBLOCK_SUBBEGIN:
3957                 case TBLOCK_INPROGRESS:
3958                 case TBLOCK_SUBINPROGRESS:
3959                 case TBLOCK_END:
3960                 case TBLOCK_SUBRELEASE:
3961                 case TBLOCK_SUBCOMMIT:
3962                 case TBLOCK_PREPARE:
3963                         return 'T';                     /* in transaction */
3964                 case TBLOCK_ABORT:
3965                 case TBLOCK_SUBABORT:
3966                 case TBLOCK_ABORT_END:
3967                 case TBLOCK_SUBABORT_END:
3968                 case TBLOCK_ABORT_PENDING:
3969                 case TBLOCK_SUBABORT_PENDING:
3970                 case TBLOCK_SUBRESTART:
3971                 case TBLOCK_SUBABORT_RESTART:
3972                         return 'E';                     /* in failed transaction */
3973         }
3974
3975         /* should never get here */
3976         elog(FATAL, "invalid transaction block state: %s",
3977                  BlockStateAsString(s->blockState));
3978         return 0;                                       /* keep compiler quiet */
3979 }
3980
3981 /*
3982  * IsSubTransaction
3983  */
3984 bool
3985 IsSubTransaction(void)
3986 {
3987         TransactionState s = CurrentTransactionState;
3988
3989         if (s->nestingLevel >= 2)
3990                 return true;
3991
3992         return false;
3993 }
3994
3995 /*
3996  * StartSubTransaction
3997  *
3998  * If you're wondering why this is separate from PushTransaction: it's because
3999  * we can't conveniently do this stuff right inside DefineSavepoint.  The
4000  * SAVEPOINT utility command will be executed inside a Portal, and if we
4001  * muck with CurrentMemoryContext or CurrentResourceOwner then exit from
4002  * the Portal will undo those settings.  So we make DefineSavepoint just
4003  * push a dummy transaction block, and when control returns to the main
4004  * idle loop, CommitTransactionCommand will be called, and we'll come here
4005  * to finish starting the subtransaction.
4006  */
4007 static void
4008 StartSubTransaction(void)
4009 {
4010         TransactionState s = CurrentTransactionState;
4011
4012         if (s->state != TRANS_DEFAULT)
4013                 elog(WARNING, "StartSubTransaction while in %s state",
4014                          TransStateAsString(s->state));
4015
4016         s->state = TRANS_START;
4017
4018         /*
4019          * Initialize subsystems for new subtransaction
4020          *
4021          * must initialize resource-management stuff first
4022          */
4023         AtSubStart_Memory();
4024         AtSubStart_ResourceOwner();
4025         AtSubStart_Inval();
4026         AtSubStart_Notify();
4027         AfterTriggerBeginSubXact();
4028
4029         s->state = TRANS_INPROGRESS;
4030
4031         /*
4032          * Call start-of-subxact callbacks
4033          */
4034         CallSubXactCallbacks(SUBXACT_EVENT_START_SUB, s->subTransactionId,
4035                                                  s->parent->subTransactionId);
4036
4037         ShowTransactionState("StartSubTransaction");
4038 }
4039
4040 /*
4041  * CommitSubTransaction
4042  *
4043  *      The caller has to make sure to always reassign CurrentTransactionState
4044  *      if it has a local pointer to it after calling this function.
4045  */
4046 static void
4047 CommitSubTransaction(void)
4048 {
4049         TransactionState s = CurrentTransactionState;
4050
4051         ShowTransactionState("CommitSubTransaction");
4052
4053         if (s->state != TRANS_INPROGRESS)
4054                 elog(WARNING, "CommitSubTransaction while in %s state",
4055                          TransStateAsString(s->state));
4056
4057         /* Pre-commit processing goes here -- nothing to do at the moment */
4058
4059         s->state = TRANS_COMMIT;
4060
4061         /* Must CCI to ensure commands of subtransaction are seen as done */
4062         CommandCounterIncrement();
4063
4064         /*
4065          * Prior to 8.4 we marked subcommit in clog at this point.      We now only
4066          * perform that step, if required, as part of the atomic update of the
4067          * whole transaction tree at top level commit or abort.
4068          */
4069
4070         /* Post-commit cleanup */
4071         if (TransactionIdIsValid(s->transactionId))
4072                 AtSubCommit_childXids();
4073         AfterTriggerEndSubXact(true);
4074         AtSubCommit_Portals(s->subTransactionId,
4075                                                 s->parent->subTransactionId,
4076                                                 s->parent->curTransactionOwner);
4077         AtEOSubXact_LargeObject(true, s->subTransactionId,
4078                                                         s->parent->subTransactionId);
4079         AtSubCommit_Notify();
4080
4081         CallSubXactCallbacks(SUBXACT_EVENT_COMMIT_SUB, s->subTransactionId,
4082                                                  s->parent->subTransactionId);
4083
4084         ResourceOwnerRelease(s->curTransactionOwner,
4085                                                  RESOURCE_RELEASE_BEFORE_LOCKS,
4086                                                  true, false);
4087         AtEOSubXact_RelationCache(true, s->subTransactionId,
4088                                                           s->parent->subTransactionId);
4089         AtEOSubXact_Inval(true);
4090         AtSubCommit_smgr();
4091
4092         /*
4093          * The only lock we actually release here is the subtransaction XID lock.
4094          */
4095         CurrentResourceOwner = s->curTransactionOwner;
4096         if (TransactionIdIsValid(s->transactionId))
4097                 XactLockTableDelete(s->transactionId);
4098
4099         /*
4100          * Other locks should get transferred to their parent resource owner.
4101          */
4102         ResourceOwnerRelease(s->curTransactionOwner,
4103                                                  RESOURCE_RELEASE_LOCKS,
4104                                                  true, false);
4105         ResourceOwnerRelease(s->curTransactionOwner,
4106                                                  RESOURCE_RELEASE_AFTER_LOCKS,
4107                                                  true, false);
4108
4109         AtEOXact_GUC(true, s->gucNestLevel);
4110         AtEOSubXact_SPI(true, s->subTransactionId);
4111         AtEOSubXact_on_commit_actions(true, s->subTransactionId,
4112                                                                   s->parent->subTransactionId);
4113         AtEOSubXact_Namespace(true, s->subTransactionId,
4114                                                   s->parent->subTransactionId);
4115         AtEOSubXact_Files(true, s->subTransactionId,
4116                                           s->parent->subTransactionId);
4117         AtEOSubXact_HashTables(true, s->nestingLevel);
4118         AtEOSubXact_PgStat(true, s->nestingLevel);
4119         AtSubCommit_Snapshot(s->nestingLevel);
4120
4121         /*
4122          * We need to restore the upper transaction's read-only state, in case the
4123          * upper is read-write while the child is read-only; GUC will incorrectly
4124          * think it should leave the child state in place.
4125          */
4126         XactReadOnly = s->prevXactReadOnly;
4127
4128         CurrentResourceOwner = s->parent->curTransactionOwner;
4129         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4130         ResourceOwnerDelete(s->curTransactionOwner);
4131         s->curTransactionOwner = NULL;
4132
4133         AtSubCommit_Memory();
4134
4135         s->state = TRANS_DEFAULT;
4136
4137         PopTransaction();
4138 }
4139
4140 /*
4141  * AbortSubTransaction
4142  */
4143 static void
4144 AbortSubTransaction(void)
4145 {
4146         TransactionState s = CurrentTransactionState;
4147
4148         /* Prevent cancel/die interrupt while cleaning up */
4149         HOLD_INTERRUPTS();
4150
4151         /* Make sure we have a valid memory context and resource owner */
4152         AtSubAbort_Memory();
4153         AtSubAbort_ResourceOwner();
4154
4155         /*
4156          * Release any LW locks we might be holding as quickly as possible.
4157          * (Regular locks, however, must be held till we finish aborting.)
4158          * Releasing LW locks is critical since we might try to grab them again
4159          * while cleaning up!
4160          *
4161          * FIXME This may be incorrect --- Are there some locks we should keep?
4162          * Buffer locks, for example?  I don't think so but I'm not sure.
4163          */
4164         LWLockReleaseAll();
4165
4166         AbortBufferIO();
4167         UnlockBuffers();
4168
4169         LockErrorCleanup();
4170
4171         /*
4172          * check the current transaction state
4173          */
4174         ShowTransactionState("AbortSubTransaction");
4175
4176         if (s->state != TRANS_INPROGRESS)
4177                 elog(WARNING, "AbortSubTransaction while in %s state",
4178                          TransStateAsString(s->state));
4179
4180         s->state = TRANS_ABORT;
4181
4182         /*
4183          * Reset user ID which might have been changed transiently.  (See notes in
4184          * AbortTransaction.)
4185          */
4186         SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
4187
4188         /*
4189          * We can skip all this stuff if the subxact failed before creating a
4190          * ResourceOwner...
4191          */
4192         if (s->curTransactionOwner)
4193         {
4194                 AfterTriggerEndSubXact(false);
4195                 AtSubAbort_Portals(s->subTransactionId,
4196                                                    s->parent->subTransactionId,
4197                                                    s->parent->curTransactionOwner);
4198                 AtEOSubXact_LargeObject(false, s->subTransactionId,
4199                                                                 s->parent->subTransactionId);
4200                 AtSubAbort_Notify();
4201
4202                 /* Advertise the fact that we aborted in pg_clog. */
4203                 (void) RecordTransactionAbort(true);
4204
4205                 /* Post-abort cleanup */
4206                 if (TransactionIdIsValid(s->transactionId))
4207                         AtSubAbort_childXids();
4208
4209                 CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId,
4210                                                          s->parent->subTransactionId);
4211
4212                 ResourceOwnerRelease(s->curTransactionOwner,
4213                                                          RESOURCE_RELEASE_BEFORE_LOCKS,
4214                                                          false, false);
4215                 AtEOSubXact_RelationCache(false, s->subTransactionId,
4216                                                                   s->parent->subTransactionId);
4217                 AtEOSubXact_Inval(false);
4218                 AtSubAbort_smgr();
4219                 ResourceOwnerRelease(s->curTransactionOwner,
4220                                                          RESOURCE_RELEASE_LOCKS,
4221                                                          false, false);
4222                 ResourceOwnerRelease(s->curTransactionOwner,
4223                                                          RESOURCE_RELEASE_AFTER_LOCKS,
4224                                                          false, false);
4225
4226                 AtEOXact_GUC(false, s->gucNestLevel);
4227                 AtEOSubXact_SPI(false, s->subTransactionId);
4228                 AtEOSubXact_on_commit_actions(false, s->subTransactionId,
4229                                                                           s->parent->subTransactionId);
4230                 AtEOSubXact_Namespace(false, s->subTransactionId,
4231                                                           s->parent->subTransactionId);
4232                 AtEOSubXact_Files(false, s->subTransactionId,
4233                                                   s->parent->subTransactionId);
4234                 AtEOSubXact_HashTables(false, s->nestingLevel);
4235                 AtEOSubXact_PgStat(false, s->nestingLevel);
4236                 AtSubAbort_Snapshot(s->nestingLevel);
4237         }
4238
4239         /*
4240          * Restore the upper transaction's read-only state, too.  This should be
4241          * redundant with GUC's cleanup but we may as well do it for consistency
4242          * with the commit case.
4243          */
4244         XactReadOnly = s->prevXactReadOnly;
4245
4246         RESUME_INTERRUPTS();
4247 }
4248
4249 /*
4250  * CleanupSubTransaction
4251  *
4252  *      The caller has to make sure to always reassign CurrentTransactionState
4253  *      if it has a local pointer to it after calling this function.
4254  */
4255 static void
4256 CleanupSubTransaction(void)
4257 {
4258         TransactionState s = CurrentTransactionState;
4259
4260         ShowTransactionState("CleanupSubTransaction");
4261
4262         if (s->state != TRANS_ABORT)
4263                 elog(WARNING, "CleanupSubTransaction while in %s state",
4264                          TransStateAsString(s->state));
4265
4266         AtSubCleanup_Portals(s->subTransactionId);
4267
4268         CurrentResourceOwner = s->parent->curTransactionOwner;
4269         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4270         if (s->curTransactionOwner)
4271                 ResourceOwnerDelete(s->curTransactionOwner);
4272         s->curTransactionOwner = NULL;
4273
4274         AtSubCleanup_Memory();
4275
4276         s->state = TRANS_DEFAULT;
4277
4278         PopTransaction();
4279 }
4280
4281 /*
4282  * PushTransaction
4283  *              Create transaction state stack entry for a subtransaction
4284  *
4285  *      The caller has to make sure to always reassign CurrentTransactionState
4286  *      if it has a local pointer to it after calling this function.
4287  */
4288 static void
4289 PushTransaction(void)
4290 {
4291         TransactionState p = CurrentTransactionState;
4292         TransactionState s;
4293
4294         /*
4295          * We keep subtransaction state nodes in TopTransactionContext.
4296          */
4297         s = (TransactionState)
4298                 MemoryContextAllocZero(TopTransactionContext,
4299                                                            sizeof(TransactionStateData));
4300
4301         /*
4302          * Assign a subtransaction ID, watching out for counter wraparound.
4303          */
4304         currentSubTransactionId += 1;
4305         if (currentSubTransactionId == InvalidSubTransactionId)
4306         {
4307                 currentSubTransactionId -= 1;
4308                 pfree(s);
4309                 ereport(ERROR,
4310                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4311                                  errmsg("cannot have more than 2^32-1 subtransactions in a transaction")));
4312         }
4313
4314         /*
4315          * We can now stack a minimally valid subtransaction without fear of
4316          * failure.
4317          */
4318         s->transactionId = InvalidTransactionId;        /* until assigned */
4319         s->subTransactionId = currentSubTransactionId;
4320         s->parent = p;
4321         s->nestingLevel = p->nestingLevel + 1;
4322         s->gucNestLevel = NewGUCNestLevel();
4323         s->savepointLevel = p->savepointLevel;
4324         s->state = TRANS_DEFAULT;
4325         s->blockState = TBLOCK_SUBBEGIN;
4326         GetUserIdAndSecContext(&s->prevUser, &s->prevSecContext);
4327         s->prevXactReadOnly = XactReadOnly;
4328
4329         CurrentTransactionState = s;
4330
4331         /*
4332          * AbortSubTransaction and CleanupSubTransaction have to be able to cope
4333          * with the subtransaction from here on out; in particular they should not
4334          * assume that it necessarily has a transaction context, resource owner,
4335          * or XID.
4336          */
4337 }
4338
4339 /*
4340  * PopTransaction
4341  *              Pop back to parent transaction state
4342  *
4343  *      The caller has to make sure to always reassign CurrentTransactionState
4344  *      if it has a local pointer to it after calling this function.
4345  */
4346 static void
4347 PopTransaction(void)
4348 {
4349         TransactionState s = CurrentTransactionState;
4350
4351         if (s->state != TRANS_DEFAULT)
4352                 elog(WARNING, "PopTransaction while in %s state",
4353                          TransStateAsString(s->state));
4354
4355         if (s->parent == NULL)
4356                 elog(FATAL, "PopTransaction with no parent");
4357
4358         CurrentTransactionState = s->parent;
4359
4360         /* Let's just make sure CurTransactionContext is good */
4361         CurTransactionContext = s->parent->curTransactionContext;
4362         MemoryContextSwitchTo(CurTransactionContext);
4363
4364         /* Ditto for ResourceOwner links */
4365         CurTransactionResourceOwner = s->parent->curTransactionOwner;
4366         CurrentResourceOwner = s->parent->curTransactionOwner;
4367
4368         /* Free the old child structure */
4369         if (s->name)
4370                 pfree(s->name);
4371         pfree(s);
4372 }
4373
4374 /*
4375  * ShowTransactionState
4376  *              Debug support
4377  */
4378 static void
4379 ShowTransactionState(const char *str)
4380 {
4381         /* skip work if message will definitely not be printed */
4382         if (log_min_messages <= DEBUG3 || client_min_messages <= DEBUG3)
4383         {
4384                 elog(DEBUG3, "%s", str);
4385                 ShowTransactionStateRec(CurrentTransactionState);
4386         }
4387 }
4388
4389 /*
4390  * ShowTransactionStateRec
4391  *              Recursive subroutine for ShowTransactionState
4392  */
4393 static void
4394 ShowTransactionStateRec(TransactionState s)
4395 {
4396         StringInfoData buf;
4397
4398         initStringInfo(&buf);
4399
4400         if (s->nChildXids > 0)
4401         {
4402                 int                     i;
4403
4404                 appendStringInfo(&buf, "%u", s->childXids[0]);
4405                 for (i = 1; i < s->nChildXids; i++)
4406                         appendStringInfo(&buf, " %u", s->childXids[i]);
4407         }
4408
4409         if (s->parent)
4410                 ShowTransactionStateRec(s->parent);
4411
4412         /* use ereport to suppress computation if msg will not be printed */
4413         ereport(DEBUG3,
4414                         (errmsg_internal("name: %s; blockState: %13s; state: %7s, xid/subid/cid: %u/%u/%u%s, nestlvl: %d, children: %s",
4415                                                          PointerIsValid(s->name) ? s->name : "unnamed",
4416                                                          BlockStateAsString(s->blockState),
4417                                                          TransStateAsString(s->state),
4418                                                          (unsigned int) s->transactionId,
4419                                                          (unsigned int) s->subTransactionId,
4420                                                          (unsigned int) currentCommandId,
4421                                                          currentCommandIdUsed ? " (used)" : "",
4422                                                          s->nestingLevel, buf.data)));
4423
4424         pfree(buf.data);
4425 }
4426
4427 /*
4428  * BlockStateAsString
4429  *              Debug support
4430  */
4431 static const char *
4432 BlockStateAsString(TBlockState blockState)
4433 {
4434         switch (blockState)
4435         {
4436                 case TBLOCK_DEFAULT:
4437                         return "DEFAULT";
4438                 case TBLOCK_STARTED:
4439                         return "STARTED";
4440                 case TBLOCK_BEGIN:
4441                         return "BEGIN";
4442                 case TBLOCK_INPROGRESS:
4443                         return "INPROGRESS";
4444                 case TBLOCK_END:
4445                         return "END";
4446                 case TBLOCK_ABORT:
4447                         return "ABORT";
4448                 case TBLOCK_ABORT_END:
4449                         return "ABORT END";
4450                 case TBLOCK_ABORT_PENDING:
4451                         return "ABORT PEND";
4452                 case TBLOCK_PREPARE:
4453                         return "PREPARE";
4454                 case TBLOCK_SUBBEGIN:
4455                         return "SUB BEGIN";
4456                 case TBLOCK_SUBINPROGRESS:
4457                         return "SUB INPROGRS";
4458                 case TBLOCK_SUBRELEASE:
4459                         return "SUB RELEASE";
4460                 case TBLOCK_SUBCOMMIT:
4461                         return "SUB COMMIT";
4462                 case TBLOCK_SUBABORT:
4463                         return "SUB ABORT";
4464                 case TBLOCK_SUBABORT_END:
4465                         return "SUB ABORT END";
4466                 case TBLOCK_SUBABORT_PENDING:
4467                         return "SUB ABRT PEND";
4468                 case TBLOCK_SUBRESTART:
4469                         return "SUB RESTART";
4470                 case TBLOCK_SUBABORT_RESTART:
4471                         return "SUB AB RESTRT";
4472         }
4473         return "UNRECOGNIZED";
4474 }
4475
4476 /*
4477  * TransStateAsString
4478  *              Debug support
4479  */
4480 static const char *
4481 TransStateAsString(TransState state)
4482 {
4483         switch (state)
4484         {
4485                 case TRANS_DEFAULT:
4486                         return "DEFAULT";
4487                 case TRANS_START:
4488                         return "START";
4489                 case TRANS_INPROGRESS:
4490                         return "INPROGR";
4491                 case TRANS_COMMIT:
4492                         return "COMMIT";
4493                 case TRANS_ABORT:
4494                         return "ABORT";
4495                 case TRANS_PREPARE:
4496                         return "PREPARE";
4497         }
4498         return "UNRECOGNIZED";
4499 }
4500
4501 /*
4502  * xactGetCommittedChildren
4503  *
4504  * Gets the list of committed children of the current transaction.      The return
4505  * value is the number of child transactions.  *ptr is set to point to an
4506  * array of TransactionIds.  The array is allocated in TopTransactionContext;
4507  * the caller should *not* pfree() it (this is a change from pre-8.4 code!).
4508  * If there are no subxacts, *ptr is set to NULL.
4509  */
4510 int
4511 xactGetCommittedChildren(TransactionId **ptr)
4512 {
4513         TransactionState s = CurrentTransactionState;
4514
4515         if (s->nChildXids == 0)
4516                 *ptr = NULL;
4517         else
4518                 *ptr = s->childXids;
4519
4520         return s->nChildXids;
4521 }
4522
4523 /*
4524  *      XLOG support routines
4525  */
4526
4527 /*
4528  * Before 9.0 this was a fairly short function, but now it performs many
4529  * actions for which the order of execution is critical.
4530  */
4531 static void
4532 xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
4533                                         TransactionId *sub_xids, int nsubxacts,
4534                                         SharedInvalidationMessage *inval_msgs, int nmsgs,
4535                                         RelFileNode *xnodes, int nrels,
4536                                         Oid dbId, Oid tsId,
4537                                         uint32 xinfo)
4538 {
4539         TransactionId max_xid;
4540         int                     i;
4541
4542         max_xid = TransactionIdLatest(xid, nsubxacts, sub_xids);
4543
4544         /*
4545          * Make sure nextXid is beyond any XID mentioned in the record.
4546          *
4547          * We don't expect anyone else to modify nextXid, hence we don't need to
4548          * hold a lock while checking this. We still acquire the lock to modify
4549          * it, though.
4550          */
4551         if (TransactionIdFollowsOrEquals(max_xid,
4552                                                                          ShmemVariableCache->nextXid))
4553         {
4554                 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
4555                 ShmemVariableCache->nextXid = max_xid;
4556                 TransactionIdAdvance(ShmemVariableCache->nextXid);
4557                 LWLockRelease(XidGenLock);
4558         }
4559
4560         if (standbyState == STANDBY_DISABLED)
4561         {
4562                 /*
4563                  * Mark the transaction committed in pg_clog.
4564                  */
4565                 TransactionIdCommitTree(xid, nsubxacts, sub_xids);
4566         }
4567         else
4568         {
4569                 /*
4570                  * If a transaction completion record arrives that has as-yet
4571                  * unobserved subtransactions then this will not have been fully
4572                  * handled by the call to RecordKnownAssignedTransactionIds() in the
4573                  * main recovery loop in xlog.c. So we need to do bookkeeping again to
4574                  * cover that case. This is confusing and it is easy to think this
4575                  * call is irrelevant, which has happened three times in development
4576                  * already. Leave it in.
4577                  */
4578                 RecordKnownAssignedTransactionIds(max_xid);
4579
4580                 /*
4581                  * Mark the transaction committed in pg_clog. We use async commit
4582                  * protocol during recovery to provide information on database
4583                  * consistency for when users try to set hint bits. It is important
4584                  * that we do not set hint bits until the minRecoveryPoint is past
4585                  * this commit record. This ensures that if we crash we don't see hint
4586                  * bits set on changes made by transactions that haven't yet
4587                  * recovered. It's unlikely but it's good to be safe.
4588                  */
4589                 TransactionIdAsyncCommitTree(xid, nsubxacts, sub_xids, lsn);
4590
4591                 /*
4592                  * We must mark clog before we update the ProcArray.
4593                  */
4594                 ExpireTreeKnownAssignedTransactionIds(xid, nsubxacts, sub_xids, max_xid);
4595
4596                 /*
4597                  * Send any cache invalidations attached to the commit. We must
4598                  * maintain the same order of invalidation then release locks as
4599                  * occurs in CommitTransaction().
4600                  */
4601                 ProcessCommittedInvalidationMessages(inval_msgs, nmsgs,
4602                                                                   XactCompletionRelcacheInitFileInval(xinfo),
4603                                                                                          dbId, tsId);
4604
4605                 /*
4606                  * Release locks, if any. We do this for both two phase and normal one
4607                  * phase transactions. In effect we are ignoring the prepare phase and
4608                  * just going straight to lock release.
4609                  */
4610                 StandbyReleaseLockTree(xid, nsubxacts, sub_xids);
4611         }
4612
4613         /* Make sure files supposed to be dropped are dropped */
4614         for (i = 0; i < nrels; i++)
4615         {
4616                 SMgrRelation srel = smgropen(xnodes[i], InvalidBackendId);
4617                 ForkNumber      fork;
4618
4619                 for (fork = 0; fork <= MAX_FORKNUM; fork++)
4620                 {
4621                         XLogDropRelation(xnodes[i], fork);
4622                         smgrdounlink(srel, fork, true);
4623                 }
4624                 smgrclose(srel);
4625         }
4626
4627         /*
4628          * We issue an XLogFlush() for the same reason we emit ForceSyncCommit()
4629          * in normal operation. For example, in DROP DATABASE, we delete all the
4630          * files belonging to the database, and then commit the transaction. If we
4631          * crash after all the files have been deleted but before the commit, you
4632          * have an entry in pg_database without any files. To minimize the window
4633          * for that, we use ForceSyncCommit() to rush the commit record to disk as
4634          * quick as possible. We have the same window during recovery, and forcing
4635          * an XLogFlush() (which updates minRecoveryPoint during recovery) helps
4636          * to reduce that problem window, for any user that requested
4637          * ForceSyncCommit().
4638          */
4639         if (XactCompletionForceSyncCommit(xinfo))
4640                 XLogFlush(lsn);
4641
4642 }
4643 /*
4644  * Utility function to call xact_redo_commit_internal after breaking down xlrec
4645  */
4646 static void
4647 xact_redo_commit(xl_xact_commit *xlrec,
4648                                                         TransactionId xid, XLogRecPtr lsn)
4649 {
4650         TransactionId *subxacts;
4651         SharedInvalidationMessage *inval_msgs;
4652
4653         /* subxid array follows relfilenodes */
4654         subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
4655         /* invalidation messages array follows subxids */
4656         inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
4657
4658         xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
4659                                                                 inval_msgs, xlrec->nmsgs,
4660                                                                 xlrec->xnodes, xlrec->nrels,
4661                                                                 xlrec->dbId,
4662                                                                 xlrec->tsId,
4663                                                                 xlrec->xinfo);
4664 }
4665
4666 /*
4667  * Utility function to call xact_redo_commit_internal  for compact form of message.
4668  */
4669 static void
4670 xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
4671                                                         TransactionId xid, XLogRecPtr lsn)
4672 {
4673         xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
4674                                                                 NULL, 0,                /* inval msgs */
4675                                                                 NULL, 0,                /* relfilenodes */
4676                                                                 InvalidOid,             /* dbId */
4677                                                                 InvalidOid,             /* tsId */
4678                                                                 0);                             /* xinfo */
4679 }
4680
4681 /*
4682  * Be careful with the order of execution, as with xact_redo_commit().
4683  * The two functions are similar but differ in key places.
4684  *
4685  * Note also that an abort can be for a subtransaction and its children,
4686  * not just for a top level abort. That means we have to consider
4687  * topxid != xid, whereas in commit we would find topxid == xid always
4688  * because subtransaction commit is never WAL logged.
4689  */
4690 static void
4691 xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
4692 {
4693         TransactionId *sub_xids;
4694         TransactionId max_xid;
4695         int                     i;
4696
4697         sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
4698         max_xid = TransactionIdLatest(xid, xlrec->nsubxacts, sub_xids);
4699
4700         /*
4701          * Make sure nextXid is beyond any XID mentioned in the record.
4702          *
4703          * We don't expect anyone else to modify nextXid, hence we don't need to
4704          * hold a lock while checking this. We still acquire the lock to modify
4705          * it, though.
4706          */
4707         if (TransactionIdFollowsOrEquals(max_xid,
4708                                                                          ShmemVariableCache->nextXid))
4709         {
4710                 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
4711                 ShmemVariableCache->nextXid = max_xid;
4712                 TransactionIdAdvance(ShmemVariableCache->nextXid);
4713                 LWLockRelease(XidGenLock);
4714         }
4715
4716         if (standbyState == STANDBY_DISABLED)
4717         {
4718                 /* Mark the transaction aborted in pg_clog, no need for async stuff */
4719                 TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
4720         }
4721         else
4722         {
4723                 /*
4724                  * If a transaction completion record arrives that has as-yet
4725                  * unobserved subtransactions then this will not have been fully
4726                  * handled by the call to RecordKnownAssignedTransactionIds() in the
4727                  * main recovery loop in xlog.c. So we need to do bookkeeping again to
4728                  * cover that case. This is confusing and it is easy to think this
4729                  * call is irrelevant, which has happened three times in development
4730                  * already. Leave it in.
4731                  */
4732                 RecordKnownAssignedTransactionIds(max_xid);
4733
4734                 /* Mark the transaction aborted in pg_clog, no need for async stuff */
4735                 TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
4736
4737                 /*
4738                  * We must update the ProcArray after we have marked clog.
4739                  */
4740                 ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids, max_xid);
4741
4742                 /*
4743                  * There are no flat files that need updating, nor invalidation
4744                  * messages to send or undo.
4745                  */
4746
4747                 /*
4748                  * Release locks, if any. There are no invalidations to send.
4749                  */
4750                 StandbyReleaseLockTree(xid, xlrec->nsubxacts, sub_xids);
4751         }
4752
4753         /* Make sure files supposed to be dropped are dropped */
4754         for (i = 0; i < xlrec->nrels; i++)
4755         {
4756                 SMgrRelation srel = smgropen(xlrec->xnodes[i], InvalidBackendId);
4757                 ForkNumber      fork;
4758
4759                 for (fork = 0; fork <= MAX_FORKNUM; fork++)
4760                 {
4761                         XLogDropRelation(xlrec->xnodes[i], fork);
4762                         smgrdounlink(srel, fork, true);
4763                 }
4764                 smgrclose(srel);
4765         }
4766 }
4767
4768 void
4769 xact_redo(XLogRecPtr lsn, XLogRecord *record)
4770 {
4771         uint8           info = record->xl_info & ~XLR_INFO_MASK;
4772
4773         /* Backup blocks are not used in xact records */
4774         Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
4775
4776         if (info == XLOG_XACT_COMMIT_COMPACT)
4777         {
4778                 xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
4779
4780                 xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
4781         }
4782         else if (info == XLOG_XACT_COMMIT)
4783         {
4784                 xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
4785
4786                 xact_redo_commit(xlrec, record->xl_xid, lsn);
4787         }
4788         else if (info == XLOG_XACT_ABORT)
4789         {
4790                 xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
4791
4792                 xact_redo_abort(xlrec, record->xl_xid);
4793         }
4794         else if (info == XLOG_XACT_PREPARE)
4795         {
4796                 /* the record contents are exactly the 2PC file */
4797                 RecreateTwoPhaseFile(record->xl_xid,
4798                                                          XLogRecGetData(record), record->xl_len);
4799         }
4800         else if (info == XLOG_XACT_COMMIT_PREPARED)
4801         {
4802                 xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
4803
4804                 xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
4805                 RemoveTwoPhaseFile(xlrec->xid, false);
4806         }
4807         else if (info == XLOG_XACT_ABORT_PREPARED)
4808         {
4809                 xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record);
4810
4811                 xact_redo_abort(&xlrec->arec, xlrec->xid);
4812                 RemoveTwoPhaseFile(xlrec->xid, false);
4813         }
4814         else if (info == XLOG_XACT_ASSIGNMENT)
4815         {
4816                 xl_xact_assignment *xlrec = (xl_xact_assignment *) XLogRecGetData(record);
4817
4818                 if (standbyState >= STANDBY_INITIALIZED)
4819                         ProcArrayApplyXidAssignment(xlrec->xtop,
4820                                                                                 xlrec->nsubxacts, xlrec->xsub);
4821         }
4822         else
4823                 elog(PANIC, "xact_redo: unknown op code %u", info);
4824 }
4825
4826 static void
4827 xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
4828 {
4829         int                     i;
4830         TransactionId *subxacts;
4831
4832         subxacts = (TransactionId *) &xlrec->xnodes[xlrec->nrels];
4833
4834         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4835
4836         if (xlrec->nrels > 0)
4837         {
4838                 appendStringInfo(buf, "; rels:");
4839                 for (i = 0; i < xlrec->nrels; i++)
4840                 {
4841                         char       *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
4842
4843                         appendStringInfo(buf, " %s", path);
4844                         pfree(path);
4845                 }
4846         }
4847         if (xlrec->nsubxacts > 0)
4848         {
4849                 appendStringInfo(buf, "; subxacts:");
4850                 for (i = 0; i < xlrec->nsubxacts; i++)
4851                         appendStringInfo(buf, " %u", subxacts[i]);
4852         }
4853         if (xlrec->nmsgs > 0)
4854         {
4855                 SharedInvalidationMessage *msgs;
4856
4857                 msgs = (SharedInvalidationMessage *) &subxacts[xlrec->nsubxacts];
4858
4859                 if (XactCompletionRelcacheInitFileInval(xlrec->xinfo))
4860                         appendStringInfo(buf, "; relcache init file inval dbid %u tsid %u",
4861                                                          xlrec->dbId, xlrec->tsId);
4862
4863                 appendStringInfo(buf, "; inval msgs:");
4864                 for (i = 0; i < xlrec->nmsgs; i++)
4865                 {
4866                         SharedInvalidationMessage *msg = &msgs[i];
4867
4868                         if (msg->id >= 0)
4869                                 appendStringInfo(buf, " catcache %d", msg->id);
4870                         else if (msg->id == SHAREDINVALCATALOG_ID)
4871                                 appendStringInfo(buf, " catalog %u", msg->cat.catId);
4872                         else if (msg->id == SHAREDINVALRELCACHE_ID)
4873                                 appendStringInfo(buf, " relcache %u", msg->rc.relId);
4874                         /* remaining cases not expected, but print something anyway */
4875                         else if (msg->id == SHAREDINVALSMGR_ID)
4876                                 appendStringInfo(buf, " smgr");
4877                         else if (msg->id == SHAREDINVALRELMAP_ID)
4878                                 appendStringInfo(buf, " relmap");
4879                         else
4880                                 appendStringInfo(buf, " unknown id %d", msg->id);
4881                 }
4882         }
4883 }
4884
4885 static void
4886 xact_desc_commit_compact(StringInfo buf, xl_xact_commit_compact *xlrec)
4887 {
4888         int                     i;
4889
4890         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4891
4892         if (xlrec->nsubxacts > 0)
4893         {
4894                 appendStringInfo(buf, "; subxacts:");
4895                 for (i = 0; i < xlrec->nsubxacts; i++)
4896                         appendStringInfo(buf, " %u", xlrec->subxacts[i]);
4897         }
4898 }
4899
4900 static void
4901 xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
4902 {
4903         int                     i;
4904
4905         appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
4906         if (xlrec->nrels > 0)
4907         {
4908                 appendStringInfo(buf, "; rels:");
4909                 for (i = 0; i < xlrec->nrels; i++)
4910                 {
4911                         char       *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
4912
4913                         appendStringInfo(buf, " %s", path);
4914                         pfree(path);
4915                 }
4916         }
4917         if (xlrec->nsubxacts > 0)
4918         {
4919                 TransactionId *xacts = (TransactionId *)
4920                 &xlrec->xnodes[xlrec->nrels];
4921
4922                 appendStringInfo(buf, "; subxacts:");
4923                 for (i = 0; i < xlrec->nsubxacts; i++)
4924                         appendStringInfo(buf, " %u", xacts[i]);
4925         }
4926 }
4927
4928 static void
4929 xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
4930 {
4931         int                     i;
4932
4933         appendStringInfo(buf, "subxacts:");
4934
4935         for (i = 0; i < xlrec->nsubxacts; i++)
4936                 appendStringInfo(buf, " %u", xlrec->xsub[i]);
4937 }
4938
4939 void
4940 xact_desc(StringInfo buf, uint8 xl_info, char *rec)
4941 {
4942         uint8           info = xl_info & ~XLR_INFO_MASK;
4943
4944         if (info == XLOG_XACT_COMMIT_COMPACT)
4945         {
4946                 xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) rec;
4947
4948                 appendStringInfo(buf, "commit: ");
4949                 xact_desc_commit_compact(buf, xlrec);
4950         }
4951         else if (info == XLOG_XACT_COMMIT)
4952         {
4953                 xl_xact_commit *xlrec = (xl_xact_commit *) rec;
4954
4955                 appendStringInfo(buf, "commit: ");
4956                 xact_desc_commit(buf, xlrec);
4957         }
4958         else if (info == XLOG_XACT_ABORT)
4959         {
4960                 xl_xact_abort *xlrec = (xl_xact_abort *) rec;
4961
4962                 appendStringInfo(buf, "abort: ");
4963                 xact_desc_abort(buf, xlrec);
4964         }
4965         else if (info == XLOG_XACT_PREPARE)
4966         {
4967                 appendStringInfo(buf, "prepare");
4968         }
4969         else if (info == XLOG_XACT_COMMIT_PREPARED)
4970         {
4971                 xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
4972
4973                 appendStringInfo(buf, "commit prepared %u: ", xlrec->xid);
4974                 xact_desc_commit(buf, &xlrec->crec);
4975         }
4976         else if (info == XLOG_XACT_ABORT_PREPARED)
4977         {
4978                 xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
4979
4980                 appendStringInfo(buf, "abort prepared %u: ", xlrec->xid);
4981                 xact_desc_abort(buf, &xlrec->arec);
4982         }
4983         else if (info == XLOG_XACT_ASSIGNMENT)
4984         {
4985                 xl_xact_assignment *xlrec = (xl_xact_assignment *) rec;
4986
4987                 /*
4988                  * Note that we ignore the WAL record's xid, since we're more
4989                  * interested in the top-level xid that issued the record and which
4990                  * xids are being reported here.
4991                  */
4992                 appendStringInfo(buf, "xid assignment xtop %u: ", xlrec->xtop);
4993                 xact_desc_assignment(buf, xlrec);
4994         }
4995         else
4996                 appendStringInfo(buf, "UNKNOWN");
4997 }