/*-------------------------------------------------------------------------
 *
 * execMain.c
 *	  top level executor interface routines
 *
 * The old ExecutorMain() has been replaced by ExecutorStart(),
 * ExecutorRun() and ExecutorEnd().
 *
 * These three procedures are the external interfaces to the executor.
 * In each case, the query descriptor is required as an argument.
 *
 * ExecutorStart() must be called at the beginning of execution of any
 * query plan and ExecutorEnd() should always be called at the end of
 * execution of a plan.
 *
 * ExecutorRun accepts direction and count arguments that specify whether
 * the plan is to be executed forwards or backwards, and for how many tuples.
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.308 2008/05/12 00:00:48 alvherre Exp $
 *
 *-------------------------------------------------------------------------
 */
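/*
 * A minimal sketch of the typical call sequence (illustrative only; the
 * QueryDesc is assumed to have been built by CreateQueryDesc and the
 * DestReceiver set up by the caller):
 *
 *		ExecutorStart(queryDesc, 0);
 *		ExecutorRun(queryDesc, ForwardScanDirection, 0L);
 *		ExecutorEnd(queryDesc);
 *
 * Passing count = 0 to ExecutorRun runs the plan to completion; a portal
 * fetch would instead pass the number of tuples it wants.
 */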
#include "postgres.h"

#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "catalog/toasting.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/instrument.h"
#include "executor/nodeSubplan.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/tqual.h"
typedef struct evalPlanQual
{
	Index		rti;
	EState	   *estate;
	PlanState  *planstate;
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;
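/*
 * (Illustrative note: es_evalPlanQual points at the top of the stack of
 * active PlanQual levels chained through "next"; levels popped off the
 * active stack are parked on the "free" list so nested rechecks can reuse
 * them instead of palloc'ing new entries.)
 */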
/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest, EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecProcessReturning(ProjectionInfo *projectReturning,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest);
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
static void ExecCheckRTPerms(List *rangeTable);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
				  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */
/* ----------------------------------------------------------------
 *		ExecutorStart
 *
 *		This routine must be called at the beginning of any execution of any
 *		query plan.
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (it is not
 * entirely clear why we bother to separate the two functions, but...).
 * The tupDesc field of the QueryDesc is filled in to describe the tuples
 * that will be returned, and the internal fields (estate and planstate)
 * are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 */
	if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in parameters, if any, from queryDesc
	 */
	estate->es_param_list_info = queryDesc->params;

	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	/*
	 * If non-read-only query, set the command ID to mark output tuples with
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:
			/* SELECT INTO and SELECT FOR UPDATE/SHARE need to mark tuples */
			if (queryDesc->plannedstmt->intoClause != NULL ||
				queryDesc->plannedstmt->rowMarks != NIL)
				estate->es_output_cid = GetCurrentCommandId(true);
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState
	 */
	estate->es_snapshot = queryDesc->snapshot;
	estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
	estate->es_instrument = queryDesc->doInstrument;

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}
/* ----------------------------------------------------------------
 *		ExecutorRun
 *
 *		This is the main routine of the executor module. It accepts
 *		the query descriptor from the traffic cop and executes the
 *		query plan.
 *
 *		ExecutorStart must have been called already.
 *
 *		If direction is NoMovementScanDirection then nothing is done
 *		except to start up/shut down the destination.  Otherwise,
 *		we retrieve up to 'count' tuples in the specified direction.
 *
 *		Note: count = 0 is interpreted as no portal limit, i.e., run to
 *		completion.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	TupleTableSlot *result;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->returningLists);

	if (sendTuples)
		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);

	/*
	 * run plan
	 */
	if (ScanDirectionIsNoMovement(direction))
		result = NULL;
	else
		result = ExecutePlan(estate,
							 queryDesc->planstate,
							 operation,
							 count,
							 direction,
							 dest);

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		(*dest->rShutdown) (dest);

	MemoryContextSwitchTo(oldcontext);

	return result;
}
/* ----------------------------------------------------------------
 *		ExecutorEnd
 *
 *		This routine must be called at the end of execution of any
 *		query plan.
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * Close the SELECT INTO relation if any
	 */
	if (estate->es_select_into)
		CloseIntoRel(queryDesc);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
}
/* ----------------------------------------------------------------
 *		ExecutorRewind
 *
 *		This routine may be called on an open queryDesc to rewind it
 *		to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/* It's probably not sensible to rescan updating queries */
	Assert(queryDesc->operation == CMD_SELECT);

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * rescan plan
	 */
	ExecReScan(queryDesc->planstate, NULL);

	MemoryContextSwitchTo(oldcontext);
}
/*
 * ExecCheckRTPerms
 *		Check access permissions for all relations listed in a range table.
 */
static void
ExecCheckRTPerms(List *rangeTable)
{
	ListCell   *l;

	foreach(l, rangeTable)
	{
		ExecCheckRTEPerms((RangeTblEntry *) lfirst(l));
	}
}
/*
 * ExecCheckRTEPerms
 *		Check access permissions for a single RTE.
 */
static void
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked by init_fcache when the function is prepared for execution.
	 * Join, subquery, and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
	 */
	if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
		!= requiredPerms)
		aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
					   get_rel_name(relOid));
}
/*
 * Check that the query does not imply any writes to non-temp tables.
 */
static void
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
{
	ListCell   *l;

	/*
	 * CREATE TABLE AS or SELECT INTO?
	 *
	 * XXX should we allow this if the destination is temp?
	 */
	if (plannedstmt->intoClause != NULL)
		goto fail;

	/* Fail if write permissions are requested on any non-temp table */
	foreach(l, plannedstmt->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind != RTE_RELATION)
			continue;

		if ((rte->requiredPerms & ~ACL_SELECT) == 0)
			continue;

		if (isTempNamespace(get_rel_namespace(rte->relid)))
			continue;

		goto fail;
	}

	return;

fail:
	ereport(ERROR,
			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
			 errmsg("transaction is read-only")));
}
/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;
	int			i;

	/*
	 * Do permissions checks
	 */
	ExecCheckRTPerms(rangeTable);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;
	/*
	 * initialize result relation stuff
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			Index		resultRelationIndex = lfirst_int(l);
			Oid			resultRelationOid;
			Relation	resultRelation;

			resultRelationOid = getrelid(resultRelationIndex, rangeTable);
			resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
			InitResultRelInfo(resultRelInfo,
							  resultRelation,
							  resultRelationIndex,
							  operation,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* Initialize to first or only result rel */
		estate->es_result_relation_info = resultRelInfos;
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
	}
	/*
	 * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
	 * flag appropriately so that the plan tree will be initialized with the
	 * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
	 */
	estate->es_select_into = false;
	if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
	{
		estate->es_select_into = true;
		estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
	}

	/*
	 * Have to lock relations selected FOR UPDATE/FOR SHARE before we
	 * initialize the plan tree, else we'd be doing a lock upgrade. While we
	 * are at it, build the ExecRowMark list.
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
		Oid			relid = getrelid(rc->rti, rangeTable);
		Relation	relation;
		ExecRowMark *erm;

		relation = heap_open(relid, RowShareLock);
		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->rti = rc->rti;
		erm->forUpdate = rc->forUpdate;
		erm->noWait = rc->noWait;
		/* We'll set up ctidAttNo below */
		erm->ctidAttNo = InvalidAttrNumber;
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}
	/*
	 * Initialize the executor "tuple" table.  We need slots for all the plan
	 * nodes, plus possibly output slots for the junkfilter(s).  At this point
	 * we aren't sure if we need junkfilters, so just add slots for them
	 * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
	 * trigger output tuples.  Also, one for RETURNING-list evaluation.
	 */
	{
		int			nSlots;

		/* Slots for the main plan tree */
		nSlots = ExecCountSlotsNode(plan);
		/* Add slots for subplans and initplans */
		foreach(l, plannedstmt->subplans)
		{
			Plan	   *subplan = (Plan *) lfirst(l);

			nSlots += ExecCountSlotsNode(subplan);
		}
		/* Add slots for junkfilter(s) */
		if (plannedstmt->resultRelations != NIL)
			nSlots += list_length(plannedstmt->resultRelations);
		else
			nSlots += 1;
		if (operation != CMD_SELECT)
			nSlots++;			/* for es_trig_tuple_slot */
		if (plannedstmt->returningLists)
			nSlots++;			/* for RETURNING projection */

		estate->es_tupleTable = ExecCreateTupleTable(nSlots);

		if (operation != CMD_SELECT)
			estate->es_trig_tuple_slot =
				ExecAllocTableSlot(estate->es_tupleTable);
	}
	/* mark EvalPlanQual not active */
	estate->es_plannedstmt = plannedstmt;
	estate->es_evalPlanQual = NULL;
	estate->es_evTupleNull = NULL;
	estate->es_evTuple = NULL;
	estate->es_useEvalPlan = false;

	/*
	 * Initialize private state information for each SubPlan.  We must do
	 * this before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(estate->es_subplanstates == NIL);
	i = 1;						/* subplan indices count from 1 */
	foreach(l, plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;
		int			sp_eflags;

		/*
		 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
		 * it is a parameterless subplan (not initplan), we suggest that it be
		 * prepared to handle REWIND efficiently; otherwise there is no need.
		 */
		sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
		if (bms_is_member(i, plannedstmt->rewindPlanIDs))
			sp_eflags |= EXEC_FLAG_REWIND;

		subplanstate = ExecInitNode(subplan, estate, sp_eflags);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);

		i++;
	}
	/*
	 * Initialize the private state information for all the nodes in the
	 * query tree.  This opens files, allocates storage and leaves us ready
	 * to start processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return. (this
	 * is especially important if we are creating a relation with "SELECT
	 * INTO")
	 */
	tupType = ExecGetResultType(planstate);
	/*
	 * Initialize the junk filter if needed.  SELECT and INSERT queries need a
	 * filter if there are any junk attrs in the tlist.  INSERT and SELECT
	 * INTO also need a filter if the plan may return raw disk tuples (else
	 * heap_insert will be scribbling on the source relation!).  UPDATE and
	 * DELETE always need a filter, since there's always a junk 'ctid'
	 * attribute present --- no need to look first.
	 */
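	/*
	 * (Illustrative example: a query such as "SELECT a FROM t ORDER BY b"
	 * carries b along as a junk attribute for the sort to use, so a SELECT
	 * can need a filter; UPDATE and DELETE plans always carry the junk
	 * "ctid" used below to locate the target row.)
	 */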
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		switch (operation)
		{
			case CMD_SELECT:
			case CMD_INSERT:
				foreach(tlist, plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(tlist);

					if (tle->resjunk)
					{
						junk_filter_needed = true;
						break;
					}
				}
				if (!junk_filter_needed &&
					(operation == CMD_INSERT || estate->es_select_into) &&
					ExecMayReturnRawTuples(planstate))
					junk_filter_needed = true;
				break;
			case CMD_UPDATE:
			case CMD_DELETE:
				junk_filter_needed = true;
				break;
			default:
				break;
		}
		if (junk_filter_needed)
		{
			/*
			 * If there are multiple result relations, each one needs its own
			 * junk filter.  Note this is only possible for UPDATE/DELETE, so
			 * we can't be fooled by some needing a filter and some not.
			 */
			if (list_length(plannedstmt->resultRelations) > 1)
			{
				PlanState **appendplans;
				int			as_nplans;
				ResultRelInfo *resultRelInfo;

				/* Top plan had better be an Append here. */
				Assert(IsA(plan, Append));
				Assert(((Append *) plan)->isTarget);
				Assert(IsA(planstate, AppendState));
				appendplans = ((AppendState *) planstate)->appendplans;
				as_nplans = ((AppendState *) planstate)->as_nplans;
				Assert(as_nplans == estate->es_num_result_relations);
				resultRelInfo = estate->es_result_relations;
				for (i = 0; i < as_nplans; i++)
				{
					PlanState  *subplan = appendplans[i];
					JunkFilter *j;

					j = ExecInitJunkFilter(subplan->plan->targetlist,
							resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));

					/*
					 * Since it must be UPDATE/DELETE, there had better be a
					 * "ctid" junk attribute in the tlist ... but ctid could
					 * be at a different resno for each result relation. We
					 * look up the ctid resnos now and save them in the
					 * junkfilters.
					 */
					j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
					if (!AttributeNumberIsValid(j->jf_junkAttNo))
						elog(ERROR, "could not find junk ctid column");
					resultRelInfo->ri_junkFilter = j;
					resultRelInfo++;
				}

				/*
				 * Set active junkfilter too; at this point ExecInitAppend has
				 * already selected an active result relation...
				 */
				estate->es_junkFilter =
					estate->es_result_relation_info->ri_junkFilter;

				/*
				 * We currently can't support rowmarks in this case, because
				 * the associated junk CTIDs might have different resnos in
				 * different subplans.
				 */
				if (estate->es_rowMarks)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("SELECT FOR UPDATE/SHARE is not supported within a query with multiple result relations")));
			}
			else
			{
				/* Normal case with just one JunkFilter */
				JunkFilter *j;

				j = ExecInitJunkFilter(planstate->plan->targetlist,
									   tupType->tdhasoid,
								  ExecAllocTableSlot(estate->es_tupleTable));
				estate->es_junkFilter = j;
				if (estate->es_result_relation_info)
					estate->es_result_relation_info->ri_junkFilter = j;

				if (operation == CMD_SELECT)
				{
					/* For SELECT, want to return the cleaned tuple type */
					tupType = j->jf_cleanTupType;
				}
				else if (operation == CMD_UPDATE || operation == CMD_DELETE)
				{
					/* For UPDATE/DELETE, find the ctid junk attr now */
					j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
					if (!AttributeNumberIsValid(j->jf_junkAttNo))
						elog(ERROR, "could not find junk ctid column");
				}

				/* For SELECT FOR UPDATE/SHARE, find the ctid attrs now */
				foreach(l, estate->es_rowMarks)
				{
					ExecRowMark *erm = (ExecRowMark *) lfirst(l);
					char		resname[32];

					snprintf(resname, sizeof(resname), "ctid%u", erm->rti);
					erm->ctidAttNo = ExecFindJunkAttribute(j, resname);
					if (!AttributeNumberIsValid(erm->ctidAttNo))
						elog(ERROR, "could not find junk \"%s\" column",
							 resname);
				}
			}
		}
		else
		{
			estate->es_junkFilter = NULL;
			if (estate->es_rowMarks)
				elog(ERROR, "SELECT FOR UPDATE/SHARE, but no junk columns");
		}
	}
	/*
	 * Initialize RETURNING projections if needed.
	 */
	if (plannedstmt->returningLists)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;
		ResultRelInfo *resultRelInfo;

		/*
		 * We set QueryDesc.tupDesc to be the RETURNING rowtype in this case.
		 * We assume all the sublists will generate the same output tupdesc.
		 */
		tupType = ExecTypeFromTL((List *) linitial(plannedstmt->returningLists),
								 false);

		/* Set up a slot for the output of the RETURNING projection(s) */
		slot = ExecAllocTableSlot(estate->es_tupleTable);
		ExecSetSlotDescriptor(slot, tupType);
		/* Need an econtext too */
		econtext = CreateExprContext(estate);

		/*
		 * Build a projection for each result rel.  Note that any SubPlans in
		 * the RETURNING lists get attached to the topmost plan node.
		 */
		Assert(list_length(plannedstmt->returningLists) == estate->es_num_result_relations);
		resultRelInfo = estate->es_result_relations;
		foreach(l, plannedstmt->returningLists)
		{
			List	   *rlist = (List *) lfirst(l);
			List	   *rliststate;

			rliststate = (List *) ExecInitExpr((Expr *) rlist, planstate);
			resultRelInfo->ri_projectReturning =
				ExecBuildProjectionInfo(rliststate, econtext, slot,
									 resultRelInfo->ri_RelationDesc->rd_att);
			resultRelInfo++;
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;

	/*
	 * If doing SELECT INTO, initialize the "into" relation.  We must wait
	 * till now so we have the "clean" result tuple type to create the new
	 * table from.
	 *
	 * If EXPLAIN, skip creating the "into" relation.
	 */
	if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		OpenIntoRel(queryDesc);
}
/*
 * Initialize ResultRelInfo data for one result relation
 */
void
InitResultRelInfo(ResultRelInfo *resultRelInfo,
				  Relation resultRelationDesc,
				  Index resultRelationIndex,
				  CmdType operation,
				  bool doInstrument)
{
	/*
	 * Check valid relkind ... parser and/or planner should have noticed this
	 * already, but let's make sure.
	 */
	switch (resultRelationDesc->rd_rel->relkind)
	{
		case RELKIND_RELATION:
			/* OK */
			break;
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		case RELKIND_VIEW:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change view \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change relation \"%s\"",
							RelationGetRelationName(resultRelationDesc))));
			break;
	}

	/* OK, fill in the node */
	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
	resultRelInfo->type = T_ResultRelInfo;
	resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
	resultRelInfo->ri_RelationDesc = resultRelationDesc;
	resultRelInfo->ri_NumIndices = 0;
	resultRelInfo->ri_IndexRelationDescs = NULL;
	resultRelInfo->ri_IndexRelationInfo = NULL;
	/* make a copy so as not to depend on relcache info not changing... */
	resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
	if (resultRelInfo->ri_TrigDesc)
	{
		int			n = resultRelInfo->ri_TrigDesc->numtriggers;

		resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
			palloc0(n * sizeof(FmgrInfo));
		if (doInstrument)
			resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
		else
			resultRelInfo->ri_TrigInstrument = NULL;
	}
	else
	{
		resultRelInfo->ri_TrigFunctions = NULL;
		resultRelInfo->ri_TrigInstrument = NULL;
	}
	resultRelInfo->ri_ConstraintExprs = NULL;
	resultRelInfo->ri_junkFilter = NULL;
	resultRelInfo->ri_projectReturning = NULL;

	/*
	 * If there are indices on the result relation, open them and save
	 * descriptors in the result relation info, so that we can add new index
	 * entries for the tuples we add/update.  We need not do this for a
	 * DELETE, however, since deletion doesn't affect indexes.
	 */
	if (resultRelationDesc->rd_rel->relhasindex &&
		operation != CMD_DELETE)
		ExecOpenIndices(resultRelInfo);
}
/*
 * ExecGetTriggerResultRel
 *
 * Get a ResultRelInfo for a trigger target relation.  Most of the time,
 * triggers are fired on one of the result relations of the query, and so
 * we can just return a member of the es_result_relations array.  (Note: in
 * self-join situations there might be multiple members with the same OID;
 * if so it doesn't matter which one we pick.)  However, it is sometimes
 * necessary to fire triggers on other relations; this happens mainly when an
 * RI update trigger queues additional triggers on other relations, which will
 * be processed in the context of the outer query.  For efficiency's sake,
 * we want to have a ResultRelInfo for those triggers too; that can avoid
 * repeated re-opening of the relation.  (It also provides a way for EXPLAIN
 * ANALYZE to report the runtimes of such triggers.)  So we make additional
 * ResultRelInfo's as needed, and save them in es_trig_target_relations.
 */
ResultRelInfo *
ExecGetTriggerResultRel(EState *estate, Oid relid)
{
	ResultRelInfo *rInfo;
	int			nr;
	ListCell   *l;
	Relation	rel;
	MemoryContext oldcontext;

	/* First, search through the query result relations */
	rInfo = estate->es_result_relations;
	nr = estate->es_num_result_relations;
	while (nr > 0)
	{
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
		rInfo++;
		nr--;
	}
	/* Nope, but maybe we already made an extra ResultRelInfo for it */
	foreach(l, estate->es_trig_target_relations)
	{
		rInfo = (ResultRelInfo *) lfirst(l);
		if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
			return rInfo;
	}
	/* Nope, so we need a new one */

	/*
	 * Open the target relation's relcache entry.  We assume that an
	 * appropriate lock is still held by the backend from whenever the trigger
	 * event got queued, so we need take no new lock here.
	 */
	rel = heap_open(relid, NoLock);

	/*
	 * Make the new entry in the right context.  Currently, we don't need any
	 * index information in ResultRelInfos used only for triggers, so tell
	 * InitResultRelInfo it's a DELETE.
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
	rInfo = makeNode(ResultRelInfo);
	InitResultRelInfo(rInfo,
					  rel,
					  0,		/* dummy rangetable index */
					  CMD_DELETE,
					  estate->es_instrument);
	estate->es_trig_target_relations =
		lappend(estate->es_trig_target_relations, rInfo);
	MemoryContextSwitchTo(oldcontext);

	return rInfo;
}
/*
 * ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that estate->es_result_relation_info is already set up to
 * describe the target relation.  Note that in an UPDATE that spans an
 * inheritance tree, some of the target relations may have OIDs and some not.
 * We have to make the decisions on a per-relation basis as we initialize
 * each of the child plans of the topmost Append plan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
	if (planstate->state->es_select_into)
	{
		*hasoids = planstate->state->es_into_oids;
		return true;
	}
	else
	{
		ResultRelInfo *ri = planstate->state->es_result_relation_info;

		if (ri != NULL)
		{
			Relation	rel = ri->ri_RelationDesc;

			if (rel != NULL)
			{
				*hasoids = rel->rd_rel->relhasoids;
				return true;
			}
		}
	}

	return false;
}
/* ----------------------------------------------------------------
 *		ExecEndPlan
 *
 *		Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 * ----------------------------------------------------------------
 */
static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
	ResultRelInfo *resultRelInfo;
	int			i;
	ListCell   *l;

	/*
	 * shut down any PlanQual processing we were doing
	 */
	if (estate->es_evalPlanQual != NULL)
		EndEvalPlanQual(estate);

	/*
	 * shut down the node-type-specific query processing
	 */
	ExecEndNode(planstate);

	/*
	 * for subplans too
	 */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/*
	 * destroy the executor "tuple" table.
	 */
	ExecDropTupleTable(estate->es_tupleTable, true);
	estate->es_tupleTable = NULL;

	/*
	 * close the result relation(s) if any, but hold locks until xact commit.
	 */
	resultRelInfo = estate->es_result_relations;
	for (i = estate->es_num_result_relations; i > 0; i--)
	{
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
		resultRelInfo++;
	}

	/*
	 * likewise close any trigger target relations
	 */
	foreach(l, estate->es_trig_target_relations)
	{
		resultRelInfo = (ResultRelInfo *) lfirst(l);
		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	/*
	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
	 */
	foreach(l, estate->es_rowMarks)
	{
		ExecRowMark *erm = lfirst(l);

		heap_close(erm->relation, NoLock);
	}
}
/* ----------------------------------------------------------------
 *		ExecutePlan
 *
 *		processes the query plan to retrieve 'numberTuples' tuples in the
 *		direction specified.
 *
 *		Retrieves all tuples if numberTuples is 0
 *
 *		result is either a slot containing the last tuple in the case
 *		of a SELECT or NULL otherwise.
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecutePlan(EState *estate,
			PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest)
{
	JunkFilter *junkfilter;
	TupleTableSlot *planSlot;
	TupleTableSlot *slot;
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;
	long		current_tuple_count;
	TupleTableSlot *result;

	/*
	 * initialize local variables
	 */
	current_tuple_count = 0;
	result = NULL;

	/*
	 * Set the direction.
	 */
	estate->es_direction = direction;

	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * Loop until we've processed the proper number of tuples from the plan.
	 */
	for (;;)
	{
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);

		/*
		 * Execute the plan and obtain a tuple
		 */
lnext:	;
		if (estate->es_useEvalPlan)
		{
			planSlot = EvalPlanQualNext(estate);
			if (TupIsNull(planSlot))
				planSlot = ExecProcNode(planstate);
		}
		else
			planSlot = ExecProcNode(planstate);

		/*
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
		 */
		if (TupIsNull(planSlot))
		{
			result = NULL;
			break;
		}
		slot = planSlot;

		/*
		 * If we have a junk filter, then project a new tuple with the junk
		 * attributes removed.
		 *
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
		 *
		 * But first, extract all the junk information we need.
		 */
		if ((junkfilter = estate->es_junkFilter) != NULL)
		{
			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
			if (estate->es_rowMarks != NIL)
			{
				ListCell   *l;

		lmark:	;
				foreach(l, estate->es_rowMarks)
				{
					ExecRowMark *erm = lfirst(l);
					Datum		datum;
					bool		isNull;
					HeapTupleData tuple;
					Buffer		buffer;
					HTSU_Result test;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
					TupleTableSlot *newSlot;
					LockTupleMode lockmode;

					datum = ExecGetJunkAttribute(slot,
												 erm->ctidAttNo,
												 &isNull);
					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "ctid is NULL");

					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

					if (erm->forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
										   &update_ctid, &update_xmax,
										   estate->es_output_cid,
										   lockmode, erm->noWait);
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
							/* treat it as deleted; do not process */
							goto lnext;

						case HeapTupleMayBeUpdated:
							break;

						case HeapTupleUpdated:
							if (IsXactIsoLevelSerializable)
								ereport(ERROR,
										(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
										 errmsg("could not serialize access due to concurrent update")));
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
							{
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
													   update_xmax);
								if (!TupIsNull(newSlot))
								{
									slot = planSlot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}

							/*
							 * if tuple was deleted or PlanQual failed for
							 * updated tuple - we must not return this tuple!
							 */
							goto lnext;

						default:
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
								 test);
							return NULL;
					}
				}
			}

			/*
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				Datum		datum;
				bool		isNull;

				datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo,
											 &isNull);
				/* shouldn't ever get a null result... */
				if (isNull)
					elog(ERROR, "ctid is NULL");

				tupleid = (ItemPointer) DatumGetPointer(datum);
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
				tupleid = &tuple_ctid;
			}

			/*
			 * Create a new "clean" tuple with all junk attributes removed. We
			 * don't need to do this for DELETE, however (there will in fact
			 * be no non-junk attributes in a DELETE!)
			 */
			if (operation != CMD_DELETE)
				slot = ExecFilterJunk(junkfilter, slot);
		}

		/*
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
		 */
		switch (operation)
		{
			case CMD_SELECT:
				ExecSelect(slot, dest, estate);
				result = slot;
				break;

			case CMD_INSERT:
				ExecInsert(slot, tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			case CMD_DELETE:
				ExecDelete(tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			case CMD_UPDATE:
				ExecUpdate(slot, tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			default:
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
				result = NULL;
				break;
		}

		/*
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
		 */
		current_tuple_count++;
		if (numberTuples && numberTuples == current_tuple_count)
			break;
	}

	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * here, result is either a slot containing a tuple in the case of a
	 * SELECT or NULL otherwise.
	 */
	return result;
}
/* ----------------------------------------------------------------
 *		ExecSelect
 *
 *		SELECTs are easy.. we just pass the tuple to the appropriate
 *		output function.
 * ----------------------------------------------------------------
 */
static void
ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest,
		   EState *estate)
{
	(*dest->receiveSlot) (slot, dest);
	(estate->es_processed)++;
}
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs are trickier.. we have to insert the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 * ----------------------------------------------------------------
 */
static void
ExecInsert(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	Oid			newId;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 */
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.
	 */
	newId = heap_insert(resultRelationDesc, tuple,
						estate->es_output_cid,
						true, true);

	(estate->es_processed)++;
	estate->es_lastoid = newId;
	setLastTid(&(tuple->t_self));

	/*
	 * insert index entries for tuple
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, tuple);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
}
/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
 * ----------------------------------------------------------------
 */
static void
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid);

		if (!dodelete)			/* "do nothing" */
			return;
	}

	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not. This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
ldelete:;
	result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_output_cid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	(estate->es_processed)++;

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.  We can use the trigger tuple slot.
		 */
		TupleTableSlot *slot = estate->es_trig_tuple_slot;
		HeapTupleData deltuple;
		Buffer		delbuffer;

		deltuple.t_self = *tupleid;
		if (!heap_fetch(resultRelationDesc, SnapshotAny,
						&deltuple, &delbuffer, false, NULL))
			elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");

		if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
			ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
		ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);

		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);

		ExecClearTuple(slot);
		ReleaseBuffer(delbuffer);
	}
}
/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..	This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
 * ----------------------------------------------------------------
 */
static void
ExecUpdate(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple);

		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * replace the heap tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
	 * serialize error if not. This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
	result = heap_update(resultRelationDesc, tupleid, tuple,
						 &update_ctid, &update_xmax,
						 estate->es_output_cid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			break;

		case HeapTupleUpdated:
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax);
				if (!TupIsNull(epqslot))
				{
					*tupleid = update_ctid;
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
					goto lreplace;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_update status: %u", result);
			return;
	}

	(estate->es_processed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */

	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * If it's a HOT update, we mustn't insert new index entries.
	 */
	if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
}
/*
 * ExecRelCheck --- check that tuple meets constraints for result relation
 */
static const char *
ExecRelCheck(ResultRelInfo *resultRelInfo,
			 TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	int			ncheck = rel->rd_att->constr->num_check;
	ConstrCheck *check = rel->rd_att->constr->check;
	ExprContext *econtext;
	MemoryContext oldContext;
	List	   *qual;
	int			i;

	/*
	 * If first time through for this result relation, build expression
	 * nodetrees for rel's constraint expressions.  Keep them in the per-query
	 * memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_ConstraintExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
		resultRelInfo->ri_ConstraintExprs =
			(List **) palloc(ncheck * sizeof(List *));
		for (i = 0; i < ncheck; i++)
		{
			/* ExecQual wants implicit-AND form */
			qual = make_ands_implicit(stringToNode(check[i].ccbin));
			resultRelInfo->ri_ConstraintExprs[i] = (List *)
				ExecPrepareExpr((Expr *) qual, estate);
		}
		MemoryContextSwitchTo(oldContext);
	}

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* And evaluate the constraints */
	for (i = 0; i < ncheck; i++)
	{
		qual = resultRelInfo->ri_ConstraintExprs[i];

		/*
		 * NOTE: SQL92 specifies that a NULL result from a constraint
		 * expression is not to be treated as a failure.  Therefore, tell
		 * ExecQual to return TRUE for NULL.
		 */
		if (!ExecQual(qual, econtext, true))
			return check[i].ccname;
	}

	/* NULL result means no error */
	return NULL;
}
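/*
 * (Illustrative example of the NULL rule above: given CHECK (price > 0),
 * a row with a NULL price makes the expression evaluate to NULL, which
 * counts as passing; only a separate NOT NULL constraint would reject it.)
 */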
void
ExecConstraints(ResultRelInfo *resultRelInfo,
				TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleConstr *constr = rel->rd_att->constr;

	Assert(constr);

	if (constr->has_not_null)
	{
		int			natts = rel->rd_att->natts;
		int			attrChk;

		for (attrChk = 1; attrChk <= natts; attrChk++)
		{
			if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
				slot_attisnull(slot, attrChk))
				ereport(ERROR,
						(errcode(ERRCODE_NOT_NULL_VIOLATION),
						 errmsg("null value in column \"%s\" violates not-null constraint",
						NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
		}
	}

	if (constr->num_check > 0)
	{
		const char *failed;

		if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
							RelationGetRelationName(rel), failed)));
	}
}
/*
 * ExecProcessReturning --- evaluate a RETURNING list and send to dest
 *
 * projectReturning: RETURNING projection info for current result rel
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top plan node
 * dest: where to send the output
 */
static void
ExecProcessReturning(ProjectionInfo *projectReturning,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest)
{
	ExprContext *econtext = projectReturning->pi_exprContext;
	TupleTableSlot *retSlot;

	/*
	 * Reset per-tuple memory context to free any expression evaluation
	 * storage allocated in the previous cycle.
	 */
	ResetExprContext(econtext);

	/* Make tuple and any needed join variables available to ExecProject */
	econtext->ecxt_scantuple = tupleSlot;
	econtext->ecxt_outertuple = planSlot;

	/* Compute the RETURNING expressions */
	retSlot = ExecProject(projectReturning, NULL);

	/* Send to dest */
	(*dest->receiveSlot) (retSlot, dest);

	ExecClearTuple(retSlot);
}
/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 * See backend/executor/README for some info about how this works.
 *
 *	estate - executor state data
 *	rti - rangetable index of table containing tuple
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 */
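/*
 * (Illustrative scenario: under READ COMMITTED, if our UPDATE finds its
 * target row already updated by a concurrent, committed transaction, the
 * caller passes the old tuple's t_ctid/xmax here; we chase the update chain
 * to the newest version and re-run the query's quals against it via the
 * recheck plan, so the new UPDATE is applied only if that version still
 * qualifies.)
 */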
2018 EvalPlanQual(EState *estate, Index rti,
2019 ItemPointer tid, TransactionId priorXmax)
2024 HeapTupleData tuple;
2025 HeapTuple copyTuple = NULL;
2026 SnapshotData SnapshotDirty;
2032 * find relation containing target tuple
2034 if (estate->es_result_relation_info != NULL &&
2035 estate->es_result_relation_info->ri_RangeTableIndex == rti)
2036 relation = estate->es_result_relation_info->ri_RelationDesc;
2042 foreach(l, estate->es_rowMarks)
2044 if (((ExecRowMark *) lfirst(l))->rti == rti)
2046 relation = ((ExecRowMark *) lfirst(l))->relation;
2050 if (relation == NULL)
2051 elog(ERROR, "could not find RowMark for RT index %u", rti);
2057 * Loop here to deal with updated or busy tuples
2059 InitDirtySnapshot(SnapshotDirty);
2060 tuple.t_self = *tid;
2065 if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
2068 * If xmin isn't what we're expecting, the slot must have been
2069 * recycled and reused for an unrelated tuple. This implies that
2070 * the latest version of the row was deleted, so we need do
2071 * nothing. (Should be safe to examine xmin without getting
2072 * buffer's content lock, since xmin never changes in an existing
2075 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2078 ReleaseBuffer(buffer);
2082 /* otherwise xmin should not be dirty... */
2083 if (TransactionIdIsValid(SnapshotDirty.xmin))
2084 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
2087 * If tuple is being updated by other transaction then we have to
2088 * wait for its commit/abort.
2090 if (TransactionIdIsValid(SnapshotDirty.xmax))
2092 ReleaseBuffer(buffer);
2093 XactLockTableWait(SnapshotDirty.xmax);
2094 continue; /* loop back to repeat heap_fetch */
2098 * If tuple was inserted by our own transaction, we have to check
2099 * cmin against es_output_cid: cmin >= current CID means our
2100 * command cannot see the tuple, so we should ignore it. Without
2101 * this we are open to the "Halloween problem" of indefinitely
2102 * re-updating the same tuple. (We need not check cmax because
2103 * HeapTupleSatisfiesDirty will consider a tuple deleted by our
2104 * transaction dead, regardless of cmax.) We just checked that
2105 * priorXmax == xmin, so we can test that variable instead of
2106 * doing HeapTupleHeaderGetXmin again.
2108 if (TransactionIdIsCurrentTransactionId(priorXmax) &&
2109 HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
2111 ReleaseBuffer(buffer);
2116 * We got tuple - now copy it for use by recheck query.
2118 copyTuple = heap_copytuple(&tuple);
2119 ReleaseBuffer(buffer);
2124 * If the referenced slot was actually empty, the latest version of
2125 * the row must have been deleted, so we need do nothing.
2127 if (tuple.t_data == NULL)
2129 ReleaseBuffer(buffer);
2134 * As above, if xmin isn't what we're expecting, do nothing.
2136 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2139 ReleaseBuffer(buffer);
2144 * If we get here, the tuple was found but failed SnapshotDirty.
2145 * Assuming the xmin is either a committed xact or our own xact (as it
2146 * certainly should be if we're trying to modify the tuple), this must
2147 * mean that the row was updated or deleted by either a committed xact
2148 * or our own xact. If it was deleted, we can ignore it; if it was
2149 * updated then chain up to the next version and repeat the whole
2152 * As above, it should be safe to examine xmax and t_ctid without the
2153 * buffer content lock, because they can't be changing.
2155 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2157 /* deleted, so forget about it */
2158 ReleaseBuffer(buffer);
2162 /* updated, so look at the updated row */
2163 tuple.t_self = tuple.t_data->t_ctid;
2164 /* updated row should have xmin matching this xmax */
2165 priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
2166 ReleaseBuffer(buffer);
2167 /* loop back to fetch next in chain */
2171 * For UPDATE/DELETE we have to return tid of actual row we're executing
2174 *tid = tuple.t_self;
2177 * Need to run a recheck subquery. Find or create a PQ stack entry.
2179 epq = estate->es_evalPlanQual;
2182 if (epq != NULL && epq->rti == 0)
2184 /* Top PQ stack entry is idle, so re-use it */
2185 Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2191 * If this is request for another RTE - Ra, - then we have to check wasn't
2192 * PlanQual requested for Ra already and if so then Ra' row was updated
2193 * again and we have to re-start old execution for Ra and forget all what
2194 * we done after Ra was suspended. Cool? -:))
2196 if (epq != NULL && epq->rti != rti &&
2197 epq->estate->es_evTuple[rti - 1] != NULL)
2201 evalPlanQual *oldepq;
2203 /* stop execution */
2204 EvalPlanQualStop(epq);
2205 /* pop previous PlanQual from the stack */
2207 Assert(oldepq && oldepq->rti != 0);
2208 /* push current PQ to freePQ stack */
2211 estate->es_evalPlanQual = epq;
2212 } while (epq->rti != rti);
	/*
	 * If we are requested for another RTE then we have to suspend execution
	 * of the current PlanQual and start execution for the new one.
	 */
	if (epq == NULL || epq->rti != rti)
	{
		/* try to reuse a plan used previously */
		evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;

		if (newepq == NULL)		/* first call or freePQ stack is empty */
		{
			newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
			newepq->free = NULL;
			newepq->estate = NULL;
			newepq->planstate = NULL;
		}
		else
		{
			/* recycle previously used PlanQual */
			Assert(newepq->estate == NULL);
			epq->free = NULL;
		}
		/* push current PQ to the stack */
		newepq->next = epq;
		epq = newepq;
		estate->es_evalPlanQual = epq;
		epq->rti = rti;
		endNode = false;
	}

	Assert(epq->rti == rti);
	/*
	 * OK - we're requested for the same RTE.  Unfortunately we still have to
	 * end and restart execution of the plan, because ExecReScan wouldn't
	 * ensure that upper plan nodes would reset themselves.  We could make
	 * that work if insertion of the target tuple were integrated with the
	 * Param mechanism somehow, so that the upper plan nodes know that their
	 * children's outputs have changed.
	 *
	 * Note that the stack of free evalPlanQual nodes is quite useless at the
	 * moment, since it only saves us from pallocing/releasing the
	 * evalPlanQual nodes themselves.  But it will be useful once we implement
	 * ReScan instead of end/restart for re-using PlanQual nodes.
	 */
	if (endNode)
	{
		/* stop execution */
		EvalPlanQualStop(epq);
	}
	/*
	 * Initialize new recheck query.
	 *
	 * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
	 * instead copy down changeable state from the top plan (including
	 * es_result_relation_info, es_junkFilter) and reset locally changeable
	 * state in the epq (including es_param_exec_vals, es_evTupleNull).
	 */
	EvalPlanQualStart(epq, estate, epq->next);

	/*
	 * Free the old RTE's test tuple, if any, and store the target tuple
	 * where the relation's scan node will see it.
	 */
	epqstate = epq->estate;
	if (epqstate->es_evTuple[rti - 1] != NULL)
		heap_freetuple(epqstate->es_evTuple[rti - 1]);
	epqstate->es_evTuple[rti - 1] = copyTuple;

	return EvalPlanQualNext(estate);
}
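/*
 * Illustrative sketch (not part of the original file): the shape of the
 * READ COMMITTED retry loop in callers such as ExecUpdate.  The names
 * update_ctid, update_xmax, and tupleid follow those callers; the
 * heap_update arguments are elided, so this is a paraphrase rather than a
 * drop-in implementation.
 *
 *	lreplace:;
 *		result = heap_update(resultRelationDesc, tupleid, tuple, ...);
 *		if (result == HeapTupleUpdated && !IsXactIsoLevelSerializable &&
 *			!ItemPointerEquals(tupleid, &update_ctid))
 *		{
 *			TupleTableSlot *epqslot;
 *
 *			epqslot = EvalPlanQual(estate,
 *								   resultRelInfo->ri_RangeTableIndex,
 *								   &update_ctid, update_xmax);
 *			if (!TupIsNull(epqslot))
 *			{
 *				*tupleid = update_ctid;
 *				slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
 *				goto lreplace;		(retry with the requalified row)
 *			}
 *		}
 */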
static TupleTableSlot *
EvalPlanQualNext(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;
	MemoryContext oldcontext;
	TupleTableSlot *slot;

	Assert(epq->rti != 0);

lpqnext:;
	oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
	slot = ExecProcNode(epq->planstate);
	MemoryContextSwitchTo(oldcontext);

	/*
	 * No more tuples for this PQ.  Continue the previous one.
	 */
	if (TupIsNull(slot))
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			/* and continue Query execution */
			return NULL;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
		goto lpqnext;
	}

	return slot;
}
static void
EndEvalPlanQual(EState *estate)
{
	evalPlanQual *epq = estate->es_evalPlanQual;

	if (epq->rti == 0)			/* plans already shut down */
	{
		Assert(epq->next == NULL);
		return;
	}

	for (;;)
	{
		evalPlanQual *oldepq;

		/* stop execution */
		EvalPlanQualStop(epq);
		/* pop old PQ from the stack */
		oldepq = epq->next;
		if (oldepq == NULL)
		{
			/* this is the first (oldest) PQ - mark as free */
			epq->rti = 0;
			estate->es_useEvalPlan = false;
			break;
		}
		Assert(oldepq->rti != 0);
		/* push current PQ to freePQ stack */
		oldepq->free = epq;
		epq = oldepq;
		estate->es_evalPlanQual = epq;
	}
}
/*
 * Start execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 */
static void
EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
{
	EState *epqstate;
	int rtsize;
	MemoryContext oldcontext;
	ListCell *l;

	rtsize = list_length(estate->es_range_table);

	epq->estate = epqstate = CreateExecutorState();

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
	/*
	 * The epqstates share the top query's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 */
	epqstate->es_direction = ForwardScanDirection;
	epqstate->es_snapshot = estate->es_snapshot;
	epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
	epqstate->es_range_table = estate->es_range_table;
	epqstate->es_output_cid = estate->es_output_cid;
	epqstate->es_result_relations = estate->es_result_relations;
	epqstate->es_num_result_relations = estate->es_num_result_relations;
	epqstate->es_result_relation_info = estate->es_result_relation_info;
	epqstate->es_junkFilter = estate->es_junkFilter;
	/* es_trig_target_relations must NOT be copied */
	epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
	epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
	epqstate->es_param_list_info = estate->es_param_list_info;
	if (estate->es_plannedstmt->nParamExec > 0)
		epqstate->es_param_exec_vals = (ParamExecData *)
			palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
	epqstate->es_rowMarks = estate->es_rowMarks;
	epqstate->es_instrument = estate->es_instrument;
	epqstate->es_select_into = estate->es_select_into;
	epqstate->es_into_oids = estate->es_into_oids;
	epqstate->es_plannedstmt = estate->es_plannedstmt;
	/*
	 * Each epqstate must have its own es_evTupleNull state, but all the
	 * stack entries share es_evTuple state.  This allows sub-rechecks to
	 * inherit the value being examined by an outer recheck.
	 */
	epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
	if (priorepq == NULL)
		/* first PQ stack entry */
		epqstate->es_evTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
	else
		/* later stack entries share the same storage */
		epqstate->es_evTuple = priorepq->estate->es_evTuple;
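	/*
	 * Illustrative note (not from the original source): with rechecks
	 * stacked for rti 1 (outer) and rti 2 (inner), both epqstates point at
	 * the same array:
	 *
	 *		es_evTuple[0] = test tuple installed for rti 1
	 *		es_evTuple[1] = test tuple installed for rti 2
	 *
	 * so a scan of rti 1 inside the inner recheck returns exactly the tuple
	 * the outer recheck is examining, while each level's private
	 * es_evTupleNull flags track which entries it has already consumed.
	 */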
	/*
	 * Create sub-tuple-table; we needn't redo the CountSlots work though.
	 */
	epqstate->es_tupleTable =
		ExecCreateTupleTable(estate->es_tupleTable->size);

	/*
	 * Initialize private state information for each SubPlan.  We must do
	 * this before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(epqstate->es_subplanstates == NIL);
	foreach(l, estate->es_plannedstmt->subplans)
	{
		Plan *subplan = (Plan *) lfirst(l);
		PlanState *subplanstate;

		subplanstate = ExecInitNode(subplan, epqstate, 0);

		epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
											 subplanstate);
	}
	/*
	 * Initialize the private state information for all the nodes in the
	 * query tree.  This opens files, allocates storage and leaves us ready
	 * to start processing tuples.
	 */
	epq->planstate = ExecInitNode(estate->es_plannedstmt->planTree, epqstate, 0);

	MemoryContextSwitchTo(oldcontext);
}
/*
 * End execution of one level of PlanQual.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 */
static void
EvalPlanQualStop(evalPlanQual *epq)
{
	EState *epqstate = epq->estate;
	MemoryContext oldcontext;
	ListCell *l;

	oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);

	ExecEndNode(epq->planstate);

	foreach(l, epqstate->es_subplanstates)
	{
		PlanState *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away this level's private tuple table entirely */
	ExecDropTupleTable(epqstate->es_tupleTable, true);
	epqstate->es_tupleTable = NULL;

	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	foreach(l, epqstate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}
/*
 * ExecGetActivePlanTree --- get the active PlanState tree from a QueryDesc
 *
 * Ordinarily this is just the one mentioned in the QueryDesc, but if we
 * are looking at a row returned by the EvalPlanQual machinery, we need
 * to look at the subsidiary state instead.
 */
PlanState *
ExecGetActivePlanTree(QueryDesc *queryDesc)
{
	EState *estate = queryDesc->estate;

	if (estate && estate->es_useEvalPlan && estate->es_evalPlanQual != NULL)
		return estate->es_evalPlanQual->planstate;
	else
		return queryDesc->planstate;
}
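/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * helper showing why callers should consult ExecGetActivePlanTree rather
 * than queryDesc->planstate directly -- while an EPQ recheck is running,
 * the two differ.
 */
#ifdef NOT_USED
static bool
ExampleEpqRecheckIsActive(QueryDesc *queryDesc)
{
	/* true when the recheck subplan, not the main plan, is executing */
	return ExecGetActivePlanTree(queryDesc) != queryDesc->planstate;
}
#endif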
/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 *
 * TODO: remove some of the INTO-specific cruft from EState, and keep
 * it in the DestReceiver instead.
 */

typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	EState *estate;				/* EState we are working with */
} DR_intorel;
/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->intoClause;
	EState *estate = queryDesc->estate;
	Relation intoRelationDesc;
	char *intoName;
	Oid namespaceId;
	Oid tablespaceId;
	Datum reloptions;
	AclResult aclresult;
	Oid intoRelationId;
	TupleDesc tupdesc;
	DR_intorel *myState;
	/*
	 * Check consistency of arguments
	 */
	if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetCreationNamespace(into->rel);

	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
									  ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
					   get_namespace_name(namespaceId));
	/*
	 * Select tablespace to use.  If not specified, use default tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		tablespaceId = get_tablespace_oid(into->tableSpaceName);
		if (!OidIsValid(tablespaceId))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							into->tableSpaceName)));
	}
	else
	{
		tablespaceId = GetDefaultTablespace(into->rel->istemp);
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}
	/* Parse and validate any reloptions */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* Copy the tupdesc because heap_create_with_catalog modifies it */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,
											  GetUserId(),
											  tupdesc,
											  NIL,
											  RELKIND_RELATION,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  allowSystemTableMods);

	FreeTupleDesc(tupdesc);
	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation.  Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
	 * that the TOAST table will be visible for insertion.
	 */
	AlterTableCreateToastTable(intoRelationId);

	/*
	 * And open the constructed table for writing.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/* use_wal off requires rd_targblock be initially invalid */
	Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

	/*
	 * We can skip WAL-logging the insertions, unless PITR is in use.
	 */
	estate->es_into_relation_use_wal = XLogArchivingActive();
	estate->es_into_relation_descriptor = intoRelationDesc;

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel, NULL);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
}
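/*
 * For illustration (not in the original file): the two SQL spellings that
 * reach this path,
 *
 *		SELECT a, b INTO newtab FROM oldtab;
 *		CREATE TABLE newtab AS SELECT a, b FROM oldtab;
 *
 * both put an IntoClause into the PlannedStmt; OpenIntoRel then creates
 * newtab and redirects the query's output into it through DR_intorel.
 */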
/*
 * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
 */
static void
CloseIntoRel(QueryDesc *queryDesc)
{
	EState *estate = queryDesc->estate;

	/* OpenIntoRel might never have gotten called */
	if (estate->es_into_relation_descriptor)
	{
		/* If we skipped using WAL, must heap_sync before commit */
		if (!estate->es_into_relation_use_wal)
			heap_sync(estate->es_into_relation_descriptor);

		/* close rel, but keep lock until commit */
		heap_close(estate->es_into_relation_descriptor, NoLock);

		estate->es_into_relation_descriptor = NULL;
	}
}
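/*
 * Illustrative note (not from the original source): the heap_sync above is
 * the other half of the WAL-skip decision made in OpenIntoRel:
 *
 *		es_into_relation_use_wal	crash recovery		commit requirement
 *		true (archiving active)		replay from WAL		none extra
 *		false						no WAL to replay	heap_sync first
 *
 * With WAL skipped, every page of the new relation must reach disk before
 * the creating transaction commits; if the transaction aborts instead, the
 * relation file is simply deleted, so no data is lost either way.
 */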
/*
 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
 *
 * Since CreateDestReceiver doesn't accept the parameters we'd need,
 * we just leave the private fields empty here.  OpenIntoRel will
 * fill them in.
 */
DestReceiver *
CreateIntoRelDestReceiver(void)
{
	DR_intorel *self = (DR_intorel *) palloc(sizeof(DR_intorel));

	self->pub.receiveSlot = intorel_receive;
	self->pub.rStartup = intorel_startup;
	self->pub.rShutdown = intorel_shutdown;
	self->pub.rDestroy = intorel_destroy;
	self->pub.mydest = DestIntoRel;

	/* private fields will be set by OpenIntoRel */
	self->estate = NULL;

	return (DestReceiver *) self;
}
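/*
 * Illustrative sketch (not part of the original file): the order in which
 * the executor drives any DestReceiver, DR_intorel included:
 *
 *		dest = CreateDestReceiver(DestIntoRel, NULL);		(OpenIntoRel)
 *		(*dest->rStartup) (dest, operation, tupDesc);		(query startup)
 *		per result tuple: (*dest->receiveSlot) (slot, dest);
 *		(*dest->rShutdown) (dest);							(query end)
 *		(*dest->rDestroy) (dest);							(owner cleanup)
 *
 * For DR_intorel the startup and shutdown callbacks below are no-ops,
 * since OpenIntoRel and CloseIntoRel do the real setup and teardown.
 */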
/*
 * intorel_startup --- executor startup
 */
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	/* no-op */
}

/*
 * intorel_receive --- receive one tuple
 */
static void
intorel_receive(TupleTableSlot *slot, DestReceiver *self)
{
	DR_intorel *myState = (DR_intorel *) self;
	EState *estate = myState->estate;
	HeapTuple tuple;

	tuple = ExecCopySlotTuple(slot);

	heap_insert(estate->es_into_relation_descriptor,
				tuple,
				estate->es_output_cid,
				estate->es_into_relation_use_wal,
				false);			/* never any point in using FSM */

	/* We know this is a newly created relation, so there are no indexes */

	heap_freetuple(tuple);
}

/*
 * intorel_shutdown --- executor end
 */
static void
intorel_shutdown(DestReceiver *self)
{
	/* no-op */
}

/*
 * intorel_destroy --- release DestReceiver object
 */
static void
intorel_destroy(DestReceiver *self)
{
	pfree(self);
}