1 /*-------------------------------------------------------------------------
2  *
3  * execMain.c
4  *        top level executor interface routines
5  *
6  * INTERFACE ROUTINES
7  *      ExecutorStart()
8  *      ExecutorRun()
9  *      ExecutorEnd()
10  *
11  *      The old ExecutorMain() has been replaced by ExecutorStart(),
12  *      ExecutorRun() and ExecutorEnd()
13  *
14  *      These three procedures are the external interfaces to the executor.
15  *      In each case, the query descriptor is required as an argument.
16  *
17  *      ExecutorStart() must be called at the beginning of execution of any
18  *      query plan and ExecutorEnd() should always be called at the end of
19  *      execution of a plan.
20  *
21  *      ExecutorRun accepts direction and count arguments that specify whether
22  *      the plan is to be executed forwards or backwards, and for how many tuples.
23  *
24  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  *        $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.275 2006/07/13 16:49:14 momjian Exp $
30  *
31  *-------------------------------------------------------------------------
32  */
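/*
 * A minimal sketch of the calling sequence described above, as the traffic
 * cop or a portal might drive it (illustrative only; CreateQueryDesc's full
 * argument list and all error handling are elided):
 *
 *              queryDesc = CreateQueryDesc(parsetree, plantree, snapshot, ...);
 *
 *              ExecutorStart(queryDesc, 0);                        -- eflags = 0: no special flags
 *              ExecutorRun(queryDesc, ForwardScanDirection, 0L);   -- count = 0: run to completion
 *              ExecutorEnd(queryDesc);                             -- releases everything the executor allocated
 */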
33 #include "postgres.h"
34
35 #include "access/heapam.h"
36 #include "access/reloptions.h"
37 #include "access/transam.h"
38 #include "access/xact.h"
39 #include "access/xlog.h"
40 #include "catalog/heap.h"
41 #include "catalog/namespace.h"
42 #include "commands/tablecmds.h"
43 #include "commands/tablespace.h"
44 #include "commands/trigger.h"
45 #include "executor/execdebug.h"
46 #include "executor/execdefs.h"
47 #include "executor/instrument.h"
48 #include "miscadmin.h"
49 #include "optimizer/clauses.h"
50 #include "optimizer/var.h"
51 #include "parser/parse_clause.h"
52 #include "parser/parsetree.h"
53 #include "storage/smgr.h"
54 #include "utils/acl.h"
55 #include "utils/guc.h"
56 #include "utils/lsyscache.h"
57 #include "utils/memutils.h"
58
59
60 typedef struct evalPlanQual
61 {
62         Index           rti;
63         EState     *estate;
64         PlanState  *planstate;
65         struct evalPlanQual *next;      /* stack of active PlanQual plans */
66         struct evalPlanQual *free;      /* list of free PlanQual plans */
67 } evalPlanQual;
68
69 /* decls for local routines only used within this module */
70 static void InitPlan(QueryDesc *queryDesc, int eflags);
71 static void initResultRelInfo(ResultRelInfo *resultRelInfo,
72                                   Index resultRelationIndex,
73                                   List *rangeTable,
74                                   CmdType operation,
75                                   bool doInstrument);
76 static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
77                         CmdType operation,
78                         long numberTuples,
79                         ScanDirection direction,
80                         DestReceiver *dest);
81 static void ExecSelect(TupleTableSlot *slot,
82                    DestReceiver *dest,
83                    EState *estate);
84 static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
85                    EState *estate);
86 static void ExecDelete(TupleTableSlot *slot, ItemPointer tupleid,
87                    EState *estate);
88 static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
89                    EState *estate);
90 static TupleTableSlot *EvalPlanQualNext(EState *estate);
91 static void EndEvalPlanQual(EState *estate);
92 static void ExecCheckRTEPerms(RangeTblEntry *rte);
93 static void ExecCheckXactReadOnly(Query *parsetree);
94 static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
95                                   evalPlanQual *priorepq);
96 static void EvalPlanQualStop(evalPlanQual *epq);
97
98 /* end of local decls */
99
100
101 /* ----------------------------------------------------------------
102  *              ExecutorStart
103  *
104  *              This routine must be called at the beginning of any execution of any
105  *              query plan
106  *
107  * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
108  * clear why we bother to separate the two functions, but...).  The tupDesc
109  * field of the QueryDesc is filled in to describe the tuples that will be
110  * returned, and the internal fields (estate and planstate) are set up.
111  *
112  * eflags contains flag bits as described in executor.h.
113  *
114  * NB: the CurrentMemoryContext when this is called will become the parent
115  * of the per-query context used for this Executor invocation.
116  * ----------------------------------------------------------------
117  */
118 void
119 ExecutorStart(QueryDesc *queryDesc, int eflags)
120 {
121         EState     *estate;
122         MemoryContext oldcontext;
123
124         /* sanity checks: queryDesc must not be started already */
125         Assert(queryDesc != NULL);
126         Assert(queryDesc->estate == NULL);
127
128         /*
129          * If the transaction is read-only, we need to check if any writes are
130          * planned to non-temporary tables.  EXPLAIN is considered read-only.
131          */
132         if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
133                 ExecCheckXactReadOnly(queryDesc->parsetree);
134
135         /*
136          * Build EState, switch into per-query memory context for startup.
137          */
138         estate = CreateExecutorState();
139         queryDesc->estate = estate;
140
141         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
142
143         /*
144          * Fill in parameters, if any, from queryDesc
145          */
146         estate->es_param_list_info = queryDesc->params;
147
148         if (queryDesc->plantree->nParamExec > 0)
149                 estate->es_param_exec_vals = (ParamExecData *)
150                         palloc0(queryDesc->plantree->nParamExec * sizeof(ParamExecData));
151
152         /*
153          * Copy other important information into the EState
154          */
155         estate->es_snapshot = queryDesc->snapshot;
156         estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
157         estate->es_instrument = queryDesc->doInstrument;
158
159         /*
160          * Initialize the plan state tree
161          */
162         InitPlan(queryDesc, eflags);
163
164         MemoryContextSwitchTo(oldcontext);
165 }
166
167 /* ----------------------------------------------------------------
168  *              ExecutorRun
169  *
170  *              This is the main routine of the executor module. It accepts
171  *              the query descriptor from the traffic cop and executes the
172  *              query plan.
173  *
174  *              ExecutorStart must have been called already.
175  *
176  *              If direction is NoMovementScanDirection then nothing is done
177  *              except to start up/shut down the destination.  Otherwise,
178  *              we retrieve up to 'count' tuples in the specified direction.
179  *
180  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
181  *              completion.
182  *
183  * ----------------------------------------------------------------
184  */
185 TupleTableSlot *
186 ExecutorRun(QueryDesc *queryDesc,
187                         ScanDirection direction, long count)
188 {
189         EState     *estate;
190         CmdType         operation;
191         DestReceiver *dest;
192         TupleTableSlot *result;
193         MemoryContext oldcontext;
194
195         /* sanity checks */
196         Assert(queryDesc != NULL);
197
198         estate = queryDesc->estate;
199
200         Assert(estate != NULL);
201
202         /*
203          * Switch into per-query memory context
204          */
205         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
206
207         /*
208          * extract information from the query descriptor.
209          */
210         operation = queryDesc->operation;
211         dest = queryDesc->dest;
212
213         /*
214          * startup tuple receiver
215          */
216         estate->es_processed = 0;
217         estate->es_lastoid = InvalidOid;
218
219         (*dest->rStartup) (dest, operation, queryDesc->tupDesc);
220
221         /*
222          * run plan
223          */
224         if (ScanDirectionIsNoMovement(direction))
225                 result = NULL;
226         else
227                 result = ExecutePlan(estate,
228                                                          queryDesc->planstate,
229                                                          operation,
230                                                          count,
231                                                          direction,
232                                                          dest);
233
234         /*
235          * shutdown receiver
236          */
237         (*dest->rShutdown) (dest);
238
239         MemoryContextSwitchTo(oldcontext);
240
241         return result;
242 }
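/*
 * Because the plan's state lives in the EState until ExecutorEnd, a caller
 * can in principle call ExecutorRun more than once to consume the result set
 * incrementally (this is what the "portal limit" above refers to).  A hedged
 * sketch of such usage:
 *
 *              slot = ExecutorRun(queryDesc, ForwardScanDirection, 100L);  -- at most 100 tuples
 *              ... hand results to the client ...
 *              slot = ExecutorRun(queryDesc, ForwardScanDirection, 100L);  -- next batch
 *
 * Passing NoMovementScanDirection merely starts up and shuts down the
 * destination receiver without fetching anything.
 */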
243
244 /* ----------------------------------------------------------------
245  *              ExecutorEnd
246  *
247  *              This routine must be called at the end of execution of any
248  *              query plan
249  * ----------------------------------------------------------------
250  */
251 void
252 ExecutorEnd(QueryDesc *queryDesc)
253 {
254         EState     *estate;
255         MemoryContext oldcontext;
256
257         /* sanity checks */
258         Assert(queryDesc != NULL);
259
260         estate = queryDesc->estate;
261
262         Assert(estate != NULL);
263
264         /*
265          * Switch into per-query memory context to run ExecEndPlan
266          */
267         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
268
269         ExecEndPlan(queryDesc->planstate, estate);
270
271         /*
272          * Must switch out of context before destroying it
273          */
274         MemoryContextSwitchTo(oldcontext);
275
276         /*
277          * Release EState and per-query memory context.  This should release
278          * everything the executor has allocated.
279          */
280         FreeExecutorState(estate);
281
282         /* Reset queryDesc fields that no longer point to anything */
283         queryDesc->tupDesc = NULL;
284         queryDesc->estate = NULL;
285         queryDesc->planstate = NULL;
286 }
287
288 /* ----------------------------------------------------------------
289  *              ExecutorRewind
290  *
291  *              This routine may be called on an open queryDesc to rewind it
292  *              to the start.
293  * ----------------------------------------------------------------
294  */
295 void
296 ExecutorRewind(QueryDesc *queryDesc)
297 {
298         EState     *estate;
299         MemoryContext oldcontext;
300
301         /* sanity checks */
302         Assert(queryDesc != NULL);
303
304         estate = queryDesc->estate;
305
306         Assert(estate != NULL);
307
308         /* It's probably not sensible to rescan updating queries */
309         Assert(queryDesc->operation == CMD_SELECT);
310
311         /*
312          * Switch into per-query memory context
313          */
314         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
315
316         /*
317          * rescan plan
318          */
319         ExecReScan(queryDesc->planstate, NULL);
320
321         MemoryContextSwitchTo(oldcontext);
322 }
323
324
325 /*
326  * ExecCheckRTPerms
327  *              Check access permissions for all relations listed in a range table.
328  */
329 void
330 ExecCheckRTPerms(List *rangeTable)
331 {
332         ListCell   *l;
333
334         foreach(l, rangeTable)
335         {
336                 RangeTblEntry *rte = lfirst(l);
337
338                 ExecCheckRTEPerms(rte);
339         }
340 }
341
342 /*
343  * ExecCheckRTEPerms
344  *              Check access permissions for a single RTE.
345  */
346 static void
347 ExecCheckRTEPerms(RangeTblEntry *rte)
348 {
349         AclMode         requiredPerms;
350         Oid                     relOid;
351         Oid                     userid;
352
353         /*
354          * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
355          * checked by ExecInitSubqueryScan if the subquery is still a separate
356          * subquery --- if it's been pulled up into our query level then the RTEs
357          * are in our rangetable and will be checked here. Function RTEs are
358          * checked by init_fcache when the function is prepared for execution.
359          * Join and special RTEs need no checks.
360          */
361         if (rte->rtekind != RTE_RELATION)
362                 return;
363
364         /*
365          * No work if requiredPerms is empty.
366          */
367         requiredPerms = rte->requiredPerms;
368         if (requiredPerms == 0)
369                 return;
370
371         relOid = rte->relid;
372
373         /*
374          * userid to check as: current user unless we have a setuid indication.
375          *
376          * Note: GetUserId() is presently fast enough that there's no harm in
377          * calling it separately for each RTE.  If that stops being true, we could
378          * call it once in ExecCheckRTPerms and pass the userid down from there.
379          * But for now, no need for the extra clutter.
380          */
381         userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
382
383         /*
384          * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
385          */
386         if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
387                 != requiredPerms)
388                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
389                                            get_rel_name(relOid));
390 }
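/*
 * To illustrate the ACLMASK_ALL requirement (hedged example values): if an
 * RTE arrives with requiredPerms = ACL_SELECT | ACL_UPDATE but the user has
 * only been granted UPDATE, pg_class_aclmask() returns just ACL_UPDATE.
 * That is != requiredPerms, so we raise the error; ACLMASK_ANY would have
 * let the partial match slip through.
 */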
391
392 /*
393  * Check that the query does not imply any writes to non-temp tables.
394  */
395 static void
396 ExecCheckXactReadOnly(Query *parsetree)
397 {
398         ListCell   *l;
399
400         /*
401          * CREATE TABLE AS or SELECT INTO?
402          *
403          * XXX should we allow this if the destination is temp?
404          */
405         if (parsetree->into != NULL)
406                 goto fail;
407
408         /* Fail if write permissions are requested on any non-temp table */
409         foreach(l, parsetree->rtable)
410         {
411                 RangeTblEntry *rte = lfirst(l);
412
413                 if (rte->rtekind == RTE_SUBQUERY)
414                 {
415                         ExecCheckXactReadOnly(rte->subquery);
416                         continue;
417                 }
418
419                 if (rte->rtekind != RTE_RELATION)
420                         continue;
421
422                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
423                         continue;
424
425                 if (isTempNamespace(get_rel_namespace(rte->relid)))
426                         continue;
427
428                 goto fail;
429         }
430
431         return;
432
433 fail:
434         ereport(ERROR,
435                         (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
436                          errmsg("transaction is read-only")));
437 }
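/*
 * In a read-only transaction the rules above work out roughly like this
 * (hedged examples):
 *
 *              SELECT ... FROM tab                     only ACL_SELECT requested: allowed
 *              UPDATE tab SET ...                      ACL_UPDATE on a non-temp rel: fails
 *              UPDATE my_temp_tab SET ...              temp namespace: allowed
 *              SELECT ... INTO newtab FROM tab         parsetree->into is set: fails, even if
 *                                                      newtab would be temp (see XXX above)
 */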
438
439
440 /* ----------------------------------------------------------------
441  *              InitPlan
442  *
443  *              Initializes the query plan: open files, allocate storage
444  *              and start up the rule manager
445  * ----------------------------------------------------------------
446  */
447 static void
448 InitPlan(QueryDesc *queryDesc, int eflags)
449 {
450         CmdType         operation = queryDesc->operation;
451         Query      *parseTree = queryDesc->parsetree;
452         Plan       *plan = queryDesc->plantree;
453         EState     *estate = queryDesc->estate;
454         PlanState  *planstate;
455         List       *rangeTable;
456         Relation        intoRelationDesc;
457         bool            do_select_into;
458         TupleDesc       tupType;
459         ListCell   *l;
460
461         /*
462          * Do permissions checks.  It's sufficient to examine the query's top
463          * rangetable here --- subplan RTEs will be checked during
464          * ExecInitSubPlan().
465          */
466         ExecCheckRTPerms(parseTree->rtable);
467
468         /*
469          * get information from query descriptor
470          */
471         rangeTable = parseTree->rtable;
472
473         /*
474          * initialize the node's execution state
475          */
476         estate->es_range_table = rangeTable;
477
478         /*
479          * if there is a result relation, initialize result relation stuff
480          */
481         if (parseTree->resultRelation != 0 && operation != CMD_SELECT)
482         {
483                 List       *resultRelations = parseTree->resultRelations;
484                 int                     numResultRelations;
485                 ResultRelInfo *resultRelInfos;
486
487                 if (resultRelations != NIL)
488                 {
489                         /*
490                          * Multiple result relations (due to inheritance);
491                          * parseTree->resultRelations identifies them all.
492                          */
493                         ResultRelInfo *resultRelInfo;
494
495                         numResultRelations = list_length(resultRelations);
496                         resultRelInfos = (ResultRelInfo *)
497                                 palloc(numResultRelations * sizeof(ResultRelInfo));
498                         resultRelInfo = resultRelInfos;
499                         foreach(l, resultRelations)
500                         {
501                                 initResultRelInfo(resultRelInfo,
502                                                                   lfirst_int(l),
503                                                                   rangeTable,
504                                                                   operation,
505                                                                   estate->es_instrument);
506                                 resultRelInfo++;
507                         }
508                 }
509                 else
510                 {
511                         /*
512                          * Single result relation identified by parseTree->resultRelation
513                          */
514                         numResultRelations = 1;
515                         resultRelInfos = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
516                         initResultRelInfo(resultRelInfos,
517                                                           parseTree->resultRelation,
518                                                           rangeTable,
519                                                           operation,
520                                                           estate->es_instrument);
521                 }
522
523                 estate->es_result_relations = resultRelInfos;
524                 estate->es_num_result_relations = numResultRelations;
525                 /* Initialize to first or only result rel */
526                 estate->es_result_relation_info = resultRelInfos;
527         }
528         else
529         {
530                 /*
531                  * if no result relation, then set state appropriately
532                  */
533                 estate->es_result_relations = NULL;
534                 estate->es_num_result_relations = 0;
535                 estate->es_result_relation_info = NULL;
536         }
537
538         /*
539          * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
540          * flag appropriately so that the plan tree will be initialized with the
541          * correct tuple descriptors.
542          */
543         do_select_into = false;
544
545         if (operation == CMD_SELECT && parseTree->into != NULL)
546         {
547                 do_select_into = true;
548                 estate->es_select_into = true;
549                 estate->es_into_oids = interpretOidsOption(parseTree->intoOptions);
550         }
551
552         /*
553          * Have to lock relations selected FOR UPDATE/FOR SHARE
554          */
555         estate->es_rowMarks = NIL;
556         foreach(l, parseTree->rowMarks)
557         {
558                 RowMarkClause *rc = (RowMarkClause *) lfirst(l);
559                 Oid                     relid = getrelid(rc->rti, rangeTable);
560                 Relation        relation;
561                 ExecRowMark *erm;
562
563                 relation = heap_open(relid, RowShareLock);
564                 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
565                 erm->relation = relation;
566                 erm->rti = rc->rti;
567                 erm->forUpdate = rc->forUpdate;
568                 erm->noWait = rc->noWait;
569                 snprintf(erm->resname, sizeof(erm->resname), "ctid%u", rc->rti);
570                 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
571         }
572
573         /*
574          * initialize the executor "tuple" table.  We need slots for all the plan
575          * nodes, plus possibly output slots for the junkfilter(s). At this point
576          * we aren't sure if we need junkfilters, so just add slots for them
577          * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
578          * trigger output tuples.
579          */
580         {
581                 int                     nSlots = ExecCountSlotsNode(plan);
582
583                 if (parseTree->resultRelations != NIL)
584                         nSlots += list_length(parseTree->resultRelations);
585                 else
586                         nSlots += 1;
587                 if (operation != CMD_SELECT)
588                         nSlots++;
589
590                 estate->es_tupleTable = ExecCreateTupleTable(nSlots);
591
592                 if (operation != CMD_SELECT)
593                         estate->es_trig_tuple_slot =
594                                 ExecAllocTableSlot(estate->es_tupleTable);
595         }
596
597         /* mark EvalPlanQual not active */
598         estate->es_topPlan = plan;
599         estate->es_evalPlanQual = NULL;
600         estate->es_evTupleNull = NULL;
601         estate->es_evTuple = NULL;
602         estate->es_useEvalPlan = false;
603
604         /*
605          * initialize the private state information for all the nodes in the query
606          * tree.  This opens files, allocates storage and leaves us ready to start
607          * processing tuples.
608          */
609         planstate = ExecInitNode(plan, estate, eflags);
610
611         /*
612          * Get the tuple descriptor describing the type of tuples to return. (this
613          * is especially important if we are creating a relation with "SELECT
614          * INTO")
615          */
616         tupType = ExecGetResultType(planstate);
617
618         /*
619          * Initialize the junk filter if needed.  SELECT and INSERT queries need a
620          * filter if there are any junk attrs in the tlist.  INSERT and SELECT
621          * INTO also need a filter if the plan may return raw disk tuples (else
622          * heap_insert will be scribbling on the source relation!). UPDATE and
623          * DELETE always need a filter, since there's always a junk 'ctid'
624          * attribute present --- no need to look first.
625          */
626         {
627                 bool            junk_filter_needed = false;
628                 ListCell   *tlist;
629
630                 switch (operation)
631                 {
632                         case CMD_SELECT:
633                         case CMD_INSERT:
634                                 foreach(tlist, plan->targetlist)
635                                 {
636                                         TargetEntry *tle = (TargetEntry *) lfirst(tlist);
637
638                                         if (tle->resjunk)
639                                         {
640                                                 junk_filter_needed = true;
641                                                 break;
642                                         }
643                                 }
644                                 if (!junk_filter_needed &&
645                                         (operation == CMD_INSERT || do_select_into) &&
646                                         ExecMayReturnRawTuples(planstate))
647                                         junk_filter_needed = true;
648                                 break;
649                         case CMD_UPDATE:
650                         case CMD_DELETE:
651                                 junk_filter_needed = true;
652                                 break;
653                         default:
654                                 break;
655                 }
656
657                 if (junk_filter_needed)
658                 {
659                         /*
660                          * If there are multiple result relations, each one needs its own
661                          * junk filter.  Note this is only possible for UPDATE/DELETE, so
662                          * we can't be fooled by some needing a filter and some not.
663                          */
664                         if (parseTree->resultRelations != NIL)
665                         {
666                                 PlanState **appendplans;
667                                 int                     as_nplans;
668                                 ResultRelInfo *resultRelInfo;
669                                 int                     i;
670
671                                 /* Top plan had better be an Append here. */
672                                 Assert(IsA(plan, Append));
673                                 Assert(((Append *) plan)->isTarget);
674                                 Assert(IsA(planstate, AppendState));
675                                 appendplans = ((AppendState *) planstate)->appendplans;
676                                 as_nplans = ((AppendState *) planstate)->as_nplans;
677                                 Assert(as_nplans == estate->es_num_result_relations);
678                                 resultRelInfo = estate->es_result_relations;
679                                 for (i = 0; i < as_nplans; i++)
680                                 {
681                                         PlanState  *subplan = appendplans[i];
682                                         JunkFilter *j;
683
684                                         j = ExecInitJunkFilter(subplan->plan->targetlist,
685                                                         resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
686                                                                   ExecAllocTableSlot(estate->es_tupleTable));
687                                         resultRelInfo->ri_junkFilter = j;
688                                         resultRelInfo++;
689                                 }
690
691                                 /*
692                                  * Set active junkfilter too; at this point ExecInitAppend has
693                                  * already selected an active result relation...
694                                  */
695                                 estate->es_junkFilter =
696                                         estate->es_result_relation_info->ri_junkFilter;
697                         }
698                         else
699                         {
700                                 /* Normal case with just one JunkFilter */
701                                 JunkFilter *j;
702
703                                 j = ExecInitJunkFilter(planstate->plan->targetlist,
704                                                                            tupType->tdhasoid,
705                                                                   ExecAllocTableSlot(estate->es_tupleTable));
706                                 estate->es_junkFilter = j;
707                                 if (estate->es_result_relation_info)
708                                         estate->es_result_relation_info->ri_junkFilter = j;
709
710                                 /* For SELECT, want to return the cleaned tuple type */
711                                 if (operation == CMD_SELECT)
712                                         tupType = j->jf_cleanTupType;
713                         }
714                 }
715                 else
716                         estate->es_junkFilter = NULL;
717         }
718
719         /*
720          * If doing SELECT INTO, initialize the "into" relation.  We must wait
721          * till now so we have the "clean" result tuple type to create the new
722          * table from.
723          *
724          * If EXPLAIN, skip creating the "into" relation.
725          */
726         intoRelationDesc = NULL;
727
728         if (do_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
729         {
730                 char       *intoName;
731                 Oid                     namespaceId;
732                 Oid                     tablespaceId;
733                 Datum           reloptions;
734                 AclResult       aclresult;
735                 Oid                     intoRelationId;
736                 TupleDesc       tupdesc;
737
738                 /*
739                  * Check consistency of arguments
740                  */
741                 if (parseTree->intoOnCommit != ONCOMMIT_NOOP && !parseTree->into->istemp)
742                         ereport(ERROR,
743                                         (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
744                                          errmsg("ON COMMIT can only be used on temporary tables")));
745
746                 /*
747                  * find namespace to create in, check permissions
748                  */
749                 intoName = parseTree->into->relname;
750                 namespaceId = RangeVarGetCreationNamespace(parseTree->into);
751
752                 aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
753                                                                                   ACL_CREATE);
754                 if (aclresult != ACLCHECK_OK)
755                         aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
756                                                    get_namespace_name(namespaceId));
757
758                 /*
759                  * Select tablespace to use.  If not specified, use default_tablespace
760                  * (which may in turn default to database's default).
761                  */
762                 if (parseTree->intoTableSpaceName)
763                 {
764                         tablespaceId = get_tablespace_oid(parseTree->intoTableSpaceName);
765                         if (!OidIsValid(tablespaceId))
766                                 ereport(ERROR,
767                                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
768                                                  errmsg("tablespace \"%s\" does not exist",
769                                                                 parseTree->intoTableSpaceName)));
770                 } else
771                 {
772                         tablespaceId = GetDefaultTablespace();
773                         /* note InvalidOid is OK in this case */
774                 }
775
776                 /* Parse and validate any reloptions */
777                 reloptions = transformRelOptions((Datum) 0,
778                                                                                  parseTree->intoOptions,
779                                                                                  true,
780                                                                                  false);
781                 (void) heap_reloptions(RELKIND_RELATION, reloptions, true);
782
783                 /* Check permissions except when using the database's default */
784                 if (OidIsValid(tablespaceId))
785                 {
786                         AclResult       aclresult;
787
788                         aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
789                                                                                            ACL_CREATE);
790
791                         if (aclresult != ACLCHECK_OK)
792                                 aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
793                                                            get_tablespace_name(tablespaceId));
794                 }
795
796                 /*
797                  * have to copy tupType to get rid of constraints
798                  */
799                 tupdesc = CreateTupleDescCopy(tupType);
800
801                 intoRelationId = heap_create_with_catalog(intoName,
802                                                                                                   namespaceId,
803                                                                                                   tablespaceId,
804                                                                                                   InvalidOid,
805                                                                                                   GetUserId(),
806                                                                                                   tupdesc,
807                                                                                                   RELKIND_RELATION,
808                                                                                                   false,
809                                                                                                   true,
810                                                                                                   0,
811                                                                                                   parseTree->intoOnCommit,
812                                                                                                   reloptions,
813                                                                                                   allowSystemTableMods);
814
815                 FreeTupleDesc(tupdesc);
816
817                 /*
818                  * Advance command counter so that the newly-created relation's
819                  * catalog tuples will be visible to heap_open.
820                  */
821                 CommandCounterIncrement();
822
823                 /*
824                  * If necessary, create a TOAST table for the into relation. Note that
825                  * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
826                  * that the TOAST table will be visible for insertion.
827                  */
828                 AlterTableCreateToastTable(intoRelationId, true);
829
830                 /*
831                  * And open the constructed table for writing.
832                  */
833                 intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
834
835                 /* use_wal off requires rd_targblock be initially invalid */
836                 Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
837
838                 /*
839                  * We can skip WAL-logging the insertions, unless PITR is in use.
840                  *
841                  * Note that for a non-temp INTO table, this is safe only because we
842                  * know that the catalog changes above will have been WAL-logged, and
843                  * so RecordTransactionCommit will think it needs to WAL-log the
844                  * eventual transaction commit.  Else the commit might be lost, even
845                  * though all the data is safely fsync'd ...
846                  */
847                 estate->es_into_relation_use_wal = XLogArchivingActive();
848         }
849
850         estate->es_into_relation_descriptor = intoRelationDesc;
851
852         queryDesc->tupDesc = tupType;
853         queryDesc->planstate = planstate;
854 }
855
856 /*
857  * Initialize ResultRelInfo data for one result relation
858  */
859 static void
860 initResultRelInfo(ResultRelInfo *resultRelInfo,
861                                   Index resultRelationIndex,
862                                   List *rangeTable,
863                                   CmdType operation,
864                                   bool doInstrument)
865 {
866         Oid                     resultRelationOid;
867         Relation        resultRelationDesc;
868
869         resultRelationOid = getrelid(resultRelationIndex, rangeTable);
870         resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);
871
872         switch (resultRelationDesc->rd_rel->relkind)
873         {
874                 case RELKIND_SEQUENCE:
875                         ereport(ERROR,
876                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
877                                          errmsg("cannot change sequence \"%s\"",
878                                                         RelationGetRelationName(resultRelationDesc))));
879                         break;
880                 case RELKIND_TOASTVALUE:
881                         ereport(ERROR,
882                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
883                                          errmsg("cannot change TOAST relation \"%s\"",
884                                                         RelationGetRelationName(resultRelationDesc))));
885                         break;
886                 case RELKIND_VIEW:
887                         ereport(ERROR,
888                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
889                                          errmsg("cannot change view \"%s\"",
890                                                         RelationGetRelationName(resultRelationDesc))));
891                         break;
892         }
893
894         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
895         resultRelInfo->type = T_ResultRelInfo;
896         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
897         resultRelInfo->ri_RelationDesc = resultRelationDesc;
898         resultRelInfo->ri_NumIndices = 0;
899         resultRelInfo->ri_IndexRelationDescs = NULL;
900         resultRelInfo->ri_IndexRelationInfo = NULL;
901         /* make a copy so as not to depend on relcache info not changing... */
902         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
903         if (resultRelInfo->ri_TrigDesc)
904         {
905                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
906
907                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
908                         palloc0(n * sizeof(FmgrInfo));
909                 if (doInstrument)
910                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
911                 else
912                         resultRelInfo->ri_TrigInstrument = NULL;
913         }
914         else
915         {
916                 resultRelInfo->ri_TrigFunctions = NULL;
917                 resultRelInfo->ri_TrigInstrument = NULL;
918         }
919         resultRelInfo->ri_ConstraintExprs = NULL;
920         resultRelInfo->ri_junkFilter = NULL;
921
922         /*
923          * If there are indices on the result relation, open them and save
924          * descriptors in the result relation info, so that we can add new index
925          * entries for the tuples we add/update.  We need not do this for a
926          * DELETE, however, since deletion doesn't affect indexes.
927          */
928         if (resultRelationDesc->rd_rel->relhasindex &&
929                 operation != CMD_DELETE)
930                 ExecOpenIndices(resultRelInfo);
931 }
932
933 /*
934  *              ExecContextForcesOids
935  *
936  * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
937  * we need to ensure that result tuples have space for an OID iff they are
938  * going to be stored into a relation that has OIDs.  In other contexts
939  * we are free to choose whether to leave space for OIDs in result tuples
940  * (we generally don't want to, but we do if a physical-tlist optimization
941  * is possible).  This routine checks the plan context and returns TRUE if the
942  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
943  * *hasoids is set to the required value.
944  *
945  * One reason this is ugly is that all plan nodes in the plan tree will emit
946  * tuples with space for an OID, though we really only need the topmost node
947  * to do so.  However, node types like Sort don't project new tuples but just
948  * return their inputs, and in those cases the requirement propagates down
949  * to the input node.  Eventually we might make this code smart enough to
950  * recognize how far down the requirement really goes, but for now we just
951  * make all plan nodes do the same thing if the top level forces the choice.
952  *
953  * We assume that estate->es_result_relation_info is already set up to
954  * describe the target relation.  Note that in an UPDATE that spans an
955  * inheritance tree, some of the target relations may have OIDs and some not.
956  * We have to make the decisions on a per-relation basis as we initialize
957  * each of the child plans of the topmost Append plan.
958  *
959  * SELECT INTO is even uglier, because we don't have the INTO relation's
960  * descriptor available when this code runs; we have to look aside at a
961  * flag set by InitPlan().
962  */
963 bool
964 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
965 {
966         if (planstate->state->es_select_into)
967         {
968                 *hasoids = planstate->state->es_into_oids;
969                 return true;
970         }
971         else
972         {
973                 ResultRelInfo *ri = planstate->state->es_result_relation_info;
974
975                 if (ri != NULL)
976                 {
977                         Relation        rel = ri->ri_RelationDesc;
978
979                         if (rel != NULL)
980                         {
981                                 *hasoids = rel->rd_rel->relhasoids;
982                                 return true;
983                         }
984                 }
985         }
986
987         return false;
988 }
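/*
 * A minimal sketch of how a node's result-type setup might consult this
 * routine (ExecTypeFromTL is the usual consumer, but the surrounding details
 * here are assumed rather than taken from this file):
 *
 *              bool            hasoids;
 *
 *              if (!ExecContextForcesOids(planstate, &hasoids))
 *                      hasoids = false;                choice not forced; omit OID space
 *              tupdesc = ExecTypeFromTL(planstate->plan->targetlist, hasoids);
 */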
989
990 /* ----------------------------------------------------------------
991  *              ExecEndPlan
992  *
993  *              Cleans up the query plan -- closes files and frees up storage
994  *
995  * NOTE: we are no longer very worried about freeing storage per se
996  * in this code; FreeExecutorState should be guaranteed to release all
997  * memory that needs to be released.  What we are worried about doing
998  * is closing relations and dropping buffer pins.  Thus, for example,
999  * tuple tables must be cleared or dropped to ensure pins are released.
1000  * ----------------------------------------------------------------
1001  */
1002 void
1003 ExecEndPlan(PlanState *planstate, EState *estate)
1004 {
1005         ResultRelInfo *resultRelInfo;
1006         int                     i;
1007         ListCell   *l;
1008
1009         /*
1010          * shut down any PlanQual processing we were doing
1011          */
1012         if (estate->es_evalPlanQual != NULL)
1013                 EndEvalPlanQual(estate);
1014
1015         /*
1016          * shut down the node-type-specific query processing
1017          */
1018         ExecEndNode(planstate);
1019
1020         /*
1021          * destroy the executor "tuple" table.
1022          */
1023         ExecDropTupleTable(estate->es_tupleTable, true);
1024         estate->es_tupleTable = NULL;
1025
1026         /*
1027          * close the result relation(s) if any, but hold locks until xact commit.
1028          */
1029         resultRelInfo = estate->es_result_relations;
1030         for (i = estate->es_num_result_relations; i > 0; i--)
1031         {
1032                 /* Close indices and then the relation itself */
1033                 ExecCloseIndices(resultRelInfo);
1034                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1035                 resultRelInfo++;
1036         }
1037
1038         /*
1039          * close the "into" relation if necessary, again keeping lock
1040          */
1041         if (estate->es_into_relation_descriptor != NULL)
1042         {
1043                 /*
1044                  * If we skipped using WAL, and it's not a temp relation, we must
1045                  * force the relation down to disk before it's safe to commit the
1046                  * transaction.  This requires forcing out any dirty buffers and then
1047                  * doing a forced fsync.
1048                  */
1049                 if (!estate->es_into_relation_use_wal &&
1050                         !estate->es_into_relation_descriptor->rd_istemp)
1051                 {
1052                         FlushRelationBuffers(estate->es_into_relation_descriptor);
1053                         /* FlushRelationBuffers will have opened rd_smgr */
1054                         smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
1055                 }
1056
1057                 heap_close(estate->es_into_relation_descriptor, NoLock);
1058         }
1059
1060         /*
1061          * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1062          */
1063         foreach(l, estate->es_rowMarks)
1064         {
1065                 ExecRowMark *erm = lfirst(l);
1066
1067                 heap_close(erm->relation, NoLock);
1068         }
1069 }
1070
1071 /* ----------------------------------------------------------------
1072  *              ExecutePlan
1073  *
1074  *              processes the query plan to retrieve 'numberTuples' tuples in the
1075  *              direction specified.
1076  *
1077  *              Retrieves all tuples if numberTuples is 0
1078  *
1079  *              result is either a slot containing the last tuple in the case
1080  *              of a SELECT or NULL otherwise.
1081  *
1082  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1083  * user can see it
1084  * ----------------------------------------------------------------
1085  */
1086 static TupleTableSlot *
1087 ExecutePlan(EState *estate,
1088                         PlanState *planstate,
1089                         CmdType operation,
1090                         long numberTuples,
1091                         ScanDirection direction,
1092                         DestReceiver *dest)
1093 {
1094         JunkFilter *junkfilter;
1095         TupleTableSlot *slot;
1096         ItemPointer tupleid = NULL;
1097         ItemPointerData tuple_ctid;
1098         long            current_tuple_count;
1099         TupleTableSlot *result;
1100
1101         /*
1102          * initialize local variables
1103          */
1104         slot = NULL;
1105         current_tuple_count = 0;
1106         result = NULL;
1107
1108         /*
1109          * Set the direction.
1110          */
1111         estate->es_direction = direction;
1112
1113         /*
1114          * Process BEFORE EACH STATEMENT triggers
1115          */
1116         switch (operation)
1117         {
1118                 case CMD_UPDATE:
1119                         ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
1120                         break;
1121                 case CMD_DELETE:
1122                         ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
1123                         break;
1124                 case CMD_INSERT:
1125                         ExecBSInsertTriggers(estate, estate->es_result_relation_info);
1126                         break;
1127                 default:
1128                         /* do nothing */
1129                         break;
1130         }
1131
1132         /*
1133          * Loop until we've processed the proper number of tuples from the plan.
1134          */
1135
1136         for (;;)
1137         {
1138                 /* Reset the per-output-tuple exprcontext */
1139                 ResetPerTupleExprContext(estate);
1140
1141                 /*
1142                  * Execute the plan and obtain a tuple
1143                  */
1144 lnext:  ;
1145                 if (estate->es_useEvalPlan)
1146                 {
1147                         slot = EvalPlanQualNext(estate);
1148                         if (TupIsNull(slot))
1149                                 slot = ExecProcNode(planstate);
1150                 }
1151                 else
1152                         slot = ExecProcNode(planstate);
1153
1154                 /*
1155                  * if the tuple is null, then we assume there is nothing more to
1156                  * process so we just return null...
1157                  */
1158                 if (TupIsNull(slot))
1159                 {
1160                         result = NULL;
1161                         break;
1162                 }
1163
1164                 /*
1165                  * if we have a junk filter, then project a new tuple with the junk
1166                  * removed.
1167                  *
1168                  * Store this new "clean" tuple in the junkfilter's resultSlot.
1169                  * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1170                  * because that tuple slot has the wrong descriptor.)
1171                  *
1172                  * Also, extract all the junk information we need.
1173                  */
1174                 if ((junkfilter = estate->es_junkFilter) != NULL)
1175                 {
1176                         Datum           datum;
1177                         bool            isNull;
1178
1179                         /*
1180                          * extract the 'ctid' junk attribute.
1181                          */
1182                         if (operation == CMD_UPDATE || operation == CMD_DELETE)
1183                         {
1184                                 if (!ExecGetJunkAttribute(junkfilter,
1185                                                                                   slot,
1186                                                                                   "ctid",
1187                                                                                   &datum,
1188                                                                                   &isNull))
1189                                         elog(ERROR, "could not find junk ctid column");
1190
1191                                 /* shouldn't ever get a null result... */
1192                                 if (isNull)
1193                                         elog(ERROR, "ctid is NULL");
1194
1195                                 tupleid = (ItemPointer) DatumGetPointer(datum);
1196                                 tuple_ctid = *tupleid;  /* make sure we don't free the ctid!! */
1197                                 tupleid = &tuple_ctid;
1198                         }
1199
1200                         /*
1201                          * Process any FOR UPDATE or FOR SHARE locking requested.
1202                          */
1203                         else if (estate->es_rowMarks != NIL)
1204                         {
1205                                 ListCell   *l;
1206
1207                 lmark:  ;
1208                                 foreach(l, estate->es_rowMarks)
1209                                 {
1210                                         ExecRowMark *erm = lfirst(l);
1211                                         HeapTupleData tuple;
1212                                         Buffer          buffer;
1213                                         ItemPointerData update_ctid;
1214                                         TransactionId update_xmax;
1215                                         TupleTableSlot *newSlot;
1216                                         LockTupleMode lockmode;
1217                                         HTSU_Result test;
1218
1219                                         if (!ExecGetJunkAttribute(junkfilter,
1220                                                                                           slot,
1221                                                                                           erm->resname,
1222                                                                                           &datum,
1223                                                                                           &isNull))
1224                                                 elog(ERROR, "could not find junk \"%s\" column",
1225                                                          erm->resname);
1226
1227                                         /* shouldn't ever get a null result... */
1228                                         if (isNull)
1229                                                 elog(ERROR, "\"%s\" is NULL", erm->resname);
1230
1231                                         tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
1232
1233                                         if (erm->forUpdate)
1234                                                 lockmode = LockTupleExclusive;
1235                                         else
1236                                                 lockmode = LockTupleShared;
1237
1238                                         test = heap_lock_tuple(erm->relation, &tuple, &buffer,
1239                                                                                    &update_ctid, &update_xmax,
1240                                                                                    estate->es_snapshot->curcid,
1241                                                                                    lockmode, erm->noWait);
1242                                         ReleaseBuffer(buffer);
1243                                         switch (test)
1244                                         {
1245                                                 case HeapTupleSelfUpdated:
1246                                                         /* treat it as deleted; do not process */
1247                                                         goto lnext;
1248
1249                                                 case HeapTupleMayBeUpdated:
1250                                                         break;
1251
1252                                                 case HeapTupleUpdated:
1253                                                         if (IsXactIsoLevelSerializable)
1254                                                                 ereport(ERROR,
1255                                                                  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1256                                                                   errmsg("could not serialize access due to concurrent update")));
1257                                                         if (!ItemPointerEquals(&update_ctid,
1258                                                                                                    &tuple.t_self))
1259                                                         {
1260                                                                 /* updated, so look at updated version */
1261                                                                 newSlot = EvalPlanQual(estate,
1262                                                                                                            erm->rti,
1263                                                                                                            &update_ctid,
1264                                                                                                            update_xmax,
1265                                                                                                            estate->es_snapshot->curcid);
1266                                                                 if (!TupIsNull(newSlot))
1267                                                                 {
1268                                                                         slot = newSlot;
1269                                                                         estate->es_useEvalPlan = true;
1270                                                                         goto lmark;
1271                                                                 }
1272                                                         }
1273
1274                                                         /*
1275                                                          * If the tuple was deleted, or PlanQual failed for the
1276                                                          * updated tuple, we must not return this tuple!
1277                                                          */
1278                                                         goto lnext;
1279
1280                                                 default:
1281                                                         elog(ERROR, "unrecognized heap_lock_tuple status: %u",
1282                                                                  test);
1283                                                         return NULL;
1284                                         }
1285                                 }
1286                         }
1287
1288                         /*
1289                          * Finally create a new "clean" tuple with all junk attributes
1290                          * removed
1291                          */
1292                         slot = ExecFilterJunk(junkfilter, slot);
1293                 }
1294
1295                 /*
1296                  * now that we have a tuple, do the appropriate thing with it.. either
1297                  * return it to the user, add it to a relation someplace, delete it
1298                  * from a relation, or modify some of its attributes.
1299                  */
1300                 switch (operation)
1301                 {
1302                         case CMD_SELECT:
1303                                 ExecSelect(slot,        /* slot containing tuple */
1304                                                    dest,        /* destination's tuple-receiver obj */
1305                                                    estate);
1306                                 result = slot;
1307                                 break;
1308
1309                         case CMD_INSERT:
1310                                 ExecInsert(slot, tupleid, estate);
1311                                 result = NULL;
1312                                 break;
1313
1314                         case CMD_DELETE:
1315                                 ExecDelete(slot, tupleid, estate);
1316                                 result = NULL;
1317                                 break;
1318
1319                         case CMD_UPDATE:
1320                                 ExecUpdate(slot, tupleid, estate);
1321                                 result = NULL;
1322                                 break;
1323
1324                         default:
1325                                 elog(ERROR, "unrecognized operation code: %d",
1326                                          (int) operation);
1327                                 result = NULL;
1328                                 break;
1329                 }
1330
1331                 /*
1332                  * check our tuple count.. if we've processed the proper number then
1333                  * quit, else loop again and process more tuples.  Zero numberTuples
1334                  * means no limit.
1335                  */
1336                 current_tuple_count++;
1337                 if (numberTuples && numberTuples == current_tuple_count)
1338                         break;
1339         }
1340
1341         /*
1342          * Process AFTER EACH STATEMENT triggers
1343          */
1344         switch (operation)
1345         {
1346                 case CMD_UPDATE:
1347                         ExecASUpdateTriggers(estate, estate->es_result_relation_info);
1348                         break;
1349                 case CMD_DELETE:
1350                         ExecASDeleteTriggers(estate, estate->es_result_relation_info);
1351                         break;
1352                 case CMD_INSERT:
1353                         ExecASInsertTriggers(estate, estate->es_result_relation_info);
1354                         break;
1355                 default:
1356                         /* do nothing */
1357                         break;
1358         }
1359
1360         /*
1361          * here, result is either a slot containing a tuple in the case of a
1362          * SELECT or NULL otherwise.
1363          */
1364         return result;
1365 }
1366
1367 /* ----------------------------------------------------------------
1368  *              ExecSelect
1369  *
1370  *              SELECTs are easy.. we just pass the tuple to the appropriate
1371  *              print function.  The only complexity is when we do a
1372  *              "SELECT INTO", in which case we insert the tuple into
1373  *              the appropriate relation (note: this is a newly created relation
1374  *              so we don't need to worry about indices or locks.)
1375  * ----------------------------------------------------------------
1376  */
1377 static void
1378 ExecSelect(TupleTableSlot *slot,
1379                    DestReceiver *dest,
1380                    EState *estate)
1381 {
1382         /*
1383          * insert the tuple into the "into relation"
1384          *
1385          * XXX this probably ought to be replaced by a separate destination
1386          */
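        /*
         * Note on WAL: es_into_relation_use_wal is established when the INTO
         * relation is set up; when it is false, the insert below is presumably
         * safe to leave unlogged because the freshly created relation is synced
         * to disk (or simply discarded) independently of WAL replay.
         */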
1387         if (estate->es_into_relation_descriptor != NULL)
1388         {
1389                 HeapTuple       tuple;
1390
1391                 tuple = ExecCopySlotTuple(slot);
1392                 heap_insert(estate->es_into_relation_descriptor, tuple,
1393                                         estate->es_snapshot->curcid,
1394                                         estate->es_into_relation_use_wal,
1395                                         false);         /* never any point in using FSM */
1396                 /* we know there are no indexes to update */
1397                 heap_freetuple(tuple);
1398                 IncrAppended();
1399         }
1400
1401         /*
1402          * send the tuple to the destination
1403          */
1404         (*dest->receiveSlot) (slot, dest);
1405         IncrRetrieved();
1406         (estate->es_processed)++;
1407 }
1408
1409 /* ----------------------------------------------------------------
1410  *              ExecInsert
1411  *
1412  *              INSERTs are trickier.. we have to insert the tuple into
1413  *              the base relation and insert appropriate tuples into the
1414  *              index relations.
1415  * ----------------------------------------------------------------
1416  */
1417 static void
1418 ExecInsert(TupleTableSlot *slot,
1419                    ItemPointer tupleid,
1420                    EState *estate)
1421 {
1422         HeapTuple       tuple;
1423         ResultRelInfo *resultRelInfo;
1424         Relation        resultRelationDesc;
1425         Oid                     newId;
1426
1427         /*
1428          * get the heap tuple out of the tuple table slot, making sure we have a
1429          * writable copy
1430          */
1431         tuple = ExecMaterializeSlot(slot);
1432
1433         /*
1434          * get information on the (current) result relation
1435          */
1436         resultRelInfo = estate->es_result_relation_info;
1437         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1438
1439         /* BEFORE ROW INSERT Triggers */
1440         if (resultRelInfo->ri_TrigDesc &&
1441                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1442         {
1443                 HeapTuple       newtuple;
1444
1445                 newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1446
1447                 if (newtuple == NULL)   /* "do nothing" */
1448                         return;
1449
1450                 if (newtuple != tuple)  /* modified by Trigger(s) */
1451                 {
1452                         /*
1453                          * Put the modified tuple into a slot for convenience of routines
1454                          * below.  We assume the tuple was allocated in per-tuple memory
1455                          * context, and therefore will go away by itself. The tuple table
1456                          * slot should not try to clear it.
1457                          */
1458                         TupleTableSlot *newslot = estate->es_trig_tuple_slot;
1459
1460                         if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1461                                 ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1462                         ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
1463                         slot = newslot;
1464                         tuple = newtuple;
1465                 }
1466         }
1467
1468         /*
1469          * Check the constraints of the tuple
1470          */
1471         if (resultRelationDesc->rd_att->constr)
1472                 ExecConstraints(resultRelInfo, slot, estate);
1473
1474         /*
1475          * insert the tuple
1476          *
1477          * Note: heap_insert returns the tid (location) of the new tuple in the
1478          * t_self field.
1479          */
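        /*
         * The last two arguments request WAL logging and use of the free space
         * map when choosing a target page; both are wanted for an ordinary
         * INSERT (contrast the SELECT INTO path in ExecSelect above).
         */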
1480         newId = heap_insert(resultRelationDesc, tuple,
1481                                                 estate->es_snapshot->curcid,
1482                                                 true, true);
1483
1484         IncrAppended();
1485         (estate->es_processed)++;
1486         estate->es_lastoid = newId;
1487         setLastTid(&(tuple->t_self));
1488
1489         /*
1490          * insert index entries for tuple
1491          */
1492         if (resultRelInfo->ri_NumIndices > 0)
1493                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1494
1495         /* AFTER ROW INSERT Triggers */
1496         ExecARInsertTriggers(estate, resultRelInfo, tuple);
1497 }
1498
1499 /* ----------------------------------------------------------------
1500  *              ExecDelete
1501  *
1502  *              DELETE is like UPDATE, except that we delete the tuple and no
1503  *              index modifications are needed
1504  * ----------------------------------------------------------------
1505  */
1506 static void
1507 ExecDelete(TupleTableSlot *slot,
1508                    ItemPointer tupleid,
1509                    EState *estate)
1510 {
1511         ResultRelInfo *resultRelInfo;
1512         Relation        resultRelationDesc;
1513         HTSU_Result result;
1514         ItemPointerData update_ctid;
1515         TransactionId update_xmax;
1516
1517         /*
1518          * get information on the (current) result relation
1519          */
1520         resultRelInfo = estate->es_result_relation_info;
1521         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1522
1523         /* BEFORE ROW DELETE Triggers */
1524         if (resultRelInfo->ri_TrigDesc &&
1525                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
1526         {
1527                 bool            dodelete;
1528
1529                 dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
1530                                                                                 estate->es_snapshot->curcid);
1531
1532                 if (!dodelete)                  /* "do nothing" */
1533                         return;
1534         }
1535
1536         /*
1537          * delete the tuple
1538          *
1539          * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
1540          * the row to be deleted is visible to that snapshot, and throw a can't-
1541          * serialize error if not.      This is a special-case behavior needed for
1542          * referential integrity updates in serializable transactions.
1543          */
1544 ldelete:;
1545         result = heap_delete(resultRelationDesc, tupleid,
1546                                                  &update_ctid, &update_xmax,
1547                                                  estate->es_snapshot->curcid,
1548                                                  estate->es_crosscheck_snapshot,
1549                                                  true /* wait for commit */ );
1550         switch (result)
1551         {
1552                 case HeapTupleSelfUpdated:
1553                         /* already deleted by self; nothing to do */
1554                         return;
1555
1556                 case HeapTupleMayBeUpdated:
1557                         break;
1558
1559                 case HeapTupleUpdated:
1560                         if (IsXactIsoLevelSerializable)
1561                                 ereport(ERROR,
1562                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1563                                                  errmsg("could not serialize access due to concurrent update")));
1564                         else if (!ItemPointerEquals(tupleid, &update_ctid))
1565                         {
1566                                 TupleTableSlot *epqslot;
1567
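                                /*
                                 * The row we meant to delete was updated by a concurrent,
                                 * now-committed transaction.  EvalPlanQual re-evaluates the
                                 * query quals against the latest row version; if it still
                                 * qualifies, we adopt its TID and loop back to ldelete to
                                 * retry the delete against that version.
                                 */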
1568                                 epqslot = EvalPlanQual(estate,
1569                                                                            resultRelInfo->ri_RangeTableIndex,
1570                                                                            &update_ctid,
1571                                                                            update_xmax,
1572                                                                            estate->es_snapshot->curcid);
1573                                 if (!TupIsNull(epqslot))
1574                                 {
1575                                         *tupleid = update_ctid;
1576                                         goto ldelete;
1577                                 }
1578                         }
1579                         /* tuple already deleted; nothing to do */
1580                         return;
1581
1582                 default:
1583                         elog(ERROR, "unrecognized heap_delete status: %u", result);
1584                         return;
1585         }
1586
1587         IncrDeleted();
1588         (estate->es_processed)++;
1589
1590         /*
1591          * Note: Normally one would think that we have to delete index tuples
1592          * associated with the heap tuple now...
1593          *
1594          * ... but in POSTGRES, we have no need to do this because VACUUM will
1595          * take care of it later.  We can't delete index tuples immediately
1596          * anyway, since the tuple is still visible to other transactions.
1597          */
1598
1599         /* AFTER ROW DELETE Triggers */
1600         ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
1601 }
1602
1603 /* ----------------------------------------------------------------
1604  *              ExecUpdate
1605  *
1606  *              note: we can't run UPDATE queries with transactions
1607  *              off because UPDATEs are actually INSERTs and our
1608  *              scan will mistakenly loop forever, updating the tuple
1609  *              it just inserted..      This should be fixed but until it
1610  *              is, we don't want to get stuck in an infinite loop
1611  *              which corrupts your database..
1612  * ----------------------------------------------------------------
1613  */
1614 static void
1615 ExecUpdate(TupleTableSlot *slot,
1616                    ItemPointer tupleid,
1617                    EState *estate)
1618 {
1619         HeapTuple       tuple;
1620         ResultRelInfo *resultRelInfo;
1621         Relation        resultRelationDesc;
1622         HTSU_Result result;
1623         ItemPointerData update_ctid;
1624         TransactionId update_xmax;
1625
1626         /*
1627          * abort the operation if not running transactions
1628          */
1629         if (IsBootstrapProcessingMode())
1630                 elog(ERROR, "cannot UPDATE during bootstrap");
1631
1632         /*
1633          * get the heap tuple out of the tuple table slot, making sure we have a
1634          * writable copy
1635          */
1636         tuple = ExecMaterializeSlot(slot);
1637
1638         /*
1639          * get information on the (current) result relation
1640          */
1641         resultRelInfo = estate->es_result_relation_info;
1642         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1643
1644         /* BEFORE ROW UPDATE Triggers */
1645         if (resultRelInfo->ri_TrigDesc &&
1646                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
1647         {
1648                 HeapTuple       newtuple;
1649
1650                 newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
1651                                                                                 tupleid, tuple,
1652                                                                                 estate->es_snapshot->curcid);
1653
1654                 if (newtuple == NULL)   /* "do nothing" */
1655                         return;
1656
1657                 if (newtuple != tuple)  /* modified by Trigger(s) */
1658                 {
1659                         /*
1660                          * Put the modified tuple into a slot for convenience of routines
1661                          * below.  We assume the tuple was allocated in per-tuple memory
1662                          * context, and therefore will go away by itself. The tuple table
1663                          * slot should not try to clear it.
1664                          */
1665                         TupleTableSlot *newslot = estate->es_trig_tuple_slot;
1666
1667                         if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1668                                 ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1669                         ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
1670                         slot = newslot;
1671                         tuple = newtuple;
1672                 }
1673         }
1674
1675         /*
1676          * Check the constraints of the tuple
1677          *
1678          * If we generate a new candidate tuple after EvalPlanQual testing, we
1679          * must loop back here and recheck constraints.  (We don't need to redo
1680          * triggers, however.  If there are any BEFORE triggers then trigger.c
1681          * will have done heap_lock_tuple to lock the correct tuple, so there's no
1682          * need to do them again.)
1683          */
1684 lreplace:;
1685         if (resultRelationDesc->rd_att->constr)
1686                 ExecConstraints(resultRelInfo, slot, estate);
1687
1688         /*
1689          * replace the heap tuple
1690          *
1691          * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
1692          * the row to be updated is visible to that snapshot, and throw a can't-
1693          * serialize error if not.      This is a special-case behavior needed for
1694          * referential integrity updates in serializable transactions.
1695          */
1696         result = heap_update(resultRelationDesc, tupleid, tuple,
1697                                                  &update_ctid, &update_xmax,
1698                                                  estate->es_snapshot->curcid,
1699                                                  estate->es_crosscheck_snapshot,
1700                                                  true /* wait for commit */ );
1701         switch (result)
1702         {
1703                 case HeapTupleSelfUpdated:
1704                         /* already deleted by self; nothing to do */
1705                         return;
1706
1707                 case HeapTupleMayBeUpdated:
1708                         break;
1709
1710                 case HeapTupleUpdated:
1711                         if (IsXactIsoLevelSerializable)
1712                                 ereport(ERROR,
1713                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1714                                                  errmsg("could not serialize access due to concurrent update")));
1715                         else if (!ItemPointerEquals(tupleid, &update_ctid))
1716                         {
1717                                 TupleTableSlot *epqslot;
1718
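                                /*
                                 * As in ExecDelete, follow the update chain under READ
                                 * COMMITTED.  The slot returned by EvalPlanQual still carries
                                 * junk attributes, so it is passed through the junk filter
                                 * before looping back to lreplace to recheck constraints and
                                 * retry heap_update against the newest version.
                                 */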
1719                                 epqslot = EvalPlanQual(estate,
1720                                                                            resultRelInfo->ri_RangeTableIndex,
1721                                                                            &update_ctid,
1722                                                                            update_xmax,
1723                                                                            estate->es_snapshot->curcid);
1724                                 if (!TupIsNull(epqslot))
1725                                 {
1726                                         *tupleid = update_ctid;
1727                                         slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
1728                                         tuple = ExecMaterializeSlot(slot);
1729                                         goto lreplace;
1730                                 }
1731                         }
1732                         /* tuple already deleted; nothing to do */
1733                         return;
1734
1735                 default:
1736                         elog(ERROR, "unrecognized heap_update status: %u", result);
1737                         return;
1738         }
1739
1740         IncrReplaced();
1741         (estate->es_processed)++;
1742
1743         /*
1744          * Note: instead of having to update the old index tuples associated with
1745          * the heap tuple, all we do is form and insert new index tuples. This is
1746          * because UPDATEs are actually DELETEs and INSERTs, and index tuple
1747          * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
1748          * here is insert new index tuples.  -cim 9/27/89
1749          */
1750
1751         /*
1752          * insert index entries for tuple
1753          *
1754          * Note: heap_update returns the tid (location) of the new tuple in the
1755          * t_self field.
1756          */
1757         if (resultRelInfo->ri_NumIndices > 0)
1758                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1759
1760         /* AFTER ROW UPDATE Triggers */
1761         ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
1762 }
1763
1764 static const char *
1765 ExecRelCheck(ResultRelInfo *resultRelInfo,
1766                          TupleTableSlot *slot, EState *estate)
1767 {
1768         Relation        rel = resultRelInfo->ri_RelationDesc;
1769         int                     ncheck = rel->rd_att->constr->num_check;
1770         ConstrCheck *check = rel->rd_att->constr->check;
1771         ExprContext *econtext;
1772         MemoryContext oldContext;
1773         List       *qual;
1774         int                     i;
1775
1776         /*
1777          * If first time through for this result relation, build expression
1778          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1779          * memory context so they'll survive throughout the query.
1780          */
1781         if (resultRelInfo->ri_ConstraintExprs == NULL)
1782         {
1783                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1784                 resultRelInfo->ri_ConstraintExprs =
1785                         (List **) palloc(ncheck * sizeof(List *));
1786                 for (i = 0; i < ncheck; i++)
1787                 {
1788                         /* ExecQual wants implicit-AND form */
1789                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1790                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1791                                 ExecPrepareExpr((Expr *) qual, estate);
1792                 }
1793                 MemoryContextSwitchTo(oldContext);
1794         }
1795
1796         /*
1797          * We will use the EState's per-tuple context for evaluating constraint
1798          * expressions (creating it if it's not already there).
1799          */
1800         econtext = GetPerTupleExprContext(estate);
1801
1802         /* Arrange for econtext's scan tuple to be the tuple under test */
1803         econtext->ecxt_scantuple = slot;
1804
1805         /* And evaluate the constraints */
1806         for (i = 0; i < ncheck; i++)
1807         {
1808                 qual = resultRelInfo->ri_ConstraintExprs[i];
1809
1810                 /*
1811                  * NOTE: SQL92 specifies that a NULL result from a constraint
1812                  * expression is not to be treated as a failure.  Therefore, tell
1813                  * ExecQual to return TRUE for NULL.
1814                  */
1815                 if (!ExecQual(qual, econtext, true))
1816                         return check[i].ccname;
1817         }
1818
1819         /* NULL result means no error */
1820         return NULL;
1821 }
1822
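/*
 * ExecConstraints - check not-null and check constraints for a tuple
 *
 * Verifies that the tuple in 'slot' satisfies the result relation's NOT NULL
 * column constraints and its CHECK constraints (via ExecRelCheck above),
 * raising an error at the first violation found.  Used by ExecInsert and
 * ExecUpdate whenever the target relation has any constraints.
 */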
1823 void
1824 ExecConstraints(ResultRelInfo *resultRelInfo,
1825                                 TupleTableSlot *slot, EState *estate)
1826 {
1827         Relation        rel = resultRelInfo->ri_RelationDesc;
1828         TupleConstr *constr = rel->rd_att->constr;
1829
1830         Assert(constr);
1831
1832         if (constr->has_not_null)
1833         {
1834                 int                     natts = rel->rd_att->natts;
1835                 int                     attrChk;
1836
1837                 for (attrChk = 1; attrChk <= natts; attrChk++)
1838                 {
1839                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1840                                 slot_attisnull(slot, attrChk))
1841                                 ereport(ERROR,
1842                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1843                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1844                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1845                 }
1846         }
1847
1848         if (constr->num_check > 0)
1849         {
1850                 const char *failed;
1851
1852                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1853                         ereport(ERROR,
1854                                         (errcode(ERRCODE_CHECK_VIOLATION),
1855                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1856                                                         RelationGetRelationName(rel), failed)));
1857         }
1858 }
1859
1860 /*
1861  * Check a modified tuple to see if we want to process its updated version
1862  * under READ COMMITTED rules.
1863  *
1864  * See backend/executor/README for some info about how this works.
1865  *
1866  *      estate - executor state data
1867  *      rti - rangetable index of table containing tuple
1868  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
1869  *      priorXmax - t_xmax from the outdated tuple
1870  *      curCid - command ID of current command of my transaction
1871  *
1872  * *tid is also an output parameter: it's modified to hold the TID of the
1873  * latest version of the tuple (note this may be changed even on failure)
1874  *
1875  * Returns a slot containing the new candidate update/delete tuple, or
1876  * NULL if we determine we shouldn't process the row.
1877  */
1878 TupleTableSlot *
1879 EvalPlanQual(EState *estate, Index rti,
1880                          ItemPointer tid, TransactionId priorXmax, CommandId curCid)
1881 {
1882         evalPlanQual *epq;
1883         EState     *epqstate;
1884         Relation        relation;
1885         HeapTupleData tuple;
1886         HeapTuple       copyTuple = NULL;
1887         bool            endNode;
1888
1889         Assert(rti != 0);
1890
1891         /*
1892          * find relation containing target tuple
1893          */
1894         if (estate->es_result_relation_info != NULL &&
1895                 estate->es_result_relation_info->ri_RangeTableIndex == rti)
1896                 relation = estate->es_result_relation_info->ri_RelationDesc;
1897         else
1898         {
1899                 ListCell   *l;
1900
1901                 relation = NULL;
1902                 foreach(l, estate->es_rowMarks)
1903                 {
1904                         if (((ExecRowMark *) lfirst(l))->rti == rti)
1905                         {
1906                                 relation = ((ExecRowMark *) lfirst(l))->relation;
1907                                 break;
1908                         }
1909                 }
1910                 if (relation == NULL)
1911                         elog(ERROR, "could not find RowMark for RT index %u", rti);
1912         }
1913
1914         /*
1915          * fetch tid tuple
1916          *
1917          * Loop here to deal with updated or busy tuples
1918          */
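        /*
         * Each pass through the loop either: returns NULL because the row is
         * gone (or not visible to this command), waits out a concurrent updater
         * and retries the fetch, copies the tuple and breaks out, or follows
         * t_ctid to the next version in the update chain and loops.
         */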
1919         tuple.t_self = *tid;
1920         for (;;)
1921         {
1922                 Buffer          buffer;
1923
1924                 if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
1925                 {
1926                         /*
1927                          * If xmin isn't what we're expecting, the slot must have been
1928                          * recycled and reused for an unrelated tuple.  This implies that
1929                          * the latest version of the row was deleted, so we need do
1930                          * nothing.  (Should be safe to examine xmin without getting
1931                          * buffer's content lock, since xmin never changes in an existing
1932                          * tuple.)
1933                          */
1934                         if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
1935                                                                          priorXmax))
1936                         {
1937                                 ReleaseBuffer(buffer);
1938                                 return NULL;
1939                         }
1940
1941                         /* otherwise xmin should not be dirty... */
1942                         if (TransactionIdIsValid(SnapshotDirty->xmin))
1943                                 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
1944
1945                         /*
1946                          * If the tuple is being updated by another transaction, we have to
1947                          * wait for it to commit or abort.
1948                          */
1949                         if (TransactionIdIsValid(SnapshotDirty->xmax))
1950                         {
1951                                 ReleaseBuffer(buffer);
1952                                 XactLockTableWait(SnapshotDirty->xmax);
1953                                 continue;               /* loop back to repeat heap_fetch */
1954                         }
1955
1956                         /*
1957                          * If tuple was inserted by our own transaction, we have to check
1958                          * cmin against curCid: cmin >= curCid means our command cannot
1959                          * see the tuple, so we should ignore it.  Without this we are
1960                          * open to the "Halloween problem" of indefinitely re-updating
1961                          * the same tuple.  (We need not check cmax because
1962                          * HeapTupleSatisfiesDirty will consider a tuple deleted by
1963                          * our transaction dead, regardless of cmax.)  We just checked
1964                          * that priorXmax == xmin, so we can test that variable instead
1965                          * of doing HeapTupleHeaderGetXmin again.
1966                          */
1967                         if (TransactionIdIsCurrentTransactionId(priorXmax) &&
1968                                 HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
1969                         {
1970                                 ReleaseBuffer(buffer);
1971                                 return NULL;
1972                         }
1973
1974                         /*
1975                          * We got tuple - now copy it for use by recheck query.
1976                          */
1977                         copyTuple = heap_copytuple(&tuple);
1978                         ReleaseBuffer(buffer);
1979                         break;
1980                 }
1981
1982                 /*
1983                  * If the referenced slot was actually empty, the latest version of
1984                  * the row must have been deleted, so we need do nothing.
1985                  */
1986                 if (tuple.t_data == NULL)
1987                 {
1988                         ReleaseBuffer(buffer);
1989                         return NULL;
1990                 }
1991
1992                 /*
1993                  * As above, if xmin isn't what we're expecting, do nothing.
1994                  */
1995                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
1996                                                                  priorXmax))
1997                 {
1998                         ReleaseBuffer(buffer);
1999                         return NULL;
2000                 }
2001
2002                 /*
2003                  * If we get here, the tuple was found but failed SnapshotDirty.
2004                  * Assuming the xmin is either a committed xact or our own xact (as it
2005                  * certainly should be if we're trying to modify the tuple), this must
2006                  * mean that the row was updated or deleted by either a committed xact
2007                  * or our own xact.  If it was deleted, we can ignore it; if it was
2008                  * updated then chain up to the next version and repeat the whole
2009                  * test.
2010                  *
2011                  * As above, it should be safe to examine xmax and t_ctid without the
2012                  * buffer content lock, because they can't be changing.
2013                  */
2014                 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2015                 {
2016                         /* deleted, so forget about it */
2017                         ReleaseBuffer(buffer);
2018                         return NULL;
2019                 }
2020
2021                 /* updated, so look at the updated row */
2022                 tuple.t_self = tuple.t_data->t_ctid;
2023                 /* updated row should have xmin matching this xmax */
2024                 priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
2025                 ReleaseBuffer(buffer);
2026                 /* loop back to fetch next in chain */
2027         }
2028
2029         /*
2030          * For UPDATE/DELETE we have to return tid of actual row we're executing
2031          * PQ for.
2032          */
2033         *tid = tuple.t_self;
2034
2035         /*
2036          * Need to run a recheck subquery.      Find or create a PQ stack entry.
2037          */
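        /*
         * estate->es_evalPlanQual points at the top of a stack of recheck
         * states: the "next" links run down the currently active rechecks,
         * while "free" keeps finished entries around for reuse (see the
         * evalPlanQual struct declaration above).
         */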
2038         epq = estate->es_evalPlanQual;
2039         endNode = true;
2040
2041         if (epq != NULL && epq->rti == 0)
2042         {
2043                 /* Top PQ stack entry is idle, so re-use it */
2044                 Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2045                 epq->rti = rti;
2046                 endNode = false;
2047         }
2048
2049         /*
2050          * If this is a request for a different RTE (call it Ra), check whether a
2051          * PlanQual recheck is already in progress for Ra.  If so, Ra's row has
2052          * been updated again, so we must restart the old recheck for Ra and
2053          * discard everything done after Ra's recheck was suspended.
2054          */
2055         if (epq != NULL && epq->rti != rti &&
2056                 epq->estate->es_evTuple[rti - 1] != NULL)
2057         {
2058                 do
2059                 {
2060                         evalPlanQual *oldepq;
2061
2062                         /* stop execution */
2063                         EvalPlanQualStop(epq);
2064                         /* pop previous PlanQual from the stack */
2065                         oldepq = epq->next;
2066                         Assert(oldepq && oldepq->rti != 0);
2067                         /* push current PQ to freePQ stack */
2068                         oldepq->free = epq;
2069                         epq = oldepq;
2070                         estate->es_evalPlanQual = epq;
2071                 } while (epq->rti != rti);
2072         }
2073
2074         /*
2075          * If the request is for a different RTE, we have to suspend execution
2076          * of the current PlanQual and start execution of a new one.
2077          */
2078         if (epq == NULL || epq->rti != rti)
2079         {
2080                 /* try to reuse plan used previously */
2081                 evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
2082
2083                 if (newepq == NULL)             /* first call or freePQ stack is empty */
2084                 {
2085                         newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
2086                         newepq->free = NULL;
2087                         newepq->estate = NULL;
2088                         newepq->planstate = NULL;
2089                 }
2090                 else
2091                 {
2092                         /* recycle previously used PlanQual */
2093                         Assert(newepq->estate == NULL);
2094                         epq->free = NULL;
2095                 }
2096                 /* push current PQ to the stack */
2097                 newepq->next = epq;
2098                 epq = newepq;
2099                 estate->es_evalPlanQual = epq;
2100                 epq->rti = rti;
2101                 endNode = false;
2102         }
2103
2104         Assert(epq->rti == rti);
2105
2106         /*
2107          * Ok - we're requested for the same RTE.  Unfortunately we still have to
2108          * end and restart execution of the plan, because ExecReScan wouldn't
2109          * ensure that upper plan nodes would reset themselves.  We could make
2110          * that work if insertion of the target tuple were integrated with the
2111          * Param mechanism somehow, so that the upper plan nodes know that their
2112          * children's outputs have changed.
2113          *
2114          * Note that the stack of free evalPlanQual nodes is quite useless at the
2115          * moment, since it only saves us from pallocing/releasing the
2116          * evalPlanQual nodes themselves.  But it will be useful once we implement
2117          * ReScan instead of end/restart for re-using PlanQual nodes.
2118          */
2119         if (endNode)
2120         {
2121                 /* stop execution */
2122                 EvalPlanQualStop(epq);
2123         }
2124
2125         /*
2126          * Initialize new recheck query.
2127          *
2128          * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
2129          * instead copy down changeable state from the top plan (including
2130          * es_result_relation_info, es_junkFilter) and reset locally changeable
2131          * state in the epq (including es_param_exec_vals, es_evTupleNull).
2132          */
2133         EvalPlanQualStart(epq, estate, epq->next);
2134
2135         /*
2136          * free the old tuple for this RTE, if any, and store the target tuple
2137          * where the relation's scan node will see it
2138          */
2139         epqstate = epq->estate;
2140         if (epqstate->es_evTuple[rti - 1] != NULL)
2141                 heap_freetuple(epqstate->es_evTuple[rti - 1]);
2142         epqstate->es_evTuple[rti - 1] = copyTuple;
2143
2144         return EvalPlanQualNext(estate);
2145 }
2146
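/*
 * EvalPlanQualNext - fetch the next tuple from the current recheck plan
 *
 * Runs the topmost PlanQual plan in its own per-query memory context.  When
 * a plan is exhausted it is shut down and popped from the stack; once the
 * stack is empty we return NULL so the caller resumes the regular plan.
 */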
2147 static TupleTableSlot *
2148 EvalPlanQualNext(EState *estate)
2149 {
2150         evalPlanQual *epq = estate->es_evalPlanQual;
2151         MemoryContext oldcontext;
2152         TupleTableSlot *slot;
2153
2154         Assert(epq->rti != 0);
2155
2156 lpqnext:;
2157         oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
2158         slot = ExecProcNode(epq->planstate);
2159         MemoryContextSwitchTo(oldcontext);
2160
2161         /*
2162          * No more tuples for this PQ. Continue previous one.
2163          */
2164         if (TupIsNull(slot))
2165         {
2166                 evalPlanQual *oldepq;
2167
2168                 /* stop execution */
2169                 EvalPlanQualStop(epq);
2170                 /* pop old PQ from the stack */
2171                 oldepq = epq->next;
2172                 if (oldepq == NULL)
2173                 {
2174                         /* this is the first (oldest) PQ - mark as free */
2175                         epq->rti = 0;
2176                         estate->es_useEvalPlan = false;
2177                         /* and continue Query execution */
2178                         return NULL;
2179                 }
2180                 Assert(oldepq->rti != 0);
2181                 /* push current PQ to freePQ stack */
2182                 oldepq->free = epq;
2183                 epq = oldepq;
2184                 estate->es_evalPlanQual = epq;
2185                 goto lpqnext;
2186         }
2187
2188         return slot;
2189 }
2190
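/*
 * EndEvalPlanQual - shut down any PlanQual rechecks still in progress
 *
 * Pops and stops every active recheck on the stack, leaving the bottom entry
 * idle (rti == 0) so it can be reused later.  A no-op if no recheck is
 * active.
 */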
2191 static void
2192 EndEvalPlanQual(EState *estate)
2193 {
2194         evalPlanQual *epq = estate->es_evalPlanQual;
2195
2196         if (epq->rti == 0)                      /* plans already shut down */
2197         {
2198                 Assert(epq->next == NULL);
2199                 return;
2200         }
2201
2202         for (;;)
2203         {
2204                 evalPlanQual *oldepq;
2205
2206                 /* stop execution */
2207                 EvalPlanQualStop(epq);
2208                 /* pop old PQ from the stack */
2209                 oldepq = epq->next;
2210                 if (oldepq == NULL)
2211                 {
2212                         /* this is the first (oldest) PQ - mark as free */
2213                         epq->rti = 0;
2214                         estate->es_useEvalPlan = false;
2215                         break;
2216                 }
2217                 Assert(oldepq->rti != 0);
2218                 /* push current PQ to freePQ stack */
2219                 oldepq->free = epq;
2220                 epq = oldepq;
2221                 estate->es_evalPlanQual = epq;
2222         }
2223 }
2224
2225 /*
2226  * Start execution of one level of PlanQual.
2227  *
2228  * This is a cut-down version of ExecutorStart(): we copy some state from
2229  * the top-level estate rather than initializing it fresh.
2230  */
2231 static void
2232 EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
2233 {
2234         EState     *epqstate;
2235         int                     rtsize;
2236         MemoryContext oldcontext;
2237
2238         rtsize = list_length(estate->es_range_table);
2239
2240         epq->estate = epqstate = CreateExecutorState();
2241
2242         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2243
2244         /*
2245          * The epqstates share the top query's copy of unchanging state such as
2246          * the snapshot, rangetable, result-rel info, and external Param info.
2247          * They need their own copies of local state, including a tuple table,
2248          * es_param_exec_vals, etc.
2249          */
2250         epqstate->es_direction = ForwardScanDirection;
2251         epqstate->es_snapshot = estate->es_snapshot;
2252         epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
2253         epqstate->es_range_table = estate->es_range_table;
2254         epqstate->es_result_relations = estate->es_result_relations;
2255         epqstate->es_num_result_relations = estate->es_num_result_relations;
2256         epqstate->es_result_relation_info = estate->es_result_relation_info;
2257         epqstate->es_junkFilter = estate->es_junkFilter;
2258         epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
2259         epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
2260         epqstate->es_param_list_info = estate->es_param_list_info;
2261         if (estate->es_topPlan->nParamExec > 0)
2262                 epqstate->es_param_exec_vals = (ParamExecData *)
2263                         palloc0(estate->es_topPlan->nParamExec * sizeof(ParamExecData));
2264         epqstate->es_rowMarks = estate->es_rowMarks;
2265         epqstate->es_instrument = estate->es_instrument;
2266         epqstate->es_select_into = estate->es_select_into;
2267         epqstate->es_into_oids = estate->es_into_oids;
2268         epqstate->es_topPlan = estate->es_topPlan;
2269
2270         /*
2271          * Each epqstate must have its own es_evTupleNull state, but all the stack
2272          * entries share es_evTuple state.      This allows sub-rechecks to inherit
2273          * the value being examined by an outer recheck.
2274          */
2275         epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
2276         if (priorepq == NULL)
2277                 /* first PQ stack entry */
2278                 epqstate->es_evTuple = (HeapTuple *)
2279                         palloc0(rtsize * sizeof(HeapTuple));
2280         else
2281                 /* later stack entries share the same storage */
2282                 epqstate->es_evTuple = priorepq->estate->es_evTuple;
2283
2284         epqstate->es_tupleTable =
2285                 ExecCreateTupleTable(estate->es_tupleTable->size);
2286
2287         epq->planstate = ExecInitNode(estate->es_topPlan, epqstate, 0);
2288
2289         MemoryContextSwitchTo(oldcontext);
2290 }
2291
2292 /*
2293  * End execution of one level of PlanQual.
2294  *
2295  * This is a cut-down version of ExecutorEnd(); basically we want to do most
2296  * of the normal cleanup, but *not* close result relations (which we are
2297  * just sharing from the outer query).
2298  */
2299 static void
2300 EvalPlanQualStop(evalPlanQual *epq)
2301 {
2302         EState     *epqstate = epq->estate;
2303         MemoryContext oldcontext;
2304
2305         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2306
2307         ExecEndNode(epq->planstate);
2308
2309         ExecDropTupleTable(epqstate->es_tupleTable, true);
2310         epqstate->es_tupleTable = NULL;
2311
2312         if (epqstate->es_evTuple[epq->rti - 1] != NULL)
2313         {
2314                 heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
2315                 epqstate->es_evTuple[epq->rti - 1] = NULL;
2316         }
2317
2318         MemoryContextSwitchTo(oldcontext);
2319
2320         FreeExecutorState(epqstate);
2321
2322         epq->estate = NULL;
2323         epq->planstate = NULL;
2324 }