/*
 * Scraped from a git web UI: postgresql / src / backend / executor / execMain.c
 * at commit "Revert bogus fixes of HOT-freezing bug".
 */
1 /*-------------------------------------------------------------------------
2  *
3  * execMain.c
4  *        top level executor interface routines
5  *
6  * INTERFACE ROUTINES
7  *      ExecutorStart()
8  *      ExecutorRun()
9  *      ExecutorFinish()
10  *      ExecutorEnd()
11  *
12  *      These four procedures are the external interface to the executor.
13  *      In each case, the query descriptor is required as an argument.
14  *
15  *      ExecutorStart must be called at the beginning of execution of any
16  *      query plan and ExecutorEnd must always be called at the end of
17  *      execution of a plan (unless it is aborted due to error).
18  *
19  *      ExecutorRun accepts direction and count arguments that specify whether
20  *      the plan is to be executed forwards, backwards, and for how many tuples.
21  *      In some cases ExecutorRun may be called multiple times to process all
22  *      the tuples for a plan.  It is also acceptable to stop short of executing
23  *      the whole plan (but only if it is a SELECT).
24  *
25  *      ExecutorFinish must be called after the final ExecutorRun call and
26  *      before ExecutorEnd.  This can be omitted only in case of EXPLAIN,
27  *      which should also omit ExecutorRun.
28  *
29  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
30  * Portions Copyright (c) 1994, Regents of the University of California
31  *
32  *
33  * IDENTIFICATION
34  *        src/backend/executor/execMain.c
35  *
36  *-------------------------------------------------------------------------
37  */
38 #include "postgres.h"
39
40 #include "access/htup_details.h"
41 #include "access/sysattr.h"
42 #include "access/transam.h"
43 #include "access/xact.h"
44 #include "catalog/namespace.h"
45 #include "catalog/partition.h"
46 #include "catalog/pg_inherits_fn.h"
47 #include "catalog/pg_publication.h"
48 #include "commands/matview.h"
49 #include "commands/trigger.h"
50 #include "executor/execdebug.h"
51 #include "foreign/fdwapi.h"
52 #include "mb/pg_wchar.h"
53 #include "miscadmin.h"
54 #include "optimizer/clauses.h"
55 #include "parser/parsetree.h"
56 #include "rewrite/rewriteManip.h"
57 #include "storage/bufmgr.h"
58 #include "storage/lmgr.h"
59 #include "tcop/utility.h"
60 #include "utils/acl.h"
61 #include "utils/lsyscache.h"
62 #include "utils/memutils.h"
63 #include "utils/rls.h"
64 #include "utils/ruleutils.h"
65 #include "utils/snapmgr.h"
66 #include "utils/tqual.h"
67
68
69 /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
70 ExecutorStart_hook_type ExecutorStart_hook = NULL;
71 ExecutorRun_hook_type ExecutorRun_hook = NULL;
72 ExecutorFinish_hook_type ExecutorFinish_hook = NULL;
73 ExecutorEnd_hook_type ExecutorEnd_hook = NULL;
74
75 /* Hook for plugin to get control in ExecCheckRTPerms() */
76 ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL;
77
78 /* decls for local routines only used within this module */
79 static void InitPlan(QueryDesc *queryDesc, int eflags);
80 static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
81 static void ExecPostprocessPlan(EState *estate);
82 static void ExecEndPlan(PlanState *planstate, EState *estate);
83 static void ExecutePlan(EState *estate, PlanState *planstate,
84                         bool use_parallel_mode,
85                         CmdType operation,
86                         bool sendTuples,
87                         uint64 numberTuples,
88                         ScanDirection direction,
89                         DestReceiver *dest,
90                         bool execute_once);
91 static bool ExecCheckRTEPerms(RangeTblEntry *rte);
92 static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
93                                                   Bitmapset *modifiedCols,
94                                                   AclMode requiredPerms);
95 static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
96 static char *ExecBuildSlotValueDescription(Oid reloid,
97                                                           TupleTableSlot *slot,
98                                                           TupleDesc tupdesc,
99                                                           Bitmapset *modifiedCols,
100                                                           int maxfieldlen);
101 static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
102                                                                          Datum *values,
103                                                                          bool *isnull,
104                                                                          int maxfieldlen);
105 static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
106                                   Plan *planTree);
107 static void ExecPartitionCheck(ResultRelInfo *resultRelInfo,
108                                    TupleTableSlot *slot, EState *estate);
109
110 /*
111  * Note that GetUpdatedColumns() also exists in commands/trigger.c.  There does
112  * not appear to be any good header to put it into, given the structures that
113  * it uses, so we let them be duplicated.  Be sure to update both if one needs
114  * to be changed, however.
115  */
116 #define GetInsertedColumns(relinfo, estate) \
117         (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols)
118 #define GetUpdatedColumns(relinfo, estate) \
119         (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols)
120
121 /* end of local decls */
122
123
124 /* ----------------------------------------------------------------
125  *              ExecutorStart
126  *
127  *              This routine must be called at the beginning of any execution of any
128  *              query plan
129  *
130  * Takes a QueryDesc previously created by CreateQueryDesc (which is separate
131  * only because some places use QueryDescs for utility commands).  The tupDesc
132  * field of the QueryDesc is filled in to describe the tuples that will be
133  * returned, and the internal fields (estate and planstate) are set up.
134  *
135  * eflags contains flag bits as described in executor.h.
136  *
137  * NB: the CurrentMemoryContext when this is called will become the parent
138  * of the per-query context used for this Executor invocation.
139  *
140  * We provide a function hook variable that lets loadable plugins
141  * get control when ExecutorStart is called.  Such a plugin would
142  * normally call standard_ExecutorStart().
143  *
144  * ----------------------------------------------------------------
145  */
146 void
147 ExecutorStart(QueryDesc *queryDesc, int eflags)
148 {
149         if (ExecutorStart_hook)
150                 (*ExecutorStart_hook) (queryDesc, eflags);
151         else
152                 standard_ExecutorStart(queryDesc, eflags);
153 }
154
void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: queryDesc must not be started already */
	Assert(queryDesc != NULL);
	Assert(queryDesc->estate == NULL);

	/*
	 * If the transaction is read-only, we need to check if any writes are
	 * planned to non-temporary tables.  EXPLAIN is considered read-only.
	 *
	 * Don't allow writes in parallel mode.  Supporting UPDATE and DELETE
	 * would require (a) storing the combocid hash in shared memory, rather
	 * than synchronizing it just once at the start of parallelism, and (b) an
	 * alternative to heap_update()'s reliance on xmax for mutual exclusion.
	 * INSERT may have no such troubles, but we forbid it to simplify the
	 * checks.
	 *
	 * We have lower-level defenses in CommandCounterIncrement and elsewhere
	 * against performing unsafe operations in parallel mode, but this gives a
	 * more user-friendly error message.
	 */
	if ((XactReadOnly || IsInParallelMode()) &&
		!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
		ExecCheckXactReadOnly(queryDesc->plannedstmt);

	/*
	 * Build EState, switch into per-query memory context for startup.
	 * Everything allocated below lives until FreeExecutorState in
	 * standard_ExecutorEnd.
	 */
	estate = CreateExecutorState();
	queryDesc->estate = estate;

	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Fill in external parameters, if any, from queryDesc; and allocate
	 * workspace for internal parameters
	 */
	estate->es_param_list_info = queryDesc->params;

	/* one zeroed ParamExecData slot per PARAM_EXEC parameter in the plan */
	if (queryDesc->plannedstmt->nParamExec > 0)
		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

	estate->es_sourceText = queryDesc->sourceText;

	/*
	 * Fill in the query environment, if any, from queryDesc.
	 */
	estate->es_queryEnv = queryDesc->queryEnv;

	/*
	 * If non-read-only query, set the command ID to mark output tuples with.
	 * A plain SELECT skips GetCurrentCommandId so it does not force a new
	 * command ID to be marked "used".
	 */
	switch (queryDesc->operation)
	{
		case CMD_SELECT:

			/*
			 * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark
			 * tuples
			 */
			if (queryDesc->plannedstmt->rowMarks != NIL ||
				queryDesc->plannedstmt->hasModifyingCTE)
				estate->es_output_cid = GetCurrentCommandId(true);

			/*
			 * A SELECT without modifying CTEs can't possibly queue triggers,
			 * so force skip-triggers mode. This is just a marginal efficiency
			 * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't
			 * all that expensive, but we might as well do it.
			 */
			if (!queryDesc->plannedstmt->hasModifyingCTE)
				eflags |= EXEC_FLAG_SKIP_TRIGGERS;
			break;

		case CMD_INSERT:
		case CMD_DELETE:
		case CMD_UPDATE:
			estate->es_output_cid = GetCurrentCommandId(true);
			break;

		default:
			elog(ERROR, "unrecognized operation code: %d",
				 (int) queryDesc->operation);
			break;
	}

	/*
	 * Copy other important information into the EState.  The snapshots are
	 * registered here and unregistered in standard_ExecutorEnd, so they stay
	 * valid for the life of this executor invocation.
	 */
	estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
	estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
	estate->es_top_eflags = eflags;
	estate->es_instrument = queryDesc->instrument_options;

	/*
	 * Set up an AFTER-trigger statement context, unless told not to, or
	 * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
	 * The matching AfterTriggerEndQuery happens in standard_ExecutorFinish.
	 */
	if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
		AfterTriggerBeginQuery();

	/*
	 * Initialize the plan state tree
	 */
	InitPlan(queryDesc, eflags);

	MemoryContextSwitchTo(oldcontext);
}
268
269 /* ----------------------------------------------------------------
270  *              ExecutorRun
271  *
272  *              This is the main routine of the executor module. It accepts
273  *              the query descriptor from the traffic cop and executes the
274  *              query plan.
275  *
276  *              ExecutorStart must have been called already.
277  *
278  *              If direction is NoMovementScanDirection then nothing is done
279  *              except to start up/shut down the destination.  Otherwise,
280  *              we retrieve up to 'count' tuples in the specified direction.
281  *
282  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
283  *              completion.  Also note that the count limit is only applied to
284  *              retrieved tuples, not for instance to those inserted/updated/deleted
285  *              by a ModifyTable plan node.
286  *
287  *              There is no return value, but output tuples (if any) are sent to
288  *              the destination receiver specified in the QueryDesc; and the number
289  *              of tuples processed at the top level can be found in
290  *              estate->es_processed.
291  *
292  *              We provide a function hook variable that lets loadable plugins
293  *              get control when ExecutorRun is called.  Such a plugin would
294  *              normally call standard_ExecutorRun().
295  *
296  * ----------------------------------------------------------------
297  */
298 void
299 ExecutorRun(QueryDesc *queryDesc,
300                         ScanDirection direction, uint64 count,
301                         bool execute_once)
302 {
303         if (ExecutorRun_hook)
304                 (*ExecutorRun_hook) (queryDesc, direction, count, execute_once);
305         else
306                 standard_ExecutorRun(queryDesc, direction, count, execute_once);
307 }
308
void
standard_ExecutorRun(QueryDesc *queryDesc,
					 ScanDirection direction, uint64 count, bool execute_once)
{
	EState	   *estate;
	CmdType		operation;
	DestReceiver *dest;
	bool		sendTuples;
	MemoryContext oldcontext;

	/* sanity checks: ExecutorStart must have run, and not in EXPLAIN-only */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/*
	 * extract information from the query descriptor and the query feature.
	 */
	operation = queryDesc->operation;
	dest = queryDesc->dest;

	/*
	 * startup tuple receiver, if we will be emitting tuples
	 */
	estate->es_processed = 0;
	estate->es_lastoid = InvalidOid;

	/* SELECT and INSERT/UPDATE/DELETE ... RETURNING both emit tuples */
	sendTuples = (operation == CMD_SELECT ||
				  queryDesc->plannedstmt->hasReturning);

	if (sendTuples)
		dest->rStartup(dest, operation, queryDesc->tupDesc);

	/*
	 * run plan.  NoMovement direction means: just start up / shut down the
	 * receiver without fetching anything.
	 */
	if (!ScanDirectionIsNoMovement(direction))
	{
		/* a query flagged one-shot must not be run a second time */
		if (execute_once && queryDesc->already_executed)
			elog(ERROR, "can't re-execute query flagged for single execution");
		queryDesc->already_executed = true;

		ExecutePlan(estate,
					queryDesc->planstate,
					queryDesc->plannedstmt->parallelModeNeeded,
					operation,
					sendTuples,
					count,
					direction,
					dest,
					execute_once);
	}

	/*
	 * shutdown tuple receiver, if we started it
	 */
	if (sendTuples)
		dest->rShutdown(dest);

	/* stop the overall-runtime timer, crediting es_processed tuples */
	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, estate->es_processed);

	MemoryContextSwitchTo(oldcontext);
}
385
386 /* ----------------------------------------------------------------
387  *              ExecutorFinish
388  *
389  *              This routine must be called after the last ExecutorRun call.
390  *              It performs cleanup such as firing AFTER triggers.  It is
391  *              separate from ExecutorEnd because EXPLAIN ANALYZE needs to
392  *              include these actions in the total runtime.
393  *
394  *              We provide a function hook variable that lets loadable plugins
395  *              get control when ExecutorFinish is called.  Such a plugin would
396  *              normally call standard_ExecutorFinish().
397  *
398  * ----------------------------------------------------------------
399  */
400 void
401 ExecutorFinish(QueryDesc *queryDesc)
402 {
403         if (ExecutorFinish_hook)
404                 (*ExecutorFinish_hook) (queryDesc);
405         else
406                 standard_ExecutorFinish(queryDesc);
407 }
408
void
standard_ExecutorFinish(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks: must be started, and not EXPLAIN-only */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);
	Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/* This should be run once and only once per Executor instance */
	Assert(!estate->es_finished);

	/* Switch into per-query memory context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/* Allow instrumentation of Executor overall runtime */
	if (queryDesc->totaltime)
		InstrStartNode(queryDesc->totaltime);

	/* Run ModifyTable nodes to completion */
	ExecPostprocessPlan(estate);

	/*
	 * Execute queued AFTER triggers, unless told not to.  This pairs with
	 * the AfterTriggerBeginQuery call made in standard_ExecutorStart.
	 */
	if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
		AfterTriggerEndQuery(estate);

	if (queryDesc->totaltime)
		InstrStopNode(queryDesc->totaltime, 0);

	MemoryContextSwitchTo(oldcontext);

	/*
	 * Set the flag only after everything succeeded, so that an error above
	 * leaves es_finished false and standard_ExecutorEnd's Assert will notice.
	 */
	estate->es_finished = true;
}
447
448 /* ----------------------------------------------------------------
449  *              ExecutorEnd
450  *
451  *              This routine must be called at the end of execution of any
452  *              query plan
453  *
454  *              We provide a function hook variable that lets loadable plugins
455  *              get control when ExecutorEnd is called.  Such a plugin would
456  *              normally call standard_ExecutorEnd().
457  *
458  * ----------------------------------------------------------------
459  */
460 void
461 ExecutorEnd(QueryDesc *queryDesc)
462 {
463         if (ExecutorEnd_hook)
464                 (*ExecutorEnd_hook) (queryDesc);
465         else
466                 standard_ExecutorEnd(queryDesc);
467 }
468
void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
	EState	   *estate;
	MemoryContext oldcontext;

	/* sanity checks */
	Assert(queryDesc != NULL);

	estate = queryDesc->estate;

	Assert(estate != NULL);

	/*
	 * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
	 * Assert is needed because ExecutorFinish is new as of 9.1, and callers
	 * might forget to call it.
	 */
	Assert(estate->es_finished ||
		   (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/*
	 * Switch into per-query memory context to run ExecEndPlan
	 */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndPlan(queryDesc->planstate, estate);

	/*
	 * do away with our snapshots -- these were registered in
	 * standard_ExecutorStart
	 */
	UnregisterSnapshot(estate->es_snapshot);
	UnregisterSnapshot(estate->es_crosscheck_snapshot);

	/*
	 * Must switch out of context before destroying it
	 */
	MemoryContextSwitchTo(oldcontext);

	/*
	 * Release EState and per-query memory context.  This should release
	 * everything the executor has allocated.
	 */
	FreeExecutorState(estate);

	/* Reset queryDesc fields that no longer point to anything */
	queryDesc->tupDesc = NULL;
	queryDesc->estate = NULL;
	queryDesc->planstate = NULL;
	queryDesc->totaltime = NULL;
}
518
519 /* ----------------------------------------------------------------
520  *              ExecutorRewind
521  *
522  *              This routine may be called on an open queryDesc to rewind it
523  *              to the start.
524  * ----------------------------------------------------------------
525  */
526 void
527 ExecutorRewind(QueryDesc *queryDesc)
528 {
529         EState     *estate;
530         MemoryContext oldcontext;
531
532         /* sanity checks */
533         Assert(queryDesc != NULL);
534
535         estate = queryDesc->estate;
536
537         Assert(estate != NULL);
538
539         /* It's probably not sensible to rescan updating queries */
540         Assert(queryDesc->operation == CMD_SELECT);
541
542         /*
543          * Switch into per-query memory context
544          */
545         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
546
547         /*
548          * rescan plan
549          */
550         ExecReScan(queryDesc->planstate);
551
552         MemoryContextSwitchTo(oldcontext);
553 }
554
555
556 /*
557  * ExecCheckRTPerms
558  *              Check access permissions for all relations listed in a range table.
559  *
560  * Returns true if permissions are adequate.  Otherwise, throws an appropriate
561  * error if ereport_on_violation is true, or simply returns false otherwise.
562  *
563  * Note that this does NOT address row level security policies (aka: RLS).  If
564  * rows will be returned to the user as a result of this permission check
565  * passing, then RLS also needs to be consulted (and check_enable_rls()).
566  *
567  * See rewrite/rowsecurity.c.
568  */
569 bool
570 ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation)
571 {
572         ListCell   *l;
573         bool            result = true;
574
575         foreach(l, rangeTable)
576         {
577                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
578
579                 result = ExecCheckRTEPerms(rte);
580                 if (!result)
581                 {
582                         Assert(rte->rtekind == RTE_RELATION);
583                         if (ereport_on_violation)
584                                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
585                                                            get_rel_name(rte->relid));
586                         return false;
587                 }
588         }
589
590         if (ExecutorCheckPerms_hook)
591                 result = (*ExecutorCheckPerms_hook) (rangeTable,
592                                                                                          ereport_on_violation);
593         return result;
594 }
595
596 /*
597  * ExecCheckRTEPerms
598  *              Check access permissions for a single RTE.
599  */
static bool
ExecCheckRTEPerms(RangeTblEntry *rte)
{
	AclMode		requiredPerms;
	AclMode		relPerms;
	AclMode		remainingPerms;
	Oid			relOid;
	Oid			userid;

	/*
	 * Only plain-relation RTEs need to be checked here.  Function RTEs are
	 * checked when the function is prepared for execution.  Join, subquery,
	 * and special RTEs need no checks.
	 */
	if (rte->rtekind != RTE_RELATION)
		return true;

	/*
	 * No work if requiredPerms is empty.
	 */
	requiredPerms = rte->requiredPerms;
	if (requiredPerms == 0)
		return true;

	relOid = rte->relid;

	/*
	 * userid to check as: current user unless we have a setuid indication.
	 *
	 * Note: GetUserId() is presently fast enough that there's no harm in
	 * calling it separately for each RTE.  If that stops being true, we could
	 * call it once in ExecCheckRTPerms and pass the userid down from there.
	 * But for now, no need for the extra clutter.
	 */
	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

	/*
	 * We must have *all* the requiredPerms bits, but some of the bits can be
	 * satisfied from column-level rather than relation-level permissions.
	 * First, remove any bits that are satisfied by relation permissions.
	 */
	relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL);
	remainingPerms = requiredPerms & ~relPerms;
	if (remainingPerms != 0)
	{
		int			col = -1;

		/*
		 * If we lack any permissions that exist only as relation permissions,
		 * we can fail straight away.  Only SELECT/INSERT/UPDATE have
		 * column-level equivalents.
		 */
		if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE))
			return false;

		/*
		 * Check to see if we have the needed privileges at column level.
		 *
		 * Note: failures just report a table-level error; it would be nicer
		 * to report a column-level error if we have some but not all of the
		 * column privileges.
		 */
		if (remainingPerms & ACL_SELECT)
		{
			/*
			 * When the query doesn't explicitly reference any columns (for
			 * example, SELECT COUNT(*) FROM table), allow the query if we
			 * have SELECT on any column of the rel, as per SQL spec.
			 *
			 * If the check passes we fall through to the loop below, which
			 * is a no-op for an empty column set.
			 */
			if (bms_is_empty(rte->selectedCols))
			{
				if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
											  ACLMASK_ANY) != ACLCHECK_OK)
					return false;
			}

			while ((col = bms_next_member(rte->selectedCols, col)) >= 0)
			{
				/* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
				AttrNumber	attno = col + FirstLowInvalidHeapAttributeNumber;

				if (attno == InvalidAttrNumber)
				{
					/* Whole-row reference, must have priv on all cols */
					if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT,
												  ACLMASK_ALL) != ACLCHECK_OK)
						return false;
				}
				else
				{
					if (pg_attribute_aclcheck(relOid, attno, userid,
											  ACL_SELECT) != ACLCHECK_OK)
						return false;
				}
			}
		}

		/*
		 * Basically the same for the mod columns, for both INSERT and UPDATE
		 * privilege as specified by remainingPerms.  (Note: & binds tighter
		 * than &&, so these conditions parse as intended.)
		 */
		if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->insertedCols,
																	  ACL_INSERT))
			return false;

		if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid,
																	  userid,
																	  rte->updatedCols,
																	  ACL_UPDATE))
			return false;
	}
	return true;
}
714
715 /*
716  * ExecCheckRTEPermsModified
717  *              Check INSERT or UPDATE access permissions for a single RTE (these
718  *              are processed uniformly).
719  */
720 static bool
721 ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols,
722                                                   AclMode requiredPerms)
723 {
724         int                     col = -1;
725
726         /*
727          * When the query doesn't explicitly update any columns, allow the query
728          * if we have permission on any column of the rel.  This is to handle
729          * SELECT FOR UPDATE as well as possible corner cases in UPDATE.
730          */
731         if (bms_is_empty(modifiedCols))
732         {
733                 if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms,
734                                                                           ACLMASK_ANY) != ACLCHECK_OK)
735                         return false;
736         }
737
738         while ((col = bms_next_member(modifiedCols, col)) >= 0)
739         {
740                 /* bit #s are offset by FirstLowInvalidHeapAttributeNumber */
741                 AttrNumber      attno = col + FirstLowInvalidHeapAttributeNumber;
742
743                 if (attno == InvalidAttrNumber)
744                 {
745                         /* whole-row reference can't happen here */
746                         elog(ERROR, "whole-row update is not implemented");
747                 }
748                 else
749                 {
750                         if (pg_attribute_aclcheck(relOid, attno, userid,
751                                                                           requiredPerms) != ACLCHECK_OK)
752                                 return false;
753                 }
754         }
755         return true;
756 }
757
758 /*
759  * Check that the query does not imply any writes to non-temp tables;
760  * unless we're in parallel mode, in which case don't even allow writes
761  * to temp tables.
762  *
763  * Note: in a Hot Standby this would need to reject writes to temp
764  * tables just as we do in parallel mode; but an HS standby can't have created
765  * any temp tables in the first place, so no need to check that.
766  */
767 static void
768 ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
769 {
770         ListCell   *l;
771
772         /*
773          * Fail if write permissions are requested in parallel mode for table
774          * (temp or non-temp), otherwise fail for any non-temp table.
775          */
776         foreach(l, plannedstmt->rtable)
777         {
778                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
779
780                 if (rte->rtekind != RTE_RELATION)
781                         continue;
782
783                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
784                         continue;
785
786                 if (isTempNamespace(get_rel_namespace(rte->relid)))
787                         continue;
788
789                 PreventCommandIfReadOnly(CreateCommandTag((Node *) plannedstmt));
790         }
791
792         if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE)
793                 PreventCommandIfParallelMode(CreateCommandTag((Node *) plannedstmt));
794 }
795
796
/* ----------------------------------------------------------------
 *		InitPlan
 *
 *		Initializes the query plan: open files, allocate storage
 *		and start up the rule manager
 *
 *		Specifically: runs permission checks, opens and locks all result
 *		relations and FOR [KEY] UPDATE/SHARE targets (before plan-tree
 *		initialization, to avoid lock upgrades), initializes subplan and
 *		main plan-tree state, and fills in queryDesc->tupDesc and
 *		queryDesc->planstate.
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
	CmdType		operation = queryDesc->operation;
	PlannedStmt *plannedstmt = queryDesc->plannedstmt;
	Plan	   *plan = plannedstmt->planTree;
	List	   *rangeTable = plannedstmt->rtable;
	EState	   *estate = queryDesc->estate;
	PlanState  *planstate;
	TupleDesc	tupType;
	ListCell   *l;
	int			i;

	/*
	 * Do permissions checks
	 */
	ExecCheckRTPerms(rangeTable, true);

	/*
	 * initialize the node's execution state
	 */
	estate->es_range_table = rangeTable;
	estate->es_plannedstmt = plannedstmt;

	/*
	 * initialize result relation stuff, and open/lock the result rels.
	 *
	 * We must do this before initializing the plan tree, else we might try to
	 * do a lock upgrade if a result rel is also a source rel.
	 */
	if (plannedstmt->resultRelations)
	{
		List	   *resultRelations = plannedstmt->resultRelations;
		int			numResultRelations = list_length(resultRelations);
		ResultRelInfo *resultRelInfos;
		ResultRelInfo *resultRelInfo;

		/* one ResultRelInfo per result relation, filled in below */
		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		resultRelInfo = resultRelInfos;
		foreach(l, resultRelations)
		{
			Index		resultRelationIndex = lfirst_int(l);
			Oid			resultRelationOid;
			Relation	resultRelation;

			resultRelationOid = getrelid(resultRelationIndex, rangeTable);
			resultRelation = heap_open(resultRelationOid, RowExclusiveLock);

			InitResultRelInfo(resultRelInfo,
							  resultRelation,
							  resultRelationIndex,
							  NULL,
							  estate->es_instrument);
			resultRelInfo++;
		}
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
		/* es_result_relation_info is NULL except when within ModifyTable */
		estate->es_result_relation_info = NULL;

		/*
		 * In the partitioned result relation case, lock the non-leaf result
		 * relations too.  A subset of these are the roots of respective
		 * partitioned tables, for which we also allocate ResultRelInfos.
		 */
		estate->es_root_result_relations = NULL;
		estate->es_num_root_result_relations = 0;
		if (plannedstmt->nonleafResultRelations)
		{
			int			num_roots = list_length(plannedstmt->rootResultRelations);

			/*
			 * Firstly, build ResultRelInfos for all the partitioned table
			 * roots, because we will need them to fire the statement-level
			 * triggers, if any.
			 */
			resultRelInfos = (ResultRelInfo *)
				palloc(num_roots * sizeof(ResultRelInfo));
			resultRelInfo = resultRelInfos;
			foreach(l, plannedstmt->rootResultRelations)
			{
				Index		resultRelIndex = lfirst_int(l);
				Oid			resultRelOid;
				Relation	resultRelDesc;

				resultRelOid = getrelid(resultRelIndex, rangeTable);
				resultRelDesc = heap_open(resultRelOid, RowExclusiveLock);
				InitResultRelInfo(resultRelInfo,
								  resultRelDesc,
								  lfirst_int(l),
								  NULL,
								  estate->es_instrument);
				resultRelInfo++;
			}

			estate->es_root_result_relations = resultRelInfos;
			estate->es_num_root_result_relations = num_roots;

			/* Simply lock the rest of them. */
			foreach(l, plannedstmt->nonleafResultRelations)
			{
				Index		resultRelIndex = lfirst_int(l);

				/* We locked the roots above. */
				if (!list_member_int(plannedstmt->rootResultRelations,
									 resultRelIndex))
					LockRelationOid(getrelid(resultRelIndex, rangeTable),
									RowExclusiveLock);
			}
		}
	}
	else
	{
		/*
		 * if no result relation, then set state appropriately
		 */
		estate->es_result_relations = NULL;
		estate->es_num_result_relations = 0;
		estate->es_result_relation_info = NULL;
		estate->es_root_result_relations = NULL;
		estate->es_num_root_result_relations = 0;
	}

	/*
	 * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
	 * before we initialize the plan tree, else we'd be risking lock upgrades.
	 * While we are at it, build the ExecRowMark list.  Any partitioned child
	 * tables are ignored here (because isParent=true) and will be locked by
	 * the first Append or MergeAppend node that references them.  (Note that
	 * the RowMarks corresponding to partitioned child tables are present in
	 * the same list as the rest, i.e., plannedstmt->rowMarks.)
	 */
	estate->es_rowMarks = NIL;
	foreach(l, plannedstmt->rowMarks)
	{
		PlanRowMark *rc = (PlanRowMark *) lfirst(l);
		Oid			relid;
		Relation	relation;
		ExecRowMark *erm;

		/* ignore "parent" rowmarks; they are irrelevant at runtime */
		if (rc->isParent)
			continue;

		/* get relation's OID (will produce InvalidOid if subquery) */
		relid = getrelid(rc->rti, rangeTable);

		/*
		 * If you change the conditions under which rel locks are acquired
		 * here, be sure to adjust ExecOpenScanRelation to match.
		 */
		switch (rc->markType)
		{
			case ROW_MARK_EXCLUSIVE:
			case ROW_MARK_NOKEYEXCLUSIVE:
			case ROW_MARK_SHARE:
			case ROW_MARK_KEYSHARE:
				/* true locking clauses need RowShareLock */
				relation = heap_open(relid, RowShareLock);
				break;
			case ROW_MARK_REFERENCE:
				/* mere reference marks only need AccessShareLock */
				relation = heap_open(relid, AccessShareLock);
				break;
			case ROW_MARK_COPY:
				/* no physical table access is required */
				relation = NULL;
				break;
			default:
				elog(ERROR, "unrecognized markType: %d", rc->markType);
				relation = NULL;	/* keep compiler quiet */
				break;
		}

		/* Check that relation is a legal target for marking */
		if (relation)
			CheckValidRowMarkRel(relation, rc->markType);

		erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
		erm->relation = relation;
		erm->relid = relid;
		erm->rti = rc->rti;
		erm->prti = rc->prti;
		erm->rowmarkId = rc->rowmarkId;
		erm->markType = rc->markType;
		erm->strength = rc->strength;
		erm->waitPolicy = rc->waitPolicy;
		erm->ermActive = false;
		ItemPointerSetInvalid(&(erm->curCtid));
		erm->ermExtra = NULL;
		estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
	}

	/*
	 * Initialize the executor's tuple table to empty.
	 */
	estate->es_tupleTable = NIL;
	estate->es_trig_tuple_slot = NULL;
	estate->es_trig_oldtup_slot = NULL;
	estate->es_trig_newtup_slot = NULL;

	/* mark EvalPlanQual not active */
	estate->es_epqTuple = NULL;
	estate->es_epqTupleSet = NULL;
	estate->es_epqScanDone = NULL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries.
	 */
	Assert(estate->es_subplanstates == NIL);
	i = 1;								/* subplan indices count from 1 */
	foreach(l, plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;
		int			sp_eflags;

		/*
		 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
		 * it is a parameterless subplan (not initplan), we suggest that it be
		 * prepared to handle REWIND efficiently; otherwise there is no need.
		 */
		sp_eflags = eflags
			& (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
		if (bms_is_member(i, plannedstmt->rewindPlanIDs))
			sp_eflags |= EXEC_FLAG_REWIND;

		subplanstate = ExecInitNode(subplan, estate, sp_eflags);

		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);

		i++;
	}

	/*
	 * Initialize the private state information for all the nodes in the query
	 * tree.  This opens files, allocates storage and leaves us ready to start
	 * processing tuples.
	 */
	planstate = ExecInitNode(plan, estate, eflags);

	/*
	 * Get the tuple descriptor describing the type of tuples to return.
	 */
	tupType = ExecGetResultType(planstate);

	/*
	 * Initialize the junk filter if needed.  SELECT queries need a filter if
	 * there are any junk attrs in the top-level tlist.
	 */
	if (operation == CMD_SELECT)
	{
		bool		junk_filter_needed = false;
		ListCell   *tlist;

		foreach(tlist, plan->targetlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(tlist);

			if (tle->resjunk)
			{
				junk_filter_needed = true;
				break;
			}
		}

		if (junk_filter_needed)
		{
			JunkFilter *j;

			j = ExecInitJunkFilter(planstate->plan->targetlist,
								   tupType->tdhasoid,
								   ExecInitExtraTupleSlot(estate));
			estate->es_junkFilter = j;

			/* Want to return the cleaned tuple type */
			tupType = j->jf_cleanTupType;
		}
	}

	queryDesc->tupDesc = tupType;
	queryDesc->planstate = planstate;
}
1089
/*
 * Check that a proposed result relation is a legal target for the operation
 *
 * Generally the parser and/or planner should have noticed any such mistake
 * already, but let's make sure.
 *
 * Note: when changing this function, you probably also need to look at
 * CheckValidRowMarkRel.
 *
 * Raises an ereport(ERROR) for any relkind/operation combination that is
 * not a legal DML target; returns normally otherwise.
 */
void
CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
{
	Relation	resultRel = resultRelInfo->ri_RelationDesc;
	TriggerDesc *trigDesc = resultRel->trigdesc;
	FdwRoutine *fdwroutine;

	switch (resultRel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_PARTITIONED_TABLE:
			/* ordinary tables are OK, modulo replica-identity requirements */
			CheckCmdReplicaIdentity(resultRel, operation);
			break;
		case RELKIND_SEQUENCE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change sequence \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_TOASTVALUE:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change TOAST relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
		case RELKIND_VIEW:

			/*
			 * Okay only if there's a suitable INSTEAD OF trigger.  Messages
			 * here should match rewriteHandler.c's rewriteTargetView, except
			 * that we omit errdetail because we haven't got the information
			 * handy (and given that we really shouldn't get here anyway, it's
			 * not worth great exertion to get).
			 */
			switch (operation)
			{
				case CMD_INSERT:
					if (!trigDesc || !trigDesc->trig_insert_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot insert into view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.")));
					break;
				case CMD_UPDATE:
					if (!trigDesc || !trigDesc->trig_update_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot update view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.")));
					break;
				case CMD_DELETE:
					if (!trigDesc || !trigDesc->trig_delete_instead_row)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("cannot delete from view \"%s\"",
										RelationGetRelationName(resultRel)),
								 errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule.")));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		case RELKIND_MATVIEW:
			/* matviews are writable only during their own maintenance */
			if (!MatViewIncrementalMaintenanceIsEnabled())
				ereport(ERROR,
						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
						 errmsg("cannot change materialized view \"%s\"",
								RelationGetRelationName(resultRel))));
			break;
		case RELKIND_FOREIGN_TABLE:
			/* Okay only if the FDW supports it */
			fdwroutine = resultRelInfo->ri_FdwRoutine;
			switch (operation)
			{
				case CMD_INSERT:

					/*
					 * If foreign partition to do tuple-routing for, skip the
					 * check; it's disallowed elsewhere.
					 */
					if (resultRelInfo->ri_PartitionRoot)
						break;
					if (fdwroutine->ExecForeignInsert == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot insert into foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					/* also honor the FDW's per-relation updatability mask */
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow inserts",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_UPDATE:
					if (fdwroutine->ExecForeignUpdate == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot update foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow updates",
										RelationGetRelationName(resultRel))));
					break;
				case CMD_DELETE:
					if (fdwroutine->ExecForeignDelete == NULL)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("cannot delete from foreign table \"%s\"",
										RelationGetRelationName(resultRel))));
					if (fdwroutine->IsForeignRelUpdatable != NULL &&
						(fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0)
						ereport(ERROR,
								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								 errmsg("foreign table \"%s\" does not allow deletes",
										RelationGetRelationName(resultRel))));
					break;
				default:
					elog(ERROR, "unrecognized CmdType: %d", (int) operation);
					break;
			}
			break;
		default:
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("cannot change relation \"%s\"",
							RelationGetRelationName(resultRel))));
			break;
	}
}
1235
1236 /*
1237  * Check that a proposed rowmark target relation is a legal target
1238  *
1239  * In most cases parser and/or planner should have noticed this already, but
1240  * they don't cover all cases.
1241  */
1242 static void
1243 CheckValidRowMarkRel(Relation rel, RowMarkType markType)
1244 {
1245         FdwRoutine *fdwroutine;
1246
1247         switch (rel->rd_rel->relkind)
1248         {
1249                 case RELKIND_RELATION:
1250                 case RELKIND_PARTITIONED_TABLE:
1251                         /* OK */
1252                         break;
1253                 case RELKIND_SEQUENCE:
1254                         /* Must disallow this because we don't vacuum sequences */
1255                         ereport(ERROR,
1256                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1257                                          errmsg("cannot lock rows in sequence \"%s\"",
1258                                                         RelationGetRelationName(rel))));
1259                         break;
1260                 case RELKIND_TOASTVALUE:
1261                         /* We could allow this, but there seems no good reason to */
1262                         ereport(ERROR,
1263                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1264                                          errmsg("cannot lock rows in TOAST relation \"%s\"",
1265                                                         RelationGetRelationName(rel))));
1266                         break;
1267                 case RELKIND_VIEW:
1268                         /* Should not get here; planner should have expanded the view */
1269                         ereport(ERROR,
1270                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1271                                          errmsg("cannot lock rows in view \"%s\"",
1272                                                         RelationGetRelationName(rel))));
1273                         break;
1274                 case RELKIND_MATVIEW:
1275                         /* Allow referencing a matview, but not actual locking clauses */
1276                         if (markType != ROW_MARK_REFERENCE)
1277                                 ereport(ERROR,
1278                                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1279                                                  errmsg("cannot lock rows in materialized view \"%s\"",
1280                                                                 RelationGetRelationName(rel))));
1281                         break;
1282                 case RELKIND_FOREIGN_TABLE:
1283                         /* Okay only if the FDW supports it */
1284                         fdwroutine = GetFdwRoutineForRelation(rel, false);
1285                         if (fdwroutine->RefetchForeignRow == NULL)
1286                                 ereport(ERROR,
1287                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1288                                                  errmsg("cannot lock rows in foreign table \"%s\"",
1289                                                                 RelationGetRelationName(rel))));
1290                         break;
1291                 default:
1292                         ereport(ERROR,
1293                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1294                                          errmsg("cannot lock rows in relation \"%s\"",
1295                                                         RelationGetRelationName(rel))));
1296                         break;
1297         }
1298 }
1299
1300 /*
1301  * Initialize ResultRelInfo data for one result relation
1302  *
1303  * Caution: before Postgres 9.1, this function included the relkind checking
1304  * that's now in CheckValidResultRel, and it also did ExecOpenIndices if
1305  * appropriate.  Be sure callers cover those needs.
1306  */
1307 void
1308 InitResultRelInfo(ResultRelInfo *resultRelInfo,
1309                                   Relation resultRelationDesc,
1310                                   Index resultRelationIndex,
1311                                   Relation partition_root,
1312                                   int instrument_options)
1313 {
1314         List       *partition_check = NIL;
1315
1316         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
1317         resultRelInfo->type = T_ResultRelInfo;
1318         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
1319         resultRelInfo->ri_RelationDesc = resultRelationDesc;
1320         resultRelInfo->ri_NumIndices = 0;
1321         resultRelInfo->ri_IndexRelationDescs = NULL;
1322         resultRelInfo->ri_IndexRelationInfo = NULL;
1323         /* make a copy so as not to depend on relcache info not changing... */
1324         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
1325         if (resultRelInfo->ri_TrigDesc)
1326         {
1327                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
1328
1329                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
1330                         palloc0(n * sizeof(FmgrInfo));
1331                 resultRelInfo->ri_TrigWhenExprs = (ExprState **)
1332                         palloc0(n * sizeof(ExprState *));
1333                 if (instrument_options)
1334                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
1335         }
1336         else
1337         {
1338                 resultRelInfo->ri_TrigFunctions = NULL;
1339                 resultRelInfo->ri_TrigWhenExprs = NULL;
1340                 resultRelInfo->ri_TrigInstrument = NULL;
1341         }
1342         if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1343                 resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true);
1344         else
1345                 resultRelInfo->ri_FdwRoutine = NULL;
1346         resultRelInfo->ri_FdwState = NULL;
1347         resultRelInfo->ri_usesFdwDirectModify = false;
1348         resultRelInfo->ri_ConstraintExprs = NULL;
1349         resultRelInfo->ri_junkFilter = NULL;
1350         resultRelInfo->ri_projectReturning = NULL;
1351
1352         /*
1353          * Partition constraint, which also includes the partition constraint of
1354          * all the ancestors that are partitions.  Note that it will be checked
1355          * even in the case of tuple-routing where this table is the target leaf
1356          * partition, if there any BR triggers defined on the table.  Although
1357          * tuple-routing implicitly preserves the partition constraint of the
1358          * target partition for a given row, the BR triggers may change the row
1359          * such that the constraint is no longer satisfied, which we must fail for
1360          * by checking it explicitly.
1361          *
1362          * If this is a partitioned table, the partition constraint (if any) of a
1363          * given row will be checked just before performing tuple-routing.
1364          */
1365         partition_check = RelationGetPartitionQual(resultRelationDesc);
1366
1367         resultRelInfo->ri_PartitionCheck = partition_check;
1368         resultRelInfo->ri_PartitionRoot = partition_root;
1369 }
1370
1371 /*
1372  *              ExecGetTriggerResultRel
1373  *
1374  * Get a ResultRelInfo for a trigger target relation.  Most of the time,
1375  * triggers are fired on one of the result relations of the query, and so
1376  * we can just return a member of the es_result_relations array, the
1377  * es_root_result_relations array (if any), or the es_leaf_result_relations
1378  * list (if any).  (Note: in self-join situations there might be multiple
1379  * members with the same OID; if so it doesn't matter which one we pick.)
1380  * However, it is sometimes necessary to fire triggers on other relations;
1381  * this happens mainly when an RI update trigger queues additional triggers
1382  * on other relations, which will be processed in the context of the outer
1383  * query.  For efficiency's sake, we want to have a ResultRelInfo for those
1384  * triggers too; that can avoid repeated re-opening of the relation.  (It
1385  * also provides a way for EXPLAIN ANALYZE to report the runtimes of such
1386  * triggers.)  So we make additional ResultRelInfo's as needed, and save them
1387  * in es_trig_target_relations.
1388  */
1389 ResultRelInfo *
1390 ExecGetTriggerResultRel(EState *estate, Oid relid)
1391 {
1392         ResultRelInfo *rInfo;
1393         int                     nr;
1394         ListCell   *l;
1395         Relation        rel;
1396         MemoryContext oldcontext;
1397
1398         /* First, search through the query result relations */
1399         rInfo = estate->es_result_relations;
1400         nr = estate->es_num_result_relations;
1401         while (nr > 0)
1402         {
1403                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1404                         return rInfo;
1405                 rInfo++;
1406                 nr--;
1407         }
1408         /* Second, search through the root result relations, if any */
1409         rInfo = estate->es_root_result_relations;
1410         nr = estate->es_num_root_result_relations;
1411         while (nr > 0)
1412         {
1413                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1414                         return rInfo;
1415                 rInfo++;
1416                 nr--;
1417         }
1418         /* Third, search through the leaf result relations, if any */
1419         foreach(l, estate->es_leaf_result_relations)
1420         {
1421                 rInfo = (ResultRelInfo *) lfirst(l);
1422                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1423                         return rInfo;
1424         }
1425         /* Nope, but maybe we already made an extra ResultRelInfo for it */
1426         foreach(l, estate->es_trig_target_relations)
1427         {
1428                 rInfo = (ResultRelInfo *) lfirst(l);
1429                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1430                         return rInfo;
1431         }
1432         /* Nope, so we need a new one */
1433
1434         /*
1435          * Open the target relation's relcache entry.  We assume that an
1436          * appropriate lock is still held by the backend from whenever the trigger
1437          * event got queued, so we need take no new lock here.  Also, we need not
1438          * recheck the relkind, so no need for CheckValidResultRel.
1439          */
1440         rel = heap_open(relid, NoLock);
1441
1442         /*
1443          * Make the new entry in the right context.
1444          */
1445         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
1446         rInfo = makeNode(ResultRelInfo);
1447         InitResultRelInfo(rInfo,
1448                                           rel,
1449                                           0,            /* dummy rangetable index */
1450                                           NULL,
1451                                           estate->es_instrument);
1452         estate->es_trig_target_relations =
1453                 lappend(estate->es_trig_target_relations, rInfo);
1454         MemoryContextSwitchTo(oldcontext);
1455
1456         /*
1457          * Currently, we don't need any index information in ResultRelInfos used
1458          * only for triggers, so no need to call ExecOpenIndices.
1459          */
1460
1461         return rInfo;
1462 }
1463
1464 /*
1465  * Close any relations that have been opened by ExecGetTriggerResultRel().
1466  */
1467 void
1468 ExecCleanUpTriggerState(EState *estate)
1469 {
1470         ListCell   *l;
1471
1472         foreach(l, estate->es_trig_target_relations)
1473         {
1474                 ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
1475
1476                 /* Close indices and then the relation itself */
1477                 ExecCloseIndices(resultRelInfo);
1478                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1479         }
1480 }
1481
1482 /*
1483  *              ExecContextForcesOids
1484  *
1485  * This is pretty grotty: when doing INSERT, UPDATE, or CREATE TABLE AS,
1486  * we need to ensure that result tuples have space for an OID iff they are
1487  * going to be stored into a relation that has OIDs.  In other contexts
1488  * we are free to choose whether to leave space for OIDs in result tuples
1489  * (we generally don't want to, but we do if a physical-tlist optimization
1490  * is possible).  This routine checks the plan context and returns TRUE if the
1491  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
1492  * *hasoids is set to the required value.
1493  *
1494  * One reason this is ugly is that all plan nodes in the plan tree will emit
1495  * tuples with space for an OID, though we really only need the topmost node
1496  * to do so.  However, node types like Sort don't project new tuples but just
1497  * return their inputs, and in those cases the requirement propagates down
1498  * to the input node.  Eventually we might make this code smart enough to
1499  * recognize how far down the requirement really goes, but for now we just
1500  * make all plan nodes do the same thing if the top level forces the choice.
1501  *
1502  * We assume that if we are generating tuples for INSERT or UPDATE,
1503  * estate->es_result_relation_info is already set up to describe the target
1504  * relation.  Note that in an UPDATE that spans an inheritance tree, some of
1505  * the target relations may have OIDs and some not.  We have to make the
1506  * decisions on a per-relation basis as we initialize each of the subplans of
1507  * the ModifyTable node, so ModifyTable has to set es_result_relation_info
1508  * while initializing each subplan.
1509  *
1510  * CREATE TABLE AS is even uglier, because we don't have the target relation's
1511  * descriptor available when this code runs; we have to look aside at the
1512  * flags passed to ExecutorStart().
1513  */
1514 bool
1515 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
1516 {
1517         ResultRelInfo *ri = planstate->state->es_result_relation_info;
1518
1519         if (ri != NULL)
1520         {
1521                 Relation        rel = ri->ri_RelationDesc;
1522
1523                 if (rel != NULL)
1524                 {
1525                         *hasoids = rel->rd_rel->relhasoids;
1526                         return true;
1527                 }
1528         }
1529
1530         if (planstate->state->es_top_eflags & EXEC_FLAG_WITH_OIDS)
1531         {
1532                 *hasoids = true;
1533                 return true;
1534         }
1535         if (planstate->state->es_top_eflags & EXEC_FLAG_WITHOUT_OIDS)
1536         {
1537                 *hasoids = false;
1538                 return true;
1539         }
1540
1541         return false;
1542 }
1543
1544 /* ----------------------------------------------------------------
1545  *              ExecPostprocessPlan
1546  *
1547  *              Give plan nodes a final chance to execute before shutdown
1548  * ----------------------------------------------------------------
1549  */
1550 static void
1551 ExecPostprocessPlan(EState *estate)
1552 {
1553         ListCell   *lc;
1554
1555         /*
1556          * Make sure nodes run forward.
1557          */
1558         estate->es_direction = ForwardScanDirection;
1559
1560         /*
1561          * Run any secondary ModifyTable nodes to completion, in case the main
1562          * query did not fetch all rows from them.  (We do this to ensure that
1563          * such nodes have predictable results.)
1564          */
1565         foreach(lc, estate->es_auxmodifytables)
1566         {
1567                 PlanState  *ps = (PlanState *) lfirst(lc);
1568
1569                 for (;;)
1570                 {
1571                         TupleTableSlot *slot;
1572
1573                         /* Reset the per-output-tuple exprcontext each time */
1574                         ResetPerTupleExprContext(estate);
1575
1576                         slot = ExecProcNode(ps);
1577
1578                         if (TupIsNull(slot))
1579                                 break;
1580                 }
1581         }
1582 }
1583
1584 /* ----------------------------------------------------------------
1585  *              ExecEndPlan
1586  *
1587  *              Cleans up the query plan -- closes files and frees up storage
1588  *
1589  * NOTE: we are no longer very worried about freeing storage per se
1590  * in this code; FreeExecutorState should be guaranteed to release all
1591  * memory that needs to be released.  What we are worried about doing
1592  * is closing relations and dropping buffer pins.  Thus, for example,
1593  * tuple tables must be cleared or dropped to ensure pins are released.
1594  * ----------------------------------------------------------------
1595  */
1596 static void
1597 ExecEndPlan(PlanState *planstate, EState *estate)
1598 {
1599         ResultRelInfo *resultRelInfo;
1600         int                     i;
1601         ListCell   *l;
1602
1603         /*
1604          * shut down the node-type-specific query processing
1605          */
1606         ExecEndNode(planstate);
1607
1608         /*
1609          * for subplans too
1610          */
1611         foreach(l, estate->es_subplanstates)
1612         {
1613                 PlanState  *subplanstate = (PlanState *) lfirst(l);
1614
1615                 ExecEndNode(subplanstate);
1616         }
1617
1618         /*
1619          * destroy the executor's tuple table.  Actually we only care about
1620          * releasing buffer pins and tupdesc refcounts; there's no need to pfree
1621          * the TupleTableSlots, since the containing memory context is about to go
1622          * away anyway.
1623          */
1624         ExecResetTupleTable(estate->es_tupleTable, false);
1625
1626         /*
1627          * close the result relation(s) if any, but hold locks until xact commit.
1628          */
1629         resultRelInfo = estate->es_result_relations;
1630         for (i = estate->es_num_result_relations; i > 0; i--)
1631         {
1632                 /* Close indices and then the relation itself */
1633                 ExecCloseIndices(resultRelInfo);
1634                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1635                 resultRelInfo++;
1636         }
1637
1638         /* Close the root target relation(s). */
1639         resultRelInfo = estate->es_root_result_relations;
1640         for (i = estate->es_num_root_result_relations; i > 0; i--)
1641         {
1642                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1643                 resultRelInfo++;
1644         }
1645
1646         /* likewise close any trigger target relations */
1647         ExecCleanUpTriggerState(estate);
1648
1649         /*
1650          * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping
1651          * locks
1652          */
1653         foreach(l, estate->es_rowMarks)
1654         {
1655                 ExecRowMark *erm = (ExecRowMark *) lfirst(l);
1656
1657                 if (erm->relation)
1658                         heap_close(erm->relation, NoLock);
1659         }
1660 }
1661
1662 /* ----------------------------------------------------------------
1663  *              ExecutePlan
1664  *
1665  *              Processes the query plan until we have retrieved 'numberTuples' tuples,
1666  *              moving in the specified direction.
1667  *
1668  *              Runs to completion if numberTuples is 0
1669  *
1670  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1671  * user can see it
1672  * ----------------------------------------------------------------
1673  */
1674 static void
1675 ExecutePlan(EState *estate,
1676                         PlanState *planstate,
1677                         bool use_parallel_mode,
1678                         CmdType operation,
1679                         bool sendTuples,
1680                         uint64 numberTuples,
1681                         ScanDirection direction,
1682                         DestReceiver *dest,
1683                         bool execute_once)
1684 {
1685         TupleTableSlot *slot;
1686         uint64          current_tuple_count;
1687
1688         /*
1689          * initialize local variables
1690          */
1691         current_tuple_count = 0;
1692
1693         /*
1694          * Set the direction.
1695          */
1696         estate->es_direction = direction;
1697
1698         /*
1699          * If the plan might potentially be executed multiple times, we must force
1700          * it to run without parallelism, because we might exit early.
1701          */
1702         if (!execute_once)
1703                 use_parallel_mode = false;
1704
1705         estate->es_use_parallel_mode = use_parallel_mode;
1706         if (use_parallel_mode)
1707                 EnterParallelMode();
1708
1709         /*
1710          * Loop until we've processed the proper number of tuples from the plan.
1711          */
1712         for (;;)
1713         {
1714                 /* Reset the per-output-tuple exprcontext */
1715                 ResetPerTupleExprContext(estate);
1716
1717                 /*
1718                  * Execute the plan and obtain a tuple
1719                  */
1720                 slot = ExecProcNode(planstate);
1721
1722                 /*
1723                  * if the tuple is null, then we assume there is nothing more to
1724                  * process so we just end the loop...
1725                  */
1726                 if (TupIsNull(slot))
1727                 {
1728                         /* Allow nodes to release or shut down resources. */
1729                         (void) ExecShutdownNode(planstate);
1730                         break;
1731                 }
1732
1733                 /*
1734                  * If we have a junk filter, then project a new tuple with the junk
1735                  * removed.
1736                  *
1737                  * Store this new "clean" tuple in the junkfilter's resultSlot.
1738                  * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1739                  * because that tuple slot has the wrong descriptor.)
1740                  */
1741                 if (estate->es_junkFilter != NULL)
1742                         slot = ExecFilterJunk(estate->es_junkFilter, slot);
1743
1744                 /*
1745                  * If we are supposed to send the tuple somewhere, do so. (In
1746                  * practice, this is probably always the case at this point.)
1747                  */
1748                 if (sendTuples)
1749                 {
1750                         /*
1751                          * If we are not able to send the tuple, we assume the destination
1752                          * has closed and no more tuples can be sent. If that's the case,
1753                          * end the loop.
1754                          */
1755                         if (!dest->receiveSlot(slot, dest))
1756                                 break;
1757                 }
1758
1759                 /*
1760                  * Count tuples processed, if this is a SELECT.  (For other operation
1761                  * types, the ModifyTable plan node must count the appropriate
1762                  * events.)
1763                  */
1764                 if (operation == CMD_SELECT)
1765                         (estate->es_processed)++;
1766
1767                 /*
1768                  * check our tuple count.. if we've processed the proper number then
1769                  * quit, else loop again and process more tuples.  Zero numberTuples
1770                  * means no limit.
1771                  */
1772                 current_tuple_count++;
1773                 if (numberTuples && numberTuples == current_tuple_count)
1774                 {
1775                         /* Allow nodes to release or shut down resources. */
1776                         (void) ExecShutdownNode(planstate);
1777                         break;
1778                 }
1779         }
1780
1781         if (use_parallel_mode)
1782                 ExitParallelMode();
1783 }
1784
1785
1786 /*
1787  * ExecRelCheck --- check that tuple meets constraints for result relation
1788  *
1789  * Returns NULL if OK, else name of failed check constraint
1790  */
1791 static const char *
1792 ExecRelCheck(ResultRelInfo *resultRelInfo,
1793                          TupleTableSlot *slot, EState *estate)
1794 {
1795         Relation        rel = resultRelInfo->ri_RelationDesc;
1796         int                     ncheck = rel->rd_att->constr->num_check;
1797         ConstrCheck *check = rel->rd_att->constr->check;
1798         ExprContext *econtext;
1799         MemoryContext oldContext;
1800         int                     i;
1801
1802         /*
1803          * If first time through for this result relation, build expression
1804          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1805          * memory context so they'll survive throughout the query.
1806          */
1807         if (resultRelInfo->ri_ConstraintExprs == NULL)
1808         {
1809                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1810                 resultRelInfo->ri_ConstraintExprs =
1811                         (ExprState **) palloc(ncheck * sizeof(ExprState *));
1812                 for (i = 0; i < ncheck; i++)
1813                 {
1814                         Expr       *checkconstr;
1815
1816                         checkconstr = stringToNode(check[i].ccbin);
1817                         resultRelInfo->ri_ConstraintExprs[i] =
1818                                 ExecPrepareExpr(checkconstr, estate);
1819                 }
1820                 MemoryContextSwitchTo(oldContext);
1821         }
1822
1823         /*
1824          * We will use the EState's per-tuple context for evaluating constraint
1825          * expressions (creating it if it's not already there).
1826          */
1827         econtext = GetPerTupleExprContext(estate);
1828
1829         /* Arrange for econtext's scan tuple to be the tuple under test */
1830         econtext->ecxt_scantuple = slot;
1831
1832         /* And evaluate the constraints */
1833         for (i = 0; i < ncheck; i++)
1834         {
1835                 ExprState  *checkconstr = resultRelInfo->ri_ConstraintExprs[i];
1836
1837                 /*
1838                  * NOTE: SQL specifies that a NULL result from a constraint expression
1839                  * is not to be treated as a failure.  Therefore, use ExecCheck not
1840                  * ExecQual.
1841                  */
1842                 if (!ExecCheck(checkconstr, econtext))
1843                         return check[i].ccname;
1844         }
1845
1846         /* NULL result means no error */
1847         return NULL;
1848 }
1849
/*
 * ExecPartitionCheck --- check that tuple meets the partition constraint.
 *
 * Evaluates resultRelInfo's ri_PartitionCheck qual against the tuple in
 * 'slot', raising an ERROR if it is not satisfied.  As with catalogued
 * constraints, a NULL qual result is treated as success.  Note that on the
 * failure path 'slot' may be converted back to the partition root's rowtype
 * while the error detail is being built.
 */
static void
ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
                                   EState *estate)
{
        Relation        rel = resultRelInfo->ri_RelationDesc;
        TupleDesc       tupdesc = RelationGetDescr(rel);
        Bitmapset  *modifiedCols;
        Bitmapset  *insertedCols;
        Bitmapset  *updatedCols;
        ExprContext *econtext;

        /*
         * If first time through, build expression state tree for the partition
         * check expression.  Keep it in the per-query memory context so they'll
         * survive throughout the query.
         */
        if (resultRelInfo->ri_PartitionCheckExpr == NULL)
        {
                List       *qual = resultRelInfo->ri_PartitionCheck;

                resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
        }

        /*
         * We will use the EState's per-tuple context for evaluating constraint
         * expressions (creating it if it's not already there).
         */
        econtext = GetPerTupleExprContext(estate);

        /* Arrange for econtext's scan tuple to be the tuple under test */
        econtext->ecxt_scantuple = slot;

        /*
         * As in case of the catalogued constraints, we treat a NULL result as
         * success here, not a failure.
         */
        if (!ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext))
        {
                char       *val_desc;
                Relation        orig_rel = rel;

                /*
                 * If the tuple was routed to this partition, it has been converted
                 * to the partition's rowtype, which might differ from the root
                 * table's.  Convert it back to the root table's rowtype so that
                 * the values shown in the error detail match the tuple as the
                 * user supplied it.
                 */
                if (resultRelInfo->ri_PartitionRoot)
                {
                        HeapTuple       tuple = ExecFetchSlotTuple(slot);
                        TupleDesc       old_tupdesc = RelationGetDescr(rel);
                        TupleConversionMap *map;

                        rel = resultRelInfo->ri_PartitionRoot;
                        tupdesc = RelationGetDescr(rel);
                        /* a reverse map, from partition rowtype back to root's */
                        map = convert_tuples_by_name(old_tupdesc, tupdesc,
                                                                                 gettext_noop("could not convert row type"));
                        if (map != NULL)
                        {
                                tuple = do_convert_tuple(tuple, map);
                                ExecSetSlotDescriptor(slot, tupdesc);
                                ExecStoreTuple(tuple, slot, InvalidBuffer, false);
                        }
                }

                /* Limit the error detail to columns the command actually set */
                insertedCols = GetInsertedColumns(resultRelInfo, estate);
                updatedCols = GetUpdatedColumns(resultRelInfo, estate);
                modifiedCols = bms_union(insertedCols, updatedCols);
                val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
                                                                                                 slot,
                                                                                                 tupdesc,
                                                                                                 modifiedCols,
                                                                                                 64);
                ereport(ERROR,
                                (errcode(ERRCODE_CHECK_VIOLATION),
                                 errmsg("new row for relation \"%s\" violates partition constraint",
                                                RelationGetRelationName(orig_rel)),
                                 val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
        }
}
1929
1930 /*
1931  * ExecConstraints - check constraints of the tuple in 'slot'
1932  *
1933  * This checks the traditional NOT NULL and check constraints, as well as
1934  * the partition constraint, if any.
1935  *
1936  * Note: 'slot' contains the tuple to check the constraints of, which may
1937  * have been converted from the original input tuple after tuple routing.
1938  * 'resultRelInfo' is the original result relation, before tuple routing.
1939  */
1940 void
1941 ExecConstraints(ResultRelInfo *resultRelInfo,
1942                                 TupleTableSlot *slot, EState *estate)
1943 {
1944         Relation        rel = resultRelInfo->ri_RelationDesc;
1945         TupleDesc       tupdesc = RelationGetDescr(rel);
1946         TupleConstr *constr = tupdesc->constr;
1947         Bitmapset  *modifiedCols;
1948         Bitmapset  *insertedCols;
1949         Bitmapset  *updatedCols;
1950
1951         Assert(constr || resultRelInfo->ri_PartitionCheck);
1952
1953         if (constr && constr->has_not_null)
1954         {
1955                 int                     natts = tupdesc->natts;
1956                 int                     attrChk;
1957
1958                 for (attrChk = 1; attrChk <= natts; attrChk++)
1959                 {
1960                         Form_pg_attribute att = TupleDescAttr(tupdesc, attrChk - 1);
1961
1962                         if (att->attnotnull && slot_attisnull(slot, attrChk))
1963                         {
1964                                 char       *val_desc;
1965                                 Relation        orig_rel = rel;
1966                                 TupleDesc       orig_tupdesc = RelationGetDescr(rel);
1967
1968                                 /*
1969                                  * If the tuple has been routed, it's been converted to the
1970                                  * partition's rowtype, which might differ from the root
1971                                  * table's.  We must convert it back to the root table's
1972                                  * rowtype so that val_desc shown error message matches the
1973                                  * input tuple.
1974                                  */
1975                                 if (resultRelInfo->ri_PartitionRoot)
1976                                 {
1977                                         HeapTuple       tuple = ExecFetchSlotTuple(slot);
1978                                         TupleConversionMap *map;
1979
1980                                         rel = resultRelInfo->ri_PartitionRoot;
1981                                         tupdesc = RelationGetDescr(rel);
1982                                         /* a reverse map */
1983                                         map = convert_tuples_by_name(orig_tupdesc, tupdesc,
1984                                                                                                  gettext_noop("could not convert row type"));
1985                                         if (map != NULL)
1986                                         {
1987                                                 tuple = do_convert_tuple(tuple, map);
1988                                                 ExecSetSlotDescriptor(slot, tupdesc);
1989                                                 ExecStoreTuple(tuple, slot, InvalidBuffer, false);
1990                                         }
1991                                 }
1992
1993                                 insertedCols = GetInsertedColumns(resultRelInfo, estate);
1994                                 updatedCols = GetUpdatedColumns(resultRelInfo, estate);
1995                                 modifiedCols = bms_union(insertedCols, updatedCols);
1996                                 val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
1997                                                                                                                  slot,
1998                                                                                                                  tupdesc,
1999                                                                                                                  modifiedCols,
2000                                                                                                                  64);
2001
2002                                 ereport(ERROR,
2003                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
2004                                                  errmsg("null value in column \"%s\" violates not-null constraint",
2005                                                                 NameStr(att->attname)),
2006                                                  val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
2007                                                  errtablecol(orig_rel, attrChk)));
2008                         }
2009                 }
2010         }
2011
2012         if (constr && constr->num_check > 0)
2013         {
2014                 const char *failed;
2015
2016                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
2017                 {
2018                         char       *val_desc;
2019                         Relation        orig_rel = rel;
2020
2021                         /* See the comment above. */
2022                         if (resultRelInfo->ri_PartitionRoot)
2023                         {
2024                                 HeapTuple       tuple = ExecFetchSlotTuple(slot);
2025                                 TupleDesc       old_tupdesc = RelationGetDescr(rel);
2026                                 TupleConversionMap *map;
2027
2028                                 rel = resultRelInfo->ri_PartitionRoot;
2029                                 tupdesc = RelationGetDescr(rel);
2030                                 /* a reverse map */
2031                                 map = convert_tuples_by_name(old_tupdesc, tupdesc,
2032                                                                                          gettext_noop("could not convert row type"));
2033                                 if (map != NULL)
2034                                 {
2035                                         tuple = do_convert_tuple(tuple, map);
2036                                         ExecSetSlotDescriptor(slot, tupdesc);
2037                                         ExecStoreTuple(tuple, slot, InvalidBuffer, false);
2038                                 }
2039                         }
2040
2041                         insertedCols = GetInsertedColumns(resultRelInfo, estate);
2042                         updatedCols = GetUpdatedColumns(resultRelInfo, estate);
2043                         modifiedCols = bms_union(insertedCols, updatedCols);
2044                         val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
2045                                                                                                          slot,
2046                                                                                                          tupdesc,
2047                                                                                                          modifiedCols,
2048                                                                                                          64);
2049                         ereport(ERROR,
2050                                         (errcode(ERRCODE_CHECK_VIOLATION),
2051                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
2052                                                         RelationGetRelationName(orig_rel), failed),
2053                                          val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
2054                                          errtableconstraint(orig_rel, failed)));
2055                 }
2056         }
2057
2058         if (resultRelInfo->ri_PartitionCheck)
2059                 ExecPartitionCheck(resultRelInfo, slot, estate);
2060 }
2061
2062
/*
 * ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs
 * of the specified kind.
 *
 * Note that this needs to be called multiple times to ensure that all kinds of
 * WITH CHECK OPTIONs are handled (both those from views which have the WITH
 * CHECK OPTION set and from row level security policies).  See ExecInsert()
 * and ExecUpdate().
 *
 * On a violation this function does not return: it raises an error whose
 * errcode and level of row detail depend on the WCO kind (see the per-case
 * comments in the switch below).
 */
void
ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
					 TupleTableSlot *slot, EState *estate)
{
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	ExprContext *econtext;
	ListCell   *l1,
			   *l2;

	/*
	 * We will use the EState's per-tuple context for evaluating constraint
	 * expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/*
	 * Check each of the constraints.  ri_WithCheckOptions and
	 * ri_WithCheckOptionExprs are parallel lists: the former holds the
	 * WithCheckOption nodes, the latter the corresponding compiled
	 * expression states.
	 */
	forboth(l1, resultRelInfo->ri_WithCheckOptions,
			l2, resultRelInfo->ri_WithCheckOptionExprs)
	{
		WithCheckOption *wco = (WithCheckOption *) lfirst(l1);
		ExprState  *wcoExpr = (ExprState *) lfirst(l2);

		/*
		 * Skip any WCOs which are not the kind we are looking for at this
		 * time.
		 */
		if (wco->kind != kind)
			continue;

		/*
		 * WITH CHECK OPTION checks are intended to ensure that the new tuple
		 * is visible (in the case of a view) or that it passes the
		 * 'with-check' policy (in the case of row security). If the qual
		 * evaluates to NULL or FALSE, then the new tuple won't be included in
		 * the view or doesn't pass the 'with-check' policy for the table.
		 */
		if (!ExecQual(wcoExpr, econtext))
		{
			char	   *val_desc;
			Bitmapset  *modifiedCols;
			Bitmapset  *insertedCols;
			Bitmapset  *updatedCols;

			switch (wco->kind)
			{
					/*
					 * For WITH CHECK OPTIONs coming from views, we might be
					 * able to provide the details on the row, depending on
					 * the permissions on the relation (that is, if the user
					 * could view it directly anyway).  For RLS violations, we
					 * don't include the data since we don't know if the user
					 * should be able to view the tuple as that depends on the
					 * USING policy.
					 */
				case WCO_VIEW_CHECK:
					/* See the comment in ExecConstraints(). */
					if (resultRelInfo->ri_PartitionRoot)
					{
						HeapTuple	tuple = ExecFetchSlotTuple(slot);
						TupleDesc	old_tupdesc = RelationGetDescr(rel);
						TupleConversionMap *map;

						/*
						 * Report the error against the partitioned root, in
						 * root rowtype; convert the tuple back if needed.
						 */
						rel = resultRelInfo->ri_PartitionRoot;
						tupdesc = RelationGetDescr(rel);
						/* a reverse map */
						map = convert_tuples_by_name(old_tupdesc, tupdesc,
													 gettext_noop("could not convert row type"));
						if (map != NULL)
						{
							tuple = do_convert_tuple(tuple, map);
							ExecSetSlotDescriptor(slot, tupdesc);
							ExecStoreTuple(tuple, slot, InvalidBuffer, false);
						}
					}

					insertedCols = GetInsertedColumns(resultRelInfo, estate);
					updatedCols = GetUpdatedColumns(resultRelInfo, estate);
					modifiedCols = bms_union(insertedCols, updatedCols);
					val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
															 slot,
															 tupdesc,
															 modifiedCols,
															 64);

					ereport(ERROR,
							(errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION),
							 errmsg("new row violates check option for view \"%s\"",
									wco->relname),
							 val_desc ? errdetail("Failing row contains %s.",
												  val_desc) : 0));
					break;
				case WCO_RLS_INSERT_CHECK:
				case WCO_RLS_UPDATE_CHECK:
					if (wco->polname != NULL)
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy \"%s\" for table \"%s\"",
										wco->polname, wco->relname)));
					else
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy for table \"%s\"",
										wco->relname)));
					break;
				case WCO_RLS_CONFLICT_CHECK:
					if (wco->polname != NULL)
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"",
										wco->polname, wco->relname)));
					else
						ereport(ERROR,
								(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
								 errmsg("new row violates row-level security policy (USING expression) for table \"%s\"",
										wco->relname)));
					break;
				default:
					elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
					break;
			}
		}
	}
}
2199
/*
 * ExecBuildSlotValueDescription -- construct a string representing a tuple
 *
 * This is intentionally very similar to BuildIndexValueDescription, but
 * unlike that function, we truncate long field values (to at most maxfieldlen
 * bytes).  That seems necessary here since heap field values could be very
 * long, whereas index entries typically aren't so wide.
 *
 * Also, unlike the case with index entries, we need to be prepared to ignore
 * dropped columns.  We used to use the slot's tuple descriptor to decode the
 * data, but the slot's descriptor doesn't identify dropped columns, so we
 * now need to be passed the relation's descriptor.
 *
 * Note that, like BuildIndexValueDescription, if the user does not have
 * permission to view any of the columns involved, a NULL is returned.  Unlike
 * BuildIndexValueDescription, if the user has access to view a subset of the
 * columns involved, that subset will be returned with a key identifying which
 * columns they are.
 *
 * The result (and the intermediate StringInfo buffers) are palloc'd in the
 * current memory context; callers rely on context reset for cleanup.
 */
static char *
ExecBuildSlotValueDescription(Oid reloid,
							  TupleTableSlot *slot,
							  TupleDesc tupdesc,
							  Bitmapset *modifiedCols,
							  int maxfieldlen)
{
	StringInfoData buf;			/* "(v1, v2, ...)" value list */
	StringInfoData collist;		/* "(c1, c2, ...)" column-name key, used only
								 * when the user lacks table-level SELECT */
	bool		write_comma = false;
	bool		write_comma_collist = false;
	int			i;
	AclResult	aclresult;
	bool		table_perm = false; /* table-level SELECT privilege? */
	bool		any_perm = false;	/* at least one column is visible? */

	/*
	 * Check if RLS is enabled and should be active for the relation; if so,
	 * then don't return anything.  Otherwise, go through normal permission
	 * checks.
	 */
	if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED)
		return NULL;

	initStringInfo(&buf);

	appendStringInfoChar(&buf, '(');

	/*
	 * Check if the user has permissions to see the row.  Table-level SELECT
	 * allows access to all columns.  If the user does not have table-level
	 * SELECT then we check each column and include those the user has SELECT
	 * rights on.  Additionally, we always include columns the user provided
	 * data for.
	 */
	aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
	{
		/* Set up the buffer for the column list */
		initStringInfo(&collist);
		appendStringInfoChar(&collist, '(');
	}
	else
		table_perm = any_perm = true;

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	for (i = 0; i < tupdesc->natts; i++)
	{
		bool		column_perm = false;
		char	   *val;
		int			vallen;
		Form_pg_attribute att = TupleDescAttr(tupdesc, i);

		/* ignore dropped columns */
		if (att->attisdropped)
			continue;

		if (!table_perm)
		{
			/*
			 * No table-level SELECT, so need to make sure they either have
			 * SELECT rights on the column or that they have provided the data
			 * for the column.  If not, omit this column from the error
			 * message.
			 */
			aclresult = pg_attribute_aclcheck(reloid, att->attnum,
											  GetUserId(), ACL_SELECT);
			if (bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
							  modifiedCols) || aclresult == ACLCHECK_OK)
			{
				column_perm = any_perm = true;

				/* buf and collist stay in lockstep, one entry per column */
				if (write_comma_collist)
					appendStringInfoString(&collist, ", ");
				else
					write_comma_collist = true;

				appendStringInfoString(&collist, NameStr(att->attname));
			}
		}

		if (table_perm || column_perm)
		{
			if (slot->tts_isnull[i])
				val = "null";
			else
			{
				Oid			foutoid;
				bool		typisvarlena;

				/* convert the datum to text via the type's output function */
				getTypeOutputInfo(att->atttypid,
								  &foutoid, &typisvarlena);
				val = OidOutputFunctionCall(foutoid, slot->tts_values[i]);
			}

			if (write_comma)
				appendStringInfoString(&buf, ", ");
			else
				write_comma = true;

			/* truncate if needed, respecting multibyte character boundaries */
			vallen = strlen(val);
			if (vallen <= maxfieldlen)
				appendStringInfoString(&buf, val);
			else
			{
				vallen = pg_mbcliplen(val, vallen, maxfieldlen);
				appendBinaryStringInfo(&buf, val, vallen);
				appendStringInfoString(&buf, "...");
			}
		}
	}

	/* If we end up with zero columns being returned, then return NULL. */
	if (!any_perm)
		return NULL;

	appendStringInfoChar(&buf, ')');

	if (!table_perm)
	{
		/* Partial visibility: emit "(c1, c2) = (v1, v2)" instead */
		appendStringInfoString(&collist, ") = ");
		appendStringInfoString(&collist, buf.data);

		return collist.data;
	}

	return buf.data;
}
2350
2351
2352 /*
2353  * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
2354  * given ResultRelInfo
2355  */
2356 LockTupleMode
2357 ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
2358 {
2359         Bitmapset  *keyCols;
2360         Bitmapset  *updatedCols;
2361
2362         /*
2363          * Compute lock mode to use.  If columns that are part of the key have not
2364          * been modified, then we can use a weaker lock, allowing for better
2365          * concurrency.
2366          */
2367         updatedCols = GetUpdatedColumns(relinfo, estate);
2368         keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
2369                                                                                  INDEX_ATTR_BITMAP_KEY);
2370
2371         if (bms_overlap(keyCols, updatedCols))
2372                 return LockTupleExclusive;
2373
2374         return LockTupleNoKeyExclusive;
2375 }
2376
2377 /*
2378  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
2379  *
2380  * If no such struct, either return NULL or throw error depending on missing_ok
2381  */
2382 ExecRowMark *
2383 ExecFindRowMark(EState *estate, Index rti, bool missing_ok)
2384 {
2385         ListCell   *lc;
2386
2387         foreach(lc, estate->es_rowMarks)
2388         {
2389                 ExecRowMark *erm = (ExecRowMark *) lfirst(lc);
2390
2391                 if (erm->rti == rti)
2392                         return erm;
2393         }
2394         if (!missing_ok)
2395                 elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti);
2396         return NULL;
2397 }
2398
2399 /*
2400  * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct
2401  *
2402  * Inputs are the underlying ExecRowMark struct and the targetlist of the
2403  * input plan node (not planstate node!).  We need the latter to find out
2404  * the column numbers of the resjunk columns.
2405  */
2406 ExecAuxRowMark *
2407 ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
2408 {
2409         ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark));
2410         char            resname[32];
2411
2412         aerm->rowmark = erm;
2413
2414         /* Look up the resjunk columns associated with this rowmark */
2415         if (erm->markType != ROW_MARK_COPY)
2416         {
2417                 /* need ctid for all methods other than COPY */
2418                 snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId);
2419                 aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2420                                                                                                            resname);
2421                 if (!AttributeNumberIsValid(aerm->ctidAttNo))
2422                         elog(ERROR, "could not find junk %s column", resname);
2423         }
2424         else
2425         {
2426                 /* need wholerow if COPY */
2427                 snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId);
2428                 aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist,
2429                                                                                                                 resname);
2430                 if (!AttributeNumberIsValid(aerm->wholeAttNo))
2431                         elog(ERROR, "could not find junk %s column", resname);
2432         }
2433
2434         /* if child rel, need tableoid */
2435         if (erm->rti != erm->prti)
2436         {
2437                 snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId);
2438                 aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist,
2439                                                                                                            resname);
2440                 if (!AttributeNumberIsValid(aerm->toidAttNo))
2441                         elog(ERROR, "could not find junk %s column", resname);
2442         }
2443
2444         return aerm;
2445 }
2446
2447
2448 /*
2449  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
2450  * process the updated version under READ COMMITTED rules.
2451  *
2452  * See backend/executor/README for some info about how this works.
2453  */
2454
2455
/*
 * Check a modified tuple to see if we want to process its updated version
 * under READ COMMITTED rules.
 *
 *	estate - outer executor state data
 *	epqstate - state for EvalPlanQual rechecking
 *	relation - table containing tuple
 *	rti - rangetable index of table containing tuple
 *	lockmode - requested tuple lock mode
 *	*tid - t_ctid from the outdated tuple (ie, next updated version)
 *	priorXmax - t_xmax from the outdated tuple
 *
 * *tid is also an output parameter: it's modified to hold the TID of the
 * latest version of the tuple (note this may be changed even on failure)
 *
 * Returns a slot containing the new candidate update/delete tuple, or
 * NULL if we determine we shouldn't process the row.
 *
 * Note: properly, lockmode should be declared as enum LockTupleMode,
 * but we use "int" to avoid having to include heapam.h in executor.h.
 */
TupleTableSlot *
EvalPlanQual(EState *estate, EPQState *epqstate,
			 Relation relation, Index rti, int lockmode,
			 ItemPointer tid, TransactionId priorXmax)
{
	TupleTableSlot *slot;
	HeapTuple	copyTuple;

	Assert(rti > 0);

	/*
	 * Get and lock the updated version of the row; if fail, return NULL.
	 * On success, copyTuple is a palloc'd copy of the newest version.
	 */
	copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
								  tid, priorXmax);

	if (copyTuple == NULL)
		return NULL;

	/*
	 * For UPDATE/DELETE we have to return tid of actual row we're executing
	 * PQ for.
	 */
	*tid = copyTuple->t_self;

	/*
	 * Need to run a recheck subquery.  Initialize or reinitialize EPQ state.
	 */
	EvalPlanQualBegin(epqstate, estate);

	/*
	 * Free old test tuple, if any, and store new tuple where relation's scan
	 * node will see it.  (This transfers ownership of copyTuple to the EPQ
	 * state.)
	 */
	EvalPlanQualSetTuple(epqstate, rti, copyTuple);

	/*
	 * Fetch any non-locked source rows
	 */
	EvalPlanQualFetchRowMarks(epqstate);

	/*
	 * Run the EPQ query.  We assume it will return at most one tuple.
	 */
	slot = EvalPlanQualNext(epqstate);

	/*
	 * If we got a tuple, force the slot to materialize the tuple so that it
	 * is not dependent on any local state in the EPQ query (in particular,
	 * it's highly likely that the slot contains references to any pass-by-ref
	 * datums that may be present in copyTuple).  As with the next step, this
	 * is to guard against early re-use of the EPQ query.
	 */
	if (!TupIsNull(slot))
		(void) ExecMaterializeSlot(slot);

	/*
	 * Clear out the test tuple.  This is needed in case the EPQ query is
	 * re-used to test a tuple for a different relation.  (Not clear that can
	 * really happen, but let's be safe.)
	 */
	EvalPlanQualSetTuple(epqstate, rti, NULL);

	return slot;
}
2542
2543 /*
2544  * Fetch a copy of the newest version of an outdated tuple
2545  *
2546  *      estate - executor state data
2547  *      relation - table containing tuple
2548  *      lockmode - requested tuple lock mode
2549  *      wait_policy - requested lock wait policy
2550  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
2551  *      priorXmax - t_xmax from the outdated tuple
2552  *
2553  * Returns a palloc'd copy of the newest tuple version, or NULL if we find
2554  * that there is no newest version (ie, the row was deleted not updated).
2555  * We also return NULL if the tuple is locked and the wait policy is to skip
2556  * such tuples.
2557  *
2558  * If successful, we have locked the newest tuple version, so caller does not
2559  * need to worry about it changing anymore.
2560  *
2561  * Note: properly, lockmode should be declared as enum LockTupleMode,
2562  * but we use "int" to avoid having to include heapam.h in executor.h.
2563  */
2564 HeapTuple
2565 EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
2566                                   LockWaitPolicy wait_policy,
2567                                   ItemPointer tid, TransactionId priorXmax)
2568 {
2569         HeapTuple       copyTuple = NULL;
2570         HeapTupleData tuple;
2571         SnapshotData SnapshotDirty;
2572
2573         /*
2574          * fetch target tuple
2575          *
2576          * Loop here to deal with updated or busy tuples
2577          */
2578         InitDirtySnapshot(SnapshotDirty);
2579         tuple.t_self = *tid;
2580         for (;;)
2581         {
2582                 Buffer          buffer;
2583
2584                 if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
2585                 {
2586                         HTSU_Result test;
2587                         HeapUpdateFailureData hufd;
2588
2589                         /*
2590                          * If xmin isn't what we're expecting, the slot must have been
2591                          * recycled and reused for an unrelated tuple.  This implies that
2592                          * the latest version of the row was deleted, so we need do
2593                          * nothing.  (Should be safe to examine xmin without getting
2594                          * buffer's content lock.  We assume reading a TransactionId to be
2595                          * atomic, and Xmin never changes in an existing tuple, except to
2596                          * invalid or frozen, and neither of those can match priorXmax.)
2597                          */
2598                         if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2599                                                                          priorXmax))
2600                         {
2601                                 ReleaseBuffer(buffer);
2602                                 return NULL;
2603                         }
2604
2605                         /* otherwise xmin should not be dirty... */
2606                         if (TransactionIdIsValid(SnapshotDirty.xmin))
2607                                 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
2608
2609                         /*
2610                          * If tuple is being updated by other transaction then we have to
2611                          * wait for its commit/abort, or die trying.
2612                          */
2613                         if (TransactionIdIsValid(SnapshotDirty.xmax))
2614                         {
2615                                 ReleaseBuffer(buffer);
2616                                 switch (wait_policy)
2617                                 {
2618                                         case LockWaitBlock:
2619                                                 XactLockTableWait(SnapshotDirty.xmax,
2620                                                                                   relation, &tuple.t_self,
2621                                                                                   XLTW_FetchUpdated);
2622                                                 break;
2623                                         case LockWaitSkip:
2624                                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
2625                                                         return NULL;    /* skip instead of waiting */
2626                                                 break;
2627                                         case LockWaitError:
2628                                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
2629                                                         ereport(ERROR,
2630                                                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
2631                                                                          errmsg("could not obtain lock on row in relation \"%s\"",
2632                                                                                         RelationGetRelationName(relation))));
2633                                                 break;
2634                                 }
2635                                 continue;               /* loop back to repeat heap_fetch */
2636                         }
2637
2638                         /*
2639                          * If tuple was inserted by our own transaction, we have to check
2640                          * cmin against es_output_cid: cmin >= current CID means our
2641                          * command cannot see the tuple, so we should ignore it. Otherwise
2642                          * heap_lock_tuple() will throw an error, and so would any later
2643                          * attempt to update or delete the tuple.  (We need not check cmax
2644                          * because HeapTupleSatisfiesDirty will consider a tuple deleted
2645                          * by our transaction dead, regardless of cmax.) We just checked
2646                          * that priorXmax == xmin, so we can test that variable instead of
2647                          * doing HeapTupleHeaderGetXmin again.
2648                          */
2649                         if (TransactionIdIsCurrentTransactionId(priorXmax) &&
2650                                 HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
2651                         {
2652                                 ReleaseBuffer(buffer);
2653                                 return NULL;
2654                         }
2655
2656                         /*
2657                          * This is a live tuple, so now try to lock it.
2658                          */
2659                         test = heap_lock_tuple(relation, &tuple,
2660                                                                    estate->es_output_cid,
2661                                                                    lockmode, wait_policy,
2662                                                                    false, &buffer, &hufd);
2663                         /* We now have two pins on the buffer, get rid of one */
2664                         ReleaseBuffer(buffer);
2665
2666                         switch (test)
2667                         {
2668                                 case HeapTupleSelfUpdated:
2669
2670                                         /*
2671                                          * The target tuple was already updated or deleted by the
2672                                          * current command, or by a later command in the current
2673                                          * transaction.  We *must* ignore the tuple in the former
2674                                          * case, so as to avoid the "Halloween problem" of
2675                                          * repeated update attempts.  In the latter case it might
2676                                          * be sensible to fetch the updated tuple instead, but
2677                                          * doing so would require changing heap_update and
2678                                          * heap_delete to not complain about updating "invisible"
2679                                          * tuples, which seems pretty scary (heap_lock_tuple will
2680                                          * not complain, but few callers expect
2681                                          * HeapTupleInvisible, and we're not one of them).  So for
2682                                          * now, treat the tuple as deleted and do not process.
2683                                          */
2684                                         ReleaseBuffer(buffer);
2685                                         return NULL;
2686
2687                                 case HeapTupleMayBeUpdated:
2688                                         /* successfully locked */
2689                                         break;
2690
2691                                 case HeapTupleUpdated:
2692                                         ReleaseBuffer(buffer);
2693                                         if (IsolationUsesXactSnapshot())
2694                                                 ereport(ERROR,
2695                                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2696                                                                  errmsg("could not serialize access due to concurrent update")));
2697
2698                                         /* Should not encounter speculative tuple on recheck */
2699                                         Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
2700                                         if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
2701                                         {
2702                                                 /* it was updated, so look at the updated version */
2703                                                 tuple.t_self = hufd.ctid;
2704                                                 /* updated row should have xmin matching this xmax */
2705                                                 priorXmax = hufd.xmax;
2706                                                 continue;
2707                                         }
2708                                         /* tuple was deleted, so give up */
2709                                         return NULL;
2710
2711                                 case HeapTupleWouldBlock:
2712                                         ReleaseBuffer(buffer);
2713                                         return NULL;
2714
2715                                 case HeapTupleInvisible:
2716                                         elog(ERROR, "attempted to lock invisible tuple");
2717
2718                                 default:
2719                                         ReleaseBuffer(buffer);
2720                                         elog(ERROR, "unrecognized heap_lock_tuple status: %u",
2721                                                  test);
2722                                         return NULL;    /* keep compiler quiet */
2723                         }
2724
2725                         /*
2726                          * We got tuple - now copy it for use by recheck query.
2727                          */
2728                         copyTuple = heap_copytuple(&tuple);
2729                         ReleaseBuffer(buffer);
2730                         break;
2731                 }
2732
2733                 /*
2734                  * If the referenced slot was actually empty, the latest version of
2735                  * the row must have been deleted, so we need do nothing.
2736                  */
2737                 if (tuple.t_data == NULL)
2738                 {
2739                         ReleaseBuffer(buffer);
2740                         return NULL;
2741                 }
2742
2743                 /*
2744                  * As above, if xmin isn't what we're expecting, do nothing.
2745                  */
2746                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2747                                                                  priorXmax))
2748                 {
2749                         ReleaseBuffer(buffer);
2750                         return NULL;
2751                 }
2752
2753                 /*
2754                  * If we get here, the tuple was found but failed SnapshotDirty.
2755                  * Assuming the xmin is either a committed xact or our own xact (as it
2756                  * certainly should be if we're trying to modify the tuple), this must
2757                  * mean that the row was updated or deleted by either a committed xact
2758                  * or our own xact.  If it was deleted, we can ignore it; if it was
2759                  * updated then chain up to the next version and repeat the whole
2760                  * process.
2761                  *
2762                  * As above, it should be safe to examine xmax and t_ctid without the
2763                  * buffer content lock, because they can't be changing.
2764                  */
2765                 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2766                 {
2767                         /* deleted, so forget about it */
2768                         ReleaseBuffer(buffer);
2769                         return NULL;
2770                 }
2771
2772                 /* updated, so look at the updated row */
2773                 tuple.t_self = tuple.t_data->t_ctid;
2774                 /* updated row should have xmin matching this xmax */
2775                 priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
2776                 ReleaseBuffer(buffer);
2777                 /* loop back to fetch next in chain */
2778         }
2779
2780         /*
2781          * Return the copied tuple
2782          */
2783         return copyTuple;
2784 }
2785
2786 /*
2787  * EvalPlanQualInit -- initialize during creation of a plan state node
2788  * that might need to invoke EPQ processing.
2789  *
2790  * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later
2791  * with EvalPlanQualSetPlan.
2792  */
2793 void
2794 EvalPlanQualInit(EPQState *epqstate, EState *estate,
2795                                  Plan *subplan, List *auxrowmarks, int epqParam)
2796 {
2797         /* Mark the EPQ state inactive */
2798         epqstate->estate = NULL;
2799         epqstate->planstate = NULL;
2800         epqstate->origslot = NULL;
2801         /* ... and remember data that EvalPlanQualBegin will need */
2802         epqstate->plan = subplan;
2803         epqstate->arowMarks = auxrowmarks;
2804         epqstate->epqParam = epqParam;
2805 }
2806
2807 /*
2808  * EvalPlanQualSetPlan -- set or change subplan of an EPQState.
2809  *
2810  * We need this so that ModifyTable can deal with multiple subplans.
2811  */
2812 void
2813 EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
2814 {
2815         /* If we have a live EPQ query, shut it down */
2816         EvalPlanQualEnd(epqstate);
2817         /* And set/change the plan pointer */
2818         epqstate->plan = subplan;
2819         /* The rowmarks depend on the plan, too */
2820         epqstate->arowMarks = auxrowmarks;
2821 }
2822
2823 /*
2824  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
2825  *
2826  * NB: passed tuple must be palloc'd; it may get freed later
2827  */
2828 void
2829 EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
2830 {
2831         EState     *estate = epqstate->estate;
2832
2833         Assert(rti > 0);
2834
2835         /*
2836          * free old test tuple, if any, and store new tuple where relation's scan
2837          * node will see it
2838          */
2839         if (estate->es_epqTuple[rti - 1] != NULL)
2840                 heap_freetuple(estate->es_epqTuple[rti - 1]);
2841         estate->es_epqTuple[rti - 1] = tuple;
2842         estate->es_epqTupleSet[rti - 1] = true;
2843 }
2844
2845 /*
2846  * Fetch back the current test tuple (if any) for the specified RTI
2847  */
2848 HeapTuple
2849 EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
2850 {
2851         EState     *estate = epqstate->estate;
2852
2853         Assert(rti > 0);
2854
2855         return estate->es_epqTuple[rti - 1];
2856 }
2857
/*
 * Fetch the current row values for any non-locked relations that need
 * to be scanned by an EvalPlanQual operation.  origslot must have been set
 * to contain the current result row (top-level row) that we need to recheck.
 *
 * For each auxiliary rowmark, this refetches the current version of the
 * row (ROW_MARK_REFERENCE, via ctid or FDW refetch) or reconstitutes it
 * from the whole-row junk attribute (ROW_MARK_COPY), and stores a copy
 * into the EPQ test-tuple array via EvalPlanQualSetTuple.
 */
void
EvalPlanQualFetchRowMarks(EPQState *epqstate)
{
	ListCell   *l;

	Assert(epqstate->origslot != NULL);

	foreach(l, epqstate->arowMarks)
	{
		ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(l);
		ExecRowMark *erm = aerm->rowmark;
		Datum		datum;
		bool		isNull;
		HeapTupleData tuple;

		/* locking-type rowmarks are handled elsewhere, not by EPQ refetch */
		if (RowMarkRequiresRowShareLock(erm->markType))
			elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");

		/* clear any leftover test tuple for this rel */
		EvalPlanQualSetTuple(epqstate, erm->rti, NULL);

		/* if child rel, must check whether it produced this row */
		if (erm->rti != erm->prti)
		{
			Oid			tableoid;

			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->toidAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			tableoid = DatumGetObjectId(datum);

			Assert(OidIsValid(erm->relid));
			if (tableoid != erm->relid)
			{
				/* this child is inactive right now */
				continue;
			}
		}

		if (erm->markType == ROW_MARK_REFERENCE)
		{
			HeapTuple	copyTuple;

			Assert(erm->relation != NULL);

			/* fetch the tuple's ctid */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->ctidAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;

			/* fetch requests on foreign tables must be passed to their FDW */
			if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
			{
				FdwRoutine *fdwroutine;
				bool		updated = false;

				fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
				/* this should have been checked already, but let's be safe */
				if (fdwroutine->RefetchForeignRow == NULL)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("cannot lock rows in foreign table \"%s\"",
									RelationGetRelationName(erm->relation))));
				copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
														  erm,
														  datum,
														  &updated);
				if (copyTuple == NULL)
					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");

				/*
				 * Ideally we'd insist on updated == false here, but that
				 * assumes that FDWs can track that exactly, which they might
				 * not be able to.  So just ignore the flag.
				 */
			}
			else
			{
				/* ordinary table, fetch the tuple */
				Buffer		buffer;

				/* SnapshotAny: we want whatever version the ctid points at */
				tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
				if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
								false, NULL))
					elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");

				/* successful, copy tuple before dropping the buffer pin */
				copyTuple = heap_copytuple(&tuple);
				ReleaseBuffer(buffer);
			}

			/* store tuple (EPQ state takes ownership of the copy) */
			EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
		}
		else
		{
			HeapTupleHeader td;

			Assert(erm->markType == ROW_MARK_COPY);

			/* fetch the whole-row Var for the relation */
			datum = ExecGetJunkAttribute(epqstate->origslot,
										 aerm->wholeAttNo,
										 &isNull);
			/* non-locked rels could be on the inside of outer joins */
			if (isNull)
				continue;
			td = DatumGetHeapTupleHeader(datum);

			/* build a temporary HeapTuple control structure */
			tuple.t_len = HeapTupleHeaderGetDatumLength(td);
			tuple.t_data = td;
			/* relation might be a foreign table, if so provide tableoid */
			tuple.t_tableOid = erm->relid;
			/* also copy t_ctid in case there's valid data there */
			tuple.t_self = td->t_ctid;

			/* copy and store tuple */
			EvalPlanQualSetTuple(epqstate, erm->rti,
								 heap_copytuple(&tuple));
		}
	}
}
2992
2993 /*
2994  * Fetch the next row (if any) from EvalPlanQual testing
2995  *
2996  * (In practice, there should never be more than one row...)
2997  */
2998 TupleTableSlot *
2999 EvalPlanQualNext(EPQState *epqstate)
3000 {
3001         MemoryContext oldcontext;
3002         TupleTableSlot *slot;
3003
3004         oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
3005         slot = ExecProcNode(epqstate->planstate);
3006         MemoryContextSwitchTo(oldcontext);
3007
3008         return slot;
3009 }
3010
3011 /*
3012  * Initialize or reset an EvalPlanQual state tree
3013  */
3014 void
3015 EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
3016 {
3017         EState     *estate = epqstate->estate;
3018
3019         if (estate == NULL)
3020         {
3021                 /* First time through, so create a child EState */
3022                 EvalPlanQualStart(epqstate, parentestate, epqstate->plan);
3023         }
3024         else
3025         {
3026                 /*
3027                  * We already have a suitable child EPQ tree, so just reset it.
3028                  */
3029                 int                     rtsize = list_length(parentestate->es_range_table);
3030                 PlanState  *planstate = epqstate->planstate;
3031
3032                 MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
3033
3034                 /* Recopy current values of parent parameters */
3035                 if (parentestate->es_plannedstmt->nParamExec > 0)
3036                 {
3037                         int                     i = parentestate->es_plannedstmt->nParamExec;
3038
3039                         while (--i >= 0)
3040                         {
3041                                 /* copy value if any, but not execPlan link */
3042                                 estate->es_param_exec_vals[i].value =
3043                                         parentestate->es_param_exec_vals[i].value;
3044                                 estate->es_param_exec_vals[i].isnull =
3045                                         parentestate->es_param_exec_vals[i].isnull;
3046                         }
3047                 }
3048
3049                 /*
3050                  * Mark child plan tree as needing rescan at all scan nodes.  The
3051                  * first ExecProcNode will take care of actually doing the rescan.
3052                  */
3053                 planstate->chgParam = bms_add_member(planstate->chgParam,
3054                                                                                          epqstate->epqParam);
3055         }
3056 }
3057
/*
 * Start execution of an EvalPlanQual plan tree.
 *
 * This is a cut-down version of ExecutorStart(): we copy some state from
 * the top-level estate rather than initializing it fresh.
 *
 * On return, epqstate->estate and epqstate->planstate are set up and ready
 * for EvalPlanQualNext calls.
 */
static void
EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
{
	EState	   *estate;
	int			rtsize;
	MemoryContext oldcontext;
	ListCell   *l;

	rtsize = list_length(parentestate->es_range_table);

	epqstate->estate = estate = CreateExecutorState();

	/* all child-EState allocations must live in its own query context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Child EPQ EStates share the parent's copy of unchanging state such as
	 * the snapshot, rangetable, result-rel info, and external Param info.
	 * They need their own copies of local state, including a tuple table,
	 * es_param_exec_vals, etc.
	 *
	 * The ResultRelInfo array management is trickier than it looks.  We
	 * create a fresh array for the child but copy all the content from the
	 * parent.  This is because it's okay for the child to share any
	 * per-relation state the parent has already created --- but if the child
	 * sets up any ResultRelInfo fields, such as its own junkfilter, that
	 * state must *not* propagate back to the parent.  (For one thing, the
	 * pointed-to data is in a memory context that won't last long enough.)
	 */
	estate->es_direction = ForwardScanDirection;
	estate->es_snapshot = parentestate->es_snapshot;
	estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
	estate->es_range_table = parentestate->es_range_table;
	estate->es_plannedstmt = parentestate->es_plannedstmt;
	estate->es_junkFilter = parentestate->es_junkFilter;
	estate->es_output_cid = parentestate->es_output_cid;
	if (parentestate->es_num_result_relations > 0)
	{
		int			numResultRelations = parentestate->es_num_result_relations;
		ResultRelInfo *resultRelInfos;

		/* fresh array, shared content -- see comment above */
		resultRelInfos = (ResultRelInfo *)
			palloc(numResultRelations * sizeof(ResultRelInfo));
		memcpy(resultRelInfos, parentestate->es_result_relations,
			   numResultRelations * sizeof(ResultRelInfo));
		estate->es_result_relations = resultRelInfos;
		estate->es_num_result_relations = numResultRelations;
	}
	/* es_result_relation_info must NOT be copied */
	/* es_trig_target_relations must NOT be copied */
	estate->es_rowMarks = parentestate->es_rowMarks;
	estate->es_top_eflags = parentestate->es_top_eflags;
	estate->es_instrument = parentestate->es_instrument;
	/* es_auxmodifytables must NOT be copied */

	/*
	 * The external param list is simply shared from parent.  The internal
	 * param workspace has to be local state, but we copy the initial values
	 * from the parent, so as to have access to any param values that were
	 * already set from other parts of the parent's plan tree.
	 */
	estate->es_param_list_info = parentestate->es_param_list_info;
	if (parentestate->es_plannedstmt->nParamExec > 0)
	{
		int			i = parentestate->es_plannedstmt->nParamExec;

		estate->es_param_exec_vals = (ParamExecData *)
			palloc0(i * sizeof(ParamExecData));
		while (--i >= 0)
		{
			/* copy value if any, but not execPlan link */
			estate->es_param_exec_vals[i].value =
				parentestate->es_param_exec_vals[i].value;
			estate->es_param_exec_vals[i].isnull =
				parentestate->es_param_exec_vals[i].isnull;
		}
	}

	/*
	 * Each EState must have its own es_epqScanDone state, but if we have
	 * nested EPQ checks they should share es_epqTuple arrays.  This allows
	 * sub-rechecks to inherit the values being examined by an outer recheck.
	 */
	estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
	if (parentestate->es_epqTuple != NULL)
	{
		/* nested EPQ: share the outer recheck's test-tuple arrays */
		estate->es_epqTuple = parentestate->es_epqTuple;
		estate->es_epqTupleSet = parentestate->es_epqTupleSet;
	}
	else
	{
		/* outermost EPQ: allocate fresh (all-NULL/false) arrays */
		estate->es_epqTuple = (HeapTuple *)
			palloc0(rtsize * sizeof(HeapTuple));
		estate->es_epqTupleSet = (bool *)
			palloc0(rtsize * sizeof(bool));
	}

	/*
	 * Each estate also has its own tuple table.
	 */
	estate->es_tupleTable = NIL;

	/*
	 * Initialize private state information for each SubPlan.  We must do this
	 * before running ExecInitNode on the main query tree, since
	 * ExecInitSubPlan expects to be able to find these entries. Some of the
	 * SubPlans might not be used in the part of the plan tree we intend to
	 * run, but since it's not easy to tell which, we just initialize them
	 * all.
	 */
	Assert(estate->es_subplanstates == NIL);
	foreach(l, parentestate->es_plannedstmt->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(l);
		PlanState  *subplanstate;

		subplanstate = ExecInitNode(subplan, estate, 0);
		estate->es_subplanstates = lappend(estate->es_subplanstates,
										   subplanstate);
	}

	/*
	 * Initialize the private state information for all the nodes in the part
	 * of the plan tree we need to run.  This opens files, allocates storage
	 * and leaves us ready to start processing tuples.
	 */
	epqstate->planstate = ExecInitNode(planTree, estate, 0);

	MemoryContextSwitchTo(oldcontext);
}
3193
/*
 * EvalPlanQualEnd -- shut down at termination of parent plan state node,
 * or if we are done with the current EPQ child.
 *
 * This is a cut-down version of ExecutorEnd(); basically we want to do most
 * of the normal cleanup, but *not* close result relations (which we are
 * just sharing from the outer query).  We do, however, have to close any
 * trigger target relations that got opened, since those are not shared.
 * (There probably shouldn't be any of the latter, but just in case...)
 *
 * Safe to call on an idle EPQState; it's a no-op then.  Afterwards the
 * EPQState is marked idle so it can be reused via EvalPlanQualBegin.
 */
void
EvalPlanQualEnd(EPQState *epqstate)
{
	EState	   *estate = epqstate->estate;
	MemoryContext oldcontext;
	ListCell   *l;

	if (estate == NULL)
		return;					/* idle, so nothing to do */

	/* do shutdown work in the child EState's query context */
	oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

	ExecEndNode(epqstate->planstate);

	/* shut down the subplan states initialized by EvalPlanQualStart */
	foreach(l, estate->es_subplanstates)
	{
		PlanState  *subplanstate = (PlanState *) lfirst(l);

		ExecEndNode(subplanstate);
	}

	/* throw away the per-estate tuple table */
	ExecResetTupleTable(estate->es_tupleTable, false);

	/* close any trigger target relations attached to this EState */
	ExecCleanUpTriggerState(estate);

	/* must leave the child context before destroying it */
	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(estate);

	/* Mark EPQState idle */
	epqstate->estate = NULL;
	epqstate->planstate = NULL;
	epqstate->origslot = NULL;
}
3240
3241 /*
3242  * ExecSetupPartitionTupleRouting - set up information needed during
3243  * tuple routing for partitioned tables
3244  *
3245  * Output arguments:
3246  * 'pd' receives an array of PartitionDispatch objects with one entry for
3247  *              every partitioned table in the partition tree
3248  * 'partitions' receives an array of ResultRelInfo* objects with one entry for
3249  *              every leaf partition in the partition tree
3250  * 'tup_conv_maps' receives an array of TupleConversionMap objects with one
3251  *              entry for every leaf partition (required to convert input tuple based
3252  *              on the root table's rowtype to a leaf partition's rowtype after tuple
3253  *              routing is done)
3254  * 'partition_tuple_slot' receives a standalone TupleTableSlot to be used
3255  *              to manipulate any given leaf partition's rowtype after that partition
3256  *              is chosen by tuple-routing.
3257  * 'num_parted' receives the number of partitioned tables in the partition
3258  *              tree (= the number of entries in the 'pd' output array)
3259  * 'num_partitions' receives the number of leaf partitions in the partition
3260  *              tree (= the number of entries in the 'partitions' and 'tup_conv_maps'
3261  *              output arrays
3262  *
3263  * Note that all the relations in the partition tree are locked using the
3264  * RowExclusiveLock mode upon return from this function.
3265  */
void
ExecSetupPartitionTupleRouting(Relation rel,
							   Index resultRTindex,
							   EState *estate,
							   PartitionDispatch **pd,
							   ResultRelInfo ***partitions,
							   TupleConversionMap ***tup_conv_maps,
							   TupleTableSlot **partition_tuple_slot,
							   int *num_parted, int *num_partitions)
{
	TupleDesc	tupDesc = RelationGetDescr(rel);
	List	   *leaf_parts;
	ListCell   *cell;
	int			i;
	ResultRelInfo *leaf_part_rri;

	/*
	 * Get the information about the partition tree after locking all the
	 * partitions.  The locks must be taken before building the dispatch
	 * info so that the tree cannot change underneath us.
	 */
	(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
	*pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts);
	*num_partitions = list_length(leaf_parts);
	*partitions = (ResultRelInfo **) palloc(*num_partitions *
											sizeof(ResultRelInfo *));
	/*
	 * palloc0 so that any entry we don't overwrite below stays NULL;
	 * presumably a NULL map means "no conversion needed" to the consumers
	 * of tup_conv_maps (convert_tuples_by_name's usual convention) --
	 * NOTE(review): confirm against callers.
	 */
	*tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
													 sizeof(TupleConversionMap *));

	/*
	 * Initialize an empty slot that will be used to manipulate tuples of any
	 * given partition's rowtype.  It is attached to the caller-specified node
	 * (such as ModifyTableState) and released when the node finishes
	 * processing.
	 */
	*partition_tuple_slot = MakeTupleTableSlot();

	/*
	 * All the leaf partitions' ResultRelInfos live in one contiguous,
	 * zeroed array.  In the loop below, leaf_part_rri is bumped past each
	 * element as it is initialized, while (*partitions)[i] records the
	 * per-partition pointers into that array.
	 */
	leaf_part_rri = (ResultRelInfo *) palloc0(*num_partitions *
											  sizeof(ResultRelInfo));
	i = 0;
	foreach(cell, leaf_parts)
	{
		Relation	partrel;
		TupleDesc	part_tupdesc;

		/*
		 * We locked all the partitions above including the leaf partitions.
		 * Note that each of the relations in *partitions are eventually
		 * closed by the caller.
		 */
		partrel = heap_open(lfirst_oid(cell), NoLock);
		part_tupdesc = RelationGetDescr(partrel);

		/*
		 * Save a tuple conversion map to convert a tuple routed to this
		 * partition from the parent's type to the partition's.
		 */
		(*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, part_tupdesc,
													 gettext_noop("could not convert row type"));

		InitResultRelInfo(leaf_part_rri,
						  partrel,
						  resultRTindex,
						  rel,
						  estate->es_instrument);

		/*
		 * Verify result relation is a valid target for INSERT.
		 */
		CheckValidResultRel(leaf_part_rri, CMD_INSERT);

		/*
		 * Open partition indices (remember we do not support ON CONFLICT in
		 * case of partitioned tables, so we do not need support information
		 * for speculative insertion)
		 */
		if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
			leaf_part_rri->ri_IndexRelationDescs == NULL)
			ExecOpenIndices(leaf_part_rri, false);

		/* Remember this rel so the executor can clean it up at shutdown. */
		estate->es_leaf_result_relations =
			lappend(estate->es_leaf_result_relations, leaf_part_rri);

		/* Record pointer, then advance to the next array slot. */
		(*partitions)[i] = leaf_part_rri++;
		i++;
	}
}
3352
3353 /*
3354  * ExecFindPartition -- Find a leaf partition in the partition tree rooted
3355  * at parent, for the heap tuple contained in *slot
3356  *
3357  * estate must be non-NULL; we'll need it to compute any expressions in the
3358  * partition key(s)
3359  *
3360  * If no leaf partition is found, this routine errors out with the appropriate
3361  * error message, else it returns the leaf partition sequence number returned
3362  * by get_partition_for_tuple() unchanged.
3363  */
3364 int
3365 ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
3366                                   TupleTableSlot *slot, EState *estate)
3367 {
3368         int                     result;
3369         PartitionDispatchData *failed_at;
3370         TupleTableSlot *failed_slot;
3371
3372         /*
3373          * First check the root table's partition constraint, if any.  No point in
3374          * routing the tuple if it doesn't belong in the root table itself.
3375          */
3376         if (resultRelInfo->ri_PartitionCheck)
3377                 ExecPartitionCheck(resultRelInfo, slot, estate);
3378
3379         result = get_partition_for_tuple(pd, slot, estate,
3380                                                                          &failed_at, &failed_slot);
3381         if (result < 0)
3382         {
3383                 Relation        failed_rel;
3384                 Datum           key_values[PARTITION_MAX_KEYS];
3385                 bool            key_isnull[PARTITION_MAX_KEYS];
3386                 char       *val_desc;
3387                 ExprContext *ecxt = GetPerTupleExprContext(estate);
3388
3389                 failed_rel = failed_at->reldesc;
3390                 ecxt->ecxt_scantuple = failed_slot;
3391                 FormPartitionKeyDatum(failed_at, failed_slot, estate,
3392                                                           key_values, key_isnull);
3393                 val_desc = ExecBuildSlotPartitionKeyDescription(failed_rel,
3394                                                                                                                 key_values,
3395                                                                                                                 key_isnull,
3396                                                                                                                 64);
3397                 Assert(OidIsValid(RelationGetRelid(failed_rel)));
3398                 ereport(ERROR,
3399                                 (errcode(ERRCODE_CHECK_VIOLATION),
3400                                  errmsg("no partition of relation \"%s\" found for row",
3401                                                 RelationGetRelationName(failed_rel)),
3402                                  val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
3403         }
3404
3405         return result;
3406 }
3407
3408 /*
3409  * BuildSlotPartitionKeyDescription
3410  *
3411  * This works very much like BuildIndexValueDescription() and is currently
3412  * used for building error messages when ExecFindPartition() fails to find
3413  * partition for a row.
3414  */
3415 static char *
3416 ExecBuildSlotPartitionKeyDescription(Relation rel,
3417                                                                          Datum *values,
3418                                                                          bool *isnull,
3419                                                                          int maxfieldlen)
3420 {
3421         StringInfoData buf;
3422         PartitionKey key = RelationGetPartitionKey(rel);
3423         int                     partnatts = get_partition_natts(key);
3424         int                     i;
3425         Oid                     relid = RelationGetRelid(rel);
3426         AclResult       aclresult;
3427
3428         if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
3429                 return NULL;
3430
3431         /* If the user has table-level access, just go build the description. */
3432         aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
3433         if (aclresult != ACLCHECK_OK)
3434         {
3435                 /*
3436                  * Step through the columns of the partition key and make sure the
3437                  * user has SELECT rights on all of them.
3438                  */
3439                 for (i = 0; i < partnatts; i++)
3440                 {
3441                         AttrNumber      attnum = get_partition_col_attnum(key, i);
3442
3443                         /*
3444                          * If this partition key column is an expression, we return no
3445                          * detail rather than try to figure out what column(s) the
3446                          * expression includes and if the user has SELECT rights on them.
3447                          */
3448                         if (attnum == InvalidAttrNumber ||
3449                                 pg_attribute_aclcheck(relid, attnum, GetUserId(),
3450                                                                           ACL_SELECT) != ACLCHECK_OK)
3451                                 return NULL;
3452                 }
3453         }
3454
3455         initStringInfo(&buf);
3456         appendStringInfo(&buf, "(%s) = (",
3457                                          pg_get_partkeydef_columns(relid, true));
3458
3459         for (i = 0; i < partnatts; i++)
3460         {
3461                 char       *val;
3462                 int                     vallen;
3463
3464                 if (isnull[i])
3465                         val = "null";
3466                 else
3467                 {
3468                         Oid                     foutoid;
3469                         bool            typisvarlena;
3470
3471                         getTypeOutputInfo(get_partition_col_typid(key, i),
3472                                                           &foutoid, &typisvarlena);
3473                         val = OidOutputFunctionCall(foutoid, values[i]);
3474                 }
3475
3476                 if (i > 0)
3477                         appendStringInfoString(&buf, ", ");
3478
3479                 /* truncate if needed */
3480                 vallen = strlen(val);
3481                 if (vallen <= maxfieldlen)
3482                         appendStringInfoString(&buf, val);
3483                 else
3484                 {
3485                         vallen = pg_mbcliplen(val, vallen, maxfieldlen);
3486                         appendBinaryStringInfo(&buf, val, vallen);
3487                         appendStringInfoString(&buf, "...");
3488                 }
3489         }
3490
3491         appendStringInfoChar(&buf, ')');
3492
3493         return buf.data;
3494 }