1 /*-------------------------------------------------------------------------
2  *
3  * execMain.c
4  *        top level executor interface routines
5  *
6  * INTERFACE ROUTINES
7  *      ExecutorStart()
8  *      ExecutorRun()
9  *      ExecutorEnd()
10  *
11  *      The old ExecutorMain() has been replaced by ExecutorStart(),
12  *      ExecutorRun() and ExecutorEnd()
13  *
14  *      These three procedures are the external interfaces to the executor.
15  *      In each case, the query descriptor is required as an argument.
16  *
17  *      ExecutorStart() must be called at the beginning of execution of any
18  *      query plan and ExecutorEnd() should always be called at the end of
19  *      execution of a plan.
20  *
21  *      ExecutorRun accepts direction and count arguments that specify whether
22  *      the plan is to be executed forwards or backwards, and for how many tuples.
23  *
24  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  *        $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.275 2006/07/13 16:49:14 momjian Exp $
30  *
31  *-------------------------------------------------------------------------
32  */
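/*
 * A minimal sketch of the calling sequence described above, as the traffic
 * cop or a portal might drive it (illustrative only; CreateQueryDesc's full
 * argument list and all error handling are elided):
 *
 *              queryDesc = CreateQueryDesc(parsetree, plantree, snapshot, ...);
 *
 *              ExecutorStart(queryDesc, 0);                        -- eflags = 0: no special flags
 *              ExecutorRun(queryDesc, ForwardScanDirection, 0L);   -- count = 0: run to completion
 *              ExecutorEnd(queryDesc);                             -- releases everything the executor allocated
 */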
33 #include "postgres.h"
34
35 #include "access/heapam.h"
36 #include "access/reloptions.h"
37 #include "access/transam.h"
38 #include "access/xact.h"
39 #include "access/xlog.h"
40 #include "catalog/heap.h"
41 #include "catalog/namespace.h"
42 #include "commands/tablecmds.h"
43 #include "commands/tablespace.h"
44 #include "commands/trigger.h"
45 #include "executor/execdebug.h"
46 #include "executor/execdefs.h"
47 #include "executor/instrument.h"
48 #include "miscadmin.h"
49 #include "optimizer/clauses.h"
50 #include "optimizer/var.h"
51 #include "parser/parse_clause.h"
52 #include "parser/parsetree.h"
53 #include "storage/smgr.h"
54 #include "utils/acl.h"
55 #include "utils/guc.h"
56 #include "utils/lsyscache.h"
57 #include "utils/memutils.h"
58
59
60 typedef struct evalPlanQual
61 {
62         Index           rti;
63         EState     *estate;
64         PlanState  *planstate;
65         struct evalPlanQual *next;      /* stack of active PlanQual plans */
66         struct evalPlanQual *free;      /* list of free PlanQual plans */
67 } evalPlanQual;
68
69 /* decls for local routines only used within this module */
70 static void InitPlan(QueryDesc *queryDesc, int eflags);
71 static void initResultRelInfo(ResultRelInfo *resultRelInfo,
72                                   Index resultRelationIndex,
73                                   List *rangeTable,
74                                   CmdType operation,
75                                   bool doInstrument);
76 static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
77                         CmdType operation,
78                         long numberTuples,
79                         ScanDirection direction,
80                         DestReceiver *dest);
81 static void ExecSelect(TupleTableSlot *slot,
82                    DestReceiver *dest,
83                    EState *estate);
84 static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
85                    EState *estate);
86 static void ExecDelete(TupleTableSlot *slot, ItemPointer tupleid,
87                    EState *estate);
88 static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
89                    EState *estate);
90 static TupleTableSlot *EvalPlanQualNext(EState *estate);
91 static void EndEvalPlanQual(EState *estate);
92 static void ExecCheckRTEPerms(RangeTblEntry *rte);
93 static void ExecCheckXactReadOnly(Query *parsetree);
94 static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
95                                   evalPlanQual *priorepq);
96 static void EvalPlanQualStop(evalPlanQual *epq);
97
98 /* end of local decls */
99
100
101 /* ----------------------------------------------------------------
102  *              ExecutorStart
103  *
104  *              This routine must be called at the beginning of any execution of any
105  *              query plan
106  *
107  * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
108  * clear why we bother to separate the two functions, but...).  The tupDesc
109  * field of the QueryDesc is filled in to describe the tuples that will be
110  * returned, and the internal fields (estate and planstate) are set up.
111  *
112  * eflags contains flag bits as described in executor.h.
113  *
114  * NB: the CurrentMemoryContext when this is called will become the parent
115  * of the per-query context used for this Executor invocation.
116  * ----------------------------------------------------------------
117  */
118 void
119 ExecutorStart(QueryDesc *queryDesc, int eflags)
120 {
121         EState     *estate;
122         MemoryContext oldcontext;
123
124         /* sanity checks: queryDesc must not be started already */
125         Assert(queryDesc != NULL);
126         Assert(queryDesc->estate == NULL);
127
128         /*
129          * If the transaction is read-only, we need to check if any writes are
130          * planned to non-temporary tables.  EXPLAIN is considered read-only.
131          */
132         if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
133                 ExecCheckXactReadOnly(queryDesc->parsetree);
134
135         /*
136          * Build EState, switch into per-query memory context for startup.
137          */
138         estate = CreateExecutorState();
139         queryDesc->estate = estate;
140
141         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
142
143         /*
144          * Fill in parameters, if any, from queryDesc
145          */
146         estate->es_param_list_info = queryDesc->params;
147
148         if (queryDesc->plantree->nParamExec > 0)
149                 estate->es_param_exec_vals = (ParamExecData *)
150                         palloc0(queryDesc->plantree->nParamExec * sizeof(ParamExecData));
151
152         /*
153          * Copy other important information into the EState
154          */
155         estate->es_snapshot = queryDesc->snapshot;
156         estate->es_crosscheck_snapshot = queryDesc->crosscheck_snapshot;
157         estate->es_instrument = queryDesc->doInstrument;
158
159         /*
160          * Initialize the plan state tree
161          */
162         InitPlan(queryDesc, eflags);
163
164         MemoryContextSwitchTo(oldcontext);
165 }
166
167 /* ----------------------------------------------------------------
168  *              ExecutorRun
169  *
170  *              This is the main routine of the executor module. It accepts
171  *              the query descriptor from the traffic cop and executes the
172  *              query plan.
173  *
174  *              ExecutorStart must have been called already.
175  *
176  *              If direction is NoMovementScanDirection then nothing is done
177  *              except to start up/shut down the destination.  Otherwise,
178  *              we retrieve up to 'count' tuples in the specified direction.
179  *
180  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
181  *              completion.
182  *
183  * ----------------------------------------------------------------
184  */
185 TupleTableSlot *
186 ExecutorRun(QueryDesc *queryDesc,
187                         ScanDirection direction, long count)
188 {
189         EState     *estate;
190         CmdType         operation;
191         DestReceiver *dest;
192         TupleTableSlot *result;
193         MemoryContext oldcontext;
194
195         /* sanity checks */
196         Assert(queryDesc != NULL);
197
198         estate = queryDesc->estate;
199
200         Assert(estate != NULL);
201
202         /*
203          * Switch into per-query memory context
204          */
205         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
206
207         /*
208          * extract information from the query descriptor.
209          */
210         operation = queryDesc->operation;
211         dest = queryDesc->dest;
212
213         /*
214          * startup tuple receiver
215          */
216         estate->es_processed = 0;
217         estate->es_lastoid = InvalidOid;
218
219         (*dest->rStartup) (dest, operation, queryDesc->tupDesc);
220
221         /*
222          * run plan
223          */
224         if (ScanDirectionIsNoMovement(direction))
225                 result = NULL;
226         else
227                 result = ExecutePlan(estate,
228                                                          queryDesc->planstate,
229                                                          operation,
230                                                          count,
231                                                          direction,
232                                                          dest);
233
234         /*
235          * shutdown receiver
236          */
237         (*dest->rShutdown) (dest);
238
239         MemoryContextSwitchTo(oldcontext);
240
241         return result;
242 }
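/*
 * Because the plan's state lives in the EState until ExecutorEnd, a caller
 * can in principle call ExecutorRun more than once to consume the result set
 * incrementally (this is what the "portal limit" above refers to).  A hedged
 * sketch of such usage:
 *
 *              slot = ExecutorRun(queryDesc, ForwardScanDirection, 100L);  -- at most 100 tuples
 *              ... hand results to the client ...
 *              slot = ExecutorRun(queryDesc, ForwardScanDirection, 100L);  -- next batch
 *
 * Passing NoMovementScanDirection merely starts up and shuts down the
 * destination receiver without fetching anything.
 */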
243
244 /* ----------------------------------------------------------------
245  *              ExecutorEnd
246  *
247  *              This routine must be called at the end of execution of any
248  *              query plan
249  * ----------------------------------------------------------------
250  */
251 void
252 ExecutorEnd(QueryDesc *queryDesc)
253 {
254         EState     *estate;
255         MemoryContext oldcontext;
256
257         /* sanity checks */
258         Assert(queryDesc != NULL);
259
260         estate = queryDesc->estate;
261
262         Assert(estate != NULL);
263
264         /*
265          * Switch into per-query memory context to run ExecEndPlan
266          */
267         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
268
269         ExecEndPlan(queryDesc->planstate, estate);
270
271         /*
272          * Must switch out of context before destroying it
273          */
274         MemoryContextSwitchTo(oldcontext);
275
276         /*
277          * Release EState and per-query memory context.  This should release
278          * everything the executor has allocated.
279          */
280         FreeExecutorState(estate);
281
282         /* Reset queryDesc fields that no longer point to anything */
283         queryDesc->tupDesc = NULL;
284         queryDesc->estate = NULL;
285         queryDesc->planstate = NULL;
286 }
287
288 /* ----------------------------------------------------------------
289  *              ExecutorRewind
290  *
291  *              This routine may be called on an open queryDesc to rewind it
292  *              to the start.
293  * ----------------------------------------------------------------
294  */
295 void
296 ExecutorRewind(QueryDesc *queryDesc)
297 {
298         EState     *estate;
299         MemoryContext oldcontext;
300
301         /* sanity checks */
302         Assert(queryDesc != NULL);
303
304         estate = queryDesc->estate;
305
306         Assert(estate != NULL);
307
308         /* It's probably not sensible to rescan updating queries */
309         Assert(queryDesc->operation == CMD_SELECT);
310
311         /*
312          * Switch into per-query memory context
313          */
314         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
315
316         /*
317          * rescan plan
318          */
319         ExecReScan(queryDesc->planstate, NULL);
320
321         MemoryContextSwitchTo(oldcontext);
322 }
323
324
325 /*
326  * ExecCheckRTPerms
327  *              Check access permissions for all relations listed in a range table.
328  */
329 void
330 ExecCheckRTPerms(List *rangeTable)
331 {
332         ListCell   *l;
333
334         foreach(l, rangeTable)
335         {
336                 RangeTblEntry *rte = lfirst(l);
337
338                 ExecCheckRTEPerms(rte);
339         }
340 }
341
342 /*
343  * ExecCheckRTEPerms
344  *              Check access permissions for a single RTE.
345  */
346 static void
347 ExecCheckRTEPerms(RangeTblEntry *rte)
348 {
349         AclMode         requiredPerms;
350         Oid                     relOid;
351         Oid                     userid;
352
353         /*
354          * Only plain-relation RTEs need to be checked here.  Subquery RTEs are
355          * checked by ExecInitSubqueryScan if the subquery is still a separate
356          * subquery --- if it's been pulled up into our query level then the RTEs
357          * are in our rangetable and will be checked here. Function RTEs are
358          * checked by init_fcache when the function is prepared for execution.
359          * Join and special RTEs need no checks.
360          */
361         if (rte->rtekind != RTE_RELATION)
362                 return;
363
364         /*
365          * No work if requiredPerms is empty.
366          */
367         requiredPerms = rte->requiredPerms;
368         if (requiredPerms == 0)
369                 return;
370
371         relOid = rte->relid;
372
373         /*
374          * userid to check as: current user unless we have a setuid indication.
375          *
376          * Note: GetUserId() is presently fast enough that there's no harm in
377          * calling it separately for each RTE.  If that stops being true, we could
378          * call it once in ExecCheckRTPerms and pass the userid down from there.
379          * But for now, no need for the extra clutter.
380          */
381         userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
382
383         /*
384          * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
385          */
386         if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
387                 != requiredPerms)
388                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
389                                            get_rel_name(relOid));
390 }
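/*
 * To illustrate the ACLMASK_ALL requirement (hedged example values): if an
 * RTE arrives with requiredPerms = ACL_SELECT | ACL_UPDATE but the user has
 * only been granted UPDATE, pg_class_aclmask() returns just ACL_UPDATE.
 * That is != requiredPerms, so we raise the error; ACLMASK_ANY would have
 * let the partial match slip through.
 */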
391
392 /*
393  * Check that the query does not imply any writes to non-temp tables.
394  */
395 static void
396 ExecCheckXactReadOnly(Query *parsetree)
397 {
398         ListCell   *l;
399
400         /*
401          * CREATE TABLE AS or SELECT INTO?
402          *
403          * XXX should we allow this if the destination is temp?
404          */
405         if (parsetree->into != NULL)
406                 goto fail;
407
408         /* Fail if write permissions are requested on any non-temp table */
409         foreach(l, parsetree->rtable)
410         {
411                 RangeTblEntry *rte = lfirst(l);
412
413                 if (rte->rtekind == RTE_SUBQUERY)
414                 {
415                         ExecCheckXactReadOnly(rte->subquery);
416                         continue;
417                 }
418
419                 if (rte->rtekind != RTE_RELATION)
420                         continue;
421
422                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
423                         continue;
424
425                 if (isTempNamespace(get_rel_namespace(rte->relid)))
426                         continue;
427
428                 goto fail;
429         }
430
431         return;
432
433 fail:
434         ereport(ERROR,
435                         (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
436                          errmsg("transaction is read-only")));
437 }
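/*
 * In a read-only transaction the rules above work out roughly like this
 * (hedged examples):
 *
 *              SELECT ... FROM tab                     only ACL_SELECT requested: allowed
 *              UPDATE tab SET ...                      ACL_UPDATE on a non-temp rel: fails
 *              UPDATE my_temp_tab SET ...              temp namespace: allowed
 *              SELECT ... INTO newtab FROM tab         parsetree->into is set: fails, even if
 *                                                      newtab would be temp (see XXX above)
 */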
438
439
440 /* ----------------------------------------------------------------
441  *              InitPlan
442  *
443  *              Initializes the query plan: open files, allocate storage
444  *              and start up the rule manager
445  * ----------------------------------------------------------------
446  */
447 static void
448 InitPlan(QueryDesc *queryDesc, int eflags)
449 {
450         CmdType         operation = queryDesc->operation;
451         Query      *parseTree = queryDesc->parsetree;
452         Plan       *plan = queryDesc->plantree;
453         EState     *estate = queryDesc->estate;
454         PlanState  *planstate;
455         List       *rangeTable;
456         Relation        intoRelationDesc;
457         bool            do_select_into;
458         TupleDesc       tupType;
459         ListCell   *l;
460
461         /*
462          * Do permissions checks.  It's sufficient to examine the query's top
463          * rangetable here --- subplan RTEs will be checked during
464          * ExecInitSubPlan().
465          */
466         ExecCheckRTPerms(parseTree->rtable);
467
468         /*
469          * get information from query descriptor
470          */
471         rangeTable = parseTree->rtable;
472
473         /*
474          * initialize the node's execution state
475          */
476         estate->es_range_table = rangeTable;
477
478         /*
479          * if there is a result relation, initialize result relation stuff
480          */
481         if (parseTree->resultRelation != 0 && operation != CMD_SELECT)
482         {
483                 List       *resultRelations = parseTree->resultRelations;
484                 int                     numResultRelations;
485                 ResultRelInfo *resultRelInfos;
486
487                 if (resultRelations != NIL)
488                 {
489                         /*
490                          * Multiple result relations (due to inheritance);
491                          * parseTree->resultRelations identifies them all.
492                          */
493                         ResultRelInfo *resultRelInfo;
494
495                         numResultRelations = list_length(resultRelations);
496                         resultRelInfos = (ResultRelInfo *)
497                                 palloc(numResultRelations * sizeof(ResultRelInfo));
498                         resultRelInfo = resultRelInfos;
499                         foreach(l, resultRelations)
500                         {
501                                 initResultRelInfo(resultRelInfo,
502                                                                   lfirst_int(l),
503                                                                   rangeTable,
504                                                                   operation,
505                                                                   estate->es_instrument);
506                                 resultRelInfo++;
507                         }
508                 }
509                 else
510                 {
511                         /*
512                          * Single result relation identified by parseTree->resultRelation
513                          */
514                         numResultRelations = 1;
515                         resultRelInfos = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
516                         initResultRelInfo(resultRelInfos,
517                                                           parseTree->resultRelation,
518                                                           rangeTable,
519                                                           operation,
520                                                           estate->es_instrument);
521                 }
522
523                 estate->es_result_relations = resultRelInfos;
524                 estate->es_num_result_relations = numResultRelations;
525                 /* Initialize to first or only result rel */
526                 estate->es_result_relation_info = resultRelInfos;
527         }
528         else
529         {
530                 /*
531                  * if no result relation, then set state appropriately
532                  */
533                 estate->es_result_relations = NULL;
534                 estate->es_num_result_relations = 0;
535                 estate->es_result_relation_info = NULL;
536         }
537
538         /*
539          * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
540          * flag appropriately so that the plan tree will be initialized with the
541          * correct tuple descriptors.
542          */
543         do_select_into = false;
544
545         if (operation == CMD_SELECT && parseTree->into != NULL)
546         {
547                 do_select_into = true;
548                 estate->es_select_into = true;
549                 estate->es_into_oids = interpretOidsOption(parseTree->intoOptions);
550         }
551
552         /*
553          * Have to lock relations selected FOR UPDATE/FOR SHARE
554          */
555         estate->es_rowMarks = NIL;
556         foreach(l, parseTree->rowMarks)
557         {
558                 RowMarkClause *rc = (RowMarkClause *) lfirst(l);
559                 Oid                     relid = getrelid(rc->rti, rangeTable);
560                 Relation        relation;
561                 ExecRowMark *erm;
562
563                 relation = heap_open(relid, RowShareLock);
564                 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
565                 erm->relation = relation;
566                 erm->rti = rc->rti;
567                 erm->forUpdate = rc->forUpdate;
568                 erm->noWait = rc->noWait;
569                 snprintf(erm->resname, sizeof(erm->resname), "ctid%u", rc->rti);
570                 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
571         }
572
573         /*
574          * initialize the executor "tuple" table.  We need slots for all the plan
575          * nodes, plus possibly output slots for the junkfilter(s). At this point
576          * we aren't sure if we need junkfilters, so just add slots for them
577          * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
578          * trigger output tuples.
579          */
580         {
581                 int                     nSlots = ExecCountSlotsNode(plan);
582
583                 if (parseTree->resultRelations != NIL)
584                         nSlots += list_length(parseTree->resultRelations);
585                 else
586                         nSlots += 1;
587                 if (operation != CMD_SELECT)
588                         nSlots++;
589
590                 estate->es_tupleTable = ExecCreateTupleTable(nSlots);
591
592                 if (operation != CMD_SELECT)
593                         estate->es_trig_tuple_slot =
594                                 ExecAllocTableSlot(estate->es_tupleTable);
595         }
596
597         /* mark EvalPlanQual not active */
598         estate->es_topPlan = plan;
599         estate->es_evalPlanQual = NULL;
600         estate->es_evTupleNull = NULL;
601         estate->es_evTuple = NULL;
602         estate->es_useEvalPlan = false;
603
604         /*
605          * initialize the private state information for all the nodes in the query
606          * tree.  This opens files, allocates storage and leaves us ready to start
607          * processing tuples.
608          */
609         planstate = ExecInitNode(plan, estate, eflags);
610
611         /*
612          * Get the tuple descriptor describing the type of tuples to return. (this
613          * is especially important if we are creating a relation with "SELECT
614          * INTO")
615          */
616         tupType = ExecGetResultType(planstate);
617
618         /*
619          * Initialize the junk filter if needed.  SELECT and INSERT queries need a
620          * filter if there are any junk attrs in the tlist.  INSERT and SELECT
621          * INTO also need a filter if the plan may return raw disk tuples (else
622          * heap_insert will be scribbling on the source relation!). UPDATE and
623          * DELETE always need a filter, since there's always a junk 'ctid'
624          * attribute present --- no need to look first.
625          */
626         {
627                 bool            junk_filter_needed = false;
628                 ListCell   *tlist;
629
630                 switch (operation)
631                 {
632                         case CMD_SELECT:
633                         case CMD_INSERT:
634                                 foreach(tlist, plan->targetlist)
635                                 {
636                                         TargetEntry *tle = (TargetEntry *) lfirst(tlist);
637
638                                         if (tle->resjunk)
639                                         {
640                                                 junk_filter_needed = true;
641                                                 break;
642                                         }
643                                 }
644                                 if (!junk_filter_needed &&
645                                         (operation == CMD_INSERT || do_select_into) &&
646                                         ExecMayReturnRawTuples(planstate))
647                                         junk_filter_needed = true;
648                                 break;
649                         case CMD_UPDATE:
650                         case CMD_DELETE:
651                                 junk_filter_needed = true;
652                                 break;
653                         default:
654                                 break;
655                 }
656
657                 if (junk_filter_needed)
658                 {
659                         /*
660                          * If there are multiple result relations, each one needs its own
661                          * junk filter.  Note this is only possible for UPDATE/DELETE, so
662                          * we can't be fooled by some needing a filter and some not.
663                          */
664                         if (parseTree->resultRelations != NIL)
665                         {
666                                 PlanState **appendplans;
667                                 int                     as_nplans;
668                                 ResultRelInfo *resultRelInfo;
669                                 int                     i;
670
671                                 /* Top plan had better be an Append here. */
672                                 Assert(IsA(plan, Append));
673                                 Assert(((Append *) plan)->isTarget);
674                                 Assert(IsA(planstate, AppendState));
675                                 appendplans = ((AppendState *) planstate)->appendplans;
676                                 as_nplans = ((AppendState *) planstate)->as_nplans;
677                                 Assert(as_nplans == estate->es_num_result_relations);
678                                 resultRelInfo = estate->es_result_relations;
679                                 for (i = 0; i < as_nplans; i++)
680                                 {
681                                         PlanState  *subplan = appendplans[i];
682                                         JunkFilter *j;
683
684                                         j = ExecInitJunkFilter(subplan->plan->targetlist,
685                                                         resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
686                                                                   ExecAllocTableSlot(estate->es_tupleTable));
687                                         resultRelInfo->ri_junkFilter = j;
688                                         resultRelInfo++;
689                                 }
690
691                                 /*
692                                  * Set active junkfilter too; at this point ExecInitAppend has
693                                  * already selected an active result relation...
694                                  */
695                                 estate->es_junkFilter =
696                                         estate->es_result_relation_info->ri_junkFilter;
697                         }
698                         else
699                         {
700                                 /* Normal case with just one JunkFilter */
701                                 JunkFilter *j;
702
703                                 j = ExecInitJunkFilter(planstate->plan->targetlist,
704                                                                            tupType->tdhasoid,
705                                                                   ExecAllocTableSlot(estate->es_tupleTable));
706                                 estate->es_junkFilter = j;
707                                 if (estate->es_result_relation_info)
708                                         estate->es_result_relation_info->ri_junkFilter = j;
709
710                                 /* For SELECT, want to return the cleaned tuple type */
711                                 if (operation == CMD_SELECT)
712                                         tupType = j->jf_cleanTupType;
713                         }
714                 }
715                 else
716                         estate->es_junkFilter = NULL;
717         }
718
719         /*
720          * If doing SELECT INTO, initialize the "into" relation.  We must wait
721          * till now so we have the "clean" result tuple type to create the new
722          * table from.
723          *
724          * If EXPLAIN, skip creating the "into" relation.
725          */
726         intoRelationDesc = NULL;
727
728         if (do_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
729         {
730                 char       *intoName;
731                 Oid                     namespaceId;
732                 Oid                     tablespaceId;
733                 Datum           reloptions;
734                 AclResult       aclresult;
735                 Oid                     intoRelationId;
736                 TupleDesc       tupdesc;
737
738                 /*
739                  * Check consistency of arguments
740                  */
741                 if (parseTree->intoOnCommit != ONCOMMIT_NOOP && !parseTree->into->istemp)
742                         ereport(ERROR,
743                                         (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
744                                          errmsg("ON COMMIT can only be used on temporary tables")));
745
746                 /*
747                  * find namespace to create in, check permissions
748                  */
749                 intoName = parseTree->into->relname;
750                 namespaceId = RangeVarGetCreationNamespace(parseTree->into);
751
752                 aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
753                                                                                   ACL_CREATE);
754                 if (aclresult != ACLCHECK_OK)
755                         aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
756                                                    get_namespace_name(namespaceId));
757
758                 /*
759                  * Select tablespace to use.  If not specified, use default_tablespace
760                  * (which may in turn default to database's default).
761                  */
762                 if (parseTree->intoTableSpaceName)
763                 {
764                         tablespaceId = get_tablespace_oid(parseTree->intoTableSpaceName);
765                         if (!OidIsValid(tablespaceId))
766                                 ereport(ERROR,
767                                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
768                                                  errmsg("tablespace \"%s\" does not exist",
769                                                                 parseTree->intoTableSpaceName)));
770                 } else
771                 {
772                         tablespaceId = GetDefaultTablespace();
773                         /* note InvalidOid is OK in this case */
774                 }
775
776                 /* Parse and validate any reloptions */
777                 reloptions = transformRelOptions((Datum) 0,
778                                                                                  parseTree->intoOptions,
779                                                                                  true,
780                                                                                  false);
781                 (void) heap_reloptions(RELKIND_RELATION, reloptions, true);
782
783                 /* Check permissions except when using the database's default */
784                 if (OidIsValid(tablespaceId))
785                 {
786                         AclResult       aclresult;
787
788                         aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
789                                                                                            ACL_CREATE);
790
791                         if (aclresult != ACLCHECK_OK)
792                                 aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
793                                                            get_tablespace_name(tablespaceId));
794                 }
795
796                 /*
797                  * have to copy tupType to get rid of constraints
798                  */
799                 tupdesc = CreateTupleDescCopy(tupType);
800
801                 intoRelationId = heap_create_with_catalog(intoName,
802                                                                                                   namespaceId,
803                                                                                                   tablespaceId,
804                                                                                                   InvalidOid,
805                                                                                                   GetUserId(),
806                                                                                                   tupdesc,
807                                                                                                   RELKIND_RELATION,
808                                                                                                   false,
809                                                                                                   true,
810                                                                                                   0,
811                                                                                                   parseTree->intoOnCommit,
812                                                                                                   reloptions,
813                                                                                                   allowSystemTableMods);
814
815                 FreeTupleDesc(tupdesc);
816
817                 /*
818                  * Advance command counter so that the newly-created relation's
819                  * catalog tuples will be visible to heap_open.
820                  */
821                 CommandCounterIncrement();
822
823                 /*
824                  * If necessary, create a TOAST table for the into relation. Note that
825                  * AlterTableCreateToastTable ends with CommandCounterIncrement(), so
826                  * that the TOAST table will be visible for insertion.
827                  */
828                 AlterTableCreateToastTable(intoRelationId, true);
829
830                 /*
831                  * And open the constructed table for writing.
832                  */
833                 intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
834
835                 /* use_wal off requires rd_targblock be initially invalid */
836                 Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
837
838                 /*
839                  * We can skip WAL-logging the insertions, unless PITR is in use.
840                  *
841                  * Note that for a non-temp INTO table, this is safe only because we
842                  * know that the catalog changes above will have been WAL-logged, and
843                  * so RecordTransactionCommit will think it needs to WAL-log the
844                  * eventual transaction commit.  Else the commit might be lost, even
845                  * though all the data is safely fsync'd ...
846                  */
847                 estate->es_into_relation_use_wal = XLogArchivingActive();
848         }
849
850         estate->es_into_relation_descriptor = intoRelationDesc;
851
852         queryDesc->tupDesc = tupType;
853         queryDesc->planstate = planstate;
854 }
855
856 /*
857  * Initialize ResultRelInfo data for one result relation
858  */
859 static void
860 initResultRelInfo(ResultRelInfo *resultRelInfo,
861                                   Index resultRelationIndex,
862                                   List *rangeTable,
863                                   CmdType operation,
864                                   bool doInstrument)
865 {
866         Oid                     resultRelationOid;
867         Relation        resultRelationDesc;
868
869         resultRelationOid = getrelid(resultRelationIndex, rangeTable);
870         resultRelationDesc = heap_open(resultRelationOid, RowExclusiveLock);
871
872         switch (resultRelationDesc->rd_rel->relkind)
873         {
874                 case RELKIND_SEQUENCE:
875                         ereport(ERROR,
876                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
877                                          errmsg("cannot change sequence \"%s\"",
878                                                         RelationGetRelationName(resultRelationDesc))));
879                         break;
880                 case RELKIND_TOASTVALUE:
881                         ereport(ERROR,
882                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
883                                          errmsg("cannot change TOAST relation \"%s\"",
884                                                         RelationGetRelationName(resultRelationDesc))));
885                         break;
886                 case RELKIND_VIEW:
887                         ereport(ERROR,
888                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
889                                          errmsg("cannot change view \"%s\"",
890                                                         RelationGetRelationName(resultRelationDesc))));
891                         break;
892         }
893
894         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
895         resultRelInfo->type = T_ResultRelInfo;
896         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
897         resultRelInfo->ri_RelationDesc = resultRelationDesc;
898         resultRelInfo->ri_NumIndices = 0;
899         resultRelInfo->ri_IndexRelationDescs = NULL;
900         resultRelInfo->ri_IndexRelationInfo = NULL;
901         /* make a copy so as not to depend on relcache info not changing... */
902         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
903         if (resultRelInfo->ri_TrigDesc)
904         {
905                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
906
907                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
908                         palloc0(n * sizeof(FmgrInfo));
909                 if (doInstrument)
910                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
911                 else
912                         resultRelInfo->ri_TrigInstrument = NULL;
913         }
914         else
915         {
916                 resultRelInfo->ri_TrigFunctions = NULL;
917                 resultRelInfo->ri_TrigInstrument = NULL;
918         }
919         resultRelInfo->ri_ConstraintExprs = NULL;
920         resultRelInfo->ri_junkFilter = NULL;
921
922         /*
923          * If there are indices on the result relation, open them and save
924          * descriptors in the result relation info, so that we can add new index
925          * entries for the tuples we add/update.  We need not do this for a
926          * DELETE, however, since deletion doesn't affect indexes.
927          */
928         if (resultRelationDesc->rd_rel->relhasindex &&
929                 operation != CMD_DELETE)
930                 ExecOpenIndices(resultRelInfo);
931 }
932
933 /*
934  *              ExecContextForcesOids
935  *
936  * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
937  * we need to ensure that result tuples have space for an OID iff they are
938  * going to be stored into a relation that has OIDs.  In other contexts
939  * we are free to choose whether to leave space for OIDs in result tuples
940  * (we generally don't want to, but we do if a physical-tlist optimization
941  * is possible).  This routine checks the plan context and returns TRUE if the
942  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
943  * *hasoids is set to the required value.
944  *
945  * One reason this is ugly is that all plan nodes in the plan tree will emit
946  * tuples with space for an OID, though we really only need the topmost node
947  * to do so.  However, node types like Sort don't project new tuples but just
948  * return their inputs, and in those cases the requirement propagates down
949  * to the input node.  Eventually we might make this code smart enough to
950  * recognize how far down the requirement really goes, but for now we just
951  * make all plan nodes do the same thing if the top level forces the choice.
952  *
953  * We assume that estate->es_result_relation_info is already set up to
954  * describe the target relation.  Note that in an UPDATE that spans an
955  * inheritance tree, some of the target relations may have OIDs and some not.
956  * We have to make the decisions on a per-relation basis as we initialize
957  * each of the child plans of the topmost Append plan.
958  *
959  * SELECT INTO is even uglier, because we don't have the INTO relation's
960  * descriptor available when this code runs; we have to look aside at a
961  * flag set by InitPlan().
962  */
963 bool
964 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
965 {
966         if (planstate->state->es_select_into)
967         {
968                 *hasoids = planstate->state->es_into_oids;
969                 return true;
970         }
971         else
972         {
973                 ResultRelInfo *ri = planstate->state->es_result_relation_info;
974
975                 if (ri != NULL)
976                 {
977                         Relation        rel = ri->ri_RelationDesc;
978
979                         if (rel != NULL)
980                         {
981                                 *hasoids = rel->rd_rel->relhasoids;
982                                 return true;
983                         }
984                 }
985         }
986
987         return false;
988 }
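/*
 * A minimal sketch of how a node's result-type setup might consult this
 * routine (ExecTypeFromTL is the usual consumer, but the surrounding details
 * here are assumed rather than taken from this file):
 *
 *              bool            hasoids;
 *
 *              if (!ExecContextForcesOids(planstate, &hasoids))
 *                      hasoids = false;                choice not forced; omit OID space
 *              tupdesc = ExecTypeFromTL(planstate->plan->targetlist, hasoids);
 */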
989
990 /* ----------------------------------------------------------------
991  *              ExecEndPlan
992  *
993  *              Cleans up the query plan -- closes files and frees up storage
994  *
995  * NOTE: we are no longer very worried about freeing storage per se
996  * in this code; FreeExecutorState should be guaranteed to release all
997  * memory that needs to be released.  What we are worried about doing
998  * is closing relations and dropping buffer pins.  Thus, for example,
999  * tuple tables must be cleared or dropped to ensure pins are released.
1000  * ----------------------------------------------------------------
1001  */
1002 void
1003 ExecEndPlan(PlanState *planstate, EState *estate)
1004 {
1005         ResultRelInfo *resultRelInfo;
1006         int                     i;
1007         ListCell   *l;
1008
1009         /*
1010          * shut down any PlanQual processing we were doing
1011          */
1012         if (estate->es_evalPlanQual != NULL)
1013                 EndEvalPlanQual(estate);
1014
1015         /*
1016          * shut down the node-type-specific query processing
1017          */
1018         ExecEndNode(planstate);
1019
1020         /*
1021          * destroy the executor "tuple" table.
1022          */
1023         ExecDropTupleTable(estate->es_tupleTable, true);
1024         estate->es_tupleTable = NULL;
1025
1026         /*
1027          * close the result relation(s) if any, but hold locks until xact commit.
1028          */
1029         resultRelInfo = estate->es_result_relations;
1030         for (i = estate->es_num_result_relations; i > 0; i--)
1031         {
1032                 /* Close indices and then the relation itself */
1033                 ExecCloseIndices(resultRelInfo);
1034                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1035                 resultRelInfo++;
1036         }
1037
1038         /*
1039          * close the "into" relation if necessary, again keeping lock
1040          */
1041         if (estate->es_into_relation_descriptor != NULL)
1042         {
1043                 /*
1044                  * If we skipped using WAL, and it's not a temp relation, we must
1045                  * force the relation down to disk before it's safe to commit the
1046                  * transaction.  This requires forcing out any dirty buffers and then
1047                  * doing a forced fsync.
1048                  */
1049                 if (!estate->es_into_relation_use_wal &&
1050                         !estate->es_into_relation_descriptor->rd_istemp)
1051                 {
1052                         FlushRelationBuffers(estate->es_into_relation_descriptor);
1053                         /* FlushRelationBuffers will have opened rd_smgr */
1054                         smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
1055                 }
1056
1057                 heap_close(estate->es_into_relation_descriptor, NoLock);
1058         }
1059
1060         /*
1061          * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1062          */
1063         foreach(l, estate->es_rowMarks)
1064         {
1065                 ExecRowMark *erm = lfirst(l);
1066
1067                 heap_close(erm->relation, NoLock);
1068         }
1069 }
1070
1071 /* ----------------------------------------------------------------
1072  *              ExecutePlan
1073  *
1074  *              processes the query plan to retrieve 'numberTuples' tuples in the
1075  *              direction specified.
1076  *
1077  *              Retrieves all tuples if numberTuples is 0
1078  *
1079  *              result is either a slot containing the last tuple in the case
1080  *              of a SELECT or NULL otherwise.
1081  *
1082  * Note: the ctid attribute is a 'junk' attribute that is removed before the
1083  * user can see it
1084  * ----------------------------------------------------------------
1085  */
1086 static TupleTableSlot *
1087 ExecutePlan(EState *estate,
1088                         PlanState *planstate,
1089                         CmdType operation,
1090                         long numberTuples,
1091                         ScanDirection direction,
1092                         DestReceiver *dest)
1093 {
1094         JunkFilter *junkfilter;
1095         TupleTableSlot *slot;
1096         ItemPointer tupleid = NULL;
1097         ItemPointerData tuple_ctid;
1098         long            current_tuple_count;
1099         TupleTableSlot *result;
1100
1101         /*
1102          * initialize local variables
1103          */
1104         slot = NULL;
1105         current_tuple_count = 0;
1106         result = NULL;
1107
1108         /*
1109          * Set the direction.
1110          */
1111         estate->es_direction = direction;
1112
1113         /*
1114          * Process BEFORE EACH STATEMENT triggers
1115          */
1116         switch (operation)
1117         {
1118                 case CMD_UPDATE:
1119                         ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
1120                         break;
1121                 case CMD_DELETE:
1122                         ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
1123                         break;
1124                 case CMD_INSERT:
1125                         ExecBSInsertTriggers(estate, estate->es_result_relation_info);
1126                         break;
1127                 default:
1128                         /* do nothing */
1129                         break;
1130         }
1131
1132         /*
1133          * Loop until we've processed the proper number of tuples from the plan.
1134          */
1135
1136         for (;;)
1137         {
1138                 /* Reset the per-output-tuple exprcontext */
1139                 ResetPerTupleExprContext(estate);
1140
1141                 /*
1142                  * Execute the plan and obtain a tuple
1143                  */
1144 lnext:  ;
1145                 if (estate->es_useEvalPlan)
1146                 {
1147                         slot = EvalPlanQualNext(estate);
1148                         if (TupIsNull(slot))
1149                                 slot = ExecProcNode(planstate);
1150                 }
1151                 else
1152                         slot = ExecProcNode(planstate);
1153
1154                 /*
1155                  * if the tuple is null, then we assume there is nothing more to
1156                  * process so we just return null...
1157                  */
1158                 if (TupIsNull(slot))
1159                 {
1160                         result = NULL;
1161                         break;
1162                 }
1163
1164                 /*
1165                  * if we have a junk filter, then project a new tuple with the junk
1166                  * removed.
1167                  *
1168                  * Store this new "clean" tuple in the junkfilter's resultSlot.
1169                  * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
1170                  * because that tuple slot has the wrong descriptor.)
1171                  *
1172                  * Also, extract all the junk information we need.
1173                  */
1174                 if ((junkfilter = estate->es_junkFilter) != NULL)
1175                 {
1176                         Datum           datum;
1177                         bool            isNull;
1178
1179                         /*
1180                          * extract the 'ctid' junk attribute.
1181                          */
1182                         if (operation == CMD_UPDATE || operation == CMD_DELETE)
1183                         {
1184                                 if (!ExecGetJunkAttribute(junkfilter,
1185                                                                                   slot,
1186                                                                                   "ctid",
1187                                                                                   &datum,
1188                                                                                   &isNull))
1189                                         elog(ERROR, "could not find junk ctid column");
1190
1191                                 /* shouldn't ever get a null result... */
1192                                 if (isNull)
1193                                         elog(ERROR, "ctid is NULL");
1194
1195                                 tupleid = (ItemPointer) DatumGetPointer(datum);
1196                                 tuple_ctid = *tupleid;  /* make sure we don't free the ctid!! */
1197                                 tupleid = &tuple_ctid;
1198                         }
1199
1200                         /*
1201                          * Process any FOR UPDATE or FOR SHARE locking requested.
1202                          */
1203                         else if (estate->es_rowMarks != NIL)
1204                         {
1205                                 ListCell   *l;
1206
1207                 lmark:  ;
1208                                 foreach(l, estate->es_rowMarks)
1209                                 {
1210                                         ExecRowMark *erm = lfirst(l);
1211                                         HeapTupleData tuple;
1212                                         Buffer          buffer;
1213                                         ItemPointerData update_ctid;
1214                                         TransactionId update_xmax;
1215                                         TupleTableSlot *newSlot;
1216                                         LockTupleMode lockmode;
1217                                         HTSU_Result test;
1218
1219                                         if (!ExecGetJunkAttribute(junkfilter,
1220                                                                                           slot,
1221                                                                                           erm->resname,
1222                                                                                           &datum,
1223                                                                                           &isNull))
1224                                                 elog(ERROR, "could not find junk \"%s\" column",
1225                                                          erm->resname);
1226
1227                                         /* shouldn't ever get a null result... */
1228                                         if (isNull)
1229                                                 elog(ERROR, "\"%s\" is NULL", erm->resname);
1230
1231                                         tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
1232
1233                                         if (erm->forUpdate)
1234                                                 lockmode = LockTupleExclusive;
1235                                         else
1236                                                 lockmode = LockTupleShared;
1237
1238                                         test = heap_lock_tuple(erm->relation, &tuple, &buffer,
1239                                                                                    &update_ctid, &update_xmax,
1240                                                                                    estate->es_snapshot->curcid,
1241                                                                                    lockmode, erm->noWait);
1242                                         ReleaseBuffer(buffer);
1243                                         switch (test)
1244                                         {
1245                                                 case HeapTupleSelfUpdated:
1246                                                         /* treat it as deleted; do not process */
1247                                                         goto lnext;
1248
1249                                                 case HeapTupleMayBeUpdated:
1250                                                         break;
1251
1252                                                 case HeapTupleUpdated:
1253                                                         if (IsXactIsoLevelSerializable)
1254                                                                 ereport(ERROR,
1255                                                                  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1256                                                                   errmsg("could not serialize access due to concurrent update")));
1257                                                         if (!ItemPointerEquals(&update_ctid,
1258                                                                                                    &tuple.t_self))
1259                                                         {
1260                                                                 /* updated, so look at updated version */
1261                                                                 newSlot = EvalPlanQual(estate,
1262                                                                                                            erm->rti,
1263                                                                                                            &update_ctid,
1264                                                                                                            update_xmax,
1265                                                                                                            estate->es_snapshot->curcid);
1266                                                                 if (!TupIsNull(newSlot))
1267                                                                 {
1268                                                                         slot = newSlot;
1269                                                                         estate->es_useEvalPlan = true;
1270                                                                         goto lmark;
1271                                                                 }
1272                                                         }
1273
1274                                                         /*
1275                                                          * If the tuple was deleted, or PlanQual failed for the
1276                                                          * updated tuple, we must not return this tuple!
1277                                                          */
1278                                                         goto lnext;
1279
1280                                                 default:
1281                                                         elog(ERROR, "unrecognized heap_lock_tuple status: %u",
1282                                                                  test);
1283                                                         return NULL;
1284                                         }
1285                                 }
1286                         }
1287
1288                         /*
1289                          * Finally create a new "clean" tuple with all junk attributes
1290                          * removed
1291                          */
1292                         slot = ExecFilterJunk(junkfilter, slot);
1293                 }
1294
1295                 /*
1296                  * now that we have a tuple, do the appropriate thing with it.. either
1297                  * return it to the user, add it to a relation someplace, delete it
1298                  * from a relation, or modify some of its attributes.
1299                  */
1300                 switch (operation)
1301                 {
1302                         case CMD_SELECT:
1303                                 ExecSelect(slot,        /* slot containing tuple */
1304                                                    dest,        /* destination's tuple-receiver obj */
1305                                                    estate);
1306                                 result = slot;
1307                                 break;
1308
1309                         case CMD_INSERT:
1310                                 ExecInsert(slot, tupleid, estate);
1311                                 result = NULL;
1312                                 break;
1313
1314                         case CMD_DELETE:
1315                                 ExecDelete(slot, tupleid, estate);
1316                                 result = NULL;
1317                                 break;
1318
1319                         case CMD_UPDATE:
1320                                 ExecUpdate(slot, tupleid, estate);
1321                                 result = NULL;
1322                                 break;
1323
1324                         default:
1325                                 elog(ERROR, "unrecognized operation code: %d",
1326                                          (int) operation);
1327                                 result = NULL;
1328                                 break;
1329                 }
1330
1331                 /*
1332                  * check our tuple count.. if we've processed the proper number then
1333                  * quit, else loop again and process more tuples.  Zero numberTuples
1334                  * means no limit.
1335                  */
1336                 current_tuple_count++;
1337                 if (numberTuples && numberTuples == current_tuple_count)
1338                         break;
1339         }
1340
1341         /*
1342          * Process AFTER EACH STATEMENT triggers
1343          */
1344         switch (operation)
1345         {
1346                 case CMD_UPDATE:
1347                         ExecASUpdateTriggers(estate, estate->es_result_relation_info);
1348                         break;
1349                 case CMD_DELETE:
1350                         ExecASDeleteTriggers(estate, estate->es_result_relation_info);
1351                         break;
1352                 case CMD_INSERT:
1353                         ExecASInsertTriggers(estate, estate->es_result_relation_info);
1354                         break;
1355                 default:
1356                         /* do nothing */
1357                         break;
1358         }
1359
1360         /*
1361          * here, result is either a slot containing a tuple in the case of a
1362          * SELECT or NULL otherwise.
1363          */
1364         return result;
1365 }
1366
1367 /* ----------------------------------------------------------------
1368  *              ExecSelect
1369  *
1370  *              SELECTs are easy.. we just pass the tuple to the appropriate
1371  *              print function.  The only complexity is when we do a
1372  *              "SELECT INTO", in which case we insert the tuple into
1373  *              the appropriate relation (note: this is a newly created relation
1374  *              so we don't need to worry about indices or locks.)
1375  * ----------------------------------------------------------------
1376  */
1377 static void
1378 ExecSelect(TupleTableSlot *slot,
1379                    DestReceiver *dest,
1380                    EState *estate)
1381 {
1382         /*
1383          * insert the tuple into the "into relation"
1384          *
1385          * XXX this probably ought to be replaced by a separate destination
1386          */
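        /*
         * Note on WAL: es_into_relation_use_wal is established when the INTO
         * relation is set up; when it is false, the insert below is presumably
         * safe to leave unlogged because the freshly created relation is synced
         * to disk (or simply discarded) independently of WAL replay.
         */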
1387         if (estate->es_into_relation_descriptor != NULL)
1388         {
1389                 HeapTuple       tuple;
1390
1391                 tuple = ExecCopySlotTuple(slot);
1392                 heap_insert(estate->es_into_relation_descriptor, tuple,
1393                                         estate->es_snapshot->curcid,
1394                                         estate->es_into_relation_use_wal,
1395                                         false);         /* never any point in using FSM */
1396                 /* we know there are no indexes to update */
1397                 heap_freetuple(tuple);
1398                 IncrAppended();
1399         }
1400
1401         /*
1402          * send the tuple to the destination
1403          */
1404         (*dest->receiveSlot) (slot, dest);
1405         IncrRetrieved();
1406         (estate->es_processed)++;
1407 }
1408
1409 /* ----------------------------------------------------------------
1410  *              ExecInsert
1411  *
1412  *              INSERTs are trickier.. we have to insert the tuple into
1413  *              the base relation and insert appropriate tuples into the
1414  *              index relations.
1415  * ----------------------------------------------------------------
1416  */
1417 static void
1418 ExecInsert(TupleTableSlot *slot,
1419                    ItemPointer tupleid,
1420                    EState *estate)
1421 {
1422         HeapTuple       tuple;
1423         ResultRelInfo *resultRelInfo;
1424         Relation        resultRelationDesc;
1425         Oid                     newId;
1426
1427         /*
1428          * get the heap tuple out of the tuple table slot, making sure we have a
1429          * writable copy
1430          */
1431         tuple = ExecMaterializeSlot(slot);
1432
1433         /*
1434          * get information on the (current) result relation
1435          */
1436         resultRelInfo = estate->es_result_relation_info;
1437         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1438
1439         /* BEFORE ROW INSERT Triggers */
1440         if (resultRelInfo->ri_TrigDesc &&
1441                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1442         {
1443                 HeapTuple       newtuple;
1444
1445                 newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1446
1447                 if (newtuple == NULL)   /* "do nothing" */
1448                         return;
1449
1450                 if (newtuple != tuple)  /* modified by Trigger(s) */
1451                 {
1452                         /*
1453                          * Put the modified tuple into a slot for convenience of routines
1454                          * below.  We assume the tuple was allocated in per-tuple memory
1455                          * context, and therefore will go away by itself. The tuple table
1456                          * slot should not try to clear it.
1457                          */
1458                         TupleTableSlot *newslot = estate->es_trig_tuple_slot;
1459
1460                         if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1461                                 ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1462                         ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
1463                         slot = newslot;
1464                         tuple = newtuple;
1465                 }
1466         }
1467
1468         /*
1469          * Check the constraints of the tuple
1470          */
1471         if (resultRelationDesc->rd_att->constr)
1472                 ExecConstraints(resultRelInfo, slot, estate);
1473
1474         /*
1475          * insert the tuple
1476          *
1477          * Note: heap_insert returns the tid (location) of the new tuple in the
1478          * t_self field.
1479          */
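        /*
         * The last two arguments request WAL logging and use of the free space
         * map when choosing a target page; both are wanted for an ordinary
         * INSERT (contrast the SELECT INTO path in ExecSelect above).
         */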
1480         newId = heap_insert(resultRelationDesc, tuple,
1481                                                 estate->es_snapshot->curcid,
1482                                                 true, true);
1483
1484         IncrAppended();
1485         (estate->es_processed)++;
1486         estate->es_lastoid = newId;
1487         setLastTid(&(tuple->t_self));
1488
1489         /*
1490          * insert index entries for tuple
1491          */
1492         if (resultRelInfo->ri_NumIndices > 0)
1493                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1494
1495         /* AFTER ROW INSERT Triggers */
1496         ExecARInsertTriggers(estate, resultRelInfo, tuple);
1497 }
1498
1499 /* ----------------------------------------------------------------
1500  *              ExecDelete
1501  *
1502  *              DELETE is like UPDATE, except that we delete the tuple and no
1503  *              index modifications are needed
1504  * ----------------------------------------------------------------
1505  */
1506 static void
1507 ExecDelete(TupleTableSlot *slot,
1508                    ItemPointer tupleid,
1509                    EState *estate)
1510 {
1511         ResultRelInfo *resultRelInfo;
1512         Relation        resultRelationDesc;
1513         HTSU_Result result;
1514         ItemPointerData update_ctid;
1515         TransactionId update_xmax;
1516
1517         /*
1518          * get information on the (current) result relation
1519          */
1520         resultRelInfo = estate->es_result_relation_info;
1521         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1522
1523         /* BEFORE ROW DELETE Triggers */
1524         if (resultRelInfo->ri_TrigDesc &&
1525                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
1526         {
1527                 bool            dodelete;
1528
1529                 dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid,
1530                                                                                 estate->es_snapshot->curcid);
1531
1532                 if (!dodelete)                  /* "do nothing" */
1533                         return;
1534         }
1535
1536         /*
1537          * delete the tuple
1538          *
1539          * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
1540          * the row to be deleted is visible to that snapshot, and throw a can't-
1541          * serialize error if not.      This is a special-case behavior needed for
1542          * referential integrity updates in serializable transactions.
1543          */
1544 ldelete:;
1545         result = heap_delete(resultRelationDesc, tupleid,
1546                                                  &update_ctid, &update_xmax,
1547                                                  estate->es_snapshot->curcid,
1548                                                  estate->es_crosscheck_snapshot,
1549                                                  true /* wait for commit */ );
1550         switch (result)
1551         {
1552                 case HeapTupleSelfUpdated:
1553                         /* already deleted by self; nothing to do */
1554                         return;
1555
1556                 case HeapTupleMayBeUpdated:
1557                         break;
1558
1559                 case HeapTupleUpdated:
1560                         if (IsXactIsoLevelSerializable)
1561                                 ereport(ERROR,
1562                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1563                                                  errmsg("could not serialize access due to concurrent update")));
1564                         else if (!ItemPointerEquals(tupleid, &update_ctid))
1565                         {
1566                                 TupleTableSlot *epqslot;
1567
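                                /*
                                 * The row we meant to delete was updated by a concurrent,
                                 * now-committed transaction.  EvalPlanQual re-evaluates the
                                 * query quals against the latest row version; if it still
                                 * qualifies, we adopt its TID and loop back to ldelete to
                                 * retry the delete against that version.
                                 */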
1568                                 epqslot = EvalPlanQual(estate,
1569                                                                            resultRelInfo->ri_RangeTableIndex,
1570                                                                            &update_ctid,
1571                                                                            update_xmax,
1572                                                                            estate->es_snapshot->curcid);
1573                                 if (!TupIsNull(epqslot))
1574                                 {
1575                                         *tupleid = update_ctid;
1576                                         goto ldelete;
1577                                 }
1578                         }
1579                         /* tuple already deleted; nothing to do */
1580                         return;
1581
1582                 default:
1583                         elog(ERROR, "unrecognized heap_delete status: %u", result);
1584                         return;
1585         }
1586
1587         IncrDeleted();
1588         (estate->es_processed)++;
1589
1590         /*
1591          * Note: Normally one would think that we have to delete index tuples
1592          * associated with the heap tuple now...
1593          *
1594          * ... but in POSTGRES, we have no need to do this because VACUUM will
1595          * take care of it later.  We can't delete index tuples immediately
1596          * anyway, since the tuple is still visible to other transactions.
1597          */
1598
1599         /* AFTER ROW DELETE Triggers */
1600         ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
1601 }
1602
1603 /* ----------------------------------------------------------------
1604  *              ExecUpdate
1605  *
1606  *              note: we can't run UPDATE queries with transactions
1607  *              off because UPDATEs are actually INSERTs and our
1608  *              scan will mistakenly loop forever, updating the tuple
1609  *              it just inserted..      This should be fixed but until it
1610  *              is, we don't want to get stuck in an infinite loop
1611  *              which corrupts your database..
1612  * ----------------------------------------------------------------
1613  */
1614 static void
1615 ExecUpdate(TupleTableSlot *slot,
1616                    ItemPointer tupleid,
1617                    EState *estate)
1618 {
1619         HeapTuple       tuple;
1620         ResultRelInfo *resultRelInfo;
1621         Relation        resultRelationDesc;
1622         HTSU_Result result;
1623         ItemPointerData update_ctid;
1624         TransactionId update_xmax;
1625
1626         /*
1627          * abort the operation if not running transactions
1628          */
1629         if (IsBootstrapProcessingMode())
1630                 elog(ERROR, "cannot UPDATE during bootstrap");
1631
1632         /*
1633          * get the heap tuple out of the tuple table slot, making sure we have a
1634          * writable copy
1635          */
1636         tuple = ExecMaterializeSlot(slot);
1637
1638         /*
1639          * get information on the (current) result relation
1640          */
1641         resultRelInfo = estate->es_result_relation_info;
1642         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1643
1644         /* BEFORE ROW UPDATE Triggers */
1645         if (resultRelInfo->ri_TrigDesc &&
1646                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
1647         {
1648                 HeapTuple       newtuple;
1649
1650                 newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
1651                                                                                 tupleid, tuple,
1652                                                                                 estate->es_snapshot->curcid);
1653
1654                 if (newtuple == NULL)   /* "do nothing" */
1655                         return;
1656
1657                 if (newtuple != tuple)  /* modified by Trigger(s) */
1658                 {
1659                         /*
1660                          * Put the modified tuple into a slot for convenience of routines
1661                          * below.  We assume the tuple was allocated in per-tuple memory
1662                          * context, and therefore will go away by itself. The tuple table
1663                          * slot should not try to clear it.
1664                          */
1665                         TupleTableSlot *newslot = estate->es_trig_tuple_slot;
1666
1667                         if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1668                                 ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1669                         ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
1670                         slot = newslot;
1671                         tuple = newtuple;
1672                 }
1673         }
1674
1675         /*
1676          * Check the constraints of the tuple
1677          *
1678          * If we generate a new candidate tuple after EvalPlanQual testing, we
1679          * must loop back here and recheck constraints.  (We don't need to redo
1680          * triggers, however.  If there are any BEFORE triggers then trigger.c
1681          * will have done heap_lock_tuple to lock the correct tuple, so there's no
1682          * need to do them again.)
1683          */
1684 lreplace:;
1685         if (resultRelationDesc->rd_att->constr)
1686                 ExecConstraints(resultRelInfo, slot, estate);
1687
1688         /*
1689          * replace the heap tuple
1690          *
1691          * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
1692          * the row to be updated is visible to that snapshot, and throw a can't-
1693          * serialize error if not.      This is a special-case behavior needed for
1694          * referential integrity updates in serializable transactions.
1695          */
1696         result = heap_update(resultRelationDesc, tupleid, tuple,
1697                                                  &update_ctid, &update_xmax,
1698                                                  estate->es_snapshot->curcid,
1699                                                  estate->es_crosscheck_snapshot,
1700                                                  true /* wait for commit */ );
1701         switch (result)
1702         {
1703                 case HeapTupleSelfUpdated:
1704                         /* already deleted by self; nothing to do */
1705                         return;
1706
1707                 case HeapTupleMayBeUpdated:
1708                         break;
1709
1710                 case HeapTupleUpdated:
1711                         if (IsXactIsoLevelSerializable)
1712                                 ereport(ERROR,
1713                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1714                                                  errmsg("could not serialize access due to concurrent update")));
1715                         else if (!ItemPointerEquals(tupleid, &update_ctid))
1716                         {
1717                                 TupleTableSlot *epqslot;
1718
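                                /*
                                 * As in ExecDelete, follow the update chain under READ
                                 * COMMITTED.  The slot returned by EvalPlanQual still carries
                                 * junk attributes, so it is passed through the junk filter
                                 * before looping back to lreplace to recheck constraints and
                                 * retry heap_update against the newest version.
                                 */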
1719                                 epqslot = EvalPlanQual(estate,
1720                                                                            resultRelInfo->ri_RangeTableIndex,
1721                                                                            &update_ctid,
1722                                                                            update_xmax,
1723                                                                            estate->es_snapshot->curcid);
1724                                 if (!TupIsNull(epqslot))
1725                                 {
1726                                         *tupleid = update_ctid;
1727                                         slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
1728                                         tuple = ExecMaterializeSlot(slot);
1729                                         goto lreplace;
1730                                 }
1731                         }
1732                         /* tuple already deleted; nothing to do */
1733                         return;
1734
1735                 default:
1736                         elog(ERROR, "unrecognized heap_update status: %u", result);
1737                         return;
1738         }
1739
1740         IncrReplaced();
1741         (estate->es_processed)++;
1742
1743         /*
1744          * Note: instead of having to update the old index tuples associated with
1745          * the heap tuple, all we do is form and insert new index tuples. This is
1746          * because UPDATEs are actually DELETEs and INSERTs, and index tuple
1747          * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
1748          * here is insert new index tuples.  -cim 9/27/89
1749          */
1750
1751         /*
1752          * insert index entries for tuple
1753          *
1754          * Note: heap_update returns the tid (location) of the new tuple in the
1755          * t_self field.
1756          */
1757         if (resultRelInfo->ri_NumIndices > 0)
1758                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1759
1760         /* AFTER ROW UPDATE Triggers */
1761         ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
1762 }
1763
1764 static const char *
1765 ExecRelCheck(ResultRelInfo *resultRelInfo,
1766                          TupleTableSlot *slot, EState *estate)
1767 {
1768         Relation        rel = resultRelInfo->ri_RelationDesc;
1769         int                     ncheck = rel->rd_att->constr->num_check;
1770         ConstrCheck *check = rel->rd_att->constr->check;
1771         ExprContext *econtext;
1772         MemoryContext oldContext;
1773         List       *qual;
1774         int                     i;
1775
1776         /*
1777          * If first time through for this result relation, build expression
1778          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1779          * memory context so they'll survive throughout the query.
1780          */
1781         if (resultRelInfo->ri_ConstraintExprs == NULL)
1782         {
1783                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1784                 resultRelInfo->ri_ConstraintExprs =
1785                         (List **) palloc(ncheck * sizeof(List *));
1786                 for (i = 0; i < ncheck; i++)
1787                 {
1788                         /* ExecQual wants implicit-AND form */
1789                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1790                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1791                                 ExecPrepareExpr((Expr *) qual, estate);
1792                 }
1793                 MemoryContextSwitchTo(oldContext);
1794         }
1795
1796         /*
1797          * We will use the EState's per-tuple context for evaluating constraint
1798          * expressions (creating it if it's not already there).
1799          */
1800         econtext = GetPerTupleExprContext(estate);
1801
1802         /* Arrange for econtext's scan tuple to be the tuple under test */
1803         econtext->ecxt_scantuple = slot;
1804
1805         /* And evaluate the constraints */
1806         for (i = 0; i < ncheck; i++)
1807         {
1808                 qual = resultRelInfo->ri_ConstraintExprs[i];
1809
1810                 /*
1811                  * NOTE: SQL92 specifies that a NULL result from a constraint
1812                  * expression is not to be treated as a failure.  Therefore, tell
1813                  * ExecQual to return TRUE for NULL.
1814                  */
1815                 if (!ExecQual(qual, econtext, true))
1816                         return check[i].ccname;
1817         }
1818
1819         /* NULL result means no error */
1820         return NULL;
1821 }
1822
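/*
 * ExecConstraints - check not-null and check constraints for a tuple
 *
 * Verifies that the tuple in 'slot' satisfies the result relation's NOT NULL
 * column constraints and its CHECK constraints (via ExecRelCheck above),
 * raising an error at the first violation found.  Used by ExecInsert and
 * ExecUpdate whenever the target relation has any constraints.
 */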
1823 void
1824 ExecConstraints(ResultRelInfo *resultRelInfo,
1825                                 TupleTableSlot *slot, EState *estate)
1826 {
1827         Relation        rel = resultRelInfo->ri_RelationDesc;
1828         TupleConstr *constr = rel->rd_att->constr;
1829
1830         Assert(constr);
1831
1832         if (constr->has_not_null)
1833         {
1834                 int                     natts = rel->rd_att->natts;
1835                 int                     attrChk;
1836
1837                 for (attrChk = 1; attrChk <= natts; attrChk++)
1838                 {
1839                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1840                                 slot_attisnull(slot, attrChk))
1841                                 ereport(ERROR,
1842                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1843                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1844                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1845                 }
1846         }
1847
1848         if (constr->num_check > 0)
1849         {
1850                 const char *failed;
1851
1852                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1853                         ereport(ERROR,
1854                                         (errcode(ERRCODE_CHECK_VIOLATION),
1855                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1856                                                         RelationGetRelationName(rel), failed)));
1857         }
1858 }
1859
1860 /*
1861  * Check a modified tuple to see if we want to process its updated version
1862  * under READ COMMITTED rules.
1863  *
1864  * See backend/executor/README for some info about how this works.
1865  *
1866  *      estate - executor state data
1867  *      rti - rangetable index of table containing tuple
1868  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
1869  *      priorXmax - t_xmax from the outdated tuple
1870  *      curCid - command ID of current command of my transaction
1871  *
1872  * *tid is also an output parameter: it's modified to hold the TID of the
1873  * latest version of the tuple (note this may be changed even on failure)
1874  *
1875  * Returns a slot containing the new candidate update/delete tuple, or
1876  * NULL if we determine we shouldn't process the row.
1877  */
1878 TupleTableSlot *
1879 EvalPlanQual(EState *estate, Index rti,
1880                          ItemPointer tid, TransactionId priorXmax, CommandId curCid)
1881 {
1882         evalPlanQual *epq;
1883         EState     *epqstate;
1884         Relation        relation;
1885         HeapTupleData tuple;
1886         HeapTuple       copyTuple = NULL;
1887         bool            endNode;
1888
1889         Assert(rti != 0);
1890
1891         /*
1892          * find relation containing target tuple
1893          */
1894         if (estate->es_result_relation_info != NULL &&
1895                 estate->es_result_relation_info->ri_RangeTableIndex == rti)
1896                 relation = estate->es_result_relation_info->ri_RelationDesc;
1897         else
1898         {
1899                 ListCell   *l;
1900
1901                 relation = NULL;
1902                 foreach(l, estate->es_rowMarks)
1903                 {
1904                         if (((ExecRowMark *) lfirst(l))->rti == rti)
1905                         {
1906                                 relation = ((ExecRowMark *) lfirst(l))->relation;
1907                                 break;
1908                         }
1909                 }
1910                 if (relation == NULL)
1911                         elog(ERROR, "could not find RowMark for RT index %u", rti);
1912         }
1913
1914         /*
1915          * fetch tid tuple
1916          *
1917          * Loop here to deal with updated or busy tuples
1918          */
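        /*
         * Each pass through the loop either: returns NULL because the row is
         * gone (or not visible to this command), waits out a concurrent updater
         * and retries the fetch, copies the tuple and breaks out, or follows
         * t_ctid to the next version in the update chain and loops.
         */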
1919         tuple.t_self = *tid;
1920         for (;;)
1921         {
1922                 Buffer          buffer;
1923
1924                 if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
1925                 {
1926                         /*
1927                          * If xmin isn't what we're expecting, the slot must have been
1928                          * recycled and reused for an unrelated tuple.  This implies that
1929                          * the latest version of the row was deleted, so we need do
1930                          * nothing.  (Should be safe to examine xmin without getting
1931                          * buffer's content lock, since xmin never changes in an existing
1932                          * tuple.)
1933                          */
1934                         if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
1935                                                                          priorXmax))
1936                         {
1937                                 ReleaseBuffer(buffer);
1938                                 return NULL;
1939                         }
1940
1941                         /* otherwise xmin should not be dirty... */
1942                         if (TransactionIdIsValid(SnapshotDirty->xmin))
1943                                 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
1944
1945                         /*
1946                          * If the tuple is being updated by another transaction, we have to
1947                          * wait for it to commit or abort.
1948                          */
1949                         if (TransactionIdIsValid(SnapshotDirty->xmax))
1950                         {
1951                                 ReleaseBuffer(buffer);
1952                                 XactLockTableWait(SnapshotDirty->xmax);
1953                                 continue;               /* loop back to repeat heap_fetch */
1954                         }
1955
1956                         /*
1957                          * If tuple was inserted by our own transaction, we have to check
1958                          * cmin against curCid: cmin >= curCid means our command cannot
1959                          * see the tuple, so we should ignore it.  Without this we are
1960                          * open to the "Halloween problem" of indefinitely re-updating
1961                          * the same tuple.  (We need not check cmax because
1962                          * HeapTupleSatisfiesDirty will consider a tuple deleted by
1963                          * our transaction dead, regardless of cmax.)  We just checked
1964                          * that priorXmax == xmin, so we can test that variable instead
1965                          * of doing HeapTupleHeaderGetXmin again.
1966                          */
1967                         if (TransactionIdIsCurrentTransactionId(priorXmax) &&
1968                                 HeapTupleHeaderGetCmin(tuple.t_data) >= curCid)
1969                         {
1970                                 ReleaseBuffer(buffer);
1971                                 return NULL;
1972                         }
1973
1974                         /*
1975                          * We got tuple - now copy it for use by recheck query.
1976                          */
1977                         copyTuple = heap_copytuple(&tuple);
1978                         ReleaseBuffer(buffer);
1979                         break;
1980                 }
1981
1982                 /*
1983                  * If the referenced slot was actually empty, the latest version of
1984                  * the row must have been deleted, so we need do nothing.
1985                  */
1986                 if (tuple.t_data == NULL)
1987                 {
1988                         ReleaseBuffer(buffer);
1989                         return NULL;
1990                 }
1991
1992                 /*
1993                  * As above, if xmin isn't what we're expecting, do nothing.
1994                  */
1995                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
1996                                                                  priorXmax))
1997                 {
1998                         ReleaseBuffer(buffer);
1999                         return NULL;
2000                 }
2001
2002                 /*
2003                  * If we get here, the tuple was found but failed SnapshotDirty.
2004                  * Assuming the xmin is either a committed xact or our own xact (as it
2005                  * certainly should be if we're trying to modify the tuple), this must
2006                  * mean that the row was updated or deleted by either a committed xact
2007                  * or our own xact.  If it was deleted, we can ignore it; if it was
2008                  * updated then chain up to the next version and repeat the whole
2009                  * test.
2010                  *
2011                  * As above, it should be safe to examine xmax and t_ctid without the
2012                  * buffer content lock, because they can't be changing.
2013                  */
2014                 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2015                 {
2016                         /* deleted, so forget about it */
2017                         ReleaseBuffer(buffer);
2018                         return NULL;
2019                 }
2020
2021                 /* updated, so look at the updated row */
2022                 tuple.t_self = tuple.t_data->t_ctid;
2023                 /* updated row should have xmin matching this xmax */
2024                 priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
2025                 ReleaseBuffer(buffer);
2026                 /* loop back to fetch next in chain */
2027         }
2028
2029         /*
2030          * For UPDATE/DELETE we have to return tid of actual row we're executing
2031          * PQ for.
2032          */
2033         *tid = tuple.t_self;
2034
2035         /*
2036          * Need to run a recheck subquery.      Find or create a PQ stack entry.
2037          */
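        /*
         * estate->es_evalPlanQual points at the top of a stack of recheck
         * states: the "next" links run down the currently active rechecks,
         * while "free" keeps finished entries around for reuse (see the
         * evalPlanQual struct declaration above).
         */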
2038         epq = estate->es_evalPlanQual;
2039         endNode = true;
2040
2041         if (epq != NULL && epq->rti == 0)
2042         {
2043                 /* Top PQ stack entry is idle, so re-use it */
2044                 Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2045                 epq->rti = rti;
2046                 endNode = false;
2047         }
2048
2049         /*
2050          * If this is a request for a different RTE (call it Ra), check whether a
2051          * PlanQual recheck is already in progress for Ra.  If so, Ra's row has
2052          * been updated again, so we must restart the old recheck for Ra and
2053          * discard everything done after Ra's recheck was suspended.
2054          */
2055         if (epq != NULL && epq->rti != rti &&
2056                 epq->estate->es_evTuple[rti - 1] != NULL)
2057         {
2058                 do
2059                 {
2060                         evalPlanQual *oldepq;
2061
2062                         /* stop execution */
2063                         EvalPlanQualStop(epq);
2064                         /* pop previous PlanQual from the stack */
2065                         oldepq = epq->next;
2066                         Assert(oldepq && oldepq->rti != 0);
2067                         /* push current PQ to freePQ stack */
2068                         oldepq->free = epq;
2069                         epq = oldepq;
2070                         estate->es_evalPlanQual = epq;
2071                 } while (epq->rti != rti);
2072         }
2073
2074         /*
2075          * If the request is for a different RTE, we have to suspend execution
2076          * of the current PlanQual and start execution of a new one.
2077          */
2078         if (epq == NULL || epq->rti != rti)
2079         {
2080                 /* try to reuse plan used previously */
2081                 evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
2082
2083                 if (newepq == NULL)             /* first call or freePQ stack is empty */
2084                 {
2085                         newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
2086                         newepq->free = NULL;
2087                         newepq->estate = NULL;
2088                         newepq->planstate = NULL;
2089                 }
2090                 else
2091                 {
2092                         /* recycle previously used PlanQual */
2093                         Assert(newepq->estate == NULL);
2094                         epq->free = NULL;
2095                 }
2096                 /* push current PQ to the stack */
2097                 newepq->next = epq;
2098                 epq = newepq;
2099                 estate->es_evalPlanQual = epq;
2100                 epq->rti = rti;
2101                 endNode = false;
2102         }
2103
2104         Assert(epq->rti == rti);
2105
2106         /*
2107          * Ok - we're requested for the same RTE.  Unfortunately we still have to
2108          * end and restart execution of the plan, because ExecReScan wouldn't
2109          * ensure that upper plan nodes would reset themselves.  We could make
2110          * that work if insertion of the target tuple were integrated with the
2111          * Param mechanism somehow, so that the upper plan nodes know that their
2112          * children's outputs have changed.
2113          *
2114          * Note that the stack of free evalPlanQual nodes is quite useless at the
2115          * moment, since it only saves us from pallocing/releasing the
2116          * evalPlanQual nodes themselves.  But it will be useful once we implement
2117          * ReScan instead of end/restart for re-using PlanQual nodes.
2118          */
2119         if (endNode)
2120         {
2121                 /* stop execution */
2122                 EvalPlanQualStop(epq);
2123         }
2124
2125         /*
2126          * Initialize new recheck query.
2127          *
2128          * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
2129          * instead copy down changeable state from the top plan (including
2130          * es_result_relation_info, es_junkFilter) and reset locally changeable
2131          * state in the epq (including es_param_exec_vals, es_evTupleNull).
2132          */
2133         EvalPlanQualStart(epq, estate, epq->next);
2134
2135         /*
2136          * free the old tuple for this RTE, if any, and store the target tuple
2137          * where the relation's scan node will see it
2138          */
2139         epqstate = epq->estate;
2140         if (epqstate->es_evTuple[rti - 1] != NULL)
2141                 heap_freetuple(epqstate->es_evTuple[rti - 1]);
2142         epqstate->es_evTuple[rti - 1] = copyTuple;
2143
2144         return EvalPlanQualNext(estate);
2145 }
2146
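/*
 * EvalPlanQualNext - fetch the next tuple from the current recheck plan
 *
 * Runs the topmost PlanQual plan in its own per-query memory context.  When
 * a plan is exhausted it is shut down and popped from the stack; once the
 * stack is empty we return NULL so the caller resumes the regular plan.
 */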
2147 static TupleTableSlot *
2148 EvalPlanQualNext(EState *estate)
2149 {
2150         evalPlanQual *epq = estate->es_evalPlanQual;
2151         MemoryContext oldcontext;
2152         TupleTableSlot *slot;
2153
2154         Assert(epq->rti != 0);
2155
2156 lpqnext:;
2157         oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
2158         slot = ExecProcNode(epq->planstate);
2159         MemoryContextSwitchTo(oldcontext);
2160
2161         /*
2162          * No more tuples for this PQ. Continue previous one.
2163          */
2164         if (TupIsNull(slot))
2165         {
2166                 evalPlanQual *oldepq;
2167
2168                 /* stop execution */
2169                 EvalPlanQualStop(epq);
2170                 /* pop old PQ from the stack */
2171                 oldepq = epq->next;
2172                 if (oldepq == NULL)
2173                 {
2174                         /* this is the first (oldest) PQ - mark as free */
2175                         epq->rti = 0;
2176                         estate->es_useEvalPlan = false;
2177                         /* and continue Query execution */
2178                         return NULL;
2179                 }
2180                 Assert(oldepq->rti != 0);
2181                 /* push current PQ to freePQ stack */
2182                 oldepq->free = epq;
2183                 epq = oldepq;
2184                 estate->es_evalPlanQual = epq;
2185                 goto lpqnext;
2186         }
2187
2188         return slot;
2189 }
2190
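/*
 * EndEvalPlanQual - shut down any PlanQual rechecks still in progress
 *
 * Pops and stops every active recheck on the stack, leaving the bottom entry
 * idle (rti == 0) so it can be reused later.  A no-op if no recheck is
 * active.
 */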
2191 static void
2192 EndEvalPlanQual(EState *estate)
2193 {
2194         evalPlanQual *epq = estate->es_evalPlanQual;
2195
2196         if (epq->rti == 0)                      /* plans already shut down */
2197         {
2198                 Assert(epq->next == NULL);
2199                 return;
2200         }
2201
2202         for (;;)
2203         {
2204                 evalPlanQual *oldepq;
2205
2206                 /* stop execution */
2207                 EvalPlanQualStop(epq);
2208                 /* pop old PQ from the stack */
2209                 oldepq = epq->next;
2210                 if (oldepq == NULL)
2211                 {
2212                         /* this is the first (oldest) PQ - mark as free */
2213                         epq->rti = 0;
2214                         estate->es_useEvalPlan = false;
2215                         break;
2216                 }
2217                 Assert(oldepq->rti != 0);
2218                 /* push current PQ to freePQ stack */
2219                 oldepq->free = epq;
2220                 epq = oldepq;
2221                 estate->es_evalPlanQual = epq;
2222         }
2223 }
2224
2225 /*
2226  * Start execution of one level of PlanQual.
2227  *
2228  * This is a cut-down version of ExecutorStart(): we copy some state from
2229  * the top-level estate rather than initializing it fresh.
2230  */
2231 static void
2232 EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
2233 {
2234         EState     *epqstate;
2235         int                     rtsize;
2236         MemoryContext oldcontext;
2237
2238         rtsize = list_length(estate->es_range_table);
2239
2240         epq->estate = epqstate = CreateExecutorState();
2241
2242         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2243
2244         /*
2245          * The epqstates share the top query's copy of unchanging state such as
2246          * the snapshot, rangetable, result-rel info, and external Param info.
2247          * They need their own copies of local state, including a tuple table,
2248          * es_param_exec_vals, etc.
2249          */
2250         epqstate->es_direction = ForwardScanDirection;
2251         epqstate->es_snapshot = estate->es_snapshot;
2252         epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
2253         epqstate->es_range_table = estate->es_range_table;
2254         epqstate->es_result_relations = estate->es_result_relations;
2255         epqstate->es_num_result_relations = estate->es_num_result_relations;
2256         epqstate->es_result_relation_info = estate->es_result_relation_info;
2257         epqstate->es_junkFilter = estate->es_junkFilter;
2258         epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
2259         epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
2260         epqstate->es_param_list_info = estate->es_param_list_info;
2261         if (estate->es_topPlan->nParamExec > 0)
2262                 epqstate->es_param_exec_vals = (ParamExecData *)
2263                         palloc0(estate->es_topPlan->nParamExec * sizeof(ParamExecData));
2264         epqstate->es_rowMarks = estate->es_rowMarks;
2265         epqstate->es_instrument = estate->es_instrument;
2266         epqstate->es_select_into = estate->es_select_into;
2267         epqstate->es_into_oids = estate->es_into_oids;
2268         epqstate->es_topPlan = estate->es_topPlan;
2269
2270         /*
2271          * Each epqstate must have its own es_evTupleNull state, but all the stack
2272          * entries share es_evTuple state.      This allows sub-rechecks to inherit
2273          * the value being examined by an outer recheck.
2274          */
2275         epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
2276         if (priorepq == NULL)
2277                 /* first PQ stack entry */
2278                 epqstate->es_evTuple = (HeapTuple *)
2279                         palloc0(rtsize * sizeof(HeapTuple));
2280         else
2281                 /* later stack entries share the same storage */
2282                 epqstate->es_evTuple = priorepq->estate->es_evTuple;
2283
2284         epqstate->es_tupleTable =
2285                 ExecCreateTupleTable(estate->es_tupleTable->size);
2286
2287         epq->planstate = ExecInitNode(estate->es_topPlan, epqstate, 0);
2288
2289         MemoryContextSwitchTo(oldcontext);
2290 }
2291
2292 /*
2293  * End execution of one level of PlanQual.
2294  *
2295  * This is a cut-down version of ExecutorEnd(); basically we want to do most
2296  * of the normal cleanup, but *not* close result relations (which we are
2297  * just sharing from the outer query).
2298  */
2299 static void
2300 EvalPlanQualStop(evalPlanQual *epq)
2301 {
2302         EState     *epqstate = epq->estate;
2303         MemoryContext oldcontext;
2304
2305         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2306
2307         ExecEndNode(epq->planstate);
2308
2309         ExecDropTupleTable(epqstate->es_tupleTable, true);
2310         epqstate->es_tupleTable = NULL;
2311
2312         if (epqstate->es_evTuple[epq->rti - 1] != NULL)
2313         {
2314                 heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
2315                 epqstate->es_evTuple[epq->rti - 1] = NULL;
2316         }
2317
2318         MemoryContextSwitchTo(oldcontext);
2319
2320         FreeExecutorState(epqstate);
2321
2322         epq->estate = NULL;
2323         epq->planstate = NULL;
2324 }