/*
 * granicus.if.org Git - postgresql/blob - src/backend/executor/execMain.c
 * Improve snapshot manager by keeping explicit track of snapshots.
 * [postgresql] / src / backend / executor / execMain.c
 */
1 /*-------------------------------------------------------------------------
2  *
3  * execMain.c
4  *        top level executor interface routines
5  *
6  * INTERFACE ROUTINES
7  *      ExecutorStart()
8  *      ExecutorRun()
9  *      ExecutorEnd()
10  *
11  *      The old ExecutorMain() has been replaced by ExecutorStart(),
12  *      ExecutorRun() and ExecutorEnd()
13  *
14  *      These three procedures are the external interfaces to the executor.
15  *      In each case, the query descriptor is required as an argument.
16  *
17  *      ExecutorStart() must be called at the beginning of execution of any
18  *      query plan and ExecutorEnd() should always be called at the end of
19  *      execution of a plan.
20  *
21  *      ExecutorRun accepts direction and count arguments that specify whether
22  *      the plan is to be executed forwards, backwards, and for how many tuples.
23  *
24  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  *        $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.309 2008/05/12 20:02:00 alvherre Exp $
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34
35 #include "access/heapam.h"
36 #include "access/reloptions.h"
37 #include "access/transam.h"
38 #include "access/xact.h"
39 #include "catalog/heap.h"
40 #include "catalog/namespace.h"
41 #include "catalog/toasting.h"
42 #include "commands/tablespace.h"
43 #include "commands/trigger.h"
44 #include "executor/execdebug.h"
45 #include "executor/instrument.h"
46 #include "executor/nodeSubplan.h"
47 #include "miscadmin.h"
48 #include "optimizer/clauses.h"
49 #include "parser/parse_clause.h"
50 #include "parser/parsetree.h"
51 #include "storage/bufmgr.h"
52 #include "storage/lmgr.h"
53 #include "storage/smgr.h"
54 #include "utils/acl.h"
55 #include "utils/lsyscache.h"
56 #include "utils/memutils.h"
57 #include "utils/snapmgr.h"
58 #include "utils/tqual.h"
59
60
/*
 * evalPlanQual: per-relation state used to recheck an updated tuple against
 * a query's qualifications (EvalPlanQual machinery).  Entries form two
 * linked lists threaded through the same nodes: a stack of currently
 * active rechecks, and a freelist of reusable entries.
 */
typedef struct evalPlanQual
{
	Index		rti;			/* range-table index this recheck applies to
								 * -- presumably; confirm against EvalPlanQual
								 * callers outside this chunk */
	EState	   *estate;			/* subsidiary executor state for the recheck */
	PlanState  *planstate;		/* plan tree executed for the recheck */
	struct evalPlanQual *next;	/* stack of active PlanQual plans */
	struct evalPlanQual *free;	/* list of free PlanQual plans */
} evalPlanQual;

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
		   DestReceiver *dest, EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest, EState *estate);
static void ExecProcessReturning(ProjectionInfo *projectReturning,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot,
					 DestReceiver *dest);
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
static void ExecCheckRTPerms(List *rangeTable);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
				  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */
109
110
111 /* ----------------------------------------------------------------
112  *              ExecutorStart
113  *
114  *              This routine must be called at the beginning of any execution of any
115  *              query plan
116  *
117  * Takes a QueryDesc previously created by CreateQueryDesc (it's not real
118  * clear why we bother to separate the two functions, but...).  The tupDesc
119  * field of the QueryDesc is filled in to describe the tuples that will be
120  * returned, and the internal fields (estate and planstate) are set up.
121  *
122  * eflags contains flag bits as described in executor.h.
123  *
124  * NB: the CurrentMemoryContext when this is called will become the parent
125  * of the per-query context used for this Executor invocation.
126  * ----------------------------------------------------------------
127  */
128 void
129 ExecutorStart(QueryDesc *queryDesc, int eflags)
130 {
131         EState     *estate;
132         MemoryContext oldcontext;
133
134         /* sanity checks: queryDesc must not be started already */
135         Assert(queryDesc != NULL);
136         Assert(queryDesc->estate == NULL);
137
138         /*
139          * If the transaction is read-only, we need to check if any writes are
140          * planned to non-temporary tables.  EXPLAIN is considered read-only.
141          */
142         if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
143                 ExecCheckXactReadOnly(queryDesc->plannedstmt);
144
145         /*
146          * Build EState, switch into per-query memory context for startup.
147          */
148         estate = CreateExecutorState();
149         queryDesc->estate = estate;
150
151         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
152
153         /*
154          * Fill in parameters, if any, from queryDesc
155          */
156         estate->es_param_list_info = queryDesc->params;
157
158         if (queryDesc->plannedstmt->nParamExec > 0)
159                 estate->es_param_exec_vals = (ParamExecData *)
160                         palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));
161
162         /*
163          * If non-read-only query, set the command ID to mark output tuples with
164          */
165         switch (queryDesc->operation)
166         {
167                 case CMD_SELECT:
168                         /* SELECT INTO and SELECT FOR UPDATE/SHARE need to mark tuples */
169                         if (queryDesc->plannedstmt->intoClause != NULL ||
170                                 queryDesc->plannedstmt->rowMarks != NIL)
171                                 estate->es_output_cid = GetCurrentCommandId(true);
172                         break;
173
174                 case CMD_INSERT:
175                 case CMD_DELETE:
176                 case CMD_UPDATE:
177                         estate->es_output_cid = GetCurrentCommandId(true);
178                         break;
179
180                 default:
181                         elog(ERROR, "unrecognized operation code: %d",
182                                  (int) queryDesc->operation);
183                         break;
184         }
185
186         /*
187          * Copy other important information into the EState
188          */
189         estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
190         estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
191         estate->es_instrument = queryDesc->doInstrument;
192
193         /*
194          * Initialize the plan state tree
195          */
196         InitPlan(queryDesc, eflags);
197
198         MemoryContextSwitchTo(oldcontext);
199 }
200
201 /* ----------------------------------------------------------------
202  *              ExecutorRun
203  *
204  *              This is the main routine of the executor module. It accepts
205  *              the query descriptor from the traffic cop and executes the
206  *              query plan.
207  *
208  *              ExecutorStart must have been called already.
209  *
210  *              If direction is NoMovementScanDirection then nothing is done
211  *              except to start up/shut down the destination.  Otherwise,
212  *              we retrieve up to 'count' tuples in the specified direction.
213  *
214  *              Note: count = 0 is interpreted as no portal limit, i.e., run to
215  *              completion.
216  *
217  * ----------------------------------------------------------------
218  */
219 TupleTableSlot *
220 ExecutorRun(QueryDesc *queryDesc,
221                         ScanDirection direction, long count)
222 {
223         EState     *estate;
224         CmdType         operation;
225         DestReceiver *dest;
226         bool            sendTuples;
227         TupleTableSlot *result;
228         MemoryContext oldcontext;
229
230         /* sanity checks */
231         Assert(queryDesc != NULL);
232
233         estate = queryDesc->estate;
234
235         Assert(estate != NULL);
236
237         /*
238          * Switch into per-query memory context
239          */
240         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
241
242         /*
243          * extract information from the query descriptor and the query feature.
244          */
245         operation = queryDesc->operation;
246         dest = queryDesc->dest;
247
248         /*
249          * startup tuple receiver, if we will be emitting tuples
250          */
251         estate->es_processed = 0;
252         estate->es_lastoid = InvalidOid;
253
254         sendTuples = (operation == CMD_SELECT ||
255                                   queryDesc->plannedstmt->returningLists);
256
257         if (sendTuples)
258                 (*dest->rStartup) (dest, operation, queryDesc->tupDesc);
259
260         /*
261          * run plan
262          */
263         if (ScanDirectionIsNoMovement(direction))
264                 result = NULL;
265         else
266                 result = ExecutePlan(estate,
267                                                          queryDesc->planstate,
268                                                          operation,
269                                                          count,
270                                                          direction,
271                                                          dest);
272
273         /*
274          * shutdown tuple receiver, if we started it
275          */
276         if (sendTuples)
277                 (*dest->rShutdown) (dest);
278
279         MemoryContextSwitchTo(oldcontext);
280
281         return result;
282 }
283
284 /* ----------------------------------------------------------------
285  *              ExecutorEnd
286  *
287  *              This routine must be called at the end of execution of any
288  *              query plan
289  * ----------------------------------------------------------------
290  */
291 void
292 ExecutorEnd(QueryDesc *queryDesc)
293 {
294         EState     *estate;
295         MemoryContext oldcontext;
296
297         /* sanity checks */
298         Assert(queryDesc != NULL);
299
300         estate = queryDesc->estate;
301
302         Assert(estate != NULL);
303
304         /*
305          * Switch into per-query memory context to run ExecEndPlan
306          */
307         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
308
309         ExecEndPlan(queryDesc->planstate, estate);
310
311         /*
312          * Close the SELECT INTO relation if any
313          */
314         if (estate->es_select_into)
315                 CloseIntoRel(queryDesc);
316
317         /* do away with our snapshots */
318         UnregisterSnapshot(estate->es_snapshot);
319         UnregisterSnapshot(estate->es_crosscheck_snapshot);
320
321         /*
322          * Must switch out of context before destroying it
323          */
324         MemoryContextSwitchTo(oldcontext);
325
326         /*
327          * Release EState and per-query memory context.  This should release
328          * everything the executor has allocated.
329          */
330         FreeExecutorState(estate);
331
332         /* Reset queryDesc fields that no longer point to anything */
333         queryDesc->tupDesc = NULL;
334         queryDesc->estate = NULL;
335         queryDesc->planstate = NULL;
336 }
337
338 /* ----------------------------------------------------------------
339  *              ExecutorRewind
340  *
341  *              This routine may be called on an open queryDesc to rewind it
342  *              to the start.
343  * ----------------------------------------------------------------
344  */
345 void
346 ExecutorRewind(QueryDesc *queryDesc)
347 {
348         EState     *estate;
349         MemoryContext oldcontext;
350
351         /* sanity checks */
352         Assert(queryDesc != NULL);
353
354         estate = queryDesc->estate;
355
356         Assert(estate != NULL);
357
358         /* It's probably not sensible to rescan updating queries */
359         Assert(queryDesc->operation == CMD_SELECT);
360
361         /*
362          * Switch into per-query memory context
363          */
364         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
365
366         /*
367          * rescan plan
368          */
369         ExecReScan(queryDesc->planstate, NULL);
370
371         MemoryContextSwitchTo(oldcontext);
372 }
373
374
375 /*
376  * ExecCheckRTPerms
377  *              Check access permissions for all relations listed in a range table.
378  */
379 static void
380 ExecCheckRTPerms(List *rangeTable)
381 {
382         ListCell   *l;
383
384         foreach(l, rangeTable)
385         {
386                 ExecCheckRTEPerms((RangeTblEntry *) lfirst(l));
387         }
388 }
389
390 /*
391  * ExecCheckRTEPerms
392  *              Check access permissions for a single RTE.
393  */
394 static void
395 ExecCheckRTEPerms(RangeTblEntry *rte)
396 {
397         AclMode         requiredPerms;
398         Oid                     relOid;
399         Oid                     userid;
400
401         /*
402          * Only plain-relation RTEs need to be checked here.  Function RTEs are
403          * checked by init_fcache when the function is prepared for execution.
404          * Join, subquery, and special RTEs need no checks.
405          */
406         if (rte->rtekind != RTE_RELATION)
407                 return;
408
409         /*
410          * No work if requiredPerms is empty.
411          */
412         requiredPerms = rte->requiredPerms;
413         if (requiredPerms == 0)
414                 return;
415
416         relOid = rte->relid;
417
418         /*
419          * userid to check as: current user unless we have a setuid indication.
420          *
421          * Note: GetUserId() is presently fast enough that there's no harm in
422          * calling it separately for each RTE.  If that stops being true, we could
423          * call it once in ExecCheckRTPerms and pass the userid down from there.
424          * But for now, no need for the extra clutter.
425          */
426         userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
427
428         /*
429          * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
430          */
431         if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
432                 != requiredPerms)
433                 aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
434                                            get_rel_name(relOid));
435 }
436
437 /*
438  * Check that the query does not imply any writes to non-temp tables.
439  */
440 static void
441 ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
442 {
443         ListCell   *l;
444
445         /*
446          * CREATE TABLE AS or SELECT INTO?
447          *
448          * XXX should we allow this if the destination is temp?
449          */
450         if (plannedstmt->intoClause != NULL)
451                 goto fail;
452
453         /* Fail if write permissions are requested on any non-temp table */
454         foreach(l, plannedstmt->rtable)
455         {
456                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
457
458                 if (rte->rtekind != RTE_RELATION)
459                         continue;
460
461                 if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
462                         continue;
463
464                 if (isTempNamespace(get_rel_namespace(rte->relid)))
465                         continue;
466
467                 goto fail;
468         }
469
470         return;
471
472 fail:
473         ereport(ERROR,
474                         (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
475                          errmsg("transaction is read-only")));
476 }
477
478
479 /* ----------------------------------------------------------------
480  *              InitPlan
481  *
482  *              Initializes the query plan: open files, allocate storage
483  *              and start up the rule manager
484  * ----------------------------------------------------------------
485  */
486 static void
487 InitPlan(QueryDesc *queryDesc, int eflags)
488 {
489         CmdType         operation = queryDesc->operation;
490         PlannedStmt *plannedstmt = queryDesc->plannedstmt;
491         Plan       *plan = plannedstmt->planTree;
492         List       *rangeTable = plannedstmt->rtable;
493         EState     *estate = queryDesc->estate;
494         PlanState  *planstate;
495         TupleDesc       tupType;
496         ListCell   *l;
497         int                     i;
498
499         /*
500          * Do permissions checks
501          */
502         ExecCheckRTPerms(rangeTable);
503
504         /*
505          * initialize the node's execution state
506          */
507         estate->es_range_table = rangeTable;
508
509         /*
510          * initialize result relation stuff
511          */
512         if (plannedstmt->resultRelations)
513         {
514                 List       *resultRelations = plannedstmt->resultRelations;
515                 int                     numResultRelations = list_length(resultRelations);
516                 ResultRelInfo *resultRelInfos;
517                 ResultRelInfo *resultRelInfo;
518
519                 resultRelInfos = (ResultRelInfo *)
520                         palloc(numResultRelations * sizeof(ResultRelInfo));
521                 resultRelInfo = resultRelInfos;
522                 foreach(l, resultRelations)
523                 {
524                         Index           resultRelationIndex = lfirst_int(l);
525                         Oid                     resultRelationOid;
526                         Relation        resultRelation;
527
528                         resultRelationOid = getrelid(resultRelationIndex, rangeTable);
529                         resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
530                         InitResultRelInfo(resultRelInfo,
531                                                           resultRelation,
532                                                           resultRelationIndex,
533                                                           operation,
534                                                           estate->es_instrument);
535                         resultRelInfo++;
536                 }
537                 estate->es_result_relations = resultRelInfos;
538                 estate->es_num_result_relations = numResultRelations;
539                 /* Initialize to first or only result rel */
540                 estate->es_result_relation_info = resultRelInfos;
541         }
542         else
543         {
544                 /*
545                  * if no result relation, then set state appropriately
546                  */
547                 estate->es_result_relations = NULL;
548                 estate->es_num_result_relations = 0;
549                 estate->es_result_relation_info = NULL;
550         }
551
552         /*
553          * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
554          * flag appropriately so that the plan tree will be initialized with the
555          * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
556          */
557         estate->es_select_into = false;
558         if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
559         {
560                 estate->es_select_into = true;
561                 estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
562         }
563
564         /*
565          * Have to lock relations selected FOR UPDATE/FOR SHARE before we
566          * initialize the plan tree, else we'd be doing a lock upgrade. While we
567          * are at it, build the ExecRowMark list.
568          */
569         estate->es_rowMarks = NIL;
570         foreach(l, plannedstmt->rowMarks)
571         {
572                 RowMarkClause *rc = (RowMarkClause *) lfirst(l);
573                 Oid                     relid = getrelid(rc->rti, rangeTable);
574                 Relation        relation;
575                 ExecRowMark *erm;
576
577                 relation = heap_open(relid, RowShareLock);
578                 erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
579                 erm->relation = relation;
580                 erm->rti = rc->rti;
581                 erm->forUpdate = rc->forUpdate;
582                 erm->noWait = rc->noWait;
583                 /* We'll set up ctidAttno below */
584                 erm->ctidAttNo = InvalidAttrNumber;
585                 estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
586         }
587
588         /*
589          * Initialize the executor "tuple" table.  We need slots for all the plan
590          * nodes, plus possibly output slots for the junkfilter(s). At this point
591          * we aren't sure if we need junkfilters, so just add slots for them
592          * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
593          * trigger output tuples.  Also, one for RETURNING-list evaluation.
594          */
595         {
596                 int                     nSlots;
597
598                 /* Slots for the main plan tree */
599                 nSlots = ExecCountSlotsNode(plan);
600                 /* Add slots for subplans and initplans */
601                 foreach(l, plannedstmt->subplans)
602                 {
603                         Plan       *subplan = (Plan *) lfirst(l);
604
605                         nSlots += ExecCountSlotsNode(subplan);
606                 }
607                 /* Add slots for junkfilter(s) */
608                 if (plannedstmt->resultRelations != NIL)
609                         nSlots += list_length(plannedstmt->resultRelations);
610                 else
611                         nSlots += 1;
612                 if (operation != CMD_SELECT)
613                         nSlots++;                       /* for es_trig_tuple_slot */
614                 if (plannedstmt->returningLists)
615                         nSlots++;                       /* for RETURNING projection */
616
617                 estate->es_tupleTable = ExecCreateTupleTable(nSlots);
618
619                 if (operation != CMD_SELECT)
620                         estate->es_trig_tuple_slot =
621                                 ExecAllocTableSlot(estate->es_tupleTable);
622         }
623
624         /* mark EvalPlanQual not active */
625         estate->es_plannedstmt = plannedstmt;
626         estate->es_evalPlanQual = NULL;
627         estate->es_evTupleNull = NULL;
628         estate->es_evTuple = NULL;
629         estate->es_useEvalPlan = false;
630
631         /*
632          * Initialize private state information for each SubPlan.  We must do this
633          * before running ExecInitNode on the main query tree, since
634          * ExecInitSubPlan expects to be able to find these entries.
635          */
636         Assert(estate->es_subplanstates == NIL);
637         i = 1;                                          /* subplan indices count from 1 */
638         foreach(l, plannedstmt->subplans)
639         {
640                 Plan       *subplan = (Plan *) lfirst(l);
641                 PlanState  *subplanstate;
642                 int                     sp_eflags;
643
644                 /*
645                  * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
646                  * it is a parameterless subplan (not initplan), we suggest that it be
647                  * prepared to handle REWIND efficiently; otherwise there is no need.
648                  */
649                 sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
650                 if (bms_is_member(i, plannedstmt->rewindPlanIDs))
651                         sp_eflags |= EXEC_FLAG_REWIND;
652
653                 subplanstate = ExecInitNode(subplan, estate, sp_eflags);
654
655                 estate->es_subplanstates = lappend(estate->es_subplanstates,
656                                                                                    subplanstate);
657
658                 i++;
659         }
660
661         /*
662          * Initialize the private state information for all the nodes in the query
663          * tree.  This opens files, allocates storage and leaves us ready to start
664          * processing tuples.
665          */
666         planstate = ExecInitNode(plan, estate, eflags);
667
668         /*
669          * Get the tuple descriptor describing the type of tuples to return. (this
670          * is especially important if we are creating a relation with "SELECT
671          * INTO")
672          */
673         tupType = ExecGetResultType(planstate);
674
675         /*
676          * Initialize the junk filter if needed.  SELECT and INSERT queries need a
677          * filter if there are any junk attrs in the tlist.  INSERT and SELECT
678          * INTO also need a filter if the plan may return raw disk tuples (else
679          * heap_insert will be scribbling on the source relation!). UPDATE and
680          * DELETE always need a filter, since there's always a junk 'ctid'
681          * attribute present --- no need to look first.
682          */
683         {
684                 bool            junk_filter_needed = false;
685                 ListCell   *tlist;
686
687                 switch (operation)
688                 {
689                         case CMD_SELECT:
690                         case CMD_INSERT:
691                                 foreach(tlist, plan->targetlist)
692                                 {
693                                         TargetEntry *tle = (TargetEntry *) lfirst(tlist);
694
695                                         if (tle->resjunk)
696                                         {
697                                                 junk_filter_needed = true;
698                                                 break;
699                                         }
700                                 }
701                                 if (!junk_filter_needed &&
702                                         (operation == CMD_INSERT || estate->es_select_into) &&
703                                         ExecMayReturnRawTuples(planstate))
704                                         junk_filter_needed = true;
705                                 break;
706                         case CMD_UPDATE:
707                         case CMD_DELETE:
708                                 junk_filter_needed = true;
709                                 break;
710                         default:
711                                 break;
712                 }
713
714                 if (junk_filter_needed)
715                 {
716                         /*
717                          * If there are multiple result relations, each one needs its own
718                          * junk filter.  Note this is only possible for UPDATE/DELETE, so
719                          * we can't be fooled by some needing a filter and some not.
720                          */
721                         if (list_length(plannedstmt->resultRelations) > 1)
722                         {
723                                 PlanState **appendplans;
724                                 int                     as_nplans;
725                                 ResultRelInfo *resultRelInfo;
726
727                                 /* Top plan had better be an Append here. */
728                                 Assert(IsA(plan, Append));
729                                 Assert(((Append *) plan)->isTarget);
730                                 Assert(IsA(planstate, AppendState));
731                                 appendplans = ((AppendState *) planstate)->appendplans;
732                                 as_nplans = ((AppendState *) planstate)->as_nplans;
733                                 Assert(as_nplans == estate->es_num_result_relations);
734                                 resultRelInfo = estate->es_result_relations;
735                                 for (i = 0; i < as_nplans; i++)
736                                 {
737                                         PlanState  *subplan = appendplans[i];
738                                         JunkFilter *j;
739
740                                         j = ExecInitJunkFilter(subplan->plan->targetlist,
741                                                         resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
742                                                                   ExecAllocTableSlot(estate->es_tupleTable));
743
744                                         /*
745                                          * Since it must be UPDATE/DELETE, there had better be a
746                                          * "ctid" junk attribute in the tlist ... but ctid could
747                                          * be at a different resno for each result relation. We
748                                          * look up the ctid resnos now and save them in the
749                                          * junkfilters.
750                                          */
751                                         j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
752                                         if (!AttributeNumberIsValid(j->jf_junkAttNo))
753                                                 elog(ERROR, "could not find junk ctid column");
754                                         resultRelInfo->ri_junkFilter = j;
755                                         resultRelInfo++;
756                                 }
757
758                                 /*
759                                  * Set active junkfilter too; at this point ExecInitAppend has
760                                  * already selected an active result relation...
761                                  */
762                                 estate->es_junkFilter =
763                                         estate->es_result_relation_info->ri_junkFilter;
764
765                                 /*
766                                  * We currently can't support rowmarks in this case, because
767                                  * the associated junk CTIDs might have different resnos in
768                                  * different subplans.
769                                  */
770                                 if (estate->es_rowMarks)
771                                         ereport(ERROR,
772                                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
773                                                          errmsg("SELECT FOR UPDATE/SHARE is not supported within a query with multiple result relations")));
774                         }
775                         else
776                         {
777                                 /* Normal case with just one JunkFilter */
778                                 JunkFilter *j;
779
780                                 j = ExecInitJunkFilter(planstate->plan->targetlist,
781                                                                            tupType->tdhasoid,
782                                                                   ExecAllocTableSlot(estate->es_tupleTable));
783                                 estate->es_junkFilter = j;
784                                 if (estate->es_result_relation_info)
785                                         estate->es_result_relation_info->ri_junkFilter = j;
786
787                                 if (operation == CMD_SELECT)
788                                 {
789                                         /* For SELECT, want to return the cleaned tuple type */
790                                         tupType = j->jf_cleanTupType;
791                                 }
792                                 else if (operation == CMD_UPDATE || operation == CMD_DELETE)
793                                 {
794                                         /* For UPDATE/DELETE, find the ctid junk attr now */
795                                         j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
796                                         if (!AttributeNumberIsValid(j->jf_junkAttNo))
797                                                 elog(ERROR, "could not find junk ctid column");
798                                 }
799
800                                 /* For SELECT FOR UPDATE/SHARE, find the ctid attrs now */
801                                 foreach(l, estate->es_rowMarks)
802                                 {
803                                         ExecRowMark *erm = (ExecRowMark *) lfirst(l);
804                                         char            resname[32];
805
806                                         snprintf(resname, sizeof(resname), "ctid%u", erm->rti);
807                                         erm->ctidAttNo = ExecFindJunkAttribute(j, resname);
808                                         if (!AttributeNumberIsValid(erm->ctidAttNo))
809                                                 elog(ERROR, "could not find junk \"%s\" column",
810                                                          resname);
811                                 }
812                         }
813                 }
814                 else
815                 {
816                         estate->es_junkFilter = NULL;
817                         if (estate->es_rowMarks)
818                                 elog(ERROR, "SELECT FOR UPDATE/SHARE, but no junk columns");
819                 }
820         }
821
822         /*
823          * Initialize RETURNING projections if needed.
824          */
825         if (plannedstmt->returningLists)
826         {
827                 TupleTableSlot *slot;
828                 ExprContext *econtext;
829                 ResultRelInfo *resultRelInfo;
830
831                 /*
832                  * We set QueryDesc.tupDesc to be the RETURNING rowtype in this case.
833                  * We assume all the sublists will generate the same output tupdesc.
834                  */
835                 tupType = ExecTypeFromTL((List *) linitial(plannedstmt->returningLists),
836                                                                  false);
837
838                 /* Set up a slot for the output of the RETURNING projection(s) */
839                 slot = ExecAllocTableSlot(estate->es_tupleTable);
840                 ExecSetSlotDescriptor(slot, tupType);
841                 /* Need an econtext too */
842                 econtext = CreateExprContext(estate);
843
844                 /*
845                  * Build a projection for each result rel.      Note that any SubPlans in
846                  * the RETURNING lists get attached to the topmost plan node.
847                  */
848                 Assert(list_length(plannedstmt->returningLists) == estate->es_num_result_relations);
849                 resultRelInfo = estate->es_result_relations;
850                 foreach(l, plannedstmt->returningLists)
851                 {
852                         List       *rlist = (List *) lfirst(l);
853                         List       *rliststate;
854
855                         rliststate = (List *) ExecInitExpr((Expr *) rlist, planstate);
856                         resultRelInfo->ri_projectReturning =
857                                 ExecBuildProjectionInfo(rliststate, econtext, slot,
858                                                                          resultRelInfo->ri_RelationDesc->rd_att);
859                         resultRelInfo++;
860                 }
861         }
862
863         queryDesc->tupDesc = tupType;
864         queryDesc->planstate = planstate;
865
866         /*
867          * If doing SELECT INTO, initialize the "into" relation.  We must wait
868          * till now so we have the "clean" result tuple type to create the new
869          * table from.
870          *
871          * If EXPLAIN, skip creating the "into" relation.
872          */
873         if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
874                 OpenIntoRel(queryDesc);
875 }
876
877 /*
878  * Initialize ResultRelInfo data for one result relation
879  */
880 void
881 InitResultRelInfo(ResultRelInfo *resultRelInfo,
882                                   Relation resultRelationDesc,
883                                   Index resultRelationIndex,
884                                   CmdType operation,
885                                   bool doInstrument)
886 {
887         /*
888          * Check valid relkind ... parser and/or planner should have noticed this
889          * already, but let's make sure.
890          */
891         switch (resultRelationDesc->rd_rel->relkind)
892         {
893                 case RELKIND_RELATION:
894                         /* OK */
895                         break;
896                 case RELKIND_SEQUENCE:
897                         ereport(ERROR,
898                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
899                                          errmsg("cannot change sequence \"%s\"",
900                                                         RelationGetRelationName(resultRelationDesc))));
901                         break;
902                 case RELKIND_TOASTVALUE:
903                         ereport(ERROR,
904                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
905                                          errmsg("cannot change TOAST relation \"%s\"",
906                                                         RelationGetRelationName(resultRelationDesc))));
907                         break;
908                 case RELKIND_VIEW:
909                         ereport(ERROR,
910                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
911                                          errmsg("cannot change view \"%s\"",
912                                                         RelationGetRelationName(resultRelationDesc))));
913                         break;
914                 default:
915                         ereport(ERROR,
916                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
917                                          errmsg("cannot change relation \"%s\"",
918                                                         RelationGetRelationName(resultRelationDesc))));
919                         break;
920         }
921
922         /* OK, fill in the node */
923         MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
924         resultRelInfo->type = T_ResultRelInfo;
925         resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
926         resultRelInfo->ri_RelationDesc = resultRelationDesc;
927         resultRelInfo->ri_NumIndices = 0;
928         resultRelInfo->ri_IndexRelationDescs = NULL;
929         resultRelInfo->ri_IndexRelationInfo = NULL;
930         /* make a copy so as not to depend on relcache info not changing... */
931         resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
932         if (resultRelInfo->ri_TrigDesc)
933         {
934                 int                     n = resultRelInfo->ri_TrigDesc->numtriggers;
935
936                 resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
937                         palloc0(n * sizeof(FmgrInfo));
938                 if (doInstrument)
939                         resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
940                 else
941                         resultRelInfo->ri_TrigInstrument = NULL;
942         }
943         else
944         {
945                 resultRelInfo->ri_TrigFunctions = NULL;
946                 resultRelInfo->ri_TrigInstrument = NULL;
947         }
948         resultRelInfo->ri_ConstraintExprs = NULL;
949         resultRelInfo->ri_junkFilter = NULL;
950         resultRelInfo->ri_projectReturning = NULL;
951
952         /*
953          * If there are indices on the result relation, open them and save
954          * descriptors in the result relation info, so that we can add new index
955          * entries for the tuples we add/update.  We need not do this for a
956          * DELETE, however, since deletion doesn't affect indexes.
957          */
958         if (resultRelationDesc->rd_rel->relhasindex &&
959                 operation != CMD_DELETE)
960                 ExecOpenIndices(resultRelInfo);
961 }
962
963 /*
964  *              ExecGetTriggerResultRel
965  *
966  * Get a ResultRelInfo for a trigger target relation.  Most of the time,
967  * triggers are fired on one of the result relations of the query, and so
968  * we can just return a member of the es_result_relations array.  (Note: in
969  * self-join situations there might be multiple members with the same OID;
970  * if so it doesn't matter which one we pick.)  However, it is sometimes
971  * necessary to fire triggers on other relations; this happens mainly when an
972  * RI update trigger queues additional triggers on other relations, which will
973  * be processed in the context of the outer query.      For efficiency's sake,
974  * we want to have a ResultRelInfo for those triggers too; that can avoid
975  * repeated re-opening of the relation.  (It also provides a way for EXPLAIN
976  * ANALYZE to report the runtimes of such triggers.)  So we make additional
977  * ResultRelInfo's as needed, and save them in es_trig_target_relations.
978  */
979 ResultRelInfo *
980 ExecGetTriggerResultRel(EState *estate, Oid relid)
981 {
982         ResultRelInfo *rInfo;
983         int                     nr;
984         ListCell   *l;
985         Relation        rel;
986         MemoryContext oldcontext;
987
988         /* First, search through the query result relations */
989         rInfo = estate->es_result_relations;
990         nr = estate->es_num_result_relations;
991         while (nr > 0)
992         {
993                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
994                         return rInfo;
995                 rInfo++;
996                 nr--;
997         }
998         /* Nope, but maybe we already made an extra ResultRelInfo for it */
999         foreach(l, estate->es_trig_target_relations)
1000         {
1001                 rInfo = (ResultRelInfo *) lfirst(l);
1002                 if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
1003                         return rInfo;
1004         }
1005         /* Nope, so we need a new one */
1006
1007         /*
1008          * Open the target relation's relcache entry.  We assume that an
1009          * appropriate lock is still held by the backend from whenever the trigger
1010          * event got queued, so we need take no new lock here.
1011          */
1012         rel = heap_open(relid, NoLock);
1013
1014         /*
1015          * Make the new entry in the right context.  Currently, we don't need any
1016          * index information in ResultRelInfos used only for triggers, so tell
1017          * InitResultRelInfo it's a DELETE.
1018          */
1019         oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
1020         rInfo = makeNode(ResultRelInfo);
1021         InitResultRelInfo(rInfo,
1022                                           rel,
1023                                           0,            /* dummy rangetable index */
1024                                           CMD_DELETE,
1025                                           estate->es_instrument);
1026         estate->es_trig_target_relations =
1027                 lappend(estate->es_trig_target_relations, rInfo);
1028         MemoryContextSwitchTo(oldcontext);
1029
1030         return rInfo;
1031 }
1032
1033 /*
1034  *              ExecContextForcesOids
1035  *
1036  * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
1037  * we need to ensure that result tuples have space for an OID iff they are
1038  * going to be stored into a relation that has OIDs.  In other contexts
1039  * we are free to choose whether to leave space for OIDs in result tuples
1040  * (we generally don't want to, but we do if a physical-tlist optimization
1041  * is possible).  This routine checks the plan context and returns TRUE if the
1042  * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
1043  * *hasoids is set to the required value.
1044  *
1045  * One reason this is ugly is that all plan nodes in the plan tree will emit
1046  * tuples with space for an OID, though we really only need the topmost node
1047  * to do so.  However, node types like Sort don't project new tuples but just
1048  * return their inputs, and in those cases the requirement propagates down
1049  * to the input node.  Eventually we might make this code smart enough to
1050  * recognize how far down the requirement really goes, but for now we just
1051  * make all plan nodes do the same thing if the top level forces the choice.
1052  *
1053  * We assume that estate->es_result_relation_info is already set up to
1054  * describe the target relation.  Note that in an UPDATE that spans an
1055  * inheritance tree, some of the target relations may have OIDs and some not.
1056  * We have to make the decisions on a per-relation basis as we initialize
1057  * each of the child plans of the topmost Append plan.
1058  *
1059  * SELECT INTO is even uglier, because we don't have the INTO relation's
1060  * descriptor available when this code runs; we have to look aside at a
1061  * flag set by InitPlan().
1062  */
1063 bool
1064 ExecContextForcesOids(PlanState *planstate, bool *hasoids)
1065 {
1066         if (planstate->state->es_select_into)
1067         {
1068                 *hasoids = planstate->state->es_into_oids;
1069                 return true;
1070         }
1071         else
1072         {
1073                 ResultRelInfo *ri = planstate->state->es_result_relation_info;
1074
1075                 if (ri != NULL)
1076                 {
1077                         Relation        rel = ri->ri_RelationDesc;
1078
1079                         if (rel != NULL)
1080                         {
1081                                 *hasoids = rel->rd_rel->relhasoids;
1082                                 return true;
1083                         }
1084                 }
1085         }
1086
1087         return false;
1088 }
1089
1090 /* ----------------------------------------------------------------
1091  *              ExecEndPlan
1092  *
1093  *              Cleans up the query plan -- closes files and frees up storage
1094  *
1095  * NOTE: we are no longer very worried about freeing storage per se
1096  * in this code; FreeExecutorState should be guaranteed to release all
1097  * memory that needs to be released.  What we are worried about doing
1098  * is closing relations and dropping buffer pins.  Thus, for example,
1099  * tuple tables must be cleared or dropped to ensure pins are released.
1100  * ----------------------------------------------------------------
1101  */
1102 static void
1103 ExecEndPlan(PlanState *planstate, EState *estate)
1104 {
1105         ResultRelInfo *resultRelInfo;
1106         int                     i;
1107         ListCell   *l;
1108
1109         /*
1110          * shut down any PlanQual processing we were doing
1111          */
1112         if (estate->es_evalPlanQual != NULL)
1113                 EndEvalPlanQual(estate);
1114
1115         /*
1116          * shut down the node-type-specific query processing
1117          */
1118         ExecEndNode(planstate);
1119
1120         /*
1121          * for subplans too
1122          */
1123         foreach(l, estate->es_subplanstates)
1124         {
1125                 PlanState  *subplanstate = (PlanState *) lfirst(l);
1126
1127                 ExecEndNode(subplanstate);
1128         }
1129
1130         /*
1131          * destroy the executor "tuple" table.
1132          */
1133         ExecDropTupleTable(estate->es_tupleTable, true);
1134         estate->es_tupleTable = NULL;
1135
1136         /*
1137          * close the result relation(s) if any, but hold locks until xact commit.
1138          */
1139         resultRelInfo = estate->es_result_relations;
1140         for (i = estate->es_num_result_relations; i > 0; i--)
1141         {
1142                 /* Close indices and then the relation itself */
1143                 ExecCloseIndices(resultRelInfo);
1144                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1145                 resultRelInfo++;
1146         }
1147
1148         /*
1149          * likewise close any trigger target relations
1150          */
1151         foreach(l, estate->es_trig_target_relations)
1152         {
1153                 resultRelInfo = (ResultRelInfo *) lfirst(l);
1154                 /* Close indices and then the relation itself */
1155                 ExecCloseIndices(resultRelInfo);
1156                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
1157         }
1158
1159         /*
1160          * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
1161          */
1162         foreach(l, estate->es_rowMarks)
1163         {
1164                 ExecRowMark *erm = lfirst(l);
1165
1166                 heap_close(erm->relation, NoLock);
1167         }
1168 }
1169
/* ----------------------------------------------------------------
 *		ExecutePlan
 *
 *		processes the query plan to retrieve 'numberTuples' tuples in the
 *		direction specified.
 *
 *		Retrieves all tuples if numberTuples is 0
 *
 *		result is either a slot containing the last tuple in the case
 *		of a SELECT or NULL otherwise.
 *
 * This is the executor's main loop: fire BEFORE statement triggers, pull
 * tuples from the plan tree one at a time, apply row locking and junk
 * filtering, dispatch each tuple to the operation-specific routine
 * (ExecSelect/ExecInsert/ExecUpdate/ExecDelete), then fire AFTER
 * statement triggers.
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecutePlan(EState *estate,
			PlanState *planstate,
			CmdType operation,
			long numberTuples,
			ScanDirection direction,
			DestReceiver *dest)
{
	JunkFilter *junkfilter;
	TupleTableSlot *planSlot;	/* raw tuple from the plan, junk included */
	TupleTableSlot *slot;		/* tuple after junk filtering */
	ItemPointer tupleid = NULL;
	ItemPointerData tuple_ctid;	/* local copy of target ctid, see below */
	long		current_tuple_count;
	TupleTableSlot *result;

	/*
	 * initialize local variables
	 */
	current_tuple_count = 0;
	result = NULL;

	/*
	 * Set the direction.
	 */
	estate->es_direction = direction;

	/*
	 * Process BEFORE EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecBSInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * Loop until we've processed the proper number of tuples from the plan.
	 */

	for (;;)
	{
		/* Reset the per-output-tuple exprcontext */
		ResetPerTupleExprContext(estate);

		/*
		 * Execute the plan and obtain a tuple
		 */
		/*
		 * lnext is the retry point: the row-locking code below jumps back
		 * here when a tuple must be skipped (deleted or self-updated) and
		 * a replacement fetched.
		 */
lnext:	;
		if (estate->es_useEvalPlan)
		{
			/*
			 * An EvalPlanQual recheck is in progress; drain its substitute
			 * tuples before going back to the regular plan.
			 */
			planSlot = EvalPlanQualNext(estate);
			if (TupIsNull(planSlot))
				planSlot = ExecProcNode(planstate);
		}
		else
			planSlot = ExecProcNode(planstate);

		/*
		 * if the tuple is null, then we assume there is nothing more to
		 * process so we just return null...
		 */
		if (TupIsNull(planSlot))
		{
			result = NULL;
			break;
		}
		slot = planSlot;

		/*
		 * If we have a junk filter, then project a new tuple with the junk
		 * removed.
		 *
		 * Store this new "clean" tuple in the junkfilter's resultSlot.
		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
		 * because that tuple slot has the wrong descriptor.)
		 *
		 * But first, extract all the junk information we need.
		 */
		if ((junkfilter = estate->es_junkFilter) != NULL)
		{
			/*
			 * Process any FOR UPDATE or FOR SHARE locking requested.
			 */
			if (estate->es_rowMarks != NIL)
			{
				ListCell   *l;

				/*
				 * lmark is re-entered after EvalPlanQual substitutes an
				 * updated tuple version: all row marks must be re-applied
				 * to the replacement tuple.
				 */
		lmark:	;
				foreach(l, estate->es_rowMarks)
				{
					ExecRowMark *erm = lfirst(l);
					Datum		datum;
					bool		isNull;
					HeapTupleData tuple;
					Buffer		buffer;
					ItemPointerData update_ctid;
					TransactionId update_xmax;
					TupleTableSlot *newSlot;
					LockTupleMode lockmode;
					HTSU_Result test;

					/* fetch the ctid junk column for this row mark */
					datum = ExecGetJunkAttribute(slot,
												 erm->ctidAttNo,
												 &isNull);
					/* shouldn't ever get a null result... */
					if (isNull)
						elog(ERROR, "ctid is NULL");

					tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

					/* FOR UPDATE takes exclusive, FOR SHARE shared lock */
					if (erm->forUpdate)
						lockmode = LockTupleExclusive;
					else
						lockmode = LockTupleShared;

					test = heap_lock_tuple(erm->relation, &tuple, &buffer,
										   &update_ctid, &update_xmax,
										   estate->es_output_cid,
										   lockmode, erm->noWait);
					ReleaseBuffer(buffer);
					switch (test)
					{
						case HeapTupleSelfUpdated:
							/* treat it as deleted; do not process */
							goto lnext;

						case HeapTupleMayBeUpdated:
							/* lock acquired; go on to the next row mark */
							break;

						case HeapTupleUpdated:
							/* concurrent update/delete committed meanwhile */
							if (IsXactIsoLevelSerializable)
								ereport(ERROR,
								 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								  errmsg("could not serialize access due to concurrent update")))<FIXED>;
							if (!ItemPointerEquals(&update_ctid,
												   &tuple.t_self))
							{
								/* updated, so look at updated version */
								newSlot = EvalPlanQual(estate,
													   erm->rti,
													   &update_ctid,
													   update_xmax);
								if (!TupIsNull(newSlot))
								{
									/* re-lock every mark on the new version */
									slot = planSlot = newSlot;
									estate->es_useEvalPlan = true;
									goto lmark;
								}
							}

							/*
							 * if tuple was deleted or PlanQual failed for
							 * updated tuple - we must not return this tuple!
							 */
							goto lnext;

						default:
							elog(ERROR, "unrecognized heap_lock_tuple status: %u",
								 test);
							return NULL;	/* keep compiler quiet */
					}
				}
			}

			/*
			 * extract the 'ctid' junk attribute.
			 */
			if (operation == CMD_UPDATE || operation == CMD_DELETE)
			{
				Datum		datum;
				bool		isNull;

				datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo,
											 &isNull);
				/* shouldn't ever get a null result... */
				if (isNull)
					elog(ERROR, "ctid is NULL");

				tupleid = (ItemPointer) DatumGetPointer(datum);
				tuple_ctid = *tupleid;	/* make sure we don't free the ctid!! */
				tupleid = &tuple_ctid;
			}

			/*
			 * Create a new "clean" tuple with all junk attributes removed. We
			 * don't need to do this for DELETE, however (there will in fact
			 * be no non-junk attributes in a DELETE!)
			 */
			if (operation != CMD_DELETE)
				slot = ExecFilterJunk(junkfilter, slot);
		}

		/*
		 * now that we have a tuple, do the appropriate thing with it.. either
		 * return it to the user, add it to a relation someplace, delete it
		 * from a relation, or modify some of its attributes.
		 */
		switch (operation)
		{
			case CMD_SELECT:
				ExecSelect(slot, dest, estate);
				result = slot;
				break;

			case CMD_INSERT:
				ExecInsert(slot, tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			case CMD_DELETE:
				ExecDelete(tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			case CMD_UPDATE:
				ExecUpdate(slot, tupleid, planSlot, dest, estate);
				result = NULL;
				break;

			default:
				elog(ERROR, "unrecognized operation code: %d",
					 (int) operation);
				result = NULL;
				break;
		}

		/*
		 * check our tuple count.. if we've processed the proper number then
		 * quit, else loop again and process more tuples.  Zero numberTuples
		 * means no limit.
		 */
		current_tuple_count++;
		if (numberTuples && numberTuples == current_tuple_count)
			break;
	}

	/*
	 * Process AFTER EACH STATEMENT triggers
	 */
	switch (operation)
	{
		case CMD_UPDATE:
			ExecASUpdateTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_DELETE:
			ExecASDeleteTriggers(estate, estate->es_result_relation_info);
			break;
		case CMD_INSERT:
			ExecASInsertTriggers(estate, estate->es_result_relation_info);
			break;
		default:
			/* do nothing */
			break;
	}

	/*
	 * here, result is either a slot containing a tuple in the case of a
	 * SELECT or NULL otherwise.
	 */
	return result;
}
1457
1458 /* ----------------------------------------------------------------
1459  *              ExecSelect
1460  *
1461  *              SELECTs are easy.. we just pass the tuple to the appropriate
1462  *              output function.
1463  * ----------------------------------------------------------------
1464  */
1465 static void
1466 ExecSelect(TupleTableSlot *slot,
1467                    DestReceiver *dest,
1468                    EState *estate)
1469 {
1470         (*dest->receiveSlot) (slot, dest);
1471         IncrRetrieved();
1472         (estate->es_processed)++;
1473 }
1474
/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		INSERTs are trickier.. we have to insert the tuple into
 *		the base relation and insert appropriate tuples into the
 *		index relations.
 *
 *		slot: slot holding the tuple to be inserted
 *		tupleid: not used by INSERT (present for call-site symmetry;
 *				 never referenced below)
 *		planSlot: raw output tuple of the plan, needed for RETURNING
 *		dest: receiver for RETURNING output, if any
 *		estate: executor state for the query
 * ----------------------------------------------------------------
 */
static void
ExecInsert(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	Oid			newId;

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);

		/* a NULL result means the trigger suppressed the insert entirely */
		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			/* re-point the trigger slot at this relation's rowtype if needed */
			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple (NOT NULL and CHECK constraints;
	 * errors out on violation rather than returning)
	 */
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * insert the tuple
	 *
	 * Note: heap_insert returns the tid (location) of the new tuple in the
	 * t_self field.  The two trailing booleans presumably select WAL-logging
	 * and free-space-map use -- confirm against heap_insert's signature.
	 */
	newId = heap_insert(resultRelationDesc, tuple,
						estate->es_output_cid,
						true, true);

	/* count the row, and remember OID/ctid for currtid()/lastval-style use */
	IncrAppended();
	(estate->es_processed)++;
	estate->es_lastoid = newId;
	setLastTid(&(tuple->t_self));

	/*
	 * insert index entries for tuple
	 */
	if (resultRelInfo->ri_NumIndices > 0)
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, tuple);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
}
1571
/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed
 *
 *		tupleid: ctid of the row to delete (may be advanced in place
 *				 if we chase an updated row version)
 *		planSlot: raw output tuple of the plan, needed for RETURNING
 *		dest: receiver for RETURNING output, if any
 *		estate: executor state for the query
 * ----------------------------------------------------------------
 */
static void
ExecDelete(ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid);

		/* a false result means the trigger suppressed the delete */
		if (!dodelete)			/* "do nothing" */
			return;
	}

	/*
	 * delete the tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be deleted is visible to that snapshot, and throw a can't-
	 * serialize error if not.      This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 *
	 * The ldelete label is the retry point: if a concurrent update moved the
	 * row, EvalPlanQual below may hand us the successor version's TID and we
	 * loop back to try deleting that one instead.
	 */
ldelete:;
	result = heap_delete(resultRelationDesc, tupleid,
						 &update_ctid, &update_xmax,
						 estate->es_output_cid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			/* normal case: the delete went through */
			break;

		case HeapTupleUpdated:
			/*
			 * Concurrent update/delete.  Under SERIALIZABLE that's an error;
			 * under READ COMMITTED we recheck the updated row version with
			 * EvalPlanQual and, if it still passes the quals, retry on it.
			 */
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax);
				if (!TupIsNull(epqslot))
				{
					/* new version still satisfies the query: retry on it */
					*tupleid = update_ctid;
					goto ldelete;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_delete status: %u", result);
			return;
	}

	IncrDeleted();
	(estate->es_processed)++;

	/*
	 * Note: Normally one would think that we have to delete index tuples
	 * associated with the heap tuple now...
	 *
	 * ... but in POSTGRES, we have no need to do this because VACUUM will
	 * take care of it later.  We can't delete index tuples immediately
	 * anyway, since the tuple is still visible to other transactions.
	 */

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.      We can use the trigger tuple slot.
		 *
		 * SnapshotAny is used here because the row we just deleted is no
		 * longer visible to the query's own snapshot.
		 */
		TupleTableSlot *slot = estate->es_trig_tuple_slot;
		HeapTupleData deltuple;
		Buffer		delbuffer;

		deltuple.t_self = *tupleid;
		if (!heap_fetch(resultRelationDesc, SnapshotAny,
						&deltuple, &delbuffer, false, NULL))
			elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");

		/* re-point the trigger slot at this relation's rowtype if needed */
		if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
			ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
		ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);

		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);

		/* clear the slot before releasing the buffer the tuple points into */
		ExecClearTuple(slot);
		ReleaseBuffer(delbuffer);
	}
}
1701
/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..      This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
 *
 *		slot: slot holding the new (replacement) tuple contents
 *		tupleid: ctid of the existing row to replace (may be advanced
 *				 in place if we chase an updated row version)
 *		planSlot: raw output tuple of the plan, needed for RETURNING
 *		dest: receiver for RETURNING output, if any
 *		estate: executor state for the query
 * ----------------------------------------------------------------
 */
static void
ExecUpdate(TupleTableSlot *slot,
		   ItemPointer tupleid,
		   TupleTableSlot *planSlot,
		   DestReceiver *dest,
		   EState *estate)
{
	HeapTuple	tuple;
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	HTSU_Result result;
	ItemPointerData update_ctid;
	TransactionId update_xmax;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	/*
	 * get the heap tuple out of the tuple table slot, making sure we have a
	 * writable copy
	 */
	tuple = ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
	{
		HeapTuple	newtuple;

		newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
										tupleid, tuple);

		/* a NULL result means the trigger suppressed the update */
		if (newtuple == NULL)	/* "do nothing" */
			return;

		if (newtuple != tuple)	/* modified by Trigger(s) */
		{
			/*
			 * Put the modified tuple into a slot for convenience of routines
			 * below.  We assume the tuple was allocated in per-tuple memory
			 * context, and therefore will go away by itself. The tuple table
			 * slot should not try to clear it.
			 */
			TupleTableSlot *newslot = estate->es_trig_tuple_slot;

			/* re-point the trigger slot at this rowtype if needed */
			if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
				ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
			ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
			slot = newslot;
			tuple = newtuple;
		}
	}

	/*
	 * Check the constraints of the tuple
	 *
	 * If we generate a new candidate tuple after EvalPlanQual testing, we
	 * must loop back here and recheck constraints.  (We don't need to redo
	 * triggers, however.  If there are any BEFORE triggers then trigger.c
	 * will have done heap_lock_tuple to lock the correct tuple, so there's no
	 * need to do them again.)
	 */
lreplace:;
	if (resultRelationDesc->rd_att->constr)
		ExecConstraints(resultRelInfo, slot, estate);

	/*
	 * replace the heap tuple
	 *
	 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
	 * the row to be updated is visible to that snapshot, and throw a can't-
	 * serialize error if not.      This is a special-case behavior needed for
	 * referential integrity updates in serializable transactions.
	 */
	result = heap_update(resultRelationDesc, tupleid, tuple,
						 &update_ctid, &update_xmax,
						 estate->es_output_cid,
						 estate->es_crosscheck_snapshot,
						 true /* wait for commit */ );
	switch (result)
	{
		case HeapTupleSelfUpdated:
			/* already deleted by self; nothing to do */
			return;

		case HeapTupleMayBeUpdated:
			/* normal case: the update went through */
			break;

		case HeapTupleUpdated:
			/*
			 * Concurrent update/delete.  Under SERIALIZABLE that's an error;
			 * under READ COMMITTED we recheck the updated row version with
			 * EvalPlanQual and, if it still passes the quals, retry on it.
			 */
			if (IsXactIsoLevelSerializable)
				ereport(ERROR,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("could not serialize access due to concurrent update")));
			else if (!ItemPointerEquals(tupleid, &update_ctid))
			{
				TupleTableSlot *epqslot;

				epqslot = EvalPlanQual(estate,
									   resultRelInfo->ri_RangeTableIndex,
									   &update_ctid,
									   update_xmax);
				if (!TupIsNull(epqslot))
				{
					/*
					 * New version still satisfies the query: rebuild the
					 * candidate tuple from the recheck output and retry
					 * (constraints get rechecked at lreplace).
					 */
					*tupleid = update_ctid;
					slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
					tuple = ExecMaterializeSlot(slot);
					goto lreplace;
				}
			}
			/* tuple already deleted; nothing to do */
			return;

		default:
			elog(ERROR, "unrecognized heap_update status: %u", result);
			return;
	}

	IncrReplaced();
	(estate->es_processed)++;

	/*
	 * Note: instead of having to update the old index tuples associated with
	 * the heap tuple, all we do is form and insert new index tuples. This is
	 * because UPDATEs are actually DELETEs and INSERTs, and index tuple
	 * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
	 * here is insert new index tuples.  -cim 9/27/89
	 */

	/*
	 * insert index entries for tuple
	 *
	 * Note: heap_update returns the tid (location) of the new tuple in the
	 * t_self field.
	 *
	 * If it's a HOT update, we mustn't insert new index entries.
	 */
	if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
		ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		ExecProcessReturning(resultRelInfo->ri_projectReturning,
							 slot, planSlot, dest);
}
1869
1870 /*
1871  * ExecRelCheck --- check that tuple meets constraints for result relation
1872  */
1873 static const char *
1874 ExecRelCheck(ResultRelInfo *resultRelInfo,
1875                          TupleTableSlot *slot, EState *estate)
1876 {
1877         Relation        rel = resultRelInfo->ri_RelationDesc;
1878         int                     ncheck = rel->rd_att->constr->num_check;
1879         ConstrCheck *check = rel->rd_att->constr->check;
1880         ExprContext *econtext;
1881         MemoryContext oldContext;
1882         List       *qual;
1883         int                     i;
1884
1885         /*
1886          * If first time through for this result relation, build expression
1887          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1888          * memory context so they'll survive throughout the query.
1889          */
1890         if (resultRelInfo->ri_ConstraintExprs == NULL)
1891         {
1892                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1893                 resultRelInfo->ri_ConstraintExprs =
1894                         (List **) palloc(ncheck * sizeof(List *));
1895                 for (i = 0; i < ncheck; i++)
1896                 {
1897                         /* ExecQual wants implicit-AND form */
1898                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1899                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1900                                 ExecPrepareExpr((Expr *) qual, estate);
1901                 }
1902                 MemoryContextSwitchTo(oldContext);
1903         }
1904
1905         /*
1906          * We will use the EState's per-tuple context for evaluating constraint
1907          * expressions (creating it if it's not already there).
1908          */
1909         econtext = GetPerTupleExprContext(estate);
1910
1911         /* Arrange for econtext's scan tuple to be the tuple under test */
1912         econtext->ecxt_scantuple = slot;
1913
1914         /* And evaluate the constraints */
1915         for (i = 0; i < ncheck; i++)
1916         {
1917                 qual = resultRelInfo->ri_ConstraintExprs[i];
1918
1919                 /*
1920                  * NOTE: SQL92 specifies that a NULL result from a constraint
1921                  * expression is not to be treated as a failure.  Therefore, tell
1922                  * ExecQual to return TRUE for NULL.
1923                  */
1924                 if (!ExecQual(qual, econtext, true))
1925                         return check[i].ccname;
1926         }
1927
1928         /* NULL result means no error */
1929         return NULL;
1930 }
1931
1932 void
1933 ExecConstraints(ResultRelInfo *resultRelInfo,
1934                                 TupleTableSlot *slot, EState *estate)
1935 {
1936         Relation        rel = resultRelInfo->ri_RelationDesc;
1937         TupleConstr *constr = rel->rd_att->constr;
1938
1939         Assert(constr);
1940
1941         if (constr->has_not_null)
1942         {
1943                 int                     natts = rel->rd_att->natts;
1944                 int                     attrChk;
1945
1946                 for (attrChk = 1; attrChk <= natts; attrChk++)
1947                 {
1948                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1949                                 slot_attisnull(slot, attrChk))
1950                                 ereport(ERROR,
1951                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1952                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1953                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1954                 }
1955         }
1956
1957         if (constr->num_check > 0)
1958         {
1959                 const char *failed;
1960
1961                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1962                         ereport(ERROR,
1963                                         (errcode(ERRCODE_CHECK_VIOLATION),
1964                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1965                                                         RelationGetRelationName(rel), failed)));
1966         }
1967 }
1968
1969 /*
1970  * ExecProcessReturning --- evaluate a RETURNING list and send to dest
1971  *
1972  * projectReturning: RETURNING projection info for current result rel
1973  * tupleSlot: slot holding tuple actually inserted/updated/deleted
1974  * planSlot: slot holding tuple returned by top plan node
1975  * dest: where to send the output
1976  */
1977 static void
1978 ExecProcessReturning(ProjectionInfo *projectReturning,
1979                                          TupleTableSlot *tupleSlot,
1980                                          TupleTableSlot *planSlot,
1981                                          DestReceiver *dest)
1982 {
1983         ExprContext *econtext = projectReturning->pi_exprContext;
1984         TupleTableSlot *retSlot;
1985
1986         /*
1987          * Reset per-tuple memory context to free any expression evaluation
1988          * storage allocated in the previous cycle.
1989          */
1990         ResetExprContext(econtext);
1991
1992         /* Make tuple and any needed join variables available to ExecProject */
1993         econtext->ecxt_scantuple = tupleSlot;
1994         econtext->ecxt_outertuple = planSlot;
1995
1996         /* Compute the RETURNING expressions */
1997         retSlot = ExecProject(projectReturning, NULL);
1998
1999         /* Send to dest */
2000         (*dest->receiveSlot) (retSlot, dest);
2001
2002         ExecClearTuple(retSlot);
2003 }
2004
2005 /*
2006  * Check a modified tuple to see if we want to process its updated version
2007  * under READ COMMITTED rules.
2008  *
2009  * See backend/executor/README for some info about how this works.
2010  *
2011  *      estate - executor state data
2012  *      rti - rangetable index of table containing tuple
2013  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
2014  *      priorXmax - t_xmax from the outdated tuple
2015  *
2016  * *tid is also an output parameter: it's modified to hold the TID of the
2017  * latest version of the tuple (note this may be changed even on failure)
2018  *
2019  * Returns a slot containing the new candidate update/delete tuple, or
2020  * NULL if we determine we shouldn't process the row.
2021  */
2022 TupleTableSlot *
2023 EvalPlanQual(EState *estate, Index rti,
2024                          ItemPointer tid, TransactionId priorXmax)
2025 {
2026         evalPlanQual *epq;
2027         EState     *epqstate;
2028         Relation        relation;
2029         HeapTupleData tuple;
2030         HeapTuple       copyTuple = NULL;
2031         SnapshotData SnapshotDirty;
2032         bool            endNode;
2033
2034         Assert(rti != 0);
2035
2036         /*
2037          * find relation containing target tuple
2038          */
2039         if (estate->es_result_relation_info != NULL &&
2040                 estate->es_result_relation_info->ri_RangeTableIndex == rti)
2041                 relation = estate->es_result_relation_info->ri_RelationDesc;
2042         else
2043         {
2044                 ListCell   *l;
2045
2046                 relation = NULL;
2047                 foreach(l, estate->es_rowMarks)
2048                 {
2049                         if (((ExecRowMark *) lfirst(l))->rti == rti)
2050                         {
2051                                 relation = ((ExecRowMark *) lfirst(l))->relation;
2052                                 break;
2053                         }
2054                 }
2055                 if (relation == NULL)
2056                         elog(ERROR, "could not find RowMark for RT index %u", rti);
2057         }
2058
2059         /*
2060          * fetch tid tuple
2061          *
2062          * Loop here to deal with updated or busy tuples
2063          */
2064         InitDirtySnapshot(SnapshotDirty);
2065         tuple.t_self = *tid;
2066         for (;;)
2067         {
2068                 Buffer          buffer;
2069
2070                 if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
2071                 {
2072                         /*
2073                          * If xmin isn't what we're expecting, the slot must have been
2074                          * recycled and reused for an unrelated tuple.  This implies that
2075                          * the latest version of the row was deleted, so we need do
2076                          * nothing.  (Should be safe to examine xmin without getting
2077                          * buffer's content lock, since xmin never changes in an existing
2078                          * tuple.)
2079                          */
2080                         if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2081                                                                          priorXmax))
2082                         {
2083                                 ReleaseBuffer(buffer);
2084                                 return NULL;
2085                         }
2086
2087                         /* otherwise xmin should not be dirty... */
2088                         if (TransactionIdIsValid(SnapshotDirty.xmin))
2089                                 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
2090
2091                         /*
2092                          * If tuple is being updated by other transaction then we have to
2093                          * wait for its commit/abort.
2094                          */
2095                         if (TransactionIdIsValid(SnapshotDirty.xmax))
2096                         {
2097                                 ReleaseBuffer(buffer);
2098                                 XactLockTableWait(SnapshotDirty.xmax);
2099                                 continue;               /* loop back to repeat heap_fetch */
2100                         }
2101
2102                         /*
2103                          * If tuple was inserted by our own transaction, we have to check
2104                          * cmin against es_output_cid: cmin >= current CID means our
2105                          * command cannot see the tuple, so we should ignore it.  Without
2106                          * this we are open to the "Halloween problem" of indefinitely
2107                          * re-updating the same tuple. (We need not check cmax because
2108                          * HeapTupleSatisfiesDirty will consider a tuple deleted by our
2109                          * transaction dead, regardless of cmax.)  We just checked that
2110                          * priorXmax == xmin, so we can test that variable instead of
2111                          * doing HeapTupleHeaderGetXmin again.
2112                          */
2113                         if (TransactionIdIsCurrentTransactionId(priorXmax) &&
2114                                 HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
2115                         {
2116                                 ReleaseBuffer(buffer);
2117                                 return NULL;
2118                         }
2119
2120                         /*
2121                          * We got tuple - now copy it for use by recheck query.
2122                          */
2123                         copyTuple = heap_copytuple(&tuple);
2124                         ReleaseBuffer(buffer);
2125                         break;
2126                 }
2127
2128                 /*
2129                  * If the referenced slot was actually empty, the latest version of
2130                  * the row must have been deleted, so we need do nothing.
2131                  */
2132                 if (tuple.t_data == NULL)
2133                 {
2134                         ReleaseBuffer(buffer);
2135                         return NULL;
2136                 }
2137
2138                 /*
2139                  * As above, if xmin isn't what we're expecting, do nothing.
2140                  */
2141                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2142                                                                  priorXmax))
2143                 {
2144                         ReleaseBuffer(buffer);
2145                         return NULL;
2146                 }
2147
2148                 /*
2149                  * If we get here, the tuple was found but failed SnapshotDirty.
2150                  * Assuming the xmin is either a committed xact or our own xact (as it
2151                  * certainly should be if we're trying to modify the tuple), this must
2152                  * mean that the row was updated or deleted by either a committed xact
2153                  * or our own xact.  If it was deleted, we can ignore it; if it was
2154                  * updated then chain up to the next version and repeat the whole
2155                  * test.
2156                  *
2157                  * As above, it should be safe to examine xmax and t_ctid without the
2158                  * buffer content lock, because they can't be changing.
2159                  */
2160                 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2161                 {
2162                         /* deleted, so forget about it */
2163                         ReleaseBuffer(buffer);
2164                         return NULL;
2165                 }
2166
2167                 /* updated, so look at the updated row */
2168                 tuple.t_self = tuple.t_data->t_ctid;
2169                 /* updated row should have xmin matching this xmax */
2170                 priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
2171                 ReleaseBuffer(buffer);
2172                 /* loop back to fetch next in chain */
2173         }
2174
2175         /*
2176          * For UPDATE/DELETE we have to return tid of actual row we're executing
2177          * PQ for.
2178          */
2179         *tid = tuple.t_self;
2180
2181         /*
2182          * Need to run a recheck subquery.      Find or create a PQ stack entry.
2183          */
2184         epq = estate->es_evalPlanQual;
2185         endNode = true;
2186
2187         if (epq != NULL && epq->rti == 0)
2188         {
2189                 /* Top PQ stack entry is idle, so re-use it */
2190                 Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2191                 epq->rti = rti;
2192                 endNode = false;
2193         }
2194
2195         /*
2196          * If this is request for another RTE - Ra, - then we have to check wasn't
2197          * PlanQual requested for Ra already and if so then Ra' row was updated
2198          * again and we have to re-start old execution for Ra and forget all what
2199          * we done after Ra was suspended. Cool? -:))
2200          */
2201         if (epq != NULL && epq->rti != rti &&
2202                 epq->estate->es_evTuple[rti - 1] != NULL)
2203         {
2204                 do
2205                 {
2206                         evalPlanQual *oldepq;
2207
2208                         /* stop execution */
2209                         EvalPlanQualStop(epq);
2210                         /* pop previous PlanQual from the stack */
2211                         oldepq = epq->next;
2212                         Assert(oldepq && oldepq->rti != 0);
2213                         /* push current PQ to freePQ stack */
2214                         oldepq->free = epq;
2215                         epq = oldepq;
2216                         estate->es_evalPlanQual = epq;
2217                 } while (epq->rti != rti);
2218         }
2219
2220         /*
2221          * If we are requested for another RTE then we have to suspend execution
2222          * of current PlanQual and start execution for new one.
2223          */
2224         if (epq == NULL || epq->rti != rti)
2225         {
2226                 /* try to reuse plan used previously */
2227                 evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
2228
2229                 if (newepq == NULL)             /* first call or freePQ stack is empty */
2230                 {
2231                         newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
2232                         newepq->free = NULL;
2233                         newepq->estate = NULL;
2234                         newepq->planstate = NULL;
2235                 }
2236                 else
2237                 {
2238                         /* recycle previously used PlanQual */
2239                         Assert(newepq->estate == NULL);
2240                         epq->free = NULL;
2241                 }
2242                 /* push current PQ to the stack */
2243                 newepq->next = epq;
2244                 epq = newepq;
2245                 estate->es_evalPlanQual = epq;
2246                 epq->rti = rti;
2247                 endNode = false;
2248         }
2249
2250         Assert(epq->rti == rti);
2251
2252         /*
2253          * Ok - we're requested for the same RTE.  Unfortunately we still have to
2254          * end and restart execution of the plan, because ExecReScan wouldn't
2255          * ensure that upper plan nodes would reset themselves.  We could make
2256          * that work if insertion of the target tuple were integrated with the
2257          * Param mechanism somehow, so that the upper plan nodes know that their
2258          * children's outputs have changed.
2259          *
2260          * Note that the stack of free evalPlanQual nodes is quite useless at the
2261          * moment, since it only saves us from pallocing/releasing the
2262          * evalPlanQual nodes themselves.  But it will be useful once we implement
2263          * ReScan instead of end/restart for re-using PlanQual nodes.
2264          */
2265         if (endNode)
2266         {
2267                 /* stop execution */
2268                 EvalPlanQualStop(epq);
2269         }
2270
2271         /*
2272          * Initialize new recheck query.
2273          *
2274          * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
2275          * instead copy down changeable state from the top plan (including
2276          * es_result_relation_info, es_junkFilter) and reset locally changeable
2277          * state in the epq (including es_param_exec_vals, es_evTupleNull).
2278          */
2279         EvalPlanQualStart(epq, estate, epq->next);
2280
2281         /*
2282          * free old RTE' tuple, if any, and store target tuple where relation's
2283          * scan node will see it
2284          */
2285         epqstate = epq->estate;
2286         if (epqstate->es_evTuple[rti - 1] != NULL)
2287                 heap_freetuple(epqstate->es_evTuple[rti - 1]);
2288         epqstate->es_evTuple[rti - 1] = copyTuple;
2289
2290         return EvalPlanQualNext(estate);
2291 }
2292
2293 static TupleTableSlot *
2294 EvalPlanQualNext(EState *estate)
2295 {
2296         evalPlanQual *epq = estate->es_evalPlanQual;
2297         MemoryContext oldcontext;
2298         TupleTableSlot *slot;
2299
2300         Assert(epq->rti != 0);
2301
2302 lpqnext:;
2303         oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
2304         slot = ExecProcNode(epq->planstate);
2305         MemoryContextSwitchTo(oldcontext);
2306
2307         /*
2308          * No more tuples for this PQ. Continue previous one.
2309          */
2310         if (TupIsNull(slot))
2311         {
2312                 evalPlanQual *oldepq;
2313
2314                 /* stop execution */
2315                 EvalPlanQualStop(epq);
2316                 /* pop old PQ from the stack */
2317                 oldepq = epq->next;
2318                 if (oldepq == NULL)
2319                 {
2320                         /* this is the first (oldest) PQ - mark as free */
2321                         epq->rti = 0;
2322                         estate->es_useEvalPlan = false;
2323                         /* and continue Query execution */
2324                         return NULL;
2325                 }
2326                 Assert(oldepq->rti != 0);
2327                 /* push current PQ to freePQ stack */
2328                 oldepq->free = epq;
2329                 epq = oldepq;
2330                 estate->es_evalPlanQual = epq;
2331                 goto lpqnext;
2332         }
2333
2334         return slot;
2335 }
2336
2337 static void
2338 EndEvalPlanQual(EState *estate)
2339 {
2340         evalPlanQual *epq = estate->es_evalPlanQual;
2341
2342         if (epq->rti == 0)                      /* plans already shutdowned */
2343         {
2344                 Assert(epq->next == NULL);
2345                 return;
2346         }
2347
2348         for (;;)
2349         {
2350                 evalPlanQual *oldepq;
2351
2352                 /* stop execution */
2353                 EvalPlanQualStop(epq);
2354                 /* pop old PQ from the stack */
2355                 oldepq = epq->next;
2356                 if (oldepq == NULL)
2357                 {
2358                         /* this is the first (oldest) PQ - mark as free */
2359                         epq->rti = 0;
2360                         estate->es_useEvalPlan = false;
2361                         break;
2362                 }
2363                 Assert(oldepq->rti != 0);
2364                 /* push current PQ to freePQ stack */
2365                 oldepq->free = epq;
2366                 epq = oldepq;
2367                 estate->es_evalPlanQual = epq;
2368         }
2369 }
2370
2371 /*
2372  * Start execution of one level of PlanQual.
2373  *
2374  * This is a cut-down version of ExecutorStart(): we copy some state from
2375  * the top-level estate rather than initializing it fresh.
2376  */
2377 static void
2378 EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
2379 {
2380         EState     *epqstate;
2381         int                     rtsize;
2382         MemoryContext oldcontext;
2383         ListCell   *l;
2384
2385         rtsize = list_length(estate->es_range_table);
2386
2387         epq->estate = epqstate = CreateExecutorState();
2388
2389         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2390
2391         /*
2392          * The epqstates share the top query's copy of unchanging state such as
2393          * the snapshot, rangetable, result-rel info, and external Param info.
2394          * They need their own copies of local state, including a tuple table,
2395          * es_param_exec_vals, etc.
2396          */
2397         epqstate->es_direction = ForwardScanDirection;
2398         epqstate->es_snapshot = estate->es_snapshot;
2399         epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
2400         epqstate->es_range_table = estate->es_range_table;
2401         epqstate->es_output_cid = estate->es_output_cid;
2402         epqstate->es_result_relations = estate->es_result_relations;
2403         epqstate->es_num_result_relations = estate->es_num_result_relations;
2404         epqstate->es_result_relation_info = estate->es_result_relation_info;
2405         epqstate->es_junkFilter = estate->es_junkFilter;
2406         /* es_trig_target_relations must NOT be copied */
2407         epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
2408         epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
2409         epqstate->es_param_list_info = estate->es_param_list_info;
2410         if (estate->es_plannedstmt->nParamExec > 0)
2411                 epqstate->es_param_exec_vals = (ParamExecData *)
2412                         palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
2413         epqstate->es_rowMarks = estate->es_rowMarks;
2414         epqstate->es_instrument = estate->es_instrument;
2415         epqstate->es_select_into = estate->es_select_into;
2416         epqstate->es_into_oids = estate->es_into_oids;
2417         epqstate->es_plannedstmt = estate->es_plannedstmt;
2418
2419         /*
2420          * Each epqstate must have its own es_evTupleNull state, but all the stack
2421          * entries share es_evTuple state.      This allows sub-rechecks to inherit
2422          * the value being examined by an outer recheck.
2423          */
2424         epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
2425         if (priorepq == NULL)
2426                 /* first PQ stack entry */
2427                 epqstate->es_evTuple = (HeapTuple *)
2428                         palloc0(rtsize * sizeof(HeapTuple));
2429         else
2430                 /* later stack entries share the same storage */
2431                 epqstate->es_evTuple = priorepq->estate->es_evTuple;
2432
2433         /*
2434          * Create sub-tuple-table; we needn't redo the CountSlots work though.
2435          */
2436         epqstate->es_tupleTable =
2437                 ExecCreateTupleTable(estate->es_tupleTable->size);
2438
2439         /*
2440          * Initialize private state information for each SubPlan.  We must do this
2441          * before running ExecInitNode on the main query tree, since
2442          * ExecInitSubPlan expects to be able to find these entries.
2443          */
2444         Assert(epqstate->es_subplanstates == NIL);
2445         foreach(l, estate->es_plannedstmt->subplans)
2446         {
2447                 Plan       *subplan = (Plan *) lfirst(l);
2448                 PlanState  *subplanstate;
2449
2450                 subplanstate = ExecInitNode(subplan, epqstate, 0);
2451
2452                 epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
2453                                                                                          subplanstate);
2454         }
2455
2456         /*
2457          * Initialize the private state information for all the nodes in the query
2458          * tree.  This opens files, allocates storage and leaves us ready to start
2459          * processing tuples.
2460          */
2461         epq->planstate = ExecInitNode(estate->es_plannedstmt->planTree, epqstate, 0);
2462
2463         MemoryContextSwitchTo(oldcontext);
2464 }
2465
2466 /*
2467  * End execution of one level of PlanQual.
2468  *
2469  * This is a cut-down version of ExecutorEnd(); basically we want to do most
2470  * of the normal cleanup, but *not* close result relations (which we are
2471  * just sharing from the outer query).  We do, however, have to close any
2472  * trigger target relations that got opened, since those are not shared.
2473  */
2474 static void
2475 EvalPlanQualStop(evalPlanQual *epq)
2476 {
2477         EState     *epqstate = epq->estate;
2478         MemoryContext oldcontext;
2479         ListCell   *l;
2480
2481         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2482
2483         ExecEndNode(epq->planstate);
2484
2485         foreach(l, epqstate->es_subplanstates)
2486         {
2487                 PlanState  *subplanstate = (PlanState *) lfirst(l);
2488
2489                 ExecEndNode(subplanstate);
2490         }
2491
2492         ExecDropTupleTable(epqstate->es_tupleTable, true);
2493         epqstate->es_tupleTable = NULL;
2494
2495         if (epqstate->es_evTuple[epq->rti - 1] != NULL)
2496         {
2497                 heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
2498                 epqstate->es_evTuple[epq->rti - 1] = NULL;
2499         }
2500
2501         foreach(l, epqstate->es_trig_target_relations)
2502         {
2503                 ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);
2504
2505                 /* Close indices and then the relation itself */
2506                 ExecCloseIndices(resultRelInfo);
2507                 heap_close(resultRelInfo->ri_RelationDesc, NoLock);
2508         }
2509
2510         MemoryContextSwitchTo(oldcontext);
2511
2512         FreeExecutorState(epqstate);
2513
2514         epq->estate = NULL;
2515         epq->planstate = NULL;
2516 }
2517
2518 /*
2519  * ExecGetActivePlanTree --- get the active PlanState tree from a QueryDesc
2520  *
2521  * Ordinarily this is just the one mentioned in the QueryDesc, but if we
2522  * are looking at a row returned by the EvalPlanQual machinery, we need
2523  * to look at the subsidiary state instead.
2524  */
2525 PlanState *
2526 ExecGetActivePlanTree(QueryDesc *queryDesc)
2527 {
2528         EState     *estate = queryDesc->estate;
2529
2530         if (estate && estate->es_useEvalPlan && estate->es_evalPlanQual != NULL)
2531                 return estate->es_evalPlanQual->planstate;
2532         else
2533                 return queryDesc->planstate;
2534 }
2535
2536
2537 /*
2538  * Support for SELECT INTO (a/k/a CREATE TABLE AS)
2539  *
2540  * We implement SELECT INTO by diverting SELECT's normal output with
2541  * a specialized DestReceiver type.
2542  *
2543  * TODO: remove some of the INTO-specific cruft from EState, and keep
2544  * it in the DestReceiver instead.
2545  */
2546
/* Private DestReceiver subtype used to divert SELECT output into a table */
typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	EState	   *estate;			/* EState we are working with; filled in by
								 * OpenIntoRel, NULL until then */
} DR_intorel;
2552
2553 /*
2554  * OpenIntoRel --- actually create the SELECT INTO target relation
2555  *
2556  * This also replaces QueryDesc->dest with the special DestReceiver for
2557  * SELECT INTO.  We assume that the correct result tuple type has already
2558  * been placed in queryDesc->tupDesc.
2559  */
2560 static void
2561 OpenIntoRel(QueryDesc *queryDesc)
2562 {
2563         IntoClause *into = queryDesc->plannedstmt->intoClause;
2564         EState     *estate = queryDesc->estate;
2565         Relation        intoRelationDesc;
2566         char       *intoName;
2567         Oid                     namespaceId;
2568         Oid                     tablespaceId;
2569         Datum           reloptions;
2570         AclResult       aclresult;
2571         Oid                     intoRelationId;
2572         TupleDesc       tupdesc;
2573         DR_intorel *myState;
2574
2575         Assert(into);
2576
2577         /*
2578          * Check consistency of arguments
2579          */
2580         if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
2581                 ereport(ERROR,
2582                                 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
2583                                  errmsg("ON COMMIT can only be used on temporary tables")));
2584
2585         /*
2586          * Find namespace to create in, check its permissions
2587          */
2588         intoName = into->rel->relname;
2589         namespaceId = RangeVarGetCreationNamespace(into->rel);
2590
2591         aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
2592                                                                           ACL_CREATE);
2593         if (aclresult != ACLCHECK_OK)
2594                 aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
2595                                            get_namespace_name(namespaceId));
2596
2597         /*
2598          * Select tablespace to use.  If not specified, use default tablespace
2599          * (which may in turn default to database's default).
2600          */
2601         if (into->tableSpaceName)
2602         {
2603                 tablespaceId = get_tablespace_oid(into->tableSpaceName);
2604                 if (!OidIsValid(tablespaceId))
2605                         ereport(ERROR,
2606                                         (errcode(ERRCODE_UNDEFINED_OBJECT),
2607                                          errmsg("tablespace \"%s\" does not exist",
2608                                                         into->tableSpaceName)));
2609         }
2610         else
2611         {
2612                 tablespaceId = GetDefaultTablespace(into->rel->istemp);
2613                 /* note InvalidOid is OK in this case */
2614         }
2615
2616         /* Check permissions except when using the database's default space */
2617         if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
2618         {
2619                 AclResult       aclresult;
2620
2621                 aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
2622                                                                                    ACL_CREATE);
2623
2624                 if (aclresult != ACLCHECK_OK)
2625                         aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
2626                                                    get_tablespace_name(tablespaceId));
2627         }
2628
2629         /* Parse and validate any reloptions */
2630         reloptions = transformRelOptions((Datum) 0,
2631                                                                          into->options,
2632                                                                          true,
2633                                                                          false);
2634         (void) heap_reloptions(RELKIND_RELATION, reloptions, true);
2635
2636         /* Copy the tupdesc because heap_create_with_catalog modifies it */
2637         tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);
2638
2639         /* Now we can actually create the new relation */
2640         intoRelationId = heap_create_with_catalog(intoName,
2641                                                                                           namespaceId,
2642                                                                                           tablespaceId,
2643                                                                                           InvalidOid,
2644                                                                                           GetUserId(),
2645                                                                                           tupdesc,
2646                                                                                           NIL,
2647                                                                                           RELKIND_RELATION,
2648                                                                                           false,
2649                                                                                           true,
2650                                                                                           0,
2651                                                                                           into->onCommit,
2652                                                                                           reloptions,
2653                                                                                           allowSystemTableMods);
2654
2655         FreeTupleDesc(tupdesc);
2656
2657         /*
2658          * Advance command counter so that the newly-created relation's catalog
2659          * tuples will be visible to heap_open.
2660          */
2661         CommandCounterIncrement();
2662
2663         /*
2664          * If necessary, create a TOAST table for the INTO relation. Note that
2665          * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
2666          * the TOAST table will be visible for insertion.
2667          */
2668         AlterTableCreateToastTable(intoRelationId);
2669
2670         /*
2671          * And open the constructed table for writing.
2672          */
2673         intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
2674
2675         /* use_wal off requires rd_targblock be initially invalid */
2676         Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
2677
2678         /*
2679          * We can skip WAL-logging the insertions, unless PITR is in use.
2680          */
2681         estate->es_into_relation_use_wal = XLogArchivingActive();
2682         estate->es_into_relation_descriptor = intoRelationDesc;
2683
2684         /*
2685          * Now replace the query's DestReceiver with one for SELECT INTO
2686          */
2687         queryDesc->dest = CreateDestReceiver(DestIntoRel, NULL);
2688         myState = (DR_intorel *) queryDesc->dest;
2689         Assert(myState->pub.mydest == DestIntoRel);
2690         myState->estate = estate;
2691 }
2692
2693 /*
2694  * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
2695  */
2696 static void
2697 CloseIntoRel(QueryDesc *queryDesc)
2698 {
2699         EState     *estate = queryDesc->estate;
2700
2701         /* OpenIntoRel might never have gotten called */
2702         if (estate->es_into_relation_descriptor)
2703         {
2704                 /* If we skipped using WAL, must heap_sync before commit */
2705                 if (!estate->es_into_relation_use_wal)
2706                         heap_sync(estate->es_into_relation_descriptor);
2707
2708                 /* close rel, but keep lock until commit */
2709                 heap_close(estate->es_into_relation_descriptor, NoLock);
2710
2711                 estate->es_into_relation_descriptor = NULL;
2712         }
2713 }
2714
2715 /*
2716  * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
2717  *
2718  * Since CreateDestReceiver doesn't accept the parameters we'd need,
2719  * we just leave the private fields empty here.  OpenIntoRel will
2720  * fill them in.
2721  */
2722 DestReceiver *
2723 CreateIntoRelDestReceiver(void)
2724 {
2725         DR_intorel *self = (DR_intorel *) palloc(sizeof(DR_intorel));
2726
2727         self->pub.receiveSlot = intorel_receive;
2728         self->pub.rStartup = intorel_startup;
2729         self->pub.rShutdown = intorel_shutdown;
2730         self->pub.rDestroy = intorel_destroy;
2731         self->pub.mydest = DestIntoRel;
2732
2733         self->estate = NULL;
2734
2735         return (DestReceiver *) self;
2736 }
2737
2738 /*
2739  * intorel_startup --- executor startup
2740  */
2741 static void
2742 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
2743 {
2744         /* no-op */
2745 }
2746
2747 /*
2748  * intorel_receive --- receive one tuple
2749  */
2750 static void
2751 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
2752 {
2753         DR_intorel *myState = (DR_intorel *) self;
2754         EState     *estate = myState->estate;
2755         HeapTuple       tuple;
2756
2757         tuple = ExecCopySlotTuple(slot);
2758
2759         heap_insert(estate->es_into_relation_descriptor,
2760                                 tuple,
2761                                 estate->es_output_cid,
2762                                 estate->es_into_relation_use_wal,
2763                                 false);                 /* never any point in using FSM */
2764
2765         /* We know this is a newly created relation, so there are no indexes */
2766
2767         heap_freetuple(tuple);
2768
2769         IncrAppended();
2770 }
2771
2772 /*
2773  * intorel_shutdown --- executor end
2774  */
2775 static void
2776 intorel_shutdown(DestReceiver *self)
2777 {
2778         /* no-op */
2779 }
2780
2781 /*
2782  * intorel_destroy --- release DestReceiver object
2783  */
2784 static void
2785 intorel_destroy(DestReceiver *self)
2786 {
2787         pfree(self);
2788 }