src/backend/executor/execMain.c: Provide a function hook to let plug-ins get control around ExecutorRun.
/*-------------------------------------------------------------------------
 *
 * execMain.c
 *        top level executor interface routines
 *
 * INTERFACE ROUTINES
 *      ExecutorStart()
 *      ExecutorRun()
 *      ExecutorEnd()
 *
 *      The old ExecutorMain() has been replaced by ExecutorStart(),
 *      ExecutorRun() and ExecutorEnd()
 *
 *      These three procedures are the external interfaces to the executor.
 *      In each case, the query descriptor is required as an argument.
 *
 *      ExecutorStart() must be called at the beginning of execution of any
 *      query plan and ExecutorEnd() should always be called at the end of
 *      execution of a plan.
 *
 *      ExecutorRun accepts direction and count arguments that specify whether
 *      the plan is to be executed forwards or backwards, and for how many
 *      tuples.
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *        $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.310 2008/07/18 18:23:46 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
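
/*
 * Illustrative sketch (not part of the original file): the typical call
 * sequence for the three interface routines described above, as a caller
 * such as a portal might issue it.  This assumes a QueryDesc "queryDesc"
 * already built with CreateQueryDesc(); count = 0 requests run to
 * completion.
 *
 *              ExecutorStart(queryDesc, 0);
 *              (void) ExecutorRun(queryDesc, ForwardScanDirection, 0L);
 *              ExecutorEnd(queryDesc);
 *              FreeQueryDesc(queryDesc);
 */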
#include "postgres.h"

#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "catalog/toasting.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
#include "executor/instrument.h"
#include "executor/nodeSubplan.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"


/* Hook for plugins to get control in ExecutorRun() */
ExecutorRun_hook_type ExecutorRun_hook = NULL;

typedef struct evalPlanQual
{
        Index           rti;
        EState     *estate;
        PlanState  *planstate;
        struct evalPlanQual *next;      /* stack of active PlanQual plans */
        struct evalPlanQual *free;      /* list of free PlanQual plans */
} evalPlanQual;

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
static void ExecEndPlan(PlanState *planstate, EState *estate);
static TupleTableSlot *ExecutePlan(EState *estate, PlanState *planstate,
                        CmdType operation,
                        long numberTuples,
                        ScanDirection direction,
                        DestReceiver *dest);
static void ExecSelect(TupleTableSlot *slot,
                   DestReceiver *dest, EState *estate);
static void ExecInsert(TupleTableSlot *slot, ItemPointer tupleid,
                   TupleTableSlot *planSlot,
                   DestReceiver *dest, EState *estate);
static void ExecDelete(ItemPointer tupleid,
                   TupleTableSlot *planSlot,
                   DestReceiver *dest, EState *estate);
static void ExecUpdate(TupleTableSlot *slot, ItemPointer tupleid,
                   TupleTableSlot *planSlot,
                   DestReceiver *dest, EState *estate);
static void ExecProcessReturning(ProjectionInfo *projectReturning,
                                         TupleTableSlot *tupleSlot,
                                         TupleTableSlot *planSlot,
                                         DestReceiver *dest);
static TupleTableSlot *EvalPlanQualNext(EState *estate);
static void EndEvalPlanQual(EState *estate);
static void ExecCheckRTPerms(List *rangeTable);
static void ExecCheckRTEPerms(RangeTblEntry *rte);
static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt);
static void EvalPlanQualStart(evalPlanQual *epq, EState *estate,
                                  evalPlanQual *priorepq);
static void EvalPlanQualStop(evalPlanQual *epq);
static void OpenIntoRel(QueryDesc *queryDesc);
static void CloseIntoRel(QueryDesc *queryDesc);
static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
static void intorel_receive(TupleTableSlot *slot, DestReceiver *self);
static void intorel_shutdown(DestReceiver *self);
static void intorel_destroy(DestReceiver *self);

/* end of local decls */


/* ----------------------------------------------------------------
 *              ExecutorStart
 *
 *              This routine must be called at the beginning of any execution of any
 *              query plan
 *
 * Takes a QueryDesc previously created by CreateQueryDesc (it's not really
 * clear why we bother to separate the two functions, but...).  The tupDesc
 * field of the QueryDesc is filled in to describe the tuples that will be
 * returned, and the internal fields (estate and planstate) are set up.
 *
 * eflags contains flag bits as described in executor.h.
 *
 * NB: the CurrentMemoryContext when this is called will become the parent
 * of the per-query context used for this Executor invocation.
 * ----------------------------------------------------------------
 */
void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
        EState     *estate;
        MemoryContext oldcontext;

        /* sanity checks: queryDesc must not be started already */
        Assert(queryDesc != NULL);
        Assert(queryDesc->estate == NULL);

        /*
         * If the transaction is read-only, we need to check if any writes are
         * planned to non-temporary tables.  EXPLAIN is considered read-only.
         */
        if (XactReadOnly && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
                ExecCheckXactReadOnly(queryDesc->plannedstmt);

        /*
         * Build EState, switch into per-query memory context for startup.
         */
        estate = CreateExecutorState();
        queryDesc->estate = estate;

        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

        /*
         * Fill in parameters, if any, from queryDesc
         */
        estate->es_param_list_info = queryDesc->params;

        if (queryDesc->plannedstmt->nParamExec > 0)
                estate->es_param_exec_vals = (ParamExecData *)
                        palloc0(queryDesc->plannedstmt->nParamExec * sizeof(ParamExecData));

        /*
         * If non-read-only query, set the command ID to mark output tuples with
         */
        switch (queryDesc->operation)
        {
                case CMD_SELECT:
                        /* SELECT INTO and SELECT FOR UPDATE/SHARE need to mark tuples */
                        if (queryDesc->plannedstmt->intoClause != NULL ||
                                queryDesc->plannedstmt->rowMarks != NIL)
                                estate->es_output_cid = GetCurrentCommandId(true);
                        break;

                case CMD_INSERT:
                case CMD_DELETE:
                case CMD_UPDATE:
                        estate->es_output_cid = GetCurrentCommandId(true);
                        break;

                default:
                        elog(ERROR, "unrecognized operation code: %d",
                                 (int) queryDesc->operation);
                        break;
        }

        /*
         * Copy other important information into the EState
         */
        estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot);
        estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot);
        estate->es_instrument = queryDesc->doInstrument;

        /*
         * Initialize the plan state tree
         */
        InitPlan(queryDesc, eflags);

        MemoryContextSwitchTo(oldcontext);
}

/* ----------------------------------------------------------------
 *              ExecutorRun
 *
 *              This is the main routine of the executor module. It accepts
 *              the query descriptor from the traffic cop and executes the
 *              query plan.
 *
 *              ExecutorStart must have been called already.
 *
 *              If direction is NoMovementScanDirection then nothing is done
 *              except to start up/shut down the destination.  Otherwise,
 *              we retrieve up to 'count' tuples in the specified direction.
 *
 *              Note: count = 0 is interpreted as no portal limit, i.e., run to
 *              completion.
 *
 *              We provide a function hook variable that lets loadable plugins
 *              get control when ExecutorRun is called.  Such a plugin would
 *              normally call standard_ExecutorRun().
 *
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecutorRun(QueryDesc *queryDesc,
                        ScanDirection direction, long count)
{
        TupleTableSlot *result;

        if (ExecutorRun_hook)
                result = (*ExecutorRun_hook) (queryDesc, direction, count);
        else
                result = standard_ExecutorRun(queryDesc, direction, count);
        return result;
}

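/*
 * Illustrative sketch (not part of the original file): a loadable module
 * could interpose on ExecutorRun roughly as follows, saving and chaining
 * to any previously installed hook.  The names "prev_ExecutorRun" and
 * "my_ExecutorRun" are hypothetical.
 *
 *              static ExecutorRun_hook_type prev_ExecutorRun = NULL;
 *
 *              static TupleTableSlot *
 *              my_ExecutorRun(QueryDesc *queryDesc,
 *                                         ScanDirection direction, long count)
 *              {
 *                      ... do any pre-execution work here ...
 *                      if (prev_ExecutorRun)
 *                              return (*prev_ExecutorRun) (queryDesc, direction, count);
 *                      return standard_ExecutorRun(queryDesc, direction, count);
 *              }
 *
 *              void
 *              _PG_init(void)
 *              {
 *                      prev_ExecutorRun = ExecutorRun_hook;
 *                      ExecutorRun_hook = my_ExecutorRun;
 *              }
 */
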
TupleTableSlot *
standard_ExecutorRun(QueryDesc *queryDesc,
                                         ScanDirection direction, long count)
{
        EState     *estate;
        CmdType         operation;
        DestReceiver *dest;
        bool            sendTuples;
        TupleTableSlot *result;
        MemoryContext oldcontext;

        /* sanity checks */
        Assert(queryDesc != NULL);

        estate = queryDesc->estate;

        Assert(estate != NULL);

        /*
         * Switch into per-query memory context
         */
        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

        /*
         * extract information from the query descriptor
         */
        operation = queryDesc->operation;
        dest = queryDesc->dest;

        /*
         * startup tuple receiver, if we will be emitting tuples
         */
        estate->es_processed = 0;
        estate->es_lastoid = InvalidOid;

        sendTuples = (operation == CMD_SELECT ||
                                  queryDesc->plannedstmt->returningLists);

        if (sendTuples)
                (*dest->rStartup) (dest, operation, queryDesc->tupDesc);

        /*
         * run plan
         */
        if (ScanDirectionIsNoMovement(direction))
                result = NULL;
        else
                result = ExecutePlan(estate,
                                                         queryDesc->planstate,
                                                         operation,
                                                         count,
                                                         direction,
                                                         dest);

        /*
         * shutdown tuple receiver, if we started it
         */
        if (sendTuples)
                (*dest->rShutdown) (dest);

        MemoryContextSwitchTo(oldcontext);

        return result;
}

/* ----------------------------------------------------------------
 *              ExecutorEnd
 *
 *              This routine must be called at the end of execution of any
 *              query plan
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
        EState     *estate;
        MemoryContext oldcontext;

        /* sanity checks */
        Assert(queryDesc != NULL);

        estate = queryDesc->estate;

        Assert(estate != NULL);

        /*
         * Switch into per-query memory context to run ExecEndPlan
         */
        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

        ExecEndPlan(queryDesc->planstate, estate);

        /*
         * Close the SELECT INTO relation if any
         */
        if (estate->es_select_into)
                CloseIntoRel(queryDesc);

        /* do away with our snapshots */
        UnregisterSnapshot(estate->es_snapshot);
        UnregisterSnapshot(estate->es_crosscheck_snapshot);

        /*
         * Must switch out of context before destroying it
         */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Release EState and per-query memory context.  This should release
         * everything the executor has allocated.
         */
        FreeExecutorState(estate);

        /* Reset queryDesc fields that no longer point to anything */
        queryDesc->tupDesc = NULL;
        queryDesc->estate = NULL;
        queryDesc->planstate = NULL;
}

/* ----------------------------------------------------------------
 *              ExecutorRewind
 *
 *              This routine may be called on an open queryDesc to rewind it
 *              to the start.
 * ----------------------------------------------------------------
 */
void
ExecutorRewind(QueryDesc *queryDesc)
{
        EState     *estate;
        MemoryContext oldcontext;

        /* sanity checks */
        Assert(queryDesc != NULL);

        estate = queryDesc->estate;

        Assert(estate != NULL);

        /* It's probably not sensible to rescan updating queries */
        Assert(queryDesc->operation == CMD_SELECT);

        /*
         * Switch into per-query memory context
         */
        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

        /*
         * rescan plan
         */
        ExecReScan(queryDesc->planstate, NULL);

        MemoryContextSwitchTo(oldcontext);
}


/*
 * ExecCheckRTPerms
 *              Check access permissions for all relations listed in a range table.
 */
static void
ExecCheckRTPerms(List *rangeTable)
{
        ListCell   *l;

        foreach(l, rangeTable)
        {
                ExecCheckRTEPerms((RangeTblEntry *) lfirst(l));
        }
}

/*
 * ExecCheckRTEPerms
 *              Check access permissions for a single RTE.
 */
static void
ExecCheckRTEPerms(RangeTblEntry *rte)
{
        AclMode         requiredPerms;
        Oid                     relOid;
        Oid                     userid;

        /*
         * Only plain-relation RTEs need to be checked here.  Function RTEs are
         * checked by init_fcache when the function is prepared for execution.
         * Join, subquery, and special RTEs need no checks.
         */
        if (rte->rtekind != RTE_RELATION)
                return;

        /*
         * No work if requiredPerms is empty.
         */
        requiredPerms = rte->requiredPerms;
        if (requiredPerms == 0)
                return;

        relOid = rte->relid;

        /*
         * userid to check as: current user unless we have a setuid indication.
         *
         * Note: GetUserId() is presently fast enough that there's no harm in
         * calling it separately for each RTE.  If that stops being true, we could
         * call it once in ExecCheckRTPerms and pass the userid down from there.
         * But for now, no need for the extra clutter.
         */
        userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

        /*
         * We must have *all* the requiredPerms bits, so use aclmask not aclcheck.
         */
        if (pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL)
                != requiredPerms)
                aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_CLASS,
                                           get_rel_name(relOid));
}

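/*
 * Illustrative sketch (not part of the original file): for a statement
 * such as "UPDATE t SET a = b", the planner records requiredPerms =
 * ACL_UPDATE | ACL_SELECT on t's RTE (SELECT because column b is read),
 * so the pg_class_aclmask() call above must return exactly that bitmask
 * for the checked role, or the statement fails with a permission-denied
 * error.
 */
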
/*
 * Check that the query does not imply any writes to non-temp tables.
 */
static void
ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
{
        ListCell   *l;

        /*
         * CREATE TABLE AS or SELECT INTO?
         *
         * XXX should we allow this if the destination is temp?
         */
        if (plannedstmt->intoClause != NULL)
                goto fail;

        /* Fail if write permissions are requested on any non-temp table */
        foreach(l, plannedstmt->rtable)
        {
                RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

                if (rte->rtekind != RTE_RELATION)
                        continue;

                if ((rte->requiredPerms & (~ACL_SELECT)) == 0)
                        continue;

                if (isTempNamespace(get_rel_namespace(rte->relid)))
                        continue;

                goto fail;
        }

        return;

fail:
        ereport(ERROR,
                        (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
                         errmsg("transaction is read-only")));
}

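/*
 * Illustrative sketch (not part of the original file): in a transaction
 * run under "SET TRANSACTION READ ONLY", the loop above lets
 * "UPDATE temp_tab SET x = 0" through (its RTE lives in a temp
 * namespace) but rejects "UPDATE perm_tab SET x = 0", whose RTE requests
 * write permissions on a non-temp table.
 */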

/* ----------------------------------------------------------------
 *              InitPlan
 *
 *              Initializes the query plan: open files, allocate storage
 *              and start up the rule manager
 * ----------------------------------------------------------------
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
        CmdType         operation = queryDesc->operation;
        PlannedStmt *plannedstmt = queryDesc->plannedstmt;
        Plan       *plan = plannedstmt->planTree;
        List       *rangeTable = plannedstmt->rtable;
        EState     *estate = queryDesc->estate;
        PlanState  *planstate;
        TupleDesc       tupType;
        ListCell   *l;
        int                     i;

        /*
         * Do permissions checks
         */
        ExecCheckRTPerms(rangeTable);

        /*
         * initialize the node's execution state
         */
        estate->es_range_table = rangeTable;

        /*
         * initialize result relation stuff
         */
        if (plannedstmt->resultRelations)
        {
                List       *resultRelations = plannedstmt->resultRelations;
                int                     numResultRelations = list_length(resultRelations);
                ResultRelInfo *resultRelInfos;
                ResultRelInfo *resultRelInfo;

                resultRelInfos = (ResultRelInfo *)
                        palloc(numResultRelations * sizeof(ResultRelInfo));
                resultRelInfo = resultRelInfos;
                foreach(l, resultRelations)
                {
                        Index           resultRelationIndex = lfirst_int(l);
                        Oid                     resultRelationOid;
                        Relation        resultRelation;

                        resultRelationOid = getrelid(resultRelationIndex, rangeTable);
                        resultRelation = heap_open(resultRelationOid, RowExclusiveLock);
                        InitResultRelInfo(resultRelInfo,
                                                          resultRelation,
                                                          resultRelationIndex,
                                                          operation,
                                                          estate->es_instrument);
                        resultRelInfo++;
                }
                estate->es_result_relations = resultRelInfos;
                estate->es_num_result_relations = numResultRelations;
                /* Initialize to first or only result rel */
                estate->es_result_relation_info = resultRelInfos;
        }
        else
        {
                /*
                 * if no result relation, then set state appropriately
                 */
                estate->es_result_relations = NULL;
                estate->es_num_result_relations = 0;
                estate->es_result_relation_info = NULL;
        }

        /*
         * Detect whether we're doing SELECT INTO.  If so, set the es_into_oids
         * flag appropriately so that the plan tree will be initialized with the
         * correct tuple descriptors.  (Other SELECT INTO stuff comes later.)
         */
        estate->es_select_into = false;
        if (operation == CMD_SELECT && plannedstmt->intoClause != NULL)
        {
                estate->es_select_into = true;
                estate->es_into_oids = interpretOidsOption(plannedstmt->intoClause->options);
        }

        /*
         * Have to lock relations selected FOR UPDATE/FOR SHARE before we
         * initialize the plan tree, else we'd be doing a lock upgrade. While we
         * are at it, build the ExecRowMark list.
         */
        estate->es_rowMarks = NIL;
        foreach(l, plannedstmt->rowMarks)
        {
                RowMarkClause *rc = (RowMarkClause *) lfirst(l);
                Oid                     relid = getrelid(rc->rti, rangeTable);
                Relation        relation;
                ExecRowMark *erm;

                relation = heap_open(relid, RowShareLock);
                erm = (ExecRowMark *) palloc(sizeof(ExecRowMark));
                erm->relation = relation;
                erm->rti = rc->rti;
                erm->forUpdate = rc->forUpdate;
                erm->noWait = rc->noWait;
                /* We'll set up ctidAttno below */
                erm->ctidAttNo = InvalidAttrNumber;
                estate->es_rowMarks = lappend(estate->es_rowMarks, erm);
        }

        /*
         * Initialize the executor "tuple" table.  We need slots for all the plan
         * nodes, plus possibly output slots for the junkfilter(s). At this point
         * we aren't sure if we need junkfilters, so just add slots for them
         * unconditionally.  Also, if it's not a SELECT, set up a slot for use for
         * trigger output tuples.  Also, one for RETURNING-list evaluation.
         */
        {
                int                     nSlots;

                /* Slots for the main plan tree */
                nSlots = ExecCountSlotsNode(plan);
                /* Add slots for subplans and initplans */
                foreach(l, plannedstmt->subplans)
                {
                        Plan       *subplan = (Plan *) lfirst(l);

                        nSlots += ExecCountSlotsNode(subplan);
                }
                /* Add slots for junkfilter(s) */
                if (plannedstmt->resultRelations != NIL)
                        nSlots += list_length(plannedstmt->resultRelations);
                else
                        nSlots += 1;
                if (operation != CMD_SELECT)
                        nSlots++;                       /* for es_trig_tuple_slot */
                if (plannedstmt->returningLists)
                        nSlots++;                       /* for RETURNING projection */

                estate->es_tupleTable = ExecCreateTupleTable(nSlots);

                if (operation != CMD_SELECT)
                        estate->es_trig_tuple_slot =
                                ExecAllocTableSlot(estate->es_tupleTable);
        }

        /* mark EvalPlanQual not active */
        estate->es_plannedstmt = plannedstmt;
        estate->es_evalPlanQual = NULL;
        estate->es_evTupleNull = NULL;
        estate->es_evTuple = NULL;
        estate->es_useEvalPlan = false;

        /*
         * Initialize private state information for each SubPlan.  We must do this
         * before running ExecInitNode on the main query tree, since
         * ExecInitSubPlan expects to be able to find these entries.
         */
        Assert(estate->es_subplanstates == NIL);
        i = 1;                                          /* subplan indices count from 1 */
        foreach(l, plannedstmt->subplans)
        {
                Plan       *subplan = (Plan *) lfirst(l);
                PlanState  *subplanstate;
                int                     sp_eflags;

                /*
                 * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
                 * it is a parameterless subplan (not initplan), we suggest that it be
                 * prepared to handle REWIND efficiently; otherwise there is no need.
                 */
                sp_eflags = eflags & EXEC_FLAG_EXPLAIN_ONLY;
                if (bms_is_member(i, plannedstmt->rewindPlanIDs))
                        sp_eflags |= EXEC_FLAG_REWIND;

                subplanstate = ExecInitNode(subplan, estate, sp_eflags);

                estate->es_subplanstates = lappend(estate->es_subplanstates,
                                                                                   subplanstate);

                i++;
        }

        /*
         * Initialize the private state information for all the nodes in the query
         * tree.  This opens files, allocates storage and leaves us ready to start
         * processing tuples.
         */
        planstate = ExecInitNode(plan, estate, eflags);

        /*
         * Get the tuple descriptor describing the type of tuples to return. (this
         * is especially important if we are creating a relation with "SELECT
         * INTO")
         */
        tupType = ExecGetResultType(planstate);

        /*
         * Initialize the junk filter if needed.  SELECT and INSERT queries need a
         * filter if there are any junk attrs in the tlist.  INSERT and SELECT
         * INTO also need a filter if the plan may return raw disk tuples (else
         * heap_insert will be scribbling on the source relation!). UPDATE and
         * DELETE always need a filter, since there's always a junk 'ctid'
         * attribute present --- no need to look first.
         */
        {
                bool            junk_filter_needed = false;
                ListCell   *tlist;

                switch (operation)
                {
                        case CMD_SELECT:
                        case CMD_INSERT:
                                foreach(tlist, plan->targetlist)
                                {
                                        TargetEntry *tle = (TargetEntry *) lfirst(tlist);

                                        if (tle->resjunk)
                                        {
                                                junk_filter_needed = true;
                                                break;
                                        }
                                }
                                if (!junk_filter_needed &&
                                        (operation == CMD_INSERT || estate->es_select_into) &&
                                        ExecMayReturnRawTuples(planstate))
                                        junk_filter_needed = true;
                                break;
                        case CMD_UPDATE:
                        case CMD_DELETE:
                                junk_filter_needed = true;
                                break;
                        default:
                                break;
                }

                if (junk_filter_needed)
                {
                        /*
                         * If there are multiple result relations, each one needs its own
                         * junk filter.  Note this is only possible for UPDATE/DELETE, so
                         * we can't be fooled by some needing a filter and some not.
                         */
                        if (list_length(plannedstmt->resultRelations) > 1)
                        {
                                PlanState **appendplans;
                                int                     as_nplans;
                                ResultRelInfo *resultRelInfo;

                                /* Top plan had better be an Append here. */
                                Assert(IsA(plan, Append));
                                Assert(((Append *) plan)->isTarget);
                                Assert(IsA(planstate, AppendState));
                                appendplans = ((AppendState *) planstate)->appendplans;
                                as_nplans = ((AppendState *) planstate)->as_nplans;
                                Assert(as_nplans == estate->es_num_result_relations);
                                resultRelInfo = estate->es_result_relations;
                                for (i = 0; i < as_nplans; i++)
                                {
                                        PlanState  *subplan = appendplans[i];
                                        JunkFilter *j;

                                        j = ExecInitJunkFilter(subplan->plan->targetlist,
                                                        resultRelInfo->ri_RelationDesc->rd_att->tdhasoid,
                                                                  ExecAllocTableSlot(estate->es_tupleTable));

                                        /*
                                         * Since it must be UPDATE/DELETE, there had better be a
                                         * "ctid" junk attribute in the tlist ... but ctid could
                                         * be at a different resno for each result relation. We
                                         * look up the ctid resnos now and save them in the
                                         * junkfilters.
                                         */
                                        j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
                                        if (!AttributeNumberIsValid(j->jf_junkAttNo))
                                                elog(ERROR, "could not find junk ctid column");
                                        resultRelInfo->ri_junkFilter = j;
                                        resultRelInfo++;
                                }

                                /*
                                 * Set active junkfilter too; at this point ExecInitAppend has
                                 * already selected an active result relation...
                                 */
                                estate->es_junkFilter =
                                        estate->es_result_relation_info->ri_junkFilter;

                                /*
                                 * We currently can't support rowmarks in this case, because
                                 * the associated junk CTIDs might have different resnos in
                                 * different subplans.
                                 */
                                if (estate->es_rowMarks)
                                        ereport(ERROR,
                                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                                         errmsg("SELECT FOR UPDATE/SHARE is not supported within a query with multiple result relations")));
                        }
                        else
                        {
                                /* Normal case with just one JunkFilter */
                                JunkFilter *j;

                                j = ExecInitJunkFilter(planstate->plan->targetlist,
                                                                           tupType->tdhasoid,
                                                                  ExecAllocTableSlot(estate->es_tupleTable));
                                estate->es_junkFilter = j;
                                if (estate->es_result_relation_info)
                                        estate->es_result_relation_info->ri_junkFilter = j;

                                if (operation == CMD_SELECT)
                                {
                                        /* For SELECT, want to return the cleaned tuple type */
                                        tupType = j->jf_cleanTupType;
                                }
                                else if (operation == CMD_UPDATE || operation == CMD_DELETE)
                                {
                                        /* For UPDATE/DELETE, find the ctid junk attr now */
                                        j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
                                        if (!AttributeNumberIsValid(j->jf_junkAttNo))
                                                elog(ERROR, "could not find junk ctid column");
                                }

                                /* For SELECT FOR UPDATE/SHARE, find the ctid attrs now */
                                foreach(l, estate->es_rowMarks)
                                {
                                        ExecRowMark *erm = (ExecRowMark *) lfirst(l);
                                        char            resname[32];

                                        snprintf(resname, sizeof(resname), "ctid%u", erm->rti);
                                        erm->ctidAttNo = ExecFindJunkAttribute(j, resname);
                                        if (!AttributeNumberIsValid(erm->ctidAttNo))
                                                elog(ERROR, "could not find junk \"%s\" column",
                                                         resname);
                                }
                        }
                }
                else
                {
                        estate->es_junkFilter = NULL;
                        if (estate->es_rowMarks)
                                elog(ERROR, "SELECT FOR UPDATE/SHARE, but no junk columns");
                }
        }

        /*
         * Initialize RETURNING projections if needed.
         */
        if (plannedstmt->returningLists)
        {
                TupleTableSlot *slot;
                ExprContext *econtext;
                ResultRelInfo *resultRelInfo;

                /*
                 * We set QueryDesc.tupDesc to be the RETURNING rowtype in this case.
                 * We assume all the sublists will generate the same output tupdesc.
                 */
                tupType = ExecTypeFromTL((List *) linitial(plannedstmt->returningLists),
                                                                 false);

                /* Set up a slot for the output of the RETURNING projection(s) */
                slot = ExecAllocTableSlot(estate->es_tupleTable);
                ExecSetSlotDescriptor(slot, tupType);
                /* Need an econtext too */
                econtext = CreateExprContext(estate);

                /*
                 * Build a projection for each result rel.  Note that any SubPlans in
                 * the RETURNING lists get attached to the topmost plan node.
                 */
                Assert(list_length(plannedstmt->returningLists) == estate->es_num_result_relations);
                resultRelInfo = estate->es_result_relations;
                foreach(l, plannedstmt->returningLists)
                {
                        List       *rlist = (List *) lfirst(l);
                        List       *rliststate;

                        rliststate = (List *) ExecInitExpr((Expr *) rlist, planstate);
                        resultRelInfo->ri_projectReturning =
                                ExecBuildProjectionInfo(rliststate, econtext, slot,
                                                                         resultRelInfo->ri_RelationDesc->rd_att);
                        resultRelInfo++;
                }
        }

        queryDesc->tupDesc = tupType;
        queryDesc->planstate = planstate;

        /*
         * If doing SELECT INTO, initialize the "into" relation.  We must wait
         * till now so we have the "clean" result tuple type to create the new
         * table from.
         *
         * If EXPLAIN, skip creating the "into" relation.
         */
        if (estate->es_select_into && !(eflags & EXEC_FLAG_EXPLAIN_ONLY))
                OpenIntoRel(queryDesc);
}

/*
 * Initialize ResultRelInfo data for one result relation
 */
void
InitResultRelInfo(ResultRelInfo *resultRelInfo,
                                  Relation resultRelationDesc,
                                  Index resultRelationIndex,
                                  CmdType operation,
                                  bool doInstrument)
{
        /*
         * Check valid relkind ... parser and/or planner should have noticed this
         * already, but let's make sure.
         */
        switch (resultRelationDesc->rd_rel->relkind)
        {
                case RELKIND_RELATION:
                        /* OK */
                        break;
                case RELKIND_SEQUENCE:
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("cannot change sequence \"%s\"",
                                                        RelationGetRelationName(resultRelationDesc))));
                        break;
                case RELKIND_TOASTVALUE:
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("cannot change TOAST relation \"%s\"",
                                                        RelationGetRelationName(resultRelationDesc))));
                        break;
                case RELKIND_VIEW:
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("cannot change view \"%s\"",
                                                        RelationGetRelationName(resultRelationDesc))));
                        break;
                default:
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("cannot change relation \"%s\"",
                                                        RelationGetRelationName(resultRelationDesc))));
                        break;
        }

        /* OK, fill in the node */
        MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
        resultRelInfo->type = T_ResultRelInfo;
        resultRelInfo->ri_RangeTableIndex = resultRelationIndex;
        resultRelInfo->ri_RelationDesc = resultRelationDesc;
        resultRelInfo->ri_NumIndices = 0;
        resultRelInfo->ri_IndexRelationDescs = NULL;
        resultRelInfo->ri_IndexRelationInfo = NULL;
        /* make a copy so as not to depend on relcache info not changing... */
        resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc);
        if (resultRelInfo->ri_TrigDesc)
        {
                int                     n = resultRelInfo->ri_TrigDesc->numtriggers;

                resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
                        palloc0(n * sizeof(FmgrInfo));
                if (doInstrument)
                        resultRelInfo->ri_TrigInstrument = InstrAlloc(n);
                else
                        resultRelInfo->ri_TrigInstrument = NULL;
        }
        else
        {
                resultRelInfo->ri_TrigFunctions = NULL;
                resultRelInfo->ri_TrigInstrument = NULL;
        }
        resultRelInfo->ri_ConstraintExprs = NULL;
        resultRelInfo->ri_junkFilter = NULL;
        resultRelInfo->ri_projectReturning = NULL;

        /*
         * If there are indices on the result relation, open them and save
         * descriptors in the result relation info, so that we can add new index
         * entries for the tuples we add/update.  We need not do this for a
         * DELETE, however, since deletion doesn't affect indexes.
         */
        if (resultRelationDesc->rd_rel->relhasindex &&
                operation != CMD_DELETE)
                ExecOpenIndices(resultRelInfo);
}

/*
 *              ExecGetTriggerResultRel
 *
 * Get a ResultRelInfo for a trigger target relation.  Most of the time,
 * triggers are fired on one of the result relations of the query, and so
 * we can just return a member of the es_result_relations array.  (Note: in
 * self-join situations there might be multiple members with the same OID;
 * if so it doesn't matter which one we pick.)  However, it is sometimes
 * necessary to fire triggers on other relations; this happens mainly when an
 * RI update trigger queues additional triggers on other relations, which will
 * be processed in the context of the outer query.  For efficiency's sake,
 * we want to have a ResultRelInfo for those triggers too; that can avoid
 * repeated re-opening of the relation.  (It also provides a way for EXPLAIN
 * ANALYZE to report the runtimes of such triggers.)  So we make additional
 * ResultRelInfo's as needed, and save them in es_trig_target_relations.
 */
ResultRelInfo *
ExecGetTriggerResultRel(EState *estate, Oid relid)
{
        ResultRelInfo *rInfo;
        int                     nr;
        ListCell   *l;
        Relation        rel;
        MemoryContext oldcontext;

        /* First, search through the query result relations */
        rInfo = estate->es_result_relations;
        nr = estate->es_num_result_relations;
        while (nr > 0)
        {
                if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
                        return rInfo;
                rInfo++;
                nr--;
        }
        /* Nope, but maybe we already made an extra ResultRelInfo for it */
        foreach(l, estate->es_trig_target_relations)
        {
                rInfo = (ResultRelInfo *) lfirst(l);
                if (RelationGetRelid(rInfo->ri_RelationDesc) == relid)
                        return rInfo;
        }
        /* Nope, so we need a new one */

        /*
         * Open the target relation's relcache entry.  We assume that an
         * appropriate lock is still held by the backend from whenever the trigger
         * event got queued, so we need take no new lock here.
         */
        rel = heap_open(relid, NoLock);

        /*
         * Make the new entry in the right context.  Currently, we don't need any
         * index information in ResultRelInfos used only for triggers, so tell
         * InitResultRelInfo it's a DELETE.
         */
        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
        rInfo = makeNode(ResultRelInfo);
        InitResultRelInfo(rInfo,
                                          rel,
                                          0,            /* dummy rangetable index */
                                          CMD_DELETE,
                                          estate->es_instrument);
        estate->es_trig_target_relations =
                lappend(estate->es_trig_target_relations, rInfo);
        MemoryContextSwitchTo(oldcontext);

        return rInfo;
}

/*
 *              ExecContextForcesOids
 *
 * This is pretty grotty: when doing INSERT, UPDATE, or SELECT INTO,
 * we need to ensure that result tuples have space for an OID iff they are
 * going to be stored into a relation that has OIDs.  In other contexts
 * we are free to choose whether to leave space for OIDs in result tuples
 * (we generally don't want to, but we do if a physical-tlist optimization
 * is possible).  This routine checks the plan context and returns TRUE if the
 * choice is forced, FALSE if the choice is not forced.  In the TRUE case,
 * *hasoids is set to the required value.
 *
 * One reason this is ugly is that all plan nodes in the plan tree will emit
 * tuples with space for an OID, though we really only need the topmost node
 * to do so.  However, node types like Sort don't project new tuples but just
 * return their inputs, and in those cases the requirement propagates down
 * to the input node.  Eventually we might make this code smart enough to
 * recognize how far down the requirement really goes, but for now we just
 * make all plan nodes do the same thing if the top level forces the choice.
 *
 * We assume that estate->es_result_relation_info is already set up to
 * describe the target relation.  Note that in an UPDATE that spans an
 * inheritance tree, some of the target relations may have OIDs and some not.
 * We have to make the decisions on a per-relation basis as we initialize
 * each of the child plans of the topmost Append plan.
 *
 * SELECT INTO is even uglier, because we don't have the INTO relation's
 * descriptor available when this code runs; we have to look aside at a
 * flag set by InitPlan().
 */
bool
ExecContextForcesOids(PlanState *planstate, bool *hasoids)
{
        if (planstate->state->es_select_into)
        {
                *hasoids = planstate->state->es_into_oids;
                return true;
        }
        else
        {
                ResultRelInfo *ri = planstate->state->es_result_relation_info;

                if (ri != NULL)
                {
                        Relation        rel = ri->ri_RelationDesc;

                        if (rel != NULL)
                        {
                                *hasoids = rel->rd_rel->relhasoids;
                                return true;
                        }
                }
        }

        return false;
}

/* ----------------------------------------------------------------
 *              ExecEndPlan
 *
 *              Cleans up the query plan -- closes files and frees up storage
 *
 * NOTE: we are no longer very worried about freeing storage per se
 * in this code; FreeExecutorState should be guaranteed to release all
 * memory that needs to be released.  What we are worried about doing
 * is closing relations and dropping buffer pins.  Thus, for example,
 * tuple tables must be cleared or dropped to ensure pins are released.
 * ----------------------------------------------------------------
 */
static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
        ResultRelInfo *resultRelInfo;
        int                     i;
        ListCell   *l;

        /*
         * shut down any PlanQual processing we were doing
         */
        if (estate->es_evalPlanQual != NULL)
                EndEvalPlanQual(estate);

        /*
         * shut down the node-type-specific query processing
         */
        ExecEndNode(planstate);

        /*
         * for subplans too
         */
        foreach(l, estate->es_subplanstates)
        {
                PlanState  *subplanstate = (PlanState *) lfirst(l);

                ExecEndNode(subplanstate);
        }

        /*
         * destroy the executor "tuple" table.
         */
        ExecDropTupleTable(estate->es_tupleTable, true);
        estate->es_tupleTable = NULL;

        /*
         * close the result relation(s) if any, but hold locks until xact commit.
         */
        resultRelInfo = estate->es_result_relations;
        for (i = estate->es_num_result_relations; i > 0; i--)
        {
                /* Close indices and then the relation itself */
                ExecCloseIndices(resultRelInfo);
                heap_close(resultRelInfo->ri_RelationDesc, NoLock);
                resultRelInfo++;
        }

        /*
         * likewise close any trigger target relations
         */
        foreach(l, estate->es_trig_target_relations)
        {
                resultRelInfo = (ResultRelInfo *) lfirst(l);
                /* Close indices and then the relation itself */
                ExecCloseIndices(resultRelInfo);
                heap_close(resultRelInfo->ri_RelationDesc, NoLock);
        }

        /*
         * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
         */
        foreach(l, estate->es_rowMarks)
        {
                ExecRowMark *erm = lfirst(l);

                heap_close(erm->relation, NoLock);
        }
}

/* ----------------------------------------------------------------
 *              ExecutePlan
 *
 *              processes the query plan to retrieve 'numberTuples' tuples in the
 *              direction specified.
 *
 *              Retrieves all tuples if numberTuples is 0
 *
 *              result is either a slot containing the last tuple in the case
 *              of a SELECT or NULL otherwise.
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecutePlan(EState *estate,
                        PlanState *planstate,
                        CmdType operation,
                        long numberTuples,
                        ScanDirection direction,
                        DestReceiver *dest)
{
        JunkFilter *junkfilter;
        TupleTableSlot *planSlot;
        TupleTableSlot *slot;
        ItemPointer tupleid = NULL;
        ItemPointerData tuple_ctid;
        long            current_tuple_count;
        TupleTableSlot *result;

        /*
         * initialize local variables
         */
        current_tuple_count = 0;
        result = NULL;

        /*
         * Set the direction.
         */
        estate->es_direction = direction;

        /*
         * Process BEFORE EACH STATEMENT triggers
         */
        switch (operation)
        {
                case CMD_UPDATE:
                        ExecBSUpdateTriggers(estate, estate->es_result_relation_info);
                        break;
                case CMD_DELETE:
                        ExecBSDeleteTriggers(estate, estate->es_result_relation_info);
                        break;
                case CMD_INSERT:
                        ExecBSInsertTriggers(estate, estate->es_result_relation_info);
                        break;
                default:
                        /* do nothing */
                        break;
        }

        /*
         * Loop until we've processed the proper number of tuples from the plan.
         */

        for (;;)
        {
                /* Reset the per-output-tuple exprcontext */
                ResetPerTupleExprContext(estate);

                /*
                 * Execute the plan and obtain a tuple
                 */
lnext:  ;
                if (estate->es_useEvalPlan)
                {
                        planSlot = EvalPlanQualNext(estate);
                        if (TupIsNull(planSlot))
                                planSlot = ExecProcNode(planstate);
                }
                else
                        planSlot = ExecProcNode(planstate);

                /*
                 * if the tuple is null, then we assume there is nothing more to
                 * process so we just return null...
                 */
                if (TupIsNull(planSlot))
                {
                        result = NULL;
                        break;
                }
                slot = planSlot;

                /*
                 * If we have a junk filter, then project a new tuple with the junk
                 * removed.
                 *
                 * Store this new "clean" tuple in the junkfilter's resultSlot.
                 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
                 * because that tuple slot has the wrong descriptor.)
                 *
                 * But first, extract all the junk information we need.
                 */
                if ((junkfilter = estate->es_junkFilter) != NULL)
                {
                        /*
                         * Process any FOR UPDATE or FOR SHARE locking requested.
                         */
                        if (estate->es_rowMarks != NIL)
                        {
                                ListCell   *l;

                lmark:  ;
                                foreach(l, estate->es_rowMarks)
                                {
                                        ExecRowMark *erm = lfirst(l);
                                        Datum           datum;
                                        bool            isNull;
                                        HeapTupleData tuple;
                                        Buffer          buffer;
                                        ItemPointerData update_ctid;
                                        TransactionId update_xmax;
                                        TupleTableSlot *newSlot;
                                        LockTupleMode lockmode;
                                        HTSU_Result test;

                                        datum = ExecGetJunkAttribute(slot,
                                                                                                 erm->ctidAttNo,
                                                                                                 &isNull);
                                        /* shouldn't ever get a null result... */
                                        if (isNull)
                                                elog(ERROR, "ctid is NULL");

                                        tuple.t_self = *((ItemPointer) DatumGetPointer(datum));

                                        if (erm->forUpdate)
                                                lockmode = LockTupleExclusive;
                                        else
                                                lockmode = LockTupleShared;

                                        test = heap_lock_tuple(erm->relation, &tuple, &buffer,
                                                                                   &update_ctid, &update_xmax,
                                                                                   estate->es_output_cid,
                                                                                   lockmode, erm->noWait);
                                        ReleaseBuffer(buffer);
                                        switch (test)
                                        {
                                                case HeapTupleSelfUpdated:
                                                        /* treat it as deleted; do not process */
                                                        goto lnext;

                                                case HeapTupleMayBeUpdated:
                                                        break;

                                                case HeapTupleUpdated:
                                                        if (IsXactIsoLevelSerializable)
                                                                ereport(ERROR,
                                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                                                                  errmsg("could not serialize access due to concurrent update")));
                                                        if (!ItemPointerEquals(&update_ctid,
                                                                                                   &tuple.t_self))
                                                        {
                                                                /* updated, so look at updated version */
                                                                newSlot = EvalPlanQual(estate,
                                                                                                           erm->rti,
                                                                                                           &update_ctid,
1357                                                                                                            update_xmax);
1358                                                                 if (!TupIsNull(newSlot))
1359                                                                 {
1360                                                                         slot = planSlot = newSlot;
1361                                                                         estate->es_useEvalPlan = true;
1362                                                                         goto lmark;
1363                                                                 }
1364                                                         }
1365
1366                                                         /*
1367                                                          * If the tuple was deleted, or PlanQual failed
1368                                                          * for the updated tuple, we must not return it!
1369                                                          */
1370                                                         goto lnext;
1371
1372                                                 default:
1373                                                         elog(ERROR, "unrecognized heap_lock_tuple status: %u",
1374                                                                  test);
1375                                                         return NULL;
1376                                         }
1377                                 }
1378                         }
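                        /*
                         * Illustration (for exposition only; "mytab" is a hypothetical
                         * table): the loop above implements the row locking behind
                         *
                         *              SELECT * FROM mytab WHERE k = 1 FOR UPDATE;
                         *
                         * Each row carries a junk ctid identifying the row that
                         * heap_lock_tuple locks; if another transaction updated the
                         * row first, the EvalPlanQual recheck is applied to the newest
                         * version before the row is locked and returned.
                         */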
1379
1380                         /*
1381                          * extract the 'ctid' junk attribute.
1382                          */
1383                         if (operation == CMD_UPDATE || operation == CMD_DELETE)
1384                         {
1385                                 Datum           datum;
1386                                 bool            isNull;
1387
1388                                 datum = ExecGetJunkAttribute(slot, junkfilter->jf_junkAttNo,
1389                                                                                          &isNull);
1390                                 /* shouldn't ever get a null result... */
1391                                 if (isNull)
1392                                         elog(ERROR, "ctid is NULL");
1393
1394                                 tupleid = (ItemPointer) DatumGetPointer(datum);
1395                                 tuple_ctid = *tupleid;  /* make sure we don't free the ctid!! */
1396                                 tupleid = &tuple_ctid;
1397                         }
1398
1399                         /*
1400                          * Create a new "clean" tuple with all junk attributes removed. We
1401                          * don't need to do this for DELETE, however (there will in fact
1402                          * be no non-junk attributes in a DELETE!)
1403                          */
1404                         if (operation != CMD_DELETE)
1405                                 slot = ExecFilterJunk(junkfilter, slot);
1406                 }
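                /*
                 * Illustration (for exposition only; "mytab" is hypothetical):
                 * for "UPDATE mytab SET a = a + 1", the plan's targetlist holds
                 * the user columns plus a resjunk "ctid" column.  The code above
                 * saves the ctid, so we know which physical row to update, then
                 * projects a clean tuple without the junk column for ExecUpdate.
                 */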
1407
1408                 /*
1409                  * Now that we have a tuple, do the appropriate thing with it: either
1410                  * return it to the user, add it to a relation, delete it from a
1411                  * relation, or modify some of its attributes.
1412                  */
1413                 switch (operation)
1414                 {
1415                         case CMD_SELECT:
1416                                 ExecSelect(slot, dest, estate);
1417                                 result = slot;
1418                                 break;
1419
1420                         case CMD_INSERT:
1421                                 ExecInsert(slot, tupleid, planSlot, dest, estate);
1422                                 result = NULL;
1423                                 break;
1424
1425                         case CMD_DELETE:
1426                                 ExecDelete(tupleid, planSlot, dest, estate);
1427                                 result = NULL;
1428                                 break;
1429
1430                         case CMD_UPDATE:
1431                                 ExecUpdate(slot, tupleid, planSlot, dest, estate);
1432                                 result = NULL;
1433                                 break;
1434
1435                         default:
1436                                 elog(ERROR, "unrecognized operation code: %d",
1437                                          (int) operation);
1438                                 result = NULL;
1439                                 break;
1440                 }
1441
1442                 /*
1443                  * Check our tuple count: if we've processed the proper number then
1444                  * quit, else loop again and process more tuples.  Zero numberTuples
1445                  * means no limit.
1446                  */
1447                 current_tuple_count++;
1448                 if (numberTuples && numberTuples == current_tuple_count)
1449                         break;
1450         }
1451
1452         /*
1453          * Process AFTER EACH STATEMENT triggers
1454          */
1455         switch (operation)
1456         {
1457                 case CMD_UPDATE:
1458                         ExecASUpdateTriggers(estate, estate->es_result_relation_info);
1459                         break;
1460                 case CMD_DELETE:
1461                         ExecASDeleteTriggers(estate, estate->es_result_relation_info);
1462                         break;
1463                 case CMD_INSERT:
1464                         ExecASInsertTriggers(estate, estate->es_result_relation_info);
1465                         break;
1466                 default:
1467                         /* do nothing */
1468                         break;
1469         }
1470
1471         /*
1472          * here, result is either a slot containing a tuple in the case of a
1473          * SELECT or NULL otherwise.
1474          */
1475         return result;
1476 }
1477
1478 /* ----------------------------------------------------------------
1479  *              ExecSelect
1480  *
1481  *              SELECTs are easy: we just pass the tuple to the appropriate
1482  *              output function.
1483  * ----------------------------------------------------------------
1484  */
1485 static void
1486 ExecSelect(TupleTableSlot *slot,
1487                    DestReceiver *dest,
1488                    EState *estate)
1489 {
1490         (*dest->receiveSlot) (slot, dest);
1491         IncrRetrieved();
1492         (estate->es_processed)++;
1493 }
1494
1495 /* ----------------------------------------------------------------
1496  *              ExecInsert
1497  *
1498  *              INSERTs are trickier: we have to insert the tuple into
1499  *              the base relation and insert appropriate tuples into the
1500  *              index relations.
1501  * ----------------------------------------------------------------
1502  */
1503 static void
1504 ExecInsert(TupleTableSlot *slot,
1505                    ItemPointer tupleid,
1506                    TupleTableSlot *planSlot,
1507                    DestReceiver *dest,
1508                    EState *estate)
1509 {
1510         HeapTuple       tuple;
1511         ResultRelInfo *resultRelInfo;
1512         Relation        resultRelationDesc;
1513         Oid                     newId;
1514
1515         /*
1516          * get the heap tuple out of the tuple table slot, making sure we have a
1517          * writable copy
1518          */
1519         tuple = ExecMaterializeSlot(slot);
1520
1521         /*
1522          * get information on the (current) result relation
1523          */
1524         resultRelInfo = estate->es_result_relation_info;
1525         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1526
1527         /* BEFORE ROW INSERT Triggers */
1528         if (resultRelInfo->ri_TrigDesc &&
1529                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_INSERT] > 0)
1530         {
1531                 HeapTuple       newtuple;
1532
1533                 newtuple = ExecBRInsertTriggers(estate, resultRelInfo, tuple);
1534
1535                 if (newtuple == NULL)   /* "do nothing" */
1536                         return;
1537
1538                 if (newtuple != tuple)  /* modified by Trigger(s) */
1539                 {
1540                         /*
1541                          * Put the modified tuple into a slot for convenience of routines
1542                          * below.  We assume the tuple was allocated in per-tuple memory
1543                          * context, and therefore will go away by itself. The tuple table
1544                          * slot should not try to clear it.
1545                          */
1546                         TupleTableSlot *newslot = estate->es_trig_tuple_slot;
1547
1548                         if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1549                                 ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1550                         ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
1551                         slot = newslot;
1552                         tuple = newtuple;
1553                 }
1554         }
1555
1556         /*
1557          * Check the constraints of the tuple
1558          */
1559         if (resultRelationDesc->rd_att->constr)
1560                 ExecConstraints(resultRelInfo, slot, estate);
1561
1562         /*
1563          * insert the tuple
1564          *
1565          * Note: heap_insert returns the tid (location) of the new tuple in the
1566          * t_self field.
1567          */
1568         newId = heap_insert(resultRelationDesc, tuple,
1569                                                 estate->es_output_cid,
1570                                                 true, true);
1571
1572         IncrAppended();
1573         (estate->es_processed)++;
1574         estate->es_lastoid = newId;
1575         setLastTid(&(tuple->t_self));
1576
1577         /*
1578          * insert index entries for tuple
1579          */
1580         if (resultRelInfo->ri_NumIndices > 0)
1581                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
1582
1583         /* AFTER ROW INSERT Triggers */
1584         ExecARInsertTriggers(estate, resultRelInfo, tuple);
1585
1586         /* Process RETURNING if present */
1587         if (resultRelInfo->ri_projectReturning)
1588                 ExecProcessReturning(resultRelInfo->ri_projectReturning,
1589                                                          slot, planSlot, dest);
1590 }
1591
1592 /* ----------------------------------------------------------------
1593  *              ExecDelete
1594  *
1595  *              DELETE is like UPDATE, except that we delete the tuple and no
1596  *              index modifications are needed
1597  * ----------------------------------------------------------------
1598  */
1599 static void
1600 ExecDelete(ItemPointer tupleid,
1601                    TupleTableSlot *planSlot,
1602                    DestReceiver *dest,
1603                    EState *estate)
1604 {
1605         ResultRelInfo *resultRelInfo;
1606         Relation        resultRelationDesc;
1607         HTSU_Result result;
1608         ItemPointerData update_ctid;
1609         TransactionId update_xmax;
1610
1611         /*
1612          * get information on the (current) result relation
1613          */
1614         resultRelInfo = estate->es_result_relation_info;
1615         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1616
1617         /* BEFORE ROW DELETE Triggers */
1618         if (resultRelInfo->ri_TrigDesc &&
1619                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_DELETE] > 0)
1620         {
1621                 bool            dodelete;
1622
1623                 dodelete = ExecBRDeleteTriggers(estate, resultRelInfo, tupleid);
1624
1625                 if (!dodelete)                  /* "do nothing" */
1626                         return;
1627         }
1628
1629         /*
1630          * delete the tuple
1631          *
1632          * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
1633          * the row to be deleted is visible to that snapshot, and throw a
1634          * can't-serialize error if not.  This is a special-case behavior needed for
1635          * referential integrity updates in serializable transactions.
1636          */
1637 ldelete:;
1638         result = heap_delete(resultRelationDesc, tupleid,
1639                                                  &update_ctid, &update_xmax,
1640                                                  estate->es_output_cid,
1641                                                  estate->es_crosscheck_snapshot,
1642                                                  true /* wait for commit */ );
1643         switch (result)
1644         {
1645                 case HeapTupleSelfUpdated:
1646                         /* already deleted by self; nothing to do */
1647                         return;
1648
1649                 case HeapTupleMayBeUpdated:
1650                         break;
1651
1652                 case HeapTupleUpdated:
1653                         if (IsXactIsoLevelSerializable)
1654                                 ereport(ERROR,
1655                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1656                                                  errmsg("could not serialize access due to concurrent update")));
1657                         else if (!ItemPointerEquals(tupleid, &update_ctid))
1658                         {
1659                                 TupleTableSlot *epqslot;
1660
1661                                 epqslot = EvalPlanQual(estate,
1662                                                                            resultRelInfo->ri_RangeTableIndex,
1663                                                                            &update_ctid,
1664                                                                            update_xmax);
1665                                 if (!TupIsNull(epqslot))
1666                                 {
1667                                         *tupleid = update_ctid;
1668                                         goto ldelete;
1669                                 }
1670                         }
1671                         /* tuple already deleted; nothing to do */
1672                         return;
1673
1674                 default:
1675                         elog(ERROR, "unrecognized heap_delete status: %u", result);
1676                         return;
1677         }
1678
1679         IncrDeleted();
1680         (estate->es_processed)++;
1681
1682         /*
1683          * Note: Normally one would think that we have to delete index tuples
1684          * associated with the heap tuple now...
1685          *
1686          * ... but in POSTGRES, we have no need to do this because VACUUM will
1687          * take care of it later.  We can't delete index tuples immediately
1688          * anyway, since the tuple is still visible to other transactions.
1689          */
1690
1691         /* AFTER ROW DELETE Triggers */
1692         ExecARDeleteTriggers(estate, resultRelInfo, tupleid);
1693
1694         /* Process RETURNING if present */
1695         if (resultRelInfo->ri_projectReturning)
1696         {
1697                 /*
1698                  * We have to put the target tuple into a slot, which means we first
1699                  * have to fetch it.  We can use the trigger tuple slot.
1700                  */
1701                 TupleTableSlot *slot = estate->es_trig_tuple_slot;
1702                 HeapTupleData deltuple;
1703                 Buffer          delbuffer;
1704
1705                 deltuple.t_self = *tupleid;
1706                 if (!heap_fetch(resultRelationDesc, SnapshotAny,
1707                                                 &deltuple, &delbuffer, false, NULL))
1708                         elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
1709
1710                 if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
1711                         ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
1712                 ExecStoreTuple(&deltuple, slot, InvalidBuffer, false);
1713
1714                 ExecProcessReturning(resultRelInfo->ri_projectReturning,
1715                                                          slot, planSlot, dest);
1716
1717                 ExecClearTuple(slot);
1718                 ReleaseBuffer(delbuffer);
1719         }
1720 }
1721
1722 /* ----------------------------------------------------------------
1723  *              ExecUpdate
1724  *
1725  *              note: we can't run UPDATE queries with transactions
1726  *              off, because UPDATEs are actually INSERTs and our
1727  *              scan will mistakenly loop forever, updating the tuple
1728  *              it just inserted.  This should be fixed, but until it
1729  *              is, we don't want to get stuck in an infinite loop
1730  *              that corrupts your database.
1731  * ----------------------------------------------------------------
1732  */
1733 static void
1734 ExecUpdate(TupleTableSlot *slot,
1735                    ItemPointer tupleid,
1736                    TupleTableSlot *planSlot,
1737                    DestReceiver *dest,
1738                    EState *estate)
1739 {
1740         HeapTuple       tuple;
1741         ResultRelInfo *resultRelInfo;
1742         Relation        resultRelationDesc;
1743         HTSU_Result result;
1744         ItemPointerData update_ctid;
1745         TransactionId update_xmax;
1746
1747         /*
1748          * abort the operation if not running transactions
1749          */
1750         if (IsBootstrapProcessingMode())
1751                 elog(ERROR, "cannot UPDATE during bootstrap");
1752
1753         /*
1754          * get the heap tuple out of the tuple table slot, making sure we have a
1755          * writable copy
1756          */
1757         tuple = ExecMaterializeSlot(slot);
1758
1759         /*
1760          * get information on the (current) result relation
1761          */
1762         resultRelInfo = estate->es_result_relation_info;
1763         resultRelationDesc = resultRelInfo->ri_RelationDesc;
1764
1765         /* BEFORE ROW UPDATE Triggers */
1766         if (resultRelInfo->ri_TrigDesc &&
1767                 resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
1768         {
1769                 HeapTuple       newtuple;
1770
1771                 newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
1772                                                                                 tupleid, tuple);
1773
1774                 if (newtuple == NULL)   /* "do nothing" */
1775                         return;
1776
1777                 if (newtuple != tuple)  /* modified by Trigger(s) */
1778                 {
1779                         /*
1780                          * Put the modified tuple into a slot for convenience of routines
1781                          * below.  We assume the tuple was allocated in per-tuple memory
1782                          * context, and therefore will go away by itself. The tuple table
1783                          * slot should not try to clear it.
1784                          */
1785                         TupleTableSlot *newslot = estate->es_trig_tuple_slot;
1786
1787                         if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
1788                                 ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
1789                         ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
1790                         slot = newslot;
1791                         tuple = newtuple;
1792                 }
1793         }
1794
1795         /*
1796          * Check the constraints of the tuple
1797          *
1798          * If we generate a new candidate tuple after EvalPlanQual testing, we
1799          * must loop back here and recheck constraints.  (We don't need to redo
1800          * triggers, however.  If there are any BEFORE triggers then trigger.c
1801          * will have done heap_lock_tuple to lock the correct tuple, so there's no
1802          * need to do them again.)
1803          */
1804 lreplace:;
1805         if (resultRelationDesc->rd_att->constr)
1806                 ExecConstraints(resultRelInfo, slot, estate);
1807
1808         /*
1809          * replace the heap tuple
1810          *
1811          * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
1812          * the row to be updated is visible to that snapshot, and throw a
1813          * can't-serialize error if not.  This is a special-case behavior needed for
1814          * referential integrity updates in serializable transactions.
1815          */
1816         result = heap_update(resultRelationDesc, tupleid, tuple,
1817                                                  &update_ctid, &update_xmax,
1818                                                  estate->es_output_cid,
1819                                                  estate->es_crosscheck_snapshot,
1820                                                  true /* wait for commit */ );
1821         switch (result)
1822         {
1823                 case HeapTupleSelfUpdated:
1824                         /* already deleted by self; nothing to do */
1825                         return;
1826
1827                 case HeapTupleMayBeUpdated:
1828                         break;
1829
1830                 case HeapTupleUpdated:
1831                         if (IsXactIsoLevelSerializable)
1832                                 ereport(ERROR,
1833                                                 (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1834                                                  errmsg("could not serialize access due to concurrent update")));
1835                         else if (!ItemPointerEquals(tupleid, &update_ctid))
1836                         {
1837                                 TupleTableSlot *epqslot;
1838
1839                                 epqslot = EvalPlanQual(estate,
1840                                                                            resultRelInfo->ri_RangeTableIndex,
1841                                                                            &update_ctid,
1842                                                                            update_xmax);
1843                                 if (!TupIsNull(epqslot))
1844                                 {
1845                                         *tupleid = update_ctid;
1846                                         slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
1847                                         tuple = ExecMaterializeSlot(slot);
1848                                         goto lreplace;
1849                                 }
1850                         }
1851                         /* tuple already deleted; nothing to do */
1852                         return;
1853
1854                 default:
1855                         elog(ERROR, "unrecognized heap_update status: %u", result);
1856                         return;
1857         }
1858
1859         IncrReplaced();
1860         (estate->es_processed)++;
1861
1862         /*
1863          * Note: instead of having to update the old index tuples associated with
1864          * the heap tuple, all we do is form and insert new index tuples. This is
1865          * because UPDATEs are actually DELETEs and INSERTs, and index tuple
1866          * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
1867          * here is insert new index tuples.  -cim 9/27/89
1868          */
1869
1870         /*
1871          * insert index entries for tuple
1872          *
1873          * Note: heap_update returns the tid (location) of the new tuple in the
1874          * t_self field.
1875          *
1876          * If it's a HOT update, we mustn't insert new index entries.
1877          */
1878         if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
1879                 ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
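        /*
         * Illustration (for exposition only): a HOT update is one in which no
         * indexed column changed and the new tuple version fit on the same
         * heap page; the old version's ctid chain then leads to the new one,
         * so existing index entries remain usable and none are added here.
         */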
1880
1881         /* AFTER ROW UPDATE Triggers */
1882         ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
1883
1884         /* Process RETURNING if present */
1885         if (resultRelInfo->ri_projectReturning)
1886                 ExecProcessReturning(resultRelInfo->ri_projectReturning,
1887                                                          slot, planSlot, dest);
1888 }
1889
1890 /*
1891  * ExecRelCheck --- check that tuple meets constraints for result relation
1892  */
1893 static const char *
1894 ExecRelCheck(ResultRelInfo *resultRelInfo,
1895                          TupleTableSlot *slot, EState *estate)
1896 {
1897         Relation        rel = resultRelInfo->ri_RelationDesc;
1898         int                     ncheck = rel->rd_att->constr->num_check;
1899         ConstrCheck *check = rel->rd_att->constr->check;
1900         ExprContext *econtext;
1901         MemoryContext oldContext;
1902         List       *qual;
1903         int                     i;
1904
1905         /*
1906          * If first time through for this result relation, build expression
1907          * nodetrees for rel's constraint expressions.  Keep them in the per-query
1908          * memory context so they'll survive throughout the query.
1909          */
1910         if (resultRelInfo->ri_ConstraintExprs == NULL)
1911         {
1912                 oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
1913                 resultRelInfo->ri_ConstraintExprs =
1914                         (List **) palloc(ncheck * sizeof(List *));
1915                 for (i = 0; i < ncheck; i++)
1916                 {
1917                         /* ExecQual wants implicit-AND form */
1918                         qual = make_ands_implicit(stringToNode(check[i].ccbin));
1919                         resultRelInfo->ri_ConstraintExprs[i] = (List *)
1920                                 ExecPrepareExpr((Expr *) qual, estate);
1921                 }
1922                 MemoryContextSwitchTo(oldContext);
1923         }
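        /*
         * Illustration (for exposition only): for a constraint
         * "CHECK (a > 0 AND b > 0)", ccbin stores the nodetree of the AND
         * expression; make_ands_implicit flattens it into the implicit-AND
         * list {a > 0, b > 0} that ExecQual expects, and ExecPrepareExpr
         * builds the executable expression state once per query, to be
         * reused for every tuple checked.
         */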
1924
1925         /*
1926          * We will use the EState's per-tuple context for evaluating constraint
1927          * expressions (creating it if it's not already there).
1928          */
1929         econtext = GetPerTupleExprContext(estate);
1930
1931         /* Arrange for econtext's scan tuple to be the tuple under test */
1932         econtext->ecxt_scantuple = slot;
1933
1934         /* And evaluate the constraints */
1935         for (i = 0; i < ncheck; i++)
1936         {
1937                 qual = resultRelInfo->ri_ConstraintExprs[i];
1938
1939                 /*
1940                  * NOTE: SQL92 specifies that a NULL result from a constraint
1941                  * expression is not to be treated as a failure.  Therefore, tell
1942                  * ExecQual to return TRUE for NULL.
1943                  */
1944                 if (!ExecQual(qual, econtext, true))
1945                         return check[i].ccname;
1946         }
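        /*
         * Illustration of the NULL rule (for exposition only): given
         * CHECK (price > 0) on a hypothetical table, a row with price = NULL
         * passes, because "NULL > 0" yields NULL rather than false; only a
         * row such as price = -1 makes ExecQual return false, causing the
         * constraint's name to be reported above.
         */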
1947
1948         /* NULL result means no error */
1949         return NULL;
1950 }
1951
1952 void
1953 ExecConstraints(ResultRelInfo *resultRelInfo,
1954                                 TupleTableSlot *slot, EState *estate)
1955 {
1956         Relation        rel = resultRelInfo->ri_RelationDesc;
1957         TupleConstr *constr = rel->rd_att->constr;
1958
1959         Assert(constr);
1960
1961         if (constr->has_not_null)
1962         {
1963                 int                     natts = rel->rd_att->natts;
1964                 int                     attrChk;
1965
1966                 for (attrChk = 1; attrChk <= natts; attrChk++)
1967                 {
1968                         if (rel->rd_att->attrs[attrChk - 1]->attnotnull &&
1969                                 slot_attisnull(slot, attrChk))
1970                                 ereport(ERROR,
1971                                                 (errcode(ERRCODE_NOT_NULL_VIOLATION),
1972                                                  errmsg("null value in column \"%s\" violates not-null constraint",
1973                                                 NameStr(rel->rd_att->attrs[attrChk - 1]->attname))));
1974                 }
1975         }
1976
1977         if (constr->num_check > 0)
1978         {
1979                 const char *failed;
1980
1981                 if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
1982                         ereport(ERROR,
1983                                         (errcode(ERRCODE_CHECK_VIOLATION),
1984                                          errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
1985                                                         RelationGetRelationName(rel), failed)));
1986         }
1987 }
1988
1989 /*
1990  * ExecProcessReturning --- evaluate a RETURNING list and send to dest
1991  *
1992  * projectReturning: RETURNING projection info for current result rel
1993  * tupleSlot: slot holding tuple actually inserted/updated/deleted
1994  * planSlot: slot holding tuple returned by top plan node
1995  * dest: where to send the output
1996  */
1997 static void
1998 ExecProcessReturning(ProjectionInfo *projectReturning,
1999                                          TupleTableSlot *tupleSlot,
2000                                          TupleTableSlot *planSlot,
2001                                          DestReceiver *dest)
2002 {
2003         ExprContext *econtext = projectReturning->pi_exprContext;
2004         TupleTableSlot *retSlot;
2005
2006         /*
2007          * Reset per-tuple memory context to free any expression evaluation
2008          * storage allocated in the previous cycle.
2009          */
2010         ResetExprContext(econtext);
2011
2012         /* Make tuple and any needed join variables available to ExecProject */
2013         econtext->ecxt_scantuple = tupleSlot;
2014         econtext->ecxt_outertuple = planSlot;
2015
2016         /* Compute the RETURNING expressions */
2017         retSlot = ExecProject(projectReturning, NULL);
2018
2019         /* Send to dest */
2020         (*dest->receiveSlot) (retSlot, dest);
2021
2022         ExecClearTuple(retSlot);
2023 }
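/*
 * Illustration (for exposition only; "mytab" is hypothetical): for
 * "INSERT INTO mytab (a) VALUES (1) RETURNING a + 1", tupleSlot holds the
 * row actually inserted, ExecProject computes "a + 1" from it, and the
 * one-column result is sent to the client through dest->receiveSlot.
 */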
2024
2025 /*
2026  * Check a modified tuple to see if we want to process its updated version
2027  * under READ COMMITTED rules.
2028  *
2029  * See backend/executor/README for some info about how this works.
2030  *
2031  *      estate - executor state data
2032  *      rti - rangetable index of table containing tuple
2033  *      *tid - t_ctid from the outdated tuple (ie, next updated version)
2034  *      priorXmax - t_xmax from the outdated tuple
2035  *
2036  * *tid is also an output parameter: it's modified to hold the TID of the
2037  * latest version of the tuple (note this may be changed even on failure)
2038  *
2039  * Returns a slot containing the new candidate update/delete tuple, or
2040  * NULL if we determine we shouldn't process the row.
2041  */
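/*
 * Concrete scenario (for exposition only; "acct" is a hypothetical
 * table): under READ COMMITTED, session A runs
 * "UPDATE acct SET bal = bal - 100 WHERE bal >= 100" and blocks on a row
 * being updated by session B.  When B commits, A's heap_update returns
 * HeapTupleUpdated, and A calls this function, which chases the update
 * chain to the newest version of the row and re-runs the plan against
 * it; A performs its update only if "bal >= 100" still holds there.
 */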
2042 TupleTableSlot *
2043 EvalPlanQual(EState *estate, Index rti,
2044                          ItemPointer tid, TransactionId priorXmax)
2045 {
2046         evalPlanQual *epq;
2047         EState     *epqstate;
2048         Relation        relation;
2049         HeapTupleData tuple;
2050         HeapTuple       copyTuple = NULL;
2051         SnapshotData SnapshotDirty;
2052         bool            endNode;
2053
2054         Assert(rti != 0);
2055
2056         /*
2057          * find relation containing target tuple
2058          */
2059         if (estate->es_result_relation_info != NULL &&
2060                 estate->es_result_relation_info->ri_RangeTableIndex == rti)
2061                 relation = estate->es_result_relation_info->ri_RelationDesc;
2062         else
2063         {
2064                 ListCell   *l;
2065
2066                 relation = NULL;
2067                 foreach(l, estate->es_rowMarks)
2068                 {
2069                         if (((ExecRowMark *) lfirst(l))->rti == rti)
2070                         {
2071                                 relation = ((ExecRowMark *) lfirst(l))->relation;
2072                                 break;
2073                         }
2074                 }
2075                 if (relation == NULL)
2076                         elog(ERROR, "could not find RowMark for RT index %u", rti);
2077         }
2078
2079         /*
2080          * fetch the tuple referenced by tid
2081          *
2082          * Loop here to deal with updated or busy tuples
2083          */
2084         InitDirtySnapshot(SnapshotDirty);
2085         tuple.t_self = *tid;
2086         for (;;)
2087         {
2088                 Buffer          buffer;
2089
2090                 if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
2091                 {
2092                         /*
2093                          * If xmin isn't what we're expecting, the slot must have been
2094                          * recycled and reused for an unrelated tuple.  This implies that
2095                          * the latest version of the row was deleted, so we need do
2096                          * nothing.  (Should be safe to examine xmin without getting
2097                          * buffer's content lock, since xmin never changes in an existing
2098                          * tuple.)
2099                          */
2100                         if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2101                                                                          priorXmax))
2102                         {
2103                                 ReleaseBuffer(buffer);
2104                                 return NULL;
2105                         }
2106
2107                         /* otherwise xmin should not be dirty... */
2108                         if (TransactionIdIsValid(SnapshotDirty.xmin))
2109                                 elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
2110
2111                         /*
2112                          * If the tuple is being updated by another transaction, we have
2113                          * to wait for it to commit or abort.
2114                          */
2115                         if (TransactionIdIsValid(SnapshotDirty.xmax))
2116                         {
2117                                 ReleaseBuffer(buffer);
2118                                 XactLockTableWait(SnapshotDirty.xmax);
2119                                 continue;               /* loop back to repeat heap_fetch */
2120                         }
2121
2122                         /*
2123                          * If tuple was inserted by our own transaction, we have to check
2124                          * cmin against es_output_cid: cmin >= current CID means our
2125                          * command cannot see the tuple, so we should ignore it.  Without
2126                          * this we are open to the "Halloween problem" of indefinitely
2127                          * re-updating the same tuple. (We need not check cmax because
2128                          * HeapTupleSatisfiesDirty will consider a tuple deleted by our
2129                          * transaction dead, regardless of cmax.)  We just checked that
2130                          * priorXmax == xmin, so we can test that variable instead of
2131                          * doing HeapTupleHeaderGetXmin again.
2132                          */
2133                         if (TransactionIdIsCurrentTransactionId(priorXmax) &&
2134                                 HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
2135                         {
2136                                 ReleaseBuffer(buffer);
2137                                 return NULL;
2138                         }
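                        /*
                         * Illustration (for exposition only): in
                         * "UPDATE mytab SET x = x + 1", every new row version is
                         * inserted with cmin equal to the current command's CID.  If
                         * the scan could see and re-update those new versions, the
                         * same logical row would be incremented over and over; the
                         * cmin >= es_output_cid test above rejects them.
                         */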
2139
2140                         /*
2141                          * We got the tuple; now copy it for use by the recheck query.
2142                          */
2143                         copyTuple = heap_copytuple(&tuple);
2144                         ReleaseBuffer(buffer);
2145                         break;
2146                 }
2147
2148                 /*
2149                  * If the referenced slot was actually empty, the latest version of
2150                  * the row must have been deleted, so we need do nothing.
2151                  */
2152                 if (tuple.t_data == NULL)
2153                 {
2154                         ReleaseBuffer(buffer);
2155                         return NULL;
2156                 }
2157
2158                 /*
2159                  * As above, if xmin isn't what we're expecting, do nothing.
2160                  */
2161                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
2162                                                                  priorXmax))
2163                 {
2164                         ReleaseBuffer(buffer);
2165                         return NULL;
2166                 }
2167
2168                 /*
2169                  * If we get here, the tuple was found but failed SnapshotDirty.
2170                  * Assuming the xmin is either a committed xact or our own xact (as it
2171                  * certainly should be if we're trying to modify the tuple), this must
2172                  * mean that the row was updated or deleted by either a committed xact
2173                  * or our own xact.  If it was deleted, we can ignore it; if it was
2174                  * updated then chain up to the next version and repeat the whole
2175                  * test.
2176                  *
2177                  * As above, it should be safe to examine xmax and t_ctid without the
2178                  * buffer content lock, because they can't be changing.
2179                  */
2180                 if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
2181                 {
2182                         /* deleted, so forget about it */
2183                         ReleaseBuffer(buffer);
2184                         return NULL;
2185                 }
2186
2187                 /* updated, so look at the updated row */
2188                 tuple.t_self = tuple.t_data->t_ctid;
2189                 /* updated row should have xmin matching this xmax */
2190                 priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
2191                 ReleaseBuffer(buffer);
2192                 /* loop back to fetch next in chain */
2193         }
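        /*
         * Illustration (for exposition only): each outdated tuple's t_ctid
         * links to its successor version, so the loop above hops from
         * t_self to t_data->t_ctid until it reaches a version that passes
         * SnapshotDirty, verifying at every hop that the next version's
         * xmin equals the prior version's xmax, lest the chain have been
         * broken by slot recycling.
         */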
2194
2195         /*
2196          * For UPDATE/DELETE we have to return the tid of the actual row we're
2197          * executing PQ for.
2198          */
2199         *tid = tuple.t_self;
2200
2201         /*
2202          * Need to run a recheck subquery.      Find or create a PQ stack entry.
2203          */
2204         epq = estate->es_evalPlanQual;
2205         endNode = true;
2206
2207         if (epq != NULL && epq->rti == 0)
2208         {
2209                 /* Top PQ stack entry is idle, so re-use it */
2210                 Assert(!(estate->es_useEvalPlan) && epq->next == NULL);
2211                 epq->rti = rti;
2212                 endNode = false;
2213         }
2214
2215         /*
2216          * If this is a request for another RTE, Ra, we have to check whether
2217          * PlanQual was already requested for Ra.  If so, Ra's row was updated
2218          * again, and we have to restart the old execution for Ra, forgetting
2219          * everything done after Ra was suspended.
2220          */
2221         if (epq != NULL && epq->rti != rti &&
2222                 epq->estate->es_evTuple[rti - 1] != NULL)
2223         {
2224                 do
2225                 {
2226                         evalPlanQual *oldepq;
2227
2228                         /* stop execution */
2229                         EvalPlanQualStop(epq);
2230                         /* pop previous PlanQual from the stack */
2231                         oldepq = epq->next;
2232                         Assert(oldepq && oldepq->rti != 0);
2233                         /* push current PQ to freePQ stack */
2234                         oldepq->free = epq;
2235                         epq = oldepq;
2236                         estate->es_evalPlanQual = epq;
2237                 } while (epq->rti != rti);
2238         }
2239
2240         /*
2241          * If the request is for another RTE, we have to suspend execution of
2242          * the current PlanQual and start execution of a new one.
2243          */
2244         if (epq == NULL || epq->rti != rti)
2245         {
2246                 /* try to reuse plan used previously */
2247                 evalPlanQual *newepq = (epq != NULL) ? epq->free : NULL;
2248
2249                 if (newepq == NULL)             /* first call or freePQ stack is empty */
2250                 {
2251                         newepq = (evalPlanQual *) palloc0(sizeof(evalPlanQual));
2252                         newepq->free = NULL;
2253                         newepq->estate = NULL;
2254                         newepq->planstate = NULL;
2255                 }
2256                 else
2257                 {
2258                         /* recycle previously used PlanQual */
2259                         Assert(newepq->estate == NULL);
2260                         epq->free = NULL;
2261                 }
2262                 /* push current PQ to the stack */
2263                 newepq->next = epq;
2264                 epq = newepq;
2265                 estate->es_evalPlanQual = epq;
2266                 epq->rti = rti;
2267                 endNode = false;
2268         }
2269
2270         Assert(epq->rti == rti);
2271
2272         /*
2273          * OK, the request is for the same RTE.  Unfortunately we still have to
2274          * end and restart execution of the plan, because ExecReScan wouldn't
2275          * ensure that upper plan nodes would reset themselves.  We could make
2276          * that work if insertion of the target tuple were integrated with the
2277          * Param mechanism somehow, so that the upper plan nodes know that their
2278          * children's outputs have changed.
2279          *
2280          * Note that the stack of free evalPlanQual nodes is quite useless at the
2281          * moment, since it only saves us from pallocing/releasing the
2282          * evalPlanQual nodes themselves.  But it will be useful once we implement
2283          * ReScan instead of end/restart for re-using PlanQual nodes.
2284          */
2285         if (endNode)
2286         {
2287                 /* stop execution */
2288                 EvalPlanQualStop(epq);
2289         }
2290
2291         /*
2292          * Initialize new recheck query.
2293          *
2294          * Note: if we were re-using PlanQual plans via ExecReScan, we'd need to
2295          * instead copy down changeable state from the top plan (including
2296          * es_result_relation_info, es_junkFilter) and reset locally changeable
2297          * state in the epq (including es_param_exec_vals, es_evTupleNull).
2298          */
2299         EvalPlanQualStart(epq, estate, epq->next);
2300
2301         /*
2302          * free the old RTE's tuple, if any, and store the target tuple where
2303          * the relation's scan node will see it
2304          */
2305         epqstate = epq->estate;
2306         if (epqstate->es_evTuple[rti - 1] != NULL)
2307                 heap_freetuple(epqstate->es_evTuple[rti - 1]);
2308         epqstate->es_evTuple[rti - 1] = copyTuple;
2309
2310         return EvalPlanQualNext(estate);
2311 }
2312
2313 static TupleTableSlot *
2314 EvalPlanQualNext(EState *estate)
2315 {
2316         evalPlanQual *epq = estate->es_evalPlanQual;
2317         MemoryContext oldcontext;
2318         TupleTableSlot *slot;
2319
2320         Assert(epq->rti != 0);
2321
2322 lpqnext:;
2323         oldcontext = MemoryContextSwitchTo(epq->estate->es_query_cxt);
2324         slot = ExecProcNode(epq->planstate);
2325         MemoryContextSwitchTo(oldcontext);
2326
2327         /*
2328          * No more tuples for this PQ; continue with the previous one.
2329          */
2330         if (TupIsNull(slot))
2331         {
2332                 evalPlanQual *oldepq;
2333
2334                 /* stop execution */
2335                 EvalPlanQualStop(epq);
2336                 /* pop old PQ from the stack */
2337                 oldepq = epq->next;
2338                 if (oldepq == NULL)
2339                 {
2340                         /* this is the first (oldest) PQ - mark as free */
2341                         epq->rti = 0;
2342                         estate->es_useEvalPlan = false;
2343                         /* and continue Query execution */
2344                         return NULL;
2345                 }
2346                 Assert(oldepq->rti != 0);
2347                 /* push current PQ to freePQ stack */
2348                 oldepq->free = epq;
2349                 epq = oldepq;
2350                 estate->es_evalPlanQual = epq;
2351                 goto lpqnext;
2352         }
2353
2354         return slot;
2355 }
2356
2357 static void
2358 EndEvalPlanQual(EState *estate)
2359 {
2360         evalPlanQual *epq = estate->es_evalPlanQual;
2361
2362         if (epq->rti == 0)                      /* plans already shut down */
2363         {
2364                 Assert(epq->next == NULL);
2365                 return;
2366         }
2367
2368         for (;;)
2369         {
2370                 evalPlanQual *oldepq;
2371
2372                 /* stop execution */
2373                 EvalPlanQualStop(epq);
2374                 /* pop old PQ from the stack */
2375                 oldepq = epq->next;
2376                 if (oldepq == NULL)
2377                 {
2378                         /* this is the first (oldest) PQ - mark as free */
2379                         epq->rti = 0;
2380                         estate->es_useEvalPlan = false;
2381                         break;
2382                 }
2383                 Assert(oldepq->rti != 0);
2384                 /* push current PQ to freePQ stack */
2385                 oldepq->free = epq;
2386                 epq = oldepq;
2387                 estate->es_evalPlanQual = epq;
2388         }
2389 }
2390
2391 /*
2392  * Start execution of one level of PlanQual.
2393  *
2394  * This is a cut-down version of ExecutorStart(): we copy some state from
2395  * the top-level estate rather than initializing it fresh.
2396  */
2397 static void
2398 EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
2399 {
2400         EState     *epqstate;
2401         int                     rtsize;
2402         MemoryContext oldcontext;
2403         ListCell   *l;
2404
2405         rtsize = list_length(estate->es_range_table);
2406
2407         epq->estate = epqstate = CreateExecutorState();
2408
2409         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2410
2411         /*
2412          * The epqstates share the top query's copy of unchanging state such as
2413          * the snapshot, rangetable, result-rel info, and external Param info.
2414          * They need their own copies of local state, including a tuple table,
2415          * es_param_exec_vals, etc.
2416          */
2417         epqstate->es_direction = ForwardScanDirection;
2418         epqstate->es_snapshot = estate->es_snapshot;
2419         epqstate->es_crosscheck_snapshot = estate->es_crosscheck_snapshot;
2420         epqstate->es_range_table = estate->es_range_table;
2421         epqstate->es_output_cid = estate->es_output_cid;
2422         epqstate->es_result_relations = estate->es_result_relations;
2423         epqstate->es_num_result_relations = estate->es_num_result_relations;
2424         epqstate->es_result_relation_info = estate->es_result_relation_info;
2425         epqstate->es_junkFilter = estate->es_junkFilter;
2426         /* es_trig_target_relations must NOT be copied */
2427         epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
2428         epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
2429         epqstate->es_param_list_info = estate->es_param_list_info;
2430         if (estate->es_plannedstmt->nParamExec > 0)
2431                 epqstate->es_param_exec_vals = (ParamExecData *)
2432                         palloc0(estate->es_plannedstmt->nParamExec * sizeof(ParamExecData));
2433         epqstate->es_rowMarks = estate->es_rowMarks;
2434         epqstate->es_instrument = estate->es_instrument;
2435         epqstate->es_select_into = estate->es_select_into;
2436         epqstate->es_into_oids = estate->es_into_oids;
2437         epqstate->es_plannedstmt = estate->es_plannedstmt;
2438
2439         /*
2440          * Each epqstate must have its own es_evTupleNull state, but all the stack
2441          * entries share es_evTuple state.      This allows sub-rechecks to inherit
2442          * the value being examined by an outer recheck.
2443          */
2444         epqstate->es_evTupleNull = (bool *) palloc0(rtsize * sizeof(bool));
2445         if (priorepq == NULL)
2446                 /* first PQ stack entry */
2447                 epqstate->es_evTuple = (HeapTuple *)
2448                         palloc0(rtsize * sizeof(HeapTuple));
2449         else
2450                 /* later stack entries share the same storage */
2451                 epqstate->es_evTuple = priorepq->estate->es_evTuple;
2452
2453         /*
2454          * Create sub-tuple-table; we needn't redo the CountSlots work though.
2455          */
2456         epqstate->es_tupleTable =
2457                 ExecCreateTupleTable(estate->es_tupleTable->size);
2458
2459         /*
2460          * Initialize private state information for each SubPlan.  We must do this
2461          * before running ExecInitNode on the main query tree, since
2462          * ExecInitSubPlan expects to be able to find these entries.
2463          */
2464         Assert(epqstate->es_subplanstates == NIL);
2465         foreach(l, estate->es_plannedstmt->subplans)
2466         {
2467                 Plan       *subplan = (Plan *) lfirst(l);
2468                 PlanState  *subplanstate;
2469
2470                 subplanstate = ExecInitNode(subplan, epqstate, 0);
2471
2472                 epqstate->es_subplanstates = lappend(epqstate->es_subplanstates,
2473                                                                                          subplanstate);
2474         }
2475
2476         /*
2477          * Initialize the private state information for all the nodes in the query
2478          * tree.  This opens files, allocates storage and leaves us ready to start
2479          * processing tuples.
2480          */
2481         epq->planstate = ExecInitNode(estate->es_plannedstmt->planTree, epqstate, 0);
2482
2483         MemoryContextSwitchTo(oldcontext);
2484 }
2485
2486 /*
2487  * End execution of one level of PlanQual.
2488  *
2489  * This is a cut-down version of ExecutorEnd(); basically we want to do most
2490  * of the normal cleanup, but *not* close result relations (which we are
2491  * just sharing from the outer query).  We do, however, have to close any
2492  * trigger target relations that got opened, since those are not shared.
2493  */
2494 static void
2495 EvalPlanQualStop(evalPlanQual *epq)
2496 {
2497         EState     *epqstate = epq->estate;
2498         MemoryContext oldcontext;
2499         ListCell   *l;
2500
2501         oldcontext = MemoryContextSwitchTo(epqstate->es_query_cxt);
2502
2503         ExecEndNode(epq->planstate);
2504
2505         foreach(l, epqstate->es_subplanstates)
2506         {
2507                 PlanState  *subplanstate = (PlanState *) lfirst(l);
2508
2509                 ExecEndNode(subplanstate);
2510         }
2511
2512         ExecDropTupleTable(epqstate->es_tupleTable, true);
2513         epqstate->es_tupleTable = NULL;
2514
	if (epqstate->es_evTuple[epq->rti - 1] != NULL)
	{
		heap_freetuple(epqstate->es_evTuple[epq->rti - 1]);
		epqstate->es_evTuple[epq->rti - 1] = NULL;
	}

	foreach(l, epqstate->es_trig_target_relations)
	{
		ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l);

		/* Close indices and then the relation itself */
		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	MemoryContextSwitchTo(oldcontext);

	FreeExecutorState(epqstate);

	epq->estate = NULL;
	epq->planstate = NULL;
}

/*
 * ExecGetActivePlanTree --- get the active PlanState tree from a QueryDesc
 *
 * Ordinarily this is just the one mentioned in the QueryDesc, but if we
 * are looking at a row returned by the EvalPlanQual machinery, we need
 * to look at the subsidiary state instead.
 */
PlanState *
ExecGetActivePlanTree(QueryDesc *queryDesc)
{
	EState	   *estate = queryDesc->estate;

	if (estate && estate->es_useEvalPlan && estate->es_evalPlanQual != NULL)
		return estate->es_evalPlanQual->planstate;
	else
		return queryDesc->planstate;
}
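
/*
 * Illustrative sketch (hypothetical caller): code that wants to inspect or
 * instrument the plan tree actually producing rows should fetch it through
 * this function rather than dereferencing queryDesc->planstate directly:
 *
 *		PlanState *ps = ExecGetActivePlanTree(queryDesc);
 *
 * During an EvalPlanQual recheck this returns the subsidiary EPQ tree; at
 * all other times it is the same as queryDesc->planstate.
 */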


/*
 * Support for SELECT INTO (a/k/a CREATE TABLE AS)
 *
 * We implement SELECT INTO by diverting SELECT's normal output with
 * a specialized DestReceiver type.
 *
 * TODO: remove some of the INTO-specific cruft from EState, and keep
 * it in the DestReceiver instead.
 */
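
/*
 * For illustration (hypothetical statements): queries such as
 *
 *		SELECT * INTO newtab FROM oldtab;
 *		CREATE TABLE newtab AS SELECT * FROM oldtab;
 *
 * arrive here with plannedstmt->intoClause set, and their output tuples
 * are routed into the new table by the DR_intorel receiver below.
 */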

typedef struct
{
	DestReceiver pub;			/* publicly-known function pointers */
	EState	   *estate;			/* EState we are working with */
} DR_intorel;
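
/*
 * Note: "pub" must remain the first field, so that a DestReceiver pointer
 * and a DR_intorel pointer can be cast back and forth; OpenIntoRel() and
 * intorel_receive() below rely on exactly that cast.
 */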

/*
 * OpenIntoRel --- actually create the SELECT INTO target relation
 *
 * This also replaces QueryDesc->dest with the special DestReceiver for
 * SELECT INTO.  We assume that the correct result tuple type has already
 * been placed in queryDesc->tupDesc.
 */
static void
OpenIntoRel(QueryDesc *queryDesc)
{
	IntoClause *into = queryDesc->plannedstmt->intoClause;
	EState	   *estate = queryDesc->estate;
	Relation	intoRelationDesc;
	char	   *intoName;
	Oid			namespaceId;
	Oid			tablespaceId;
	Datum		reloptions;
	AclResult	aclresult;
	Oid			intoRelationId;
	TupleDesc	tupdesc;
	DR_intorel *myState;

	Assert(into);

	/*
	 * Check consistency of arguments
	 */
	if (into->onCommit != ONCOMMIT_NOOP && !into->rel->istemp)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("ON COMMIT can only be used on temporary tables")));

	/*
	 * Find namespace to create in, check its permissions
	 */
	intoName = into->rel->relname;
	namespaceId = RangeVarGetCreationNamespace(into->rel);

	aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
									  ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
					   get_namespace_name(namespaceId));

	/*
	 * Select tablespace to use.  If not specified, use default tablespace
	 * (which may in turn default to database's default).
	 */
	if (into->tableSpaceName)
	{
		tablespaceId = get_tablespace_oid(into->tableSpaceName);
		if (!OidIsValid(tablespaceId))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							into->tableSpaceName)));
	}
	else
	{
		tablespaceId = GetDefaultTablespace(into->rel->istemp);
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using the database's default space */
	if (OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/* Parse and validate any reloptions */
	reloptions = transformRelOptions((Datum) 0,
									 into->options,
									 true,
									 false);
	(void) heap_reloptions(RELKIND_RELATION, reloptions, true);

	/* Copy the tupdesc because heap_create_with_catalog modifies it */
	tupdesc = CreateTupleDescCopy(queryDesc->tupDesc);

	/* Now we can actually create the new relation */
	intoRelationId = heap_create_with_catalog(intoName,
											  namespaceId,
											  tablespaceId,
											  InvalidOid,	/* let the system assign the OID */
											  GetUserId(),
											  tupdesc,
											  NIL,			/* no cooked constraints */
											  RELKIND_RELATION,
											  false,
											  true,
											  0,
											  into->onCommit,
											  reloptions,
											  allowSystemTableMods);

	FreeTupleDesc(tupdesc);

	/*
	 * Advance command counter so that the newly-created relation's catalog
	 * tuples will be visible to heap_open.
	 */
	CommandCounterIncrement();

	/*
	 * If necessary, create a TOAST table for the INTO relation. Note that
	 * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
	 * the TOAST table will be visible for insertion.
	 */
	AlterTableCreateToastTable(intoRelationId);

	/*
	 * And open the constructed table for writing.
	 */
	intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);

	/* use_wal off requires rd_targblock be initially invalid */
	Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);

	/*
	 * We can skip WAL-logging the insertions, unless PITR (WAL archiving)
	 * is in use, in which case every change must make it into the WAL
	 * stream.  If we do skip WAL, CloseIntoRel() must heap_sync the
	 * relation before commit (see below).
	 */
	estate->es_into_relation_use_wal = XLogArchivingActive();
	estate->es_into_relation_descriptor = intoRelationDesc;

	/*
	 * Now replace the query's DestReceiver with one for SELECT INTO
	 */
	queryDesc->dest = CreateDestReceiver(DestIntoRel, NULL);
	myState = (DR_intorel *) queryDesc->dest;
	Assert(myState->pub.mydest == DestIntoRel);
	myState->estate = estate;
}

/*
 * CloseIntoRel --- clean up SELECT INTO at ExecutorEnd time
 */
static void
CloseIntoRel(QueryDesc *queryDesc)
{
	EState	   *estate = queryDesc->estate;

	/* OpenIntoRel might never have gotten called */
	if (estate->es_into_relation_descriptor)
	{
		/*
		 * If we skipped using WAL, we must heap_sync the relation before
		 * commit: without WAL records, crash recovery would be unable to
		 * recreate the inserted rows.
		 */
		if (!estate->es_into_relation_use_wal)
			heap_sync(estate->es_into_relation_descriptor);

		/* close rel, but keep lock until commit */
		heap_close(estate->es_into_relation_descriptor, NoLock);

		estate->es_into_relation_descriptor = NULL;
	}
}

/*
 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
 *
 * Since CreateDestReceiver doesn't accept the parameters we'd need,
 * we just leave the private fields empty here.  OpenIntoRel will
 * fill them in.
 */
DestReceiver *
CreateIntoRelDestReceiver(void)
{
	DR_intorel *self = (DR_intorel *) palloc(sizeof(DR_intorel));

	self->pub.receiveSlot = intorel_receive;
	self->pub.rStartup = intorel_startup;
	self->pub.rShutdown = intorel_shutdown;
	self->pub.rDestroy = intorel_destroy;
	self->pub.mydest = DestIntoRel;

	self->estate = NULL;

	return (DestReceiver *) self;
}
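
/*
 * Illustrative sketch (hypothetical caller): the executor drives any
 * DestReceiver through the same four entry points,
 *
 *		(*dest->rStartup) (dest, operation, tupDesc);
 *		(*dest->receiveSlot) (slot, dest);		... once per tuple ...
 *		(*dest->rShutdown) (dest);
 *		(*dest->rDestroy) (dest);
 *
 * For DR_intorel, only receiveSlot and rDestroy do any real work, as the
 * no-op startup/shutdown routines below show.
 */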

/*
 * intorel_startup --- executor startup
 */
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	/* no-op */
}

/*
 * intorel_receive --- receive one tuple
 */
static void
intorel_receive(TupleTableSlot *slot, DestReceiver *self)
{
	DR_intorel *myState = (DR_intorel *) self;
	EState	   *estate = myState->estate;
	HeapTuple	tuple;

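	/*
	 * Materialize a palloc'd copy of the slot's contents: the slot may hold
	 * a virtual tuple, and heap_insert sets header fields (xmin and ctid,
	 * for example) on the tuple it is given, so we must not hand it the
	 * slot's own tuple.
	 */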
	tuple = ExecCopySlotTuple(slot);

	heap_insert(estate->es_into_relation_descriptor,
				tuple,
				estate->es_output_cid,
				estate->es_into_relation_use_wal,
				false);			/* never any point in using FSM */

	/* We know this is a newly created relation, so there are no indexes */

	heap_freetuple(tuple);

	IncrAppended();
}

/*
 * intorel_shutdown --- executor end
 */
static void
intorel_shutdown(DestReceiver *self)
{
	/* no-op */
}

/*
 * intorel_destroy --- release DestReceiver object
 */
static void
intorel_destroy(DestReceiver *self)
{
	pfree(self);
}