/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *        Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *        src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
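
/*
 * As a rough illustration (the exact plan shape is up to the planner), a
 * query such as
 *        SELECT a FROM t1 INTERSECT ALL SELECT a FROM t2
 * is typically executed with this node sitting atop an Append of the two
 * inputs, each extended with the junk flag column (0 for the left input,
 * 1 for the right).  In SETOP_SORTED mode a Sort on the grouping columns
 * lies between the Append and the SetOp; in SETOP_HASHED mode (displayed
 * as HashSetOp by EXPLAIN) the SetOp consumes the Append's output directly.
 */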

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
        long            numLeft;                /* number of left-input dups in group */
        long            numRight;               /* number of right-input dups in group */
}                       SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
        pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
        if (flag)
                pergroup->numRight++;
        else
                pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
        SetOp      *node = (SetOp *) setopstate->ps.plan;
        int                     flag;
        bool            isNull;

        flag = DatumGetInt32(slot_getattr(inputslot,
                                                                          node->flagColIdx,
                                                                          &isNull));
        Assert(!isNull);
        Assert(flag == 0 || flag == 1);
        return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
        SetOp      *node = (SetOp *) setopstate->ps.plan;
        ExprContext *econtext = setopstate->ps.ps_ExprContext;
        TupleDesc       desc = ExecGetResultType(outerPlanState(setopstate));

        Assert(node->strategy == SETOP_HASHED);
        Assert(node->numGroups > 0);

        setopstate->hashtable = BuildTupleHashTable(&setopstate->ps,
                                                                                                desc,
                                                                                                node->numCols,
                                                                                                node->dupColIdx,
                                                                                                setopstate->eqfuncoids,
                                                                                                setopstate->hashfunctions,
                                                                                                node->numGroups,
                                                                                                0,
                                                                                                setopstate->tableContext,
                                                                                                econtext->ecxt_per_tuple_memory,
                                                                                                false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
        SetOp      *plannode = (SetOp *) setopstate->ps.plan;

        switch (plannode->cmd)
        {
                case SETOPCMD_INTERSECT:
                        if (pergroup->numLeft > 0 && pergroup->numRight > 0)
                                setopstate->numOutput = 1;
                        else
                                setopstate->numOutput = 0;
                        break;
                case SETOPCMD_INTERSECT_ALL:
                        setopstate->numOutput =
                                (pergroup->numLeft < pergroup->numRight) ?
                                pergroup->numLeft : pergroup->numRight;
                        break;
                case SETOPCMD_EXCEPT:
                        if (pergroup->numLeft > 0 && pergroup->numRight == 0)
                                setopstate->numOutput = 1;
                        else
                                setopstate->numOutput = 0;
                        break;
                case SETOPCMD_EXCEPT_ALL:
                        setopstate->numOutput =
                                (pergroup->numLeft < pergroup->numRight) ?
                                0 : (pergroup->numLeft - pergroup->numRight);
                        break;
                default:
                        elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
                        break;
        }
}
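
/*
 * Worked example of the rules above: if a group was seen 3 times in the
 * left input and 2 times in the right (numLeft = 3, numRight = 2), then
 * INTERSECT emits 1 copy, INTERSECT ALL emits min(3, 2) = 2 copies,
 * EXCEPT emits 0 copies (the group appears on the right), and EXCEPT ALL
 * emits 3 - 2 = 1 copy.
 */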


/* ----------------------------------------------------------------
 *              ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *                 /* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
        SetOpState *node = castNode(SetOpState, pstate);
        SetOp      *plannode = (SetOp *) node->ps.plan;
        TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

        CHECK_FOR_INTERRUPTS();

        /*
         * If the previously-returned tuple needs to be returned more than once,
         * keep returning it.
         */
        if (node->numOutput > 0)
        {
                node->numOutput--;
                return resultTupleSlot;
        }

        /* Otherwise, we're done if we are out of groups */
        if (node->setop_done)
                return NULL;

        /* Fetch the next tuple group according to the correct strategy */
        if (plannode->strategy == SETOP_HASHED)
        {
                if (!node->table_filled)
                        setop_fill_hash_table(node);
                return setop_retrieve_hash_table(node);
        }
        else
                return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
        PlanState  *outerPlan;
        SetOpStatePerGroup pergroup;
        TupleTableSlot *outerslot;
        TupleTableSlot *resultTupleSlot;
        ExprContext *econtext = setopstate->ps.ps_ExprContext;

        /*
         * get state info from node
         */
        outerPlan = outerPlanState(setopstate);
        pergroup = (SetOpStatePerGroup) setopstate->pergroup;
        resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

        /*
         * We loop retrieving groups until we find one we should return
         */
        while (!setopstate->setop_done)
        {
                /*
                 * If we don't already have the first tuple of the new group, fetch it
                 * from the outer plan.
                 */
                if (setopstate->grp_firstTuple == NULL)
                {
                        outerslot = ExecProcNode(outerPlan);
                        if (!TupIsNull(outerslot))
                        {
                                /* Make a copy of the first input tuple */
                                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
                        }
                        else
                        {
                                /* outer plan produced no tuples at all */
                                setopstate->setop_done = true;
                                return NULL;
                        }
                }

                /*
                 * Store the copied first input tuple in the tuple table slot reserved
                 * for it.  The tuple will be deleted when it is cleared from the
                 * slot.
                 */
                ExecStoreHeapTuple(setopstate->grp_firstTuple,
                                                   resultTupleSlot,
                                                   true);
                setopstate->grp_firstTuple = NULL;      /* don't keep two pointers */

                /* Initialize working state for a new input tuple group */
                initialize_counts(pergroup);

                /* Count the first input tuple */
                advance_counts(pergroup,
                                           fetch_tuple_flag(setopstate, resultTupleSlot));

                /*
                 * Scan the outer plan until we exhaust it or cross a group boundary.
                 */
                for (;;)
                {
                        outerslot = ExecProcNode(outerPlan);
                        if (TupIsNull(outerslot))
                        {
                                /* no more outer-plan tuples available */
                                setopstate->setop_done = true;
                                break;
                        }

                        /*
                         * Check whether we've crossed a group boundary.
                         */
                        econtext->ecxt_outertuple = resultTupleSlot;
                        econtext->ecxt_innertuple = outerslot;

                        if (!ExecQualAndReset(setopstate->eqfunction, econtext))
                        {
                                /*
                                 * Save the first input tuple of the next group.
                                 */
                                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
                                break;
                        }

                        /* Still in same group, so count this tuple */
                        advance_counts(pergroup,
                                                   fetch_tuple_flag(setopstate, outerslot));
                }

                /*
                 * Done scanning input tuple group.  See if we should emit any copies
                 * of result tuple, and if so return the first copy.
                 */
                set_output_count(setopstate, pergroup);

                if (setopstate->numOutput > 0)
                {
                        setopstate->numOutput--;
                        return resultTupleSlot;
                }
        }

        /* No more groups */
        ExecClearTuple(resultTupleSlot);
        return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
        SetOp      *node = (SetOp *) setopstate->ps.plan;
        PlanState  *outerPlan;
        int                     firstFlag;
        bool            in_first_rel PG_USED_FOR_ASSERTS_ONLY;
        ExprContext *econtext = setopstate->ps.ps_ExprContext;

        /*
         * get state info from node
         */
        outerPlan = outerPlanState(setopstate);
        firstFlag = node->firstFlag;
        /* verify planner didn't mess up */
        Assert(firstFlag == 0 ||
                   (firstFlag == 1 &&
                        (node->cmd == SETOPCMD_INTERSECT ||
                         node->cmd == SETOPCMD_INTERSECT_ALL)));

        /*
         * Process each outer-plan tuple, and then fetch the next one, until we
         * exhaust the outer plan.
         */
        in_first_rel = true;
        for (;;)
        {
                TupleTableSlot *outerslot;
                int                     flag;
                TupleHashEntryData *entry;
                bool            isnew;

                outerslot = ExecProcNode(outerPlan);
                if (TupIsNull(outerslot))
                        break;

                /* Identify whether it's left or right input */
                flag = fetch_tuple_flag(setopstate, outerslot);

                if (flag == firstFlag)
                {
                        /* (still) in first input relation */
                        Assert(in_first_rel);

                        /* Find or build hashtable entry for this tuple's group */
                        entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                                                                 &isnew);

                        /* If new tuple group, initialize counts */
                        if (isnew)
                        {
                                entry->additional = (SetOpStatePerGroup)
                                        MemoryContextAlloc(setopstate->hashtable->tablecxt,
                                                                           sizeof(SetOpStatePerGroupData));
                                initialize_counts((SetOpStatePerGroup) entry->additional);
                        }

                        /* Advance the counts */
                        advance_counts((SetOpStatePerGroup) entry->additional, flag);
                }
                else
                {
                        /* reached second relation */
                        in_first_rel = false;

                        /* For tuples not seen previously, do not make hashtable entry */
                        entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                                                                 NULL);

                        /* Advance the counts if entry is already present */
                        if (entry)
                                advance_counts((SetOpStatePerGroup) entry->additional, flag);
                }

                /* Must reset expression context after each hashtable lookup */
                ResetExprContext(econtext);
        }

        setopstate->table_filled = true;
        /* Initialize to walk the hash table */
        ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
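
/*
 * A small trace of the fill phase, assuming firstFlag == 0 for simplicity:
 * if the first input delivers the values 1, 1, 2 and the second delivers
 * 1, 3, 3, the table ends up with an entry for group 1 (numLeft = 2,
 * numRight = 1) and one for group 2 (numLeft = 1, numRight = 0); the
 * tuples with value 3 create no entry because their group never appeared
 * in the first input and so can never be output.
 */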

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
        TupleHashEntryData *entry;
        TupleTableSlot *resultTupleSlot;

        /*
         * get state info from node
         */
        resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

        /*
         * We loop retrieving groups until we find one we should return
         */
        while (!setopstate->setop_done)
        {
                CHECK_FOR_INTERRUPTS();

                /*
                 * Find the next entry in the hash table
                 */
                entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
                if (entry == NULL)
                {
                        /* No more entries in hashtable, so done */
                        setopstate->setop_done = true;
                        return NULL;
                }

                /*
                 * See if we should emit any copies of this tuple, and if so return
                 * the first copy.
                 */
                set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

                if (setopstate->numOutput > 0)
                {
                        setopstate->numOutput--;
                        return ExecStoreMinimalTuple(entry->firstTuple,
                                                                                 resultTupleSlot,
                                                                                 false);
                }
        }

        /* No more groups */
        ExecClearTuple(resultTupleSlot);
        return NULL;
}

/* ----------------------------------------------------------------
 *              ExecInitSetOp
 *
 *              This initializes the setop node state structures and
 *              the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
        SetOpState *setopstate;
        TupleDesc       outerDesc;

        /* check for unsupported flags */
        Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

        /*
         * create state structure
         */
        setopstate = makeNode(SetOpState);
        setopstate->ps.plan = (Plan *) node;
        setopstate->ps.state = estate;
        setopstate->ps.ExecProcNode = ExecSetOp;

        setopstate->eqfuncoids = NULL;
        setopstate->hashfunctions = NULL;
        setopstate->setop_done = false;
        setopstate->numOutput = 0;
        setopstate->pergroup = NULL;
        setopstate->grp_firstTuple = NULL;
        setopstate->hashtable = NULL;
        setopstate->tableContext = NULL;

        /*
         * create expression context
         */
        ExecAssignExprContext(estate, &setopstate->ps);

        /*
         * If hashing, we also need a longer-lived context to store the hash
         * table.  The table can't just be kept in the per-query context because
         * we want to be able to throw it away in ExecReScanSetOp.
         */
        if (node->strategy == SETOP_HASHED)
                setopstate->tableContext =
                        AllocSetContextCreate(CurrentMemoryContext,
                                                                  "SetOp hash table",
                                                                  ALLOCSET_DEFAULT_SIZES);

        /*
         * initialize child nodes
         *
         * If we are hashing then the child plan does not need to handle REWIND
         * efficiently; see ExecReScanSetOp.
         */
        if (node->strategy == SETOP_HASHED)
                eflags &= ~EXEC_FLAG_REWIND;
        outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
        outerDesc = ExecGetResultType(outerPlanState(setopstate));

        /*
         * Initialize result slot and type. Setop nodes do no projections, so
         * initialize projection info for this node appropriately.
         */
        ExecInitResultTupleSlotTL(&setopstate->ps,
                                                          node->strategy == SETOP_HASHED ?
                                                          &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
        setopstate->ps.ps_ProjInfo = NULL;

        /*
         * Precompute fmgr lookup data for inner loop. We need both equality and
         * hashing functions to do it by hashing, but only equality if not
         * hashing.
         */
        if (node->strategy == SETOP_HASHED)
                execTuplesHashPrepare(node->numCols,
                                                          node->dupOperators,
                                                          &setopstate->eqfuncoids,
                                                          &setopstate->hashfunctions);
        else
                setopstate->eqfunction =
                        execTuplesMatchPrepare(outerDesc,
                                                                   node->numCols,
                                                                   node->dupColIdx,
                                                                   node->dupOperators,
                                                                   &setopstate->ps);

        if (node->strategy == SETOP_HASHED)
        {
                build_hash_table(setopstate);
                setopstate->table_filled = false;
        }
        else
        {
                setopstate->pergroup =
                        (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
        }

        return setopstate;
}

/* ----------------------------------------------------------------
 *              ExecEndSetOp
 *
 *              This shuts down the subplan and frees resources allocated
 *              to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
        /* clean up tuple table */
        ExecClearTuple(node->ps.ps_ResultTupleSlot);

        /* free subsidiary stuff including hashtable */
        if (node->tableContext)
                MemoryContextDelete(node->tableContext);
        ExecFreeExprContext(&node->ps);

        ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
        ExecClearTuple(node->ps.ps_ResultTupleSlot);
        node->setop_done = false;
        node->numOutput = 0;

        if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
        {
                /*
                 * In the hashed case, if we haven't yet built the hash table then we
                 * can just return; nothing done yet, so nothing to undo. If subnode's
                 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
                 * else no reason to re-scan it at all.
                 */
                if (!node->table_filled)
                        return;

                /*
                 * If we do have the hash table and the subplan does not have any
                 * parameter changes, then we can just rescan the existing hash table;
                 * no need to build it again.
                 */
                if (node->ps.lefttree->chgParam == NULL)
                {
                        ResetTupleHashIterator(node->hashtable, &node->hashiter);
                        return;
                }
        }

        /* Release first tuple of group, if we have made a copy */
        if (node->grp_firstTuple != NULL)
        {
                heap_freetuple(node->grp_firstTuple);
                node->grp_firstTuple = NULL;
        }

        /* Release any hashtable storage */
        if (node->tableContext)
                MemoryContextResetAndDeleteChildren(node->tableContext);

        /* And rebuild empty hashtable if needed */
        if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
        {
                build_hash_table(node);
                node->table_filled = false;
        }

        /*
         * if chgParam of subnode is not null then plan will be re-scanned by
         * first ExecProcNode.
         */
        if (node->ps.lefttree->chgParam == NULL)
                ExecReScan(node->ps.lefttree);
}