]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeSetOp.c
pgindent run for 9.4
[postgresql] / src / backend / executor / nodeSetOp.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSetOp.c
4  *        Routines to handle INTERSECT and EXCEPT selection
5  *
6  * The input of a SetOp node consists of tuples from two relations,
7  * which have been combined into one dataset, with a junk attribute added
8  * that shows which relation each tuple came from.  In SETOP_SORTED mode,
9  * the input has furthermore been sorted according to all the grouping
10  * columns (ie, all the non-junk attributes).  The SetOp node scans each
11  * group of identical tuples to determine how many came from each input
12  * relation.  Then it is a simple matter to emit the output demanded by the
13  * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
14  *
15  * In SETOP_HASHED mode, the input is delivered in no particular order,
16  * except that we know all the tuples from one input relation will come before
17  * all the tuples of the other.  The planner guarantees that the first input
18  * relation is the left-hand one for EXCEPT, and tries to make the smaller
19  * input relation come first for INTERSECT.  We build a hash table in memory
20  * with one entry for each group of identical tuples, and count the number of
21  * tuples in the group from each relation.  After seeing all the input, we
22  * scan the hashtable and generate the correct output using those counts.
23  * We can avoid making hashtable entries for any tuples appearing only in the
24  * second input relation, since they cannot result in any output.
25  *
26  * This node type is not used for UNION or UNION ALL, since those can be
27  * implemented more cheaply (there's no need for the junk attribute to
28  * identify the source relation).
29  *
30  * Note that SetOp does no qual checking nor projection.  The delivered
31  * output tuples are just copies of the first-to-arrive tuple in each
32  * input group.
33  *
34  *
35  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
36  * Portions Copyright (c) 1994, Regents of the University of California
37  *
38  *
39  * IDENTIFICATION
40  *        src/backend/executor/nodeSetOp.c
41  *
42  *-------------------------------------------------------------------------
43  */
44
45 #include "postgres.h"
46
47 #include "access/htup_details.h"
48 #include "executor/executor.h"
49 #include "executor/nodeSetOp.h"
50 #include "utils/memutils.h"
51
52
/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;
68
/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 *
 * The TupleHashEntryData header must come first so that entries can be
 * managed by the generic TupleHashTable machinery; the per-group counts
 * piggyback after it.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
	TupleHashEntryData shared;	/* common header for hash table entries */
	SetOpStatePerGroupData pergroup;	/* left/right duplicate counts */
}	SetOpHashEntryData;
81
82
/* forward declarations for local routines */
static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
86
87
88 /*
89  * Initialize state for a new group of input values.
90  */
91 static inline void
92 initialize_counts(SetOpStatePerGroup pergroup)
93 {
94         pergroup->numLeft = pergroup->numRight = 0;
95 }
96
97 /*
98  * Advance the appropriate counter for one input tuple.
99  */
100 static inline void
101 advance_counts(SetOpStatePerGroup pergroup, int flag)
102 {
103         if (flag)
104                 pergroup->numRight++;
105         else
106                 pergroup->numLeft++;
107 }
108
109 /*
110  * Fetch the "flag" column from an input tuple.
111  * This is an integer column with value 0 for left side, 1 for right side.
112  */
113 static int
114 fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
115 {
116         SetOp      *node = (SetOp *) setopstate->ps.plan;
117         int                     flag;
118         bool            isNull;
119
120         flag = DatumGetInt32(slot_getattr(inputslot,
121                                                                           node->flagColIdx,
122                                                                           &isNull));
123         Assert(!isNull);
124         Assert(flag == 0 || flag == 1);
125         return flag;
126 }
127
128 /*
129  * Initialize the hash table to empty.
130  */
131 static void
132 build_hash_table(SetOpState *setopstate)
133 {
134         SetOp      *node = (SetOp *) setopstate->ps.plan;
135
136         Assert(node->strategy == SETOP_HASHED);
137         Assert(node->numGroups > 0);
138
139         setopstate->hashtable = BuildTupleHashTable(node->numCols,
140                                                                                                 node->dupColIdx,
141                                                                                                 setopstate->eqfunctions,
142                                                                                                 setopstate->hashfunctions,
143                                                                                                 node->numGroups,
144                                                                                                 sizeof(SetOpHashEntryData),
145                                                                                                 setopstate->tableContext,
146                                                                                                 setopstate->tempContext);
147 }
148
149 /*
150  * We've completed processing a tuple group.  Decide how many copies (if any)
151  * of its representative row to emit, and store the count into numOutput.
152  * This logic is straight from the SQL92 specification.
153  */
154 static void
155 set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
156 {
157         SetOp      *plannode = (SetOp *) setopstate->ps.plan;
158
159         switch (plannode->cmd)
160         {
161                 case SETOPCMD_INTERSECT:
162                         if (pergroup->numLeft > 0 && pergroup->numRight > 0)
163                                 setopstate->numOutput = 1;
164                         else
165                                 setopstate->numOutput = 0;
166                         break;
167                 case SETOPCMD_INTERSECT_ALL:
168                         setopstate->numOutput =
169                                 (pergroup->numLeft < pergroup->numRight) ?
170                                 pergroup->numLeft : pergroup->numRight;
171                         break;
172                 case SETOPCMD_EXCEPT:
173                         if (pergroup->numLeft > 0 && pergroup->numRight == 0)
174                                 setopstate->numOutput = 1;
175                         else
176                                 setopstate->numOutput = 0;
177                         break;
178                 case SETOPCMD_EXCEPT_ALL:
179                         setopstate->numOutput =
180                                 (pergroup->numLeft < pergroup->numRight) ?
181                                 0 : (pergroup->numLeft - pergroup->numRight);
182                         break;
183                 default:
184                         elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
185                         break;
186         }
187 }
188
189
190 /* ----------------------------------------------------------------
191  *              ExecSetOp
192  * ----------------------------------------------------------------
193  */
194 TupleTableSlot *                                /* return: a tuple or NULL */
195 ExecSetOp(SetOpState *node)
196 {
197         SetOp      *plannode = (SetOp *) node->ps.plan;
198         TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
199
200         /*
201          * If the previously-returned tuple needs to be returned more than once,
202          * keep returning it.
203          */
204         if (node->numOutput > 0)
205         {
206                 node->numOutput--;
207                 return resultTupleSlot;
208         }
209
210         /* Otherwise, we're done if we are out of groups */
211         if (node->setop_done)
212                 return NULL;
213
214         /* Fetch the next tuple group according to the correct strategy */
215         if (plannode->strategy == SETOP_HASHED)
216         {
217                 if (!node->table_filled)
218                         setop_fill_hash_table(node);
219                 return setop_retrieve_hash_table(node);
220         }
221         else
222                 return setop_retrieve_direct(node);
223 }
224
/*
 * ExecSetOp for non-hashed case
 *
 * The input arrives sorted on the grouping columns, so all duplicates of a
 * group are consecutive.  We tally left/right membership for one group at a
 * time, then let set_output_count decide how many copies of the group's
 * representative tuple to emit.  Additional copies beyond the first are
 * returned by later ExecSetOp calls via the numOutput counter.
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.  (A previous iteration may have saved a copy
		 * when it detected a group boundary.)
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.  (shouldFree = true transfers tuple ownership to the slot.)
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary (i.e., this tuple
			 * differs from the representative in some grouping column).
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group, to be picked
				 * up at the top of the outer loop on the next iteration.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}
338
/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 *
 * Only tuples from the first-arriving input relation create hash entries;
 * tuples seen only in the second relation can never produce output (see
 * file header comments), so they merely update existing entries, if any.
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;

	/*
	 * verify planner didn't mess up: the first input must be the left one
	 * (flag 0), except that for INTERSECT the planner may choose to present
	 * the smaller (right, flag 1) relation first.
	 */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		SetOpHashEntry entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
				initialize_counts(&entry->pergroup);

			/* Advance the counts */
			advance_counts(&entry->pergroup, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts(&entry->pergroup, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
418
419 /*
420  * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
421  */
422 static TupleTableSlot *
423 setop_retrieve_hash_table(SetOpState *setopstate)
424 {
425         SetOpHashEntry entry;
426         TupleTableSlot *resultTupleSlot;
427
428         /*
429          * get state info from node
430          */
431         resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
432
433         /*
434          * We loop retrieving groups until we find one we should return
435          */
436         while (!setopstate->setop_done)
437         {
438                 /*
439                  * Find the next entry in the hash table
440                  */
441                 entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
442                 if (entry == NULL)
443                 {
444                         /* No more entries in hashtable, so done */
445                         setopstate->setop_done = true;
446                         return NULL;
447                 }
448
449                 /*
450                  * See if we should emit any copies of this tuple, and if so return
451                  * the first copy.
452                  */
453                 set_output_count(setopstate, &entry->pergroup);
454
455                 if (setopstate->numOutput > 0)
456                 {
457                         setopstate->numOutput--;
458                         return ExecStoreMinimalTuple(entry->shared.firstTuple,
459                                                                                  resultTupleSlot,
460                                                                                  false);
461                 }
462         }
463
464         /* No more groups */
465         ExecClearTuple(resultTupleSlot);
466         return NULL;
467 }
468
/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;

	/* fields filled in later, or only used by one of the two strategies */
	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_MINSIZE,
							  ALLOCSET_DEFAULT_INITSIZE,
							  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop. We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	/* Strategy-specific working state: hash table vs. single group struct */
	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}
577
578 /* ----------------------------------------------------------------
579  *              ExecEndSetOp
580  *
581  *              This shuts down the subplan and frees resources allocated
582  *              to this node.
583  * ----------------------------------------------------------------
584  */
585 void
586 ExecEndSetOp(SetOpState *node)
587 {
588         /* clean up tuple table */
589         ExecClearTuple(node->ps.ps_ResultTupleSlot);
590
591         /* free subsidiary stuff including hashtable */
592         MemoryContextDelete(node->tempContext);
593         if (node->tableContext)
594                 MemoryContextDelete(node->tableContext);
595
596         ExecEndNode(outerPlanState(node));
597 }
598
599
/* ----------------------------------------------------------------
 *		ExecReScanSetOp
 *
 *		Reset the node so that the next ExecSetOp call restarts the
 *		set operation from scratch (or reuses the existing hash table
 *		when that is safe).
 * ----------------------------------------------------------------
 */
void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage (resets rather than deletes context) */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}