/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
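
/*
 * As a concrete example, for
 *		SELECT a, b FROM t1 EXCEPT ALL SELECT a, b FROM t2
 * the subplan delivers rows of the form (a, b, flag), with the junk
 * "flag" column set to 0 for rows from the left input and 1 for rows
 * from the right input.  In SETOP_SORTED mode the rows additionally
 * arrive sorted by (a, b), so each group of duplicates is contiguous
 * and its left/right counts can be accumulated in a single pass.
 */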

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"

/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;

/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
	TupleHashEntryData shared;	/* common header for hash table entries */
	SetOpStatePerGroupData pergroup;
} SetOpHashEntryData;
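
/*
 * Because TupleHashEntryData is the first field, the generic tuple
 * hashtable code can manage these entries (build_hash_table passes
 * sizeof(SetOpHashEntryData) as the entry size), and the TupleHashEntry
 * pointers returned by lookups can simply be cast to SetOpHashEntry.
 */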

static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);

/*
 * Initialize state for a new group of input values.
 */
static void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTable(node->numCols,
												node->dupColIdx,
												setopstate->eqfunctions,
												setopstate->hashfunctions,
												node->numGroups,
												sizeof(SetOpHashEntryData),
												setopstate->tableContext,
												setopstate->tempContext);
}
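
/*
 * The nbuckets argument above is the planner's node->numGroups estimate of
 * the number of distinct groups, and the table lives in tableContext so
 * that ExecReScanSetOp can discard and rebuild it cheaply.
 */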

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}
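
/*
 * For instance, a group holding 3 left-input tuples and 2 right-input
 * tuples (numLeft = 3, numRight = 2) yields 1 copy for INTERSECT,
 * min(3, 2) = 2 copies for INTERSECT ALL, 0 copies for EXCEPT, and
 * 3 - 2 = 1 copy for EXCEPT ALL.
 */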

/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;		/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/* verify planner didn't mess up */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		SetOpHashEntry entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
				initialize_counts(&entry->pergroup);

			/* Advance the counts */
			advance_counts(&entry->pergroup, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
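			/*
			 * (Passing a NULL "isnew" pointer makes LookupTupleHashEntry
			 * only probe the table, never add an entry, so groups that exist
			 * solely in the second input consume no hashtable space; per the
			 * header comments they can never produce output anyway.)
			 */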
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts(&entry->pergroup, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	SetOpHashEntry entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * Find the next entry in the hash table
		 */
		entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			return NULL;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, &entry->pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->shared.firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;

	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_MINSIZE,
							  ALLOCSET_DEFAULT_INITSIZE,
							  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));

	return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	MemoryContextDelete(node->tempContext);
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	ExecEndNode(outerPlanState(node));
}

void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}