1 /*-------------------------------------------------------------------------
4 * Routines to handle INTERSECT and EXCEPT selection
6 * The input of a SetOp node consists of tuples from two relations,
7 * which have been combined into one dataset, with a junk attribute added
8 * that shows which relation each tuple came from. In SETOP_SORTED mode,
9 * the input has furthermore been sorted according to all the grouping
10 * columns (ie, all the non-junk attributes). The SetOp node scans each
11 * group of identical tuples to determine how many came from each input
12 * relation. Then it is a simple matter to emit the output demanded by the
13 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
15 * In SETOP_HASHED mode, the input is delivered in no particular order,
16 * except that we know all the tuples from one input relation will come before
17 * all the tuples of the other. The planner guarantees that the first input
18 * relation is the left-hand one for EXCEPT, and tries to make the smaller
19 * input relation come first for INTERSECT. We build a hash table in memory
20 * with one entry for each group of identical tuples, and count the number of
21 * tuples in the group from each relation. After seeing all the input, we
22 * scan the hashtable and generate the correct output using those counts.
23 * We can avoid making hashtable entries for any tuples appearing only in the
24 * second input relation, since they cannot result in any output.
26 * This node type is not used for UNION or UNION ALL, since those can be
27 * implemented more cheaply (there's no need for the junk attribute to
28 * identify the source relation).
30 * Note that SetOp does no qual checking nor projection. The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
35 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
36 * Portions Copyright (c) 1994, Regents of the University of California
40 * src/backend/executor/nodeSetOp.c
42 *-------------------------------------------------------------------------
47 #include "access/htup_details.h"
48 #include "executor/executor.h"
49 #include "executor/nodeSetOp.h"
50 #include "miscadmin.h"
51 #include "utils/memutils.h"
/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;

/* pointer-to-group-state alias used throughout this file */
typedef SetOpStatePerGroupData *SetOpStatePerGroup;
/* forward declarations of local routines */
static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
77 * Initialize state for a new group of input values.
80 initialize_counts(SetOpStatePerGroup pergroup)
82 pergroup->numLeft = pergroup->numRight = 0;
86 * Advance the appropriate counter for one input tuple.
89 advance_counts(SetOpStatePerGroup pergroup, int flag)
98 * Fetch the "flag" column from an input tuple.
99 * This is an integer column with value 0 for left side, 1 for right side.
102 fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
104 SetOp *node = (SetOp *) setopstate->ps.plan;
108 flag = DatumGetInt32(slot_getattr(inputslot,
112 Assert(flag == 0 || flag == 1);
/*
 * Initialize the hash table to empty.
 *
 * The table is allocated in setopstate->tableContext so that
 * ExecReScanSetOp can throw it away wholesale; hash-lookup scratch
 * space uses the expression context's per-tuple memory.
 *
 * NOTE(review): several BuildTupleHashTable argument lines (key column
 * list, group-count estimate, etc.) appear to be missing from this
 * extract -- confirm against the declaration in execGrouping/executor.h.
 */
build_hash_table(SetOpState *setopstate)
SetOp *node = (SetOp *) setopstate->ps.plan;
ExprContext *econtext = setopstate->ps.ps_ExprContext;
/* input tuple descriptor comes from the (sole) outer child plan */
TupleDesc desc = ExecGetResultType(outerPlanState(setopstate));
Assert(node->strategy == SETOP_HASHED);
/* planner must have provided a positive group estimate */
Assert(node->numGroups > 0);
setopstate->hashtable = BuildTupleHashTable(&setopstate->ps,
setopstate->eqfuncoids,
setopstate->hashfunctions,
setopstate->tableContext,
econtext->ecxt_per_tuple_memory,
143 * We've completed processing a tuple group. Decide how many copies (if any)
144 * of its representative row to emit, and store the count into numOutput.
145 * This logic is straight from the SQL92 specification.
148 set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
150 SetOp *plannode = (SetOp *) setopstate->ps.plan;
152 switch (plannode->cmd)
154 case SETOPCMD_INTERSECT:
155 if (pergroup->numLeft > 0 && pergroup->numRight > 0)
156 setopstate->numOutput = 1;
158 setopstate->numOutput = 0;
160 case SETOPCMD_INTERSECT_ALL:
161 setopstate->numOutput =
162 (pergroup->numLeft < pergroup->numRight) ?
163 pergroup->numLeft : pergroup->numRight;
165 case SETOPCMD_EXCEPT:
166 if (pergroup->numLeft > 0 && pergroup->numRight == 0)
167 setopstate->numOutput = 1;
169 setopstate->numOutput = 0;
171 case SETOPCMD_EXCEPT_ALL:
172 setopstate->numOutput =
173 (pergroup->numLeft < pergroup->numRight) ?
174 0 : (pergroup->numLeft - pergroup->numRight);
177 elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
183 /* ----------------------------------------------------------------
185 * ----------------------------------------------------------------
187 static TupleTableSlot * /* return: a tuple or NULL */
188 ExecSetOp(PlanState *pstate)
190 SetOpState *node = castNode(SetOpState, pstate);
191 SetOp *plannode = (SetOp *) node->ps.plan;
192 TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
194 CHECK_FOR_INTERRUPTS();
197 * If the previously-returned tuple needs to be returned more than once,
200 if (node->numOutput > 0)
203 return resultTupleSlot;
206 /* Otherwise, we're done if we are out of groups */
207 if (node->setop_done)
210 /* Fetch the next tuple group according to the correct strategy */
211 if (plannode->strategy == SETOP_HASHED)
213 if (!node->table_filled)
214 setop_fill_hash_table(node);
215 return setop_retrieve_hash_table(node);
218 return setop_retrieve_direct(node);
222 * ExecSetOp for non-hashed case
224 static TupleTableSlot *
225 setop_retrieve_direct(SetOpState *setopstate)
227 PlanState *outerPlan;
228 SetOpStatePerGroup pergroup;
229 TupleTableSlot *outerslot;
230 TupleTableSlot *resultTupleSlot;
231 ExprContext *econtext = setopstate->ps.ps_ExprContext;
234 * get state info from node
236 outerPlan = outerPlanState(setopstate);
237 pergroup = (SetOpStatePerGroup) setopstate->pergroup;
238 resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
241 * We loop retrieving groups until we find one we should return
243 while (!setopstate->setop_done)
246 * If we don't already have the first tuple of the new group, fetch it
247 * from the outer plan.
249 if (setopstate->grp_firstTuple == NULL)
251 outerslot = ExecProcNode(outerPlan);
252 if (!TupIsNull(outerslot))
254 /* Make a copy of the first input tuple */
255 setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
259 /* outer plan produced no tuples at all */
260 setopstate->setop_done = true;
266 * Store the copied first input tuple in the tuple table slot reserved
267 * for it. The tuple will be deleted when it is cleared from the
270 ExecStoreHeapTuple(setopstate->grp_firstTuple,
273 setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
275 /* Initialize working state for a new input tuple group */
276 initialize_counts(pergroup);
278 /* Count the first input tuple */
279 advance_counts(pergroup,
280 fetch_tuple_flag(setopstate, resultTupleSlot));
283 * Scan the outer plan until we exhaust it or cross a group boundary.
287 outerslot = ExecProcNode(outerPlan);
288 if (TupIsNull(outerslot))
290 /* no more outer-plan tuples available */
291 setopstate->setop_done = true;
296 * Check whether we've crossed a group boundary.
298 econtext->ecxt_outertuple = resultTupleSlot;
299 econtext->ecxt_innertuple = outerslot;
301 if (!ExecQualAndReset(setopstate->eqfunction, econtext))
304 * Save the first input tuple of the next group.
306 setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
310 /* Still in same group, so count this tuple */
311 advance_counts(pergroup,
312 fetch_tuple_flag(setopstate, outerslot));
316 * Done scanning input tuple group. See if we should emit any copies
317 * of result tuple, and if so return the first copy.
319 set_output_count(setopstate, pergroup);
321 if (setopstate->numOutput > 0)
323 setopstate->numOutput--;
324 return resultTupleSlot;
329 ExecClearTuple(resultTupleSlot);
334 * ExecSetOp for hashed case: phase 1, read input and build hash table
337 setop_fill_hash_table(SetOpState *setopstate)
339 SetOp *node = (SetOp *) setopstate->ps.plan;
340 PlanState *outerPlan;
342 bool in_first_rel PG_USED_FOR_ASSERTS_ONLY;
343 ExprContext *econtext = setopstate->ps.ps_ExprContext;
346 * get state info from node
348 outerPlan = outerPlanState(setopstate);
349 firstFlag = node->firstFlag;
350 /* verify planner didn't mess up */
351 Assert(firstFlag == 0 ||
353 (node->cmd == SETOPCMD_INTERSECT ||
354 node->cmd == SETOPCMD_INTERSECT_ALL)));
357 * Process each outer-plan tuple, and then fetch the next one, until we
358 * exhaust the outer plan.
363 TupleTableSlot *outerslot;
365 TupleHashEntryData *entry;
368 outerslot = ExecProcNode(outerPlan);
369 if (TupIsNull(outerslot))
372 /* Identify whether it's left or right input */
373 flag = fetch_tuple_flag(setopstate, outerslot);
375 if (flag == firstFlag)
377 /* (still) in first input relation */
378 Assert(in_first_rel);
380 /* Find or build hashtable entry for this tuple's group */
381 entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
384 /* If new tuple group, initialize counts */
387 entry->additional = (SetOpStatePerGroup)
388 MemoryContextAlloc(setopstate->hashtable->tablecxt,
389 sizeof(SetOpStatePerGroupData));
390 initialize_counts((SetOpStatePerGroup) entry->additional);
393 /* Advance the counts */
394 advance_counts((SetOpStatePerGroup) entry->additional, flag);
398 /* reached second relation */
399 in_first_rel = false;
401 /* For tuples not seen previously, do not make hashtable entry */
402 entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
405 /* Advance the counts if entry is already present */
407 advance_counts((SetOpStatePerGroup) entry->additional, flag);
410 /* Must reset expression context after each hashtable lookup */
411 ResetExprContext(econtext);
414 setopstate->table_filled = true;
415 /* Initialize to walk the hash table */
416 ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
420 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
422 static TupleTableSlot *
423 setop_retrieve_hash_table(SetOpState *setopstate)
425 TupleHashEntryData *entry;
426 TupleTableSlot *resultTupleSlot;
429 * get state info from node
431 resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
434 * We loop retrieving groups until we find one we should return
436 while (!setopstate->setop_done)
438 CHECK_FOR_INTERRUPTS();
441 * Find the next entry in the hash table
443 entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
446 /* No more entries in hashtable, so done */
447 setopstate->setop_done = true;
452 * See if we should emit any copies of this tuple, and if so return
455 set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
457 if (setopstate->numOutput > 0)
459 setopstate->numOutput--;
460 return ExecStoreMinimalTuple(entry->firstTuple,
467 ExecClearTuple(resultTupleSlot);
/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
SetOpState *setopstate;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
/*
 * create state structure
 */
setopstate = makeNode(SetOpState);
setopstate->ps.plan = (Plan *) node;
setopstate->ps.state = estate;
setopstate->ps.ExecProcNode = ExecSetOp;
/* start with all per-run state empty; filled in below as needed */
setopstate->eqfuncoids = NULL;
setopstate->hashfunctions = NULL;
setopstate->setop_done = false;
setopstate->numOutput = 0;
setopstate->pergroup = NULL;
setopstate->grp_firstTuple = NULL;
setopstate->hashtable = NULL;
setopstate->tableContext = NULL;
/*
 * create expression context
 */
ExecAssignExprContext(estate, &setopstate->ps);
/*
 * If hashing, we also need a longer-lived context to store the hash
 * table.  The table can't just be kept in the per-query context because
 * we want to be able to throw it away in ExecReScanSetOp.
 */
if (node->strategy == SETOP_HASHED)
setopstate->tableContext =
AllocSetContextCreate(CurrentMemoryContext,
ALLOCSET_DEFAULT_SIZES);
/*
 * initialize child nodes
 *
 * If we are hashing then the child plan does not need to handle REWIND
 * efficiently; see ExecReScanSetOp.
 */
if (node->strategy == SETOP_HASHED)
eflags &= ~EXEC_FLAG_REWIND;
outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
outerDesc = ExecGetResultType(outerPlanState(setopstate));
/*
 * Initialize result slot and type.  Setop nodes do no projections, so
 * initialize projection info for this node appropriately.  Hashed mode
 * returns minimal tuples from the hash table; sorted mode stores heap
 * tuples copied from the input.
 */
ExecInitResultTupleSlotTL(&setopstate->ps,
node->strategy == SETOP_HASHED ?
&TTSOpsMinimalTuple : &TTSOpsHeapTuple);
setopstate->ps.ps_ProjInfo = NULL;
/*
 * Precompute fmgr lookup data for inner loop.  We need both equality and
 * hashing functions to do it by hashing, but only equality if not.
 *
 * NOTE(review): some argument lines of execTuplesHashPrepare and
 * execTuplesMatchPrepare appear to be missing from this extract
 * (duplicate-column operator arrays); confirm against executor.h.
 */
if (node->strategy == SETOP_HASHED)
execTuplesHashPrepare(node->numCols,
&setopstate->eqfuncoids,
&setopstate->hashfunctions);
setopstate->eqfunction =
execTuplesMatchPrepare(outerDesc,
/* hashed mode builds its (empty) table now; sorted mode needs only one
 * per-group counter struct, allocated once here */
if (node->strategy == SETOP_HASHED)
build_hash_table(setopstate);
setopstate->table_filled = false;
setopstate->pergroup =
(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
572 /* ----------------------------------------------------------------
575 * This shuts down the subplan and frees resources allocated
577 * ----------------------------------------------------------------
580 ExecEndSetOp(SetOpState *node)
582 /* clean up tuple table */
583 ExecClearTuple(node->ps.ps_ResultTupleSlot);
585 /* free subsidiary stuff including hashtable */
586 if (node->tableContext)
587 MemoryContextDelete(node->tableContext);
588 ExecFreeExprContext(&node->ps);
590 ExecEndNode(outerPlanState(node));
595 ExecReScanSetOp(SetOpState *node)
597 ExecClearTuple(node->ps.ps_ResultTupleSlot);
598 node->setop_done = false;
601 if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
604 * In the hashed case, if we haven't yet built the hash table then we
605 * can just return; nothing done yet, so nothing to undo. If subnode's
606 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
607 * else no reason to re-scan it at all.
609 if (!node->table_filled)
613 * If we do have the hash table and the subplan does not have any
614 * parameter changes, then we can just rescan the existing hash table;
615 * no need to build it again.
617 if (node->ps.lefttree->chgParam == NULL)
619 ResetTupleHashIterator(node->hashtable, &node->hashiter);
624 /* Release first tuple of group, if we have made a copy */
625 if (node->grp_firstTuple != NULL)
627 heap_freetuple(node->grp_firstTuple);
628 node->grp_firstTuple = NULL;
631 /* Release any hashtable storage */
632 if (node->tableContext)
633 MemoryContextResetAndDeleteChildren(node->tableContext);
635 /* And rebuild empty hashtable if needed */
636 if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
638 build_hash_table(node);
639 node->table_filled = false;
643 * if chgParam of subnode is not null then plan will be re-scanned by
644 * first ExecProcNode.
646 if (node->ps.lefttree->chgParam == NULL)
647 ExecReScan(node->ps.lefttree);