granicus.if.org Git - postgresql/commitdiff
Support hashing for duplicate-elimination in INTERSECT and EXCEPT queries.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Aug 2008 03:04:04 +0000 (03:04 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Aug 2008 03:04:04 +0000 (03:04 +0000)
This completes my project of improving usage of hashing for duplicate
elimination (aggregate functions with DISTINCT remain undone, but that's
for some other day).

As with the previous patches, this means we can INTERSECT/EXCEPT on datatypes
that can hash but not sort, and it means that INTERSECT/EXCEPT without ORDER
BY are no longer certain to produce sorted output.

src/backend/commands/explain.c
src/backend/executor/nodeSetOp.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/prep/prepunion.c
src/include/nodes/execnodes.h
src/include/nodes/plannodes.h
src/include/optimizer/planmain.h
src/test/regress/expected/union.out
src/test/regress/sql/union.sql

index 0892cdbe3eb29f58de7fb91d18a32babd16c595d..1fd6f0cb331ed7a427f878c6f20f715f662a75dc 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.175 2008/05/14 19:10:29 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.176 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -558,19 +558,47 @@ explain_outNode(StringInfo str,
                        pname = "Unique";
                        break;
                case T_SetOp:
-                       switch (((SetOp *) plan)->cmd)
+                       switch (((SetOp *) plan)->strategy)
                        {
-                               case SETOPCMD_INTERSECT:
-                                       pname = "SetOp Intersect";
+                               case SETOP_SORTED:
+                                       switch (((SetOp *) plan)->cmd)
+                                       {
+                                               case SETOPCMD_INTERSECT:
+                                                       pname = "SetOp Intersect";
+                                                       break;
+                                               case SETOPCMD_INTERSECT_ALL:
+                                                       pname = "SetOp Intersect All";
+                                                       break;
+                                               case SETOPCMD_EXCEPT:
+                                                       pname = "SetOp Except";
+                                                       break;
+                                               case SETOPCMD_EXCEPT_ALL:
+                                                       pname = "SetOp Except All";
+                                                       break;
+                                               default:
+                                                       pname = "SetOp ???";
+                                                       break;
+                                       }
                                        break;
-                               case SETOPCMD_INTERSECT_ALL:
-                                       pname = "SetOp Intersect All";
-                                       break;
-                               case SETOPCMD_EXCEPT:
-                                       pname = "SetOp Except";
-                                       break;
-                               case SETOPCMD_EXCEPT_ALL:
-                                       pname = "SetOp Except All";
+                               case SETOP_HASHED:
+                                       switch (((SetOp *) plan)->cmd)
+                                       {
+                                               case SETOPCMD_INTERSECT:
+                                                       pname = "HashSetOp Intersect";
+                                                       break;
+                                               case SETOPCMD_INTERSECT_ALL:
+                                                       pname = "HashSetOp Intersect All";
+                                                       break;
+                                               case SETOPCMD_EXCEPT:
+                                                       pname = "HashSetOp Except";
+                                                       break;
+                                               case SETOPCMD_EXCEPT_ALL:
+                                                       pname = "HashSetOp Except All";
+                                                       break;
+                                               default:
+                                                       pname = "HashSetOp ???";
+                                                       break;
+                                       }
                                        break;
                                default:
                                        pname = "SetOp ???";
index c60ceff12597607fe27c6c7e88cf61d808087348..2421b0a6df4747cb4acc57f2df9b73414a161707 100644 (file)
@@ -4,33 +4,38 @@
  *       Routines to handle INTERSECT and EXCEPT selection
  *
  * The input of a SetOp node consists of tuples from two relations,
- * which have been combined into one dataset and sorted on all the nonjunk
- * attributes. In addition there is a junk attribute that shows which
- * relation each tuple came from.  The SetOp node scans each group of
- * identical tuples to determine how many came from each input relation.
- * Then it is a simple matter to emit the output demanded by the SQL spec
- * for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ * which have been combined into one dataset, with a junk attribute added
+ * that shows which relation each tuple came from.  In SETOP_SORTED mode,
+ * the input has furthermore been sorted according to all the grouping
+ * columns (ie, all the non-junk attributes).  The SetOp node scans each
+ * group of identical tuples to determine how many came from each input
+ * relation.  Then it is a simple matter to emit the output demanded by the
+ * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the input is delivered in no particular order.
+ * We build a hash table in memory with one entry for each group of
+ * identical tuples, and count the number of tuples in the group from
+ * each relation.  After seeing all the input, we scan the hashtable and
+ * generate the correct output using those counts.
  *
  * This node type is not used for UNION or UNION ALL, since those can be
  * implemented more cheaply (there's no need for the junk attribute to
  * identify the source relation).
  *
+ * Note that SetOp does no qual checking nor projection.  The delivered
+ * output tuples are just copies of the first-to-arrive tuple in each
+ * input group.
+ *
  *
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.25 2008/01/01 19:45:49 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.26 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-/*
- * INTERFACE ROUTINES
- *             ExecSetOp               - filter input to generate INTERSECT/EXCEPT results
- *             ExecInitSetOp   - initialize node and subnodes..
- *             ExecEndSetOp    - shutdown node and subnodes
- */
 
 #include "postgres.h"
 
 #include "utils/memutils.h"
 
 
+/*
+ * SetOpStatePerGroupData - per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
+ * the plan state node.  In SETOP_HASHED mode, the hash table contains one
+ * of these for each tuple group.
+ */
+typedef struct SetOpStatePerGroupData
+{
+       long            numLeft;                /* number of left-input dups in group */
+       long            numRight;               /* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+/*
+ * To implement hashed mode, we need a hashtable that stores a
+ * representative tuple and the duplicate counts for each distinct set
+ * of grouping columns.  We compute the hash key from the grouping columns.
+ */
+typedef struct SetOpHashEntryData *SetOpHashEntry;
+
+typedef struct SetOpHashEntryData
+{
+       TupleHashEntryData shared;      /* common header for hash table entries */
+       SetOpStatePerGroupData pergroup;
+} SetOpHashEntryData;
+
+
+static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static void setop_fill_hash_table(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
+
+
+/*
+ * Initialize state for a new group of input values.
+ */
+static void
+initialize_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+       pergroup->numLeft = pergroup->numRight = 0;
+}
+
+/*
+ * Advance the appropriate counter for one input tuple.
+ */
+static void
+advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup,
+                          TupleTableSlot *inputslot)
+{
+       SetOp      *node = (SetOp *) setopstate->ps.plan;
+       int                     flag;
+       bool            isNull;
+
+       flag = DatumGetInt32(slot_getattr(inputslot,
+                                                                         node->flagColIdx,
+                                                                         &isNull));
+       Assert(!isNull);
+       if (flag)
+               pergroup->numRight++;
+       else
+               pergroup->numLeft++;
+}
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(SetOpState *setopstate)
+{
+       SetOp      *node = (SetOp *) setopstate->ps.plan;
+
+       Assert(node->strategy == SETOP_HASHED);
+       Assert(node->numGroups > 0);
+
+       setopstate->hashtable = BuildTupleHashTable(node->numCols,
+                                                                                               node->dupColIdx,
+                                                                                               setopstate->eqfunctions,
+                                                                                               setopstate->hashfunctions,
+                                                                                               node->numGroups,
+                                                                                               sizeof(SetOpHashEntryData),
+                                                                                               setopstate->tableContext,
+                                                                                               setopstate->tempContext);
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple.
+ */
+static SetOpHashEntry
+lookup_hash_entry(SetOpState *setopstate, TupleTableSlot *inputslot)
+{
+       SetOpHashEntry entry;
+       bool            isnew;
+
+       /* find or create the hashtable entry */
+       entry = (SetOpHashEntry) LookupTupleHashEntry(setopstate->hashtable,
+                                                                                                 inputslot,
+                                                                                                 &isnew);
+
+       if (isnew)
+       {
+               /* initialize counts for new tuple group */
+               initialize_counts(setopstate, &entry->pergroup);
+       }
+
+       /* Must reset temp context after each hashtable lookup */
+       MemoryContextReset(setopstate->tempContext);
+
+       return entry;
+}
+
+/*
+ * We've completed processing a tuple group.  Decide how many copies (if any)
+ * of its representative row to emit, and store the count into numOutput.
+ * This logic is straight from the SQL92 specification.
+ */
+static void
+set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+       SetOp      *plannode = (SetOp *) setopstate->ps.plan;
+
+       switch (plannode->cmd)
+       {
+               case SETOPCMD_INTERSECT:
+                       if (pergroup->numLeft > 0 && pergroup->numRight > 0)
+                               setopstate->numOutput = 1;
+                       else
+                               setopstate->numOutput = 0;
+                       break;
+               case SETOPCMD_INTERSECT_ALL:
+                       setopstate->numOutput =
+                               (pergroup->numLeft < pergroup->numRight) ?
+                               pergroup->numLeft : pergroup->numRight;
+                       break;
+               case SETOPCMD_EXCEPT:
+                       if (pergroup->numLeft > 0 && pergroup->numRight == 0)
+                               setopstate->numOutput = 1;
+                       else
+                               setopstate->numOutput = 0;
+                       break;
+               case SETOPCMD_EXCEPT_ALL:
+                       setopstate->numOutput =
+                               (pergroup->numLeft < pergroup->numRight) ?
+                               0 : (pergroup->numLeft - pergroup->numRight);
+                       break;
+               default:
+                       elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
+                       break;
+       }
+}
+
+
 /* ----------------------------------------------------------------
  *             ExecSetOp
  * ----------------------------------------------------------------
@@ -47,14 +206,7 @@ TupleTableSlot *                            /* return: a tuple or NULL */
 ExecSetOp(SetOpState *node)
 {
        SetOp      *plannode = (SetOp *) node->ps.plan;
-       TupleTableSlot *resultTupleSlot;
-       PlanState  *outerPlan;
-
-       /*
-        * get information from the node
-        */
-       outerPlan = outerPlanState(node);
-       resultTupleSlot = node->ps.ps_ResultTupleSlot;
+       TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
 
        /*
         * If the previously-returned tuple needs to be returned more than once,
@@ -66,142 +218,218 @@ ExecSetOp(SetOpState *node)
                return resultTupleSlot;
        }
 
-       /* Flag that we have no current tuple */
-       ExecClearTuple(resultTupleSlot);
+       /* Otherwise, we're done if we are out of groups */
+       if (node->setop_done)
+               return NULL;
+
+       /* Fetch the next tuple group according to the correct strategy */
+       if (plannode->strategy == SETOP_HASHED)
+       {
+               if (!node->table_filled)
+                       setop_fill_hash_table(node);
+               return setop_retrieve_hash_table(node);
+       }
+       else
+               return setop_retrieve_direct(node);
+}
+
+/*
+ * ExecSetOp for non-hashed case
+ */
+static TupleTableSlot *
+setop_retrieve_direct(SetOpState *setopstate)
+{
+       SetOp      *node = (SetOp *) setopstate->ps.plan;
+       PlanState  *outerPlan;
+       SetOpStatePerGroup pergroup;
+       TupleTableSlot *outerslot;
+       TupleTableSlot *resultTupleSlot;
 
        /*
-        * Absorb groups of duplicate tuples, counting them, and saving the first
-        * of each group as a possible return value. At the end of each group,
-        * decide whether to return anything.
-        *
-        * We assume that the tuples arrive in sorted order so we can detect
-        * duplicates easily.
+        * get state info from node
         */
-       for (;;)
-       {
-               TupleTableSlot *inputTupleSlot;
-               bool            endOfGroup;
+       outerPlan = outerPlanState(setopstate);
+       pergroup = setopstate->pergroup;
+       resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
 
+       /*
+        * We loop retrieving groups until we find one we should return
+        */
+       while (!setopstate->setop_done)
+       {
                /*
-                * fetch a tuple from the outer subplan, unless we already did.
+                * If we don't already have the first tuple of the new group, fetch it
+                * from the outer plan.
                 */
-               if (node->ps.ps_OuterTupleSlot == NULL &&
-                       !node->subplan_done)
-               {
-                       node->ps.ps_OuterTupleSlot =
-                               ExecProcNode(outerPlan);
-                       if (TupIsNull(node->ps.ps_OuterTupleSlot))
-                               node->subplan_done = true;
-               }
-               inputTupleSlot = node->ps.ps_OuterTupleSlot;
-
-               if (TupIsNull(resultTupleSlot))
+               if (setopstate->grp_firstTuple == NULL)
                {
-                       /*
-                        * First of group: save a copy in result slot, and reset
-                        * duplicate-counters for new group.
-                        */
-                       if (node->subplan_done)
-                               return NULL;    /* no more tuples */
-                       ExecCopySlot(resultTupleSlot, inputTupleSlot);
-                       node->numLeft = 0;
-                       node->numRight = 0;
-                       endOfGroup = false;
-               }
-               else if (node->subplan_done)
-               {
-                       /*
-                        * Reached end of input, so finish processing final group
-                        */
-                       endOfGroup = true;
-               }
-               else
-               {
-                       /*
-                        * Else test if the new tuple and the previously saved tuple
-                        * match.
-                        */
-                       if (execTuplesMatch(inputTupleSlot,
-                                                               resultTupleSlot,
-                                                               plannode->numCols, plannode->dupColIdx,
-                                                               node->eqfunctions,
-                                                               node->tempContext))
-                               endOfGroup = false;
+                       outerslot = ExecProcNode(outerPlan);
+                       if (!TupIsNull(outerslot))
+                       {
+                               /* Make a copy of the first input tuple */
+                               setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
+                       }
                        else
-                               endOfGroup = true;
+                       {
+                               /* outer plan produced no tuples at all */
+                               setopstate->setop_done = true;
+                               return NULL;
+                       }
                }
 
-               if (endOfGroup)
+               /*
+                * Store the copied first input tuple in the tuple table slot
+                * reserved for it.  The tuple will be deleted when it is cleared
+                * from the slot.
+                */
+               ExecStoreTuple(setopstate->grp_firstTuple,
+                                          resultTupleSlot,
+                                          InvalidBuffer,
+                                          true);
+               setopstate->grp_firstTuple = NULL;      /* don't keep two pointers */
+
+               /* Initialize working state for a new input tuple group */
+               initialize_counts(setopstate, pergroup);
+
+               /* Count the first input tuple */
+               advance_counts(setopstate, pergroup, resultTupleSlot);
+
+               /*
+                * Scan the outer plan until we exhaust it or cross a group boundary.
+                */
+               for (;;)
                {
+                       outerslot = ExecProcNode(outerPlan);
+                       if (TupIsNull(outerslot))
+                       {
+                               /* no more outer-plan tuples available */
+                               setopstate->setop_done = true;
+                               break;
+                       }
+
                        /*
-                        * We've reached the end of the group containing resultTuple.
-                        * Decide how many copies (if any) to emit.  This logic is
-                        * straight from the SQL92 specification.
+                        * Check whether we've crossed a group boundary.
                         */
-                       switch (plannode->cmd)
+                       if (!execTuplesMatch(resultTupleSlot,
+                                                                outerslot,
+                                                                node->numCols, node->dupColIdx,
+                                                                setopstate->eqfunctions,
+                                                                setopstate->tempContext))
                        {
-                               case SETOPCMD_INTERSECT:
-                                       if (node->numLeft > 0 && node->numRight > 0)
-                                               node->numOutput = 1;
-                                       else
-                                               node->numOutput = 0;
-                                       break;
-                               case SETOPCMD_INTERSECT_ALL:
-                                       node->numOutput =
-                                               (node->numLeft < node->numRight) ?
-                                               node->numLeft : node->numRight;
-                                       break;
-                               case SETOPCMD_EXCEPT:
-                                       if (node->numLeft > 0 && node->numRight == 0)
-                                               node->numOutput = 1;
-                                       else
-                                               node->numOutput = 0;
-                                       break;
-                               case SETOPCMD_EXCEPT_ALL:
-                                       node->numOutput =
-                                               (node->numLeft < node->numRight) ?
-                                               0 : (node->numLeft - node->numRight);
-                                       break;
-                               default:
-                                       elog(ERROR, "unrecognized set op: %d",
-                                                (int) plannode->cmd);
-                                       break;
-                       }
-                       /* Fall out of for-loop if we have tuples to emit */
-                       if (node->numOutput > 0)
+                               /*
+                                * Save the first input tuple of the next group.
+                                */
+                               setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
                                break;
-                       /* Else flag that we have no current tuple, and loop around */
-                       ExecClearTuple(resultTupleSlot);
+                       }
+
+                       /* Still in same group, so count this tuple */
+                       advance_counts(setopstate, pergroup, outerslot);
                }
-               else
+
+               /*
+                * Done scanning input tuple group.  See if we should emit any
+                * copies of result tuple, and if so return the first copy.
+                */
+               set_output_count(setopstate, pergroup);
+
+               if (setopstate->numOutput > 0)
                {
-                       /*
-                        * Current tuple is member of same group as resultTuple. Count it
-                        * in the appropriate counter.
-                        */
-                       int                     flag;
-                       bool            isNull;
-
-                       flag = DatumGetInt32(slot_getattr(inputTupleSlot,
-                                                                                         plannode->flagColIdx,
-                                                                                         &isNull));
-                       Assert(!isNull);
-                       if (flag)
-                               node->numRight++;
-                       else
-                               node->numLeft++;
-                       /* Set flag to fetch a new input tuple, and loop around */
-                       node->ps.ps_OuterTupleSlot = NULL;
+                       setopstate->numOutput--;
+                       return resultTupleSlot;
                }
        }
 
+       /* No more groups */
+       ExecClearTuple(resultTupleSlot);
+       return NULL;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read input and build hash table
+ */
+static void
+setop_fill_hash_table(SetOpState *setopstate)
+{
+       PlanState  *outerPlan;
+       SetOpHashEntry entry;
+       TupleTableSlot *outerslot;
+
+       /*
+        * get state info from node
+        */
+       outerPlan = outerPlanState(setopstate);
+
        /*
-        * If we fall out of loop, then we need to emit at least one copy of
-        * resultTuple.
+        * Process each outer-plan tuple, and then fetch the next one, until we
+        * exhaust the outer plan.
         */
-       Assert(node->numOutput > 0);
-       node->numOutput--;
-       return resultTupleSlot;
+       for (;;)
+       {
+               outerslot = ExecProcNode(outerPlan);
+               if (TupIsNull(outerslot))
+                       break;
+
+               /* Find or build hashtable entry for this tuple's group */
+               entry = lookup_hash_entry(setopstate, outerslot);
+
+               /* Advance the counts */
+               advance_counts(setopstate, &entry->pergroup, outerslot);
+       }
+
+       setopstate->table_filled = true;
+       /* Initialize to walk the hash table */
+       ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
+}
+
+/*
+ * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+setop_retrieve_hash_table(SetOpState *setopstate)
+{
+       SetOpHashEntry entry;
+       TupleTableSlot *resultTupleSlot;
+
+       /*
+        * get state info from node
+        */
+       resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+       /*
+        * We loop retrieving groups until we find one we should return
+        */
+       while (!setopstate->setop_done)
+       {
+               /*
+                * Find the next entry in the hash table
+                */
+               entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
+               if (entry == NULL)
+               {
+                       /* No more entries in hashtable, so done */
+                       setopstate->setop_done = true;
+                       return NULL;
+               }
+
+               /*
+                * See if we should emit any copies of this tuple, and if so return
+                * the first copy.
+                */
+               set_output_count(setopstate, &entry->pergroup);
+
+               if (setopstate->numOutput > 0)
+               {
+                       setopstate->numOutput--;
+                       return ExecStoreMinimalTuple(entry->shared.firstTuple,
+                                                                                resultTupleSlot,
+                                                                                false);
+               }
+       }
+
+       /* No more groups */
+       ExecClearTuple(resultTupleSlot);
+       return NULL;
 }
 
 /* ----------------------------------------------------------------
@@ -226,9 +454,14 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
        setopstate->ps.plan = (Plan *) node;
        setopstate->ps.state = estate;
 
-       setopstate->ps.ps_OuterTupleSlot = NULL;
-       setopstate->subplan_done = false;
+       setopstate->eqfunctions = NULL;
+       setopstate->hashfunctions = NULL;
+       setopstate->setop_done = false;
        setopstate->numOutput = 0;
+       setopstate->pergroup = NULL;
+       setopstate->grp_firstTuple = NULL;
+       setopstate->hashtable = NULL;
+       setopstate->tableContext = NULL;
 
        /*
         * Miscellaneous initialization
@@ -244,6 +477,19 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
                                                          ALLOCSET_DEFAULT_INITSIZE,
                                                          ALLOCSET_DEFAULT_MAXSIZE);
 
+       /*
+        * If hashing, we also need a longer-lived context to store the hash
+        * table.  The table can't just be kept in the per-query context because
+        * we want to be able to throw it away in ExecReScanSetOp.
+        */
+       if (node->strategy == SETOP_HASHED)
+               setopstate->tableContext =
+                       AllocSetContextCreate(CurrentMemoryContext,
+                                                                 "SetOp hash table",
+                                                                 ALLOCSET_DEFAULT_MINSIZE,
+                                                                 ALLOCSET_DEFAULT_INITSIZE,
+                                                                 ALLOCSET_DEFAULT_MAXSIZE);
+
 #define SETOP_NSLOTS 1
 
        /*
@@ -252,8 +498,13 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
        ExecInitResultTupleSlot(estate, &setopstate->ps);
 
        /*
-        * then initialize outer plan
+        * initialize child nodes
+        *
+        * If we are hashing then the child plan does not need
+        * to handle REWIND efficiently; see ExecReScanSetOp.
         */
+       if (node->strategy == SETOP_HASHED)
+               eflags &= ~EXEC_FLAG_REWIND;
        outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
 
        /*
@@ -264,11 +515,30 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
        setopstate->ps.ps_ProjInfo = NULL;
 
        /*
-        * Precompute fmgr lookup data for inner loop
+        * Precompute fmgr lookup data for inner loop. We need both equality and
+        * hashing functions to do it by hashing, but only equality if not
+        * hashing.
         */
-       setopstate->eqfunctions =
-               execTuplesMatchPrepare(node->numCols,
-                                                          node->dupOperators);
+       if (node->strategy == SETOP_HASHED)
+               execTuplesHashPrepare(node->numCols,
+                                                         node->dupOperators,
+                                                         &setopstate->eqfunctions,
+                                                         &setopstate->hashfunctions);
+       else
+               setopstate->eqfunctions =
+                       execTuplesMatchPrepare(node->numCols,
+                                                                  node->dupOperators);
+
+       if (node->strategy == SETOP_HASHED)
+       {
+               build_hash_table(setopstate);
+               setopstate->table_filled = false;
+       }
+       else
+       {
+               setopstate->pergroup =
+                       (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
+       }
 
        return setopstate;
 }
@@ -293,9 +563,11 @@ ExecEndSetOp(SetOpState *node)
 {
        /* clean up tuple table */
        ExecClearTuple(node->ps.ps_ResultTupleSlot);
-       node->ps.ps_OuterTupleSlot = NULL;
 
+       /* free subsidiary stuff including hashtable */
        MemoryContextDelete(node->tempContext);
+       if (node->tableContext)
+               MemoryContextDelete(node->tableContext);
 
        ExecEndNode(outerPlanState(node));
 }
@@ -305,10 +577,50 @@ void
 ExecReScanSetOp(SetOpState *node, ExprContext *exprCtxt)
 {
        ExecClearTuple(node->ps.ps_ResultTupleSlot);
-       node->ps.ps_OuterTupleSlot = NULL;
-       node->subplan_done = false;
+       node->setop_done = false;
        node->numOutput = 0;
 
+       if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+       {
+               /*
+                * In the hashed case, if we haven't yet built the hash table then we
+                * can just return; nothing done yet, so nothing to undo. If subnode's
+                * chgParam is not NULL then it will be re-scanned by ExecProcNode,
+                * else no reason to re-scan it at all.
+                */
+               if (!node->table_filled)
+                       return;
+
+               /*
+                * If we do have the hash table and the subplan does not have any
+                * parameter changes, then we can just rescan the existing hash table;
+                * no need to build it again.
+                */
+               if (((PlanState *) node)->lefttree->chgParam == NULL)
+               {
+                       ResetTupleHashIterator(node->hashtable, &node->hashiter);
+                       return;
+               }
+       }
+
+       /* Release first tuple of group, if we have made a copy */
+       if (node->grp_firstTuple != NULL)
+       {
+               heap_freetuple(node->grp_firstTuple);
+               node->grp_firstTuple = NULL;
+       }
+
+       /* Release any hashtable storage */
+       if (node->tableContext)
+               MemoryContextResetAndDeleteChildren(node->tableContext);
+
+       /* And rebuild empty hashtable if needed */
+       if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+       {
+               build_hash_table(node);
+               node->table_filled = false;
+       }
+
        /*
         * if chgParam of subnode is not null then plan will be re-scanned by
         * first ExecProcNode.
index e562949d93495ebaf22d443afa848c625e2e47d8..4767b924e9ed16b0acc9377458d9db07fa93d87e 100644 (file)
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.398 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -652,10 +652,12 @@ _copySetOp(SetOp *from)
         * copy remainder of node
         */
        COPY_SCALAR_FIELD(cmd);
+       COPY_SCALAR_FIELD(strategy);
        COPY_SCALAR_FIELD(numCols);
        COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
        COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
        COPY_SCALAR_FIELD(flagColIdx);
+       COPY_SCALAR_FIELD(numGroups);
 
        return newnode;
 }
index 26f8fb3810a4837deda0e8768ce97b370eaacdd7..8440864b0e6cf3992e3c17373b4e1114fa61f93f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.332 2008/08/07 03:04:03 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -599,6 +599,7 @@ _outSetOp(StringInfo str, SetOp *node)
        _outPlanInfo(str, (Plan *) node);
 
        WRITE_ENUM_FIELD(cmd, SetOpCmd);
+       WRITE_ENUM_FIELD(strategy, SetOpStrategy);
        WRITE_INT_FIELD(numCols);
 
        appendStringInfo(str, " :dupColIdx");
@@ -610,6 +611,7 @@ _outSetOp(StringInfo str, SetOp *node)
                appendStringInfo(str, " %u", node->dupOperators[i]);
 
        WRITE_INT_FIELD(flagColIdx);
+       WRITE_LONG_FIELD(numGroups);
 }
 
 static void
index 80b6ed2edfe0bcc64330c8c004725991325c21ba..4cf2a5208b1326274764f3e9207604a7c36cf5bf 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.243 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3108,8 +3108,9 @@ make_unique(Plan *lefttree, List *distinctList)
  * already be sorted accordingly.
  */
 SetOp *
-make_setop(SetOpCmd cmd, Plan *lefttree,
-                  List *distinctList, AttrNumber flagColIdx)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
+                  List *distinctList, AttrNumber flagColIdx, long numGroups,
+                  double outputRows)
 {
        SetOp      *node = makeNode(SetOp);
        Plan       *plan = &node->plan;
@@ -3120,20 +3121,13 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
        ListCell   *slitem;
 
        copy_plan_costsize(plan, lefttree);
+       plan->plan_rows = outputRows;
 
        /*
         * Charge one cpu_operator_cost per comparison per input tuple. We assume
         * all columns get compared at most of the tuples.
         */
-       plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols;
-
-       /*
-        * We make the unsupported assumption that there will be 10% as many
-        * tuples out as in.  Any way to do better?
-        */
-       plan->plan_rows *= 0.1;
-       if (plan->plan_rows < 1)
-               plan->plan_rows = 1;
+       plan->total_cost += cpu_operator_cost * lefttree->plan_rows * numCols;
 
        plan->targetlist = lefttree->targetlist;
        plan->qual = NIL;
@@ -3160,10 +3154,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
        }
 
        node->cmd = cmd;
+       node->strategy = strategy;
        node->numCols = numCols;
        node->dupColIdx = dupColIdx;
        node->dupOperators = dupOperators;
        node->flagColIdx = flagColIdx;
+       node->numGroups = numGroups;
 
        return node;
 }
index 750d59a95155d302bfb260c8aa47f3545c92653c..3a155c0d0ad505e597227d61b11853d7543d4cdd 100644 (file)
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.151 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -60,6 +60,7 @@ static Plan *generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
                                        double tuple_fraction,
                                        List *refnames_tlist, List **sortClauses);
 static Plan *generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
+                                          double tuple_fraction,
                                           List *refnames_tlist, List **sortClauses);
 static List *recurse_union_children(Node *setOp, PlannerInfo *root,
                                           double tuple_fraction,
@@ -229,7 +230,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                                                                           refnames_tlist,
                                                                           sortClauses);
                else
-                       plan = generate_nonunion_plan(op, root,
+                       plan = generate_nonunion_plan(op, root, tuple_fraction,
                                                                                  refnames_tlist,
                                                                                  sortClauses);
 
@@ -341,6 +342,7 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
  */
 static Plan *
 generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
+                                          double tuple_fraction,
                                           List *refnames_tlist,
                                           List **sortClauses)
 {
@@ -351,6 +353,10 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
                           *groupList,
                           *planlist,
                           *child_sortclauses;
+       double          dNumDistinctRows;
+       double          dNumOutputRows;
+       long            numDistinctRows;
+       bool            use_hash;
        SetOpCmd        cmd;
 
        /* Recurse on children, ensuring their outputs are marked */
@@ -393,10 +399,32 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
                return plan;
        }
 
+       /*
+        * XXX for the moment, take the number of distinct groups as being the
+        * total input size, ie, the worst case.  This is too conservative, but
+        * we don't want to risk having the hashtable overrun memory; also,
+        * it's not clear how to get a decent estimate of the true size.
+        */
+       dNumDistinctRows = plan->plan_rows;
+
+       /* Also convert to long int --- but 'ware overflow! */
+       numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+       /*
+        * The output size is taken as 10% of that, which is a completely bogus
+        * guess, but it's what we've used historically.
+        */
+       dNumOutputRows = ceil(dNumDistinctRows * 0.1);
+
        /*
         * Decide whether to hash or sort, and add a sort node if needed.
         */
-       plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+       use_hash = choose_hashed_setop(root, groupList, plan,
+                                                                  tuple_fraction, dNumDistinctRows,
+                                                                  (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
+
+       if (!use_hash)
+               plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
 
        /*
         * Finally, add a SetOp plan node to generate the correct output.
@@ -414,9 +442,12 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
                        cmd = SETOPCMD_INTERSECT;       /* keep compiler quiet */
                        break;
        }
-       plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
+       plan = (Plan *) make_setop(cmd, use_hash ? SETOP_HASHED : SETOP_SORTED,
+                                                          plan, groupList, list_length(op->colTypes) + 1,
+                                                          numDistinctRows, dNumOutputRows);
 
-       *sortClauses = groupList;
+       /* Result is sorted only if we're not hashing */
+       *sortClauses = use_hash ? NIL : groupList;
 
        return plan;
 }
index 96b7a16084b5b7e2f2f9857eb612f1921032cf0e..ce41c750afce3c8e9484154f42679171c5ae4dea 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.185 2008/07/26 19:15:35 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.186 2008/08/07 03:04:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1414,21 +1414,31 @@ typedef struct HashState
 /* ----------------
  *      SetOpState information
  *
- *             SetOp nodes are used "on top of" sort nodes to discard
- *             duplicate tuples returned from the sort phase.  These are
- *             more complex than a simple Unique since we have to count
- *             how many duplicates to return.
+ *             Even in "sorted" mode, SetOp nodes are more complex than a simple
+ *             Unique, since we have to count how many duplicates to return.  But
+ *             we also support hashing, so this is really more like a cut-down
+ *             form of Agg.
  * ----------------
  */
+/* this struct is private in nodeSetOp.c: */
+typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
+
 typedef struct SetOpState
 {
        PlanState       ps;                             /* its first field is NodeTag */
-       FmgrInfo   *eqfunctions;        /* per-field lookup data for equality fns */
-       bool            subplan_done;   /* has subplan returned EOF? */
-       long            numLeft;                /* number of left-input dups of cur group */
-       long            numRight;               /* number of right-input dups of cur group */
+       FmgrInfo   *eqfunctions;        /* per-grouping-field equality fns */
+       FmgrInfo   *hashfunctions;      /* per-grouping-field hash fns */
+       bool            setop_done;             /* indicates completion of output scan */
        long            numOutput;              /* number of dups left to output */
        MemoryContext tempContext;      /* short-term context for comparisons */
+       /* these fields are used in SETOP_SORTED mode: */
+       SetOpStatePerGroup pergroup;    /* per-group working state */
+       HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
+       /* these fields are used in SETOP_HASHED mode: */
+       TupleHashTable hashtable;       /* hash table with one entry per group */
+       MemoryContext tableContext;     /* memory context containing hash table */
+       bool            table_filled;   /* hash table filled yet? */
+       TupleHashIterator hashiter; /* for iterating through hash table */
 } SetOpState;
 
 /* ----------------
index 6fb93e810daa0a4f34d085afdea555a672fd2bc7..e2eaadd29fff3636b0874125f9e242b954feaa8d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.100 2008/04/13 20:51:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.101 2008/08/07 03:04:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -525,15 +525,23 @@ typedef enum SetOpCmd
        SETOPCMD_EXCEPT_ALL
 } SetOpCmd;
 
+typedef enum SetOpStrategy
+{
+       SETOP_SORTED,                           /* input must be sorted */
+       SETOP_HASHED                            /* use internal hashtable */
+} SetOpStrategy;
+
 typedef struct SetOp
 {
        Plan            plan;
        SetOpCmd        cmd;                    /* what to do */
+       SetOpStrategy strategy;         /* how to do it */
        int                     numCols;                /* number of columns to check for
                                                                 * duplicate-ness */
        AttrNumber *dupColIdx;          /* their indexes in the target list */
        Oid                *dupOperators;       /* equality operators to compare with */
        AttrNumber      flagColIdx;             /* where is the flag column, if any */
+       long            numGroups;              /* estimated number of groups in input */
 } SetOp;
 
 /* ----------------
index 0b46a173706bfacae48a97bf683b38882c0451c8..616f0ea2ad0e2ecb47bf170775d0bfedbf0353d4 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.108 2008/05/02 21:26:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.109 2008/08/07 03:04:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -61,8 +61,9 @@ extern Plan *materialize_finished_plan(Plan *subplan);
 extern Unique *make_unique(Plan *lefttree, List *distinctList);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
                   int64 offset_est, int64 count_est);
-extern SetOp *make_setop(SetOpCmd cmd, Plan *lefttree,
-                  List *distinctList, AttrNumber flagColIdx);
+extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
+                  List *distinctList, AttrNumber flagColIdx, long numGroups,
+                  double outputRows);
 extern Result *make_result(PlannerInfo *root, List *tlist,
                        Node *resconstantqual, Plan *subplan);
 extern bool is_projection_capable_plan(Plan *plan);
index 722f9651be1c3f95074ffec49b682fbfcaccf796..295dacea86d1768808a0dcddf48f8e999aae5856 100644 (file)
@@ -275,21 +275,21 @@ SELECT q2 FROM int8_tbl INTERSECT ALL SELECT q1 FROM int8_tbl;
  4567890123456789
 (3 rows)
 
-SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
         q2         
 -------------------
  -4567890123456789
                456
 (2 rows)
 
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl ORDER BY 1;
         q2         
 -------------------
  -4567890123456789
                456
 (2 rows)
 
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl ORDER BY 1;
         q2         
 -------------------
  -4567890123456789
@@ -326,7 +326,7 @@ SELECT f1 FROM float8_tbl INTERSECT SELECT f1 FROM int4_tbl;
   0
 (1 row)
 
-SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl;
+SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl ORDER BY 1;
           f1           
 -----------------------
  -1.2345678901234e+200
@@ -369,14 +369,14 @@ SELECT q1 FROM int8_tbl INTERSECT (((SELECT q2 FROM int8_tbl UNION ALL SELECT q2
  -4567890123456789
 (7 rows)
 
-SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
         q1         
 -------------------
  -4567890123456789
                456
 (2 rows)
 
-SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl)));
+SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1)));
         q1         
 -------------------
                123
@@ -388,7 +388,7 @@ SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FR
                456
 (7 rows)
 
-(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl;
+(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
         q1         
 -------------------
  -4567890123456789
index 0b83d6b185b47cbdfa178005d3bc762546a8c541..81e19d165c43aad1e1a4f2b3f9b6d87dba5efd97 100644 (file)
@@ -97,11 +97,11 @@ SELECT q2 FROM int8_tbl INTERSECT SELECT q1 FROM int8_tbl;
 
 SELECT q2 FROM int8_tbl INTERSECT ALL SELECT q1 FROM int8_tbl;
 
-SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
 
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl ORDER BY 1;
 
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl ORDER BY 1;
 
 SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl;
 
@@ -115,7 +115,7 @@ SELECT q1 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q2 FROM int8_tbl;
 
 SELECT f1 FROM float8_tbl INTERSECT SELECT f1 FROM int4_tbl;
 
-SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl;
+SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl ORDER BY 1;
 
 --
 -- Operator precedence and (((((extra))))) parentheses
@@ -127,11 +127,11 @@ SELECT q1 FROM int8_tbl INTERSECT (((SELECT q2 FROM int8_tbl UNION ALL SELECT q2
 
 (((SELECT q1 FROM int8_tbl INTERSECT SELECT q2 FROM int8_tbl))) UNION ALL SELECT q2 FROM int8_tbl;
 
-SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
 
-SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl)));
+SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1)));
 
-(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl;
+(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
 
 --
 -- Subqueries with ORDER BY & LIMIT clauses