Teach the system how to use hashing for UNION. (INTERSECT/EXCEPT will follow.)

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 7 Aug 2008 01:11:52 +0000 (01:11 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 7 Aug 2008 01:11:52 +0000 (01:11 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Aug 2008 01:11:52 +0000 (01:11 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Aug 2008 01:11:52 +0000 (01:11 +0000)
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c

index d58b9d4a2fe061290fe8617e9964e9b5a114cdd2..5f5c91366f7d5d32df4f6d7ccdb968679b79e44a 100644 (file)
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.77 2008/08/02 21:31:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1597,6 +1597,15 @@ find_expr_references_walker(Node *node,
                 context->rtables = list_delete_first(context->rtables);
                 return result;
         }
+       else if (IsA(node, SetOperationStmt))
+       {
+               SetOperationStmt *setop = (SetOperationStmt *) node;
+
+               /* we need to look at the groupClauses for operator references */
+               find_expr_references_walker((Node *) setop->groupClauses, context);
+               /* fall through to examine child nodes */
+       }
+
         return expression_tree_walker(node, find_expr_references_walker,
                                                                   (void *) context);
  }
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 652c726a6703a8e20809b30f538b85257c0cd15e..e562949d93495ebaf22d443afa848c625e2e47d8 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.396 2008/08/02 21:31:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1943,6 +1943,7 @@ _copySetOperationStmt(SetOperationStmt *from)
         COPY_NODE_FIELD(rarg);
         COPY_NODE_FIELD(colTypes);
         COPY_NODE_FIELD(colTypmods);
+       COPY_NODE_FIELD(groupClauses);
  
         return newnode;
  }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 4e011947ef29dd8b2f35c9bafdbbe6c143b5241c..dc0b2cca3734e7a4eb0ba4ab2592a1f5dc109af4 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.325 2008/08/02 21:31:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -839,6 +839,7 @@ _equalSetOperationStmt(SetOperationStmt *a, SetOperationStmt *b)
         COMPARE_NODE_FIELD(rarg);
         COMPARE_NODE_FIELD(colTypes);
         COMPARE_NODE_FIELD(colTypmods);
+       COMPARE_NODE_FIELD(groupClauses);
  
         return true;
  }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 408b9b2a757e24e08b1ff44ba473c31f0e545217..26f8fb3810a4837deda0e8768ce97b370eaacdd7 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1780,6 +1780,7 @@ _outSetOperationStmt(StringInfo str, SetOperationStmt *node)
         WRITE_NODE_FIELD(rarg);
         WRITE_NODE_FIELD(colTypes);
         WRITE_NODE_FIELD(colTypmods);
+       WRITE_NODE_FIELD(groupClauses);
  }
  
  static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index e7de4898c06c5e98517a86b53acc18d8e11c6a1a..c46593d22c72a7d28219546619ad7718857a61a3 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.211 2008/08/02 21:31:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.212 2008/08/07 01:11:49 tgl Exp $
   *
   * NOTES
   *       Path and Plan nodes do not have any readfuncs support, because we
@@ -232,6 +232,7 @@ _readSetOperationStmt(void)
         READ_NODE_FIELD(rarg);
         READ_NODE_FIELD(colTypes);
         READ_NODE_FIELD(colTypmods);
+       READ_NODE_FIELD(groupClauses);
  
         READ_DONE();
  }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 5b7fde2533a33b2cabb75779cdb8d617818c597f..c818f0ddf10e20ae0d260bf2bef5dfeaf4082f3a 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.239 2008/08/05 16:03:10 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -68,10 +68,6 @@ static double preprocess_limit(PlannerInfo *root,
                                  double tuple_fraction,
                                  int64 *offset_est, int64 *count_est);
  static void preprocess_groupclause(PlannerInfo *root);
-static Oid *extract_grouping_ops(List *groupClause);
-static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
-static bool grouping_is_sortable(List *groupClause);
-static bool grouping_is_hashable(List *groupClause);
  static bool choose_hashed_grouping(PlannerInfo *root,
                                            double tuple_fraction, double limit_tuples,
                                            Path *cheapest_path, Path *sorted_path,
@@ -784,10 +780,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
  
                 /*
                  * If there's a top-level ORDER BY, assume we have to fetch all the
-                * tuples.      This might seem too simplistic given all the hackery below
-                * to possibly avoid the sort ... but a nonzero tuple_fraction is only
-                * of use to plan_set_operations() when the setop is UNION ALL, and
-                * the result of UNION ALL is always unsorted.
+                * tuples.      This might be too simplistic given all the hackery below
+                * to possibly avoid the sort; but the odds of accurate estimates
+                * here are pretty low anyway.
                  */
                 if (parse->sortClause)
                         tuple_fraction = 0.0;
@@ -818,7 +813,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                  */
                 Assert(parse->commandType == CMD_SELECT);
  
-               tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
+               tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
+                                                                               tlist);
  
                 /*
                  * Can't handle FOR UPDATE/SHARE here (parser should have checked
@@ -1714,100 +1710,6 @@ preprocess_groupclause(PlannerInfo *root)
         parse->groupClause = new_groupclause;
  }
  
-/*
- * extract_grouping_ops - make an array of the equality operator OIDs
- *             for a SortGroupClause list
- */
-static Oid *
-extract_grouping_ops(List *groupClause)
-{
-       int                     numCols = list_length(groupClause);
-       int                     colno = 0;
-       Oid                *groupOperators;
-       ListCell   *glitem;
-
-       groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-
-       foreach(glitem, groupClause)
-       {
-               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-               groupOperators[colno] = groupcl->eqop;
-               Assert(OidIsValid(groupOperators[colno]));
-               colno++;
-       }
-
-       return groupOperators;
-}
-
-/*
- * extract_grouping_cols - make an array of the grouping column resnos
- *             for a SortGroupClause list
- */
-static AttrNumber *
-extract_grouping_cols(List *groupClause, List *tlist)
-{
-       AttrNumber *grpColIdx;
-       int                     numCols = list_length(groupClause);
-       int                     colno = 0;
-       ListCell   *glitem;
-
-       grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-
-       foreach(glitem, groupClause)
-       {
-               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-               TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
-
-               grpColIdx[colno++] = tle->resno;
-       }
-
-       return grpColIdx;
-}
-
-/*
- * grouping_is_sortable - is it possible to implement grouping list by sorting?
- *
- * This is easy since the parser will have included a sortop if one exists.
- */
-static bool
-grouping_is_sortable(List *groupClause)
-{
-       ListCell   *glitem;
-
-       foreach(glitem, groupClause)
-       {
-               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-               if (!OidIsValid(groupcl->sortop))
-                       return false;
-       }
-       return true;
-}
-
-/*
- * grouping_is_hashable - is it possible to implement grouping list by hashing?
- *
- * We assume hashing is OK if the equality operators are marked oprcanhash.
- * (If there isn't actually a supporting hash function, the executor will
- * complain at runtime; but this is a misdeclaration of the operator, not
- * a system bug.)
- */
-static bool
-grouping_is_hashable(List *groupClause)
-{
-       ListCell   *glitem;
-
-       foreach(glitem, groupClause)
-       {
-               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
-
-               if (!op_hashjoinable(groupcl->eqop))
-                       return false;
-       }
-       return true;
-}
-
  /*
   * choose_hashed_grouping - should we use hashed grouping?
   *
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 31ba005edb7ded341c4984699495116439d2acde..750d59a95155d302bfb260c8aa47f3545c92653c 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.149 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -32,8 +32,12 @@
  #include "access/heapam.h"
  #include "catalog/namespace.h"
  #include "catalog/pg_type.h"
+#include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
  #include "optimizer/plancat.h"
  #include "optimizer/planmain.h"
  #include "optimizer/planner.h"
@@ -61,6 +65,13 @@ static List *recurse_union_children(Node *setOp, PlannerInfo *root,
                                            double tuple_fraction,
                                            SetOperationStmt *top_union,
                                            List *refnames_tlist);
+static Plan *make_union_unique(SetOperationStmt *op, Plan *plan,
+                                 PlannerInfo *root, double tuple_fraction,
+                                 List **sortClauses);
+static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+                                       Plan *input_plan,
+                                       double tuple_fraction, double dNumDistinctRows,
+                                       const char *construct);
  static List *generate_setop_tlist(List *colTypes, int flag,
                                          Index varno,
                                          bool hack_constants,
@@ -69,7 +80,7 @@ static List *generate_setop_tlist(List *colTypes, int flag,
  static List *generate_append_tlist(List *colTypes, bool flag,
                                           List *input_plans,
                                           List *refnames_tlist);
-static List *generate_setop_sortlist(List *targetlist);
+static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
  static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
                                                  Index rti);
  static void make_inh_translation_lists(Relation oldrelation,
@@ -99,7 +110,8 @@ static List *adjust_inherited_tlist(List *tlist,
   * top level has already been factored into tuple_fraction.
   *
   * *sortClauses is an output argument: it is set to a list of SortGroupClauses
- * representing the result ordering of the topmost set operation.
+ * representing the result ordering of the topmost set operation.  (This will
+ * be NIL if the output isn't ordered.)
   */
  Plan *
  plan_set_operations(PlannerInfo *root, double tuple_fraction,
@@ -287,8 +299,8 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
         /*
          * If any of my children are identical UNION nodes (same op, all-flag, and
          * colTypes) then they can be merged into this node so that we generate
-        * only one Append and Sort for the lot.  Recurse to find such nodes and
-        * compute their children's plans.
+        * only one Append and unique-ification for the lot.  Recurse to find such
+        * nodes and compute their children's plans.
          */
         planlist = list_concat(recurse_union_children(op->larg, root,
                                                                                                   tuple_fraction,
@@ -314,22 +326,12 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
  
         /*
          * For UNION ALL, we just need the Append plan.  For UNION, need to add
-        * Sort and Unique nodes to produce unique output.
+        * node(s) to remove duplicates.
          */
-       if (!op->all)
-       {
-               List       *sortList;
-
-               sortList = generate_setop_sortlist(tlist);
-               if (sortList)
-               {
-                       plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
-                       plan = (Plan *) make_unique(plan, sortList);
-               }
-               *sortClauses = sortList;
-       }
+       if (op->all)
+               *sortClauses = NIL;             /* result of UNION ALL is always unsorted */
         else
-               *sortClauses = NIL;
+               plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses);
  
         return plan;
  }
@@ -346,7 +348,7 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
                            *rplan,
                            *plan;
         List       *tlist,
-                          *sortList,
+                          *groupList,
                            *planlist,
                            *child_sortclauses;
         SetOpCmd        cmd;
@@ -381,19 +383,24 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
          */
         plan = (Plan *) make_append(planlist, false, tlist);
  
-       /*
-        * Sort the child results, then add a SetOp plan node to generate the
-        * correct output.
-        */
-       sortList = generate_setop_sortlist(tlist);
+       /* Identify the grouping semantics */
+       groupList = generate_setop_grouplist(op, tlist);
  
-       if (sortList == NIL)            /* nothing to sort on? */
+       /* punt if nothing to group on (can this happen?) */
+       if (groupList == NIL)
         {
                 *sortClauses = NIL;
                 return plan;
         }
  
-       plan = (Plan *) make_sort_from_sortclauses(root, sortList, plan);
+       /*
+        * Decide whether to hash or sort, and add a sort node if needed.
+        */
+       plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+
+       /*
+        * Finally, add a SetOp plan node to generate the correct output.
+        */
         switch (op->op)
         {
                 case SETOP_INTERSECT:
@@ -403,14 +410,13 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
                         cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
                         break;
                 default:
-                       elog(ERROR, "unrecognized set op: %d",
-                                (int) op->op);
+                       elog(ERROR, "unrecognized set op: %d", (int) op->op);
                         cmd = SETOPCMD_INTERSECT;       /* keep compiler quiet */
                         break;
         }
-       plan = (Plan *) make_setop(cmd, plan, sortList, list_length(op->colTypes) + 1);
+       plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
  
-       *sortClauses = sortList;
+       *sortClauses = groupList;
  
         return plan;
  }
@@ -465,6 +471,171 @@ recurse_union_children(Node *setOp, PlannerInfo *root,
                                                                                          &child_sortclauses));
  }
  
+/*
+ * Add hashed-Agg or Sort+Unique nodes atop plan to unique-ify a UNION; sets *sortClauses.
+ */
+static Plan *
+make_union_unique(SetOperationStmt *op, Plan *plan,
+                                 PlannerInfo *root, double tuple_fraction,
+                                 List **sortClauses)
+{
+       List       *groupList;
+       double          dNumDistinctRows;
+       long            numDistinctRows;
+
+       /* Identify the grouping semantics */
+       groupList = generate_setop_grouplist(op, plan->targetlist);
+
+       /* punt if nothing to group on (can this happen?) */
+       if (groupList == NIL)
+       {
+               *sortClauses = NIL;
+               return plan;
+       }
+
+       /*
+        * XXX for the moment, take the number of distinct groups as being the
+        * total input size, ie, the worst case.  This is too conservative, but
+        * we don't want to risk having the hashtable overrun memory; also,
+        * it's not clear how to get a decent estimate of the true size.  One
+        * should note as well the propensity of novices to write UNION rather
+        * than UNION ALL even when they don't expect any duplicates...
+        */
+       dNumDistinctRows = plan->plan_rows;
+
+       /* Also convert to long int, clamping at LONG_MAX to avoid overflow */
+       numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+       /* Decide whether to hash or sort */
+       if (choose_hashed_setop(root, groupList, plan,
+                                                       tuple_fraction, dNumDistinctRows,
+                                                       "UNION"))
+       {
+               /* Hashed aggregate plan --- no sort needed */
+               plan = (Plan *) make_agg(root,
+                                                                plan->targetlist,
+                                                                NIL,
+                                                                AGG_HASHED,
+                                                                list_length(groupList),
+                                                                extract_grouping_cols(groupList,
+                                                                                                          plan->targetlist),
+                                                                extract_grouping_ops(groupList),
+                                                                numDistinctRows,
+                                                                0,
+                                                                plan);
+               /* Hashed aggregation output has no useful ordering, so report none */
+               *sortClauses = NIL;
+       }
+       else
+       {
+               /* Sort on the grouping columns, then Unique to remove duplicates */
+               plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+               plan = (Plan *) make_unique(plan, groupList);
+               plan->plan_rows = dNumDistinctRows;
+               /* We know the sort order of the result */
+               *sortClauses = groupList;
+       }
+
+       return plan;
+}
+
+/*
+ * choose_hashed_setop - return true to hash, false to sort, for a set operation
+ */
+static bool
+choose_hashed_setop(PlannerInfo *root, List *groupClauses,
+                                       Plan *input_plan,
+                                       double tuple_fraction, double dNumDistinctRows,
+                                       const char *construct)
+{
+       int                     numDistinctCols = list_length(groupClauses);
+       bool            can_sort;
+       bool            can_hash;
+       Size            hashentrysize;
+       List       *needed_pathkeys;
+       Path            hashed_p;
+       Path            sorted_p;
+
+       /* Check whether the operators support sorting or hashing */
+       can_sort = grouping_is_sortable(groupClauses);
+       can_hash = grouping_is_hashable(groupClauses);
+       if (can_hash && can_sort)
+       {
+               /* we have a meaningful choice to make, continue ... */
+       }
+       else if (can_hash)
+               return true;
+       else if (can_sort)
+               return false;
+       else
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                /* translator: %s is UNION, INTERSECT, or EXCEPT */
+                                errmsg("could not implement %s", construct),
+                                errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+
+       /* Prefer sorting when enable_hashagg is off */
+       if (!enable_hashagg)
+               return false;
+
+       /*
+        * Don't do it if it doesn't look like the hashtable will fit into
+        * work_mem (scaled by 1024 below so the comparison is in bytes).
+        */
+       hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
+
+       if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
+               return false;
+
+       /*
+        * See if the estimated cost is no more than doing it the other way.
+        *
+        * We need to consider input_plan + hashagg versus input_plan + sort +
+        * group.  Note that the actual result plan might involve a SetOp or
+        * Unique node, not Agg or Group, but the cost estimates for Agg and Group
+        * should be close enough for our purposes here.
+        *
+        * These path variables are dummies that just hold cost fields; we don't
+        * make actual Paths for these steps.
+        */
+       cost_agg(&hashed_p, root, AGG_HASHED, 0,
+                        numDistinctCols, dNumDistinctRows,
+                        input_plan->startup_cost, input_plan->total_cost,
+                        input_plan->plan_rows);
+
+       /*
+        * Now for the sorted case.  Note that the input is *always* unsorted,
+        * since it was made by appending unrelated sub-relations together.
+        */
+       sorted_p.startup_cost = input_plan->startup_cost;
+       sorted_p.total_cost = input_plan->total_cost;
+       /* XXX this is more expensive than cost_sort really needs: */
+       needed_pathkeys = make_pathkeys_for_sortclauses(root,
+                                                                                                       groupClauses,
+                                                                                                       input_plan->targetlist,
+                                                                                                       true);
+       cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost,
+                         input_plan->plan_rows, input_plan->plan_width, -1.0);
+       cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
+                          sorted_p.startup_cost, sorted_p.total_cost,
+                          input_plan->plan_rows);
+
+       /*
+        * Now make the decision using the top-level tuple fraction.  A value
+        * >= 1.0 is an absolute count (LIMIT); convert it to fractional form.
+        */
+       if (tuple_fraction >= 1.0)
+               tuple_fraction /= dNumDistinctRows;
+
+       if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+                                                                         tuple_fraction) < 0)
+       {
+               /* Hashed is strictly cheaper, so use it; ties go to sorting */
+               return true;
+       }
+       return false;
+}
+
  /*
   * Generate targetlist for a set-operation plan node
   *
@@ -677,30 +848,47 @@ generate_append_tlist(List *colTypes, bool flag,
  }
  
  /*
- * generate_setop_sortlist
- *             Build a SortGroupClause list enumerating all the non-resjunk
- *             tlist entries, using default ordering properties.
+ * generate_setop_grouplist
+ *             Build a SortGroupClause list defining the sort/grouping properties
+ *             of the setop's output columns.
   *
- * For now, we require all the items to be sortable.  Eventually we
- * should implement hashing setops and allow hash-only datatypes.
+ * Parse analysis already determined the properties and built a suitable
+ * list, except that the entries do not have sortgrouprefs set because
+ * the parser output representation doesn't include a tlist for each
+ * setop.  So what we need to do here is copy that list and install
+ * proper sortgrouprefs into it and into the targetlist.
   */
  static List *
-generate_setop_sortlist(List *targetlist)
+generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
  {
-       List       *sortlist = NIL;
-       ListCell   *l;
+       List       *grouplist = (List *) copyObject(op->groupClauses);
+       ListCell   *lg;
+       ListCell   *lt;
+       Index           refno = 1;
  
-       foreach(l, targetlist)
+       lg = list_head(grouplist);
+       foreach(lt, targetlist)
         {
-               TargetEntry *tle = (TargetEntry *) lfirst(l);
+               TargetEntry *tle = (TargetEntry *) lfirst(lt);
+               SortGroupClause *sgc;
  
-               if (!tle->resjunk)
-                       sortlist = addTargetToGroupList(NULL, tle,
-                                                                                       sortlist, targetlist,
-                                                                                       true, /* XXX fixme someday */
-                                                                                       false);
+               /* tlist shouldn't have any sortgrouprefs yet */
+               Assert(tle->ressortgroupref == 0);
+
+               if (tle->resjunk)
+                       continue;                       /* ignore resjunk columns */
+
+               /* non-resjunk columns should have grouping clauses */
+               Assert(lg != NULL);
+               sgc = (SortGroupClause *) lfirst(lg);
+               lg = lnext(lg);
+               Assert(sgc->tleSortGroupRef == 0);
+
+               /* we could use assignSortGroupRef here, but seems a bit silly */
+               sgc->tleSortGroupRef = tle->ressortgroupref = refno++;
         }
-       return sortlist;
+       Assert(lg == NULL);
+       return grouplist;
  }
  
  
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c

index 04ef2224c915f8367d1b8057b3d839bbeb5101d0..858d4abcbd87c1680e14c6cda67075cb94767c40 100644 (file)
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.260 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $
   *
   * HISTORY
   *       AUTHOR                        DATE                    MAJOR EVENT
@@ -3933,6 +3933,8 @@ expression_tree_walker(Node *node,
                                         return true;
                                 if (walker(setop->rarg, context))
                                         return true;
+
+                               /* groupClauses are deemed uninteresting */
                         }
                         break;
                 case T_InClauseInfo:
@@ -4535,6 +4537,7 @@ expression_tree_mutator(Node *node,
                                 FLATCOPY(newnode, setop, SetOperationStmt);
                                 MUTATE(newnode->larg, setop->larg, Node *);
                                 MUTATE(newnode->rarg, setop->rarg, Node *);
+                               /* We do not mutate groupClauses by default */
                                 return (Node *) newnode;
                         }
                         break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index ad3c10ac27b5041c434ebea571067bedf10afeeb..655443e9efe03872fa51249005dad923d141e4ba 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.144 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -24,7 +24,6 @@
  #include "optimizer/paths.h"
  #include "optimizer/tlist.h"
  #include "parser/parse_expr.h"
-#include "parser/parse_oper.h"
  #include "parser/parsetree.h"
  #include "utils/selfuncs.h"
  #include "utils/lsyscache.h"
@@ -1003,12 +1002,6 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
         /*
          * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
          * except with ALL.
-        *
-        * XXX this code knows that prepunion.c will adopt the default sort/group
-        * operators for each column datatype to determine uniqueness.  It'd
-        * probably be better if these operators were chosen at parse time and
-        * stored into the parsetree, instead of leaving bits of the planner to
-        * decide semantics.
          */
         if (query->setOperations)
         {
@@ -1019,24 +1012,26 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
  
                 if (!topop->all)
                 {
+                       ListCell   *lg;
+
                         /* We're good if all the nonjunk output columns are in colnos */
+                       lg = list_head(topop->groupClauses);
                         foreach(l, query->targetList)
                         {
                                 TargetEntry *tle = (TargetEntry *) lfirst(l);
-                               Oid             tle_eq_opr;
+                               SortGroupClause *sgc;
  
                                 if (tle->resjunk)
                                         continue;       /* ignore resjunk columns */
  
+                               /* non-resjunk columns should have grouping clauses */
+                               Assert(lg != NULL);
+                               sgc = (SortGroupClause *) lfirst(lg);
+                               lg = lnext(lg);
+
                                 opid = distinct_col_search(tle->resno, colnos, opids);
-                               if (!OidIsValid(opid))
-                                       break;          /* exit early if no match */
-                               /* check for compatible semantics */
-                               get_sort_group_operators(exprType((Node *) tle->expr),
-                                                                                false, false, false,
-                                                                                NULL, &tle_eq_opr, NULL);
-                               if (!OidIsValid(tle_eq_opr) ||
-                                       !equality_ops_are_compatible(opid, tle_eq_opr))
+                               if (!OidIsValid(opid) ||
+                                       !equality_ops_are_compatible(opid, sgc->eqop))
                                         break;          /* exit early if no match */
                         }
                         if (l == NULL)          /* had matches for all? */
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c

index fc0880ad56cde97fa70b69035d539c80842b7cbc..a2c627fd4d51e23711d3bdc74ab76cd608886437 100644 (file)
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.79 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -18,6 +18,7 @@
  #include "optimizer/tlist.h"
  #include "optimizer/var.h"
  #include "parser/parse_expr.h"
+#include "utils/lsyscache.h"
  
  
  /*****************************************************************************
@@ -202,6 +203,109 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList)
  }
  
  
+/*****************************************************************************
+ *             Functions to extract data from a list of SortGroupClauses
+ *
+ * These don't really belong in tlist.c, but they are sort of related to the
+ * functions just above, and they don't seem to deserve their own file.
+ *****************************************************************************/
+
+/*
+ * extract_grouping_ops - build a palloc'd array holding the equality
+ *             operator OID (eqop) of each SortGroupClause, in list order
+ */
+Oid *
+extract_grouping_ops(List *groupClause)
+{
+       int                     numCols = list_length(groupClause);
+       int                     colno = 0;
+       Oid                *groupOperators;
+       ListCell   *glitem;
+
+       groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);  /* one slot per clause */
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               groupOperators[colno] = groupcl->eqop;
+               Assert(OidIsValid(groupOperators[colno]));  /* every clause must carry an eqop */
+               colno++;
+       }
+
+       return groupOperators;
+}
+
+/*
+ * extract_grouping_cols - build a palloc'd array holding, for each
+ *             SortGroupClause in list order, the resno of its tlist entry
+ */
+AttrNumber *
+extract_grouping_cols(List *groupClause, List *tlist)
+{
+       AttrNumber *grpColIdx;
+       int                     numCols = list_length(groupClause);
+       int                     colno = 0;
+       ListCell   *glitem;
+
+       grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);  /* one slot per clause */
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+               TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);  /* TLE for this clause */
+
+               grpColIdx[colno++] = tle->resno;        /* record its column number, then advance */
+       }
+
+       return grpColIdx;
+}
+
+/*
+ * grouping_is_sortable - can the grouping list be implemented by sorting?
+ *
+ * True iff every clause has a valid sortop; the parser stores one if it exists.
+ */
+bool
+grouping_is_sortable(List *groupClause)
+{
+       ListCell   *glitem;
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               if (!OidIsValid(groupcl->sortop))
+                       return false;           /* this column has no sort operator */
+       }
+       return true;            /* no clause lacked a sortop */
+}
+
+/*
+ * grouping_is_hashable - can the grouping list be implemented by hashing?
+ *
+ * We assume hashing is OK if the equality operators are marked oprcanhash.
+ * (If there isn't actually a supporting hash function, the executor will
+ * complain at runtime; but this is a misdeclaration of the operator, not
+ * a system bug.)
+ */
+bool
+grouping_is_hashable(List *groupClause)
+{
+       ListCell   *glitem;
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               if (!op_hashjoinable(groupcl->eqop))    /* tested on the clause's equality operator */
+                       return false;           /* one non-hashable column rules out hashing */
+       }
+       return true;            /* no clause failed the check */
+}
+
+
+
  /*
   * Does tlist have same output datatypes as listed in colTypes?
   *
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 2cc3f28d20697630f7a3ef392579090b1e49701f..3de232ba71bd18cc587641ed2bffd84eb7bea685 100644 (file)
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- *     $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.375 2008/08/02 21:32:00 tgl Exp $
+ *     $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.376 2008/08/07 01:11:51 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -33,6 +33,7 @@
  #include "parser/parse_clause.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
  #include "parser/parse_relation.h"
  #include "parser/parse_target.h"
  #include "parser/parsetree.h"
@@ -1326,6 +1327,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt)
  
                 op->colTypes = NIL;
                 op->colTypmods = NIL;
+               op->groupClauses = NIL;
                 /* don't have a "foreach4", so chase two of the lists by hand */
                 lcm = list_head(lcoltypmods);
                 rcm = list_head(rcoltypmods);
@@ -1349,6 +1351,31 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt)
                         op->colTypes = lappend_oid(op->colTypes, rescoltype);
                         op->colTypmods = lappend_int(op->colTypmods, rescoltypmod);
  
+                       /*
+                        * For all cases except UNION ALL, identify the grouping operators
+                        * (and, if available, sorting operators) that will be used to
+                        * eliminate duplicates.
+                        */
+                       if (op->op != SETOP_UNION || !op->all)
+                       {
+                               SortGroupClause *grpcl = makeNode(SortGroupClause);
+                               Oid                     sortop;
+                               Oid                     eqop;
+
+                               /* determine the eqop and optional sortop */
+                               get_sort_group_operators(rescoltype,
+                                                                                false, true, false,
+                                                                                &sortop, &eqop, NULL);
+
+                               /* we don't have a tlist yet, so can't assign sortgrouprefs */
+                               grpcl->tleSortGroupRef = 0;
+                               grpcl->eqop = eqop;
+                               grpcl->sortop = sortop;
+                               grpcl->nulls_first = false;             /* OK with or without sortop */
+
+                               op->groupClauses = lappend(op->groupClauses, grpcl);
+                       }
+
                         lcm = lnext(lcm);
                         rcm = lnext(rcm);
                 }
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c

index 2b04ee5e33783301dc2450cd9b22c8798e01520e..79b45414d47fe94e3a1454b689c4adf87559a6a6 100644 (file)
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.175 2008/08/07 01:11:51 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -70,6 +70,9 @@ static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
                                         List *sortlist, List *targetlist,
                                         SortByDir sortby_dir, SortByNulls sortby_nulls,
                                         List *sortby_opname, bool resolveUnknown);
+static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
+                                        List *grouplist, List *targetlist,
+                                        bool resolveUnknown);
  
  
  /*
@@ -1355,7 +1358,7 @@ transformGroupClause(ParseState *pstate, List *grouplist,
                 if (!found)
                         result = addTargetToGroupList(pstate, tle,
                                                                                   result, *targetlist,
-                                                                                 false, true);
+                                                                                 true);
         }
  
         return result;
@@ -1456,7 +1459,7 @@ transformDistinctClause(ParseState *pstate,
                         continue;                       /* ignore junk */
                 result = addTargetToGroupList(pstate, tle,
                                                                           result, *targetlist,
-                                                                         false, true);
+                                                                         true);
         }
  
         return result;
@@ -1551,7 +1554,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
                 result = addTargetToGroupList(pstate, tle,
                                                                           result, *targetlist,
-                                                                         false, true);
+                                                                         true);
         }
  
         return result;
@@ -1679,10 +1682,6 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
   * the TLE is considered "already in the list" if it appears there with any
   * sorting semantics.
   *
- * If requireSortOp is TRUE, we require a sorting operator to be found too.
- * XXX this argument should eventually be obsolete, but for now there are
- * parts of the system that can't support non-sortable grouping lists.
- *
   * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
   * do nothing (which implies the search for an equality operator will fail).
   * pstate should be provided if resolveUnknown is TRUE, but can be NULL
@@ -1690,10 +1689,10 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
   *
   * Returns the updated SortGroupClause list.
   */
-List *
+static List *
  addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
                                          List *grouplist, List *targetlist,
-                                        bool requireSortOp, bool resolveUnknown)
+                                        bool resolveUnknown)
  {
         Oid                     restype = exprType((Node *) tle->expr);
         Oid                     sortop;
@@ -1716,7 +1715,7 @@ addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
  
                 /* determine the eqop and optional sortop */
                 get_sort_group_operators(restype,
-                                                                requireSortOp, true, false,
+                                                                false, true, false,
                                                                  &sortop, &eqop, NULL);
  
                 grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 6be25b05f979444a6e9dcc0f2164630914044d2a..b844577c883eee8185dc9a3921b6cef636cf3363 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.475 2008/08/05 12:09:30 mha Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.476 2008/08/07 01:11:51 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200808051
+#define CATALOG_VERSION_NO     200808061
  
  #endif
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index f1a1e828fc486cb33843cf3c8c004f8e744b409e..65c3698a841995ee793c6e72c75e3eb2b9750ca3 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.371 2008/08/07 01:11:51 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -797,7 +797,12 @@ typedef struct SelectStmt
   * top-level Query node containing the leaf SELECTs as subqueries in its
   * range table.  Its setOperations field shows the tree of set operations,
   * with leaf SelectStmt nodes replaced by RangeTblRef nodes, and internal
- * nodes replaced by SetOperationStmt nodes.
+ * nodes replaced by SetOperationStmt nodes.  Information about the output
+ * column types is added, too.  (Note that the child nodes do not necessarily
+ * produce these types directly, but we've checked that their output types
+ * can be coerced to the output column type.)  Also, if it's not UNION ALL,
+ * information about the types' sort/group semantics is provided in the form
+ * of a SortGroupClause list (same representation as, eg, DISTINCT).
   * ----------------------
   */
  typedef struct SetOperationStmt
@@ -812,6 +817,8 @@ typedef struct SetOperationStmt
         /* Fields derived during parse analysis: */
         List       *colTypes;           /* OID list of output column type OIDs */
         List       *colTypmods;         /* integer list of output column typmods */
+       List       *groupClauses;       /* a list of SortGroupClause's */
+       /* groupClauses is NIL if UNION ALL, but must be set otherwise */
  } SetOperationStmt;
  
  
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h

index 8a8966213b1908b47501398189b83cb3970f8a6e..9899b14c82f7438dbe3696c39168c106eed5a1ec 100644 (file)
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.51 2008/08/07 01:11:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -32,6 +32,11 @@ extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause,
  extern List *get_sortgrouplist_exprs(List *sgClauses,
                                                 List *targetList);
  
+extern Oid *extract_grouping_ops(List *groupClause);
+extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
+extern bool grouping_is_sortable(List *groupClause);
+extern bool grouping_is_hashable(List *groupClause);
+
  extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK);
  
  #endif   /* TLIST_H */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h

index 357a2947cb58dfba9d7dabedc18ceca197f76ece..ffea3466b5cd6d760e83492ce640518ef5fdf2e9 100644 (file)
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.52 2008/08/07 01:11:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -35,9 +35,6 @@ extern List *transformDistinctClause(ParseState *pstate,
  extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
                                                 List **targetlist, List *sortClause);
  
-extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
-                                        List *grouplist, List *targetlist,
-                                        bool requireSortOp, bool resolveUnknown);
  extern Index assignSortGroupRef(TargetEntry *tle, List *tlist);
  extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList);
  
diff --git a/src/test/regress/expected/copyselect.out b/src/test/regress/expected/copyselect.out

index c42bad143e4d4e30e11ee294b48899d31258110b..8a42b0e3d807e6b99eecbb4ef8588df8650c357f 100644 (file)
--- a/src/test/regress/expected/copyselect.out
+++ b/src/test/regress/expected/copyselect.out
@@ -73,7 +73,7 @@ copy (select * from test1 join test2 using (id)) to stdout;
  --
  -- Test UNION SELECT
  --
-copy (select t from test1 where id = 1 UNION select * from v_test1) to stdout;
+copy (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) to stdout;
  a
  v_a
  v_b
@@ -83,7 +83,7 @@ v_e
  --
  -- Test subselect
  --
-copy (select * from (select t from test1 where id = 1 UNION select * from v_test1) t1) to stdout;
+copy (select * from (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) t1) to stdout;
  a
  v_a
  v_b
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index abcbc9503a8bbebbb4be6115fb7c9a0d36bbc466..722f9651be1c3f95074ffec49b682fbfcaccf796 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -102,7 +102,7 @@ SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3;
       3
  (3 rows)
  
-SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8;
+SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1;
   two 
  -----
   1.1
@@ -129,7 +129,8 @@ SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2);
  --
  SELECT f1 AS five FROM FLOAT8_TBL
  UNION
-SELECT f1 FROM FLOAT8_TBL;
+SELECT f1 FROM FLOAT8_TBL
+ORDER BY 1;
           five          
  -----------------------
   -1.2345678901234e+200
@@ -158,7 +159,8 @@ SELECT f1 FROM FLOAT8_TBL;
  
  SELECT f1 AS nine FROM FLOAT8_TBL
  UNION
-SELECT f1 FROM INT4_TBL;
+SELECT f1 FROM INT4_TBL
+ORDER BY 1;
           nine          
  -----------------------
   -1.2345678901234e+200
@@ -205,7 +207,8 @@ SELECT f1 FROM INT4_TBL
  
  SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL
  UNION
-SELECT f1 FROM CHAR_TBL;
+SELECT f1 FROM CHAR_TBL
+ORDER BY 1;
   three 
  -------
   a   
@@ -215,7 +218,8 @@ SELECT f1 FROM CHAR_TBL;
  
  SELECT f1 AS three FROM VARCHAR_TBL
  UNION
-SELECT CAST(f1 AS varchar) FROM CHAR_TBL;
+SELECT CAST(f1 AS varchar) FROM CHAR_TBL
+ORDER BY 1;
   three 
  -------
   a
@@ -242,7 +246,8 @@ SELECT f1 AS five FROM TEXT_TBL
  UNION
  SELECT f1 FROM VARCHAR_TBL
  UNION
-SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL;
+SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL
+ORDER BY 1;
         five        
  -------------------
   a
diff --git a/src/test/regress/sql/copyselect.sql b/src/test/regress/sql/copyselect.sql

index c2526487c8c2d0a27b1e766ab6dd0f17411c54e8..beca507ae71e216985469c99c8ab13b33a850ca6 100644 (file)
--- a/src/test/regress/sql/copyselect.sql
+++ b/src/test/regress/sql/copyselect.sql
@@ -53,11 +53,11 @@ copy (select * from test1 join test2 using (id)) to stdout;
  --
  -- Test UNION SELECT
  --
-copy (select t from test1 where id = 1 UNION select * from v_test1) to stdout;
+copy (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) to stdout;
  --
  -- Test subselect
  --
-copy (select * from (select t from test1 where id = 1 UNION select * from v_test1) t1) to stdout;
+copy (select * from (select t from test1 where id = 1 UNION select * from v_test1 ORDER BY 1) t1) to stdout;
  --
  -- Test headers, CSV and quotes
  --
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql

index 0f846091cd10c298c41e87b5326e397e6866a3dc..0b83d6b185b47cbdfa178005d3bc762546a8c541 100644 (file)
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -34,7 +34,7 @@ SELECT 1.0::float8 AS two UNION ALL SELECT 1;
  
  SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3;
  
-SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8;
+SELECT 1.1::float8 AS two UNION SELECT 2 UNION SELECT 2.0::float8 ORDER BY 1;
  
  SELECT 1.1 AS three UNION SELECT 2 UNION ALL SELECT 2;
  
@@ -46,7 +46,8 @@ SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2);
  
  SELECT f1 AS five FROM FLOAT8_TBL
  UNION
-SELECT f1 FROM FLOAT8_TBL;
+SELECT f1 FROM FLOAT8_TBL
+ORDER BY 1;
  
  SELECT f1 AS ten FROM FLOAT8_TBL
  UNION ALL
@@ -54,7 +55,8 @@ SELECT f1 FROM FLOAT8_TBL;
  
  SELECT f1 AS nine FROM FLOAT8_TBL
  UNION
-SELECT f1 FROM INT4_TBL;
+SELECT f1 FROM INT4_TBL
+ORDER BY 1;
  
  SELECT f1 AS ten FROM FLOAT8_TBL
  UNION ALL
@@ -68,11 +70,13 @@ SELECT f1 FROM INT4_TBL
  
  SELECT CAST(f1 AS char(4)) AS three FROM VARCHAR_TBL
  UNION
-SELECT f1 FROM CHAR_TBL;
+SELECT f1 FROM CHAR_TBL
+ORDER BY 1;
  
  SELECT f1 AS three FROM VARCHAR_TBL
  UNION
-SELECT CAST(f1 AS varchar) FROM CHAR_TBL;
+SELECT CAST(f1 AS varchar) FROM CHAR_TBL
+ORDER BY 1;
  
  SELECT f1 AS eight FROM VARCHAR_TBL
  UNION ALL
@@ -82,7 +86,8 @@ SELECT f1 AS five FROM TEXT_TBL
  UNION
  SELECT f1 FROM VARCHAR_TBL
  UNION
-SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL;
+SELECT TRIM(TRAILING FROM f1) FROM CHAR_TBL
+ORDER BY 1;
  
  --
  -- INTERSECT and EXCEPT
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 7 Aug 2008 01:11:52 +0000 (01:11 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 7 Aug 2008 01:11:52 +0000 (01:11 +0000)
src/backend/catalog/dependency.c		patch \| blob \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/nodes/readfuncs.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| history
src/backend/optimizer/util/clauses.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/backend/optimizer/util/tlist.c		patch \| blob \| history
src/backend/parser/analyze.c		patch \| blob \| history
src/backend/parser/parse_clause.c		patch \| blob \| history
src/include/catalog/catversion.h		patch \| blob \| history
src/include/nodes/parsenodes.h		patch \| blob \| history
src/include/optimizer/tlist.h		patch \| blob \| history
src/include/parser/parse_clause.h		patch \| blob \| history
src/test/regress/expected/copyselect.out		patch \| blob \| history
src/test/regress/expected/union.out		patch \| blob \| history
src/test/regress/sql/copyselect.sql		patch \| blob \| history
src/test/regress/sql/union.sql		patch \| blob \| history