Finish implementation of hashed aggregation. Add enable_hashagg GUC

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 21 Nov 2002 00:42:20 +0000 (00:42 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 21 Nov 2002 00:42:20 +0000 (00:42 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Nov 2002 00:42:20 +0000 (00:42 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Nov 2002 00:42:20 +0000 (00:42 +0000)
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml

index 5083965b514cc83d47a4978e2577f74efb485858..70b29118a22e8e160e00a14a887dcc48e5aa1bc8 100644 (file)
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,5 +1,5 @@
  <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.156 2002/11/15 03:22:30 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.157 2002/11/21 00:42:18 tgl Exp $
  -->
  
  <Chapter Id="runtime">
@@ -670,6 +670,17 @@ env PGOPTIONS='-c geqo=off' psql
        </listitem>
       </varlistentry>
  
+     <varlistentry>
+      <term><varname>ENABLE_HASHAGG</varname> (<type>boolean</type>)</term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's use of hashed aggregation
+       plan types. The default is on. This is used for debugging the query
+       planner.
+       </para>
+      </listitem>
+     </varlistentry>
+
       <varlistentry>
        <term><varname>ENABLE_HASHJOIN</varname> (<type>boolean</type>)</term>
        <listitem>
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 9f987a4395b8eebd7b3d265eff38eceed875d2a8..6cf8b2af4b5f3db0dd1bc90c88d1651c42da3b1a 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.90 2002/09/04 20:31:20 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -79,6 +79,7 @@ bool          enable_seqscan = true;
  bool           enable_indexscan = true;
  bool           enable_tidscan = true;
  bool           enable_sort = true;
+bool           enable_hashagg = true;
  bool           enable_nestloop = true;
  bool           enable_mergejoin = true;
  bool           enable_hashjoin = true;
@@ -423,10 +424,8 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
  
  /*
   * cost_sort
- *       Determines and returns the cost of sorting a relation.
- *
- * The cost of supplying the input data is NOT included; the caller should
- * add that cost to both startup and total costs returned from this routine!
+ *       Determines and returns the cost of sorting a relation, including
+ *       the cost of reading the input data.
   *
   * If the total volume of data to sort is less than SortMem, we will do
   * an in-memory sort, which requires no I/O and about t*log2(t) tuple
@@ -449,6 +448,7 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
   * the right ballpark in most cases.
   *
   * 'pathkeys' is a list of sort keys
+ * 'input_cost' is the total cost for reading the input data
   * 'tuples' is the number of tuples in the relation
   * 'width' is the average tuple width in bytes
   *
@@ -456,12 +456,14 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
   * can't conveniently supply the sort keys.  Since this routine doesn't
   * currently do anything with pathkeys anyway, that doesn't matter...
   * but if it ever does, it should react gracefully to lack of key data.
+ * (Actually, the thing we'd most likely be interested in is just the number
+ * of sort keys, which all callers *could* supply.)
   */
  void
  cost_sort(Path *path, Query *root,
-                 List *pathkeys, double tuples, int width)
+                 List *pathkeys, Cost input_cost, double tuples, int width)
  {
-       Cost            startup_cost = 0;
+       Cost            startup_cost = input_cost;
         Cost            run_cost = 0;
         double          nbytes = relation_byte_size(tuples, width);
         long            sortmembytes = SortMem * 1024L;
@@ -511,6 +513,92 @@ cost_sort(Path *path, Query *root,
         path->total_cost = startup_cost + run_cost;
  }
  
+/*
+ * cost_agg
+ *             Determines and returns the cost of performing an Agg plan node,
+ *             including the cost of its input.
+ *
+ * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
+ * are for appropriately-sorted input.
+ */
+void
+cost_agg(Path *path, Query *root,
+                AggStrategy aggstrategy, int numAggs,
+                int numGroupCols, double numGroups,
+                Cost input_startup_cost, Cost input_total_cost,
+                double input_tuples)
+{
+       Cost            startup_cost;
+       Cost            total_cost;
+
+       /*
+        * We charge one cpu_operator_cost per aggregate function per input
+        * tuple, and another one per output tuple (corresponding to transfn
+        * and finalfn calls respectively).  If we are grouping, we charge an
+        * additional cpu_operator_cost per grouping column per input tuple
+        * for grouping comparisons.
+        *
+        * We will produce a single output tuple if not grouping,
+        * and a tuple per group otherwise.
+        */
+       if (aggstrategy == AGG_PLAIN)
+       {
+               startup_cost = input_total_cost;
+               startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
+               /* we aren't grouping */
+               total_cost = startup_cost;
+       }
+       else if (aggstrategy == AGG_SORTED)
+       {
+               /* Here we are able to deliver output on-the-fly */
+               startup_cost = input_startup_cost;
+               total_cost = input_total_cost;
+               total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
+               total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+       }
+       else
+       {
+               /* must be AGG_HASHED */
+               startup_cost = input_total_cost;
+               startup_cost += cpu_operator_cost * input_tuples * numAggs;
+               startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
+               total_cost = startup_cost;
+               total_cost += cpu_operator_cost * numGroups * numAggs;
+       }
+
+       path->startup_cost = startup_cost;
+       path->total_cost = total_cost;
+}
+
+/*
+ * cost_group
+ *             Determines and returns the cost of performing a Group plan node,
+ *             including the cost of its input.
+ *
+ * Note: caller must ensure that input costs are for appropriately-sorted
+ * input.
+ */
+void
+cost_group(Path *path, Query *root,
+                  int numGroupCols, double numGroups,
+                  Cost input_startup_cost, Cost input_total_cost,
+                  double input_tuples)
+{
+       Cost            startup_cost;
+       Cost            total_cost;
+
+       startup_cost = input_startup_cost;
+       total_cost = input_total_cost;
+
+       /*
+        * Charge one cpu_operator_cost per comparison per input tuple. We
+        * assume all columns get compared at most of the tuples.
+        */
+       total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = total_cost;
+}
  
  /*
   * cost_nestloop
@@ -658,10 +746,10 @@ cost_mergejoin(Path *path, Query *root,
          */
         if (outersortkeys)                      /* do we need to sort outer? */
         {
-               startup_cost += outer_path->total_cost;
                 cost_sort(&sort_path,
                                   root,
                                   outersortkeys,
+                                 outer_path->total_cost,
                                   outer_path->parent->rows,
                                   outer_path->parent->width);
                 startup_cost += sort_path.startup_cost;
@@ -677,10 +765,10 @@ cost_mergejoin(Path *path, Query *root,
  
         if (innersortkeys)                      /* do we need to sort inner? */
         {
-               startup_cost += inner_path->total_cost;
                 cost_sort(&sort_path,
                                   root,
                                   innersortkeys,
+                                 inner_path->total_cost,
                                   inner_path->parent->rows,
                                   inner_path->parent->width);
                 startup_cost += sort_path.startup_cost;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 74e6f237b3ed90b33e3dee6d896f5da6793820d5..b393252542fc81316f940fecf12bb355f6d744b6 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.123 2002/11/19 23:21:58 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1573,9 +1573,11 @@ make_sort(Query *root, List *tlist, Plan *lefttree, int keycount)
  
         copy_plan_costsize(plan, lefttree); /* only care about copying size */
         cost_sort(&sort_path, root, NIL,
-                         lefttree->plan_rows, lefttree->plan_width);
-       plan->startup_cost = sort_path.startup_cost + lefttree->total_cost;
-       plan->total_cost = sort_path.total_cost + lefttree->total_cost;
+                         lefttree->total_cost,
+                         lefttree->plan_rows,
+                         lefttree->plan_width);
+       plan->startup_cost = sort_path.startup_cost;
+       plan->total_cost = sort_path.total_cost;
         plan->state = (EState *) NULL;
         plan->targetlist = tlist;
         plan->qual = NIL;
@@ -1683,39 +1685,39 @@ make_material(List *tlist, Plan *lefttree)
  }
  
  Agg *
-make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
-                int ngrp, AttrNumber *grpColIdx, long numGroups, int numAggs,
+make_agg(Query *root, List *tlist, List *qual,
+                AggStrategy aggstrategy,
+                int numGroupCols, AttrNumber *grpColIdx,
+                long numGroups, int numAggs,
                  Plan *lefttree)
  {
         Agg                *node = makeNode(Agg);
         Plan       *plan = &node->plan;
+       Path            agg_path;               /* dummy for result of cost_agg */
  
         node->aggstrategy = aggstrategy;
-       node->numCols = ngrp;
+       node->numCols = numGroupCols;
         node->grpColIdx = grpColIdx;
         node->numGroups = numGroups;
  
-       copy_plan_costsize(plan, lefttree);
-
-       /*
-        * Charge one cpu_operator_cost per aggregate function per input
-        * tuple.
-        */
-       plan->total_cost += cpu_operator_cost * plan->plan_rows * numAggs;
+       copy_plan_costsize(plan, lefttree); /* only care about copying size */
+       cost_agg(&agg_path, root,
+                        aggstrategy, numAggs,
+                        numGroupCols, numGroups,
+                        lefttree->startup_cost,
+                        lefttree->total_cost,
+                        lefttree->plan_rows);
+       plan->startup_cost = agg_path.startup_cost;
+       plan->total_cost = agg_path.total_cost;
  
         /*
          * We will produce a single output tuple if not grouping,
          * and a tuple per group otherwise.
          */
         if (aggstrategy == AGG_PLAIN)
-       {
                 plan->plan_rows = 1;
-               plan->startup_cost = plan->total_cost;
-       }
         else
-       {
                 plan->plan_rows = numGroups;
-       }
  
         plan->state = (EState *) NULL;
         plan->qual = qual;
@@ -1727,22 +1729,28 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
  }
  
  Group *
-make_group(List *tlist,
-                  int ngrp,
+make_group(Query *root,
+                  List *tlist,
+                  int numGroupCols,
                    AttrNumber *grpColIdx,
                    double numGroups,
                    Plan *lefttree)
  {
         Group      *node = makeNode(Group);
         Plan       *plan = &node->plan;
+       Path            group_path;             /* dummy for result of cost_group */
  
-       copy_plan_costsize(plan, lefttree);
+       node->numCols = numGroupCols;
+       node->grpColIdx = grpColIdx;
  
-       /*
-        * Charge one cpu_operator_cost per comparison per input tuple. We
-        * assume all columns get compared at most of the tuples.
-        */
-       plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
+       copy_plan_costsize(plan, lefttree); /* only care about copying size */
+       cost_group(&group_path, root,
+                          numGroupCols, numGroups,
+                          lefttree->startup_cost,
+                          lefttree->total_cost,
+                          lefttree->plan_rows);
+       plan->startup_cost = group_path.startup_cost;
+       plan->total_cost = group_path.total_cost;
  
         /* One output tuple per estimated result group */
         plan->plan_rows = numGroups;
@@ -1752,8 +1760,6 @@ make_group(List *tlist,
         plan->targetlist = tlist;
         plan->lefttree = lefttree;
         plan->righttree = (Plan *) NULL;
-       node->numCols = ngrp;
-       node->grpColIdx = grpColIdx;
  
         return node;
  }
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index 4354a5eb035f723b8b01067cc4e47531fa1fae5d..5d2634bf82d883d55d87da2bb7c6cebdc447c4d2 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,19 +14,17 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.71 2002/11/06 00:00:44 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.72 2002/11/21 00:42:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
  
  #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/planmain.h"
-#include "optimizer/tlist.h"
-#include "parser/parsetree.h"
-#include "utils/memutils.h"
  
  
  /*--------------------
@@ -36,11 +34,12 @@
   *
   * Since query_planner does not handle the toplevel processing (grouping,
   * sorting, etc) it cannot select the best path by itself.  It selects
- * two paths: the cheapest path that produces the required tuples, independent
- * of any ordering considerations, and the cheapest path that produces the
- * required tuples in the required ordering, if there is a path that
- * can produce them without an explicit top-level sort step.  The caller
- * (grouping_planner) will make the final decision about which to use.
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path.  The caller (grouping_planner)
+ * will make the final decision about which to use.
   *
   * Input parameters:
   * root is the query to plan
@@ -50,7 +49,7 @@
   * Output parameters:
   * *cheapest_path receives the overall-cheapest path for the query
   * *sorted_path receives the cheapest presorted path for the query,
- *                             if any (it may be NULL, or the same as cheapest_path)
+ *                             if any (NULL if there is no useful presorted path)
   *
   * Note: the Query node also includes a query_pathkeys field, which is both
   * an input and an output of query_planner().  The input value signals
@@ -78,6 +77,8 @@ query_planner(Query *root, List *tlist, double tuple_fraction,
  {
         List       *constant_quals;
         RelOptInfo *final_rel;
+       Path       *cheapestpath;
+       Path       *sortedpath;
  
         /*
          * If the query has an empty join tree, then it's something easy like
@@ -166,34 +167,76 @@ query_planner(Query *root, List *tlist, double tuple_fraction,
  
         /*
          * Pick out the cheapest-total path and the cheapest presorted path
-        * for the requested pathkeys (if there is one).  We can take the
+        * for the requested pathkeys (if there is one).  We should take the
          * tuple fraction into account when selecting the cheapest presorted
          * path, but not when selecting the cheapest-total path, since if we
          * have to sort then we'll have to fetch all the tuples.  (But there's
          * a special case: if query_pathkeys is NIL, meaning order doesn't
          * matter, then the "cheapest presorted" path will be the cheapest
          * overall for the tuple fraction.)
+        *
+        * The cheapest-total path is also the one to use if grouping_planner
+        * decides to use hashed aggregation, so we return it separately even
+        * if this routine thinks the presorted path is the winner.
          */
-       *cheapest_path = final_rel->cheapest_total_path;
+       cheapestpath = final_rel->cheapest_total_path;
  
-       *sorted_path =
+       sortedpath =
                 get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
                                                                                                   root->query_pathkeys,
                                                                                                   tuple_fraction);
  
+       /* Don't return same path in both guises; just wastes effort */
+       if (sortedpath == cheapestpath)
+               sortedpath = NULL;
+
+       /*
+        * Forget about the presorted path if it would be cheaper to sort the
+        * cheapest-total path.  Here we need consider only the behavior at
+        * the tuple fraction point.
+        */
+       if (sortedpath)
+       {
+               Path            sort_path;      /* dummy for result of cost_sort */
+
+               if (root->query_pathkeys == NIL ||
+                       pathkeys_contained_in(root->query_pathkeys,
+                                                                 cheapestpath->pathkeys))
+               {
+                       /* No sort needed for cheapest path */
+                       sort_path.startup_cost = cheapestpath->startup_cost;
+                       sort_path.total_cost = cheapestpath->total_cost;
+               }
+               else
+               {
+                       /* Figure cost for sorting */
+                       cost_sort(&sort_path, root, root->query_pathkeys,
+                                         cheapestpath->total_cost,
+                                         final_rel->rows, final_rel->width);
+               }
+
+               if (compare_fractional_path_costs(sortedpath, &sort_path,
+                                                                                 tuple_fraction) > 0)
+               {
+                       /* Presorted path is a loser */
+                       sortedpath = NULL;
+               }
+       }
+
         /*
          * If we have constant quals, add a toplevel Result step to process them.
          */
         if (constant_quals)
         {
-               *cheapest_path = (Path *)
-                       create_result_path((*cheapest_path)->parent,
-                                                          *cheapest_path,
-                                                          constant_quals);
-               if (*sorted_path)
-                       *sorted_path = (Path *)
-                               create_result_path((*sorted_path)->parent,
-                                                                  *sorted_path,
-                                                                  constant_quals);
+               cheapestpath = (Path *) create_result_path(final_rel,
+                                                                                                  cheapestpath,
+                                                                                                  constant_quals);
+               if (sortedpath)
+                       sortedpath = (Path *) create_result_path(final_rel,
+                                                                                                        sortedpath,
+                                                                                                        constant_quals);
         }
+
+       *cheapest_path = cheapestpath;
+       *sorted_path = sortedpath;
  }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index baccf2ffbda32342fcffc91e2ddb9653231c34e5..9a5141ea6b94891b55eda99014626783568652a0 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -933,11 +933,13 @@ grouping_planner(Query *parse, double tuple_fraction)
                 List       *sub_tlist;
                 List       *group_pathkeys;
                 AttrNumber *groupColIdx = NULL;
+               double          sub_tuple_fraction;
                 Path       *cheapest_path;
                 Path       *sorted_path;
                 double          dNumGroups = 0;
                 long            numGroups = 0;
                 int                     numAggs = 0;
+               int                     numGroupCols = length(parse->groupClause);
                 bool            use_hashed_grouping = false;
  
                 /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
@@ -1169,6 +1171,12 @@ grouping_planner(Query *parse, double tuple_fraction)
                         }
                 }
  
+               /*
+                * With grouping or aggregation, the tuple fraction to pass to
+                * query_planner() may be different from what it is at top level.
+                */
+               sub_tuple_fraction = tuple_fraction;
+
                 if (parse->groupClause)
                 {
                         /*
@@ -1182,8 +1190,8 @@ grouping_planner(Query *parse, double tuple_fraction)
                          * amounts to assuming that all the groups are about the same
                          * size).
                          */
-                       if (tuple_fraction >= 1.0)
-                               tuple_fraction = 0.25;
+                       if (sub_tuple_fraction >= 1.0)
+                               sub_tuple_fraction = 0.25;
  
                         /*
                          * If both GROUP BY and ORDER BY are specified, we will need
@@ -1195,7 +1203,7 @@ grouping_planner(Query *parse, double tuple_fraction)
                         if (parse->groupClause && parse->sortClause &&
                                 !noncanonical_pathkeys_contained_in(sort_pathkeys,
                                                                                                         group_pathkeys))
-                               tuple_fraction = 0.0;
+                               sub_tuple_fraction = 0.0;
                 }
                 else if (parse->hasAggs)
                 {
@@ -1203,7 +1211,7 @@ grouping_planner(Query *parse, double tuple_fraction)
                          * Ungrouped aggregate will certainly want all the input
                          * tuples.
                          */
-                       tuple_fraction = 0.0;
+                       sub_tuple_fraction = 0.0;
                 }
                 else if (parse->distinctClause)
                 {
@@ -1212,15 +1220,15 @@ grouping_planner(Query *parse, double tuple_fraction)
                          * number of input tuples per output tuple.  Handle the same
                          * way.
                          */
-                       if (tuple_fraction >= 1.0)
-                               tuple_fraction = 0.25;
+                       if (sub_tuple_fraction >= 1.0)
+                               sub_tuple_fraction = 0.25;
                 }
  
                 /*
                  * Generate the best unsorted and presorted paths for this Query
                  * (but note there may not be any presorted path).
                  */
-               query_planner(parse, sub_tlist, tuple_fraction,
+               query_planner(parse, sub_tlist, sub_tuple_fraction,
                                           &cheapest_path, &sorted_path);
  
                 /*
@@ -1236,11 +1244,13 @@ grouping_planner(Query *parse, double tuple_fraction)
                 if (parse->groupClause)
                 {
                         /*
-                        * Always estimate the number of groups.
+                        * Always estimate the number of groups.  We can't do this until
+                        * after running query_planner(), either.
                          */
                         dNumGroups = estimate_num_groups(parse,
                                                                                          parse->groupClause,
                                                                                          cheapest_path->parent->rows);
+                       /* Also want it as a long int --- but 'ware overflow! */
                         numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
  
                         /*
@@ -1248,9 +1258,11 @@ grouping_planner(Query *parse, double tuple_fraction)
                          * aggregates.  (Doing so would imply storing *all* the input
                          * values in the hash table, which seems like a certain loser.)
                          */
-                       if (parse->hasAggs &&
-                               (contain_distinct_agg_clause((Node *) tlist) ||
-                                contain_distinct_agg_clause(parse->havingQual)))
+                       if (!enable_hashagg)
+                               use_hashed_grouping = false;
+                       else if (parse->hasAggs &&
+                                        (contain_distinct_agg_clause((Node *) tlist) ||
+                                         contain_distinct_agg_clause(parse->havingQual)))
                                 use_hashed_grouping = false;
                         else
                         {
@@ -1272,11 +1284,96 @@ grouping_planner(Query *parse, double tuple_fraction)
  
                                 if (hashentrysize * dNumGroups <= SortMem * 1024L)
                                 {
-                                       /* much more to do here */
-#if 0
-                                       /* TEMPORARY HOTWIRE FOR TESTING */
-                                       use_hashed_grouping = true;
-#endif
+                                       /*
+                                        * Okay, do the cost comparison.  We need to consider
+                                        *      cheapest_path + hashagg [+ final sort]
+                                        * versus either
+                                        *      cheapest_path [+ sort] + group or agg [+ final sort]
+                                        * or
+                                        *      presorted_path + group or agg [+ final sort]
+                                        * where brackets indicate a step that may not be needed.
+                                        * We assume query_planner() will have returned a
+                                        * presorted path only if it's a winner compared to
+                                        * cheapest_path for this purpose.
+                                        *
+                                        * These path variables are dummies that just hold cost
+                                        * fields; we don't make actual Paths for these steps.
+                                        */
+                                       Path            hashed_p;
+                                       Path            sorted_p;
+
+                                       cost_agg(&hashed_p, parse,
+                                                        AGG_HASHED, numAggs,
+                                                        numGroupCols, dNumGroups,
+                                                        cheapest_path->startup_cost,
+                                                        cheapest_path->total_cost,
+                                                        cheapest_path->parent->rows);
+                                       /* Result of hashed agg is always unsorted */
+                                       if (sort_pathkeys)
+                                               cost_sort(&hashed_p, parse, sort_pathkeys,
+                                                                 hashed_p.total_cost,
+                                                                 dNumGroups,
+                                                                 cheapest_path->parent->width);
+
+                                       if (sorted_path)
+                                       {
+                                               sorted_p.startup_cost = sorted_path->startup_cost;
+                                               sorted_p.total_cost = sorted_path->total_cost;
+                                               current_pathkeys = sorted_path->pathkeys;
+                                       }
+                                       else
+                                       {
+                                               sorted_p.startup_cost = cheapest_path->startup_cost;
+                                               sorted_p.total_cost = cheapest_path->total_cost;
+                                               current_pathkeys = cheapest_path->pathkeys;
+                                       }
+                                       if (!pathkeys_contained_in(group_pathkeys,
+                                                                                          current_pathkeys))
+                                       {
+                                               cost_sort(&sorted_p, parse, group_pathkeys,
+                                                                 sorted_p.total_cost,
+                                                                 cheapest_path->parent->rows,
+                                                                 cheapest_path->parent->width);
+                                               current_pathkeys = group_pathkeys;
+                                       }
+                                       if (parse->hasAggs)
+                                               cost_agg(&sorted_p, parse,
+                                                                AGG_SORTED, numAggs,
+                                                                numGroupCols, dNumGroups,
+                                                                sorted_p.startup_cost,
+                                                                sorted_p.total_cost,
+                                                                cheapest_path->parent->rows);
+                                       else
+                                               cost_group(&sorted_p, parse,
+                                                                  numGroupCols, dNumGroups,
+                                                                  sorted_p.startup_cost,
+                                                                  sorted_p.total_cost,
+                                                                  cheapest_path->parent->rows);
+                                       /* The Agg or Group node will preserve ordering */
+                                       if (sort_pathkeys &&
+                                               !pathkeys_contained_in(sort_pathkeys,
+                                                                                          current_pathkeys))
+                                       {
+                                               cost_sort(&sorted_p, parse, sort_pathkeys,
+                                                                 sorted_p.total_cost,
+                                                                 dNumGroups,
+                                                                 cheapest_path->parent->width);
+                                       }
+
+                                       /*
+                                        * Now make the decision using the top-level tuple
+                                        * fraction.  First we have to convert an absolute
+                                        * count (LIMIT) into fractional form.
+                                        */
+                                       if (tuple_fraction >= 1.0)
+                                               tuple_fraction /= dNumGroups;
+
+                                       if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+                                                                                                         tuple_fraction) <= 0)
+                                       {
+                                               /* Hashed is cheaper, so use it */
+                                               use_hashed_grouping = true;
+                                       }
                                 }
                         }
                 }
@@ -1284,50 +1381,17 @@ grouping_planner(Query *parse, double tuple_fraction)
                 /*
                  * Select the best path and create a plan to execute it.
                  *
-                * If no special sort order is wanted, or if the cheapest path is
-                * already appropriately ordered, use the cheapest path.
-                * Otherwise, look to see if we have an already-ordered path that is
-                * cheaper than doing an explicit sort on the cheapest-total-cost
-                * path.
+                * If we are doing hashed grouping, we will always read all the
+                * input tuples, so use the cheapest-total path.  Otherwise,
+                * trust query_planner's decision about which to use.
                  */
-               if (parse->query_pathkeys == NIL ||
-                       pathkeys_contained_in(parse->query_pathkeys,
-                                                                 cheapest_path->pathkeys))
+               if (sorted_path && !use_hashed_grouping)
                 {
-                       result_plan = create_plan(parse, cheapest_path);
-                       current_pathkeys = cheapest_path->pathkeys;
-               }
-               else if (sorted_path)
-               {
-                       Path            sort_path;      /* dummy for result of cost_sort */
-
-                       cost_sort(&sort_path, parse, parse->query_pathkeys,
-                                         sorted_path->parent->rows, sorted_path->parent->width);
-                       sort_path.startup_cost += cheapest_path->total_cost;
-                       sort_path.total_cost += cheapest_path->total_cost;
-                       /* Convert absolute-count tuple_fraction into a fraction */
-                       if (tuple_fraction >= 1.0)
-                               tuple_fraction /= sorted_path->parent->rows;
-                       if (compare_fractional_path_costs(sorted_path, &sort_path,
-                                                                                         tuple_fraction) <= 0)
-                       {
-                               /* Presorted path is cheaper, use it */
-                               result_plan = create_plan(parse, sorted_path);
-                               current_pathkeys = sorted_path->pathkeys;
-                       }
-                       else
-                       {
-                               /* otherwise, doing it the hard way is still cheaper */
-                               result_plan = create_plan(parse, cheapest_path);
-                               current_pathkeys = cheapest_path->pathkeys;
-                       }
+                       result_plan = create_plan(parse, sorted_path);
+                       current_pathkeys = sorted_path->pathkeys;
                 }
                 else
                 {
-                       /*
-                        * No sorted path, so we must use the cheapest-total path.
-                        * The actual sort step will be generated below.
-                        */
                         result_plan = create_plan(parse, cheapest_path);
                         current_pathkeys = cheapest_path->pathkeys;
                 }
@@ -1362,10 +1426,11 @@ grouping_planner(Query *parse, double tuple_fraction)
                 if (use_hashed_grouping)
                 {
                         /* Hashed aggregate plan --- no sort needed */
-                       result_plan = (Plan *) make_agg(tlist,
+                       result_plan = (Plan *) make_agg(parse,
+                                                                                       tlist,
                                                                                         (List *) parse->havingQual,
                                                                                         AGG_HASHED,
-                                                                                       length(parse->groupClause),
+                                                                                       numGroupCols,
                                                                                         groupColIdx,
                                                                                         numGroups,
                                                                                         numAggs,
@@ -1401,10 +1466,11 @@ grouping_planner(Query *parse, double tuple_fraction)
                                 current_pathkeys = NIL;
                         }
  
-                       result_plan = (Plan *) make_agg(tlist,
+                       result_plan = (Plan *) make_agg(parse,
+                                                                                       tlist,
                                                                                         (List *) parse->havingQual,
                                                                                         aggstrategy,
-                                                                                       length(parse->groupClause),
+                                                                                       numGroupCols,
                                                                                         groupColIdx,
                                                                                         numGroups,
                                                                                         numAggs,
@@ -1436,11 +1502,13 @@ grouping_planner(Query *parse, double tuple_fraction)
                                         current_pathkeys = group_pathkeys;
                                 }
  
-                               result_plan = (Plan *) make_group(tlist,
-                                                                                                 length(parse->groupClause),
+                               result_plan = (Plan *) make_group(parse,
+                                                                                                 tlist,
+                                                                                                 numGroupCols,
                                                                                                   groupColIdx,
                                                                                                   dNumGroups,
                                                                                                   result_plan);
+                               /* The Group node won't change sort ordering */
                         }
                 }
         } /* end of if (setOperations) */
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c

index 62417bfe11826686b54cf6426dddbaf189fdebf8..c26c4c740765309bcf78738955d331f48a6a16d2 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -5,7 +5,7 @@
   * command, configuration file, and command line options.
   * See src/backend/utils/misc/README for more information.
   *
- * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.106 2002/11/15 02:44:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.107 2002/11/21 00:42:19 tgl Exp $
   *
   * Copyright 2000 by PostgreSQL Global Development Group
   * Written by Peter Eisentraut <peter_e@gmx.net>.
@@ -323,6 +323,10 @@ static struct config_bool
                 {"enable_sort", PGC_USERSET}, &enable_sort,
                 true, NULL, NULL
         },
+       {
+               {"enable_hashagg", PGC_USERSET}, &enable_hashagg,
+               true, NULL, NULL
+       },
         {
                 {"enable_nestloop", PGC_USERSET}, &enable_nestloop,
                 true, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 5fbfafd214a3581cc8ea936421812da2fc32f897..1c4595a66bf82a36508448efa5e2d1331521adc9 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -83,6 +83,7 @@
  #enable_indexscan = true
  #enable_tidscan = true
  #enable_sort = true
+#enable_hashagg = true
  #enable_nestloop = true
  #enable_mergejoin = true
  #enable_hashjoin = true
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c

index 6eec1c92ed504703c5286bc30a891c72cf5b0f73..ff0ae410214bade6152aeac5226a24cb2074e1b2 100644 (file)
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -3,7 +3,7 @@
   *
   * Copyright 2000-2002 by PostgreSQL Global Development Group
   *
- * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.68 2002/11/15 03:07:52 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.69 2002/11/21 00:42:19 tgl Exp $
   */
  
  /*----------------------------------------------------------------------
@@ -251,6 +251,7 @@ psql_completion(char *text, int start, int end)
                 "enable_nestloop",
                 "enable_seqscan",
                 "enable_sort",
+               "enable_hashagg",
                 "enable_tidscan",
                 "explain_pretty_print",
                 "extra_float_digits",
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index b3076d7bb4039741128136629ea1d4bd55864aa3..fd9a3c1def0e779ba58e41779b11bc1c9f357d04 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,15 +7,17 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: cost.h,v 1.47 2002/09/04 20:31:44 momjian Exp $
+ * $Id: cost.h,v 1.48 2002/11/21 00:42:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #ifndef COST_H
  #define COST_H
  
+#include "nodes/plannodes.h"
  #include "nodes/relation.h"
  
+
  /* defaults for costsize.c's Cost parameters */
  /* NB: cost-estimation code should use the variables, not these constants! */
  /* If you change these, update backend/utils/misc/postgresql.sample.conf */
@@ -42,6 +44,7 @@ extern bool enable_seqscan;
  extern bool enable_indexscan;
  extern bool enable_tidscan;
  extern bool enable_sort;
+extern bool enable_hashagg;
  extern bool enable_nestloop;
  extern bool enable_mergejoin;
  extern bool enable_hashjoin;
@@ -56,7 +59,16 @@ extern void cost_tidscan(Path *path, Query *root,
  extern void cost_functionscan(Path *path, Query *root,
                                   RelOptInfo *baserel);
  extern void cost_sort(Path *path, Query *root,
-                 List *pathkeys, double tuples, int width);
+                 List *pathkeys, Cost input_cost, double tuples, int width);
+extern void cost_agg(Path *path, Query *root,
+                                        AggStrategy aggstrategy, int numAggs,
+                                        int numGroupCols, double numGroups,
+                                        Cost input_startup_cost, Cost input_total_cost,
+                                        double input_tuples);
+extern void cost_group(Path *path, Query *root,
+                                          int numGroupCols, double numGroups,
+                                          Cost input_startup_cost, Cost input_total_cost,
+                                          double input_tuples);
  extern void cost_nestloop(Path *path, Query *root,
                           Path *outer_path, Path *inner_path,
                           List *restrictlist);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index bd4bcddd308bfe3dc0b61566b16b4e4af8eb338a..ecd7bca604246765eb75a0c0d60a288483adb882 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: planmain.h,v 1.62 2002/11/19 23:22:00 tgl Exp $
+ * $Id: planmain.h,v 1.63 2002/11/21 00:42:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -34,12 +34,14 @@ extern Sort *make_sort(Query *root, List *tlist,
                   Plan *lefttree, int keycount);
  extern Sort *make_sort_from_pathkeys(Query *root, List *tlist,
                                                 Plan *lefttree, List *pathkeys);
-extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
-                                        int ngrp, AttrNumber *grpColIdx,
+extern Agg *make_agg(Query *root, List *tlist, List *qual,
+                                        AggStrategy aggstrategy,
+                                        int numGroupCols, AttrNumber *grpColIdx,
                                          long numGroups, int numAggs,
                                          Plan *lefttree);
-extern Group *make_group(List *tlist,
-                                                int ngrp, AttrNumber *grpColIdx, double numGroups,
+extern Group *make_group(Query *root, List *tlist,
+                                                int numGroupCols, AttrNumber *grpColIdx,
+                                                double numGroups,
                                                  Plan *lefttree);
  extern Material *make_material(List *tlist, Plan *lefttree);
  extern Unique *make_unique(List *tlist, Plan *lefttree, List *distinctList);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 7519f6e5392a9ee8d45a7e4ae94b54b70d0d2e89..5a6287551909ff1dc774ecac1c3abf3a4538ee49 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -87,7 +87,8 @@ SELECT count(DISTINCT four) AS cnt_4 FROM onek;
       4
  (1 row)
  
-select ten, count(*), sum(four) from onek group by ten;
+select ten, count(*), sum(four) from onek
+group by ten order by ten;
   ten | count | sum 
  -----+-------+-----
     0 |   100 | 100
@@ -102,7 +103,8 @@ select ten, count(*), sum(four) from onek group by ten;
     9 |   100 | 200
  (10 rows)
  
-select ten, count(four), sum(DISTINCT four) from onek group by ten;
+select ten, count(four), sum(DISTINCT four) from onek
+group by ten order by ten;
   ten | count | sum 
  -----+-------+-----
     0 |   100 |   2
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out

index 844cccc63921c942486abc1ce51a0cb3e970423a..e24c7fe05cd70e0bbe95e3fa2761052a2f858ae3 100644 (file)
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@@ -1,6 +1,7 @@
  SELECT * FROM pg_settings WHERE name LIKE 'enable%';
         name       | setting 
  ------------------+---------
+ enable_hashagg   | on
   enable_hashjoin  | on
   enable_indexscan | on
   enable_mergejoin | on
@@ -8,7 +9,7 @@ SELECT * FROM pg_settings WHERE name LIKE 'enable%';
   enable_seqscan   | on
   enable_sort      | on
   enable_tidscan   | on
-(7 rows)
+(8 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out

index c0d84fd1bcd306abd2318088ef5327cd931b8925..aecdf85d73a2f52b4617181fe46cd2817983766e 100644 (file)
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -804,7 +804,7 @@ select * from rtest_vview3;
   7 | item 7
  (3 rows)
  
-select * from rtest_vview4;
+select * from rtest_vview4 order by a, b;
   a |   b    | refcount 
  ---+--------+----------
   2 | item 2 |        2
@@ -868,7 +868,7 @@ select * from rtest_view4;
  
  delete from rtest_view4;
  insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0;
-select * from rtest_view4;
+select * from rtest_view4 order by a, b;
   a |   b    | c 
  ---+--------+---
   3 | item 3 | 0
diff --git a/src/test/regress/expected/select_having.out b/src/test/regress/expected/select_having.out

index 29321e441464b8aff9bc3a3eef43cc28dd910f3d..4447a9df4d6f804da027e560284ac15b4b09fabc 100644 (file)
--- a/src/test/regress/expected/select_having.out
+++ b/src/test/regress/expected/select_having.out
@@ -14,7 +14,7 @@ INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
  INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
  INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
  SELECT b, c FROM test_having
-       GROUP BY b, c HAVING count(*) = 1;
+       GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
   b |    c     
  ---+----------
   1 | XXXX    
@@ -23,7 +23,7 @@ SELECT b, c FROM test_having
  
  -- HAVING is equivalent to WHERE in this case
  SELECT b, c FROM test_having
-       GROUP BY b, c HAVING b = 3; 
+       GROUP BY b, c HAVING b = 3 ORDER BY b, c;
   b |    c     
  ---+----------
   3 | BBBB    
@@ -31,7 +31,8 @@ SELECT b, c FROM test_having
  (2 rows)
  
  SELECT lower(c), count(c) FROM test_having
-       GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+       GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+       ORDER BY lower(c);
    lower   | count 
  ----------+-------
   bbbb     |     3
@@ -40,7 +41,8 @@ SELECT lower(c), count(c) FROM test_having
  (3 rows)
  
  SELECT c, max(a) FROM test_having
-       GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+       GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+       ORDER BY c;
      c     | max 
  ----------+-----
   XXXX     |   0
diff --git a/src/test/regress/expected/select_having_1.out b/src/test/regress/expected/select_having_1.out

index b53f4a7f11ade86ea02b2804287494fcbb5096d1..ffbb591b12533c8a77381c2658b5e894d9cf5f17 100644 (file)
--- a/src/test/regress/expected/select_having_1.out
+++ b/src/test/regress/expected/select_having_1.out
@@ -14,15 +14,25 @@ INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
  INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
  INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
  SELECT b, c FROM test_having
-       GROUP BY b, c HAVING count(*) = 1;
+       GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
   b |    c     
  ---+----------
   1 | XXXX    
   3 | bbbb    
  (2 rows)
  
+-- HAVING is equivalent to WHERE in this case
+SELECT b, c FROM test_having
+       GROUP BY b, c HAVING b = 3 ORDER BY b, c;
+ b |    c     
+---+----------
+ 3 | BBBB    
+ 3 | bbbb    
+(2 rows)
+
  SELECT lower(c), count(c) FROM test_having
-       GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+       GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+       ORDER BY lower(c);
    lower   | count 
  ----------+-------
   bbbb     |     3
@@ -31,7 +41,8 @@ SELECT lower(c), count(c) FROM test_having
  (3 rows)
  
  SELECT c, max(a) FROM test_having
-       GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+       GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+       ORDER BY c;
      c     | max 
  ----------+-----
   bbbb     |   5
diff --git a/src/test/regress/expected/select_implicit.out b/src/test/regress/expected/select_implicit.out

index e3d74e5daba3f7227348819f8e275602f6fb7ec9..47d2e80289eb77db45c00a7047ba41c5ffc7c387 100644 (file)
--- a/src/test/regress/expected/select_implicit.out
+++ b/src/test/regress/expected/select_implicit.out
@@ -18,7 +18,7 @@ INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
  INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
  INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
  --   w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
      c     | count 
  ----------+-------
   AAAA     |     2
@@ -30,7 +30,7 @@ SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
  (6 rows)
  
  --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
   count 
  -------
       2
@@ -100,7 +100,7 @@ SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
  (1 row)
  
  --   order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
      c     | count 
  ----------+-------
   AAAA     |     2
@@ -160,7 +160,7 @@ SELECT a/2, a/2 FROM test_missing_target
  --   group expression w/ target under ambiguous condition
  --   failure NOT expected
  SELECT a/2, a/2 FROM test_missing_target
-       GROUP BY a/2;
+       GROUP BY a/2 ORDER BY a/2;
   ?column? | ?column? 
  ----------+----------
          0 |        0
@@ -173,7 +173,7 @@ SELECT a/2, a/2 FROM test_missing_target
  --   group w/ existing GROUP BY target under ambiguous condition
  SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
   b | count 
  ---+-------
   1 |     1
@@ -185,7 +185,7 @@ SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
  --   group w/o existing GROUP BY target under ambiguous condition
  SELECT count(*) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
   count 
  -------
       1
@@ -199,7 +199,7 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y
  SELECT count(*) INTO TABLE test_missing_target2 
  FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
  SELECT * FROM test_missing_target2;
   count 
  -------
@@ -211,7 +211,9 @@ SELECT * FROM test_missing_target2;
  
  --  Functions and expressions
  --   w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
   ?column? | count 
  ----------+-------
          0 |     5
@@ -219,7 +221,9 @@ SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
  (2 rows)
  
  --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
   count 
  -------
       2
@@ -286,7 +290,7 @@ ERROR:  Column reference "b" is ambiguous
  --   group w/ existing GROUP BY target under ambiguous condition
  SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b/2;
+       GROUP BY x.b/2 ORDER BY x.b/2;
   ?column? | count 
  ----------+-------
          0 |     1
@@ -305,7 +309,7 @@ ERROR:  Column reference "b" is ambiguous
  SELECT count(x.b) INTO TABLE test_missing_target3 
  FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b/2;
+       GROUP BY x.b/2 ORDER BY x.b/2;
  SELECT * FROM test_missing_target3;
   count 
  -------
diff --git a/src/test/regress/expected/select_implicit_1.out b/src/test/regress/expected/select_implicit_1.out

index d63a5727368958c0752b6cc131e256fa42829aa1..80abfd8f9a271face02cdc35b35bedbfeffa57ee 100644 (file)
--- a/src/test/regress/expected/select_implicit_1.out
+++ b/src/test/regress/expected/select_implicit_1.out
@@ -18,7 +18,7 @@ INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h');
  INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
  INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
  --   w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
      c     | count 
  ----------+-------
   AAAA     |     2
@@ -30,7 +30,7 @@ SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
  (6 rows)
  
  --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
   count 
  -------
       2
@@ -100,7 +100,7 @@ SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
  (1 row)
  
  --   order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
      c     | count 
  ----------+-------
   AAAA     |     2
@@ -160,7 +160,7 @@ SELECT a/2, a/2 FROM test_missing_target
  --   group expression w/ target under ambiguous condition
  --   failure NOT expected
  SELECT a/2, a/2 FROM test_missing_target
-       GROUP BY a/2;
+       GROUP BY a/2 ORDER BY a/2;
   ?column? | ?column? 
  ----------+----------
          0 |        0
@@ -173,7 +173,7 @@ SELECT a/2, a/2 FROM test_missing_target
  --   group w/ existing GROUP BY target under ambiguous condition
  SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
   b | count 
  ---+-------
   1 |     1
@@ -185,7 +185,7 @@ SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
  --   group w/o existing GROUP BY target under ambiguous condition
  SELECT count(*) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
   count 
  -------
       1
@@ -199,7 +199,7 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y
  SELECT count(*) INTO TABLE test_missing_target2 
  FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
  SELECT * FROM test_missing_target2;
   count 
  -------
@@ -211,7 +211,9 @@ SELECT * FROM test_missing_target2;
  
  --  Functions and expressions
  --   w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
   ?column? | count 
  ----------+-------
          0 |     5
@@ -219,7 +221,9 @@ SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
  (2 rows)
  
  --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
   count 
  -------
       2
@@ -286,7 +290,7 @@ ERROR:  Column reference "b" is ambiguous
  --   group w/ existing GROUP BY target under ambiguous condition
  SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b/2;
+       GROUP BY x.b/2 ORDER BY x.b/2;
   ?column? | count 
  ----------+-------
          0 |     1
@@ -305,7 +309,7 @@ ERROR:  Column reference "b" is ambiguous
  SELECT count(x.b) INTO TABLE test_missing_target3 
  FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b/2;
+       GROUP BY x.b/2 ORDER BY x.b/2;
  SELECT * FROM test_missing_target3;
   count 
  -------
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index 96070f77062dff5f3ef9c3a49bfb19a45abfca32..5a2ef11c21b9cb294b74e89b8a9fdf1638f059d1 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -158,7 +158,7 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
  (6 rows)
  
  select q1, float8(count(*)) / (select count(*) from int8_tbl)
-from int8_tbl group by q1;
+from int8_tbl group by q1 order by q1;
          q1        | ?column? 
  ------------------+----------
                123 |      0.4
diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source

index 18bf4e8af7d778be8a4cf7ddbdb496068b405034..c10f73c5c657d732977674bd54f429af49c25166 100644 (file)
--- a/src/test/regress/input/misc.source
+++ b/src/test/regress/input/misc.source
@@ -105,7 +105,7 @@ SELECT * FROM f_star* x WHERE x.c ISNULL;
  
  SELECT sum(a) FROM a_star*;
  
-SELECT class, sum(a) FROM a_star* GROUP BY class;
+SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class;
  
  
  ALTER TABLE f_star RENAME COLUMN f TO ff;
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source

index 01b88045ac13b9ccb73cb7d3622231f93851ba36..ae60699356a77e31b6829ef66bbb7fb89c513be6 100644 (file)
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -223,7 +223,7 @@ SELECT sum(a) FROM a_star*;
   355
  (1 row)
  
-SELECT class, sum(a) FROM a_star* GROUP BY class;
+SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class;
   class | sum 
  -------+-----
   a     |   3
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 33c8fb5f35155113d2be664c0fac6ac5b0d71070..00cc6daf9f5a9fd13688bd659043459df346aaf8 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -36,9 +36,11 @@ SELECT count(four) AS cnt_1000 FROM onek;
  
  SELECT count(DISTINCT four) AS cnt_4 FROM onek;
  
-select ten, count(*), sum(four) from onek group by ten;
+select ten, count(*), sum(four) from onek
+group by ten order by ten;
  
-select ten, count(four), sum(DISTINCT four) from onek group by ten;
+select ten, count(four), sum(DISTINCT four) from onek
+group by ten order by ten;
  
  
  SELECT newavg(four) AS avg_1 FROM onek;
diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql

index 6ded00a445eb1cd8a03226251ae190e83db906ad..0a630194f2769d4ff63f1ca8c263b9f68ceabd62 100644 (file)
--- a/src/test/regress/sql/rules.sql
+++ b/src/test/regress/sql/rules.sql
@@ -451,7 +451,7 @@ insert into rtest_view2 values (7);
  select * from rtest_vview1;
  select * from rtest_vview2;
  select * from rtest_vview3;
-select * from rtest_vview4;
+select * from rtest_vview4 order by a, b;
  select * from rtest_vview5;
  
  insert into rtest_view3 select * from rtest_vview1 where a < 7;
@@ -471,7 +471,7 @@ select * from rtest_view4;
  delete from rtest_view4;
  
  insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0;
-select * from rtest_view4;
+select * from rtest_view4 order by a, b;
  delete from rtest_view4;
  --
  -- Test for computations in views
diff --git a/src/test/regress/sql/select_having.sql b/src/test/regress/sql/select_having.sql

index 28b22d9859f7ad601c24ef2eb2bd5dd56d54816b..72887d63c84e1da4cbc13a0b28b1922d9ddecc18 100644 (file)
--- a/src/test/regress/sql/select_having.sql
+++ b/src/test/regress/sql/select_having.sql
@@ -16,17 +16,18 @@ INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
  INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
  
  SELECT b, c FROM test_having
-       GROUP BY b, c HAVING count(*) = 1;
+       GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
  
  -- HAVING is equivalent to WHERE in this case
  SELECT b, c FROM test_having
-       GROUP BY b, c HAVING b = 3; 
+       GROUP BY b, c HAVING b = 3 ORDER BY b, c;
  
  SELECT lower(c), count(c) FROM test_having
-       GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+       GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+       ORDER BY lower(c);
  
  SELECT c, max(a) FROM test_having
-       GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+       GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+       ORDER BY c;
  
  DROP TABLE test_having;
-
diff --git a/src/test/regress/sql/select_implicit.sql b/src/test/regress/sql/select_implicit.sql

index 607ea95bb11dc5697688707190255e3f942c2c3b..60fac0b8bde197a31c099337bf0ddc2f54e05471 100644 (file)
--- a/src/test/regress/sql/select_implicit.sql
+++ b/src/test/regress/sql/select_implicit.sql
@@ -21,10 +21,10 @@ INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
  
  
  --   w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
  
  --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
  
  --   w/o existing GROUP BY target and w/o existing a different ORDER BY target
  --   failure expected
@@ -47,7 +47,7 @@ SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc;
  SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
  
  --   order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
  
  --   group using reference number out of range
  --   failure expected
@@ -72,34 +72,38 @@ SELECT a/2, a/2 FROM test_missing_target
  --   group expression w/ target under ambiguous condition
  --   failure NOT expected
  SELECT a/2, a/2 FROM test_missing_target
-       GROUP BY a/2;
+       GROUP BY a/2 ORDER BY a/2;
  
  --   group w/ existing GROUP BY target under ambiguous condition
  SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
  
  --   group w/o existing GROUP BY target under ambiguous condition
  SELECT count(*) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
  
  --   group w/o existing GROUP BY target under ambiguous condition
  --   into a table
  SELECT count(*) INTO TABLE test_missing_target2 
  FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b;
+       GROUP BY x.b ORDER BY x.b;
  SELECT * FROM test_missing_target2;
  
  
  --  Functions and expressions
  
  --   w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
  
  --   w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
  
  --   w/o existing GROUP BY target and w/o existing a different ORDER BY target
  --   failure expected
@@ -128,7 +132,7 @@ SELECT count(x.a) FROM test_missing_target x, test_missing_target y
  --   group w/ existing GROUP BY target under ambiguous condition
  SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b/2;
+       GROUP BY x.b/2 ORDER BY x.b/2;
  
  --   group w/o existing GROUP BY target under ambiguous condition
  --   failure expected due to ambiguous b in count(b)
@@ -141,7 +145,7 @@ SELECT count(b) FROM test_missing_target x, test_missing_target y
  SELECT count(x.b) INTO TABLE test_missing_target3 
  FROM test_missing_target x, test_missing_target y 
         WHERE x.a = y.a
-       GROUP BY x.b/2;
+       GROUP BY x.b/2 ORDER BY x.b/2;
  SELECT * FROM test_missing_target3;
  
  --   Cleanup
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql

index b3a4ff528455eb83f36af5df7cbbca990107720b..c2cde6ac2071d7441669e79b215aa7388d57248f 100644 (file)
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -74,4 +74,4 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
    WHERE f1 NOT IN (SELECT f1+1 FROM INT4_TBL WHERE f1 != ss.f1);
  
  select q1, float8(count(*)) / (select count(*) from int8_tbl)
-from int8_tbl group by q1;
+from int8_tbl group by q1 order by q1;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 21 Nov 2002 00:42:20 +0000 (00:42 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 21 Nov 2002 00:42:20 +0000 (00:42 +0000)
doc/src/sgml/runtime.sgml		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/utils/misc/guc.c		patch \| blob \| history
src/backend/utils/misc/postgresql.conf.sample		patch \| blob \| history
src/bin/psql/tab-complete.c		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history
src/include/optimizer/planmain.h		patch \| blob \| history
src/test/regress/expected/aggregates.out		patch \| blob \| history
src/test/regress/expected/rangefuncs.out		patch \| blob \| history
src/test/regress/expected/rules.out		patch \| blob \| history
src/test/regress/expected/select_having.out		patch \| blob \| history
src/test/regress/expected/select_having_1.out		patch \| blob \| history
src/test/regress/expected/select_implicit.out		patch \| blob \| history
src/test/regress/expected/select_implicit_1.out		patch \| blob \| history
src/test/regress/expected/subselect.out		patch \| blob \| history
src/test/regress/input/misc.source		patch \| blob \| history
src/test/regress/output/misc.source		patch \| blob \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| history
src/test/regress/sql/rules.sql		patch \| blob \| history
src/test/regress/sql/select_having.sql		patch \| blob \| history
src/test/regress/sql/select_implicit.sql		patch \| blob \| history
src/test/regress/sql/subselect.sql		patch \| blob \| history