parameter to allow it to be forced off for comparison purposes.
Add ORDER BY clauses to a bunch of regression test queries that will
otherwise produce randomly-ordered output in the new regime.
<!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.156 2002/11/15 03:22:30 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.157 2002/11/21 00:42:18 tgl Exp $
-->
<Chapter Id="runtime">
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>ENABLE_HASHAGG</varname> (<type>boolean</type>)</term>
+ <listitem>
+ <para>
+ Enables or disables the query planner's use of hashed aggregation
+ plan types. The default is on. This is used for debugging the query
+ planner.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><varname>ENABLE_HASHJOIN</varname> (<type>boolean</type>)</term>
<listitem>
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.90 2002/09/04 20:31:20 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
bool enable_indexscan = true;
bool enable_tidscan = true;
bool enable_sort = true;
+bool enable_hashagg = true;
bool enable_nestloop = true;
bool enable_mergejoin = true;
bool enable_hashjoin = true;
/*
* cost_sort
- * Determines and returns the cost of sorting a relation.
- *
- * The cost of supplying the input data is NOT included; the caller should
- * add that cost to both startup and total costs returned from this routine!
+ * Determines and returns the cost of sorting a relation, including
+ * the cost of reading the input data.
*
* If the total volume of data to sort is less than SortMem, we will do
* an in-memory sort, which requires no I/O and about t*log2(t) tuple
* the right ballpark in most cases.
*
* 'pathkeys' is a list of sort keys
+ * 'input_cost' is the total cost for reading the input data
* 'tuples' is the number of tuples in the relation
* 'width' is the average tuple width in bytes
*
* can't conveniently supply the sort keys. Since this routine doesn't
* currently do anything with pathkeys anyway, that doesn't matter...
* but if it ever does, it should react gracefully to lack of key data.
+ * (Actually, the thing we'd most likely be interested in is just the number
+ * of sort keys, which all callers *could* supply.)
*/
void
cost_sort(Path *path, Query *root,
- List *pathkeys, double tuples, int width)
+ List *pathkeys, Cost input_cost, double tuples, int width)
{
- Cost startup_cost = 0;
+ Cost startup_cost = input_cost;
Cost run_cost = 0;
double nbytes = relation_byte_size(tuples, width);
long sortmembytes = SortMem * 1024L;
path->total_cost = startup_cost + run_cost;
}
+/*
+ * cost_agg
+ * Determines and returns the cost of performing an Agg plan node,
+ * including the cost of its input.
+ *
+ * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
+ * are for appropriately-sorted input.
+ */
+void
+cost_agg(Path *path, Query *root,
+ AggStrategy aggstrategy, int numAggs,
+ int numGroupCols, double numGroups,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples)
+{
+ Cost startup_cost;
+ Cost total_cost;
+
+ /*
+ * We charge one cpu_operator_cost per aggregate function per input
+ * tuple, and another one per output tuple (corresponding to transfn
+ * and finalfn calls respectively). If we are grouping, we charge an
+ * additional cpu_operator_cost per grouping column per input tuple
+ * for grouping comparisons.
+ *
+ * We will produce a single output tuple if not grouping,
+ * and a tuple per group otherwise.
+ */
+ if (aggstrategy == AGG_PLAIN)
+ {
+ startup_cost = input_total_cost;
+ startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
+ /* we aren't grouping */
+ total_cost = startup_cost;
+ }
+ else if (aggstrategy == AGG_SORTED)
+ {
+ /* Here we are able to deliver output on-the-fly */
+ startup_cost = input_startup_cost;
+ total_cost = input_total_cost;
+ total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
+ total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+ }
+ else
+ {
+ /* must be AGG_HASHED */
+ startup_cost = input_total_cost;
+ startup_cost += cpu_operator_cost * input_tuples * numAggs;
+ startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
+ total_cost = startup_cost;
+ total_cost += cpu_operator_cost * numGroups * numAggs;
+ }
+
+ path->startup_cost = startup_cost;
+ path->total_cost = total_cost;
+}
+
+/*
+ * cost_group
+ * Determines and returns the cost of performing a Group plan node,
+ * including the cost of its input.
+ *
+ * Note: caller must ensure that input costs are for appropriately-sorted
+ * input.
+ */
+void
+cost_group(Path *path, Query *root,
+ int numGroupCols, double numGroups,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples)
+{
+ Cost startup_cost;
+ Cost total_cost;
+
+ startup_cost = input_startup_cost;
+ total_cost = input_total_cost;
+
+ /*
+ * Charge one cpu_operator_cost per comparison per input tuple. We
+ * assume all columns get compared at most of the tuples.
+ */
+ total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = total_cost;
+}
/*
* cost_nestloop
*/
if (outersortkeys) /* do we need to sort outer? */
{
- startup_cost += outer_path->total_cost;
cost_sort(&sort_path,
root,
outersortkeys,
+ outer_path->total_cost,
outer_path->parent->rows,
outer_path->parent->width);
startup_cost += sort_path.startup_cost;
if (innersortkeys) /* do we need to sort inner? */
{
- startup_cost += inner_path->total_cost;
cost_sort(&sort_path,
root,
innersortkeys,
+ inner_path->total_cost,
inner_path->parent->rows,
inner_path->parent->width);
startup_cost += sort_path.startup_cost;
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.123 2002/11/19 23:21:58 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
copy_plan_costsize(plan, lefttree); /* only care about copying size */
cost_sort(&sort_path, root, NIL,
- lefttree->plan_rows, lefttree->plan_width);
- plan->startup_cost = sort_path.startup_cost + lefttree->total_cost;
- plan->total_cost = sort_path.total_cost + lefttree->total_cost;
+ lefttree->total_cost,
+ lefttree->plan_rows,
+ lefttree->plan_width);
+ plan->startup_cost = sort_path.startup_cost;
+ plan->total_cost = sort_path.total_cost;
plan->state = (EState *) NULL;
plan->targetlist = tlist;
plan->qual = NIL;
}
Agg *
-make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
- int ngrp, AttrNumber *grpColIdx, long numGroups, int numAggs,
+make_agg(Query *root, List *tlist, List *qual,
+ AggStrategy aggstrategy,
+ int numGroupCols, AttrNumber *grpColIdx,
+ long numGroups, int numAggs,
Plan *lefttree)
{
Agg *node = makeNode(Agg);
Plan *plan = &node->plan;
+ Path agg_path; /* dummy for result of cost_agg */
node->aggstrategy = aggstrategy;
- node->numCols = ngrp;
+ node->numCols = numGroupCols;
node->grpColIdx = grpColIdx;
node->numGroups = numGroups;
- copy_plan_costsize(plan, lefttree);
-
- /*
- * Charge one cpu_operator_cost per aggregate function per input
- * tuple.
- */
- plan->total_cost += cpu_operator_cost * plan->plan_rows * numAggs;
+ copy_plan_costsize(plan, lefttree); /* only care about copying size */
+ cost_agg(&agg_path, root,
+ aggstrategy, numAggs,
+ numGroupCols, numGroups,
+ lefttree->startup_cost,
+ lefttree->total_cost,
+ lefttree->plan_rows);
+ plan->startup_cost = agg_path.startup_cost;
+ plan->total_cost = agg_path.total_cost;
/*
* We will produce a single output tuple if not grouping,
* and a tuple per group otherwise.
*/
if (aggstrategy == AGG_PLAIN)
- {
plan->plan_rows = 1;
- plan->startup_cost = plan->total_cost;
- }
else
- {
plan->plan_rows = numGroups;
- }
plan->state = (EState *) NULL;
plan->qual = qual;
}
Group *
-make_group(List *tlist,
- int ngrp,
+make_group(Query *root,
+ List *tlist,
+ int numGroupCols,
AttrNumber *grpColIdx,
double numGroups,
Plan *lefttree)
{
Group *node = makeNode(Group);
Plan *plan = &node->plan;
+ Path group_path; /* dummy for result of cost_group */
- copy_plan_costsize(plan, lefttree);
+ node->numCols = numGroupCols;
+ node->grpColIdx = grpColIdx;
- /*
- * Charge one cpu_operator_cost per comparison per input tuple. We
- * assume all columns get compared at most of the tuples.
- */
- plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
+ copy_plan_costsize(plan, lefttree); /* only care about copying size */
+ cost_group(&group_path, root,
+ numGroupCols, numGroups,
+ lefttree->startup_cost,
+ lefttree->total_cost,
+ lefttree->plan_rows);
+ plan->startup_cost = group_path.startup_cost;
+ plan->total_cost = group_path.total_cost;
/* One output tuple per estimated result group */
plan->plan_rows = numGroups;
plan->targetlist = tlist;
plan->lefttree = lefttree;
plan->righttree = (Plan *) NULL;
- node->numCols = ngrp;
- node->grpColIdx = grpColIdx;
return node;
}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.71 2002/11/06 00:00:44 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.72 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
-#include "optimizer/tlist.h"
-#include "parser/parsetree.h"
-#include "utils/memutils.h"
/*--------------------
*
* Since query_planner does not handle the toplevel processing (grouping,
* sorting, etc) it cannot select the best path by itself. It selects
- * two paths: the cheapest path that produces the required tuples, independent
- * of any ordering considerations, and the cheapest path that produces the
- * required tuples in the required ordering, if there is a path that
- * can produce them without an explicit top-level sort step. The caller
- * (grouping_planner) will make the final decision about which to use.
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path. The caller (grouping_planner)
+ * will make the final decision about which to use.
*
* Input parameters:
* root is the query to plan
* Output parameters:
* *cheapest_path receives the overall-cheapest path for the query
* *sorted_path receives the cheapest presorted path for the query,
- * if any (it may be NULL, or the same as cheapest_path)
+ * if any (NULL if there is no useful presorted path)
*
* Note: the Query node also includes a query_pathkeys field, which is both
* an input and an output of query_planner(). The input value signals
{
List *constant_quals;
RelOptInfo *final_rel;
+ Path *cheapestpath;
+ Path *sortedpath;
/*
* If the query has an empty join tree, then it's something easy like
/*
* Pick out the cheapest-total path and the cheapest presorted path
- * for the requested pathkeys (if there is one). We can take the
+ * for the requested pathkeys (if there is one). We should take the
* tuple fraction into account when selecting the cheapest presorted
* path, but not when selecting the cheapest-total path, since if we
* have to sort then we'll have to fetch all the tuples. (But there's
* a special case: if query_pathkeys is NIL, meaning order doesn't
* matter, then the "cheapest presorted" path will be the cheapest
* overall for the tuple fraction.)
+ *
+ * The cheapest-total path is also the one to use if grouping_planner
+ * decides to use hashed aggregation, so we return it separately even
+ * if this routine thinks the presorted path is the winner.
*/
- *cheapest_path = final_rel->cheapest_total_path;
+ cheapestpath = final_rel->cheapest_total_path;
- *sorted_path =
+ sortedpath =
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
root->query_pathkeys,
tuple_fraction);
+ /* Don't return same path in both guises; just wastes effort */
+ if (sortedpath == cheapestpath)
+ sortedpath = NULL;
+
+ /*
+ * Forget about the presorted path if it would be cheaper to sort the
+ * cheapest-total path. Here we need consider only the behavior at
+ * the tuple fraction point.
+ */
+ if (sortedpath)
+ {
+ Path sort_path; /* dummy for result of cost_sort */
+
+ if (root->query_pathkeys == NIL ||
+ pathkeys_contained_in(root->query_pathkeys,
+ cheapestpath->pathkeys))
+ {
+ /* No sort needed for cheapest path */
+ sort_path.startup_cost = cheapestpath->startup_cost;
+ sort_path.total_cost = cheapestpath->total_cost;
+ }
+ else
+ {
+ /* Figure cost for sorting */
+ cost_sort(&sort_path, root, root->query_pathkeys,
+ cheapestpath->total_cost,
+ final_rel->rows, final_rel->width);
+ }
+
+ if (compare_fractional_path_costs(sortedpath, &sort_path,
+ tuple_fraction) > 0)
+ {
+ /* Presorted path is a loser */
+ sortedpath = NULL;
+ }
+ }
+
/*
* If we have constant quals, add a toplevel Result step to process them.
*/
if (constant_quals)
{
- *cheapest_path = (Path *)
- create_result_path((*cheapest_path)->parent,
- *cheapest_path,
- constant_quals);
- if (*sorted_path)
- *sorted_path = (Path *)
- create_result_path((*sorted_path)->parent,
- *sorted_path,
- constant_quals);
+ cheapestpath = (Path *) create_result_path(final_rel,
+ cheapestpath,
+ constant_quals);
+ if (sortedpath)
+ sortedpath = (Path *) create_result_path(final_rel,
+ sortedpath,
+ constant_quals);
}
+
+ *cheapest_path = cheapestpath;
+ *sorted_path = sortedpath;
}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
List *sub_tlist;
List *group_pathkeys;
AttrNumber *groupColIdx = NULL;
+ double sub_tuple_fraction;
Path *cheapest_path;
Path *sorted_path;
double dNumGroups = 0;
long numGroups = 0;
int numAggs = 0;
+ int numGroupCols = length(parse->groupClause);
bool use_hashed_grouping = false;
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
}
}
+ /*
+ * With grouping or aggregation, the tuple fraction to pass to
+ * query_planner() may be different from what it is at top level.
+ */
+ sub_tuple_fraction = tuple_fraction;
+
if (parse->groupClause)
{
/*
* amounts to assuming that all the groups are about the same
* size).
*/
- if (tuple_fraction >= 1.0)
- tuple_fraction = 0.25;
+ if (sub_tuple_fraction >= 1.0)
+ sub_tuple_fraction = 0.25;
/*
* If both GROUP BY and ORDER BY are specified, we will need
if (parse->groupClause && parse->sortClause &&
!noncanonical_pathkeys_contained_in(sort_pathkeys,
group_pathkeys))
- tuple_fraction = 0.0;
+ sub_tuple_fraction = 0.0;
}
else if (parse->hasAggs)
{
* Ungrouped aggregate will certainly want all the input
* tuples.
*/
- tuple_fraction = 0.0;
+ sub_tuple_fraction = 0.0;
}
else if (parse->distinctClause)
{
* number of input tuples per output tuple. Handle the same
* way.
*/
- if (tuple_fraction >= 1.0)
- tuple_fraction = 0.25;
+ if (sub_tuple_fraction >= 1.0)
+ sub_tuple_fraction = 0.25;
}
/*
* Generate the best unsorted and presorted paths for this Query
* (but note there may not be any presorted path).
*/
- query_planner(parse, sub_tlist, tuple_fraction,
+ query_planner(parse, sub_tlist, sub_tuple_fraction,
&cheapest_path, &sorted_path);
/*
if (parse->groupClause)
{
/*
- * Always estimate the number of groups.
+ * Always estimate the number of groups. We can't do this until
+ * after running query_planner(), either.
*/
dNumGroups = estimate_num_groups(parse,
parse->groupClause,
cheapest_path->parent->rows);
+ /* Also want it as a long int --- but 'ware overflow! */
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
/*
* aggregates. (Doing so would imply storing *all* the input
* values in the hash table, which seems like a certain loser.)
*/
- if (parse->hasAggs &&
- (contain_distinct_agg_clause((Node *) tlist) ||
- contain_distinct_agg_clause(parse->havingQual)))
+ if (!enable_hashagg)
+ use_hashed_grouping = false;
+ else if (parse->hasAggs &&
+ (contain_distinct_agg_clause((Node *) tlist) ||
+ contain_distinct_agg_clause(parse->havingQual)))
use_hashed_grouping = false;
else
{
if (hashentrysize * dNumGroups <= SortMem * 1024L)
{
- /* much more to do here */
-#if 0
- /* TEMPORARY HOTWIRE FOR TESTING */
- use_hashed_grouping = true;
-#endif
+ /*
+ * Okay, do the cost comparison. We need to consider
+ * cheapest_path + hashagg [+ final sort]
+ * versus either
+ * cheapest_path [+ sort] + group or agg [+ final sort]
+ * or
+ * presorted_path + group or agg [+ final sort]
+ * where brackets indicate a step that may not be needed.
+ * We assume query_planner() will have returned a
+ * presorted path only if it's a winner compared to
+ * cheapest_path for this purpose.
+ *
+ * These path variables are dummies that just hold cost
+ * fields; we don't make actual Paths for these steps.
+ */
+ Path hashed_p;
+ Path sorted_p;
+
+ cost_agg(&hashed_p, parse,
+ AGG_HASHED, numAggs,
+ numGroupCols, dNumGroups,
+ cheapest_path->startup_cost,
+ cheapest_path->total_cost,
+ cheapest_path->parent->rows);
+ /* Result of hashed agg is always unsorted */
+ if (sort_pathkeys)
+ cost_sort(&hashed_p, parse, sort_pathkeys,
+ hashed_p.total_cost,
+ dNumGroups,
+ cheapest_path->parent->width);
+
+ if (sorted_path)
+ {
+ sorted_p.startup_cost = sorted_path->startup_cost;
+ sorted_p.total_cost = sorted_path->total_cost;
+ current_pathkeys = sorted_path->pathkeys;
+ }
+ else
+ {
+ sorted_p.startup_cost = cheapest_path->startup_cost;
+ sorted_p.total_cost = cheapest_path->total_cost;
+ current_pathkeys = cheapest_path->pathkeys;
+ }
+ if (!pathkeys_contained_in(group_pathkeys,
+ current_pathkeys))
+ {
+ cost_sort(&sorted_p, parse, group_pathkeys,
+ sorted_p.total_cost,
+ cheapest_path->parent->rows,
+ cheapest_path->parent->width);
+ current_pathkeys = group_pathkeys;
+ }
+ if (parse->hasAggs)
+ cost_agg(&sorted_p, parse,
+ AGG_SORTED, numAggs,
+ numGroupCols, dNumGroups,
+ sorted_p.startup_cost,
+ sorted_p.total_cost,
+ cheapest_path->parent->rows);
+ else
+ cost_group(&sorted_p, parse,
+ numGroupCols, dNumGroups,
+ sorted_p.startup_cost,
+ sorted_p.total_cost,
+ cheapest_path->parent->rows);
+ /* The Agg or Group node will preserve ordering */
+ if (sort_pathkeys &&
+ !pathkeys_contained_in(sort_pathkeys,
+ current_pathkeys))
+ {
+ cost_sort(&sorted_p, parse, sort_pathkeys,
+ sorted_p.total_cost,
+ dNumGroups,
+ cheapest_path->parent->width);
+ }
+
+ /*
+ * Now make the decision using the top-level tuple
+ * fraction. First we have to convert an absolute
+ * count (LIMIT) into fractional form.
+ */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= dNumGroups;
+
+ if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+ tuple_fraction) <= 0)
+ {
+ /* Hashed is cheaper, so use it */
+ use_hashed_grouping = true;
+ }
}
}
}
/*
* Select the best path and create a plan to execute it.
*
- * If no special sort order is wanted, or if the cheapest path is
- * already appropriately ordered, use the cheapest path.
- * Otherwise, look to see if we have an already-ordered path that is
- * cheaper than doing an explicit sort on the cheapest-total-cost
- * path.
+ * If we are doing hashed grouping, we will always read all the
+ * input tuples, so use the cheapest-total path. Otherwise,
+ * trust query_planner's decision about which to use.
*/
- if (parse->query_pathkeys == NIL ||
- pathkeys_contained_in(parse->query_pathkeys,
- cheapest_path->pathkeys))
+ if (sorted_path && !use_hashed_grouping)
{
- result_plan = create_plan(parse, cheapest_path);
- current_pathkeys = cheapest_path->pathkeys;
- }
- else if (sorted_path)
- {
- Path sort_path; /* dummy for result of cost_sort */
-
- cost_sort(&sort_path, parse, parse->query_pathkeys,
- sorted_path->parent->rows, sorted_path->parent->width);
- sort_path.startup_cost += cheapest_path->total_cost;
- sort_path.total_cost += cheapest_path->total_cost;
- /* Convert absolute-count tuple_fraction into a fraction */
- if (tuple_fraction >= 1.0)
- tuple_fraction /= sorted_path->parent->rows;
- if (compare_fractional_path_costs(sorted_path, &sort_path,
- tuple_fraction) <= 0)
- {
- /* Presorted path is cheaper, use it */
- result_plan = create_plan(parse, sorted_path);
- current_pathkeys = sorted_path->pathkeys;
- }
- else
- {
- /* otherwise, doing it the hard way is still cheaper */
- result_plan = create_plan(parse, cheapest_path);
- current_pathkeys = cheapest_path->pathkeys;
- }
+ result_plan = create_plan(parse, sorted_path);
+ current_pathkeys = sorted_path->pathkeys;
}
else
{
- /*
- * No sorted path, so we must use the cheapest-total path.
- * The actual sort step will be generated below.
- */
result_plan = create_plan(parse, cheapest_path);
current_pathkeys = cheapest_path->pathkeys;
}
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
- result_plan = (Plan *) make_agg(tlist,
+ result_plan = (Plan *) make_agg(parse,
+ tlist,
(List *) parse->havingQual,
AGG_HASHED,
- length(parse->groupClause),
+ numGroupCols,
groupColIdx,
numGroups,
numAggs,
current_pathkeys = NIL;
}
- result_plan = (Plan *) make_agg(tlist,
+ result_plan = (Plan *) make_agg(parse,
+ tlist,
(List *) parse->havingQual,
aggstrategy,
- length(parse->groupClause),
+ numGroupCols,
groupColIdx,
numGroups,
numAggs,
current_pathkeys = group_pathkeys;
}
- result_plan = (Plan *) make_group(tlist,
- length(parse->groupClause),
+ result_plan = (Plan *) make_group(parse,
+ tlist,
+ numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
+ /* The Group node won't change sort ordering */
}
}
} /* end of if (setOperations) */
* command, configuration file, and command line options.
* See src/backend/utils/misc/README for more information.
*
- * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.106 2002/11/15 02:44:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.107 2002/11/21 00:42:19 tgl Exp $
*
* Copyright 2000 by PostgreSQL Global Development Group
* Written by Peter Eisentraut <peter_e@gmx.net>.
{"enable_sort", PGC_USERSET}, &enable_sort,
true, NULL, NULL
},
+ {
+ {"enable_hashagg", PGC_USERSET}, &enable_hashagg,
+ true, NULL, NULL
+ },
{
{"enable_nestloop", PGC_USERSET}, &enable_nestloop,
true, NULL, NULL
#enable_indexscan = true
#enable_tidscan = true
#enable_sort = true
+#enable_hashagg = true
#enable_nestloop = true
#enable_mergejoin = true
#enable_hashjoin = true
*
* Copyright 2000-2002 by PostgreSQL Global Development Group
*
- * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.68 2002/11/15 03:07:52 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.69 2002/11/21 00:42:19 tgl Exp $
*/
/*----------------------------------------------------------------------
"enable_nestloop",
"enable_seqscan",
"enable_sort",
+ "enable_hashagg",
"enable_tidscan",
"explain_pretty_print",
"extra_float_digits",
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: cost.h,v 1.47 2002/09/04 20:31:44 momjian Exp $
+ * $Id: cost.h,v 1.48 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef COST_H
#define COST_H
+#include "nodes/plannodes.h"
#include "nodes/relation.h"
+
/* defaults for costsize.c's Cost parameters */
/* NB: cost-estimation code should use the variables, not these constants! */
/* If you change these, update backend/utils/misc/postgresql.sample.conf */
extern bool enable_indexscan;
extern bool enable_tidscan;
extern bool enable_sort;
+extern bool enable_hashagg;
extern bool enable_nestloop;
extern bool enable_mergejoin;
extern bool enable_hashjoin;
extern void cost_functionscan(Path *path, Query *root,
RelOptInfo *baserel);
extern void cost_sort(Path *path, Query *root,
- List *pathkeys, double tuples, int width);
+ List *pathkeys, Cost input_cost, double tuples, int width);
+extern void cost_agg(Path *path, Query *root,
+ AggStrategy aggstrategy, int numAggs,
+ int numGroupCols, double numGroups,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples);
+extern void cost_group(Path *path, Query *root,
+ int numGroupCols, double numGroups,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples);
extern void cost_nestloop(Path *path, Query *root,
Path *outer_path, Path *inner_path,
List *restrictlist);
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: planmain.h,v 1.62 2002/11/19 23:22:00 tgl Exp $
+ * $Id: planmain.h,v 1.63 2002/11/21 00:42:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
Plan *lefttree, int keycount);
extern Sort *make_sort_from_pathkeys(Query *root, List *tlist,
Plan *lefttree, List *pathkeys);
-extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
- int ngrp, AttrNumber *grpColIdx,
+extern Agg *make_agg(Query *root, List *tlist, List *qual,
+ AggStrategy aggstrategy,
+ int numGroupCols, AttrNumber *grpColIdx,
long numGroups, int numAggs,
Plan *lefttree);
-extern Group *make_group(List *tlist,
- int ngrp, AttrNumber *grpColIdx, double numGroups,
+extern Group *make_group(Query *root, List *tlist,
+ int numGroupCols, AttrNumber *grpColIdx,
+ double numGroups,
Plan *lefttree);
extern Material *make_material(List *tlist, Plan *lefttree);
extern Unique *make_unique(List *tlist, Plan *lefttree, List *distinctList);
4
(1 row)
-select ten, count(*), sum(four) from onek group by ten;
+select ten, count(*), sum(four) from onek
+group by ten order by ten;
ten | count | sum
-----+-------+-----
0 | 100 | 100
9 | 100 | 200
(10 rows)
-select ten, count(four), sum(DISTINCT four) from onek group by ten;
+select ten, count(four), sum(DISTINCT four) from onek
+group by ten order by ten;
ten | count | sum
-----+-------+-----
0 | 100 | 2
SELECT * FROM pg_settings WHERE name LIKE 'enable%';
name | setting
------------------+---------
+ enable_hashagg | on
enable_hashjoin | on
enable_indexscan | on
enable_mergejoin | on
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(7 rows)
+(8 rows)
CREATE TABLE foo2(fooid int, f2 int);
INSERT INTO foo2 VALUES(1, 11);
7 | item 7
(3 rows)
-select * from rtest_vview4;
+select * from rtest_vview4 order by a, b;
a | b | refcount
---+--------+----------
2 | item 2 | 2
delete from rtest_view4;
insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0;
-select * from rtest_view4;
+select * from rtest_view4 order by a, b;
a | b | c
---+--------+---
3 | item 3 | 0
INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
SELECT b, c FROM test_having
- GROUP BY b, c HAVING count(*) = 1;
+ GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
b | c
---+----------
1 | XXXX
-- HAVING is equivalent to WHERE in this case
SELECT b, c FROM test_having
- GROUP BY b, c HAVING b = 3;
+ GROUP BY b, c HAVING b = 3 ORDER BY b, c;
b | c
---+----------
3 | BBBB
(2 rows)
SELECT lower(c), count(c) FROM test_having
- GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+ GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+ ORDER BY lower(c);
lower | count
----------+-------
bbbb | 3
(3 rows)
SELECT c, max(a) FROM test_having
- GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+ GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+ ORDER BY c;
c | max
----------+-----
XXXX | 0
INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
SELECT b, c FROM test_having
- GROUP BY b, c HAVING count(*) = 1;
+ GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
b | c
---+----------
1 | XXXX
3 | bbbb
(2 rows)
+-- HAVING is equivalent to WHERE in this case
+SELECT b, c FROM test_having
+ GROUP BY b, c HAVING b = 3 ORDER BY b, c;
+ b | c
+---+----------
+ 3 | BBBB
+ 3 | bbbb
+(2 rows)
+
SELECT lower(c), count(c) FROM test_having
- GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+ GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+ ORDER BY lower(c);
lower | count
----------+-------
bbbb | 3
(3 rows)
SELECT c, max(a) FROM test_having
- GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+ GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+ ORDER BY c;
c | max
----------+-----
bbbb | 5
INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
-- w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
c | count
----------+-------
AAAA | 2
(6 rows)
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
count
-------
2
(1 row)
-- order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
c | count
----------+-------
AAAA | 2
-- group expression w/ target under ambiguous condition
-- failure NOT expected
SELECT a/2, a/2 FROM test_missing_target
- GROUP BY a/2;
+ GROUP BY a/2 ORDER BY a/2;
?column? | ?column?
----------+----------
0 | 0
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
b | count
---+-------
1 | 1
-- group w/o existing GROUP BY target under ambiguous condition
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
count
-------
1
SELECT count(*) INTO TABLE test_missing_target2
FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
SELECT * FROM test_missing_target2;
count
-------
-- Functions and expressions
-- w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
?column? | count
----------+-------
0 | 5
(2 rows)
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
count
-------
2
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b/2;
+ GROUP BY x.b/2 ORDER BY x.b/2;
?column? | count
----------+-------
0 | 1
SELECT count(x.b) INTO TABLE test_missing_target3
FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b/2;
+ GROUP BY x.b/2 ORDER BY x.b/2;
SELECT * FROM test_missing_target3;
count
-------
INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I');
INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j');
-- w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
c | count
----------+-------
AAAA | 2
(6 rows)
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
count
-------
2
(1 row)
-- order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
c | count
----------+-------
AAAA | 2
-- group expression w/ target under ambiguous condition
-- failure NOT expected
SELECT a/2, a/2 FROM test_missing_target
- GROUP BY a/2;
+ GROUP BY a/2 ORDER BY a/2;
?column? | ?column?
----------+----------
0 | 0
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
b | count
---+-------
1 | 1
-- group w/o existing GROUP BY target under ambiguous condition
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
count
-------
1
SELECT count(*) INTO TABLE test_missing_target2
FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
SELECT * FROM test_missing_target2;
count
-------
-- Functions and expressions
-- w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
?column? | count
----------+-------
0 | 5
(2 rows)
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
count
-------
2
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b/2;
+ GROUP BY x.b/2 ORDER BY x.b/2;
?column? | count
----------+-------
0 | 1
SELECT count(x.b) INTO TABLE test_missing_target3
FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b/2;
+ GROUP BY x.b/2 ORDER BY x.b/2;
SELECT * FROM test_missing_target3;
count
-------
(6 rows)
select q1, float8(count(*)) / (select count(*) from int8_tbl)
-from int8_tbl group by q1;
+from int8_tbl group by q1 order by q1;
q1 | ?column?
------------------+----------
123 | 0.4
SELECT sum(a) FROM a_star*;
-SELECT class, sum(a) FROM a_star* GROUP BY class;
+SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class;
ALTER TABLE f_star RENAME COLUMN f TO ff;
355
(1 row)
-SELECT class, sum(a) FROM a_star* GROUP BY class;
+SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class;
class | sum
-------+-----
a | 3
SELECT count(DISTINCT four) AS cnt_4 FROM onek;
-select ten, count(*), sum(four) from onek group by ten;
+select ten, count(*), sum(four) from onek
+group by ten order by ten;
-select ten, count(four), sum(DISTINCT four) from onek group by ten;
+select ten, count(four), sum(DISTINCT four) from onek
+group by ten order by ten;
SELECT newavg(four) AS avg_1 FROM onek;
select * from rtest_vview1;
select * from rtest_vview2;
select * from rtest_vview3;
-select * from rtest_vview4;
+select * from rtest_vview4 order by a, b;
select * from rtest_vview5;
insert into rtest_view3 select * from rtest_vview1 where a < 7;
delete from rtest_view4;
insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0;
-select * from rtest_view4;
+select * from rtest_view4 order by a, b;
delete from rtest_view4;
--
-- Test for computations in views
INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
SELECT b, c FROM test_having
- GROUP BY b, c HAVING count(*) = 1;
+ GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c;
-- HAVING is equivalent to WHERE in this case
SELECT b, c FROM test_having
- GROUP BY b, c HAVING b = 3;
+ GROUP BY b, c HAVING b = 3 ORDER BY b, c;
SELECT lower(c), count(c) FROM test_having
- GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a);
+ GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+ ORDER BY lower(c);
SELECT c, max(a) FROM test_having
- GROUP BY c HAVING count(*) > 2 OR min(a) = max(a);
+ GROUP BY c HAVING count(*) > 2 OR min(a) = max(a)
+ ORDER BY c;
DROP TABLE test_having;
-
-- w/ existing GROUP BY target
-SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c;
+SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c;
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
-- failure expected
SELECT count(*) FROM test_missing_target ORDER BY 1 desc;
-- order using reference number
-SELECT c, count(*) FROM test_missing_target GROUP BY 1;
+SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1;
-- group using reference number out of range
-- failure expected
-- group expression w/ target under ambiguous condition
-- failure NOT expected
SELECT a/2, a/2 FROM test_missing_target
- GROUP BY a/2;
+ GROUP BY a/2 ORDER BY a/2;
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
-- group w/o existing GROUP BY target under ambiguous condition
SELECT count(*) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
-- group w/o existing GROUP BY target under ambiguous condition
-- into a table
SELECT count(*) INTO TABLE test_missing_target2
FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b;
+ GROUP BY x.b ORDER BY x.b;
SELECT * FROM test_missing_target2;
-- Functions and expressions
-- w/ existing GROUP BY target
-SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2;
+SELECT a%2, count(b) FROM test_missing_target
+GROUP BY test_missing_target.a%2
+ORDER BY test_missing_target.a%2;
-- w/o existing GROUP BY target using a relation name in GROUP BY clause
-SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c);
+SELECT count(c) FROM test_missing_target
+GROUP BY lower(test_missing_target.c)
+ORDER BY lower(test_missing_target.c);
-- w/o existing GROUP BY target and w/o existing a different ORDER BY target
-- failure expected
-- group w/ existing GROUP BY target under ambiguous condition
SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b/2;
+ GROUP BY x.b/2 ORDER BY x.b/2;
-- group w/o existing GROUP BY target under ambiguous condition
-- failure expected due to ambiguous b in count(b)
SELECT count(x.b) INTO TABLE test_missing_target3
FROM test_missing_target x, test_missing_target y
WHERE x.a = y.a
- GROUP BY x.b/2;
+ GROUP BY x.b/2 ORDER BY x.b/2;
SELECT * FROM test_missing_target3;
-- Cleanup
WHERE f1 NOT IN (SELECT f1+1 FROM INT4_TBL WHERE f1 != ss.f1);
select q1, float8(count(*)) / (select count(*) from int8_tbl)
-from int8_tbl group by q1;
+from int8_tbl group by q1 order by q1;