From 6c1d4662afc6344ea7d98b5d1b248214ea0c7635 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 21 Nov 2002 00:42:20 +0000 Subject: [PATCH] Finish implementation of hashed aggregation. Add enable_hashagg GUC parameter to allow it to be forced off for comparison purposes. Add ORDER BY clauses to a bunch of regression test queries that will otherwise produce randomly-ordered output in the new regime. --- doc/src/sgml/runtime.sgml | 13 +- src/backend/optimizer/path/costsize.c | 106 +++++++++- src/backend/optimizer/plan/createplan.c | 64 +++--- src/backend/optimizer/plan/planmain.c | 87 ++++++-- src/backend/optimizer/plan/planner.c | 192 ++++++++++++------ src/backend/utils/misc/guc.c | 6 +- src/backend/utils/misc/postgresql.conf.sample | 1 + src/bin/psql/tab-complete.c | 3 +- src/include/optimizer/cost.h | 16 +- src/include/optimizer/planmain.h | 12 +- src/test/regress/expected/aggregates.out | 6 +- src/test/regress/expected/rangefuncs.out | 3 +- src/test/regress/expected/rules.out | 4 +- src/test/regress/expected/select_having.out | 10 +- src/test/regress/expected/select_having_1.out | 17 +- src/test/regress/expected/select_implicit.out | 26 ++- .../regress/expected/select_implicit_1.out | 26 ++- src/test/regress/expected/subselect.out | 2 +- src/test/regress/input/misc.source | 2 +- src/test/regress/output/misc.source | 2 +- src/test/regress/sql/aggregates.sql | 6 +- src/test/regress/sql/rules.sql | 4 +- src/test/regress/sql/select_having.sql | 11 +- src/test/regress/sql/select_implicit.sql | 26 ++- src/test/regress/sql/subselect.sql | 2 +- 25 files changed, 457 insertions(+), 190 deletions(-) diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 5083965b51..70b29118a2 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,5 +1,5 @@ @@ -670,6 +670,17 @@ env PGOPTIONS='-c geqo=off' psql + + ENABLE_HASHAGG (boolean) + + + Enables or disables the query planner's use of hashed aggregation + plan types. The default is on. This is used for debugging the query + planner. + + + + ENABLE_HASHJOIN (boolean) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 9f987a4395..6cf8b2af4b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.90 2002/09/04 20:31:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -79,6 +79,7 @@ bool enable_seqscan = true; bool enable_indexscan = true; bool enable_tidscan = true; bool enable_sort = true; +bool enable_hashagg = true; bool enable_nestloop = true; bool enable_mergejoin = true; bool enable_hashjoin = true; @@ -423,10 +424,8 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) /* * cost_sort - * Determines and returns the cost of sorting a relation. - * - * The cost of supplying the input data is NOT included; the caller should - * add that cost to both startup and total costs returned from this routine! + * Determines and returns the cost of sorting a relation, including + * the cost of reading the input data. * * If the total volume of data to sort is less than SortMem, we will do * an in-memory sort, which requires no I/O and about t*log2(t) tuple @@ -449,6 +448,7 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) * the right ballpark in most cases. * * 'pathkeys' is a list of sort keys + * 'input_cost' is the total cost for reading the input data * 'tuples' is the number of tuples in the relation * 'width' is the average tuple width in bytes * @@ -456,12 +456,14 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel) * can't conveniently supply the sort keys. Since this routine doesn't * currently do anything with pathkeys anyway, that doesn't matter... * but if it ever does, it should react gracefully to lack of key data. + * (Actually, the thing we'd most likely be interested in is just the number + * of sort keys, which all callers *could* supply.) */ void cost_sort(Path *path, Query *root, - List *pathkeys, double tuples, int width) + List *pathkeys, Cost input_cost, double tuples, int width) { - Cost startup_cost = 0; + Cost startup_cost = input_cost; Cost run_cost = 0; double nbytes = relation_byte_size(tuples, width); long sortmembytes = SortMem * 1024L; @@ -511,6 +513,92 @@ cost_sort(Path *path, Query *root, path->total_cost = startup_cost + run_cost; } +/* + * cost_agg + * Determines and returns the cost of performing an Agg plan node, + * including the cost of its input. + * + * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs + * are for appropriately-sorted input. + */ +void +cost_agg(Path *path, Query *root, + AggStrategy aggstrategy, int numAggs, + int numGroupCols, double numGroups, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples) +{ + Cost startup_cost; + Cost total_cost; + + /* + * We charge one cpu_operator_cost per aggregate function per input + * tuple, and another one per output tuple (corresponding to transfn + * and finalfn calls respectively). If we are grouping, we charge an + * additional cpu_operator_cost per grouping column per input tuple + * for grouping comparisons. + * + * We will produce a single output tuple if not grouping, + * and a tuple per group otherwise. + */ + if (aggstrategy == AGG_PLAIN) + { + startup_cost = input_total_cost; + startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs; + /* we aren't grouping */ + total_cost = startup_cost; + } + else if (aggstrategy == AGG_SORTED) + { + /* Here we are able to deliver output on-the-fly */ + startup_cost = input_startup_cost; + total_cost = input_total_cost; + total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs; + total_cost += cpu_operator_cost * input_tuples * numGroupCols; + } + else + { + /* must be AGG_HASHED */ + startup_cost = input_total_cost; + startup_cost += cpu_operator_cost * input_tuples * numAggs; + startup_cost += cpu_operator_cost * input_tuples * numGroupCols; + total_cost = startup_cost; + total_cost += cpu_operator_cost * numGroups * numAggs; + } + + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} + +/* + * cost_group + * Determines and returns the cost of performing a Group plan node, + * including the cost of its input. + * + * Note: caller must ensure that input costs are for appropriately-sorted + * input. + */ +void +cost_group(Path *path, Query *root, + int numGroupCols, double numGroups, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples) +{ + Cost startup_cost; + Cost total_cost; + + startup_cost = input_startup_cost; + total_cost = input_total_cost; + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We + * assume all columns get compared at most of the tuples. + */ + total_cost += cpu_operator_cost * input_tuples * numGroupCols; + + path->startup_cost = startup_cost; + path->total_cost = total_cost; +} /* * cost_nestloop @@ -658,10 +746,10 @@ cost_mergejoin(Path *path, Query *root, */ if (outersortkeys) /* do we need to sort outer? */ { - startup_cost += outer_path->total_cost; cost_sort(&sort_path, root, outersortkeys, + outer_path->total_cost, outer_path->parent->rows, outer_path->parent->width); startup_cost += sort_path.startup_cost; @@ -677,10 +765,10 @@ cost_mergejoin(Path *path, Query *root, if (innersortkeys) /* do we need to sort inner? */ { - startup_cost += inner_path->total_cost; cost_sort(&sort_path, root, innersortkeys, + inner_path->total_cost, inner_path->parent->rows, inner_path->parent->width); startup_cost += sort_path.startup_cost; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 74e6f237b3..b393252542 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.123 2002/11/19 23:21:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1573,9 +1573,11 @@ make_sort(Query *root, List *tlist, Plan *lefttree, int keycount) copy_plan_costsize(plan, lefttree); /* only care about copying size */ cost_sort(&sort_path, root, NIL, - lefttree->plan_rows, lefttree->plan_width); - plan->startup_cost = sort_path.startup_cost + lefttree->total_cost; - plan->total_cost = sort_path.total_cost + lefttree->total_cost; + lefttree->total_cost, + lefttree->plan_rows, + lefttree->plan_width); + plan->startup_cost = sort_path.startup_cost; + plan->total_cost = sort_path.total_cost; plan->state = (EState *) NULL; plan->targetlist = tlist; plan->qual = NIL; @@ -1683,39 +1685,39 @@ make_material(List *tlist, Plan *lefttree) } Agg * -make_agg(List *tlist, List *qual, AggStrategy aggstrategy, - int ngrp, AttrNumber *grpColIdx, long numGroups, int numAggs, +make_agg(Query *root, List *tlist, List *qual, + AggStrategy aggstrategy, + int numGroupCols, AttrNumber *grpColIdx, + long numGroups, int numAggs, Plan *lefttree) { Agg *node = makeNode(Agg); Plan *plan = &node->plan; + Path agg_path; /* dummy for result of cost_agg */ node->aggstrategy = aggstrategy; - node->numCols = ngrp; + node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->numGroups = numGroups; - copy_plan_costsize(plan, lefttree); - - /* - * Charge one cpu_operator_cost per aggregate function per input - * tuple. - */ - plan->total_cost += cpu_operator_cost * plan->plan_rows * numAggs; + copy_plan_costsize(plan, lefttree); /* only care about copying size */ + cost_agg(&agg_path, root, + aggstrategy, numAggs, + numGroupCols, numGroups, + lefttree->startup_cost, + lefttree->total_cost, + lefttree->plan_rows); + plan->startup_cost = agg_path.startup_cost; + plan->total_cost = agg_path.total_cost; /* * We will produce a single output tuple if not grouping, * and a tuple per group otherwise. */ if (aggstrategy == AGG_PLAIN) - { plan->plan_rows = 1; - plan->startup_cost = plan->total_cost; - } else - { plan->plan_rows = numGroups; - } plan->state = (EState *) NULL; plan->qual = qual; @@ -1727,22 +1729,28 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy, } Group * -make_group(List *tlist, - int ngrp, +make_group(Query *root, + List *tlist, + int numGroupCols, AttrNumber *grpColIdx, double numGroups, Plan *lefttree) { Group *node = makeNode(Group); Plan *plan = &node->plan; + Path group_path; /* dummy for result of cost_group */ - copy_plan_costsize(plan, lefttree); + node->numCols = numGroupCols; + node->grpColIdx = grpColIdx; - /* - * Charge one cpu_operator_cost per comparison per input tuple. We - * assume all columns get compared at most of the tuples. - */ - plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp; + copy_plan_costsize(plan, lefttree); /* only care about copying size */ + cost_group(&group_path, root, + numGroupCols, numGroups, + lefttree->startup_cost, + lefttree->total_cost, + lefttree->plan_rows); + plan->startup_cost = group_path.startup_cost; + plan->total_cost = group_path.total_cost; /* One output tuple per estimated result group */ plan->plan_rows = numGroups; @@ -1752,8 +1760,6 @@ make_group(List *tlist, plan->targetlist = tlist; plan->lefttree = lefttree; plan->righttree = (Plan *) NULL; - node->numCols = ngrp; - node->grpColIdx = grpColIdx; return node; } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 4354a5eb03..5d2634bf82 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,19 +14,17 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.71 2002/11/06 00:00:44 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.72 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "optimizer/clauses.h" +#include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/planmain.h" -#include "optimizer/tlist.h" -#include "parser/parsetree.h" -#include "utils/memutils.h" /*-------------------- @@ -36,11 +34,12 @@ * * Since query_planner does not handle the toplevel processing (grouping, * sorting, etc) it cannot select the best path by itself. It selects - * two paths: the cheapest path that produces the required tuples, independent - * of any ordering considerations, and the cheapest path that produces the - * required tuples in the required ordering, if there is a path that - * can produce them without an explicit top-level sort step. The caller - * (grouping_planner) will make the final decision about which to use. + * two paths: the cheapest path that produces all the required tuples, + * independent of any ordering considerations, and the cheapest path that + * produces the expected fraction of the required tuples in the required + * ordering, if there is a path that is cheaper for this than just sorting + * the output of the cheapest overall path. The caller (grouping_planner) + * will make the final decision about which to use. * * Input parameters: * root is the query to plan @@ -50,7 +49,7 @@ * Output parameters: * *cheapest_path receives the overall-cheapest path for the query * *sorted_path receives the cheapest presorted path for the query, - * if any (it may be NULL, or the same as cheapest_path) + * if any (NULL if there is no useful presorted path) * * Note: the Query node also includes a query_pathkeys field, which is both * an input and an output of query_planner(). The input value signals @@ -78,6 +77,8 @@ query_planner(Query *root, List *tlist, double tuple_fraction, { List *constant_quals; RelOptInfo *final_rel; + Path *cheapestpath; + Path *sortedpath; /* * If the query has an empty join tree, then it's something easy like @@ -166,34 +167,76 @@ query_planner(Query *root, List *tlist, double tuple_fraction, /* * Pick out the cheapest-total path and the cheapest presorted path - * for the requested pathkeys (if there is one). We can take the + * for the requested pathkeys (if there is one). We should take the * tuple fraction into account when selecting the cheapest presorted * path, but not when selecting the cheapest-total path, since if we * have to sort then we'll have to fetch all the tuples. (But there's * a special case: if query_pathkeys is NIL, meaning order doesn't * matter, then the "cheapest presorted" path will be the cheapest * overall for the tuple fraction.) + * + * The cheapest-total path is also the one to use if grouping_planner + * decides to use hashed aggregation, so we return it separately even + * if this routine thinks the presorted path is the winner. */ - *cheapest_path = final_rel->cheapest_total_path; + cheapestpath = final_rel->cheapest_total_path; - *sorted_path = + sortedpath = get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, root->query_pathkeys, tuple_fraction); + /* Don't return same path in both guises; just wastes effort */ + if (sortedpath == cheapestpath) + sortedpath = NULL; + + /* + * Forget about the presorted path if it would be cheaper to sort the + * cheapest-total path. Here we need consider only the behavior at + * the tuple fraction point. + */ + if (sortedpath) + { + Path sort_path; /* dummy for result of cost_sort */ + + if (root->query_pathkeys == NIL || + pathkeys_contained_in(root->query_pathkeys, + cheapestpath->pathkeys)) + { + /* No sort needed for cheapest path */ + sort_path.startup_cost = cheapestpath->startup_cost; + sort_path.total_cost = cheapestpath->total_cost; + } + else + { + /* Figure cost for sorting */ + cost_sort(&sort_path, root, root->query_pathkeys, + cheapestpath->total_cost, + final_rel->rows, final_rel->width); + } + + if (compare_fractional_path_costs(sortedpath, &sort_path, + tuple_fraction) > 0) + { + /* Presorted path is a loser */ + sortedpath = NULL; + } + } + /* * If we have constant quals, add a toplevel Result step to process them. */ if (constant_quals) { - *cheapest_path = (Path *) - create_result_path((*cheapest_path)->parent, - *cheapest_path, - constant_quals); - if (*sorted_path) - *sorted_path = (Path *) - create_result_path((*sorted_path)->parent, - *sorted_path, - constant_quals); + cheapestpath = (Path *) create_result_path(final_rel, + cheapestpath, + constant_quals); + if (sortedpath) + sortedpath = (Path *) create_result_path(final_rel, + sortedpath, + constant_quals); } + + *cheapest_path = cheapestpath; + *sorted_path = sortedpath; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index baccf2ffbd..9a5141ea6b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -933,11 +933,13 @@ grouping_planner(Query *parse, double tuple_fraction) List *sub_tlist; List *group_pathkeys; AttrNumber *groupColIdx = NULL; + double sub_tuple_fraction; Path *cheapest_path; Path *sorted_path; double dNumGroups = 0; long numGroups = 0; int numAggs = 0; + int numGroupCols = length(parse->groupClause); bool use_hashed_grouping = false; /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */ @@ -1169,6 +1171,12 @@ grouping_planner(Query *parse, double tuple_fraction) } } + /* + * With grouping or aggregation, the tuple fraction to pass to + * query_planner() may be different from what it is at top level. + */ + sub_tuple_fraction = tuple_fraction; + if (parse->groupClause) { /* @@ -1182,8 +1190,8 @@ grouping_planner(Query *parse, double tuple_fraction) * amounts to assuming that all the groups are about the same * size). */ - if (tuple_fraction >= 1.0) - tuple_fraction = 0.25; + if (sub_tuple_fraction >= 1.0) + sub_tuple_fraction = 0.25; /* * If both GROUP BY and ORDER BY are specified, we will need @@ -1195,7 +1203,7 @@ grouping_planner(Query *parse, double tuple_fraction) if (parse->groupClause && parse->sortClause && !noncanonical_pathkeys_contained_in(sort_pathkeys, group_pathkeys)) - tuple_fraction = 0.0; + sub_tuple_fraction = 0.0; } else if (parse->hasAggs) { @@ -1203,7 +1211,7 @@ grouping_planner(Query *parse, double tuple_fraction) * Ungrouped aggregate will certainly want all the input * tuples. */ - tuple_fraction = 0.0; + sub_tuple_fraction = 0.0; } else if (parse->distinctClause) { @@ -1212,15 +1220,15 @@ grouping_planner(Query *parse, double tuple_fraction) * number of input tuples per output tuple. Handle the same * way. */ - if (tuple_fraction >= 1.0) - tuple_fraction = 0.25; + if (sub_tuple_fraction >= 1.0) + sub_tuple_fraction = 0.25; } /* * Generate the best unsorted and presorted paths for this Query * (but note there may not be any presorted path). */ - query_planner(parse, sub_tlist, tuple_fraction, + query_planner(parse, sub_tlist, sub_tuple_fraction, &cheapest_path, &sorted_path); /* @@ -1236,11 +1244,13 @@ grouping_planner(Query *parse, double tuple_fraction) if (parse->groupClause) { /* - * Always estimate the number of groups. + * Always estimate the number of groups. We can't do this until + * after running query_planner(), either. */ dNumGroups = estimate_num_groups(parse, parse->groupClause, cheapest_path->parent->rows); + /* Also want it as a long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); /* @@ -1248,9 +1258,11 @@ grouping_planner(Query *parse, double tuple_fraction) * aggregates. (Doing so would imply storing *all* the input * values in the hash table, which seems like a certain loser.) */ - if (parse->hasAggs && - (contain_distinct_agg_clause((Node *) tlist) || - contain_distinct_agg_clause(parse->havingQual))) + if (!enable_hashagg) + use_hashed_grouping = false; + else if (parse->hasAggs && + (contain_distinct_agg_clause((Node *) tlist) || + contain_distinct_agg_clause(parse->havingQual))) use_hashed_grouping = false; else { @@ -1272,11 +1284,96 @@ grouping_planner(Query *parse, double tuple_fraction) if (hashentrysize * dNumGroups <= SortMem * 1024L) { - /* much more to do here */ -#if 0 - /* TEMPORARY HOTWIRE FOR TESTING */ - use_hashed_grouping = true; -#endif + /* + * Okay, do the cost comparison. We need to consider + * cheapest_path + hashagg [+ final sort] + * versus either + * cheapest_path [+ sort] + group or agg [+ final sort] + * or + * presorted_path + group or agg [+ final sort] + * where brackets indicate a step that may not be needed. + * We assume query_planner() will have returned a + * presorted path only if it's a winner compared to + * cheapest_path for this purpose. + * + * These path variables are dummies that just hold cost + * fields; we don't make actual Paths for these steps. + */ + Path hashed_p; + Path sorted_p; + + cost_agg(&hashed_p, parse, + AGG_HASHED, numAggs, + numGroupCols, dNumGroups, + cheapest_path->startup_cost, + cheapest_path->total_cost, + cheapest_path->parent->rows); + /* Result of hashed agg is always unsorted */ + if (sort_pathkeys) + cost_sort(&hashed_p, parse, sort_pathkeys, + hashed_p.total_cost, + dNumGroups, + cheapest_path->parent->width); + + if (sorted_path) + { + sorted_p.startup_cost = sorted_path->startup_cost; + sorted_p.total_cost = sorted_path->total_cost; + current_pathkeys = sorted_path->pathkeys; + } + else + { + sorted_p.startup_cost = cheapest_path->startup_cost; + sorted_p.total_cost = cheapest_path->total_cost; + current_pathkeys = cheapest_path->pathkeys; + } + if (!pathkeys_contained_in(group_pathkeys, + current_pathkeys)) + { + cost_sort(&sorted_p, parse, group_pathkeys, + sorted_p.total_cost, + cheapest_path->parent->rows, + cheapest_path->parent->width); + current_pathkeys = group_pathkeys; + } + if (parse->hasAggs) + cost_agg(&sorted_p, parse, + AGG_SORTED, numAggs, + numGroupCols, dNumGroups, + sorted_p.startup_cost, + sorted_p.total_cost, + cheapest_path->parent->rows); + else + cost_group(&sorted_p, parse, + numGroupCols, dNumGroups, + sorted_p.startup_cost, + sorted_p.total_cost, + cheapest_path->parent->rows); + /* The Agg or Group node will preserve ordering */ + if (sort_pathkeys && + !pathkeys_contained_in(sort_pathkeys, + current_pathkeys)) + { + cost_sort(&sorted_p, parse, sort_pathkeys, + sorted_p.total_cost, + dNumGroups, + cheapest_path->parent->width); + } + + /* + * Now make the decision using the top-level tuple + * fraction. First we have to convert an absolute + * count (LIMIT) into fractional form. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumGroups; + + if (compare_fractional_path_costs(&hashed_p, &sorted_p, + tuple_fraction) <= 0) + { + /* Hashed is cheaper, so use it */ + use_hashed_grouping = true; + } } } } @@ -1284,50 +1381,17 @@ grouping_planner(Query *parse, double tuple_fraction) /* * Select the best path and create a plan to execute it. * - * If no special sort order is wanted, or if the cheapest path is - * already appropriately ordered, use the cheapest path. - * Otherwise, look to see if we have an already-ordered path that is - * cheaper than doing an explicit sort on the cheapest-total-cost - * path. + * If we are doing hashed grouping, we will always read all the + * input tuples, so use the cheapest-total path. Otherwise, + * trust query_planner's decision about which to use. */ - if (parse->query_pathkeys == NIL || - pathkeys_contained_in(parse->query_pathkeys, - cheapest_path->pathkeys)) + if (sorted_path && !use_hashed_grouping) { - result_plan = create_plan(parse, cheapest_path); - current_pathkeys = cheapest_path->pathkeys; - } - else if (sorted_path) - { - Path sort_path; /* dummy for result of cost_sort */ - - cost_sort(&sort_path, parse, parse->query_pathkeys, - sorted_path->parent->rows, sorted_path->parent->width); - sort_path.startup_cost += cheapest_path->total_cost; - sort_path.total_cost += cheapest_path->total_cost; - /* Convert absolute-count tuple_fraction into a fraction */ - if (tuple_fraction >= 1.0) - tuple_fraction /= sorted_path->parent->rows; - if (compare_fractional_path_costs(sorted_path, &sort_path, - tuple_fraction) <= 0) - { - /* Presorted path is cheaper, use it */ - result_plan = create_plan(parse, sorted_path); - current_pathkeys = sorted_path->pathkeys; - } - else - { - /* otherwise, doing it the hard way is still cheaper */ - result_plan = create_plan(parse, cheapest_path); - current_pathkeys = cheapest_path->pathkeys; - } + result_plan = create_plan(parse, sorted_path); + current_pathkeys = sorted_path->pathkeys; } else { - /* - * No sorted path, so we must use the cheapest-total path. - * The actual sort step will be generated below. - */ result_plan = create_plan(parse, cheapest_path); current_pathkeys = cheapest_path->pathkeys; } @@ -1362,10 +1426,11 @@ grouping_planner(Query *parse, double tuple_fraction) if (use_hashed_grouping) { /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(tlist, + result_plan = (Plan *) make_agg(parse, + tlist, (List *) parse->havingQual, AGG_HASHED, - length(parse->groupClause), + numGroupCols, groupColIdx, numGroups, numAggs, @@ -1401,10 +1466,11 @@ grouping_planner(Query *parse, double tuple_fraction) current_pathkeys = NIL; } - result_plan = (Plan *) make_agg(tlist, + result_plan = (Plan *) make_agg(parse, + tlist, (List *) parse->havingQual, aggstrategy, - length(parse->groupClause), + numGroupCols, groupColIdx, numGroups, numAggs, @@ -1436,11 +1502,13 @@ grouping_planner(Query *parse, double tuple_fraction) current_pathkeys = group_pathkeys; } - result_plan = (Plan *) make_group(tlist, - length(parse->groupClause), + result_plan = (Plan *) make_group(parse, + tlist, + numGroupCols, groupColIdx, dNumGroups, result_plan); + /* The Group node won't change sort ordering */ } } } /* end of if (setOperations) */ diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 62417bfe11..c26c4c7407 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -5,7 +5,7 @@ * command, configuration file, and command line options. * See src/backend/utils/misc/README for more information. * - * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.106 2002/11/15 02:44:57 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.107 2002/11/21 00:42:19 tgl Exp $ * * Copyright 2000 by PostgreSQL Global Development Group * Written by Peter Eisentraut . @@ -323,6 +323,10 @@ static struct config_bool {"enable_sort", PGC_USERSET}, &enable_sort, true, NULL, NULL }, + { + {"enable_hashagg", PGC_USERSET}, &enable_hashagg, + true, NULL, NULL + }, { {"enable_nestloop", PGC_USERSET}, &enable_nestloop, true, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 5fbfafd214..1c4595a66b 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -83,6 +83,7 @@ #enable_indexscan = true #enable_tidscan = true #enable_sort = true +#enable_hashagg = true #enable_nestloop = true #enable_mergejoin = true #enable_hashjoin = true diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 6eec1c92ed..ff0ae41021 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3,7 +3,7 @@ * * Copyright 2000-2002 by PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.68 2002/11/15 03:07:52 momjian Exp $ + * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.69 2002/11/21 00:42:19 tgl Exp $ */ /*---------------------------------------------------------------------- @@ -251,6 +251,7 @@ psql_completion(char *text, int start, int end) "enable_nestloop", "enable_seqscan", "enable_sort", + "enable_hashagg", "enable_tidscan", "explain_pretty_print", "extra_float_digits", diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index b3076d7bb4..fd9a3c1def 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,15 +7,17 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.47 2002/09/04 20:31:44 momjian Exp $ + * $Id: cost.h,v 1.48 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ #ifndef COST_H #define COST_H +#include "nodes/plannodes.h" #include "nodes/relation.h" + /* defaults for costsize.c's Cost parameters */ /* NB: cost-estimation code should use the variables, not these constants! */ /* If you change these, update backend/utils/misc/postgresql.sample.conf */ @@ -42,6 +44,7 @@ extern bool enable_seqscan; extern bool enable_indexscan; extern bool enable_tidscan; extern bool enable_sort; +extern bool enable_hashagg; extern bool enable_nestloop; extern bool enable_mergejoin; extern bool enable_hashjoin; @@ -56,7 +59,16 @@ extern void cost_tidscan(Path *path, Query *root, extern void cost_functionscan(Path *path, Query *root, RelOptInfo *baserel); extern void cost_sort(Path *path, Query *root, - List *pathkeys, double tuples, int width); + List *pathkeys, Cost input_cost, double tuples, int width); +extern void cost_agg(Path *path, Query *root, + AggStrategy aggstrategy, int numAggs, + int numGroupCols, double numGroups, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples); +extern void cost_group(Path *path, Query *root, + int numGroupCols, double numGroups, + Cost input_startup_cost, Cost input_total_cost, + double input_tuples); extern void cost_nestloop(Path *path, Query *root, Path *outer_path, Path *inner_path, List *restrictlist); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index bd4bcddd30..ecd7bca604 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: planmain.h,v 1.62 2002/11/19 23:22:00 tgl Exp $ + * $Id: planmain.h,v 1.63 2002/11/21 00:42:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,12 +34,14 @@ extern Sort *make_sort(Query *root, List *tlist, Plan *lefttree, int keycount); extern Sort *make_sort_from_pathkeys(Query *root, List *tlist, Plan *lefttree, List *pathkeys); -extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, - int ngrp, AttrNumber *grpColIdx, +extern Agg *make_agg(Query *root, List *tlist, List *qual, + AggStrategy aggstrategy, + int numGroupCols, AttrNumber *grpColIdx, long numGroups, int numAggs, Plan *lefttree); -extern Group *make_group(List *tlist, - int ngrp, AttrNumber *grpColIdx, double numGroups, +extern Group *make_group(Query *root, List *tlist, + int numGroupCols, AttrNumber *grpColIdx, + double numGroups, Plan *lefttree); extern Material *make_material(List *tlist, Plan *lefttree); extern Unique *make_unique(List *tlist, Plan *lefttree, List *distinctList); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 7519f6e539..5a62875519 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -87,7 +87,8 @@ SELECT count(DISTINCT four) AS cnt_4 FROM onek; 4 (1 row) -select ten, count(*), sum(four) from onek group by ten; +select ten, count(*), sum(four) from onek +group by ten order by ten; ten | count | sum -----+-------+----- 0 | 100 | 100 @@ -102,7 +103,8 @@ select ten, count(*), sum(four) from onek group by ten; 9 | 100 | 200 (10 rows) -select ten, count(four), sum(DISTINCT four) from onek group by ten; +select ten, count(four), sum(DISTINCT four) from onek +group by ten order by ten; ten | count | sum -----+-------+----- 0 | 100 | 2 diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 844cccc639..e24c7fe05c 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -1,6 +1,7 @@ SELECT * FROM pg_settings WHERE name LIKE 'enable%'; name | setting ------------------+--------- + enable_hashagg | on enable_hashjoin | on enable_indexscan | on enable_mergejoin | on @@ -8,7 +9,7 @@ SELECT * FROM pg_settings WHERE name LIKE 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(7 rows) +(8 rows) CREATE TABLE foo2(fooid int, f2 int); INSERT INTO foo2 VALUES(1, 11); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c0d84fd1bc..aecdf85d73 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -804,7 +804,7 @@ select * from rtest_vview3; 7 | item 7 (3 rows) -select * from rtest_vview4; +select * from rtest_vview4 order by a, b; a | b | refcount ---+--------+---------- 2 | item 2 | 2 @@ -868,7 +868,7 @@ select * from rtest_view4; delete from rtest_view4; insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0; -select * from rtest_view4; +select * from rtest_view4 order by a, b; a | b | c ---+--------+--- 3 | item 3 | 0 diff --git a/src/test/regress/expected/select_having.out b/src/test/regress/expected/select_having.out index 29321e4414..4447a9df4d 100644 --- a/src/test/regress/expected/select_having.out +++ b/src/test/regress/expected/select_having.out @@ -14,7 +14,7 @@ INSERT INTO test_having VALUES (7, 4, 'cccc', 'h'); INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I'); INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j'); SELECT b, c FROM test_having - GROUP BY b, c HAVING count(*) = 1; + GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c; b | c ---+---------- 1 | XXXX @@ -23,7 +23,7 @@ SELECT b, c FROM test_having -- HAVING is equivalent to WHERE in this case SELECT b, c FROM test_having - GROUP BY b, c HAVING b = 3; + GROUP BY b, c HAVING b = 3 ORDER BY b, c; b | c ---+---------- 3 | BBBB @@ -31,7 +31,8 @@ SELECT b, c FROM test_having (2 rows) SELECT lower(c), count(c) FROM test_having - GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a); + GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a) + ORDER BY lower(c); lower | count ----------+------- bbbb | 3 @@ -40,7 +41,8 @@ SELECT lower(c), count(c) FROM test_having (3 rows) SELECT c, max(a) FROM test_having - GROUP BY c HAVING count(*) > 2 OR min(a) = max(a); + GROUP BY c HAVING count(*) > 2 OR min(a) = max(a) + ORDER BY c; c | max ----------+----- XXXX | 0 diff --git a/src/test/regress/expected/select_having_1.out b/src/test/regress/expected/select_having_1.out index b53f4a7f11..ffbb591b12 100644 --- a/src/test/regress/expected/select_having_1.out +++ b/src/test/regress/expected/select_having_1.out @@ -14,15 +14,25 @@ INSERT INTO test_having VALUES (7, 4, 'cccc', 'h'); INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I'); INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j'); SELECT b, c FROM test_having - GROUP BY b, c HAVING count(*) = 1; + GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c; b | c ---+---------- 1 | XXXX 3 | bbbb (2 rows) +-- HAVING is equivalent to WHERE in this case +SELECT b, c FROM test_having + GROUP BY b, c HAVING b = 3 ORDER BY b, c; + b | c +---+---------- + 3 | BBBB + 3 | bbbb +(2 rows) + SELECT lower(c), count(c) FROM test_having - GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a); + GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a) + ORDER BY lower(c); lower | count ----------+------- bbbb | 3 @@ -31,7 +41,8 @@ SELECT lower(c), count(c) FROM test_having (3 rows) SELECT c, max(a) FROM test_having - GROUP BY c HAVING count(*) > 2 OR min(a) = max(a); + GROUP BY c HAVING count(*) > 2 OR min(a) = max(a) + ORDER BY c; c | max ----------+----- bbbb | 5 diff --git a/src/test/regress/expected/select_implicit.out b/src/test/regress/expected/select_implicit.out index e3d74e5dab..47d2e80289 100644 --- a/src/test/regress/expected/select_implicit.out +++ b/src/test/regress/expected/select_implicit.out @@ -18,7 +18,7 @@ INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h'); INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I'); INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j'); -- w/ existing GROUP BY target -SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c; +SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; c | count ----------+------- AAAA | 2 @@ -30,7 +30,7 @@ SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c; (6 rows) -- w/o existing GROUP BY target using a relation name in GROUP BY clause -SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c; +SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; count ------- 2 @@ -100,7 +100,7 @@ SELECT count(*) FROM test_missing_target ORDER BY 1 desc; (1 row) -- order using reference number -SELECT c, count(*) FROM test_missing_target GROUP BY 1; +SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1; c | count ----------+------- AAAA | 2 @@ -160,7 +160,7 @@ SELECT a/2, a/2 FROM test_missing_target -- group expression w/ target under ambiguous condition -- failure NOT expected SELECT a/2, a/2 FROM test_missing_target - GROUP BY a/2; + GROUP BY a/2 ORDER BY a/2; ?column? | ?column? ----------+---------- 0 | 0 @@ -173,7 +173,7 @@ SELECT a/2, a/2 FROM test_missing_target -- group w/ existing GROUP BY target under ambiguous condition SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; b | count ---+------- 1 | 1 @@ -185,7 +185,7 @@ SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y -- group w/o existing GROUP BY target under ambiguous condition SELECT count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; count ------- 1 @@ -199,7 +199,7 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y SELECT count(*) INTO TABLE test_missing_target2 FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; SELECT * FROM test_missing_target2; count ------- @@ -211,7 +211,9 @@ SELECT * FROM test_missing_target2; -- Functions and expressions -- w/ existing GROUP BY target -SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2; +SELECT a%2, count(b) FROM test_missing_target +GROUP BY test_missing_target.a%2 +ORDER BY test_missing_target.a%2; ?column? | count ----------+------- 0 | 5 @@ -219,7 +221,9 @@ SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2; (2 rows) -- w/o existing GROUP BY target using a relation name in GROUP BY clause -SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c); +SELECT count(c) FROM test_missing_target +GROUP BY lower(test_missing_target.c) +ORDER BY lower(test_missing_target.c); count ------- 2 @@ -286,7 +290,7 @@ ERROR: Column reference "b" is ambiguous -- group w/ existing GROUP BY target under ambiguous condition SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b/2; + GROUP BY x.b/2 ORDER BY x.b/2; ?column? | count ----------+------- 0 | 1 @@ -305,7 +309,7 @@ ERROR: Column reference "b" is ambiguous SELECT count(x.b) INTO TABLE test_missing_target3 FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b/2; + GROUP BY x.b/2 ORDER BY x.b/2; SELECT * FROM test_missing_target3; count ------- diff --git a/src/test/regress/expected/select_implicit_1.out b/src/test/regress/expected/select_implicit_1.out index d63a572736..80abfd8f9a 100644 --- a/src/test/regress/expected/select_implicit_1.out +++ b/src/test/regress/expected/select_implicit_1.out @@ -18,7 +18,7 @@ INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h'); INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I'); INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j'); -- w/ existing GROUP BY target -SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c; +SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; c | count ----------+------- AAAA | 2 @@ -30,7 +30,7 @@ SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c; (6 rows) -- w/o existing GROUP BY target using a relation name in GROUP BY clause -SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c; +SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; count ------- 2 @@ -100,7 +100,7 @@ SELECT count(*) FROM test_missing_target ORDER BY 1 desc; (1 row) -- order using reference number -SELECT c, count(*) FROM test_missing_target GROUP BY 1; +SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1; c | count ----------+------- AAAA | 2 @@ -160,7 +160,7 @@ SELECT a/2, a/2 FROM test_missing_target -- group expression w/ target under ambiguous condition -- failure NOT expected SELECT a/2, a/2 FROM test_missing_target - GROUP BY a/2; + GROUP BY a/2 ORDER BY a/2; ?column? | ?column? ----------+---------- 0 | 0 @@ -173,7 +173,7 @@ SELECT a/2, a/2 FROM test_missing_target -- group w/ existing GROUP BY target under ambiguous condition SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; b | count ---+------- 1 | 1 @@ -185,7 +185,7 @@ SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y -- group w/o existing GROUP BY target under ambiguous condition SELECT count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; count ------- 1 @@ -199,7 +199,7 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y SELECT count(*) INTO TABLE test_missing_target2 FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; SELECT * FROM test_missing_target2; count ------- @@ -211,7 +211,9 @@ SELECT * FROM test_missing_target2; -- Functions and expressions -- w/ existing GROUP BY target -SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2; +SELECT a%2, count(b) FROM test_missing_target +GROUP BY test_missing_target.a%2 +ORDER BY test_missing_target.a%2; ?column? | count ----------+------- 0 | 5 @@ -219,7 +221,9 @@ SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2; (2 rows) -- w/o existing GROUP BY target using a relation name in GROUP BY clause -SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c); +SELECT count(c) FROM test_missing_target +GROUP BY lower(test_missing_target.c) +ORDER BY lower(test_missing_target.c); count ------- 2 @@ -286,7 +290,7 @@ ERROR: Column reference "b" is ambiguous -- group w/ existing GROUP BY target under ambiguous condition SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b/2; + GROUP BY x.b/2 ORDER BY x.b/2; ?column? | count ----------+------- 0 | 1 @@ -305,7 +309,7 @@ ERROR: Column reference "b" is ambiguous SELECT count(x.b) INTO TABLE test_missing_target3 FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b/2; + GROUP BY x.b/2 ORDER BY x.b/2; SELECT * FROM test_missing_target3; count ------- diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 96070f7706..5a2ef11c21 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -158,7 +158,7 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field" (6 rows) select q1, float8(count(*)) / (select count(*) from int8_tbl) -from int8_tbl group by q1; +from int8_tbl group by q1 order by q1; q1 | ?column? ------------------+---------- 123 | 0.4 diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source index 18bf4e8af7..c10f73c5c6 100644 --- a/src/test/regress/input/misc.source +++ b/src/test/regress/input/misc.source @@ -105,7 +105,7 @@ SELECT * FROM f_star* x WHERE x.c ISNULL; SELECT sum(a) FROM a_star*; -SELECT class, sum(a) FROM a_star* GROUP BY class; +SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class; ALTER TABLE f_star RENAME COLUMN f TO ff; diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index 01b88045ac..ae60699356 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -223,7 +223,7 @@ SELECT sum(a) FROM a_star*; 355 (1 row) -SELECT class, sum(a) FROM a_star* GROUP BY class; +SELECT class, sum(a) FROM a_star* GROUP BY class ORDER BY class; class | sum -------+----- a | 3 diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 33c8fb5f35..00cc6daf9f 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -36,9 +36,11 @@ SELECT count(four) AS cnt_1000 FROM onek; SELECT count(DISTINCT four) AS cnt_4 FROM onek; -select ten, count(*), sum(four) from onek group by ten; +select ten, count(*), sum(four) from onek +group by ten order by ten; -select ten, count(four), sum(DISTINCT four) from onek group by ten; +select ten, count(four), sum(DISTINCT four) from onek +group by ten order by ten; SELECT newavg(four) AS avg_1 FROM onek; diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql index 6ded00a445..0a630194f2 100644 --- a/src/test/regress/sql/rules.sql +++ b/src/test/regress/sql/rules.sql @@ -451,7 +451,7 @@ insert into rtest_view2 values (7); select * from rtest_vview1; select * from rtest_vview2; select * from rtest_vview3; -select * from rtest_vview4; +select * from rtest_vview4 order by a, b; select * from rtest_vview5; insert into rtest_view3 select * from rtest_vview1 where a < 7; @@ -471,7 +471,7 @@ select * from rtest_view4; delete from rtest_view4; insert into rtest_view4 select * from rtest_vview5 where a > 2 and refcount = 0; -select * from rtest_view4; +select * from rtest_view4 order by a, b; delete from rtest_view4; -- -- Test for computations in views diff --git a/src/test/regress/sql/select_having.sql b/src/test/regress/sql/select_having.sql index 28b22d9859..72887d63c8 100644 --- a/src/test/regress/sql/select_having.sql +++ b/src/test/regress/sql/select_having.sql @@ -16,17 +16,18 @@ INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I'); INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j'); SELECT b, c FROM test_having - GROUP BY b, c HAVING count(*) = 1; + GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c; -- HAVING is equivalent to WHERE in this case SELECT b, c FROM test_having - GROUP BY b, c HAVING b = 3; + GROUP BY b, c HAVING b = 3 ORDER BY b, c; SELECT lower(c), count(c) FROM test_having - GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a); + GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a) + ORDER BY lower(c); SELECT c, max(a) FROM test_having - GROUP BY c HAVING count(*) > 2 OR min(a) = max(a); + GROUP BY c HAVING count(*) > 2 OR min(a) = max(a) + ORDER BY c; DROP TABLE test_having; - diff --git a/src/test/regress/sql/select_implicit.sql b/src/test/regress/sql/select_implicit.sql index 607ea95bb1..60fac0b8bd 100644 --- a/src/test/regress/sql/select_implicit.sql +++ b/src/test/regress/sql/select_implicit.sql @@ -21,10 +21,10 @@ INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j'); -- w/ existing GROUP BY target -SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c; +SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; -- w/o existing GROUP BY target using a relation name in GROUP BY clause -SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c; +SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c; -- w/o existing GROUP BY target and w/o existing a different ORDER BY target -- failure expected @@ -47,7 +47,7 @@ SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc; SELECT count(*) FROM test_missing_target ORDER BY 1 desc; -- order using reference number -SELECT c, count(*) FROM test_missing_target GROUP BY 1; +SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1; -- group using reference number out of range -- failure expected @@ -72,34 +72,38 @@ SELECT a/2, a/2 FROM test_missing_target -- group expression w/ target under ambiguous condition -- failure NOT expected SELECT a/2, a/2 FROM test_missing_target - GROUP BY a/2; + GROUP BY a/2 ORDER BY a/2; -- group w/ existing GROUP BY target under ambiguous condition SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; -- group w/o existing GROUP BY target under ambiguous condition SELECT count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; -- group w/o existing GROUP BY target under ambiguous condition -- into a table SELECT count(*) INTO TABLE test_missing_target2 FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b; + GROUP BY x.b ORDER BY x.b; SELECT * FROM test_missing_target2; -- Functions and expressions -- w/ existing GROUP BY target -SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2; +SELECT a%2, count(b) FROM test_missing_target +GROUP BY test_missing_target.a%2 +ORDER BY test_missing_target.a%2; -- w/o existing GROUP BY target using a relation name in GROUP BY clause -SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c); +SELECT count(c) FROM test_missing_target +GROUP BY lower(test_missing_target.c) +ORDER BY lower(test_missing_target.c); -- w/o existing GROUP BY target and w/o existing a different ORDER BY target -- failure expected @@ -128,7 +132,7 @@ SELECT count(x.a) FROM test_missing_target x, test_missing_target y -- group w/ existing GROUP BY target under ambiguous condition SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b/2; + GROUP BY x.b/2 ORDER BY x.b/2; -- group w/o existing GROUP BY target under ambiguous condition -- failure expected due to ambiguous b in count(b) @@ -141,7 +145,7 @@ SELECT count(b) FROM test_missing_target x, test_missing_target y SELECT count(x.b) INTO TABLE test_missing_target3 FROM test_missing_target x, test_missing_target y WHERE x.a = y.a - GROUP BY x.b/2; + GROUP BY x.b/2 ORDER BY x.b/2; SELECT * FROM test_missing_target3; -- Cleanup diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index b3a4ff5284..c2cde6ac20 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -74,4 +74,4 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field" WHERE f1 NOT IN (SELECT f1+1 FROM INT4_TBL WHERE f1 != ss.f1); select q1, float8(count(*)) / (select count(*) from int8_tbl) -from int8_tbl group by q1; +from int8_tbl group by q1 order by q1; -- 2.40.0