]> granicus.if.org Git - postgresql/commitdiff
Split out into a separate function the code in grouping_planner() that
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 10 Apr 2005 19:50:08 +0000 (19:50 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 10 Apr 2005 19:50:08 +0000 (19:50 +0000)
decides whether to use hashed grouping instead of sort-plus-uniq
grouping. The function needs an annoyingly large number of parameters,
but this still seems like a win for legibility, since it removes over
a hundred lines from grouping_planner (which is still too big :-().

src/backend/optimizer/plan/planner.c

index 9f898997f0064882ddf981ab4074afae6d71ff0b..eea58e45a15ee90f38d1911cd269153a5e21513f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -58,6 +58,10 @@ static Node *preprocess_expression(Query *parse, Node *expr, int kind);
 static void preprocess_qual_conditions(Query *parse, Node *jtnode);
 static Plan *inheritance_planner(Query *parse, List *inheritlist);
 static Plan *grouping_planner(Query *parse, double tuple_fraction);
+static bool choose_hashed_grouping(Query *parse, double tuple_fraction,
+                                          Path *cheapest_path, Path *sorted_path,
+                                          List *sort_pathkeys, List *group_pathkeys,
+                                          double dNumGroups, AggClauseCounts *agg_counts);
 static bool hash_safe_grouping(Query *parse);
 static List *make_subplanTargetList(Query *parse, List *tlist,
                                           AttrNumber **groupColIdx, bool *need_tlist_eval);
@@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction)
                sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
 
                /*
-                * Consider whether we might want to use hashed grouping.
+                * If grouping, estimate the number of groups.  (We can't do this
+                * until after running query_planner(), either.)  Then decide
+                * whether we want to use hashed grouping.
                 */
                if (parse->groupClause)
                {
                        List       *groupExprs;
                        double          cheapest_path_rows;
-                       int                     cheapest_path_width;
 
                        /*
-                        * Beware in this section of the possibility that
-                        * cheapest_path->parent is NULL.  This could happen if user
-                        * does something silly like SELECT 'foo' GROUP BY 1;
+                        * Beware of the possibility that cheapest_path->parent is NULL.
+                        * This could happen if user does something silly like
+                        *              SELECT 'foo' GROUP BY 1;
                         */
                        if (cheapest_path->parent)
-                       {
                                cheapest_path_rows = cheapest_path->parent->rows;
-                               cheapest_path_width = cheapest_path->parent->width;
-                       }
                        else
-                       {
                                cheapest_path_rows = 1; /* assume non-set result */
-                               cheapest_path_width = 100;              /* arbitrary */
-                       }
 
-                       /*
-                        * Always estimate the number of groups.  We can't do this
-                        * until after running query_planner(), either.
-                        */
                        groupExprs = get_sortgrouplist_exprs(parse->groupClause,
                                                                                                 parse->targetList);
                        dNumGroups = estimate_num_groups(parse,
@@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction)
                        /* Also want it as a long int --- but 'ware overflow! */
                        numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
 
-                       /*
-                        * Check can't-do-it conditions, including whether the
-                        * grouping operators are hashjoinable.
-                        *
-                        * Executor doesn't support hashed aggregation with DISTINCT
-                        * aggregates.  (Doing so would imply storing *all* the input
-                        * values in the hash table, which seems like a certain
-                        * loser.)
-                        */
-                       if (!enable_hashagg || !hash_safe_grouping(parse))
-                               use_hashed_grouping = false;
-                       else if (agg_counts.numDistinctAggs != 0)
-                               use_hashed_grouping = false;
-                       else
-                       {
-                               /*
-                                * Use hashed grouping if (a) we think we can fit the
-                                * hashtable into work_mem, *and* (b) the estimated cost
-                                * is no more than doing it the other way.      While avoiding
-                                * the need for sorted input is usually a win, the fact
-                                * that the output won't be sorted may be a loss; so we
-                                * need to do an actual cost comparison.
-                                */
-                               Size            hashentrysize;
-
-                               /* Estimate per-hash-entry space at tuple width... */
-                               hashentrysize = cheapest_path_width;
-                               /* plus space for pass-by-ref transition values... */
-                               hashentrysize += agg_counts.transitionSpace;
-                               /* plus the per-hash-entry overhead */
-                               hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
-
-                               if (hashentrysize * dNumGroups <= work_mem * 1024L)
-                               {
-                                       /*
-                                        * Okay, do the cost comparison.  We need to consider
-                                        * cheapest_path + hashagg [+ final sort] versus
-                                        * either cheapest_path [+ sort] + group or agg [+
-                                        * final sort] or presorted_path + group or agg [+
-                                        * final sort] where brackets indicate a step that may
-                                        * not be needed. We assume query_planner() will have
-                                        * returned a presorted path only if it's a winner
-                                        * compared to cheapest_path for this purpose.
-                                        *
-                                        * These path variables are dummies that just hold cost
-                                        * fields; we don't make actual Paths for these steps.
-                                        */
-                                       Path            hashed_p;
-                                       Path            sorted_p;
-
-                                       cost_agg(&hashed_p, parse,
-                                                        AGG_HASHED, agg_counts.numAggs,
-                                                        numGroupCols, dNumGroups,
-                                                        cheapest_path->startup_cost,
-                                                        cheapest_path->total_cost,
-                                                        cheapest_path_rows);
-                                       /* Result of hashed agg is always unsorted */
-                                       if (sort_pathkeys)
-                                               cost_sort(&hashed_p, parse, sort_pathkeys,
-                                                                 hashed_p.total_cost,
-                                                                 dNumGroups,
-                                                                 cheapest_path_width);
-
-                                       if (sorted_path)
-                                       {
-                                               sorted_p.startup_cost = sorted_path->startup_cost;
-                                               sorted_p.total_cost = sorted_path->total_cost;
-                                               current_pathkeys = sorted_path->pathkeys;
-                                       }
-                                       else
-                                       {
-                                               sorted_p.startup_cost = cheapest_path->startup_cost;
-                                               sorted_p.total_cost = cheapest_path->total_cost;
-                                               current_pathkeys = cheapest_path->pathkeys;
-                                       }
-                                       if (!pathkeys_contained_in(group_pathkeys,
-                                                                                          current_pathkeys))
-                                       {
-                                               cost_sort(&sorted_p, parse, group_pathkeys,
-                                                                 sorted_p.total_cost,
-                                                                 cheapest_path_rows,
-                                                                 cheapest_path_width);
-                                               current_pathkeys = group_pathkeys;
-                                       }
-                                       if (parse->hasAggs)
-                                               cost_agg(&sorted_p, parse,
-                                                                AGG_SORTED, agg_counts.numAggs,
-                                                                numGroupCols, dNumGroups,
-                                                                sorted_p.startup_cost,
-                                                                sorted_p.total_cost,
-                                                                cheapest_path_rows);
-                                       else
-                                               cost_group(&sorted_p, parse,
-                                                                  numGroupCols, dNumGroups,
-                                                                  sorted_p.startup_cost,
-                                                                  sorted_p.total_cost,
-                                                                  cheapest_path_rows);
-                                       /* The Agg or Group node will preserve ordering */
-                                       if (sort_pathkeys &&
-                                               !pathkeys_contained_in(sort_pathkeys,
-                                                                                          current_pathkeys))
-                                       {
-                                               cost_sort(&sorted_p, parse, sort_pathkeys,
-                                                                 sorted_p.total_cost,
-                                                                 dNumGroups,
-                                                                 cheapest_path_width);
-                                       }
-
-                                       /*
-                                        * Now make the decision using the top-level tuple
-                                        * fraction.  First we have to convert an absolute
-                                        * count (LIMIT) into fractional form.
-                                        */
-                                       if (tuple_fraction >= 1.0)
-                                               tuple_fraction /= dNumGroups;
-
-                                       if (compare_fractional_path_costs(&hashed_p, &sorted_p,
-                                                                                                         tuple_fraction) < 0)
-                                       {
-                                               /* Hashed is cheaper, so use it */
-                                               use_hashed_grouping = true;
-                                       }
-                               }
-                       }
+                       use_hashed_grouping =
+                               choose_hashed_grouping(parse, tuple_fraction,
+                                                                          cheapest_path, sorted_path,
+                                                                          sort_pathkeys, group_pathkeys,
+                                                                          dNumGroups, &agg_counts);
                }
 
                /*
@@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction)
        return result_plan;
 }
 
+/*
+ * choose_hashed_grouping - true if hashed grouping is usable and cheaper
+ */
+static bool
+choose_hashed_grouping(Query *parse, double tuple_fraction,
+                                          Path *cheapest_path, Path *sorted_path,
+                                          List *sort_pathkeys, List *group_pathkeys,
+                                          double dNumGroups, AggClauseCounts *agg_counts)
+{
+       int                     numGroupCols = list_length(parse->groupClause);
+       double          cheapest_path_rows;     /* parent rel's row estimate, or 1 */
+       int                     cheapest_path_width;    /* parent rel's width, or 100 */
+       Size            hashentrysize;  /* estimated space per hash entry */
+       List       *current_pathkeys;   /* ordering on the sort-based side */
+       Path            hashed_p;       /* dummy: holds cost fields only */
+       Path            sorted_p;       /* dummy: holds cost fields only */
+
+       /*
+        * Check can't-do-it conditions, including whether the grouping operators
+        * are hashjoinable.
+        *
+        * Executor doesn't support hashed aggregation with DISTINCT aggregates.
+        * (Doing so would imply storing *all* the input values in the hash table,
+        * which seems like a certain loser.)
+        */
+       if (!enable_hashagg)
+               return false;
+       if (agg_counts->numDistinctAggs != 0)
+               return false;
+       if (!hash_safe_grouping(parse))
+               return false;
+
+       /*
+        * Don't do it if it doesn't look like the hashtable will fit into
+        * work_mem.
+        *
+        * Beware here of the possibility that cheapest_path->parent is NULL.
+        * This could happen if user does something silly like
+        *              SELECT 'foo' GROUP BY 1;
+        */
+       if (cheapest_path->parent)
+       {
+               cheapest_path_rows = cheapest_path->parent->rows;
+               cheapest_path_width = cheapest_path->parent->width;
+       }
+       else
+       {
+               cheapest_path_rows = 1;                         /* assume non-set result */
+               cheapest_path_width = 100;                      /* arbitrary */
+       }
+
+       /* Estimate per-hash-entry space at tuple width... */
+       hashentrysize = cheapest_path_width;
+       /* plus space for pass-by-ref transition values... */
+       hashentrysize += agg_counts->transitionSpace;
+       /* plus the per-hash-entry overhead */
+       hashentrysize += hash_agg_entry_size(agg_counts->numAggs);
+
+       if (hashentrysize * dNumGroups > work_mem * 1024L)
+               return false;
+
+       /*
+        * See if the estimated cost is no more than doing it the other way.
+        * While avoiding the need for sorted input is usually a win, the fact
+        * that the output won't be sorted may be a loss; so we need to do an
+        * actual cost comparison.
+        *
+        * We need to consider
+        *              cheapest_path + hashagg [+ final sort]
+        * versus either
+        *              cheapest_path [+ sort] + group or agg [+ final sort]
+        * or
+        *              presorted_path + group or agg [+ final sort]
+        * where brackets indicate a step that may not be needed. We assume
+        * query_planner() will have returned a presorted path only if it's a
+        * winner compared to cheapest_path for this purpose.
+        *
+        * These path variables are dummies that just hold cost fields; we don't
+        * make actual Paths for these steps.
+        */
+       cost_agg(&hashed_p, parse, AGG_HASHED, agg_counts->numAggs,
+                        numGroupCols, dNumGroups,
+                        cheapest_path->startup_cost, cheapest_path->total_cost,
+                        cheapest_path_rows);
+       /* Result of hashed agg is always unsorted */
+       if (sort_pathkeys)
+               cost_sort(&hashed_p, parse, sort_pathkeys, hashed_p.total_cost,
+                                 dNumGroups, cheapest_path_width);
+
+       if (sorted_path)
+       {
+               sorted_p.startup_cost = sorted_path->startup_cost;
+               sorted_p.total_cost = sorted_path->total_cost;
+               current_pathkeys = sorted_path->pathkeys;
+       }
+       else
+       {
+               sorted_p.startup_cost = cheapest_path->startup_cost;
+               sorted_p.total_cost = cheapest_path->total_cost;
+               current_pathkeys = cheapest_path->pathkeys;
+       }
+       if (!pathkeys_contained_in(group_pathkeys,
+                                                          current_pathkeys))
+       {
+               cost_sort(&sorted_p, parse, group_pathkeys, sorted_p.total_cost,
+                                 cheapest_path_rows, cheapest_path_width);
+               current_pathkeys = group_pathkeys;
+       }
+
+       if (parse->hasAggs)
+               cost_agg(&sorted_p, parse, AGG_SORTED, agg_counts->numAggs,
+                                numGroupCols, dNumGroups,
+                                sorted_p.startup_cost, sorted_p.total_cost,
+                                cheapest_path_rows);
+       else
+               cost_group(&sorted_p, parse, numGroupCols, dNumGroups,
+                                  sorted_p.startup_cost, sorted_p.total_cost,
+                                  cheapest_path_rows);
+       /* The Agg or Group node will preserve ordering */
+       if (sort_pathkeys &&
+               !pathkeys_contained_in(sort_pathkeys, current_pathkeys))
+               cost_sort(&sorted_p, parse, sort_pathkeys, sorted_p.total_cost,
+                                 dNumGroups, cheapest_path_width);
+
+       /*
+        * Now make the decision using the top-level tuple fraction.  A value
+        * >= 1.0 is an absolute count (LIMIT); convert it to fractional form.
+        */
+       if (tuple_fraction >= 1.0)
+               tuple_fraction /= dNumGroups;
+
+       if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+                                                                         tuple_fraction) < 0)
+       {
+               /* Hashed is cheaper, so use it */
+               return true;
+       }
+       return false;           /* hashing is not cheaper */
+}
+
 /*
  * hash_safe_grouping - are grouping operators hashable?
  *