From b5996c2791f36a79332e3cb7130e9125a0372730 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 20 Mar 2018 11:31:06 -0400 Subject: [PATCH] Determine grouping strategies in create_grouping_paths. Partition-wise aggregate will call create_ordinary_grouping_paths multiple times and we don't want to redo this work every time; have the caller do it instead and pass the details down. Patch by me, reviewed by Ashutosh Bapat. Discussion: http://postgr.es/m/CA+TgmoY7VYYn9a7YHj1nJL6zj6BkHmt4K-un9LRmXkyqRZyynA@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 142 ++++++++++++++++----------- 1 file changed, 82 insertions(+), 60 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 59802423fc..7b623496e3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -93,6 +93,25 @@ typedef struct List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; +/* + * Various flags indicating what kinds of grouping are possible. + * + * GROUPING_CAN_USE_SORT should be set if it's possible to perform + * sort-based implementations of grouping. When grouping sets are in use, + * this will be true if sorting is potentially usable for any of the grouping + * sets, even if it's not usable for all of them. + * + * GROUPING_CAN_USE_HASH should be set if it's possible to perform + * hash-based implementations of grouping. + * + * GROUPING_CAN_PARTIAL_AGG should be set if the aggregation is of a type + * for which we support partial aggregation (not, for example, grouping sets). + * It says nothing about parallel-safety or the availability of suitable paths. + */ +#define GROUPING_CAN_USE_SORT 0x0001 +#define GROUPING_CAN_USE_HASH 0x0002 +#define GROUPING_CAN_PARTIAL_AGG 0x0004 + /* * Data specific to grouping sets */ @@ -149,7 +168,7 @@ static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd); + grouping_sets_data *gd, int flags); static void consider_groupingsets_paths(PlannerInfo *root, RelOptInfo *grouped_rel, Path *path, @@ -215,8 +234,8 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, bool can_hash, AggClauseCosts *agg_final_costs); static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); -static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, - RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs); +static bool can_partial_agg(PlannerInfo *root, + const AggClauseCosts *agg_costs); /***************************************************************************** @@ -3720,8 +3739,58 @@ create_grouping_paths(PlannerInfo *root, if (is_degenerate_grouping(root)) create_degenerate_grouping_paths(root, input_rel, target, grouped_rel); else + { + int flags = 0; + + /* + * Determine whether it's possible to perform sort-based + * implementations of grouping. (Note that if groupClause is empty, + * grouping_is_sortable() is trivially true, and all the + * pathkeys_contained_in() tests will succeed too, so that we'll + * consider every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all + * of them; in this case, we need can_sort to be true as long as we + * must consider any sorted-input plan. + */ + if ((gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause)) + flags |= GROUPING_CAN_USE_SORT; + + /* + * Determine whether we should consider hash-based implementations of + * grouping. + * + * Hashed aggregation only applies if we're grouping. If we have + * grouping sets, some groups might be hashable but others not; in + * this case we set can_hash true as long as there is nothing globally + * preventing us from hashing (and we should therefore consider plans + * with hashes). + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER + * BY aggregates. (Doing so would imply storing *all* the input + * values in the hash table, and/or running many sorts in parallel, + * either of which seems like a certain loser.) We similarly don't + * support ordered-set aggregates in hashed aggregation, but that case + * is also included in the numOrderedAggs count. + * + * Note: grouping_is_hashable() is much more expensive to check than + * the other gating conditions, so we want to do it last. + */ + if ((parse->groupClause != NIL && + agg_costs->numOrderedAggs == 0 && + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)))) + flags |= GROUPING_CAN_USE_HASH; + + /* + * Determine whether partial aggregation is possible. + */ + if (can_partial_agg(root, agg_costs)) + flags |= GROUPING_CAN_PARTIAL_AGG; + create_ordinary_grouping_paths(root, input_rel, target, grouped_rel, - agg_costs, gd); + agg_costs, gd, flags); + } set_cheapest(grouped_rel); return grouped_rel; @@ -3820,15 +3889,15 @@ static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd) + grouping_sets_data *gd, int flags) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *partially_grouped_rel = NULL; AggClauseCosts agg_final_costs; /* parallel only */ double dNumGroups; - bool can_hash; - bool can_sort; + bool can_hash = (flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (flags & GROUPING_CAN_USE_SORT) != 0; /* * Estimate number of groups. @@ -3838,50 +3907,14 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, gd, parse->targetList); - /* - * Determine whether it's possible to perform sort-based implementations - * of grouping. (Note that if groupClause is empty, - * grouping_is_sortable() is trivially true, and all the - * pathkeys_contained_in() tests will succeed too, so that we'll consider - * every surviving input path.) - * - * If we have grouping sets, we might be able to sort some but not all of - * them; in this case, we need can_sort to be true as long as we must - * consider any sorted-input plan. - */ - can_sort = (gd && gd->rollups != NIL) - || grouping_is_sortable(parse->groupClause); - - /* - * Determine whether we should consider hash-based implementations of - * grouping. - * - * Hashed aggregation only applies if we're grouping. If we have grouping - * sets, some groups might be hashable but others not; in this case we set - * can_hash true as long as there is nothing globally preventing us from - * hashing (and we should therefore consider plans with hashes). - * - * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY - * aggregates. (Doing so would imply storing *all* the input values in - * the hash table, and/or running many sorts in parallel, either of which - * seems like a certain loser.) We similarly don't support ordered-set - * aggregates in hashed aggregation, but that case is also included in the - * numOrderedAggs count. - * - * Note: grouping_is_hashable() is much more expensive to check than the - * other gating conditions, so we want to do it last. - */ - can_hash = (parse->groupClause != NIL && - agg_costs->numOrderedAggs == 0 && - (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); - /* * Before generating paths for grouped_rel, we first generate any possible * partially grouped paths; that way, later code can easily consider both * parallel and non-parallel approaches to grouping. */ MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); - if (can_parallel_agg(root, input_rel, grouped_rel, agg_costs)) + if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL + && (flags & GROUPING_CAN_PARTIAL_AGG) != 0) { partially_grouped_rel = create_partial_grouping_paths(root, @@ -6490,28 +6523,17 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) } /* - * can_parallel_agg + * can_partial_agg * - * Determines whether or not parallel grouping and/or aggregation is possible. + * Determines whether or not partial grouping and/or aggregation is possible. * Returns true when possible, false otherwise. */ static bool -can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, - RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs) +can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs) { Query *parse = root->parse; - if (!grouped_rel->consider_parallel) - { - /* Not even parallel-safe. */ - return false; - } - else if (input_rel->partial_pathlist == NIL) - { - /* Nothing to use as input for partial aggregate. */ - return false; - } - else if (!parse->hasAggs && parse->groupClause == NIL) + if (!parse->hasAggs && parse->groupClause == NIL) { /* * We don't know how to do parallel aggregation unless we have either -- 2.40.0