From 4f15e5d09de276fb77326be5567dd9796008ca2e Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 20 Mar 2018 11:18:04 -0400 Subject: [PATCH] Defer creation of partially-grouped relation until it's needed. This avoids unnecessarily creating a RelOptInfo for which we have no actual need. This idea is from Ashutosh Bapat, who wrote a very different patch to accomplish a similar goal. It will be more important if and when we get partition-wise aggregate, since then there could be many partially grouped relations all of which could potentially be unnecessary. In passing, this sets the grouping relation's reltarget, which wasn't done previously but makes things simpler for this refactoring. Along the way, adjust things so that add_paths_to_partial_grouping_rel, now renamed create_partial_grouping_paths, does not perform the Gather or Gather Merge steps to generate non-partial paths from partial paths; have the caller do it instead. This is again for the convenience of partition-wise aggregate, which wants to inject additional partial paths are created and before we decide which ones to Gather/Gather Merge. This might seem like a separate change, but it's actually pretty closely entangled; I couldn't really see much value in separating it and having to change some things twice. Patch by me, reviewed by Ashutosh Bapat. Discussion: http://postgr.es/m/CA+TgmoZ+ZJTVad-=vEq393N99KTooxv9k7M+z73qnTAqkb49BQ@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 324 ++++++++++++++------------- 1 file changed, 170 insertions(+), 154 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9c4a1baf5f..59802423fc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -148,7 +148,6 @@ static void create_degenerate_grouping_paths(PlannerInfo *root, static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, RelOptInfo *grouped_rel, - RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, grouping_sets_data *gd); static void consider_groupingsets_paths(PlannerInfo *root, @@ -208,13 +207,14 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, const AggClauseCosts *agg_final_costs, grouping_sets_data *gd, bool can_sort, bool can_hash, double dNumGroups, List *havingQual); -static void add_paths_to_partial_grouping_rel(PlannerInfo *root, - RelOptInfo *input_rel, - RelOptInfo *partially_grouped_rel, - AggClauseCosts *agg_partial_costs, - grouping_sets_data *gd, - bool can_sort, - bool can_hash); +static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + RelOptInfo *input_rel, + grouping_sets_data *gd, + bool can_sort, + bool can_hash, + AggClauseCosts *agg_final_costs); +static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs); @@ -3688,42 +3688,30 @@ create_grouping_paths(PlannerInfo *root, { Query *parse = root->parse; RelOptInfo *grouped_rel; - RelOptInfo *partially_grouped_rel; /* * For now, all aggregated paths are added to the (GROUP_AGG, NULL) - * upperrel. Paths that are only partially aggregated go into the - * (UPPERREL_PARTIAL_GROUP_AGG, NULL) upperrel. + * upperrel. */ grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); - partially_grouped_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_GROUP_AGG, - NULL); + grouped_rel->reltarget = target; /* * If the input relation is not parallel-safe, then the grouped relation * can't be parallel-safe, either. Otherwise, it's parallel-safe if the - * target list and HAVING quals are parallel-safe. The partially grouped - * relation obeys the same rules. + * target list and HAVING quals are parallel-safe. */ if (input_rel->consider_parallel && target_parallel_safe && is_parallel_safe(root, (Node *) parse->havingQual)) - { grouped_rel->consider_parallel = true; - partially_grouped_rel->consider_parallel = true; - } /* - * If the input rel belongs to a single FDW, so does the grouped rel. Same - * for the partially_grouped_rel. + * If the input rel belongs to a single FDW, so does the grouped rel. */ grouped_rel->serverid = input_rel->serverid; grouped_rel->userid = input_rel->userid; grouped_rel->useridiscurrent = input_rel->useridiscurrent; grouped_rel->fdwroutine = input_rel->fdwroutine; - partially_grouped_rel->serverid = input_rel->serverid; - partially_grouped_rel->userid = input_rel->userid; - partially_grouped_rel->useridiscurrent = input_rel->useridiscurrent; - partially_grouped_rel->fdwroutine = input_rel->fdwroutine; /* * Create either paths for a degenerate grouping or paths for ordinary @@ -3733,7 +3721,7 @@ create_grouping_paths(PlannerInfo *root, create_degenerate_grouping_paths(root, input_rel, target, grouped_rel); else create_ordinary_grouping_paths(root, input_rel, target, grouped_rel, - partially_grouped_rel, agg_costs, gd); + agg_costs, gd); set_cheapest(grouped_rel); return grouped_rel; @@ -3831,18 +3819,16 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, RelOptInfo *grouped_rel, - RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, grouping_sets_data *gd) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; - AggClauseCosts agg_partial_costs; /* parallel only */ + RelOptInfo *partially_grouped_rel = NULL; AggClauseCosts agg_final_costs; /* parallel only */ double dNumGroups; bool can_hash; bool can_sort; - bool try_parallel_aggregation; /* * Estimate number of groups. @@ -3889,59 +3875,24 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, agg_costs->numOrderedAggs == 0 && (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); - /* - * Figure out whether a PartialAggregate/Finalize Aggregate execution - * strategy is viable. - */ - try_parallel_aggregation = can_parallel_agg(root, input_rel, grouped_rel, - agg_costs); - /* * Before generating paths for grouped_rel, we first generate any possible - * partial paths for partially_grouped_rel; that way, later code can - * easily consider both parallel and non-parallel approaches to grouping. + * partially grouped paths; that way, later code can easily consider both + * parallel and non-parallel approaches to grouping. */ - if (try_parallel_aggregation) + MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); + if (can_parallel_agg(root, input_rel, grouped_rel, agg_costs)) { - PathTarget *partial_grouping_target; - - /* - * Build target list for partial aggregate paths. These paths cannot - * just emit the same tlist as regular aggregate paths, because (1) we - * must include Vars and Aggrefs needed in HAVING, which might not - * appear in the result tlist, and (2) the Aggrefs must be set in - * partial mode. - */ - partial_grouping_target = make_partial_grouping_target(root, target, - (Node *) parse->havingQual); - partially_grouped_rel->reltarget = partial_grouping_target; - - /* - * Collect statistics about aggregates for estimating costs of - * performing aggregation in parallel. - */ - MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts)); - MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); - if (parse->hasAggs) - { - /* partial phase */ - get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs, - AGGSPLIT_INITIAL_SERIAL, - &agg_partial_costs); - - /* final phase */ - get_agg_clause_costs(root, (Node *) target->exprs, - AGGSPLIT_FINAL_DESERIAL, - &agg_final_costs); - get_agg_clause_costs(root, parse->havingQual, - AGGSPLIT_FINAL_DESERIAL, - &agg_final_costs); - } - - add_paths_to_partial_grouping_rel(root, input_rel, - partially_grouped_rel, - &agg_partial_costs, - gd, can_sort, can_hash); + partially_grouped_rel = + create_partial_grouping_paths(root, + grouped_rel, + input_rel, + gd, + can_sort, + can_hash, + &agg_final_costs); + gather_grouping_paths(root, partially_grouped_rel); + set_cheapest(partially_grouped_rel); } /* Build final grouping paths */ @@ -6189,46 +6140,49 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, * Instead of operating directly on the input relation, we can * consider finalizing a partially aggregated path. */ - foreach(lc, partially_grouped_rel->pathlist) + if (partially_grouped_rel != NULL) { - Path *path = (Path *) lfirst(lc); - - /* - * Insert a Sort node, if required. But there's no point in - * sorting anything but the cheapest path. - */ - if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + foreach(lc, partially_grouped_rel->pathlist) { - if (path != partially_grouped_rel->cheapest_total_path) - continue; - path = (Path *) create_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - -1.0); - } + Path *path = (Path *) lfirst(lc); - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - target, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - target, - parse->groupClause, - havingQual, - dNumGroups)); + /* + * Insert a Sort node, if required. But there's no point in + * sorting anything but the cheapest path. + */ + if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + { + if (path != partially_grouped_rel->cheapest_total_path) + continue; + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + } + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + target, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + target, + parse->groupClause, + havingQual, + dNumGroups)); + } } } @@ -6279,10 +6233,10 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, /* * Generate a Finalize HashAgg Path atop of the cheapest partially - * grouped path. Once again, we'll only do this if it looks as though - * the hash table won't exceed work_mem. + * grouped path, assuming there is one. Once again, we'll only do this + * if it looks as though the hash table won't exceed work_mem. */ - if (partially_grouped_rel->pathlist) + if (partially_grouped_rel && partially_grouped_rel->pathlist) { Path *path = partially_grouped_rel->cheapest_total_path; @@ -6307,29 +6261,83 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, } /* - * add_paths_to_partial_grouping_rel + * create_partial_grouping_paths * - * First, generate partially aggregated partial paths from the partial paths - * for the input relation, and then generate partially aggregated non-partial - * paths using Gather or Gather Merge. All paths for this relation -- both - * partial and non-partial -- have been partially aggregated but require a - * subsequent FinalizeAggregate step. + * Create a new upper relation representing the result of partial aggregation + * and populate it with appropriate paths. Note that we don't finalize the + * lists of paths here, so the caller can add additional partial or non-partial + * paths and must afterward call gather_grouping_paths and set_cheapest on + * the returned upper relation. + * + * All paths for this new upper relation -- both partial and non-partial -- + * have been partially aggregated but require a subsequent FinalizeAggregate + * step. */ -static void -add_paths_to_partial_grouping_rel(PlannerInfo *root, - RelOptInfo *input_rel, - RelOptInfo *partially_grouped_rel, - AggClauseCosts *agg_partial_costs, - grouping_sets_data *gd, - bool can_sort, - bool can_hash) +static RelOptInfo * +create_partial_grouping_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + RelOptInfo *input_rel, + grouping_sets_data *gd, + bool can_sort, + bool can_hash, + AggClauseCosts *agg_final_costs) { Query *parse = root->parse; + RelOptInfo *partially_grouped_rel; + AggClauseCosts agg_partial_costs; Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); Size hashaggtablesize; double dNumPartialGroups = 0; ListCell *lc; + /* + * Build a new upper relation to represent the result of partially + * aggregating the rows from the input relation. + */ + partially_grouped_rel = fetch_upper_rel(root, + UPPERREL_PARTIAL_GROUP_AGG, + grouped_rel->relids); + partially_grouped_rel->consider_parallel = + grouped_rel->consider_parallel; + partially_grouped_rel->serverid = grouped_rel->serverid; + partially_grouped_rel->userid = grouped_rel->userid; + partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent; + partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine; + + /* + * Build target list for partial aggregate paths. These paths cannot just + * emit the same tlist as regular aggregate paths, because (1) we must + * include Vars and Aggrefs needed in HAVING, which might not appear in + * the result tlist, and (2) the Aggrefs must be set in partial mode. + */ + partially_grouped_rel->reltarget = + make_partial_grouping_target(root, grouped_rel->reltarget, + (Node *) parse->havingQual); + + /* + * Collect statistics about aggregates for estimating costs of performing + * aggregation in parallel. + */ + MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + List *partial_target_exprs; + + /* partial phase */ + partial_target_exprs = partially_grouped_rel->reltarget->exprs; + get_agg_clause_costs(root, (Node *) partial_target_exprs, + AGGSPLIT_INITIAL_SERIAL, + &agg_partial_costs); + + /* final phase */ + get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + get_agg_clause_costs(root, parse->havingQual, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + } + /* Estimate number of partial groups. */ dNumPartialGroups = get_number_of_groups(root, cheapest_partial_path->rows, @@ -6372,7 +6380,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, AGGSPLIT_INITIAL_SERIAL, parse->groupClause, NIL, - agg_partial_costs, + &agg_partial_costs, dNumPartialGroups)); else add_partial_path(partially_grouped_rel, (Path *) @@ -6394,7 +6402,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, hashaggtablesize = estimate_hashagg_tablesize(cheapest_partial_path, - agg_partial_costs, + &agg_partial_costs, dNumPartialGroups); /* @@ -6412,7 +6420,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, AGGSPLIT_INITIAL_SERIAL, parse->groupClause, NIL, - agg_partial_costs, + &agg_partial_costs, dNumPartialGroups)); } } @@ -6431,20 +6439,32 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, input_rel, partially_grouped_rel); } - /* - * Try adding Gather or Gather Merge to partial paths to produce - * non-partial paths. - */ - generate_gather_paths(root, partially_grouped_rel, true); + return partially_grouped_rel; +} - /* Get cheapest partial path from partially_grouped_rel */ - cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist); +/* + * Generate Gather and Gather Merge paths for a grouping relation or partial + * grouping relation. + * + * generate_gather_paths does most of the work, but we also consider a special + * case: we could try sorting the data by the group_pathkeys and then applying + * Gather Merge. + * + * NB: This function shouldn't be used for anything other than a grouped or + * partially grouped relation not only because of the fact that it explcitly + * references group_pathkeys but we pass "true" as the third argument to + * generate_gather_paths(). + */ +static void +gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) +{ + Path *cheapest_partial_path; - /* - * generate_gather_paths won't consider sorting the cheapest path to match - * the group keys and then applying a Gather Merge node to the result; - * that might be a winning strategy. - */ + /* Try Gather for unordered paths and Gather Merge for ordered ones. */ + generate_gather_paths(root, rel, true); + + /* Try cheapest partial path + explicit Sort + Gather Merge. */ + cheapest_partial_path = linitial(rel->partial_pathlist); if (!pathkeys_contained_in(root->group_pathkeys, cheapest_partial_path->pathkeys)) { @@ -6453,24 +6473,20 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, total_groups = cheapest_partial_path->rows * cheapest_partial_path->parallel_workers; - path = (Path *) create_sort_path(root, partially_grouped_rel, - cheapest_partial_path, + path = (Path *) create_sort_path(root, rel, cheapest_partial_path, root->group_pathkeys, -1.0); path = (Path *) create_gather_merge_path(root, - partially_grouped_rel, + rel, path, - partially_grouped_rel->reltarget, + rel->reltarget, root->group_pathkeys, NULL, &total_groups); - add_path(partially_grouped_rel, path); + add_path(rel, path); } - - /* Now choose the best path(s) */ - set_cheapest(partially_grouped_rel); } /* -- 2.40.0