From 54f5c5150fa05d7ad15f8406debd5a2b394885b5 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 17 Jun 2016 16:25:02 -0400 Subject: [PATCH] Try again to fix the way the scanjoin_target is used with partial paths. Commit 04ae11f62e643e07c411c4935ea6af46cb112aa9 removed some broken code to apply the scan/join target to partial paths, but its theory that this processing step is totally unnecessary turns out to be wrong. Put similar code back again, but this time, check for parallel-safety and avoid in-place modifications to paths that may already have been used as part of some other path. (This is not an entirely elegant solution to this problem; it might be better, for example, to postpone generate_gather_paths for the topmost scan/join rel until after the scan/join target has been applied. But this is not the time for such redesign work.) Amit Kapila and Robert Haas --- src/backend/optimizer/plan/planagg.c | 3 +- src/backend/optimizer/plan/planner.c | 81 ++++++++++++++++++- src/backend/optimizer/prep/prepunion.c | 6 +- src/backend/optimizer/util/pathnode.c | 7 +- src/include/optimizer/pathnode.h | 3 +- src/test/regress/expected/select_parallel.out | 32 ++++++++ src/test/regress/sql/select_parallel.sql | 11 +++ 7 files changed, 133 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index cefec7bdf1..0434a5a8e7 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -465,7 +465,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, * cheapest path.) */ sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, - create_pathtarget(subroot, tlist)); + create_pathtarget(subroot, tlist), + false); /* * Determine cost to get just the first row of the presorted path. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 07b925e74c..dad35e56f5 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1500,6 +1500,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, PathTarget *grouping_target; PathTarget *scanjoin_target; bool have_grouping; + bool scanjoin_target_parallel_safe = false; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; List *rollup_lists = NIL; @@ -1730,7 +1731,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target = grouping_target; /* - * Forcibly apply that target to all the Paths for the scan/join rel. + * Check whether scan/join target is parallel safe ... unless there + * are no partial paths, in which case we don't care. + */ + if (current_rel->partial_pathlist && + !has_parallel_hazard((Node *) scanjoin_target->exprs, false)) + scanjoin_target_parallel_safe = true; + + /* + * Forcibly apply scan/join target to all the Paths for the scan/join + * rel. * * In principle we should re-run set_cheapest() here to identify the * cheapest path, but it seems unlikely that adding the same tlist @@ -1746,7 +1756,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, Assert(subpath->param_info == NULL); path = apply_projection_to_path(root, current_rel, - subpath, scanjoin_target); + subpath, scanjoin_target, + scanjoin_target_parallel_safe); /* If we had to add a Result, path is different from subpath */ if (path != subpath) { @@ -1758,6 +1769,70 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, } } + /* + * Upper planning steps which make use of the top scan/join rel's + * partial pathlist will expect partial paths for that rel to produce + * the same output as complete paths ... and we just changed the + * output for the complete paths, so we'll need to do the same thing + * for partial paths. + */ + if (scanjoin_target_parallel_safe) + { + /* + * Apply the scan/join target to each partial path. Otherwise, + * anything that attempts to use the partial paths for further + * upper planning may go wrong. + */ + foreach(lc, current_rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath; + + /* + * We can't use apply_projection_to_path() here, because there + * could already be pointers to these paths, and therefore we + * cannot modify them in place. Instead, we must use + * create_projection_path(). The good news is this won't + * actually insert a Result node into the final plan unless + * it's needed, but the bad news is that it will charge for + * the node whether it's needed or not. Therefore, if the + * target list is already what we need it to be, just leave + * this partial path alone. + */ + if (equal(scanjoin_target->exprs, subpath->pathtarget->exprs)) + continue; + + Assert(subpath->param_info == NULL); + newpath = (Path *) create_projection_path(root, + current_rel, + subpath, + scanjoin_target); + if (is_projection_capable_path(subpath)) + { + /* + * Since the target lists differ, a projection path is + * essential, but it will disappear at plan creation time + * because the subpath is projection-capable. So avoid + * charging anything for the disappearing node. + */ + newpath->startup_cost = subpath->startup_cost; + newpath->total_cost = subpath->total_cost; + } + + lfirst(lc) = newpath; + } + } + else + { + /* + * In the unfortunate event that scanjoin_target is not + * parallel-safe, we can't apply it to the partial paths; in that + * case, we'll need to forget about the partial paths, which + * aren't valid input for upper planning steps. + */ + current_rel->partial_pathlist = NIL; + } + /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it @@ -4153,7 +4228,7 @@ create_ordered_paths(PlannerInfo *root, /* Add projection step if needed */ if (path->pathtarget != target) path = apply_projection_to_path(root, ordered_rel, - path, target); + path, target, false); add_path(ordered_rel, path); } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 552b756b8b..30975e0106 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -325,7 +325,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, refnames_tlist); path = apply_projection_to_path(root, rel, path, - create_pathtarget(root, tlist)); + create_pathtarget(root, tlist), + false); /* Return the fully-fledged tlist to caller, too */ *pTargetList = tlist; @@ -394,7 +395,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, path->parent, path, create_pathtarget(root, - *pTargetList)); + *pTargetList), + false); } return path; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 6b57de350d..0ff353fa11 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2217,12 +2217,14 @@ create_projection_path(PlannerInfo *root, * 'rel' is the parent relation associated with the result * 'path' is the path representing the source of data * 'target' is the PathTarget to be computed + * 'target_parallel' indicates that target expressions are all parallel-safe */ Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - PathTarget *target) + PathTarget *target, + bool target_parallel) { QualCost oldcost; @@ -2248,8 +2250,7 @@ apply_projection_to_path(PlannerInfo *root, * project. But if there is something that is not parallel-safe in the * target expressions, then we can't. */ - if (IsA(path, GatherPath) && - !has_parallel_hazard((Node *) target->exprs, false)) + if (IsA(path, GatherPath) &&target_parallel) { GatherPath *gpath = (GatherPath *) path; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5de4c34a2b..586ecdd9b8 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -143,7 +143,8 @@ extern ProjectionPath *create_projection_path(PlannerInfo *root, extern Path *apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, - PathTarget *target); + PathTarget *target, + bool target_parallel); extern SortPath *create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 6f1f24748b..3c906407df 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -51,6 +51,38 @@ select parallel_restricted(unique1) from tenk1 Filter: (tenk1.stringu1 = 'GRAAAA'::name) (9 rows) +-- test parallel plan when group by expression is in target list. +explain (costs off) + select length(stringu1) from tenk1 group by length(stringu1); + QUERY PLAN +--------------------------------------------------- + Finalize HashAggregate + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 +(7 rows) + +select length(stringu1) from tenk1 group by length(stringu1); + length +-------- + 6 +(1 row) + +-- test that parallel plan for aggregates is not selected when +-- target list contains parallel restricted clause. +explain (costs off) + select sum(parallel_restricted(unique1)) from tenk1 + group by(parallel_restricted(unique1)); + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: parallel_restricted(unique1) + -> Index Only Scan using tenk1_unique1 on tenk1 +(3 rows) + set force_parallel_mode=1; explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 7b607c203a..a8cd993332 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -24,6 +24,17 @@ explain (verbose, costs off) select parallel_restricted(unique1) from tenk1 where stringu1 = 'GRAAAA' order by 1; +-- test parallel plan when group by expression is in target list. +explain (costs off) + select length(stringu1) from tenk1 group by length(stringu1); +select length(stringu1) from tenk1 group by length(stringu1); + +-- test that parallel plan for aggregates is not selected when +-- target list contains parallel restricted clause. +explain (costs off) + select sum(parallel_restricted(unique1)) from tenk1 + group by(parallel_restricted(unique1)); + set force_parallel_mode=1; explain (costs off) -- 2.40.0