From 88ba0ae2aa4aaba8ea0d85c0ff81cc46912d9308 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 22 Mar 2018 16:09:28 -0400 Subject: [PATCH] Consider Parallel Append of partial paths for UNION [ALL]. Without this patch, we can implement a UNION or UNION ALL as an Append where Gather appears beneath one or more of the Append branches, but this lets us put the Gather node on top, with a partial path for each relation underneath. There is considerably more work that could be done to improve planning in this area, but that will probably need to wait for a future release. Patch by me, reviewed and tested by Ashutosh Bapat and Rajkumar Raghuwanshi. Discussion: http://postgr.es/m/CA+TgmoaLRAOqHmMZx=ESM3VDEPceg+-XXZsRXQ8GtFJO_zbMSw@mail.gmail.com --- src/backend/optimizer/prep/prepunion.c | 93 +++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index f087369f75..6e510f9d94 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -298,12 +298,18 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, */ set_subquery_size_estimates(root, rel); + /* + * Since we may want to add a partial path to this relation, we must + * set its consider_parallel flag correctly. + */ + final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + rel->consider_parallel = final_rel->consider_parallel; + /* * For the moment, we consider only a single Path for the subquery. * This should change soon (make it look more like * set_subquery_pathlist). */ - final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); subpath = get_cheapest_fractional_path(final_rel, root->tuple_fraction); @@ -320,6 +326,23 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, add_path(rel, path); + /* + * If we have a partial path for the child relation, we can use that + * to build a partial path for this relation. But there's no point in + * considering any path but the cheapest. + */ + if (final_rel->partial_pathlist != NIL) + { + Path *partial_subpath; + Path *partial_path; + + partial_subpath = linitial(final_rel->partial_pathlist); + partial_path = (Path *) + create_subqueryscan_path(root, rel, partial_subpath, + NIL, NULL); + add_partial_path(rel, partial_path); + } + /* * Estimate number of groups if caller wants it. If the subquery used * grouping or aggregation, its output is probably mostly unique @@ -552,6 +575,9 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, double save_fraction = root->tuple_fraction; ListCell *lc; List *pathlist = NIL; + List *partial_pathlist = NIL; + bool partial_paths_valid = true; + bool consider_parallel = true; List *rellist; List *tlist_list; List *tlist; @@ -591,18 +617,34 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, *pTargetList = tlist; - /* Build path list and relid set. */ + /* Build path lists and relid set. */ foreach(lc, rellist) { RelOptInfo *rel = lfirst(lc); pathlist = lappend(pathlist, rel->cheapest_total_path); + + if (consider_parallel) + { + if (!rel->consider_parallel) + { + consider_parallel = false; + partial_paths_valid = false; + } + else if (rel->partial_pathlist == NIL) + partial_paths_valid = false; + else + partial_pathlist = lappend(partial_pathlist, + linitial(rel->partial_pathlist)); + } + relids = bms_union(relids, rel->relids); } /* Build result relation. */ result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids); result_rel->reltarget = create_pathtarget(root, tlist); + result_rel->consider_parallel = consider_parallel; /* * Append the child results together. @@ -626,6 +668,53 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, */ result_rel->rows = path->rows; + /* + * Now consider doing the same thing using the partial paths plus Append + * plus Gather. + */ + if (partial_paths_valid) + { + Path *ppath; + ListCell *lc; + int parallel_workers = 0; + + /* Find the highest number of workers requested for any subpath. */ + foreach(lc, partial_pathlist) + { + Path *path = lfirst(lc); + + parallel_workers = Max(parallel_workers, path->parallel_workers); + } + Assert(parallel_workers > 0); + + /* + * If the use of parallel append is permitted, always request at least + * log2(# of children) paths. We assume it can be useful to have + * extra workers in this case because they will be spread out across + * the children. The precise formula is just a guess; see + * add_paths_to_append_rel. + */ + if (enable_parallel_append) + { + parallel_workers = Max(parallel_workers, + fls(list_length(partial_pathlist))); + parallel_workers = Min(parallel_workers, + max_parallel_workers_per_gather); + } + Assert(parallel_workers > 0); + + ppath = (Path *) + create_append_path(result_rel, NIL, partial_pathlist, + NULL, parallel_workers, enable_parallel_append, + NIL, -1); + ppath = (Path *) + create_gather_path(root, result_rel, ppath, + result_rel->reltarget, NULL, NULL); + if (!op->all) + ppath = make_union_unique(op, ppath, tlist, root); + add_path(result_rel, ppath); + } + /* Undo effects of possibly forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; -- 2.40.0