From da1c91631e3577ea5818f855ebb5bd206d559006 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 19 Aug 2016 14:03:07 -0400 Subject: [PATCH] Speed up planner's scanning for parallel-query hazards. We need to scan the whole parse tree for parallel-unsafe functions. If there are none, we'll later need to determine whether particular subtrees contain any parallel-restricted functions. The previous coding retained no knowledge from the first scan, even though this is very wasteful in the common case where the query contains only parallel-safe functions. We can bypass all of the later scans by remembering that fact. This provides a small but measurable speed improvement when the case applies, and shouldn't cost anything when it doesn't. Patch by me, reviewed by Robert Haas Discussion: <3740.1471538387@sss.pgh.pa.us> --- src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/path/allpaths.c | 30 +------ src/backend/optimizer/plan/planmain.c | 9 +- src/backend/optimizer/plan/planner.c | 43 ++++++---- src/backend/optimizer/util/clauses.c | 115 +++++++++++++++++++------- src/backend/optimizer/util/pathnode.c | 6 +- src/backend/optimizer/util/relnode.c | 4 +- src/include/nodes/relation.h | 2 + src/include/optimizer/clauses.h | 3 +- 9 files changed, 133 insertions(+), 80 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1fab807772..50019f4164 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2029,6 +2029,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_BOOL_FIELD(dependsOnRole); WRITE_BOOL_FIELD(parallelModeOK); WRITE_BOOL_FIELD(parallelModeNeeded); + WRITE_CHAR_FIELD(maxParallelHazard); } static void diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 88d833a2e8..af73792227 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -78,7 +78,6 @@ static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel); static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); -static bool function_rte_parallel_ok(RangeTblEntry *rte); static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, @@ -542,8 +541,7 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, if (proparallel != PROPARALLEL_SAFE) return; - if (has_parallel_hazard((Node *) rte->tablesample->args, - false)) + if (!is_parallel_safe(root, (Node *) rte->tablesample->args)) return; } @@ -596,7 +594,7 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, case RTE_FUNCTION: /* Check for parallel-restricted functions. */ - if (!function_rte_parallel_ok(rte)) + if (!is_parallel_safe(root, (Node *) rte->functions)) return; break; @@ -629,40 +627,20 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, * outer join clauses work correctly. It would likely break equivalence * classes, too. */ - if (has_parallel_hazard((Node *) rel->baserestrictinfo, false)) + if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo)) return; /* * Likewise, if the relation's outputs are not parallel-safe, give up. * (Usually, they're just Vars, but sometimes they're not.) */ - if (has_parallel_hazard((Node *) rel->reltarget->exprs, false)) + if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs)) return; /* We have a winner. */ rel->consider_parallel = true; } -/* - * Check whether a function RTE is scanning something parallel-restricted. - */ -static bool -function_rte_parallel_ok(RangeTblEntry *rte) -{ - ListCell *lc; - - foreach(lc, rte->functions) - { - RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); - - Assert(IsA(rtfunc, RangeTblFunction)); - if (has_parallel_hazard(rtfunc->funcexpr, false)) - return false; - } - - return true; -} - /* * set_plain_rel_pathlist * Build access paths for a plain relation (no subquery, no inheritance) diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 27234ffa22..e7ae7ae8ae 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -71,14 +71,13 @@ query_planner(PlannerInfo *root, List *tlist, /* * If query allows parallelism in general, check whether the quals are - * parallel-restricted. There's currently no real benefit to setting - * this flag correctly because we can't yet reference subplans from - * parallel workers. But that might change someday, so set this - * correctly anyway. + * parallel-restricted. (We need not check final_rel->reltarget + * because it's empty at this point. Anything parallel-restricted in + * the query tlist will be dealt with later.) */ if (root->glob->parallelModeOK) final_rel->consider_parallel = - !has_parallel_hazard(parse->jointree->quals, false); + is_parallel_safe(root, parse->jointree->quals); /* The only path for it is a trivial Result path */ add_path(final_rel, (Path *) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b265628325..174210be6c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -23,6 +23,7 @@ #include "access/sysattr.h" #include "access/xact.h" #include "catalog/pg_constraint_fn.h" +#include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "executor/nodeAgg.h" @@ -241,12 +242,26 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * time and execution time, so don't generate a parallel plan if we're in * serializable mode. */ - glob->parallelModeOK = (cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && - IsUnderPostmaster && dynamic_shared_memory_type != DSM_IMPL_NONE && - parse->commandType == CMD_SELECT && !parse->hasModifyingCTE && - parse->utilityStmt == NULL && max_parallel_workers_per_gather > 0 && - !IsParallelWorker() && !IsolationIsSerializable() && - !has_parallel_hazard((Node *) parse, true); + if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && + IsUnderPostmaster && + dynamic_shared_memory_type != DSM_IMPL_NONE && + parse->commandType == CMD_SELECT && + parse->utilityStmt == NULL && + !parse->hasModifyingCTE && + max_parallel_workers_per_gather > 0 && + !IsParallelWorker() && + !IsolationIsSerializable()) + { + /* all the cheap tests pass, so scan the query tree */ + glob->maxParallelHazard = max_parallel_hazard(parse); + glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE); + } + else + { + /* skip the query tree scan, just assume it's unsafe */ + glob->maxParallelHazard = PROPARALLEL_UNSAFE; + glob->parallelModeOK = false; + } /* * glob->parallelModeNeeded should tell us whether it's necessary to @@ -1802,7 +1817,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, * computed by partial paths. */ if (current_rel->partial_pathlist && - !has_parallel_hazard((Node *) scanjoin_target->exprs, false)) + is_parallel_safe(root, (Node *) scanjoin_target->exprs)) { /* Apply the scan/join target to each partial path */ foreach(lc, current_rel->partial_pathlist) @@ -1948,8 +1963,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, * query. */ if (current_rel->consider_parallel && - !has_parallel_hazard(parse->limitOffset, false) && - !has_parallel_hazard(parse->limitCount, false)) + is_parallel_safe(root, parse->limitOffset) && + is_parallel_safe(root, parse->limitCount)) final_rel->consider_parallel = true; /* @@ -3326,8 +3341,8 @@ create_grouping_paths(PlannerInfo *root, * target list and HAVING quals are parallel-safe. */ if (input_rel->consider_parallel && - !has_parallel_hazard((Node *) target->exprs, false) && - !has_parallel_hazard((Node *) parse->havingQual, false)) + is_parallel_safe(root, (Node *) target->exprs) && + is_parallel_safe(root, (Node *) parse->havingQual)) grouped_rel->consider_parallel = true; /* @@ -3881,8 +3896,8 @@ create_window_paths(PlannerInfo *root, * target list and active windows for non-parallel-safe constructs. */ if (input_rel->consider_parallel && - !has_parallel_hazard((Node *) output_target->exprs, false) && - !has_parallel_hazard((Node *) activeWindows, false)) + is_parallel_safe(root, (Node *) output_target->exprs) && + is_parallel_safe(root, (Node *) activeWindows)) window_rel->consider_parallel = true; /* @@ -4272,7 +4287,7 @@ create_ordered_paths(PlannerInfo *root, * target list is parallel-safe. */ if (input_rel->consider_parallel && - !has_parallel_hazard((Node *) target->exprs, false)) + is_parallel_safe(root, (Node *) target->exprs)) ordered_rel->consider_parallel = true; /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index a40ad40606..4496fde056 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -91,8 +91,9 @@ typedef struct typedef struct { - bool allow_restricted; -} has_parallel_hazard_arg; + char max_hazard; /* worst proparallel hazard found so far */ + char max_interesting; /* worst proparallel hazard of interest */ +} max_parallel_hazard_context; static bool contain_agg_clause_walker(Node *node, void *context); static bool get_agg_clause_costs_walker(Node *node, @@ -103,8 +104,8 @@ static bool contain_subplans_walker(Node *node, void *context); static bool contain_mutable_functions_walker(Node *node, void *context); static bool contain_volatile_functions_walker(Node *node, void *context); static bool contain_volatile_functions_not_nextval_walker(Node *node, void *context); -static bool has_parallel_hazard_walker(Node *node, - has_parallel_hazard_arg *context); +static bool max_parallel_hazard_walker(Node *node, + max_parallel_hazard_context *context); static bool contain_nonstrict_functions_walker(Node *node, void *context); static bool contain_context_dependent_node(Node *clause); static bool contain_context_dependent_node_walker(Node *node, int *flags); @@ -1100,46 +1101,98 @@ contain_volatile_functions_not_nextval_walker(Node *node, void *context) context); } + /***************************************************************************** * Check queries for parallel unsafe and/or restricted constructs *****************************************************************************/ /* - * Check whether a node tree contains parallel hazards. This is used both on - * the entire query tree, to see whether the query can be parallelized at all - * (with allow_restricted = true), and also to evaluate whether a particular - * expression is safe to run within a parallel worker (with allow_restricted = - * false). We could separate these concerns into two different functions, but - * there's enough overlap that it doesn't seem worthwhile. + * max_parallel_hazard + * Find the worst parallel-hazard level in the given query + * + * Returns the worst function hazard property (the earliest in this list: + * PROPARALLEL_UNSAFE, PROPARALLEL_RESTRICTED, PROPARALLEL_SAFE) that can + * be found in the given parsetree. We use this to find out whether the query + * can be parallelized at all. The caller will also save the result in + * PlannerGlobal so as to short-circuit checks of portions of the querytree + * later, in the common case where everything is SAFE. + */ +char +max_parallel_hazard(Query *parse) +{ + max_parallel_hazard_context context; + + context.max_hazard = PROPARALLEL_SAFE; + context.max_interesting = PROPARALLEL_UNSAFE; + (void) max_parallel_hazard_walker((Node *) parse, &context); + return context.max_hazard; +} + +/* + * is_parallel_safe + * Detect whether the given expr contains only parallel-safe functions + * + * root->glob->maxParallelHazard must previously have been set to the + * result of max_parallel_hazard() on the whole query. */ bool -has_parallel_hazard(Node *node, bool allow_restricted) +is_parallel_safe(PlannerInfo *root, Node *node) { - has_parallel_hazard_arg context; + max_parallel_hazard_context context; - context.allow_restricted = allow_restricted; - return has_parallel_hazard_walker(node, &context); + /* If max_parallel_hazard found nothing unsafe, we don't need to look */ + if (root->glob->maxParallelHazard == PROPARALLEL_SAFE) + return true; + /* Else use max_parallel_hazard's search logic, but stop on RESTRICTED */ + context.max_hazard = PROPARALLEL_SAFE; + context.max_interesting = PROPARALLEL_RESTRICTED; + return !max_parallel_hazard_walker(node, &context); } +/* core logic for all parallel-hazard checks */ static bool -has_parallel_hazard_checker(Oid func_id, void *context) +max_parallel_hazard_test(char proparallel, max_parallel_hazard_context *context) { - char proparallel = func_parallel(func_id); + switch (proparallel) + { + case PROPARALLEL_SAFE: + /* nothing to see here, move along */ + break; + case PROPARALLEL_RESTRICTED: + /* increase max_hazard to RESTRICTED */ + Assert(context->max_hazard != PROPARALLEL_UNSAFE); + context->max_hazard = proparallel; + /* done if we are not expecting any unsafe functions */ + if (context->max_interesting == proparallel) + return true; + break; + case PROPARALLEL_UNSAFE: + context->max_hazard = proparallel; + /* we're always done at the first unsafe construct */ + return true; + default: + elog(ERROR, "unrecognized proparallel value \"%c\"", proparallel); + break; + } + return false; +} - if (((has_parallel_hazard_arg *) context)->allow_restricted) - return (proparallel == PROPARALLEL_UNSAFE); - else - return (proparallel != PROPARALLEL_SAFE); +/* check_functions_in_node callback */ +static bool +max_parallel_hazard_checker(Oid func_id, void *context) +{ + return max_parallel_hazard_test(func_parallel(func_id), + (max_parallel_hazard_context *) context); } static bool -has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) +max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) { if (node == NULL) return false; /* Check for hazardous functions in node itself */ - if (check_functions_in_node(node, has_parallel_hazard_checker, + if (check_functions_in_node(node, max_parallel_hazard_checker, context)) return true; @@ -1156,7 +1209,7 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) */ if (IsA(node, CoerceToDomain)) { - if (!context->allow_restricted) + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) return true; } @@ -1167,7 +1220,7 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) { RestrictInfo *rinfo = (RestrictInfo *) node; - return has_parallel_hazard_walker((Node *) rinfo->clause, context); + return max_parallel_hazard_walker((Node *) rinfo->clause, context); } /* @@ -1176,13 +1229,13 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) * not worry about examining their contents; if they are unsafe, we would * have found that out while examining the whole tree before reduction of * sublinks to subplans. (Really we should not see SubLink during a - * not-allow_restricted scan, but if we do, return true.) + * max_interesting == restricted scan, but if we do, return true.) */ else if (IsA(node, SubLink) || IsA(node, SubPlan) || IsA(node, AlternativeSubPlan)) { - if (!context->allow_restricted) + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) return true; } @@ -1192,7 +1245,7 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) */ else if (IsA(node, Param)) { - if (!context->allow_restricted) + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) return true; } @@ -1207,20 +1260,24 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) /* SELECT FOR UPDATE/SHARE must be treated as unsafe */ if (query->rowMarks != NULL) + { + context->max_hazard = PROPARALLEL_UNSAFE; return true; + } /* Recurse into subselects */ return query_tree_walker(query, - has_parallel_hazard_walker, + max_parallel_hazard_walker, context, 0); } /* Recurse to check arguments */ return expression_tree_walker(node, - has_parallel_hazard_walker, + max_parallel_hazard_walker, context); } + /***************************************************************************** * Check clauses for nonstrict functions *****************************************************************************/ diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ce7ad545a9..abb7507d8e 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2178,7 +2178,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe && - !has_parallel_hazard((Node *) target->exprs, false); + is_parallel_safe(root, (Node *) target->exprs); pathnode->path.parallel_workers = subpath->parallel_workers; /* Projection does not change the sort order */ pathnode->path.pathkeys = subpath->pathkeys; @@ -2285,7 +2285,7 @@ apply_projection_to_path(PlannerInfo *root, * target expressions, then we can't. */ if (IsA(path, GatherPath) && - !has_parallel_hazard((Node *) target->exprs, false)) + is_parallel_safe(root, (Node *) target->exprs)) { GatherPath *gpath = (GatherPath *) path; @@ -2306,7 +2306,7 @@ apply_projection_to_path(PlannerInfo *root, target); } else if (path->parallel_safe && - has_parallel_hazard((Node *) target->exprs, false)) + !is_parallel_safe(root, (Node *) target->exprs)) { /* * We're inserting a parallel-restricted target list into a path diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 806600ed10..deef5605b7 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -513,8 +513,8 @@ build_join_rel(PlannerInfo *root, * here. */ if (inner_rel->consider_parallel && outer_rel->consider_parallel && - !has_parallel_hazard((Node *) restrictlist, false) && - !has_parallel_hazard((Node *) joinrel->reltarget->exprs, false)) + is_parallel_safe(root, (Node *) restrictlist) && + is_parallel_safe(root, (Node *) joinrel->reltarget->exprs)) joinrel->consider_parallel = true; /* diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 2be8908445..fcfb0d4d0f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -126,6 +126,8 @@ typedef struct PlannerGlobal bool parallelModeOK; /* parallel mode potentially OK? */ bool parallelModeNeeded; /* parallel mode actually required? */ + + char maxParallelHazard; /* worst PROPARALLEL hazard level */ } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index be7c639f7b..9abef37cb6 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -61,7 +61,8 @@ extern bool contain_subplans(Node *clause); extern bool contain_mutable_functions(Node *clause); extern bool contain_volatile_functions(Node *clause); extern bool contain_volatile_functions_not_nextval(Node *clause); -extern bool has_parallel_hazard(Node *node, bool allow_restricted); +extern char max_parallel_hazard(Query *parse); +extern bool is_parallel_safe(PlannerInfo *root, Node *node); extern bool contain_nonstrict_functions(Node *clause); extern bool contain_leaked_vars(Node *clause); -- 2.40.0