From: Robert Haas Date: Tue, 14 Feb 2017 23:09:47 +0000 (-0500) Subject: Allow parallel workers to execute subplans. X-Git-Tag: REL_10_BETA1~900 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5e6d8d2bbbcace304450b309e79366c0da4063e4;p=postgresql Allow parallel workers to execute subplans. This doesn't do anything to make Param nodes anything other than parallel-restricted, so this only helps with uncorrelated subplans, and it's not necessarily very cheap because each worker will run the subplan separately (just as a Hash Join will build a separate copy of the hash table in each participating process), but it's a first step toward supporting cases that are more likely to help in practice, and is occasionally useful on its own. Amit Kapila, reviewed and tested by Rafia Sabih, Dilip Kumar, and me. Discussion: http://postgr.es/m/CAA4eK1+e8Z45D2n+rnDMDYsVEb5iW7jqaCH_tvPMYau=1Rru9w@mail.gmail.com --- diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index fe87c9ae71..784dbaf590 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -156,7 +156,7 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->planTree = plan; pstmt->rtable = estate->es_range_table; pstmt->resultRelations = NIL; - pstmt->subplans = NIL; + pstmt->subplans = estate->es_plannedstmt->subplans; pstmt->rewindPlanIDs = NULL; pstmt->rowMarks = NIL; pstmt->relationOids = NIL; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 30d733e57a..12324ab63f 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1495,6 +1495,7 @@ _copySubPlan(const SubPlan *from) COPY_SCALAR_FIELD(firstColCollation); COPY_SCALAR_FIELD(useHashTable); COPY_SCALAR_FIELD(unknownEqFalse); + COPY_SCALAR_FIELD(parallel_safe); COPY_NODE_FIELD(setParam); COPY_NODE_FIELD(parParam); COPY_NODE_FIELD(args); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 55c73b7292..6d1dabe17e 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -423,6 +423,7 @@ _equalSubPlan(const SubPlan *a, const SubPlan *b) COMPARE_SCALAR_FIELD(firstColCollation); COMPARE_SCALAR_FIELD(useHashTable); COMPARE_SCALAR_FIELD(unknownEqFalse); + COMPARE_SCALAR_FIELD(parallel_safe); COMPARE_NODE_FIELD(setParam); COMPARE_NODE_FIELD(parParam); COMPARE_NODE_FIELD(args); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1560ac3989..b3802b4428 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1226,6 +1226,7 @@ _outSubPlan(StringInfo str, const SubPlan *node) WRITE_OID_FIELD(firstColCollation); WRITE_BOOL_FIELD(useHashTable); WRITE_BOOL_FIELD(unknownEqFalse); + WRITE_BOOL_FIELD(parallel_safe); WRITE_NODE_FIELD(setParam); WRITE_NODE_FIELD(parParam); WRITE_NODE_FIELD(args); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index dcfa6ee28d..d2f69fe70b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2233,6 +2233,7 @@ _readSubPlan(void) READ_OID_FIELD(firstColCollation); READ_BOOL_FIELD(useHashTable); READ_BOOL_FIELD(unknownEqFalse); + READ_BOOL_FIELD(parallel_safe); READ_NODE_FIELD(setParam); READ_NODE_FIELD(parParam); READ_NODE_FIELD(args); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 9fc748973e..7954c445dd 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -58,7 +58,7 @@ static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, Node *testexpr, bool adjust_testexpr, - bool unknownEqFalse); + bool unknownEqFalse, bool parallel_safe); static List *generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds); static List *generate_subquery_vars(PlannerInfo *root, List *tlist, @@ -551,7 +551,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, /* And convert to SubPlan or InitPlan format. */ result = build_subplan(root, plan, subroot, plan_params, subLinkType, subLinkId, - testexpr, true, isTopQual); + testexpr, true, isTopQual, + best_path->parallel_safe); /* * If it's a correlated EXISTS with an unimportant targetlist, we might be @@ -604,7 +605,8 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, plan_params, ANY_SUBLINK, 0, newtestexpr, - false, true); + false, true, + best_path->parallel_safe); /* Check we got what we expected */ Assert(IsA(hashplan, SubPlan)); Assert(hashplan->parParam == NIL); @@ -634,7 +636,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, Node *testexpr, bool adjust_testexpr, - bool unknownEqFalse) + bool unknownEqFalse, bool parallel_safe) { Node *result; SubPlan *splan; @@ -653,6 +655,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, &splan->firstColCollation); splan->useHashTable = false; splan->unknownEqFalse = unknownEqFalse; + splan->parallel_safe = parallel_safe; splan->setParam = NIL; splan->parParam = NIL; splan->args = NIL; @@ -1213,6 +1216,12 @@ SS_process_ctes(PlannerInfo *root) &splan->firstColCollation); splan->useHashTable = false; splan->unknownEqFalse = false; + + /* + * CTE scans are not considered for parallelism (cf + * set_rel_consider_parallel). + */ + splan->parallel_safe = false; splan->setParam = NIL; splan->parParam = NIL; splan->args = NIL; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index d589dc2544..3dedee6d69 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -1162,21 +1162,19 @@ max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) } /* - * Since we don't have the ability to push subplans down to workers at - * present, we treat subplan references as parallel-restricted. We need - * not worry about examining their contents; if they are unsafe, we would - * have found that out while examining the whole tree before reduction of - * sublinks to subplans. (Really we should not see SubLink during a - * max_interesting == restricted scan, but if we do, return true.) + * Really we should not see SubLink during a max_interesting == restricted + * scan, but if we do, return true. */ - else if (IsA(node, SubLink) || - IsA(node, SubPlan) || - IsA(node, AlternativeSubPlan)) + else if (IsA(node, SubLink)) { if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) return true; } + /* We can push the subplans only if they are parallel-safe. */ + else if (IsA(node, SubPlan)) + return !((SubPlan *) node)->parallel_safe; + /* * We can't pass Params to workers at the moment either, so they are also * parallel-restricted. diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f72ec247ff..235bc75096 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -677,6 +677,7 @@ typedef struct SubPlan bool unknownEqFalse; /* TRUE if it's okay to return FALSE when the * spec result is UNKNOWN; this allows much * simpler handling of null values */ + bool parallel_safe; /* OK to use as part of parallel plan? */ /* Information for passing params into and out of the subselect: */ /* setParam and parParam are lists of integers (param IDs) */ List *setParam; /* initplan subqueries have to set these diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 18e21b7f13..8786678f0c 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -99,6 +99,32 @@ explain (costs off) -> Index Only Scan using tenk1_unique1 on tenk1 (3 rows) +-- test parallel plans for queries containing un-correlated subplans. +alter table tenk2 set (parallel_workers = 0); +explain (costs off) + select count(*) from tenk1 where (two, four) not in + (select hundred, thousand from tenk2 where thousand > 100); + QUERY PLAN +------------------------------------------------------ + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Seq Scan on tenk1 + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on tenk2 + Filter: (thousand > 100) +(9 rows) + +select count(*) from tenk1 where (two, four) not in + (select hundred, thousand from tenk2 where thousand > 100); + count +------- + 10000 +(1 row) + +alter table tenk2 reset (parallel_workers); set force_parallel_mode=1; explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 8b4090f2ec..def9939d2e 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -39,6 +39,15 @@ explain (costs off) select sum(parallel_restricted(unique1)) from tenk1 group by(parallel_restricted(unique1)); +-- test parallel plans for queries containing un-correlated subplans. +alter table tenk2 set (parallel_workers = 0); +explain (costs off) + select count(*) from tenk1 where (two, four) not in + (select hundred, thousand from tenk2 where thousand > 100); +select count(*) from tenk1 where (two, four) not in + (select hundred, thousand from tenk2 where thousand > 100); +alter table tenk2 reset (parallel_workers); + set force_parallel_mode=1; explain (costs off)