From 5220bb7533f9891b1e071da6461d5c387e8f7b09 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 19 Jul 2018 13:49:43 +0300 Subject: [PATCH] Expand run-time partition pruning to work with MergeAppend This expands the support for the run-time partition pruning which was added for Append in 499be013de to also allow unneeded subnodes of a MergeAppend to be removed. Author: David Rowley Discussion: https://www.postgresql.org/message-id/CAKJS1f_F_V8D7Wu-HVdnH7zCUxhoGK8XhLLtd%3DCu85qDZzXrgg%40mail.gmail.com --- doc/src/sgml/perform.sgml | 12 +- src/backend/executor/nodeAppend.c | 2 +- src/backend/executor/nodeMergeAppend.c | 137 +++++++++++++++--- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 2 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 46 +++++- src/include/nodes/execnodes.h | 9 ++ src/include/nodes/plannodes.h | 3 + src/test/regress/expected/partition_prune.out | 137 ++++++++++++++++++ src/test/regress/sql/partition_prune.sql | 41 ++++++ 11 files changed, 356 insertions(+), 35 deletions(-) diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index b50357c3b4..262f30fd4c 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -899,12 +899,12 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 Generally, the EXPLAIN output will display details for every plan node which was generated by the query planner. However, there are cases where the executor is able to determine that certain nodes are - not required; currently, the only node type to support this is the - Append node. This node type has the ability to discard - subnodes which it is able to determine won't contain any records required - by the query. It is possible to determine that nodes have been removed in - this way by the presence of a "Subplans Removed" property in the - EXPLAIN output. + not required; currently, the only node types to support this are the + Append and MergeAppend nodes. These + node types have the ability to discard subnodes which they are able to + determine won't contain any records required by the query. It is possible + to determine that nodes have been removed in this way by the presence of a + "Subplans Removed" property in the EXPLAIN output. diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 5ce4fb43e1..e7188b2d31 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -151,7 +151,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) /* * The case where no subplans survive pruning must be handled * specially. The problem here is that code in explain.c requires - * an Append to have at least one subplan in order for it to + * a MergeAppend to have at least one subplan in order for it to * properly determine the Vars in that subplan's targetlist. We * sidestep this issue by just initializing the first subplan and * setting as_whichplan to NO_MATCHING_SUBPLANS to indicate that diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 118f4ef07d..ec8a49c3a8 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -39,6 +39,7 @@ #include "postgres.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeMergeAppend.h" #include "lib/binaryheap.h" #include "miscadmin.h" @@ -65,8 +66,10 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) { MergeAppendState *mergestate = makeNode(MergeAppendState); PlanState **mergeplanstates; + Bitmapset *validsubplans; int nplans; - int i; + int i, + j; ListCell *lc; /* check for unsupported flags */ @@ -78,19 +81,80 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) */ ExecLockNonLeafAppendTables(node->partitioned_rels, estate); - /* - * Set up empty vector of subplan states - */ - nplans = list_length(node->mergeplans); - - mergeplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *)); - /* * create new MergeAppendState for our node */ mergestate->ps.plan = (Plan *) node; mergestate->ps.state = estate; mergestate->ps.ExecProcNode = ExecMergeAppend; + mergestate->ms_noopscan = false; + + /* If run-time partition pruning is enabled, then set that up now */ + if (node->part_prune_infos != NIL) + { + PartitionPruneState *prunestate; + + /* We may need an expression context to evaluate partition exprs */ + ExecAssignExprContext(estate, &mergestate->ps); + + prunestate = ExecCreatePartitionPruneState(&mergestate->ps, + node->part_prune_infos); + mergestate->ms_prune_state = prunestate; + + /* Perform an initial partition prune, if required. */ + if (prunestate->do_initial_prune) + { + /* Determine which subplans survive initial pruning */ + validsubplans = ExecFindInitialMatchingSubPlans(prunestate, + list_length(node->mergeplans)); + + /* + * The case where no subplans survive pruning must be handled + * specially. The problem here is that code in explain.c requires + * an Append to have at least one subplan in order for it to + * properly determine the Vars in that subplan's targetlist. We + * sidestep this issue by just initializing the first subplan and + * setting ms_noopscan to true to indicate that we don't really + * need to scan any subnodes. + */ + if (bms_is_empty(validsubplans)) + { + mergestate->ms_noopscan = true; + + /* Mark the first as valid so that it's initialized below */ + validsubplans = bms_make_singleton(0); + } + + nplans = bms_num_members(validsubplans); + } + else + { + /* We'll need to initialize all subplans */ + nplans = list_length(node->mergeplans); + validsubplans = bms_add_range(NULL, 0, nplans - 1); + } + + /* + * If no runtime pruning is required, we can fill ms_valid_subplans + * immediately, preventing later calls to ExecFindMatchingSubPlans. + */ + if (!prunestate->do_exec_prune) + mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); + } + else + { + nplans = list_length(node->mergeplans); + + /* + * When run-time partition pruning is not enabled we can just mark all + * subplans as valid; they must also all be initialized. + */ + mergestate->ms_valid_subplans = validsubplans = + bms_add_range(NULL, 0, nplans - 1); + mergestate->ms_prune_state = NULL; + } + + mergeplanstates = (PlanState **) palloc(nplans * sizeof(PlanState *)); mergestate->mergeplans = mergeplanstates; mergestate->ms_nplans = nplans; @@ -101,26 +165,24 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) /* * Miscellaneous initialization * - * MergeAppend plans don't have expression contexts because they never - * call ExecQual or ExecProject. - */ - - /* * MergeAppend nodes do have Result slots, which hold pointers to tuples, * so we have to initialize them. */ ExecInitResultTupleSlotTL(estate, &mergestate->ps); /* - * call ExecInitNode on each of the plans to be executed and save the - * results into the array "mergeplans". + * call ExecInitNode on each of the valid plans to be executed and save + * the results into the mergeplanstates array. */ - i = 0; + j = i = 0; foreach(lc, node->mergeplans) { - Plan *initNode = (Plan *) lfirst(lc); + if (bms_is_member(i, validsubplans)) + { + Plan *initNode = (Plan *) lfirst(lc); - mergeplanstates[i] = ExecInitNode(initNode, estate, eflags); + mergeplanstates[j++] = ExecInitNode(initNode, estate, eflags); + } i++; } @@ -178,11 +240,25 @@ ExecMergeAppend(PlanState *pstate) if (!node->ms_initialized) { + /* Nothing to do if all subplans were pruned */ + if (node->ms_noopscan) + return ExecClearTuple(node->ps.ps_ResultTupleSlot); + /* - * First time through: pull the first tuple from each subplan, and set - * up the heap. + * If we've yet to determine the valid subplans then do so now. If + * run-time pruning is disabled then the valid subplans will always be + * set to all subplans. */ - for (i = 0; i < node->ms_nplans; i++) + if (node->ms_valid_subplans == NULL) + node->ms_valid_subplans = + ExecFindMatchingSubPlans(node->ms_prune_state); + + /* + * First time through: pull the first tuple from each valid subplan, + * and set up the heap. + */ + i = -1; + while ((i = bms_next_member(node->ms_valid_subplans, i)) >= 0) { node->ms_slots[i] = ExecProcNode(node->mergeplans[i]); if (!TupIsNull(node->ms_slots[i])) @@ -288,6 +364,12 @@ ExecEndMergeAppend(MergeAppendState *node) */ for (i = 0; i < nplans; i++) ExecEndNode(mergeplans[i]); + + /* + * release any resources associated with run-time pruning + */ + if (node->ms_prune_state) + ExecDestroyPartitionPruneState(node->ms_prune_state); } void @@ -295,6 +377,19 @@ ExecReScanMergeAppend(MergeAppendState *node) { int i; + /* + * If any PARAM_EXEC Params used in pruning expressions have changed, then + * we'd better unset the valid subplans so that they are reselected for + * the new parameter values. + */ + if (node->ms_prune_state && + bms_overlap(node->ps.chgParam, + node->ms_prune_state->execparamids)) + { + bms_free(node->ms_valid_subplans); + node->ms_valid_subplans = NULL; + } + for (i = 0; i < node->ms_nplans; i++) { PlanState *subnode = node->mergeplans[i]; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 1c12075b01..96836ef19c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -273,6 +273,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); + COPY_NODE_FIELD(part_prune_infos); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a88c0aecd0..a6454ce28b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -434,6 +434,8 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) appendStringInfoString(str, " :nullsFirst"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %s", booltostr(node->nullsFirst[i])); + + WRITE_NODE_FIELD(part_prune_infos); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 42aff7f57a..9a01eb6b63 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1634,6 +1634,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); + READ_NODE_FIELD(part_prune_infos); READ_DONE(); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 165a9e9b8e..0a0bec3bfc 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1068,6 +1068,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) subplans = lappend(subplans, subplan); } + /* + * If any quals exist, they may be useful to perform further partition + * pruning during execution. Gather information needed by the executor + * to do partition pruning. + */ if (enable_partition_pruning && rel->reloptkind == RELOPT_BASEREL && best_path->partitioned_rels != NIL) @@ -1078,7 +1083,6 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) if (best_path->path.param_info) { - List *prmquals = best_path->path.param_info->ppi_clauses; prmquals = extract_actual_clauses(prmquals, false); @@ -1088,12 +1092,6 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) prunequal = list_concat(prunequal, prmquals); } - /* - * If any quals exist, they may be useful to perform further partition - * pruning during execution. Generate a PartitionPruneInfo for each - * partitioned rel to store these quals and allow translation of - * partition indexes into subpath indexes. - */ if (prunequal != NIL) partpruneinfos = make_partition_pruneinfo(root, @@ -1133,6 +1131,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; + RelOptInfo *rel = best_path->path.parent; + List *partpruneinfos = NIL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1218,8 +1218,40 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) subplans = lappend(subplans, subplan); } + /* + * If any quals exist, they may be useful to perform further partition + * pruning during execution. Gather information needed by the executor + * to do partition pruning. + */ + if (enable_partition_pruning && + rel->reloptkind == RELOPT_BASEREL && + best_path->partitioned_rels != NIL) + { + List *prunequal; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + if (best_path->path.param_info) + { + + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + if (prunequal != NIL) + partpruneinfos = make_partition_pruneinfo(root, + best_path->partitioned_rels, + best_path->subpaths, prunequal); + } + node->partitioned_rels = best_path->partitioned_rels; node->mergeplans = subplans; + node->part_prune_infos = partpruneinfos; return (Plan *) node; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 20140a35e5..018f50bbb7 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1106,6 +1106,12 @@ struct AppendState * slots current output tuple of each subplan * heap heap of active tuples * initialized true if we have fetched first tuple from each subplan + * noopscan true if partition pruning proved that none of the + * mergeplans can contain a record to satisfy this query. + * prune_state details required to allow partitions to be + * eliminated from the scan, or NULL if not possible. + * valid_subplans for runtime pruning, valid mergeplans indexes to + * scan. * ---------------- */ typedef struct MergeAppendState @@ -1118,6 +1124,9 @@ typedef struct MergeAppendState TupleTableSlot **ms_slots; /* array of length ms_nplans */ struct binaryheap *ms_heap; /* binary heap of slot indices */ bool ms_initialized; /* are subplans started? */ + bool ms_noopscan; + struct PartitionPruneState *ms_prune_state; + Bitmapset *ms_valid_subplans; } MergeAppendState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 5201c6d4bc..b80df601cd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -281,6 +281,9 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ + + /* Info for run-time subplan pruning, one entry per partitioned_rels */ + List *part_prune_infos; /* List of PartitionPruneInfo */ } MergeAppend; /* ---------------- diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 022b7c55c7..38bd179c22 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2824,6 +2824,143 @@ select * from boolp where a = (select value from boolvalues where not value); (9 rows) drop table boolp; +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from generate_series(0,29) t(x); +create index on ma_test (a); +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); + QUERY PLAN +------------------------------------------------------------------------------- + Merge Append (actual rows=2 loops=1) + Sort Key: ma_test_p2.a + Subplans Removed: 1 + -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 9 +(11 rows) + +execute mt_q1(15); + a +---- + 15 + 25 +(2 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); + QUERY PLAN +------------------------------------------------------------------------------- + Merge Append (actual rows=1 loops=1) + Sort Key: ma_test_p3.a + Subplans Removed: 2 + -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 +(7 rows) + +execute mt_q1(25); + a +---- + 25 +(1 row) + +-- Ensure MergeAppend behaves correctly when no subplans match +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); + QUERY PLAN +------------------------------------------------------------------------ + Merge Append (actual rows=0 loops=1) + Sort Key: ma_test_p1.a + Subplans Removed: 2 + -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) +(6 rows) + +execute mt_q1(35); + a +--- +(0 rows) + +deallocate mt_q1; +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Merge Append (actual rows=20 loops=1) + Sort Key: ma_test_p1.a + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + InitPlan 1 (returns $0) + -> Limit (actual rows=1 loops=1) + -> Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) + Index Cond: (a IS NOT NULL) + -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1) + Index Cond: (a >= $1) + -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1) + Index Cond: (a >= $1) +(14 rows) + +reset enable_seqscan; +reset enable_sort; +drop table ma_test; reset enable_indexonlyscan; -- -- check that pruning works properly when the partition key is of a diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 2357f02cde..e5e7789fc5 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -714,6 +714,47 @@ select * from boolp where a = (select value from boolvalues where not value); drop table boolp; +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from generate_series(0,29) t(x); +create index on ma_test (a); + +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); +execute mt_q1(15); +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); +execute mt_q1(25); +-- Ensure MergeAppend behaves correctly when no subplans match +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); +execute mt_q1(35); + +deallocate mt_q1; + +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + +reset enable_seqscan; +reset enable_sort; + +drop table ma_test; + reset enable_indexonlyscan; -- -- 2.40.0