From 2c00fad2864350508f666da1a2c04e0cbe9cbf58 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 24 Aug 2016 14:37:50 -0400 Subject: [PATCH] Fix improper repetition of previous results from a hashed aggregate. ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: --- src/backend/executor/nodeAgg.c | 10 ++-- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 1 + src/backend/optimizer/plan/subselect.c | 48 ++++++++++++++- src/include/nodes/plannodes.h | 3 +- src/test/regress/expected/aggregates.out | 75 ++++++++++++++++++++++++ src/test/regress/sql/aggregates.sql | 22 +++++++ 9 files changed, 156 insertions(+), 6 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 1ec2515090..ce2fc281a4 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -3425,11 +3425,13 @@ ExecReScanAgg(AggState *node) return; /* - * If we do have the hash table and the subplan does not have any - * parameter changes, then we can just rescan the existing hash table; - * no need to build it again. + * If we do have the hash table, and the subplan does not have any + * parameter changes, and none of our own parameter changes affect + * input expressions of the aggregated functions, then we can just + * rescan the existing hash table; no need to build it again. */ - if (outerPlan->chgParam == NULL) + if (outerPlan->chgParam == NULL && + !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams)) { ResetTupleHashIterator(node->hashtable, &node->hashiter); return; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index c7a06442ba..1877fb45e5 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -877,6 +877,7 @@ _copyAgg(const Agg *from) COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); + COPY_BITMAPSET_FIELD(aggParams); COPY_NODE_FIELD(groupingSets); COPY_NODE_FIELD(chain); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 50019f4164..29b7712584 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -716,6 +716,7 @@ _outAgg(StringInfo str, const Agg *node) appendStringInfo(str, " %u", node->grpOperators[i]); WRITE_LONG_FIELD(numGroups); + WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); WRITE_NODE_FIELD(chain); } diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c83063e219..6f9a81e3e3 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2007,6 +2007,7 @@ _readAgg(void) READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); READ_LONG_FIELD(numGroups); + READ_BITMAPSET_FIELD(aggParams); READ_NODE_FIELD(groupingSets); READ_NODE_FIELD(chain); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 54d601fc47..47158f6468 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -5664,6 +5664,7 @@ make_agg(List *tlist, List *qual, node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; node->numGroups = numGroups; + node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; node->chain = chain; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index a46cc10820..6edefb1138 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -82,6 +82,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root, Bitmapset *valid_params, Bitmapset *scan_params); static bool finalize_primnode(Node *node, finalize_primnode_context *context); +static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); /* @@ -2652,6 +2653,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, locally_added_param); break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * AGG_HASHED plans need to know which Params are referenced + * in aggregate calls. Do a separate scan to identify them. + */ + if (agg->aggstrategy == AGG_HASHED) + { + finalize_primnode_context aggcontext; + + aggcontext.root = root; + aggcontext.paramids = NULL; + finalize_agg_primnode((Node *) agg->plan.targetlist, + &aggcontext); + finalize_agg_primnode((Node *) agg->plan.qual, + &aggcontext); + agg->aggParams = aggcontext.paramids; + } + } + break; + case T_WindowAgg: finalize_primnode(((WindowAgg *) plan)->startOffset, &context); @@ -2660,7 +2684,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, break; case T_Hash: - case T_Agg: case T_Material: case T_Sort: case T_Unique: @@ -2811,6 +2834,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context) (void *) context); } +/* + * finalize_agg_primnode: find all Aggref nodes in the given expression tree, + * and add IDs of all PARAM_EXEC params appearing within their aggregated + * arguments to the result set. + */ +static bool +finalize_agg_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + /* we should not consider the direct arguments, if any */ + finalize_primnode((Node *) agg->args, context); + finalize_primnode((Node *) agg->aggfilter, context); + return false; /* there can't be any Aggrefs below here */ + } + return expression_tree_walker(node, finalize_agg_primnode, + (void *) context); +} + /* * SS_make_initplan_output_param - make a Param for an initPlan's output * diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 369179f291..bc5463b8f7 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -715,7 +715,8 @@ typedef struct Agg AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ - /* Note: the planner only provides numGroups in AGG_HASHED case */ + Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ + /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ } Agg; diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 14646c6397..45208a6da6 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -366,6 +366,81 @@ from tenk1 o; 9999 (1 row) +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + QUERY PLAN +------------------------------------------------------------------ + Sort + Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + Sort Key: s1.s1, s2.s2 + -> Nested Loop + Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + -> Function Scan on pg_catalog.generate_series s1 + Output: s1.s1 + Function Call: generate_series(1, 3) + -> HashAggregate + Output: s2.s2, sum((s1.s1 + s2.s2)) + Group Key: s2.s2 + -> Function Scan on pg_catalog.generate_series s2 + Output: s2.s2 + Function Call: generate_series(1, 3) +(14 rows) + +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + s1 | s2 | sm +----+----+---- + 1 | 1 | 2 + 1 | 2 | 3 + 1 | 3 | 4 + 2 | 1 | 3 + 2 | 2 | 4 + 2 | 3 | 5 + 3 | 1 | 4 + 3 | 2 | 5 + 3 | 3 | 6 +(9 rows) + +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + QUERY PLAN +------------------------------------------------------------------- + Function Scan on pg_catalog.generate_series x + Output: (SubPlan 1) + Function Call: generate_series(1, 3) + SubPlan 1 + -> Sort + Output: (sum((x.x + y.y))), y.y + Sort Key: (sum((x.x + y.y))) + -> HashAggregate + Output: sum((x.x + y.y)), y.y + Group Key: y.y + -> Function Scan on pg_catalog.generate_series y + Output: y.y + Function Call: generate_series(1, 3) +(13 rows) + +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + array +--------- + {2,3,4} + {3,4,5} + {4,5,6} +(3 rows) + -- -- test for bitwise integer aggregates -- diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 9983ff3a89..430ac49385 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -98,6 +98,28 @@ select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; +-- Test handling of Params within aggregate arguments in hashed aggregation. +-- Per bug report from Jeevan Chalke. +explain (verbose, costs off) +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; +select s1, s2, sm +from generate_series(1, 3) s1, + lateral (select s2, sum(s1 + s2) sm + from generate_series(1, 3) s2 group by s2) ss +order by 1, 2; + +explain (verbose, costs off) +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; +select array(select sum(x+y) s + from generate_series(1,3) y group by y order by s) + from generate_series(1,3) x; + -- -- test for bitwise integer aggregates -- -- 2.40.0