From b61b28fbe8147b003550548b4976977252ee39b3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 12 Jul 2011 18:24:53 -0400 Subject: [PATCH] Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window. Regular aggregate functions in combination with, or within the arguments of, window functions are OK per spec; they have the semantics that the aggregate output rows are computed and then we run the window functions over that row set. (Thus, this combination is not really useful unless there's a GROUP BY so that more than one aggregate output row is possible.) The case without GROUP BY could fail, as recently reported by Jeff Davis, because sloppy construction of the Agg node's targetlist resulted in extra references to possibly-ungrouped Vars appearing outside the aggregate function calls themselves. See the added regression test case for an example. Fixing this requires modifying the API of flatten_tlist and its underlying function pull_var_clause. I chose to make pull_var_clause's API for aggregates identical to what it was already doing for placeholders, since the useful behaviors turn out to be the same (error, report node as-is, or recurse into it). I also tightened the error checking in this area a bit: if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here, that was a long time ago, so complain instead of ignoring them. Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing that it only occurs in a basically-useless corner case, it doesn't seem worth the risks of changing a function API in a minor release. There might be third-party code using pull_var_clause. --- src/backend/catalog/heap.c | 8 +++- src/backend/commands/trigger.c | 4 +- src/backend/optimizer/path/allpaths.c | 7 ++- src/backend/optimizer/path/equivclass.c | 1 + src/backend/optimizer/plan/createplan.c | 1 + src/backend/optimizer/plan/initsplan.c | 5 ++- src/backend/optimizer/plan/planner.c | 31 ++++++++----- src/backend/optimizer/prep/preptlist.c | 1 + src/backend/optimizer/util/clauses.c | 36 ---------------- src/backend/optimizer/util/placeholder.c | 5 ++- src/backend/optimizer/util/tlist.c | 11 ++--- src/backend/optimizer/util/var.c | 55 ++++++++++++++++++------ src/backend/utils/adt/selfuncs.c | 4 +- src/include/optimizer/clauses.h | 1 - src/include/optimizer/tlist.h | 5 ++- src/include/optimizer/var.h | 12 +++++- src/test/regress/expected/window.out | 7 +++ src/test/regress/sql/window.sql | 3 ++ 18 files changed, 119 insertions(+), 78 deletions(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index e606ac2b9e..3d2b183a4e 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1874,7 +1874,9 @@ StoreRelCheck(Relation rel, char *ccname, Node *expr, * in check constraints; it would fail to examine the contents of * subselects. */ - varList = pull_var_clause(expr, PVC_REJECT_PLACEHOLDERS); + varList = pull_var_clause(expr, + PVC_REJECT_AGGREGATES, + PVC_REJECT_PLACEHOLDERS); keycount = list_length(varList); if (keycount > 0) @@ -2170,7 +2172,9 @@ AddRelationNewConstraints(Relation rel, List *vars; char *colname; - vars = pull_var_clause(expr, PVC_REJECT_PLACEHOLDERS); + vars = pull_var_clause(expr, + PVC_REJECT_AGGREGATES, + PVC_REJECT_PLACEHOLDERS); /* eliminate duplicates */ vars = list_union(NIL, vars); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 2464626637..27dcd1707a 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -302,7 +302,9 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, * subselects in WHEN clauses; it would fail to examine the contents * of subselects. */ - varList = pull_var_clause(whenClause, PVC_REJECT_PLACEHOLDERS); + varList = pull_var_clause(whenClause, + PVC_REJECT_AGGREGATES, + PVC_REJECT_PLACEHOLDERS); foreach(lc, varList) { Var *var = (Var *) lfirst(lc); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 47ab08e502..6b43aee183 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1304,7 +1304,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, /* * It would be unsafe to push down window function calls, but at least for - * the moment we could never see any in a qual anyhow. + * the moment we could never see any in a qual anyhow. (The same applies + * to aggregates, which we check for in pull_var_clause below.) */ Assert(!contain_window_function(qual)); @@ -1312,7 +1313,9 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, * Examine all Vars used in clause; since it's a restriction clause, all * such Vars must refer to subselect output columns. */ - vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS); + vars = pull_var_clause(qual, + PVC_REJECT_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); foreach(vl, vars) { Var *var = (Var *) lfirst(vl); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index a365beecd8..f2beb410e7 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -847,6 +847,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, { EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); List *vars = pull_var_clause((Node *) cur_em->em_expr, + PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS); add_vars_to_targetlist(root, vars, ec->ec_relids); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index e4ccf5ce79..a3a82ec123 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -3552,6 +3552,7 @@ prepare_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys, continue; sortexpr = em->em_expr; exprvars = pull_var_clause((Node *) sortexpr, + PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS); foreach(k, exprvars) { diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 333ede218e..394cda32e5 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -132,6 +132,7 @@ void build_base_rel_tlists(PlannerInfo *root, List *final_tlist) { List *tlist_vars = pull_var_clause((Node *) final_tlist, + PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS); if (tlist_vars != NIL) @@ -1030,7 +1031,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, */ if (bms_membership(relids) == BMS_MULTIPLE) { - List *vars = pull_var_clause(clause, PVC_INCLUDE_PLACEHOLDERS); + List *vars = pull_var_clause(clause, + PVC_RECURSE_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); add_vars_to_targetlist(root, vars, relids); list_free(vars); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9aafc8adcc..d4d9c58cff 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1473,11 +1473,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * step. That's handled internally by make_sort_from_pathkeys, * but we need the copyObject steps here to ensure that each plan * node has a separately modifiable tlist. + * + * Note: it's essential here to use PVC_INCLUDE_AGGREGATES so that + * Vars mentioned only in aggregate expressions aren't pulled out + * as separate targetlist entries. Otherwise we could be putting + * ungrouped Vars directly into an Agg node's tlist, resulting in + * undefined behavior. */ - window_tlist = flatten_tlist(tlist); - if (parse->hasAggs) - window_tlist = add_to_flat_tlist(window_tlist, - pull_agg_clause((Node *) tlist)); + window_tlist = flatten_tlist(tlist, + PVC_INCLUDE_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); window_tlist = add_volatile_sort_exprs(window_tlist, tlist, activeWindows); result_plan->targetlist = (List *) copyObject(window_tlist); @@ -2576,14 +2581,18 @@ make_subplanTargetList(PlannerInfo *root, } /* - * Otherwise, start with a "flattened" tlist (having just the vars - * mentioned in the targetlist and HAVING qual --- but not upper-level - * Vars; they will be replaced by Params later on). Note this includes - * vars used in resjunk items, so we are covering the needs of ORDER BY - * and window specifications. + * Otherwise, start with a "flattened" tlist (having just the Vars + * mentioned in the targetlist and HAVING qual). Note this includes Vars + * used in resjunk items, so we are covering the needs of ORDER BY and + * window specifications. Vars used within Aggrefs will be pulled out + * here, too. */ - sub_tlist = flatten_tlist(tlist); - extravars = pull_var_clause(parse->havingQual, PVC_INCLUDE_PLACEHOLDERS); + sub_tlist = flatten_tlist(tlist, + PVC_RECURSE_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); + extravars = pull_var_clause(parse->havingQual, + PVC_RECURSE_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); sub_tlist = add_to_flat_tlist(sub_tlist, extravars); list_free(extravars); *need_tlist_eval = false; /* only eval if not flat tlist */ diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index c97150c6f7..fa2514d2a4 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -154,6 +154,7 @@ preprocess_targetlist(PlannerInfo *root, List *tlist) ListCell *l; vars = pull_var_clause((Node *) parse->returningList, + PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS); foreach(l, vars) { diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 2914c39818..51e0033139 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -84,7 +84,6 @@ typedef struct } inline_error_callback_arg; static bool contain_agg_clause_walker(Node *node, void *context); -static bool pull_agg_clause_walker(Node *node, List **context); static bool count_agg_clauses_walker(Node *node, count_agg_clauses_context *context); static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); @@ -418,41 +417,6 @@ contain_agg_clause_walker(Node *node, void *context) return expression_tree_walker(node, contain_agg_clause_walker, context); } -/* - * pull_agg_clause - * Recursively search for Aggref nodes within a clause. - * - * Returns a List of all Aggrefs found. - * - * This does not descend into subqueries, and so should be used only after - * reduction of sublinks to subplans, or in contexts where it's known there - * are no subqueries. There mustn't be outer-aggregate references either. - */ -List * -pull_agg_clause(Node *clause) -{ - List *result = NIL; - - (void) pull_agg_clause_walker(clause, &result); - return result; -} - -static bool -pull_agg_clause_walker(Node *node, List **context) -{ - if (node == NULL) - return false; - if (IsA(node, Aggref)) - { - Assert(((Aggref *) node)->agglevelsup == 0); - *context = lappend(*context, node); - return false; /* no need to descend into arguments */ - } - Assert(!IsA(node, SubLink)); - return expression_tree_walker(node, pull_agg_clause_walker, - (void *) context); -} - /* * count_agg_clauses * Recursively count the Aggref nodes in an expression tree, and diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index 9796fbf9b6..19862f39be 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -201,7 +201,9 @@ find_placeholders_in_qual(PlannerInfo *root, Node *qual, Relids relids) * pull_var_clause does more than we need here, but it'll do and it's * convenient to use. */ - vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS); + vars = pull_var_clause(qual, + PVC_RECURSE_AGGREGATES, + PVC_INCLUDE_PLACEHOLDERS); foreach(vl, vars) { PlaceHolderVar *phv = (PlaceHolderVar *) lfirst(vl); @@ -348,6 +350,7 @@ fix_placeholder_input_needed_levels(PlannerInfo *root) if (bms_membership(eval_at) == BMS_MULTIPLE) { List *vars = pull_var_clause((Node *) phinfo->ph_var->phexpr, + PVC_RECURSE_AGGREGATES, PVC_INCLUDE_PLACEHOLDERS); add_vars_to_targetlist(root, vars, eval_at); diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index d17424e40f..718057a773 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -76,9 +76,8 @@ tlist_member_ignore_relabel(Node *node, List *targetlist) * flatten_tlist * Create a target list that only contains unique variables. * - * Note that Vars with varlevelsup > 0 are not included in the output - * tlist. We expect that those will eventually be replaced with Params, - * but that probably has not happened at the time this routine is called. + * Aggrefs and PlaceHolderVars in the input are treated according to + * aggbehavior and phbehavior, for which see pull_var_clause(). * * 'tlist' is the current target list * @@ -88,10 +87,12 @@ tlist_member_ignore_relabel(Node *node, List *targetlist) * Copying the Var nodes is probably overkill, but be safe for now. */ List * -flatten_tlist(List *tlist) +flatten_tlist(List *tlist, PVCAggregateBehavior aggbehavior, + PVCPlaceHolderBehavior phbehavior) { List *vlist = pull_var_clause((Node *) tlist, - PVC_INCLUDE_PLACEHOLDERS); + aggbehavior, + phbehavior); List *new_tlist; new_tlist = add_to_flat_tlist(NIL, vlist); diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index edf507c405..8ce7ee41a1 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -56,7 +56,8 @@ typedef struct typedef struct { List *varlist; - PVCPlaceHolderBehavior behavior; + PVCAggregateBehavior aggbehavior; + PVCPlaceHolderBehavior phbehavior; } pull_var_clause_context; typedef struct @@ -616,16 +617,22 @@ find_minimum_var_level_walker(Node *node, * pull_var_clause * Recursively pulls all Var nodes from an expression clause. * - * PlaceHolderVars are handled according to 'behavior': + * Aggrefs are handled according to 'aggbehavior': + * PVC_REJECT_AGGREGATES throw error if Aggref found + * PVC_INCLUDE_AGGREGATES include Aggrefs in output list + * PVC_RECURSE_AGGREGATES recurse into Aggref arguments + * Vars within an Aggref's expression are included only in the last case. + * + * PlaceHolderVars are handled according to 'phbehavior': * PVC_REJECT_PLACEHOLDERS throw error if PlaceHolderVar found * PVC_INCLUDE_PLACEHOLDERS include PlaceHolderVars in output list - * PVC_RECURSE_PLACEHOLDERS recurse into PlaceHolderVar argument + * PVC_RECURSE_PLACEHOLDERS recurse into PlaceHolderVar arguments * Vars within a PHV's expression are included only in the last case. * * CurrentOfExpr nodes are ignored in all cases. * - * Upper-level vars (with varlevelsup > 0) are not included. - * (These probably represent errors too, but we don't complain.) + * Upper-level vars (with varlevelsup > 0) should not be seen here, + * likewise for upper-level Aggrefs and PlaceHolderVars. * * Returns list of nodes found. Note the nodes themselves are not * copied, only referenced. @@ -634,12 +641,14 @@ find_minimum_var_level_walker(Node *node, * of sublinks to subplans! */ List * -pull_var_clause(Node *node, PVCPlaceHolderBehavior behavior) +pull_var_clause(Node *node, PVCAggregateBehavior aggbehavior, + PVCPlaceHolderBehavior phbehavior) { pull_var_clause_context context; context.varlist = NIL; - context.behavior = behavior; + context.aggbehavior = aggbehavior; + context.phbehavior = phbehavior; pull_var_clause_walker(node, &context); return context.varlist; @@ -652,20 +661,40 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context) return false; if (IsA(node, Var)) { - if (((Var *) node)->varlevelsup == 0) - context->varlist = lappend(context->varlist, node); + if (((Var *) node)->varlevelsup != 0) + elog(ERROR, "Upper-level Var found where not expected"); + context->varlist = lappend(context->varlist, node); return false; } - if (IsA(node, PlaceHolderVar)) + else if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup != 0) + elog(ERROR, "Upper-level Aggref found where not expected"); + switch (context->aggbehavior) + { + case PVC_REJECT_AGGREGATES: + elog(ERROR, "Aggref found where not expected"); + break; + case PVC_INCLUDE_AGGREGATES: + context->varlist = lappend(context->varlist, node); + /* we do NOT descend into the contained expression */ + return false; + case PVC_RECURSE_AGGREGATES: + /* ignore the aggregate, look at its argument instead */ + break; + } + } + else if (IsA(node, PlaceHolderVar)) { - switch (context->behavior) + if (((PlaceHolderVar *) node)->phlevelsup != 0) + elog(ERROR, "Upper-level PlaceHolderVar found where not expected"); + switch (context->phbehavior) { case PVC_REJECT_PLACEHOLDERS: elog(ERROR, "PlaceHolderVar found where not expected"); break; case PVC_INCLUDE_PLACEHOLDERS: - if (((PlaceHolderVar *) node)->phlevelsup == 0) - context->varlist = lappend(context->varlist, node); + context->varlist = lappend(context->varlist, node); /* we do NOT descend into the contained expression */ return false; case PVC_RECURSE_PLACEHOLDERS: diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 00ba19ec6c..a11444deae 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3089,7 +3089,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows) * PlaceHolderVar doesn't change the number of groups, which boils * down to ignoring the possible addition of nulls to the result set). */ - varshere = pull_var_clause(groupexpr, PVC_RECURSE_PLACEHOLDERS); + varshere = pull_var_clause(groupexpr, + PVC_RECURSE_AGGREGATES, + PVC_RECURSE_PLACEHOLDERS); /* * If we find any variable-free GROUP BY item, then either it is a diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index dde6d82db4..4cef7fadb2 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -48,7 +48,6 @@ extern Expr *make_ands_explicit(List *andclauses); extern List *make_ands_implicit(Expr *clause); extern bool contain_agg_clause(Node *clause); -extern List *pull_agg_clause(Node *clause); extern void count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 7af59589dc..a3b307d7fb 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -14,13 +14,14 @@ #ifndef TLIST_H #define TLIST_H -#include "nodes/relation.h" +#include "optimizer/var.h" extern TargetEntry *tlist_member(Node *node, List *targetlist); extern TargetEntry *tlist_member_ignore_relabel(Node *node, List *targetlist); -extern List *flatten_tlist(List *tlist); +extern List *flatten_tlist(List *tlist, PVCAggregateBehavior aggbehavior, + PVCPlaceHolderBehavior phbehavior); extern List *add_to_flat_tlist(List *tlist, List *exprs); extern List *get_tlist_exprs(List *tlist, bool includeJunk); diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h index 33340dda0f..5d7e2d93e9 100644 --- a/src/include/optimizer/var.h +++ b/src/include/optimizer/var.h @@ -16,11 +16,18 @@ #include "nodes/relation.h" +typedef enum +{ + PVC_REJECT_AGGREGATES, /* throw error if Aggref found */ + PVC_INCLUDE_AGGREGATES, /* include Aggrefs in output list */ + PVC_RECURSE_AGGREGATES /* recurse into Aggref arguments */ +} PVCAggregateBehavior; + typedef enum { PVC_REJECT_PLACEHOLDERS, /* throw error if PlaceHolderVar found */ PVC_INCLUDE_PLACEHOLDERS, /* include PlaceHolderVars in output list */ - PVC_RECURSE_PLACEHOLDERS /* recurse into PlaceHolderVar argument */ + PVC_RECURSE_PLACEHOLDERS /* recurse into PlaceHolderVar arguments */ } PVCPlaceHolderBehavior; extern Relids pull_varnos(Node *node); @@ -30,7 +37,8 @@ extern bool contain_vars_of_level(Node *node, int levelsup); extern int locate_var_of_level(Node *node, int levelsup); extern int locate_var_of_relation(Node *node, int relid, int levelsup); extern int find_minimum_var_level(Node *node); -extern List *pull_var_clause(Node *node, PVCPlaceHolderBehavior behavior); +extern List *pull_var_clause(Node *node, PVCAggregateBehavior aggbehavior, + PVCPlaceHolderBehavior phbehavior); extern Node *flatten_join_alias_vars(PlannerInfo *root, Node *node); #endif /* VAR_H */ diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index aa0a0c2067..e42ce17438 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -587,6 +587,13 @@ SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), 11 | develop | 5200 | 500 | 200 | 500 | 200 (10 rows) +-- window function over ungrouped agg over empty row set (bug before 9.1) +SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42; + sum +----- + 0 +(1 row) + -- test non-default frame specifications SELECT four, ten, sum(ten) over (partition by four order by ten), diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 6a5c855ead..61da23a4a3 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -135,6 +135,9 @@ SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), THEN 200 END AS depadj FROM empsalary )s; +-- window function over ungrouped agg over empty row set (bug before 9.1) +SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42; + -- test non-default frame specifications SELECT four, ten, sum(ten) over (partition by four order by ten), -- 2.40.0