From: Tom Lane Date: Tue, 30 Oct 2001 19:58:58 +0000 (+0000) Subject: Fix problems with subselects used in GROUP BY expressions, per gripe X-Git-Tag: REL7_2_BETA2~72 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=96ca8ffebcb43df1c575ce6831324c553a31c891;p=postgresql Fix problems with subselects used in GROUP BY expressions, per gripe from Philip Warner. Side effect of change is that GROUP BY expressions will not be re-evaluated at multiple plan levels anymore, whereas this sometimes happened with old code. --- diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d13035ca9c..faca5728e6 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.111 2001/10/28 06:25:45 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.112 2001/10/30 19:58:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -173,6 +173,17 @@ subquery_planner(Query *parse, double tuple_fraction) parse->havingQual = preprocess_expression(parse, parse->havingQual, EXPRKIND_HAVING); + /* + * Check for ungrouped variables passed to subplans in targetlist and + * HAVING clause (but not in WHERE or JOIN/ON clauses, since those are + * evaluated before grouping). We can't do this any earlier because + * we must use the preprocessed targetlist for comparisons of grouped + * expressions. + */ + if (parse->hasSubLinks && + (parse->groupClause != NIL || parse->hasAggs)) + check_subplans_for_ungrouped_vars(parse); + /* * A HAVING clause without aggregates is equivalent to a WHERE clause * (except it can only refer to grouped fields). Transfer any @@ -623,23 +634,10 @@ preprocess_expression(Query *parse, Node *expr, int kind) #endif } + /* Expand SubLinks to SubPlans */ if (parse->hasSubLinks) - { - /* Expand SubLinks to SubPlans */ expr = SS_process_sublinks(expr); - if (kind != EXPRKIND_WHERE && - (parse->groupClause != NIL || parse->hasAggs)) - { - /* - * Check for ungrouped variables passed to subplans. Note we - * do NOT do this for subplans in WHERE (or JOIN/ON); it's - * legal there because WHERE is evaluated pre-GROUP. - */ - check_subplans_for_ungrouped_vars(expr, parse); - } - } - /* Replace uplevel vars with Param nodes */ if (PlannerQueryLevel > 1) expr = SS_replace_correlation_vars(expr); @@ -1122,12 +1120,11 @@ grouping_planner(Query *parse, double tuple_fraction) /* * If there are aggregates then the Group node should just return - * the same set of vars as the subplan did (but we can exclude any - * GROUP BY expressions). If there are no aggregates then the - * Group node had better compute the final tlist. + * the same set of vars as the subplan did. If there are no aggs + * then the Group node had better compute the final tlist. */ if (parse->hasAggs) - group_tlist = flatten_tlist(result_plan->targetlist); + group_tlist = new_unsorted_tlist(result_plan->targetlist); else group_tlist = tlist; @@ -1234,7 +1231,10 @@ grouping_planner(Query *parse, double tuple_fraction) * where the a+b target will be used by the Sort/Group steps, and the * other targets will be used for computing the final results. (In the * above example we could theoretically suppress the a and b targets and - * use only a+b, but it's not really worth the trouble.) + * pass down only c,d,a+b, but it's not really worth the trouble to + * eliminate simple var references from the subplan. We will avoid doing + * the extra computation to recompute a+b at the outer level; see + * replace_vars_with_subplan_refs() in setrefs.c.) * * 'parse' is the query being processed. * 'tlist' is the query's target list. diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index e7f8361b9a..652e7c294a 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.71 2001/03/22 03:59:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.72 2001/10/30 19:58:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,7 +33,8 @@ typedef struct typedef struct { Index subvarno; - List *subplanTargetList; + List *subplan_targetlist; + bool tlist_has_non_vars; } replace_vars_with_subplan_refs_context; static void fix_expr_references(Plan *plan, Node *node); @@ -43,7 +44,8 @@ static Node *join_references_mutator(Node *node, join_references_context *context); static Node *replace_vars_with_subplan_refs(Node *node, Index subvarno, - List *subplanTargetList); + List *subplan_targetlist, + bool tlist_has_non_vars); static Node *replace_vars_with_subplan_refs_mutator(Node *node, replace_vars_with_subplan_refs_context *context); static bool fix_opids_walker(Node *node, void *context); @@ -278,75 +280,62 @@ set_join_references(Join *join) * qual expressions with elements of the subplan's tlist (which was * generated by flatten_tlist() from these selfsame expressions, so it * should have all the required variables). There is an important exception, - * however: a GROUP BY expression that is also an output expression will - * have been pushed into the subplan tlist unflattened. We want to detect - * this case and reference the subplan output directly. Therefore, check - * for equality of the whole tlist expression to any subplan element before - * we resort to picking the expression apart for individual Vars. + * however: GROUP BY and ORDER BY expressions will have been pushed into the + * subplan tlist unflattened. If these values are also needed in the output + * then we want to reference the subplan tlist element rather than recomputing + * the expression. */ static void set_uppernode_references(Plan *plan, Index subvarno) { Plan *subplan = plan->lefttree; - List *subplanTargetList, - *outputTargetList, + List *subplan_targetlist, + *output_targetlist, *l; + bool tlist_has_non_vars; if (subplan != NULL) - subplanTargetList = subplan->targetlist; + subplan_targetlist = subplan->targetlist; else - subplanTargetList = NIL; + subplan_targetlist = NIL; - outputTargetList = NIL; - foreach(l, plan->targetlist) + /* + * Detect whether subplan tlist has any non-Vars (typically it won't + * because it's been flattened). This allows us to save comparisons + * in common cases. + */ + tlist_has_non_vars = false; + foreach(l, subplan_targetlist) { TargetEntry *tle = (TargetEntry *) lfirst(l); - TargetEntry *subplantle; - Node *newexpr; - - subplantle = tlistentry_member(tle->expr, subplanTargetList); - if (subplantle) - { - /* Found a matching subplan output expression */ - Resdom *resdom = subplantle->resdom; - Var *newvar; - newvar = makeVar(subvarno, - resdom->resno, - resdom->restype, - resdom->restypmod, - 0); - /* If we're just copying a simple Var, copy up original info */ - if (subplantle->expr && IsA(subplantle->expr, Var)) - { - Var *subvar = (Var *) subplantle->expr; - - newvar->varnoold = subvar->varnoold; - newvar->varoattno = subvar->varoattno; - } - else - { - newvar->varnoold = 0; - newvar->varoattno = 0; - } - newexpr = (Node *) newvar; - } - else + if (tle->expr && ! IsA(tle->expr, Var)) { - /* No matching expression, so replace individual Vars */ - newexpr = replace_vars_with_subplan_refs(tle->expr, - subvarno, - subplanTargetList); + tlist_has_non_vars = true; + break; } - outputTargetList = lappend(outputTargetList, - makeTargetEntry(tle->resdom, newexpr)); } - plan->targetlist = outputTargetList; + + output_targetlist = NIL; + foreach(l, plan->targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Node *newexpr; + + newexpr = replace_vars_with_subplan_refs(tle->expr, + subvarno, + subplan_targetlist, + tlist_has_non_vars); + output_targetlist = lappend(output_targetlist, + makeTargetEntry(tle->resdom, newexpr)); + } + plan->targetlist = output_targetlist; plan->qual = (List *) replace_vars_with_subplan_refs((Node *) plan->qual, subvarno, - subplanTargetList); + subplan_targetlist, + tlist_has_non_vars); } /* @@ -439,9 +428,16 @@ join_references_mutator(Node *node, * --- so this routine should only be applied to nodes whose subplans' * targetlists were generated via flatten_tlist() or some such method. * - * 'node': the tree to be fixed (a targetlist or qual list) + * If tlist_has_non_vars is true, then we try to match whole subexpressions + * against elements of the subplan tlist, so that we can avoid recomputing + * expressions that were already computed by the subplan. (This is relatively + * expensive, so we don't want to try it in the common case where the + * subplan tlist is just a flattened list of Vars.) + * + * 'node': the tree to be fixed (a target item or qual) * 'subvarno': varno to be assigned to all Vars - * 'subplanTargetList': target list for subplan + * 'subplan_targetlist': target list for subplan + * 'tlist_has_non_vars': true if subplan_targetlist contains non-Var exprs * * The resulting tree is a copy of the original in which all Var nodes have * varno = subvarno, varattno = resno of corresponding subplan target. @@ -450,12 +446,14 @@ join_references_mutator(Node *node, static Node * replace_vars_with_subplan_refs(Node *node, Index subvarno, - List *subplanTargetList) + List *subplan_targetlist, + bool tlist_has_non_vars) { replace_vars_with_subplan_refs_context context; context.subvarno = subvarno; - context.subplanTargetList = subplanTargetList; + context.subplan_targetlist = subplan_targetlist; + context.tlist_has_non_vars = tlist_has_non_vars; return replace_vars_with_subplan_refs_mutator(node, &context); } @@ -468,17 +466,38 @@ replace_vars_with_subplan_refs_mutator(Node *node, if (IsA(node, Var)) { Var *var = (Var *) node; - Var *newvar = (Var *) copyObject(var); Resdom *resdom; + Var *newvar; - resdom = tlist_member((Node *) var, context->subplanTargetList); + resdom = tlist_member((Node *) var, context->subplan_targetlist); if (!resdom) elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list"); - + newvar = (Var *) copyObject(var); newvar->varno = context->subvarno; newvar->varattno = resdom->resno; return (Node *) newvar; } + /* Try matching more complex expressions too, if tlist has any */ + if (context->tlist_has_non_vars) + { + Resdom *resdom; + + resdom = tlist_member(node, context->subplan_targetlist); + if (resdom) + { + /* Found a matching subplan output expression */ + Var *newvar; + + newvar = makeVar(context->subvarno, + resdom->resno, + resdom->restype, + resdom->restypmod, + 0); + newvar->varnoold = 0; /* wasn't ever a plain Var */ + newvar->varoattno = 0; + return (Node *) newvar; + } + } return expression_tree_mutator(node, replace_vars_with_subplan_refs_mutator, (void *) context); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index c92581082b..859c409709 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.89 2001/10/25 05:49:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.90 2001/10/30 19:58:58 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -39,12 +39,18 @@ ((Node *) makeConst(BOOLOID, 1, (Datum) (val), \ (isnull), true, false, false)) +typedef struct +{ + Query *query; + List *groupClauses; +} check_subplans_for_ungrouped_vars_context; + static bool contain_agg_clause_walker(Node *node, void *context); static bool pull_agg_clause_walker(Node *node, List **listptr); static bool contain_subplans_walker(Node *node, void *context); static bool pull_subplans_walker(Node *node, List **listptr); static bool check_subplans_for_ungrouped_vars_walker(Node *node, - Query *context); + check_subplans_for_ungrouped_vars_context *context); static bool contain_noncachable_functions_walker(Node *node, void *context); static Node *eval_const_expressions_mutator(Node *node, void *context); static Expr *simplify_op_or_func(Expr *expr, List *args); @@ -517,36 +523,81 @@ pull_subplans_walker(Node *node, List **listptr) * planner runs. So we do it here, after we have processed sublinks into * subplans. This ought to be cleaned up someday. * - * 'clause' is the expression tree to be searched for subplans. - * 'query' provides the GROUP BY list, the target list that the group clauses - * refer to, and the range table. + * A deficiency in this scheme is that any outer reference var must be + * grouped by itself; we don't recognize groupable expressions within + * subselects. For example, consider + * SELECT + * (SELECT x FROM bar where y = (foo.a + foo.b)) + * FROM foo + * GROUP BY a + b; + * This query will be rejected although it could be allowed. */ void -check_subplans_for_ungrouped_vars(Node *clause, - Query *query) +check_subplans_for_ungrouped_vars(Query *query) { + check_subplans_for_ungrouped_vars_context context; + List *gl; + + context.query = query; + /* + * Build a list of the acceptable GROUP BY expressions for use in + * the walker (to avoid repeated scans of the targetlist within the + * recursive routine). + */ + context.groupClauses = NIL; + foreach(gl, query->groupClause) + { + GroupClause *grpcl = lfirst(gl); + Node *expr; + + expr = get_sortgroupclause_expr(grpcl, query->targetList); + context.groupClauses = lcons(expr, context.groupClauses); + } + /* - * No special setup needed; context for walker is just the Query - * pointer + * Recursively scan the targetlist and the HAVING clause. + * WHERE and JOIN/ON conditions are not examined, since they are + * evaluated before grouping. */ - check_subplans_for_ungrouped_vars_walker(clause, query); + check_subplans_for_ungrouped_vars_walker((Node *) query->targetList, + &context); + check_subplans_for_ungrouped_vars_walker(query->havingQual, + &context); + + freeList(context.groupClauses); } static bool check_subplans_for_ungrouped_vars_walker(Node *node, - Query *context) + check_subplans_for_ungrouped_vars_context *context) { + List *gl; + if (node == NULL) return false; + if (IsA(node, Const) || IsA(node, Param)) + return false; /* constants are always acceptable */ /* * If we find an aggregate function, do not recurse into its * arguments. Subplans invoked within aggregate calls are allowed to - * receive ungrouped variables. + * receive ungrouped variables. (This test and the next one should + * match the logic in parse_agg.c's check_ungrouped_columns().) */ if (IsA(node, Aggref)) return false; + /* + * Check to see if subexpression as a whole matches any GROUP BY item. + * We need to do this at every recursion level so that we recognize + * GROUPed-BY expressions before reaching variables within them. + */ + foreach(gl, context->groupClauses) + { + if (equal(node, lfirst(gl))) + return false; /* acceptable, do not descend more */ + } + /* * We can ignore Vars other than in subplan args lists, since the * parser already checked 'em. @@ -564,7 +615,6 @@ check_subplans_for_ungrouped_vars_walker(Node *node, Node *thisarg = lfirst(t); Var *var; bool contained_in_group_clause; - List *gl; /* * We do not care about args that are not local variables; @@ -582,14 +632,9 @@ check_subplans_for_ungrouped_vars_walker(Node *node, * Else, see if it is a grouping column. */ contained_in_group_clause = false; - foreach(gl, context->groupClause) + foreach(gl, context->groupClauses) { - GroupClause *gcl = lfirst(gl); - Node *groupexpr; - - groupexpr = get_sortgroupclause_expr(gcl, - context->targetList); - if (equal(thisarg, groupexpr)) + if (equal(thisarg, lfirst(gl))) { contained_in_group_clause = true; break; @@ -603,8 +648,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node, char *attname; Assert(var->varno > 0 && - (int) var->varno <= length(context->rtable)); - rte = rt_fetch(var->varno, context->rtable); + (int) var->varno <= length(context->query->rtable)); + rte = rt_fetch(var->varno, context->query->rtable); attname = get_rte_attribute_name(rte, var->varattno); elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query", rte->eref->relname, attname); diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index aa97bcc862..da8429ca09 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: clauses.h,v 1.47 2001/10/28 06:26:07 momjian Exp $ + * $Id: clauses.h,v 1.48 2001/10/30 19:58:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -44,7 +44,7 @@ extern List *pull_agg_clause(Node *clause); extern bool contain_subplans(Node *clause); extern List *pull_subplans(Node *clause); -extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query); +extern void check_subplans_for_ungrouped_vars(Query *query); extern bool contain_noncachable_functions(Node *clause);