]> granicus.if.org Git - postgresql/commitdiff
Fix problems with subselects used in GROUP BY expressions, per gripe
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 30 Oct 2001 19:58:58 +0000 (19:58 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 30 Oct 2001 19:58:58 +0000 (19:58 +0000)
from Philip Warner.  Side effect of change is that GROUP BY expressions
will not be re-evaluated at multiple plan levels anymore, whereas this
sometimes happened with old code.

src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/util/clauses.c
src/include/optimizer/clauses.h

index d13035ca9ca5dc37ddb8122ed1d202441b412697..faca5728e6b9f3b4fd290c75f84a9ac5e31902a7 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.111 2001/10/28 06:25:45 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.112 2001/10/30 19:58:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -173,6 +173,17 @@ subquery_planner(Query *parse, double tuple_fraction)
        parse->havingQual = preprocess_expression(parse, parse->havingQual,
                                                                                          EXPRKIND_HAVING);
 
+       /*
+        * Check for ungrouped variables passed to subplans in targetlist and
+        * HAVING clause (but not in WHERE or JOIN/ON clauses, since those are
+        * evaluated before grouping).  We can't do this any earlier because
+        * we must use the preprocessed targetlist for comparisons of grouped
+        * expressions.
+        */
+       if (parse->hasSubLinks &&
+               (parse->groupClause != NIL || parse->hasAggs))
+               check_subplans_for_ungrouped_vars(parse);
+
        /*
         * A HAVING clause without aggregates is equivalent to a WHERE clause
         * (except it can only refer to grouped fields).  Transfer any
@@ -623,23 +634,10 @@ preprocess_expression(Query *parse, Node *expr, int kind)
 #endif
        }
 
+       /* Expand SubLinks to SubPlans */
        if (parse->hasSubLinks)
-       {
-               /* Expand SubLinks to SubPlans */
                expr = SS_process_sublinks(expr);
 
-               if (kind != EXPRKIND_WHERE &&
-                       (parse->groupClause != NIL || parse->hasAggs))
-               {
-                       /*
-                        * Check for ungrouped variables passed to subplans.  Note we
-                        * do NOT do this for subplans in WHERE (or JOIN/ON); it's
-                        * legal there because WHERE is evaluated pre-GROUP.
-                        */
-                       check_subplans_for_ungrouped_vars(expr, parse);
-               }
-       }
-
        /* Replace uplevel vars with Param nodes */
        if (PlannerQueryLevel > 1)
                expr = SS_replace_correlation_vars(expr);
@@ -1122,12 +1120,11 @@ grouping_planner(Query *parse, double tuple_fraction)
 
                /*
                 * If there are aggregates then the Group node should just return
-                * the same set of vars as the subplan did (but we can exclude any
-                * GROUP BY expressions).  If there are no aggregates then the
-                * Group node had better compute the final tlist.
+                * the same set of vars as the subplan did.  If there are no aggs
+                * then the Group node had better compute the final tlist.
                 */
                if (parse->hasAggs)
-                       group_tlist = flatten_tlist(result_plan->targetlist);
+                       group_tlist = new_unsorted_tlist(result_plan->targetlist);
                else
                        group_tlist = tlist;
 
@@ -1234,7 +1231,10 @@ grouping_planner(Query *parse, double tuple_fraction)
  * where the a+b target will be used by the Sort/Group steps, and the
  * other targets will be used for computing the final results. (In the
  * above example we could theoretically suppress the a and b targets and
- * use only a+b, but it's not really worth the trouble.)
+ * pass down only c,d,a+b, but it's not really worth the trouble to
+ * eliminate simple var references from the subplan.  We will avoid doing
+ * the extra computation to recompute a+b at the outer level; see
+ * replace_vars_with_subplan_refs() in setrefs.c.)
  *
  * 'parse' is the query being processed.
  * 'tlist' is the query's target list.
index e7f8361b9a726e5d3c25970bce108ad1f3462623..652e7c294a040d4729b459e68fe8cd020eff093b 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.71 2001/03/22 03:59:37 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.72 2001/10/30 19:58:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,7 +33,8 @@ typedef struct
 typedef struct
 {
        Index           subvarno;
-       List       *subplanTargetList;
+       List       *subplan_targetlist;
+       bool            tlist_has_non_vars;
 } replace_vars_with_subplan_refs_context;
 
 static void fix_expr_references(Plan *plan, Node *node);
@@ -43,7 +44,8 @@ static Node *join_references_mutator(Node *node,
                                                join_references_context *context);
 static Node *replace_vars_with_subplan_refs(Node *node,
                                                           Index subvarno,
-                                                          List *subplanTargetList);
+                                                          List *subplan_targetlist,
+                                                          bool tlist_has_non_vars);
 static Node *replace_vars_with_subplan_refs_mutator(Node *node,
                                                replace_vars_with_subplan_refs_context *context);
 static bool fix_opids_walker(Node *node, void *context);
@@ -278,75 +280,62 @@ set_join_references(Join *join)
  * qual expressions with elements of the subplan's tlist (which was
  * generated by flatten_tlist() from these selfsame expressions, so it
  * should have all the required variables).  There is an important exception,
- * however: a GROUP BY expression that is also an output expression will
- * have been pushed into the subplan tlist unflattened.  We want to detect
- * this case and reference the subplan output directly.  Therefore, check
- * for equality of the whole tlist expression to any subplan element before
- * we resort to picking the expression apart for individual Vars.
+ * however: GROUP BY and ORDER BY expressions will have been pushed into the
+ * subplan tlist unflattened.  If these values are also needed in the output
+ * then we want to reference the subplan tlist element rather than recomputing
+ * the expression.
  */
 static void
 set_uppernode_references(Plan *plan, Index subvarno)
 {
        Plan       *subplan = plan->lefttree;
-       List       *subplanTargetList,
-                          *outputTargetList,
+       List       *subplan_targetlist,
+                          *output_targetlist,
                           *l;
+       bool            tlist_has_non_vars;
 
        if (subplan != NULL)
-               subplanTargetList = subplan->targetlist;
+               subplan_targetlist = subplan->targetlist;
        else
-               subplanTargetList = NIL;
+               subplan_targetlist = NIL;
 
-       outputTargetList = NIL;
-       foreach(l, plan->targetlist)
+       /*
+        * Detect whether subplan tlist has any non-Vars (typically it won't
+        * because it's been flattened).  This allows us to save comparisons
+        * in common cases.
+        */
+       tlist_has_non_vars = false;
+       foreach(l, subplan_targetlist)
        {
                TargetEntry *tle = (TargetEntry *) lfirst(l);
-               TargetEntry *subplantle;
-               Node       *newexpr;
-
-               subplantle = tlistentry_member(tle->expr, subplanTargetList);
-               if (subplantle)
-               {
-                       /* Found a matching subplan output expression */
-                       Resdom     *resdom = subplantle->resdom;
-                       Var                *newvar;
 
-                       newvar = makeVar(subvarno,
-                                                        resdom->resno,
-                                                        resdom->restype,
-                                                        resdom->restypmod,
-                                                        0);
-                       /* If we're just copying a simple Var, copy up original info */
-                       if (subplantle->expr && IsA(subplantle->expr, Var))
-                       {
-                               Var                *subvar = (Var *) subplantle->expr;
-
-                               newvar->varnoold = subvar->varnoold;
-                               newvar->varoattno = subvar->varoattno;
-                       }
-                       else
-                       {
-                               newvar->varnoold = 0;
-                               newvar->varoattno = 0;
-                       }
-                       newexpr = (Node *) newvar;
-               }
-               else
+               if (tle->expr && ! IsA(tle->expr, Var))
                {
-                       /* No matching expression, so replace individual Vars */
-                       newexpr = replace_vars_with_subplan_refs(tle->expr,
-                                                                                                        subvarno,
-                                                                                                        subplanTargetList);
+                       tlist_has_non_vars = true;
+                       break;
                }
-               outputTargetList = lappend(outputTargetList,
-                                                                  makeTargetEntry(tle->resdom, newexpr));
        }
-       plan->targetlist = outputTargetList;
+
+       output_targetlist = NIL;
+       foreach(l, plan->targetlist)
+       {
+               TargetEntry *tle = (TargetEntry *) lfirst(l);
+               Node       *newexpr;
+
+               newexpr = replace_vars_with_subplan_refs(tle->expr,
+                                                                                                subvarno,
+                                                                                                subplan_targetlist,
+                                                                                                tlist_has_non_vars);
+               output_targetlist = lappend(output_targetlist,
+                                                                       makeTargetEntry(tle->resdom, newexpr));
+       }
+       plan->targetlist = output_targetlist;
 
        plan->qual = (List *)
                replace_vars_with_subplan_refs((Node *) plan->qual,
                                                                           subvarno,
-                                                                          subplanTargetList);
+                                                                          subplan_targetlist,
+                                                                          tlist_has_non_vars);
 }
 
 /*
@@ -439,9 +428,16 @@ join_references_mutator(Node *node,
  * --- so this routine should only be applied to nodes whose subplans'
  * targetlists were generated via flatten_tlist() or some such method.
  *
- * 'node': the tree to be fixed (a targetlist or qual list)
+ * If tlist_has_non_vars is true, then we try to match whole subexpressions
+ * against elements of the subplan tlist, so that we can avoid recomputing
+ * expressions that were already computed by the subplan.  (This is relatively
+ * expensive, so we don't want to try it in the common case where the
+ * subplan tlist is just a flattened list of Vars.)
+ *
+ * 'node': the tree to be fixed (a target item or qual)
  * 'subvarno': varno to be assigned to all Vars
- * 'subplanTargetList': target list for subplan
+ * 'subplan_targetlist': target list for subplan
+ * 'tlist_has_non_vars': true if subplan_targetlist contains non-Var exprs
  *
  * The resulting tree is a copy of the original in which all Var nodes have
  * varno = subvarno, varattno = resno of corresponding subplan target.
@@ -450,12 +446,14 @@ join_references_mutator(Node *node,
 static Node *
 replace_vars_with_subplan_refs(Node *node,
                                                           Index subvarno,
-                                                          List *subplanTargetList)
+                                                          List *subplan_targetlist,
+                                                          bool tlist_has_non_vars)
 {
        replace_vars_with_subplan_refs_context context;
 
        context.subvarno = subvarno;
-       context.subplanTargetList = subplanTargetList;
+       context.subplan_targetlist = subplan_targetlist;
+       context.tlist_has_non_vars = tlist_has_non_vars;
        return replace_vars_with_subplan_refs_mutator(node, &context);
 }
 
@@ -468,17 +466,38 @@ replace_vars_with_subplan_refs_mutator(Node *node,
        if (IsA(node, Var))
        {
                Var                *var = (Var *) node;
-               Var                *newvar = (Var *) copyObject(var);
                Resdom     *resdom;
+               Var                *newvar;
 
-               resdom = tlist_member((Node *) var, context->subplanTargetList);
+               resdom = tlist_member((Node *) var, context->subplan_targetlist);
                if (!resdom)
                        elog(ERROR, "replace_vars_with_subplan_refs: variable not in subplan target list");
-
+               newvar = (Var *) copyObject(var);
                newvar->varno = context->subvarno;
                newvar->varattno = resdom->resno;
                return (Node *) newvar;
        }
+       /* Try matching more complex expressions too, if tlist has any */
+       if (context->tlist_has_non_vars)
+       {
+               Resdom     *resdom;
+
+               resdom = tlist_member(node, context->subplan_targetlist);
+               if (resdom)
+               {
+                       /* Found a matching subplan output expression */
+                       Var                *newvar;
+
+                       newvar = makeVar(context->subvarno,
+                                                        resdom->resno,
+                                                        resdom->restype,
+                                                        resdom->restypmod,
+                                                        0);
+                       newvar->varnoold = 0; /* wasn't ever a plain Var */
+                       newvar->varoattno = 0;
+                       return (Node *) newvar;
+               }
+       }
        return expression_tree_mutator(node,
                                                                   replace_vars_with_subplan_refs_mutator,
                                                                   (void *) context);
index c92581082b2ec5038a66f22ef2a67d63db40337d..859c409709c4539a322552713417a5628bd2c475 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.89 2001/10/25 05:49:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.90 2001/10/30 19:58:58 tgl Exp $
  *
  * HISTORY
  *       AUTHOR                        DATE                    MAJOR EVENT
        ((Node *) makeConst(BOOLOID, 1, (Datum) (val), \
                                                (isnull), true, false, false))
 
+typedef struct
+{
+       Query      *query;
+       List       *groupClauses;
+} check_subplans_for_ungrouped_vars_context;
+
 static bool contain_agg_clause_walker(Node *node, void *context);
 static bool pull_agg_clause_walker(Node *node, List **listptr);
 static bool contain_subplans_walker(Node *node, void *context);
 static bool pull_subplans_walker(Node *node, List **listptr);
 static bool check_subplans_for_ungrouped_vars_walker(Node *node,
-                                                                                Query *context);
+                                               check_subplans_for_ungrouped_vars_context *context);
 static bool contain_noncachable_functions_walker(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node, void *context);
 static Expr *simplify_op_or_func(Expr *expr, List *args);
@@ -517,36 +523,81 @@ pull_subplans_walker(Node *node, List **listptr)
  * planner runs.  So we do it here, after we have processed sublinks into
  * subplans.  This ought to be cleaned up someday.
  *
- * 'clause' is the expression tree to be searched for subplans.
- * 'query' provides the GROUP BY list, the target list that the group clauses
- * refer to, and the range table.
+ * A deficiency in this scheme is that any outer reference var must be
+ * grouped by itself; we don't recognize groupable expressions within
+ * subselects.  For example, consider
+ *             SELECT
+ *                     (SELECT x FROM bar where y = (foo.a + foo.b))
+ *             FROM foo
+ *             GROUP BY a + b;
+ * This query will be rejected although it could be allowed.
  */
 void
-check_subplans_for_ungrouped_vars(Node *clause,
-                                                                 Query *query)
+check_subplans_for_ungrouped_vars(Query *query)
 {
+       check_subplans_for_ungrouped_vars_context context;
+       List    *gl;
+
+       context.query = query;
+       /*
+        * Build a list of the acceptable GROUP BY expressions for use in
+        * the walker (to avoid repeated scans of the targetlist within the
+        * recursive routine).
+        */
+       context.groupClauses = NIL;
+       foreach(gl, query->groupClause)
+       {
+               GroupClause *grpcl = lfirst(gl);
+               Node       *expr;
+
+               expr = get_sortgroupclause_expr(grpcl, query->targetList);
+               context.groupClauses = lcons(expr, context.groupClauses);
+       }
+
        /*
-        * No special setup needed; context for walker is just the Query
-        * pointer
+        * Recursively scan the targetlist and the HAVING clause.
+        * WHERE and JOIN/ON conditions are not examined, since they are
+        * evaluated before grouping.
         */
-       check_subplans_for_ungrouped_vars_walker(clause, query);
+       check_subplans_for_ungrouped_vars_walker((Node *) query->targetList,
+                                                                                        &context);
+       check_subplans_for_ungrouped_vars_walker(query->havingQual,
+                                                                                        &context);
+
+       freeList(context.groupClauses);
 }
 
 static bool
 check_subplans_for_ungrouped_vars_walker(Node *node,
-                                                                                Query *context)
+                                               check_subplans_for_ungrouped_vars_context *context)
 {
+       List       *gl;
+
        if (node == NULL)
                return false;
+       if (IsA(node, Const) || IsA(node, Param))
+               return false;                   /* constants are always acceptable */
 
        /*
         * If we find an aggregate function, do not recurse into its
         * arguments.  Subplans invoked within aggregate calls are allowed to
-        * receive ungrouped variables.
+        * receive ungrouped variables.  (This test and the next one should
+        * match the logic in parse_agg.c's check_ungrouped_columns().)
         */
        if (IsA(node, Aggref))
                return false;
 
+       /*
+        * Check to see if subexpression as a whole matches any GROUP BY item.
+        * We need to do this at every recursion level so that we recognize
+        * GROUPed-BY expressions before reaching variables within them.
+        */
+       foreach(gl, context->groupClauses)
+       {
+               if (equal(node, lfirst(gl)))
+                       return false;           /* acceptable, do not descend more */
+       }
+
        /*
         * We can ignore Vars other than in subplan args lists, since the
         * parser already checked 'em.
@@ -564,7 +615,6 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
                        Node       *thisarg = lfirst(t);
                        Var                *var;
                        bool            contained_in_group_clause;
-                       List       *gl;
 
                        /*
                         * We do not care about args that are not local variables;
@@ -582,14 +632,9 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
                         * Else, see if it is a grouping column.
                         */
                        contained_in_group_clause = false;
-                       foreach(gl, context->groupClause)
+                       foreach(gl, context->groupClauses)
                        {
-                               GroupClause *gcl = lfirst(gl);
-                               Node       *groupexpr;
-
-                               groupexpr = get_sortgroupclause_expr(gcl,
-                                                                                                        context->targetList);
-                               if (equal(thisarg, groupexpr))
+                               if (equal(thisarg, lfirst(gl)))
                                {
                                        contained_in_group_clause = true;
                                        break;
@@ -603,8 +648,8 @@ check_subplans_for_ungrouped_vars_walker(Node *node,
                                char       *attname;
 
                                Assert(var->varno > 0 &&
-                                          (int) var->varno <= length(context->rtable));
-                               rte = rt_fetch(var->varno, context->rtable);
+                                          (int) var->varno <= length(context->query->rtable));
+                               rte = rt_fetch(var->varno, context->query->rtable);
                                attname = get_rte_attribute_name(rte, var->varattno);
                                elog(ERROR, "Sub-SELECT uses un-GROUPed attribute %s.%s from outer query",
                                         rte->eref->relname, attname);
index aa97bcc862c34359acad1216e02b30d4caa53f4f..da8429ca0940677d42ef8f22a6fa005d77f57f8a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: clauses.h,v 1.47 2001/10/28 06:26:07 momjian Exp $
+ * $Id: clauses.h,v 1.48 2001/10/30 19:58:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -44,7 +44,7 @@ extern List *pull_agg_clause(Node *clause);
 
 extern bool contain_subplans(Node *clause);
 extern List *pull_subplans(Node *clause);
-extern void check_subplans_for_ungrouped_vars(Node *clause, Query *query);
+extern void check_subplans_for_ungrouped_vars(Query *query);
 
 extern bool contain_noncachable_functions(Node *clause);