From: Tom Lane Date: Thu, 10 Mar 2005 23:21:26 +0000 (+0000) Subject: Make the behavior of HAVING without GROUP BY conform to the SQL spec. X-Git-Tag: REL8_1_0BETA1~1265 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=595ed2a8550e34c0abe64569a104d92ad077ec08;p=postgresql Make the behavior of HAVING without GROUP BY conform to the SQL spec. Formerly, if such a clause contained no aggregate functions we mistakenly treated it as equivalent to WHERE. Per spec it must cause the query to be treated as a grouped query of a single group, the same as appearance of aggregate functions would do. Also, the HAVING filter must execute after aggregate function computation even if it itself contains no aggregate functions. --- diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index 366820e554..e573db56cb 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -1,5 +1,5 @@ @@ -783,8 +783,9 @@ SELECT city, max(temp_lo) will be inputs to the aggregates. On the other hand, the HAVING clause always contains aggregate functions. (Strictly speaking, you are allowed to write a HAVING - clause that doesn't use aggregates, but it's wasteful. The same condition - could be used more efficiently at the WHERE stage.) + clause that doesn't use aggregates, but it's seldom useful. The same + condition could be used more efficiently at the WHERE + stage.) diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 855412c36c..93218e16c2 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -1,5 +1,5 @@ @@ -452,6 +452,17 @@ HAVING condition unambiguously reference a grouping column, unless the reference appears within an aggregate function. + + + The presence of HAVING turns a query into a grouped + query even if there is no GROUP BY clause. This is the + same as what happens when the query contains aggregate functions but + no GROUP BY clause. All the selected rows are considered to + form a single group, and the SELECT list and + HAVING clause can only reference table columns from + within aggregate functions. Such a query will emit a single row if the + HAVING condition is true, zero rows if it is not true. + diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index bdc7fd8992..f8bd18f349 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -15,7 +15,7 @@ * locate group boundaries. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeGroup.c,v 1.59 2004/12/31 21:59:45 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeGroup.c,v 1.60 2005/03/10 23:21:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -35,23 +35,19 @@ TupleTableSlot * ExecGroup(GroupState *node) { - EState *estate; ExprContext *econtext; TupleDesc tupdesc; int numCols; AttrNumber *grpColIdx; - HeapTuple outerTuple = NULL; + HeapTuple outerTuple; HeapTuple firsttuple; TupleTableSlot *outerslot; - ProjectionInfo *projInfo; - TupleTableSlot *resultSlot; /* * get state info from node */ if (node->grp_done) return NULL; - estate = node->ss.ps.state; econtext = node->ss.ps.ps_ExprContext; tupdesc = ExecGetScanType(&node->ss); numCols = ((Group *) node->ss.ps.plan)->numCols; @@ -62,67 +58,101 @@ ExecGroup(GroupState *node) * reset the per-tuple memory context once per input tuple. */ - /* If we don't already have first tuple of group, fetch it */ - /* this should occur on the first call only */ + /* + * If first time through, acquire first input tuple and determine + * whether to return it or not. + */ firsttuple = node->grp_firstTuple; if (firsttuple == NULL) { outerslot = ExecProcNode(outerPlanState(node)); if (TupIsNull(outerslot)) { + /* empty input, so return nothing */ node->grp_done = TRUE; return NULL; } - node->grp_firstTuple = firsttuple = - heap_copytuple(outerslot->val); + node->grp_firstTuple = firsttuple = heap_copytuple(outerslot->val); + /* Set up tuple as input for qual test and projection */ + ExecStoreTuple(firsttuple, + node->ss.ss_ScanTupleSlot, + InvalidBuffer, + false); + econtext->ecxt_scantuple = node->ss.ss_ScanTupleSlot; + /* + * Check the qual (HAVING clause); if the group does not match, + * ignore it and fall into scan loop. + */ + if (ExecQual(node->ss.ps.qual, econtext, false)) + { + /* + * Form and return a projection tuple using the first input + * tuple. + */ + return ExecProject(node->ss.ps.ps_ProjInfo, NULL); + } } /* - * Scan over all tuples that belong to this group + * This loop iterates once per input tuple group. At the head of the + * loop, we have finished processing the first tuple of the group and + * now need to scan over all the other group members. */ for (;;) { - outerslot = ExecProcNode(outerPlanState(node)); - if (TupIsNull(outerslot)) + /* + * Scan over all remaining tuples that belong to this group + */ + for (;;) { - node->grp_done = TRUE; - outerTuple = NULL; - break; - } - outerTuple = outerslot->val; + outerslot = ExecProcNode(outerPlanState(node)); + if (TupIsNull(outerslot)) + { + /* no more groups, so we're done */ + node->grp_done = TRUE; + return NULL; + } + outerTuple = outerslot->val; + /* + * Compare with first tuple and see if this tuple is of the same + * group. If so, ignore it and keep scanning. + */ + if (!execTuplesMatch(firsttuple, outerTuple, + tupdesc, + numCols, grpColIdx, + node->eqfunctions, + econtext->ecxt_per_tuple_memory)) + break; + } /* - * Compare with first tuple and see if this tuple is of the same - * group. + * We have the first tuple of the next input group. See if we + * want to return it. */ - if (!execTuplesMatch(firsttuple, outerTuple, - tupdesc, - numCols, grpColIdx, - node->eqfunctions, - econtext->ecxt_per_tuple_memory)) - break; - } - - /* - * form a projection tuple based on the (copied) first tuple of the - * group, and store it in the result tuple slot. - */ - ExecStoreTuple(firsttuple, - node->ss.ss_ScanTupleSlot, - InvalidBuffer, - false); - econtext->ecxt_scantuple = node->ss.ss_ScanTupleSlot; - projInfo = node->ss.ps.ps_ProjInfo; - resultSlot = ExecProject(projInfo, NULL); - - /* save first tuple of next group, if we are not done yet */ - if (!node->grp_done) - { heap_freetuple(firsttuple); - node->grp_firstTuple = heap_copytuple(outerTuple); + node->grp_firstTuple = firsttuple = heap_copytuple(outerTuple); + /* Set up tuple as input for qual test and projection */ + ExecStoreTuple(firsttuple, + node->ss.ss_ScanTupleSlot, + InvalidBuffer, + false); + econtext->ecxt_scantuple = node->ss.ss_ScanTupleSlot; + /* + * Check the qual (HAVING clause); if the group does not match, + * ignore it and loop back to scan the rest of the group. + */ + if (ExecQual(node->ss.ps.qual, econtext, false)) + { + /* + * Form and return a projection tuple using the first input + * tuple. + */ + return ExecProject(node->ss.ps.ps_ProjInfo, NULL); + } } - return resultSlot; + /* NOTREACHED */ + return NULL; } /* ----------------- diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f138402b3c..4acf02908b 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.296 2005/01/27 03:17:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.297 2005/03/10 23:21:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1544,14 +1544,15 @@ _copyQuery(Query *from) COPY_NODE_FIELD(resultRelations); COPY_NODE_FIELD(in_info_list); COPY_SCALAR_FIELD(hasJoinRTEs); + COPY_SCALAR_FIELD(hasHavingQual); /* * We do not copy the other planner internal fields: base_rel_list, * other_rel_list, join_rel_list, equi_key_list, query_pathkeys. That * would get us into copying RelOptInfo/Path trees, which we don't - * want to do. It is necessary to copy in_info_list and hasJoinRTEs - * for the benefit of inheritance_planner(), which may try to copy a - * Query in which these are already set. + * want to do. It is necessary to copy in_info_list, hasJoinRTEs, + * and hasHavingQual for the benefit of inheritance_planner(), which + * may try to copy a Query in which these are already set. */ return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 8430eddf69..f80da7572b 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.235 2005/01/27 03:17:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.236 2005/03/10 23:21:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -661,14 +661,10 @@ _equalQuery(Query *a, Query *b) COMPARE_NODE_FIELD(limitCount); COMPARE_NODE_FIELD(setOperations); COMPARE_NODE_FIELD(resultRelations); - COMPARE_NODE_FIELD(in_info_list); - COMPARE_SCALAR_FIELD(hasJoinRTEs); /* - * We do not check the other planner internal fields: base_rel_list, - * other_rel_list, join_rel_list, equi_key_list, query_pathkeys. They - * might not be set yet, and in any case they should be derivable from - * the other fields. + * We do not check the planner-internal fields. They might not be set + * yet, and in any case they should be derivable from the other fields. */ return true; } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 12081c2cda..42d8761845 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.123 2004/12/31 22:00:00 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.124 2005/03/10 23:21:21 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -334,13 +334,10 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel, /* * If there are any restriction clauses that have been attached to the - * subquery relation, consider pushing them down to become HAVING - * quals of the subquery itself. (Not WHERE clauses, since they may - * refer to subquery outputs that are aggregate results. But - * planner.c will transfer them into the subquery's WHERE if they do - * not.) This transformation is useful because it may allow us to - * generate a better plan for the subquery than evaluating all the - * subquery output rows and then filtering them. + * subquery relation, consider pushing them down to become WHERE or + * HAVING quals of the subquery itself. This transformation is useful + * because it may allow us to generate a better plan for the subquery + * than evaluating all the subquery output rows and then filtering them. * * There are several cases where we cannot push down clauses. * Restrictions involving the subquery are checked by @@ -795,8 +792,17 @@ subquery_push_qual(Query *subquery, List *rtable, Index rti, Node *qual) qual = ResolveNew(qual, rti, 0, rtable, subquery->targetList, CMD_SELECT, 0); - subquery->havingQual = make_and_qual(subquery->havingQual, - qual); + + /* + * Now attach the qual to the proper place: normally WHERE, but + * if the subquery uses grouping or aggregation, put it in HAVING + * (since the qual really refers to the group-result rows). + */ + if (subquery->hasAggs || subquery->groupClause || subquery->havingQual) + subquery->havingQual = make_and_qual(subquery->havingQual, qual); + else + subquery->jointree->quals = + make_and_qual(subquery->jointree->quals, qual); /* * We need not change the subquery's hasAggs or hasSublinks flags, diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f1a1651839..d1b94c483a 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.175 2004/12/31 22:00:08 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.176 2005/03/10 23:21:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2158,6 +2158,7 @@ make_agg(Query *root, List *tlist, List *qual, Group * make_group(Query *root, List *tlist, + List *qual, int numGroupCols, AttrNumber *grpColIdx, double numGroups, @@ -2184,7 +2185,8 @@ make_group(Query *root, plan->plan_rows = numGroups; /* - * We also need to account for the cost of evaluation of the tlist. + * We also need to account for the cost of evaluation of the qual (ie, + * the HAVING clause) and the tlist. * * XXX this double-counts the cost of evaluation of any expressions used * for grouping, since in reality those will have been evaluated at a @@ -2194,12 +2196,19 @@ make_group(Query *root, * See notes in grouping_planner about why this routine and make_agg are * the only ones in this file that worry about tlist eval cost. */ + if (qual) + { + cost_qual_eval(&qual_cost, qual); + plan->startup_cost += qual_cost.startup; + plan->total_cost += qual_cost.startup; + plan->total_cost += qual_cost.per_tuple * plan->plan_rows; + } cost_qual_eval(&qual_cost, tlist); plan->startup_cost += qual_cost.startup; plan->total_cost += qual_cost.startup; plan->total_cost += qual_cost.per_tuple * plan->plan_rows; - plan->qual = NIL; + plan->qual = qual; plan->targetlist = tlist; plan->lefttree = lefttree; plan->righttree = NULL; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 819879209b..d9e3ad0f84 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.179 2005/03/10 23:21:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -235,6 +235,13 @@ subquery_planner(Query *parse, double tuple_fraction) } } + /* + * Set hasHavingQual to remember if HAVING clause is present. Needed + * because preprocess_expression will reduce a constant-true condition + * to an empty qual list ... but "HAVING TRUE" is not a semantic no-op. + */ + parse->hasHavingQual = (parse->havingQual != NULL); + /* * Do expression preprocessing on targetlist and quals. */ @@ -267,17 +274,25 @@ subquery_planner(Query *parse, double tuple_fraction) } /* - * A HAVING clause without aggregates is equivalent to a WHERE clause - * (except it can only refer to grouped fields). Transfer any - * agg-free clauses of the HAVING qual into WHERE. This may seem like - * wasting cycles to cater to stupidly-written queries, but there are - * other reasons for doing it. Firstly, if the query contains no aggs - * at all, then we aren't going to generate an Agg plan node, and so - * there'll be no place to execute HAVING conditions; without this - * transfer, we'd lose the HAVING condition entirely, which is wrong. - * Secondly, when we push down a qual condition into a sub-query, it's - * easiest to push the qual into HAVING always, in case it contains - * aggs, and then let this code sort it out. + * In some cases we may want to transfer a HAVING clause into WHERE. + * We cannot do so if the HAVING clause contains aggregates (obviously) + * or volatile functions (since a HAVING clause is supposed to be executed + * only once per group). Also, it may be that the clause is so expensive + * to execute that we're better off doing it only once per group, despite + * the loss of selectivity. This is hard to estimate short of doing the + * entire planning process twice, so we use a heuristic: clauses + * containing subplans are left in HAVING. Otherwise, we move or copy + * the HAVING clause into WHERE, in hopes of eliminating tuples before + * aggregation instead of after. + * + * If the query has explicit grouping then we can simply move such a + * clause into WHERE; any group that fails the clause will not be + * in the output because none of its tuples will reach the grouping + * or aggregation stage. Otherwise we must have a degenerate + * (variable-free) HAVING clause, which we put in WHERE so that + * query_planner() can use it in a gating Result node, but also keep + * in HAVING to ensure that we don't emit a bogus aggregated row. + * (This could be done better, but it seems not worth optimizing.) * * Note that both havingQual and parse->jointree->quals are in * implicitly-ANDed-list form at this point, even though they are @@ -288,11 +303,27 @@ subquery_planner(Query *parse, double tuple_fraction) { Node *havingclause = (Node *) lfirst(l); - if (contain_agg_clause(havingclause)) + if (contain_agg_clause(havingclause) || + contain_volatile_functions(havingclause) || + contain_subplans(havingclause)) + { + /* keep it in HAVING */ newHaving = lappend(newHaving, havingclause); - else + } + else if (parse->groupClause) + { + /* move it to WHERE */ parse->jointree->quals = (Node *) lappend((List *) parse->jointree->quals, havingclause); + } + else + { + /* put a copy in WHERE, keep it in HAVING */ + parse->jointree->quals = (Node *) + lappend((List *) parse->jointree->quals, + copyObject(havingclause)); + newHaving = lappend(newHaving, havingclause); + } } parse->havingQual = (Node *) newHaving; @@ -1195,7 +1226,7 @@ grouping_planner(Query *parse, double tuple_fraction) * Insert AGG or GROUP node if needed, plus an explicit sort step * if necessary. * - * HAVING clause, if any, becomes qual of the Agg node + * HAVING clause, if any, becomes qual of the Agg or Group node. */ if (use_hashed_grouping) { @@ -1252,42 +1283,50 @@ grouping_planner(Query *parse, double tuple_fraction) agg_counts.numAggs, result_plan); } - else + else if (parse->groupClause) { /* - * If there are no Aggs, we shouldn't have any HAVING qual - * anymore - */ - Assert(parse->havingQual == NULL); - - /* - * If we have a GROUP BY clause, insert a group node (plus the + * GROUP BY without aggregation, so insert a group node (plus the * appropriate sort node, if necessary). + * + * Add an explicit sort if we couldn't make the path come + * out the way the GROUP node needs it. */ - if (parse->groupClause) + if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) { - /* - * Add an explicit sort if we couldn't make the path come - * out the way the GROUP node needs it. - */ - if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) - { - result_plan = (Plan *) - make_sort_from_groupcols(parse, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = group_pathkeys; - } - - result_plan = (Plan *) make_group(parse, - tlist, - numGroupCols, - groupColIdx, - dNumGroups, - result_plan); - /* The Group node won't change sort ordering */ + result_plan = (Plan *) + make_sort_from_groupcols(parse, + parse->groupClause, + groupColIdx, + result_plan); + current_pathkeys = group_pathkeys; } + + result_plan = (Plan *) make_group(parse, + tlist, + (List *) parse->havingQual, + numGroupCols, + groupColIdx, + dNumGroups, + result_plan); + /* The Group node won't change sort ordering */ + } + else if (parse->hasHavingQual) + { + /* + * No aggregates, and no GROUP BY, but we have a HAVING qual. + * This is a degenerate case in which we are supposed to emit + * either 0 or 1 row depending on whether HAVING succeeds. + * Furthermore, there cannot be any variables in either HAVING + * or the targetlist, so we actually do not need the FROM table + * at all! We can just throw away the plan-so-far and generate + * a Result node. This is a sufficiently unusual corner case + * that it's not worth contorting the structure of this routine + * to avoid having to generate the plan in the first place. + */ + result_plan = (Plan *) make_result(tlist, + parse->havingQual, + NULL); } } /* end of if (setOperations) */ @@ -1320,7 +1359,7 @@ grouping_planner(Query *parse, double tuple_fraction) * it's reasonable to assume the UNIQUE filter has effects * comparable to GROUP BY. */ - if (!parse->groupClause && !parse->hasAggs) + if (!parse->groupClause && !parse->hasHavingQual && !parse->hasAggs) { List *distinctExprs; @@ -1384,19 +1423,18 @@ hash_safe_grouping(Query *parse) * make_subplanTargetList * Generate appropriate target list when grouping is required. * - * When grouping_planner inserts Aggregate or Group plan nodes above - * the result of query_planner, we typically want to pass a different + * When grouping_planner inserts Aggregate, Group, or Result plan nodes + * above the result of query_planner, we typically want to pass a different * target list to query_planner than the outer plan nodes should have. * This routine generates the correct target list for the subplan. * * The initial target list passed from the parser already contains entries * for all ORDER BY and GROUP BY expressions, but it will not have entries * for variables used only in HAVING clauses; so we need to add those - * variables to the subplan target list. Also, if we are doing either - * grouping or aggregation, we flatten all expressions except GROUP BY items - * into their component variables; the other expressions will be computed by - * the inserted nodes rather than by the subplan. For example, - * given a query like + * variables to the subplan target list. Also, we flatten all expressions + * except GROUP BY items into their component variables; the other expressions + * will be computed by the inserted nodes rather than by the subplan. + * For example, given a query like * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; * we want to pass this targetlist to the subplan: * a,b,c,d,a+b @@ -1436,10 +1474,10 @@ make_subplanTargetList(Query *parse, *groupColIdx = NULL; /* - * If we're not grouping or aggregating, nothing to do here; + * If we're not grouping or aggregating, there's nothing to do here; * query_planner should receive the unmodified target list. */ - if (!parse->hasAggs && !parse->groupClause) + if (!parse->hasAggs && !parse->groupClause && !parse->hasHavingQual) { *need_tlist_eval = true; return tlist; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 64c802805a..f20c95299f 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.111 2004/12/31 22:00:23 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.112 2005/03/10 23:21:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -797,6 +797,15 @@ is_distinct_query(Query *query) if (!gl) /* got to the end? */ return true; } + else + { + /* + * If we have no GROUP BY, but do have aggregates or HAVING, then + * the result is at most one row so it's surely unique. + */ + if (query->hasAggs || query->havingQual) + return true; + } /* * XXX Are there any other cases in which we can easily see the result diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index cc27b0b7ba..6e16086991 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.315 2005/02/19 19:33:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.316 2005/03/10 23:21:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1854,7 +1854,6 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) /* * Initial processing of HAVING clause is just like WHERE clause. - * Additional work will be done in optimizer/plan/planner.c. */ qry->havingQual = transformWhereClause(pstate, stmt->havingClause, "HAVING"); @@ -1888,7 +1887,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasAggs = pstate->p_hasAggs; - if (pstate->p_hasAggs || qry->groupClause) + if (pstate->p_hasAggs || qry->groupClause || qry->havingQual) parseCheckAggregates(pstate, qry); if (stmt->forUpdate != NIL) @@ -2104,7 +2103,7 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) qry->hasSubLinks = pstate->p_hasSubLinks; qry->hasAggs = pstate->p_hasAggs; - if (pstate->p_hasAggs || qry->groupClause) + if (pstate->p_hasAggs || qry->groupClause || qry->havingQual) parseCheckAggregates(pstate, qry); if (forUpdate != NIL) @@ -2751,6 +2750,10 @@ CheckSelectForUpdate(Query *qry) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SELECT FOR UPDATE is not allowed with GROUP BY clause"))); + if (qry->havingQual != NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("SELECT FOR UPDATE is not allowed with HAVING clause"))); if (qry->hasAggs) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 3ac47e3c7b..4781a58317 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.66 2004/12/31 22:00:27 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.67 2005/03/10 23:21:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -104,7 +104,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry) Node *clause; /* This should only be called if we found aggregates or grouping */ - Assert(pstate->p_hasAggs || qry->groupClause); + Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual); /* * Aggregates must never appear in WHERE or JOIN/ON clauses. diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 4bc66db0bc..cc26e8eb76 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.147 2004/12/31 22:00:45 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteHandler.c,v 1.148 2005/03/10 23:21:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -181,18 +181,6 @@ rewriteRuleAction(Query *parsetree, } } - /* - * We copy the qualifications of the parsetree to the action and vice - * versa. So force hasSubLinks if one of them has it. If this is not - * right, the flag will get cleared later, but we mustn't risk having - * it not set when it needs to be. (XXX this should probably be - * handled by AddQual and friends, not here...) - */ - if (parsetree->hasSubLinks) - sub_action->hasSubLinks = TRUE; - else if (sub_action->hasSubLinks) - parsetree->hasSubLinks = TRUE; - /* * Event Qualification forces copying of parsetree and splitting into * two queries one w/rule_qual, one w/NOT rule_qual. Also add user @@ -996,23 +984,6 @@ fireRIRrules(Query *parsetree, List *activeRIRs) query_tree_walker(parsetree, fireRIRonSubLink, (void *) activeRIRs, QTW_IGNORE_RT_SUBQUERIES); - /* - * If the query was marked having aggregates, check if this is still - * true after rewriting. Ditto for sublinks. Note there should be no - * aggs in the qual at this point. (Does this code still do anything - * useful? The view-becomes-subselect-in-FROM approach doesn't look - * like it could remove aggs or sublinks...) - */ - if (parsetree->hasAggs) - { - parsetree->hasAggs = checkExprHasAggs((Node *) parsetree); - if (parsetree->hasAggs) - if (checkExprHasAggs((Node *) parsetree->jointree)) - elog(ERROR, "failed to remove aggregates from qual"); - } - if (parsetree->hasSubLinks) - parsetree->hasSubLinks = checkExprHasSubLink((Node *) parsetree); - return parsetree; } diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 5f29dbb820..71143e85e2 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.89 2004/12/31 22:00:46 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.90 2005/03/10 23:21:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,8 +37,7 @@ static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); /* * checkExprHasAggs - - * Queries marked hasAggs might not have them any longer after - * rewriting. Check it. + * Check if an expression contains an aggregate function call. * * The objective of this routine is to detect whether there are aggregates * belonging to the initial query level. Aggregates belonging to subqueries @@ -93,8 +92,7 @@ checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context) /* * checkExprHasSubLink - - * Queries marked hasSubLinks might not have them any longer after - * rewriting. Check it. + * Check if an expression contains a SubLink. */ bool checkExprHasSubLink(Node *node) @@ -756,68 +754,14 @@ AddQual(Query *parsetree, Node *qual) copy); /* - * Make sure query is marked correctly if added qual has sublinks or - * aggregates (not sure it can ever have aggs, but sublinks - * definitely). Need not search qual when query is already marked. + * We had better not have stuck an aggregate into the WHERE clause. */ - if (!parsetree->hasAggs) - parsetree->hasAggs = checkExprHasAggs(copy); - if (!parsetree->hasSubLinks) - parsetree->hasSubLinks = checkExprHasSubLink(copy); -} - -/* - * Add the given havingQual to the one already contained in the parsetree - * just as AddQual does for the normal 'where' qual - */ -void -AddHavingQual(Query *parsetree, Node *havingQual) -{ - Node *copy; - - if (havingQual == NULL) - return; - - if (parsetree->commandType == CMD_UTILITY) - { - /* - * There's noplace to put the qual on a utility statement. - * - * See comments in AddQual for motivation. - */ - if (parsetree->utilityStmt && IsA(parsetree->utilityStmt, NotifyStmt)) - return; - else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("conditional utility statements are not implemented"))); - } - - if (parsetree->setOperations != NULL) - { - /* - * There's noplace to put the qual on a setop statement, either. - * (This could be fixed, but right now the planner simply ignores - * any qual condition on a setop query.) - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("conditional UNION/INTERSECT/EXCEPT statements are not implemented"))); - } - - /* INTERSECT want's the original, but we need to copy - Jan */ - copy = copyObject(havingQual); - - parsetree->havingQual = make_and_qual(parsetree->havingQual, - copy); + Assert(!checkExprHasAggs(copy)); /* - * Make sure query is marked correctly if added qual has sublinks or - * aggregates (not sure it can ever have aggs, but sublinks - * definitely). Need not search qual when query is already marked. + * Make sure query is marked correctly if added qual has sublinks. + * Need not search qual when query is already marked. */ - if (!parsetree->hasAggs) - parsetree->hasAggs = checkExprHasAggs(copy); if (!parsetree->hasSubLinks) parsetree->hasSubLinks = checkExprHasSubLink(copy); } diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index f5c3861978..cd3eb634d4 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.272 2005/01/27 03:18:42 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.273 2005/03/10 23:21:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -128,6 +128,7 @@ typedef struct Query List *in_info_list; /* list of InClauseInfos */ List *query_pathkeys; /* desired pathkeys for query_planner() */ bool hasJoinRTEs; /* true if any RTEs are RTE_JOIN kind */ + bool hasHavingQual; /* true if havingQual was non-null */ } Query; diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 5a7dfa55fe..e76d55ea49 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.79 2004/12/31 22:03:36 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.80 2005/03/10 23:21:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,7 +39,7 @@ extern Agg *make_agg(Query *root, List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, long numGroups, int numAggs, Plan *lefttree); -extern Group *make_group(Query *root, List *tlist, +extern Group *make_group(Query *root, List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, double numGroups, Plan *lefttree); diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 106f76fdc7..d53f72878e 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.39 2004/12/31 22:03:41 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.40 2005/03/10 23:21:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,7 +31,6 @@ extern bool attribute_used(Node *node, int rt_index, int attno, extern Query *getInsertSelectQuery(Query *parsetree, Query ***subquery_ptr); extern void AddQual(Query *parsetree, Node *qual); -extern void AddHavingQual(Query *parsetree, Node *havingQual); extern void AddInvertedQual(Query *parsetree, Node *qual); extern bool checkExprHasAggs(Node *node); diff --git a/src/test/regress/expected/select_having.out b/src/test/regress/expected/select_having.out index 37793d49b5..dc4dee06b1 100644 --- a/src/test/regress/expected/select_having.out +++ b/src/test/regress/expected/select_having.out @@ -21,7 +21,7 @@ SELECT b, c FROM test_having 3 | bbbb (2 rows) --- HAVING is equivalent to WHERE in this case +-- HAVING is effectively equivalent to WHERE in this case SELECT b, c FROM test_having GROUP BY b, c HAVING b = 3 ORDER BY b, c; b | c @@ -49,4 +49,41 @@ SELECT c, max(a) FROM test_having bbbb | 5 (2 rows) +-- test degenerate cases involving HAVING without GROUP BY +-- Per SQL spec, these should generate 0 or 1 row, even without aggregates +SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a); + min | max +-----+----- +(0 rows) + +SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a); + min | max +-----+----- + 0 | 9 +(1 row) + +-- errors: ungrouped column references +SELECT a FROM test_having HAVING min(a) < max(a); +ERROR: column "test_having.a" must appear in the GROUP BY clause or be used in an aggregate function +SELECT 1 AS one FROM test_having HAVING a > 1; +ERROR: column "test_having.a" must appear in the GROUP BY clause or be used in an aggregate function +-- the really degenerate case: need not scan table at all +SELECT 1 AS one FROM test_having HAVING 1 > 2; + one +----- +(0 rows) + +SELECT 1 AS one FROM test_having HAVING 1 < 2; + one +----- + 1 +(1 row) + +-- and just to prove that we aren't scanning the table: +SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2; + one +----- + 1 +(1 row) + DROP TABLE test_having; diff --git a/src/test/regress/expected/select_having_1.out b/src/test/regress/expected/select_having_1.out index 6154bcbfe8..de721dd803 100644 --- a/src/test/regress/expected/select_having_1.out +++ b/src/test/regress/expected/select_having_1.out @@ -21,7 +21,7 @@ SELECT b, c FROM test_having 3 | bbbb (2 rows) --- HAVING is equivalent to WHERE in this case +-- HAVING is effectively equivalent to WHERE in this case SELECT b, c FROM test_having GROUP BY b, c HAVING b = 3 ORDER BY b, c; b | c @@ -49,4 +49,41 @@ SELECT c, max(a) FROM test_having XXXX | 0 (2 rows) +-- test degenerate cases involving HAVING without GROUP BY +-- Per SQL spec, these should generate 0 or 1 row, even without aggregates +SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a); + min | max +-----+----- +(0 rows) + +SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a); + min | max +-----+----- + 0 | 9 +(1 row) + +-- errors: ungrouped column references +SELECT a FROM test_having HAVING min(a) < max(a); +ERROR: column "test_having.a" must appear in the GROUP BY clause or be used in an aggregate function +SELECT 1 AS one FROM test_having HAVING a > 1; +ERROR: column "test_having.a" must appear in the GROUP BY clause or be used in an aggregate function +-- the really degenerate case: need not scan table at all +SELECT 1 AS one FROM test_having HAVING 1 > 2; + one +----- +(0 rows) + +SELECT 1 AS one FROM test_having HAVING 1 < 2; + one +----- + 1 +(1 row) + +-- and just to prove that we aren't scanning the table: +SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2; + one +----- + 1 +(1 row) + DROP TABLE test_having; diff --git a/src/test/regress/expected/select_having_2.out b/src/test/regress/expected/select_having_2.out index 239e49f3d3..542436e5ea 100644 --- a/src/test/regress/expected/select_having_2.out +++ b/src/test/regress/expected/select_having_2.out @@ -21,7 +21,7 @@ SELECT b, c FROM test_having 3 | bbbb (2 rows) --- HAVING is equivalent to WHERE in this case +-- HAVING is effectively equivalent to WHERE in this case SELECT b, c FROM test_having GROUP BY b, c HAVING b = 3 ORDER BY b, c; b | c @@ -49,4 +49,41 @@ SELECT c, max(a) FROM test_having XXXX | 0 (2 rows) +-- test degenerate cases involving HAVING without GROUP BY +-- Per SQL spec, these should generate 0 or 1 row, even without aggregates +SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a); + min | max +-----+----- +(0 rows) + +SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a); + min | max +-----+----- + 0 | 9 +(1 row) + +-- errors: ungrouped column references +SELECT a FROM test_having HAVING min(a) < max(a); +ERROR: column "test_having.a" must appear in the GROUP BY clause or be used in an aggregate function +SELECT 1 AS one FROM test_having HAVING a > 1; +ERROR: column "test_having.a" must appear in the GROUP BY clause or be used in an aggregate function +-- the really degenerate case: need not scan table at all +SELECT 1 AS one FROM test_having HAVING 1 > 2; + one +----- +(0 rows) + +SELECT 1 AS one FROM test_having HAVING 1 < 2; + one +----- + 1 +(1 row) + +-- and just to prove that we aren't scanning the table: +SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2; + one +----- + 1 +(1 row) + DROP TABLE test_having; diff --git a/src/test/regress/sql/select_having.sql b/src/test/regress/sql/select_having.sql index 72887d63c8..bc0cdc0630 100644 --- a/src/test/regress/sql/select_having.sql +++ b/src/test/regress/sql/select_having.sql @@ -18,7 +18,7 @@ INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j'); SELECT b, c FROM test_having GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c; --- HAVING is equivalent to WHERE in this case +-- HAVING is effectively equivalent to WHERE in this case SELECT b, c FROM test_having GROUP BY b, c HAVING b = 3 ORDER BY b, c; @@ -30,4 +30,21 @@ SELECT c, max(a) FROM test_having GROUP BY c HAVING count(*) > 2 OR min(a) = max(a) ORDER BY c; +-- test degenerate cases involving HAVING without GROUP BY +-- Per SQL spec, these should generate 0 or 1 row, even without aggregates + +SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a); +SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a); + +-- errors: ungrouped column references +SELECT a FROM test_having HAVING min(a) < max(a); +SELECT 1 AS one FROM test_having HAVING a > 1; + +-- the really degenerate case: need not scan table at all +SELECT 1 AS one FROM test_having HAVING 1 > 2; +SELECT 1 AS one FROM test_having HAVING 1 < 2; + +-- and just to prove that we aren't scanning the table: +SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2; + DROP TABLE test_having;