*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
+#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#ifdef OPTIMIZER_DEBUG
Path *sorted_path;
double dNumGroups = 0;
long numGroups = 0;
- int numAggs = 0;
+ AggClauseCounts agg_counts;
int numGroupCols = list_length(parse->groupClause);
bool use_hashed_grouping = false;
+ MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
tlist = preprocess_targetlist(tlist,
parse->commandType,
* the aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs)
- numAggs = count_agg_clause((Node *) tlist) +
- count_agg_clause(parse->havingQual);
+ {
+ count_agg_clauses((Node *) tlist, &agg_counts);
+ count_agg_clauses(parse->havingQual, &agg_counts);
+ }
/*
* Figure out whether we need a sorted result from query_planner.
*/
if (!enable_hashagg || !hash_safe_grouping(parse))
use_hashed_grouping = false;
- else if (parse->hasAggs &&
- (contain_distinct_agg_clause((Node *) tlist) ||
- contain_distinct_agg_clause(parse->havingQual)))
+ else if (agg_counts.numDistinctAggs != 0)
use_hashed_grouping = false;
else
{
* the need for sorted input is usually a win, the fact
* that the output won't be sorted may be a loss; so we
* need to do an actual cost comparison.
- *
- * In most cases we have no good way to estimate the size of
- * the transition value needed by an aggregate;
- * arbitrarily assume it is 100 bytes. Also set the
- * overhead per hashtable entry at 64 bytes.
*/
- int hashentrysize = cheapest_path_width + 64 + numAggs * 100;
+ Size hashentrysize;
+
+ /* Estimate per-hash-entry space at tuple width... */
+ hashentrysize = cheapest_path_width;
+ /* plus space for pass-by-ref transition values... */
+ hashentrysize += agg_counts.transitionSpace;
+ /* plus the per-hash-entry overhead */
+ hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
if (hashentrysize * dNumGroups <= work_mem * 1024L)
{
Path sorted_p;
cost_agg(&hashed_p, parse,
- AGG_HASHED, numAggs,
+ AGG_HASHED, agg_counts.numAggs,
numGroupCols, dNumGroups,
cheapest_path->startup_cost,
cheapest_path->total_cost,
}
if (parse->hasAggs)
cost_agg(&sorted_p, parse,
- AGG_SORTED, numAggs,
+ AGG_SORTED, agg_counts.numAggs,
numGroupCols, dNumGroups,
sorted_p.startup_cost,
sorted_p.total_cost,
numGroupCols,
groupColIdx,
numGroups,
- numAggs,
+ agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
numGroupCols,
groupColIdx,
numGroups,
- numAggs,
+ agg_counts.numAggs,
result_plan);
}
else
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.186 2004/12/31 22:00:23 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.187 2005/01/28 19:34:07 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
#include "postgres.h"
+#include "catalog/pg_aggregate.h"
#include "catalog/pg_language.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "optimizer/var.h"
#include "parser/analyze.h"
#include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
#include "parser/parse_expr.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
} substitute_actual_parameters_context;
static bool contain_agg_clause_walker(Node *node, void *context);
-static bool contain_distinct_agg_clause_walker(Node *node, void *context);
-static bool count_agg_clause_walker(Node *node, int *count);
+static bool count_agg_clauses_walker(Node *node, AggClauseCounts *counts);
static bool expression_returns_set_walker(Node *node, void *context);
static bool contain_subplans_walker(Node *node, void *context);
static bool contain_mutable_functions_walker(Node *node, void *context);
}
/*
- * contain_distinct_agg_clause
- * Recursively search for DISTINCT Aggref nodes within a clause.
+ * count_agg_clauses
+ * Recursively count the Aggref nodes in an expression tree.
+ *
+ * Note: this also checks for nested aggregates, which are an error.
*
- * Returns true if any DISTINCT aggregate found.
+ * We not only count the nodes, but attempt to estimate the total space
+ * needed for their transition state values if all are evaluated in parallel
+ * (as would be done in a HashAgg plan). See AggClauseCounts for the exact
+ * set of statistics returned.
+ *
+ * NOTE that the counts are ADDED to those already in *counts ... so the
+ * caller is responsible for zeroing the struct initially.
*
* This does not descend into subqueries, and so should be used only after
* reduction of sublinks to subplans, or in contexts where it's known there
* are no subqueries. There mustn't be outer-aggregate references either.
*/
-bool
-contain_distinct_agg_clause(Node *clause)
+void
+count_agg_clauses(Node *clause, AggClauseCounts *counts)
{
- return contain_distinct_agg_clause_walker(clause, NULL);
+ /* no setup needed */
+ count_agg_clauses_walker(clause, counts);
}
static bool
-contain_distinct_agg_clause_walker(Node *node, void *context)
+count_agg_clauses_walker(Node *node, AggClauseCounts *counts)
{
if (node == NULL)
return false;
if (IsA(node, Aggref))
{
- Assert(((Aggref *) node)->agglevelsup == 0);
- if (((Aggref *) node)->aggdistinct)
- return true; /* abort the tree traversal and return
- * true */
- }
- Assert(!IsA(node, SubLink));
- return expression_tree_walker(node, contain_distinct_agg_clause_walker, context);
-}
+ Aggref *aggref = (Aggref *) node;
+ Oid inputType;
+ HeapTuple aggTuple;
+ Form_pg_aggregate aggform;
+ Oid aggtranstype;
+
+ Assert(aggref->agglevelsup == 0);
+ counts->numAggs++;
+ if (aggref->aggdistinct)
+ counts->numDistinctAggs++;
+
+ inputType = exprType((Node *) aggref->target);
+
+ /* fetch aggregate transition datatype from pg_aggregate */
+ aggTuple = SearchSysCache(AGGFNOID,
+ ObjectIdGetDatum(aggref->aggfnoid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(aggTuple))
+ elog(ERROR, "cache lookup failed for aggregate %u",
+ aggref->aggfnoid);
+ aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+ aggtranstype = aggform->aggtranstype;
+ ReleaseSysCache(aggTuple);
+
+ /* resolve actual type of transition state, if polymorphic */
+ if (aggtranstype == ANYARRAYOID || aggtranstype == ANYELEMENTOID)
+ {
+ /* have to fetch the agg's declared input type... */
+ Oid agg_arg_types[FUNC_MAX_ARGS];
+ int agg_nargs;
+
+ (void) get_func_signature(aggref->aggfnoid,
+ agg_arg_types, &agg_nargs);
+ Assert(agg_nargs == 1);
+ aggtranstype = resolve_generic_type(aggtranstype,
+ inputType,
+ agg_arg_types[0]);
+ }
-/*
- * count_agg_clause
- * Recursively count the Aggref nodes in an expression tree.
- *
- * Note: this also checks for nested aggregates, which are an error.
- *
- * This does not descend into subqueries, and so should be used only after
- * reduction of sublinks to subplans, or in contexts where it's known there
- * are no subqueries. There mustn't be outer-aggregate references either.
- */
-int
-count_agg_clause(Node *clause)
-{
- int result = 0;
+ /*
+ * If the transition type is pass-by-value then it doesn't add
+ * anything to the required size of the hashtable. If it is
+ * pass-by-reference then we have to add the estimated size of
+ * the value itself, plus palloc overhead.
+ */
+ if (!get_typbyval(aggtranstype))
+ {
+ int32 aggtranstypmod;
+ int32 avgwidth;
- count_agg_clause_walker(clause, &result);
- return result;
-}
+ /*
+ * If transition state is of same type as input, assume it's the
+ * same typmod (same width) as well. This works for cases like
+ * MAX/MIN and is probably somewhat reasonable otherwise.
+ */
+ if (aggtranstype == inputType)
+ aggtranstypmod = exprTypmod((Node *) aggref->target);
+ else
+ aggtranstypmod = -1;
-static bool
-count_agg_clause_walker(Node *node, int *count)
-{
- if (node == NULL)
- return false;
- if (IsA(node, Aggref))
- {
- Assert(((Aggref *) node)->agglevelsup == 0);
- (*count)++;
+ avgwidth = get_typavgwidth(aggtranstype, aggtranstypmod);
+ avgwidth = MAXALIGN(avgwidth);
+
+ counts->transitionSpace += avgwidth + 2 * sizeof(void *);
+ }
/*
* Complain if the aggregate's argument contains any aggregates;
* nested agg functions are semantically nonsensical.
*/
- if (contain_agg_clause((Node *) ((Aggref *) node)->target))
+ if (contain_agg_clause((Node *) aggref->target))
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("aggregate function calls may not be nested")));
return false;
}
Assert(!IsA(node, SubLink));
- return expression_tree_walker(node, count_agg_clause_walker,
- (void *) count);
+ return expression_tree_walker(node, count_agg_clauses_walker,
+ (void *) counts);
}
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.77 2004/12/31 22:03:36 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.78 2005/01/28 19:34:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "nodes/relation.h"
-
#define is_opclause(clause) ((clause) != NULL && IsA(clause, OpExpr))
#define is_funcclause(clause) ((clause) != NULL && IsA(clause, FuncExpr))
#define is_subplan(clause) ((clause) != NULL && IsA(clause, SubPlan))
+typedef struct
+{
+ int numAggs; /* total number of aggregate calls */
+ int numDistinctAggs; /* number that use DISTINCT */
+ Size transitionSpace; /* for pass-by-ref transition data */
+} AggClauseCounts;
+
extern Expr *make_opclause(Oid opno, Oid opresulttype, bool opretset,
Expr *leftop, Expr *rightop);
extern List *make_ands_implicit(Expr *clause);
extern bool contain_agg_clause(Node *clause);
-extern bool contain_distinct_agg_clause(Node *clause);
-extern int count_agg_clause(Node *clause);
+extern void count_agg_clauses(Node *clause, AggClauseCounts *counts);
extern bool expression_returns_set(Node *clause);