Pgindent run before 9.1 beta2.
index c292e24ac303ffce75cd2d0c4516be9360da4fa5..bb38768bd4358f72896e2d2c549bbd64dedcd24d 100644
@@ -1096,7 +1096,7 @@ cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm)
  * accesses (XXX can't we refine that guess?)
  *
  * By default, we charge two operator evals per tuple comparison, which should
- * be in the right ballpark in most cases.  The caller can tweak this by
+ * be in the right ballpark in most cases.     The caller can tweak this by
  * specifying nonzero comparison_cost; typically that's used for any extra
  * work that has to be done to prepare the inputs to the comparison operators.
  *
@@ -1218,7 +1218,7 @@ cost_sort(Path *path, PlannerInfo *root,
  *       Determines and returns the cost of a MergeAppend node.
  *
  * MergeAppend merges several pre-sorted input streams, using a heap that
- * at any given instant holds the next tuple from each stream.  If there
+ * at any given instant holds the next tuple from each stream. If there
  * are N streams, we need about N*log2(N) tuple comparisons to construct
  * the heap at startup, and then for each output tuple, about log2(N)
  * comparisons to delete the top heap entry and another log2(N) comparisons
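As a rough worked example of the comparison counts described in the comment above, here is a hypothetical standalone helper (not part of this patch; log2() is C99):

#include <math.h>

/*
 * Sketch of the MergeAppend comparison estimate described above:
 * building the heap over N streams costs about N*log2(N) comparisons,
 * and each output tuple costs about log2(N) to delete the top heap
 * entry plus another log2(N) to insert its replacement.
 */
static double
merge_append_comparisons(double n_streams, double output_tuples)
{
	double		logN = log2(n_streams);

	return n_streams * logN + output_tuples * 2.0 * logN;
}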
@@ -1335,25 +1335,40 @@ cost_material(Path *path,
  *             Determines and returns the cost of performing an Agg plan node,
  *             including the cost of its input.
  *
+ * aggcosts can be NULL when there are no actual aggregate functions (i.e.,
+ * we are using a hashed Agg node just to do grouping).
+ *
  * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
  * are for appropriately-sorted input.
  */
 void
 cost_agg(Path *path, PlannerInfo *root,
-                AggStrategy aggstrategy, int numAggs,
+                AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
                 int numGroupCols, double numGroups,
                 Cost input_startup_cost, Cost input_total_cost,
                 double input_tuples)
 {
        Cost            startup_cost;
        Cost            total_cost;
+       AggClauseCosts dummy_aggcosts;
+
+       /* Use all-zero per-aggregate costs if NULL is passed */
+       if (aggcosts == NULL)
+       {
+               Assert(aggstrategy == AGG_HASHED);
+               MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts));
+               aggcosts = &dummy_aggcosts;
+       }
 
        /*
-        * We charge one cpu_operator_cost per aggregate function per input tuple,
-        * and another one per output tuple (corresponding to transfn and finalfn
-        * calls respectively).  If we are grouping, we charge an additional
-        * cpu_operator_cost per grouping column per input tuple for grouping
-        * comparisons.
+        * The transCost.per_tuple component of aggcosts should be charged once
+        * per input tuple, corresponding to the costs of evaluating the aggregate
+        * transfns and their input expressions (with any startup cost of course
+        * charged but once).  The finalCost component is charged once per output
+        * tuple, corresponding to the costs of evaluating the finalfns.
+        *
+        * If we are grouping, we charge an additional cpu_operator_cost per
+        * grouping column per input tuple for grouping comparisons.
         *
         * We will produce a single output tuple if not grouping, and a tuple per
         * group otherwise.  We charge cpu_tuple_cost for each output tuple.
@@ -1366,15 +1381,13 @@ cost_agg(Path *path, PlannerInfo *root,
         * there's roundoff error we might do the wrong thing.  So be sure that
         * the computations below form the same intermediate values in the same
         * order.
-        *
-        * Note: ideally we should use the pg_proc.procost costs of each
-        * aggregate's component functions, but for now that seems like an
-        * excessive amount of work.
         */
        if (aggstrategy == AGG_PLAIN)
        {
                startup_cost = input_total_cost;
-               startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
+               startup_cost += aggcosts->transCost.startup;
+               startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+               startup_cost += aggcosts->finalCost;
                /* we aren't grouping */
                total_cost = startup_cost + cpu_tuple_cost;
        }
@@ -1384,19 +1397,21 @@ cost_agg(Path *path, PlannerInfo *root,
                startup_cost = input_startup_cost;
                total_cost = input_total_cost;
                /* calcs phrased this way to match HASHED case, see note above */
-               total_cost += cpu_operator_cost * input_tuples * numGroupCols;
-               total_cost += cpu_operator_cost * input_tuples * numAggs;
-               total_cost += cpu_operator_cost * numGroups * numAggs;
+               total_cost += aggcosts->transCost.startup;
+               total_cost += aggcosts->transCost.per_tuple * input_tuples;
+               total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
+               total_cost += aggcosts->finalCost * numGroups;
                total_cost += cpu_tuple_cost * numGroups;
        }
        else
        {
                /* must be AGG_HASHED */
                startup_cost = input_total_cost;
-               startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
-               startup_cost += cpu_operator_cost * input_tuples * numAggs;
+               startup_cost += aggcosts->transCost.startup;
+               startup_cost += aggcosts->transCost.per_tuple * input_tuples;
+               startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
                total_cost = startup_cost;
-               total_cost += cpu_operator_cost * numGroups * numAggs;
+               total_cost += aggcosts->finalCost * numGroups;
                total_cost += cpu_tuple_cost * numGroups;
        }
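A hedged usage sketch of the NULL-aggcosts convention documented in this hunk: a caller costing a hashed grouping step with no actual aggregate functions (e.g., for DISTINCT) might call cost_agg like this. The path and row-count variables are placeholders, not names from this patch:

	/*
	 * Hypothetical caller: cost a hashed Agg used only for grouping,
	 * passing NULL for aggcosts so that cost_agg substitutes all-zero
	 * per-aggregate costs (the AGG_HASHED assertion above must hold).
	 */
	cost_agg(&agg_path, root,
			 AGG_HASHED, NULL,	/* no actual aggregate functions */
			 numGroupCols,
			 dNumGroups,		/* estimated number of groups */
			 input_path->startup_cost,
			 input_path->total_cost,
			 input_path->rows);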
 
@@ -1413,25 +1428,53 @@ cost_agg(Path *path, PlannerInfo *root,
  */
 void
 cost_windowagg(Path *path, PlannerInfo *root,
-                          int numWindowFuncs, int numPartCols, int numOrderCols,
+                          List *windowFuncs, int numPartCols, int numOrderCols,
                           Cost input_startup_cost, Cost input_total_cost,
                           double input_tuples)
 {
        Cost            startup_cost;
        Cost            total_cost;
+       ListCell   *lc;
 
        startup_cost = input_startup_cost;
        total_cost = input_total_cost;
 
        /*
-        * We charge one cpu_operator_cost per window function per tuple (often a
-        * drastic underestimate, but without a way to gauge how many tuples the
-        * window function will fetch, it's hard to do better).  We also charge
-        * cpu_operator_cost per grouping column per tuple for grouping
-        * comparisons, plus cpu_tuple_cost per tuple for general overhead.
-        */
-       total_cost += cpu_operator_cost * input_tuples * numWindowFuncs;
-       total_cost += cpu_operator_cost * input_tuples * (numPartCols + numOrderCols);
+        * Window functions are assumed to cost their stated execution cost, plus
+        * the cost of evaluating their input expressions, per tuple.  Since they
+        * may in fact evaluate their inputs at multiple rows during each cycle,
+        * this could be a drastic underestimate; but without a way to know how
+        * many rows the window function will fetch, it's hard to do better.  In
+        * any case, it's a good estimate for all the built-in window functions,
+        * so we'll just do this for now.
+        */
+       foreach(lc, windowFuncs)
+       {
+               WindowFunc *wfunc = (WindowFunc *) lfirst(lc);
+               Cost            wfunccost;
+               QualCost        argcosts;
+
+               Assert(IsA(wfunc, WindowFunc));
+
+               wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost;
+
+               /* also add the input expressions' cost to per-input-row costs */
+               cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
+               startup_cost += argcosts.startup;
+               wfunccost += argcosts.per_tuple;
+
+               total_cost += wfunccost * input_tuples;
+       }
+
+       /*
+        * We also charge cpu_operator_cost per grouping column per tuple for
+        * grouping comparisons, plus cpu_tuple_cost per tuple for general
+        * overhead.
+        *
+        * XXX this neglects costs of spooling the data to disk when it overflows
+        * work_mem.  Sooner or later that should get accounted for.
+        */
+       total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
        total_cost += cpu_tuple_cost * input_tuples;
 
        path->startup_cost = startup_cost;
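To make the loop's per-function charge concrete, a worked example with illustrative numbers (not taken from the patch):

	/*
	 * Illustrative arithmetic: a window function whose pg_proc.procost
	 * is 1 and whose only argument is a plain Var gets
	 *     wfunccost = 1.0 * cpu_operator_cost + 0.0
	 * (Vars are charged zero by cost_qual_eval_node).  With the default
	 * cpu_operator_cost of 0.0025 and 1000 input tuples, the loop adds
	 * 1000 * 0.0025 = 2.5 to total_cost for that one function.
	 */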
@@ -1704,10 +1747,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                                innerendsel;
        Path            sort_path;              /* dummy for result of cost_sort */
 
-       /* Protect some assumptions below that rowcounts aren't zero */
-       if (outer_path_rows <= 0)
+       /* Protect some assumptions below that rowcounts aren't zero or NaN */
+       if (outer_path_rows <= 0 || isnan(outer_path_rows))
                outer_path_rows = 1;
-       if (inner_path_rows <= 0)
+       if (inner_path_rows <= 0 || isnan(inner_path_rows))
                inner_path_rows = 1;
 
        if (!enable_mergejoin)
@@ -1795,7 +1838,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                ipathkey = (PathKey *) linitial(ipathkeys);
                /* debugging check */
                if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
-                       opathkey->pk_collation != ipathkey->pk_collation ||
+                       opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation ||
                        opathkey->pk_strategy != ipathkey->pk_strategy ||
                        opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
                        elog(ERROR, "left and right pathkeys do not match in mergejoin");
@@ -2046,7 +2089,7 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
        {
                cache = (MergeScanSelCache *) lfirst(lc);
                if (cache->opfamily == pathkey->pk_opfamily &&
-                       cache->collation == pathkey->pk_collation &&
+                       cache->collation == pathkey->pk_eclass->ec_collation &&
                        cache->strategy == pathkey->pk_strategy &&
                        cache->nulls_first == pathkey->pk_nulls_first)
                        return cache;
@@ -2056,7 +2099,6 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
        mergejoinscansel(root,
                                         (Node *) rinfo->clause,
                                         pathkey->pk_opfamily,
-                                        pathkey->pk_collation,
                                         pathkey->pk_strategy,
                                         pathkey->pk_nulls_first,
                                         &leftstartsel,
@@ -2069,7 +2111,7 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
 
        cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache));
        cache->opfamily = pathkey->pk_opfamily;
-       cache->collation = pathkey->pk_collation;
+       cache->collation = pathkey->pk_eclass->ec_collation;
        cache->strategy = pathkey->pk_strategy;
        cache->nulls_first = pathkey->pk_nulls_first;
        cache->leftstartsel = leftstartsel;
@@ -2641,17 +2683,12 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
         * Vars and Consts are charged zero, and so are boolean operators (AND,
         * OR, NOT). Simplistic, but a lot better than no model at all.
         *
-        * Note that Aggref and WindowFunc nodes are (and should be) treated like
-        * Vars --- whatever execution cost they have is absorbed into
-        * plan-node-specific costing.  As far as expression evaluation is
-        * concerned they're just like Vars.
-        *
         * Should we try to account for the possibility of short-circuit
         * evaluation of AND/OR?  Probably *not*, because that would make the
         * results depend on the clause ordering, and we are not in any position
         * to expect that the current ordering of the clauses is the one that's
-        * going to end up being used.  (Is it worth applying order_qual_clauses
-        * much earlier in the planning process to fix this?)
+        * going to end up being used.  The above per-RestrictInfo caching would
+        * not mix well with trying to re-order clauses anyway.
         */
        if (IsA(node, FuncExpr))
        {
@@ -2680,6 +2717,20 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
                context->total.per_tuple += get_func_cost(saop->opfuncid) *
                        cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
        }
+       else if (IsA(node, Aggref) ||
+                        IsA(node, WindowFunc))
+       {
+               /*
+                * Aggref and WindowFunc nodes are (and should be) treated like Vars,
+                * ie, zero execution cost in the current model, because they behave
+                * essentially like Vars in execQual.c.  We disregard the costs of
+                * their input expressions for the same reason.  The actual execution
+                * costs of the aggregate/window functions and their arguments have to
+                * be factored into plan-node-specific costing of the Agg or WindowAgg
+                * plan node.
+                */
+               return false;                   /* don't recurse into children */
+       }
        else if (IsA(node, CoerceViaIO))
        {
                CoerceViaIO *iocoerce = (CoerceViaIO *) node;
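The early "return false" in the new Aggref/WindowFunc branch relies on the standard expression-tree-walker idiom; a minimal sketch follows (a hypothetical walker, not this file's actual function):

#include "postgres.h"
#include "nodes/nodeFuncs.h"

/*
 * Minimal walker sketch: returning false before handing a node to
 * expression_tree_walker prunes its entire subtree, which is how the
 * branch above keeps Aggref/WindowFunc argument costs out of
 * per-qual costs.
 */
static bool
my_cost_walker(Node *node, void *context)
{
	if (node == NULL)
		return false;
	if (IsA(node, Aggref) || IsA(node, WindowFunc))
		return false;			/* don't recurse into children */
	/* ... per-node cost accounting would go here ... */
	return expression_tree_walker(node, my_cost_walker, context);
}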
@@ -2910,7 +2961,7 @@ adjust_semi_join(PlannerInfo *root, JoinPath *path, SpecialJoinInfo *sjinfo,
                        List       *nrclauses;
 
                        nrclauses = select_nonredundant_join_clauses(root,
-                                                                                                                path->joinrestrictinfo,
+                                                                                                         path->joinrestrictinfo,
                                                                                                                 path->innerjoinpath);
                        *indexed_join_quals = (nrclauses == NIL);
                }
@@ -3186,7 +3237,7 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
 
        /*
         * Compute per-output-column width estimates by examining the subquery's
-        * targetlist.  For any output that is a plain Var, get the width estimate
+        * targetlist.  For any output that is a plain Var, get the width estimate
         * that was made while planning the subquery.  Otherwise, fall back on a
         * datatype-based estimate.
         */
@@ -3211,7 +3262,7 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                if (IsA(texpr, Var) &&
                        subroot->parse->setOperations == NULL)
                {
-                       Var        *var = (Var *) texpr;
+                       Var                *var = (Var *) texpr;
                        RelOptInfo *subrel = find_base_rel(subroot, var->varno);
 
                        item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
@@ -3333,7 +3384,7 @@ set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, Plan *cteplan)
  * of estimating baserestrictcost, so we set that, and we also set up width
  * using what will be purely datatype-driven estimates from the targetlist.
  * There is no way to do anything sane with the rows value, so we just put
- * a default estimate and hope that the wrapper can improve on it.  The
+ * a default estimate and hope that the wrapper can improve on it.     The
  * wrapper's PlanForeignScan function will be called momentarily.
  *
  * The rel's targetlist and restrictinfo list must have been constructed
@@ -3397,8 +3448,8 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
                        ndx = var->varattno - rel->min_attr;
 
                        /*
-                        * If it's a whole-row Var, we'll deal with it below after we
-                        * have already cached as many attr widths as possible.
+                        * If it's a whole-row Var, we'll deal with it below after we have
+                        * already cached as many attr widths as possible.
                         */
                        if (var->varattno == 0)
                        {
@@ -3407,8 +3458,8 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
                        }
 
                        /*
-                        * The width may have been cached already (especially if it's
-                        * subquery), so don't duplicate effort.
+                        * The width may have been cached already (especially if it's a
+                        * subquery), so don't duplicate effort.
                         */
                        if (rel->attr_widths[ndx] > 0)
                        {
@@ -3465,13 +3516,13 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
         */
        if (have_wholerow_var)
        {
-               int32   wholerow_width = sizeof(HeapTupleHeaderData);
+               int32           wholerow_width = sizeof(HeapTupleHeaderData);
 
                if (reloid != InvalidOid)
                {
                        /* Real relation, so estimate true tuple width */
                        wholerow_width += get_relation_data_width(reloid,
-                                                                                                         rel->attr_widths - rel->min_attr);
+                                                                                  rel->attr_widths - rel->min_attr);
                }
                else
                {
@@ -3485,8 +3536,8 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
                rel->attr_widths[0 - rel->min_attr] = wholerow_width;
 
                /*
-                * Include the whole-row Var as part of the output tuple.  Yes,
-                * that really is what happens at runtime.
+                * Include the whole-row Var as part of the output tuple.  Yes, that
+                * really is what happens at runtime.
                 */
                tuple_width += wholerow_width;
        }
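A worked example of the whole-row width bookkeeping above, with made-up column widths:

	/*
	 * Illustrative arithmetic: for a real relation with three columns
	 * of estimated widths 4, 8, and 32, the code above computes
	 *     wholerow_width = sizeof(HeapTupleHeaderData) + (4 + 8 + 32)
	 * (the header is roughly 24 bytes after alignment), caches it in
	 * attr_widths[0 - min_attr], and then adds it to tuple_width on
	 * top of the per-column widths already counted, matching what
	 * happens to the output tuple at runtime.
	 */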