#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/placeholder.h"
+#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h"
#include "parser/parsetree.h"
* Set the size estimates for the given base relation.
*
* The rel's targetlist and restrictinfo list must have been constructed
- * already.
+ * already, and rel->tuples must be set.
*
* We set the following fields of the rel node:
* rows: the estimated number of output tuples (after applying
rel->rows = clamp_row_est(nrows);
}
+/*
+ * set_subquery_size_estimates
+ *		Set the size estimates for a base relation that is a subquery.
+ *
+ * The rel's targetlist and restrictinfo list must have been constructed
+ * already, and the plan for the subquery must have been completed.
+ * We look at the subquery's plan and PlannerInfo to extract data.
+ *
+ * 'root' is the planner state in which 'rel' lives; 'subroot' is the
+ * planner state that was used to plan the subquery itself.
+ *
+ * We set the same fields as set_baserel_size_estimates.  In addition,
+ * rel->tuples and the per-column rel->attr_widths[] entries are filled in
+ * here before handing off to that function.
+ */
+void
+set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel,
+							PlannerInfo *subroot)
+{
+	RangeTblEntry *rte;
+	ListCell   *lc;
+
+	/* Should only be applied to base relations that are subqueries */
+	Assert(rel->relid > 0);
+	rte = planner_rt_fetch(rel->relid, root);
+	Assert(rte->rtekind == RTE_SUBQUERY);
+
+	/* Copy raw number of output rows from subplan */
+	rel->tuples = rel->subplan->plan_rows;
+
+	/*
+	 * Compute per-output-column width estimates by examining the subquery's
+	 * targetlist.  For any output that is a plain Var, get the width estimate
+	 * that was made while planning the subquery.  Otherwise, fall back on a
+	 * datatype-based estimate.
+	 */
+	foreach(lc, subroot->parse->targetList)
+	{
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		Node	   *texpr;
+		int32		item_width = 0;
+
+		/* Verify the node type before looking inside it */
+		Assert(IsA(te, TargetEntry));
+		texpr = (Node *) te->expr;
+
+		/* junk columns aren't visible to upper query */
+		if (te->resjunk)
+			continue;
+
+		/*
+		 * Ignore any column whose resno has no slot in rel->attr_widths[].
+		 * Storing a width for it would write outside the array, and a
+		 * plain Assert gives no protection in non-assert builds.
+		 */
+		if (te->resno < rel->min_attr || te->resno > rel->max_attr)
+			continue;
+
+		/*
+		 * XXX This currently doesn't work for subqueries containing set
+		 * operations, because the Vars in their tlists are bogus references
+		 * to the first leaf subquery, which wouldn't give the right answer
+		 * even if we could still get to its PlannerInfo.  So fall back on
+		 * datatype in that case.
+		 */
+		if (IsA(texpr, Var) &&
+			subroot->parse->setOperations == NULL)
+		{
+			Var		   *var = (Var *) texpr;
+			RelOptInfo *subrel = find_base_rel(subroot, var->varno);
+
+			item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
+		}
+
+		/*
+		 * Use a datatype-based estimate if the column isn't a plain Var, or
+		 * if no width was cached for it while planning the subquery (the
+		 * cache entry is then still zero).
+		 */
+		if (item_width <= 0)
+			item_width = get_typavgwidth(exprType(texpr), exprTypmod(texpr));
+
+		Assert(item_width > 0);
+		rel->attr_widths[te->resno - rel->min_attr] = item_width;
+	}
+
+	/* Now estimate number of output rows, etc */
+	set_baserel_size_estimates(root, rel);
+}
+
/*
* set_function_size_estimates
* Set the size estimates for a base relation that is a function call.
* set_rel_width
* Set the estimated output width of a base relation.
*
+ * The estimated output width is the sum of the per-attribute width estimates
+ * for the actually-referenced columns, plus any PHVs or other expressions
+ * that have to be calculated at this relation. This is the amount of data
+ * we'd need to pass upwards in case of a sort, hash, etc.
+ *
* NB: this works best on plain relations because it prefers to look at
- * real Vars. It will fail to make use of pg_statistic info when applied
- * to a subquery relation, even if the subquery outputs are simple vars
- * that we could have gotten info for. Is it worth trying to be smarter
- * about subqueries?
+ * real Vars. For subqueries, set_subquery_size_estimates will already have
+ * copied up whatever per-column estimates were made within the subquery,
+ * and for other types of rels there isn't much we can do anyway. We fall
+ * back on (fairly stupid) datatype-based width estimates if we can't get
+ * any better number.
*
* The per-attribute width estimates are cached for possible re-use while
* building join relations.
{
Oid reloid = planner_rt_fetch(rel->relid, root)->relid;
int32 tuple_width = 0;
+ bool have_wholerow_var = false;
ListCell *lc;
foreach(lc, rel->reltargetlist)
ndx = var->varattno - rel->min_attr;
/*
- * The width probably hasn't been cached yet, but may as well
- * check
+ * If it's a whole-row Var, we'll deal with it below after we
+ * have already cached as many attr widths as possible.
+ */
+ if (var->varattno == 0)
+ {
+ have_wholerow_var = true;
+ continue;
+ }
+
+ /*
+ * The width may have been cached already (especially if it's
+ * a subquery), so don't duplicate effort.
*/
if (rel->attr_widths[ndx] > 0)
{
}
/* Try to get column width from statistics */
- if (reloid != InvalidOid)
+ if (reloid != InvalidOid && var->varattno > 0)
{
item_width = get_attavgwidth(reloid, var->varattno);
if (item_width > 0)
tuple_width += item_width;
}
}
+
+ /*
+ * If we have a whole-row reference, estimate its width as the sum of
+ * per-column widths plus sizeof(HeapTupleHeaderData).
+ */
+ if (have_wholerow_var)
+ {
+ int32 wholerow_width = sizeof(HeapTupleHeaderData);
+
+ if (reloid != InvalidOid)
+ {
+ /* Real relation, so estimate true tuple width */
+ wholerow_width += get_relation_data_width(reloid,
+ rel->attr_widths - rel->min_attr);
+ }
+ else
+ {
+ /* Do what we can with info for a phony rel */
+ AttrNumber i;
+
+ for (i = 1; i <= rel->max_attr; i++)
+ wholerow_width += rel->attr_widths[i - rel->min_attr];
+ }
+
+ rel->attr_widths[0 - rel->min_attr] = wholerow_width;
+
+ /*
+ * Include the whole-row Var as part of the output tuple. Yes,
+ * that really is what happens at runtime.
+ */
+ tuple_width += wholerow_width;
+ }
+
Assert(tuple_width >= 0);
rel->width = tuple_width;
}
* estimate_rel_size - estimate # pages and # tuples in a table or index
*
* If attr_widths isn't NULL, it points to the zero-index entry of the
- * relation's attr_width[] cache; we fill this in if we have need to compute
+ * relation's attr_widths[] cache; we fill this in if we have need to compute
* the attribute widths for estimation purposes.
*/
void
* get_rel_data_width
*
* Estimate the average width of (the data part of) the relation's tuples.
- * If attr_widths isn't NULL, also store per-column width estimates into
- * that array.
+ *
+ * If attr_widths isn't NULL, it points to the zero-index entry of the
+ * relation's attr_widths[] cache; use and update that cache as appropriate.
*
* Currently we ignore dropped columns. Ideally those should be included
* in the result, but we haven't got any way to get info about them; and
if (att->attisdropped)
continue;
+
+ /* use previously cached data, if any */
+ if (attr_widths != NULL && attr_widths[i] > 0)
+ {
+ tuple_width += attr_widths[i];
+ continue;
+ }
+
/* This should match set_rel_width() in costsize.c */
item_width = get_attavgwidth(RelationGetRelid(rel), i);
if (item_width <= 0)
/*
* get_relation_data_width
*
- * External API for get_rel_data_width
+ * External API for get_rel_data_width: same behavior except we have to
+ * open the relcache entry.
*/
int32
-get_relation_data_width(Oid relid)
+get_relation_data_width(Oid relid, int32 *attr_widths)
{
int32 result;
Relation relation;
/* As above, assume relation is already locked */
relation = heap_open(relid, NoLock);
- result = get_rel_data_width(relation, NULL);
+ result = get_rel_data_width(relation, attr_widths);
heap_close(relation, NoLock);