* cpu_tuple_cost Cost of typical CPU time to process a tuple
* cpu_index_tuple_cost Cost of typical CPU time to process an index tuple
* cpu_operator_cost Cost of CPU time to execute an operator or function
+ * parallel_tuple_cost Cost of CPU time to pass a tuple from worker to master backend
+ * parallel_setup_cost Cost of setting up shared memory for parallelism
*
* We expect that the kernel will typically do some amount of read-ahead
* optimization; this in conjunction with seek costs means that seq_page_cost
* is a good deal less than random_page_cost.
*
* Obviously, taking constants for these values is an oversimplification,
* but it's tough enough to get any useful estimates even at this level of
- * detail. Note that all of these parameters are user-settable, in case
+ * detail. Note that all of these parameters are user-settable, in case
* the default values are drastically off for a particular platform.
*
* seq_page_cost and random_page_cost can also be overridden for an individual
* values.
*
*
- * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
#include "postgres.h"
+#ifdef _MSC_VER
+#include <float.h> /* for _isnan */
+#endif
#include <math.h>
+#include "access/htup_details.h"
+#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeHash.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
#include "optimizer/placeholder.h"
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
+double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
+double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
Cost disable_cost = 1.0e10;
+int max_parallel_degree = 0;
+
bool enable_seqscan = true;
bool enable_indexscan = true;
bool enable_indexonlyscan = true;
QualCost total;
} cost_qual_eval_context;
+static List *extract_nonindex_conditions(List *qual_clauses, List *indexquals);
static MergeScanSelCache *cached_scansel(PlannerInfo *root,
RestrictInfo *rinfo,
PathKey *pathkey);
static void cost_rescan(PlannerInfo *root, Path *path,
Cost *rescan_startup_cost, Cost *rescan_total_cost);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static bool has_indexed_join_quals(NestPath *path, List *joinclauses);
+static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info,
+ QualCost *qpqual_cost);
+static bool has_indexed_join_quals(NestPath *joinpath);
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
List *quals);
-static void set_joinpath_size_estimate(PlannerInfo *root, JoinPath *path,
- SpecialJoinInfo *sjinfo,
- List *restrictlist);
static double calc_joinrel_size_estimate(PlannerInfo *root,
double outer_rows,
double inner_rows,
/*
* cost_seqscan
* Determines and returns the cost of scanning a relation sequentially.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
cost_seqscan(Path *path, PlannerInfo *root,
- RelOptInfo *baserel)
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
- double spc_seq_page_cost;
Cost startup_cost = 0;
Cost run_cost = 0;
+ double spc_seq_page_cost;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
/* Should only be applied to base relations */
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
- /* For now, at least, seqscans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
if (!enable_seqscan)
startup_cost += disable_cost;
run_cost += spc_seq_page_cost * baserel->pages;
/* CPU costs */
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * baserel->tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
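+/*
+ * As a worked example at the default seq_page_cost = 1.0 and
+ * cpu_tuple_cost = 0.01: scanning a 10000-page table holding 1000000
+ * tuples with no quals costs run_cost = 1.0 * 10000 + 0.01 * 1000000 =
+ * 20000, with page fetches and per-tuple CPU contributing equally.
+ */
+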
+/*
+ * cost_samplescan
+ * Determines and returns the cost of scanning a relation using sampling.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_samplescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ RangeTblEntry *rte;
+ TableSampleClause *tsc;
+ TsmRoutine *tsm;
+ double spc_seq_page_cost,
+ spc_random_page_cost,
+ spc_page_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+
+ /* Should only be applied to base relations with tablesample clauses */
+ Assert(baserel->relid > 0);
+ rte = planner_rt_fetch(baserel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+ tsc = rte->tablesample;
+ Assert(tsc != NULL);
+ tsm = GetTsmRoutine(tsc->tsmhandler);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /* if NextSampleBlock is used, assume random access, else sequential */
+ spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+ spc_random_page_cost : spc_seq_page_cost;
+
+ /*
+ * disk costs (recall that baserel->pages has already been set to the
+ * number of pages the sampling method will visit)
+ */
+ run_cost += spc_page_cost * baserel->pages;
+
+ /*
+ * CPU costs (recall that baserel->tuples has already been set to the
+ * number of tuples the sampling method will select). Note that we ignore
+ * execution cost of the TABLESAMPLE parameter expressions; they will be
+ * evaluated only once per scan, and in most usages they'll likely be
+ * simple constants anyway. We also don't charge anything for the
+ * calculations the sampling method might do internally.
+ */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
}
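
/*
 * For instance, a sampling method that implements NextSampleBlock (such
 * as SYSTEM) is charged spc_random_page_cost per visited page, while one
 * that visits every block in order (such as BERNOULLI) is charged
 * spc_seq_page_cost: 4.0 versus 1.0 per page at the default tablespace
 * settings.
 */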
+/*
+ * cost_gather
+ * Determines and returns the cost of a Gather path.
+ *
+ * 'rel' is the relation to be operated upon
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_gather(GatherPath *path, PlannerInfo *root,
+ RelOptInfo *rel, ParamPathInfo *param_info)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->path.rows = param_info->ppi_rows;
+ else
+ path->path.rows = rel->rows;
+
+ startup_cost = path->subpath->startup_cost;
+
+ run_cost = path->subpath->total_cost - path->subpath->startup_cost;
+
+ /* Parallel setup and communication cost. */
+ startup_cost += parallel_setup_cost;
+ run_cost += parallel_tuple_cost * rel->tuples;
+
+ path->path.startup_cost = startup_cost;
+ path->path.total_cost = (startup_cost + run_cost);
+}
+
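+/*
+ * As a worked example with the defaults parallel_setup_cost = 1000 and
+ * parallel_tuple_cost = 0.1: gathering 100000 tuples from a subpath with
+ * startup cost 0 and total cost 1000 gives startup_cost = 1000 and
+ * run_cost = 1000 + 0.1 * 100000 = 11000, so tuple transfer dominates
+ * unless the workers do substantial work per row.
+ */
+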
/*
* cost_index
* Determines and returns the cost of scanning a relation using an index.
IndexOptInfo *index = path->indexinfo;
RelOptInfo *baserel = index->rel;
bool indexonly = (path->path.pathtype == T_IndexOnlyScan);
- List *allclauses;
+ List *qpquals;
Cost startup_cost = 0;
Cost run_cost = 0;
Cost indexStartupCost;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
- /* Estimate the number of rows returned by the indexscan */
- if (path->path.required_outer)
+ /*
+ * Mark the path with the correct row estimate, and identify which quals
+ * will need to be enforced as qpquals.
+ */
+ if (path->path.param_info)
{
- /*
- * The estimate should be less than baserel->rows because of the
- * additional selectivity of the join clauses. Since indexclauses may
- * contain both restriction and join clauses, we have to do a set
- * union to get the full set of clauses that must be considered to
- * compute the correct selectivity. (Without the union operation, we
- * might have some restriction clauses appearing twice, which'd
- * mislead clauselist_selectivity into double-counting their
- * selectivity. However, since RestrictInfo nodes aren't copied when
- * linking them into different lists, it should be sufficient to use
- * pointer comparison to remove duplicates.)
- *
- * Note that we force the clauses to be treated as non-join clauses
- * during selectivity estimation.
- */
- allclauses = list_union_ptr(baserel->baserestrictinfo,
- path->indexclauses);
- path->path.rows = baserel->tuples *
- clauselist_selectivity(root,
- allclauses,
- baserel->relid, /* do not use 0! */
- JOIN_INNER,
- NULL);
- if (path->path.rows > baserel->rows)
- path->path.rows = baserel->rows;
- path->path.rows = clamp_row_est(path->path.rows);
+ path->path.rows = path->path.param_info->ppi_rows;
+ /* qpquals come from the rel's restriction clauses and ppi_clauses */
+ qpquals = list_concat(
+ extract_nonindex_conditions(baserel->baserestrictinfo,
+ path->indexquals),
+ extract_nonindex_conditions(path->path.param_info->ppi_clauses,
+ path->indexquals));
}
else
{
- /* allclauses should just be the rel's restriction clauses */
- allclauses = baserel->baserestrictinfo;
-
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a parameterized path.
- */
path->path.rows = baserel->rows;
+ /* qpquals come from just the rel's restriction clauses */
+ qpquals = extract_nonindex_conditions(baserel->baserestrictinfo,
+ path->indexquals);
}
if (!enable_indexscan)
* Estimate CPU costs per tuple.
*
* What we want here is cpu_tuple_cost plus the evaluation costs of any
- * qual clauses that we have to evaluate as qpquals. We approximate that
- * list as allclauses minus any clauses appearing in indexquals (as
- * before, assuming that pointer equality is enough to recognize duplicate
- * RestrictInfos). This method neglects some considerations such as
- * clauses that needn't be checked because they are implied by a partial
- * index's predicate. It does not seem worth the cycles to try to factor
- * those things in at this stage, even though createplan.c will take pains
- * to remove such unnecessary clauses from the qpquals list if this path
- * is selected for use.
- */
- cost_qual_eval(&qpqual_cost,
- list_difference_ptr(allclauses, path->indexquals),
- root);
+ * qual clauses that we have to evaluate as qpquals.
+ */
+ cost_qual_eval(&qpqual_cost, qpquals, root);
startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
path->path.total_cost = startup_cost + run_cost;
}
+/*
+ * extract_nonindex_conditions
+ *
+ * Given a list of quals to be enforced in an indexscan, extract the ones that
+ * will have to be applied as qpquals (ie, the index machinery won't handle
+ * them). The actual rules for this appear in create_indexscan_plan() in
+ * createplan.c, but the full rules are fairly expensive and we don't want to
+ * go to that much effort for index paths that don't get selected for the
+ * final plan. So we approximate it as quals that don't appear directly in
+ * indexquals and also are not redundant children of the same EquivalenceClass
+ * as some indexqual. This method neglects some infrequently-relevant
+ * considerations such as clauses that needn't be checked because they are
+ * implied by a partial index's predicate. It does not seem worth the cycles
+ * to try to factor those things in at this stage, even though createplan.c
+ * will take pains to remove such unnecessary clauses from the qpquals list if
+ * this path is selected for use.
+ */
+static List *
+extract_nonindex_conditions(List *qual_clauses, List *indexquals)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, qual_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ Assert(IsA(rinfo, RestrictInfo));
+ if (rinfo->pseudoconstant)
+ continue; /* we may drop pseudoconstants here */
+ if (list_member_ptr(indexquals, rinfo))
+ continue; /* simple duplicate */
+ if (is_redundant_derived_clause(rinfo, indexquals))
+ continue; /* derived from same EquivalenceClass */
+ /* ... skip the predicate proof attempts createplan.c will try ... */
+ result = lappend(result, rinfo);
+ }
+ return result;
+}
+
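+/*
+ * For example, given indexquals {a = 1} and restriction clauses
+ * {a = 1, b > 2}, the result is just {b > 2}: the duplicate indexqual is
+ * dropped by the pointer test, or by the redundancy test had it been
+ * derived from the same EquivalenceClass.
+ */
+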
/*
* index_pages_fetched
* Estimate the number of pages actually fetched after accounting for
* computed for us by query_planner.
*
* Caller is expected to have ensured that tuples_fetched is greater than zero
- * and rounded to integer (see clamp_row_est). The result will likewise be
+ * and rounded to integer (see clamp_row_est). The result will likewise be
* greater than zero and integral.
*/
double
* index-then-heap plan.
*
* 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
* 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
* 'loop_count' is the number of repetitions of the indexscan to factor into
* estimates of caching behavior
*/
void
cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info,
Path *bitmapqual, double loop_count)
{
Cost startup_cost = 0;
Cost run_cost = 0;
Cost indexTotalCost;
Selectivity indexSelectivity;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
Cost cost_per_page;
double tuples_fetched;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
- /* Estimate the number of rows returned by the bitmap scan */
- if (path->required_outer)
- {
- /*
- * The estimate should be less than baserel->rows because of the
- * additional selectivity of the join clauses. We make use of the
- * selectivity estimated for the bitmap to do this; this isn't really
- * quite right since there may be restriction conditions not included
- * in the bitmap ...
- */
- Cost indexTotalCost;
- Selectivity indexSelectivity;
-
- cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
- path->rows = baserel->tuples * indexSelectivity;
- if (path->rows > baserel->rows)
- path->rows = baserel->rows;
- path->rows = clamp_row_est(path->rows);
- }
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a parameterized path.
- */
path->rows = baserel->rows;
- }
if (!enable_bitmapscan)
startup_cost += disable_cost;
/*
* For small numbers of pages we should charge spc_random_page_cost
* apiece, while if nearly all the table's pages are being read, it's more
- * appropriate to charge spc_seq_page_cost apiece. The effect is
+ * appropriate to charge spc_seq_page_cost apiece. The effect is
* nonlinear, too. For lack of a better idea, interpolate like this to
* determine the cost per page.
*/
* Often the indexquals don't need to be rechecked at each tuple ... but
* not always, especially not if there are enough tuples involved that the
* bitmaps become lossy. For the moment, just assume they will be
- * rechecked always.
+ * rechecked always. This means we charge the full freight for all the
+ * scan clauses.
*/
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * tuples_fetched;
* Estimate the cost of a BitmapAnd node
*
* Note that this considers only the costs of index scanning and bitmap
- * creation, not the eventual heap access. In that sense the object isn't
+ * creation, not the eventual heap access. In that sense the object isn't
* truly a Path, but it has enough path-like properties (costs in particular)
* to warrant treating it as one. We don't bother to set the path rows field,
* however.
/*
* We estimate OR selectivity on the assumption that the inputs are
* non-overlapping, since that's often the case in "x IN (list)" type
- * situations. Of course, we clamp to 1.0 at the end.
+ * situations. Of course, we clamp to 1.0 at the end.
*
* The runtime cost of the BitmapOr itself is estimated at 100x
* cpu_operator_cost for each tbm_union needed. Probably too small,
/*
* cost_tidscan
* Determines and returns the cost of scanning a relation using TIDs.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'tidquals' is the list of TID-checkable quals
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
cost_tidscan(Path *path, PlannerInfo *root,
- RelOptInfo *baserel, List *tidquals)
+ RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
bool isCurrentOf = false;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
QualCost tid_qual_cost;
int ntuples;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
- /* For now, tidscans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
/* Count how many tuples we expect to retrieve */
ntuples = 0;
/*
* We must force TID scan for WHERE CURRENT OF, because only nodeTidscan.c
- * understands how to do it correctly. Therefore, honor enable_tidscan
+ * understands how to do it correctly. Therefore, honor enable_tidscan
* only when CURRENT OF isn't present. Also note that cost_qual_eval
* counts a CurrentOfExpr as having startup cost disable_cost, which we
* subtract off here; that's to prevent other plan types such as seqscan
/*
* The TID qual expressions will be computed once, any other baserestrict
- * quals once per retrived tuple.
+ * quals once per retrieved tuple.
*/
cost_qual_eval(&tid_qual_cost, tidquals, root);
/* disk costs --- assume each tuple on a different page */
run_cost += spc_random_page_cost * ntuples;
- /* CPU costs */
- startup_cost += baserel->baserestrictcost.startup +
- tid_qual_cost.per_tuple;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple -
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ /* XXX currently we assume TID quals are a subset of qpquals */
+ startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
tid_qual_cost.per_tuple;
run_cost += cpu_per_tuple * ntuples;
/*
* cost_subqueryscan
* Determines and returns the cost of scanning a subquery RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
-cost_subqueryscan(Path *path, RelOptInfo *baserel)
+cost_subqueryscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost;
Cost run_cost;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
/* Should only be applied to base relations that are subqueries */
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_SUBQUERY);
- /* subqueryscans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
/*
* Cost of path is cost of evaluating the subplan, plus cost of evaluating
path->startup_cost = baserel->subplan->startup_cost;
path->total_cost = baserel->subplan->total_cost;
- startup_cost = baserel->baserestrictcost.startup;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost = qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost = cpu_per_tuple * baserel->tuples;
path->startup_cost += startup_cost;
/*
* cost_functionscan
* Determines and returns the cost of scanning a function RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
-cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_functionscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
RangeTblEntry *rte;
QualCost exprcost;
rte = planner_rt_fetch(baserel->relid, root);
Assert(rte->rtekind == RTE_FUNCTION);
- /* functionscans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
/*
- * Estimate costs of executing the function expression.
+ * Estimate costs of executing the function expression(s).
*
- * Currently, nodeFunctionscan.c always executes the function to
+ * Currently, nodeFunctionscan.c always executes the functions to
* completion before returning any rows, and caches the results in a
- * tuplestore. So the function eval cost is all startup cost, and per-row
+ * tuplestore. So the function eval cost is all startup cost, and per-row
* costs are minimal.
*
* XXX in principle we ought to charge tuplestore spill costs if the
* estimates for functions tend to be, there's not a lot of point in that
* refinement right now.
*/
- cost_qual_eval_node(&exprcost, rte->funcexpr, root);
+ cost_qual_eval_node(&exprcost, (Node *) rte->functions, root);
startup_cost += exprcost.startup + exprcost.per_tuple;
/* Add scanning CPU costs */
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
/*
* cost_valuesscan
* Determines and returns the cost of scanning a VALUES RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
-cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_valuesscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
/* Should only be applied to base relations that are values lists */
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_VALUES);
- /* valuesscans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
/*
* For now, estimate list evaluation cost at one operator eval per list
cpu_per_tuple = cpu_operator_cost;
/* Add scanning CPU costs */
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
*
* Note: this is used for both self-reference and regular CTEs; the
* possible cost differences are below the threshold of what we could
- * estimate accurately anyway. Note that the costs of evaluating the
+ * estimate accurately anyway. Note that the costs of evaluating the
* referenced CTE query are added into the final plan as initplan costs,
* and should NOT be counted here.
*/
void
-cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_ctescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
/* Should only be applied to base relations that are CTEs */
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_CTE);
- /* ctescans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
/* Charge one CPU tuple cost per row for tuplestore manipulation */
cpu_per_tuple = cpu_tuple_cost;
/* Add scanning CPU costs */
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
* If the total volume exceeds sort_mem, we switch to a tape-style merge
* algorithm. There will still be about t*log2(t) tuple comparisons in
* total, but we will also need to write and read each tuple once per
- * merge pass. We expect about ceil(logM(r)) merge passes where r is the
+ * merge pass. We expect about ceil(logM(r)) merge passes where r is the
* number of initial runs formed and M is the merge order used by tuplesort.c.
* Since the average initial run should be about twice sort_mem, we have
* disk traffic = 2 * relsize * ceil(logM(p / (2*sort_mem)))
* accesses (XXX can't we refine that guess?)
*
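* For example, with p = 80000 pages, sort_mem holding 1000 pages, and
* merge order M = 6, we expect ceil(log6(80000 / 2000)) = ceil(log6(40))
* = 3 merge passes, hence about 2 * 80000 * 3 = 480000 page accesses.
*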
* By default, we charge two operator evals per tuple comparison, which should
- * be in the right ballpark in most cases. The caller can tweak this by
+ * be in the right ballpark in most cases. The caller can tweak this by
* specifying nonzero comparison_cost; typically that's used for any extra
* work that has to be done to prepare the inputs to the comparison operators.
*
* Determines and returns the cost of a MergeAppend node.
*
* MergeAppend merges several pre-sorted input streams, using a heap that
- * at any given instant holds the next tuple from each stream. If there
+ * at any given instant holds the next tuple from each stream. If there
* are N streams, we need about N*log2(N) tuple comparisons to construct
* the heap at startup, and then for each output tuple, about log2(N)
* comparisons to delete the top heap entry and another log2(N) comparisons
* group otherwise. We charge cpu_tuple_cost for each output tuple.
*
* Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
- * same total CPU cost, but AGG_SORTED has lower startup cost. If the
+ * same total CPU cost, but AGG_SORTED has lower startup cost. If the
* input path is already sorted appropriately, AGG_SORTED should be
* preferred (since it has no risk of memory overflow). This will happen
* as long as the computed total costs are indeed exactly equal --- but if
startup_cost += argcosts.startup;
wfunccost += argcosts.per_tuple;
+ /*
+ * Add the filter's cost to per-input-row costs. XXX We should reduce
+ * input expression costs according to filter selectivity.
+ */
+ cost_qual_eval_node(&argcosts, (Node *) wfunc->aggfilter, root);
+ startup_cost += argcosts.startup;
+ wfunccost += argcosts.per_tuple;
+
total_cost += wfunccost * input_tuples;
}
* estimate and getting a tight lower bound. We choose to not examine the
* join quals here, since that's by far the most expensive part of the
* calculations. The end result is that CPU-cost considerations must be
- * left for the second phase.
+ * left for the second phase; and for SEMI/ANTI joins, we must also postpone
+ * incorporation of the inner path's run cost.
*
* 'workspace' is to be filled with startup_cost, total_cost, and perhaps
* other data to be used by final_cost_nestloop
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
- double outer_matched_rows;
- Selectivity inner_scan_frac;
-
/*
* SEMI or ANTI join: executor will stop after first match.
*
- * For an outer-rel row that has at least one match, we can expect the
- * inner scan to stop after a fraction 1/(match_count+1) of the inner
- * rows, if the matches are evenly distributed. Since they probably
- * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
- * that fraction. (If we used a larger fuzz factor, we'd have to
- * clamp inner_scan_frac to at most 1.0; but since match_count is at
- * least 1, no such clamp is needed now.)
- *
- * A complicating factor is that rescans may be cheaper than first
- * scans. If we never scan all the way to the end of the inner rel,
- * it might be (depending on the plan type) that we'd never pay the
- * whole inner first-scan run cost. However it is difficult to
- * estimate whether that will happen, so be conservative and always
- * charge the whole first-scan cost once.
- */
- run_cost += inner_run_cost;
-
- outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
- inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
-
- /* Add inner run cost for additional outer tuples having matches */
- if (outer_matched_rows > 1)
- run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
-
- /*
- * The cost of processing unmatched rows varies depending on the
- * details of the joinclauses, so we leave that part for later.
+ * Getting decent estimates requires inspection of the join quals,
+ * which we choose to postpone to final_cost_nestloop.
*/
/* Save private data for final_cost_nestloop */
- workspace->outer_matched_rows = outer_matched_rows;
- workspace->inner_scan_frac = inner_scan_frac;
+ workspace->inner_run_cost = inner_run_cost;
+ workspace->inner_rescan_run_cost = inner_rescan_run_cost;
}
else
{
workspace->total_cost = startup_cost + run_cost;
/* Save private data for final_cost_nestloop */
workspace->run_cost = run_cost;
- workspace->inner_rescan_run_cost = inner_rescan_run_cost;
}
/*
double inner_path_rows = inner_path->rows;
Cost startup_cost = workspace->startup_cost;
Cost run_cost = workspace->run_cost;
- Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
Cost cpu_per_tuple;
- List *joinclauses;
QualCost restrict_qual_cost;
double ntuples;
- /* Estimate the number of rows returned by the join */
- if (path->path.required_outer)
- {
- /*
- * The nestloop is (still) parameterized because of upper-level join
- * clauses used by the input paths. So the rowcount estimate should
- * be less than the joinrel's row count because of the additional
- * selectivity of those join clauses. To estimate the size we need
- * to know which of the joinrestrictinfo clauses nominally associated
- * with the join have been applied in the inner input path.
- *
- * We should also assume that such clauses won't be evaluated at the
- * join node at runtime, so exclude them from restrict_qual_cost.
- */
- joinclauses = select_nonredundant_join_clauses(path->joinrestrictinfo,
- path->innerjoinpath->param_clauses);
- set_joinpath_size_estimate(root, path, sjinfo, joinclauses);
- }
+ /* Mark the path with the correct row estimate */
+ if (path->path.param_info)
+ path->path.rows = path->path.param_info->ppi_rows;
else
- {
- joinclauses = path->joinrestrictinfo;
path->path.rows = path->path.parent->rows;
- }
/*
* We could include disable_cost in the preliminary estimate, but that
if (!enable_nestloop)
startup_cost += disable_cost;
- /* cost of source data */
+ /* cost of inner-relation source data (we already dealt with outer rel) */
if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
{
- double outer_matched_rows = workspace->outer_matched_rows;
- Selectivity inner_scan_frac = workspace->inner_scan_frac;
-
/*
* SEMI or ANTI join: executor will stop after first match.
*/
+ Cost inner_run_cost = workspace->inner_run_cost;
+ Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
- /* Compute number of tuples processed (not number emitted!) */
+ /*
+ * For an outer-rel row that has at least one match, we can expect the
+ * inner scan to stop after a fraction 1/(match_count+1) of the inner
+ * rows, if the matches are evenly distributed. Since they probably
+ * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+ * that fraction. (If we used a larger fuzz factor, we'd have to
+ * clamp inner_scan_frac to at most 1.0; but since match_count is at
+ * least 1, no such clamp is needed now.)
+ */
+ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+ inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
+
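+ /*
+ * For example, with outer_match_frac = 0.5 and match_count = 3, half
+ * the outer rows have at least one match and inner_scan_frac =
+ * 2.0 / 4.0 = 0.5, i.e. each matched outer row is expected to scan
+ * about half of the inner rows before stopping.
+ */
+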
+ /*
+ * Compute number of tuples processed (not number emitted!). First,
+ * account for successfully-matched outer rows.
+ */
ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
/*
- * For unmatched outer-rel rows, there are two cases. If the inner
- * path is an indexscan using all the joinquals as indexquals, then an
- * unmatched row results in an indexscan returning no rows, which is
- * probably quite cheap. We estimate this case as the same cost to
- * return the first tuple of a nonempty scan. Otherwise, the executor
- * will have to scan the whole inner rel; not so cheap.
+ * Now we need to estimate the actual costs of scanning the inner
+ * relation, which may be quite a bit less than N times inner_run_cost
+ * due to early scan stops. We consider two cases. If the inner path
+ * is an indexscan using all the joinquals as indexquals, then an
+ * unmatched outer row results in an indexscan returning no rows,
+ * which is probably quite cheap. Otherwise, the executor will have
+ * to scan the whole inner rel for an unmatched row; not so cheap.
*/
- if (has_indexed_join_quals(path, joinclauses))
+ if (has_indexed_join_quals(path))
{
+ /*
+ * Successfully-matched outer rows will only require scanning
+ * inner_scan_frac of the inner relation. In this case, we don't
+ * need to charge the full inner_run_cost even when that's more
+ * than inner_rescan_run_cost, because we can assume that none of
+ * the inner scans ever scan the whole inner relation. So it's
+ * okay to assume that all the inner scan executions can be
+ * fractions of the full cost, even if materialization is reducing
+ * the rescan cost. At this writing, it's impossible to get here
+ * for a materialized inner scan, so inner_run_cost and
+ * inner_rescan_run_cost will be the same anyway; but just in
+ * case, use inner_run_cost for the first matched tuple and
+ * inner_rescan_run_cost for additional ones.
+ */
+ run_cost += inner_run_cost * inner_scan_frac;
+ if (outer_matched_rows > 1)
+ run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+ /*
+ * Add the cost of inner-scan executions for unmatched outer rows.
+ * We estimate this as the same cost as returning the first tuple
+ * of a nonempty scan. We consider that these are all rescans,
+ * since we used inner_run_cost once already.
+ */
run_cost += (outer_path_rows - outer_matched_rows) *
inner_rescan_run_cost / inner_path_rows;
/*
- * We won't be evaluating any quals at all for these rows, so
+ * We won't be evaluating any quals at all for unmatched rows, so
* don't add them to ntuples.
*/
}
else
{
+ /*
+ * Here, a complicating factor is that rescans may be cheaper than
+ * first scans. If we never scan all the way to the end of the
+ * inner rel, it might be (depending on the plan type) that we'd
+ * never pay the whole inner first-scan run cost. However it is
+ * difficult to estimate whether that will happen (and it could
+ * not happen if there are any unmatched outer rows!), so be
+ * conservative and always charge the whole first-scan cost once.
+ */
+ run_cost += inner_run_cost;
+
+ /* Add inner run cost for additional outer tuples having matches */
+ if (outer_matched_rows > 1)
+ run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+ /* Add inner run cost for unmatched outer tuples */
run_cost += (outer_path_rows - outer_matched_rows) *
inner_rescan_run_cost;
+
+ /* And count the unmatched join tuples as being processed */
ntuples += (outer_path_rows - outer_matched_rows) *
inner_path_rows;
}
}
/* CPU costs */
- cost_qual_eval(&restrict_qual_cost, joinclauses, root);
+ cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
startup_cost += restrict_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
run_cost += cpu_per_tuple * ntuples;
* Unlike other costsize functions, this routine makes one actual decision:
* whether we should materialize the inner path. We do that either because
* the inner path can't support mark/restore, or because it's cheaper to
- * use an interposed Material node to handle mark/restore. When the decision
+ * use an interposed Material node to handle mark/restore. When the decision
* is cost-based it would be logically cleaner to build and cost two separate
* paths with and without that flag set; but that would require repeating most
- * of the cost calculations, which are not all that cheap. Since the choice
+ * of the cost calculations, which are not all that cheap. Since the choice
* will not affect output pathkeys or startup cost, only total cost, there is
* no possibility of wanting to keep both paths. So it seems best to make
* the decision here and record it in the path's materialize_inner field.
if (inner_path_rows <= 0 || isnan(inner_path_rows))
inner_path_rows = 1;
- /* Estimate the number of rows returned by the join */
- set_joinpath_size_estimate(root, &path->jpath, sjinfo,
- path->jpath.joinrestrictinfo);
+ /* Mark the path with the correct row estimate */
+ if (path->jpath.path.param_info)
+ path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
+ else
+ path->jpath.path.rows = path->jpath.path.parent->rows;
/*
* We could include disable_cost in the preliminary estimate, but that
qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
/*
- * Get approx # tuples passing the mergequals. We use approx_tuple_count
+ * Get approx # tuples passing the mergequals. We use approx_tuple_count
* here because we need an estimate done with JOIN_INNER semantics.
*/
mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
* estimated approximately as size of merge join output minus size of
* inner relation. Assume that the distinct key values are 1, 2, ..., and
* denote the number of values of each key in the outer relation as m1,
- * m2, ...; in the inner relation, n1, n2, ... Then we have
+ * m2, ...; in the inner relation, n1, n2, ... Then we have
*
* size of join = m1 * n1 + m2 * n2 + ...
*
* This equation works correctly for outer tuples having no inner match
* (nk = 0), but not for inner tuples having no outer match (mk = 0); we
* are effectively subtracting those from the number of rescanned tuples,
- * when we should not. Can we do better without expensive selectivity
+ * when we should not. Can we do better without expensive selectivity
* computations?
*
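* As a concrete illustration, if key 1 appears m1 = 3 times in the outer
* rel and n1 = 4 times in the inner rel, those rows produce 3 * 4 = 12
* join output rows against only 4 inner tuples, so (3 - 1) * 4 = 8 of
* the inner-tuple fetches are re-fetches.
*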
* The whole issue is moot if we are working from a unique-ified outer
/*
* Decide whether we want to materialize the inner input to shield it from
- * mark/restore and performing re-fetches. Our cost model for regular
+ * mark/restore and performing re-fetches. Our cost model for regular
* re-fetches is that a re-fetch costs the same as an original fetch,
* which is probably an overestimate; but on the other hand we ignore the
* bookkeeping costs of mark/restore. Not clear if it's worth developing
* it off does not entitle us to deliver an invalid plan.
*/
else if (innersortkeys == NIL &&
- !ExecSupportsMarkRestore(inner_path->pathtype))
+ !ExecSupportsMarkRestore(inner_path))
path->materialize_inner = true;
/*
/*
* For each tuple that gets through the mergejoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
- * clauses that are to be applied at the join. (This is pessimistic since
+ * clauses that are to be applied at the join. (This is pessimistic since
* not all of the quals may get evaluated at each tuple.)
*
* Note: we could adjust for SEMI/ANTI joins skipping some qual
* If inner relation is too big then we will need to "batch" the join,
* which implies writing and reading most of the tuples to disk an extra
* time. Charge seq_page_cost per page, since the I/O should be nice and
- * sequential. Writing the inner rel counts as startup cost, all the rest
+ * sequential. Writing the inner rel counts as startup cost, all the rest
* as run cost.
*/
if (numbatches > 1)
Selectivity innerbucketsize;
ListCell *hcl;
- /* Estimate the number of rows returned by the join */
- set_joinpath_size_estimate(root, &path->jpath, sjinfo,
- path->jpath.joinrestrictinfo);
+ /* Mark the path with the correct row estimate */
+ if (path->jpath.path.param_info)
+ path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
+ else
+ path->jpath.path.rows = path->jpath.path.parent->rows;
/*
* We could include disable_cost in the preliminary estimate, but that
/*
* For each tuple that gets through the hashjoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
- * clauses that are to be applied at the join. (This is pessimistic since
+ * clauses that are to be applied at the join. (This is pessimistic since
* not all of the quals may get evaluated at each tuple.)
*/
startup_cost += qp_qual_cost.startup;
{
/*
* Otherwise we will be rescanning the subplan output on each
- * evaluation. We need to estimate how much of the output we will
+ * evaluation. We need to estimate how much of the output we will
* actually need to scan. NOTE: this logic should agree with the
* tuple_fraction estimates used by make_subplan() in
* plan/subselect.c.
/*
* cost_rescan
* Given a finished Path, estimate the costs of rescanning it after
- * having done so the first time. For some Path types a rescan is
+ * having done so the first time. For some Path types a rescan is
* cheaper than an original scan (if no parameters change), and this
* function embodies knowledge about that. The default is to return
- * the same costs stored in the Path. (Note that the cost estimates
+ * the same costs stored in the Path. (Note that the cost estimates
* actually stored in Paths are always for first scans.)
*
* This function is not currently intended to model effects such as rescans
{
/*
* These plan types materialize their final result in a
- * tuplestore or tuplesort object. So the rescan cost is only
+ * tuplestore or tuplesort object. So the rescan cost is only
* cpu_tuple_cost per tuple, unless the result is large enough
* to spill to disk.
*/
{
/*
* These plan types not only materialize their results, but do
- * not implement qual filtering or projection. So they are
- * even cheaper to rescan than the ones above. We charge only
+ * not implement qual filtering or projection. So they are
+ * even cheaper to rescan than the ones above. We charge only
* cpu_operator_cost per tuple. (Note: keep that in sync with
* the run_cost charge in cost_sort, and also see comments in
* cost_material before you change it.)
* evaluation of AND/OR? Probably *not*, because that would make the
* results depend on the clause ordering, and we are not in any position
* to expect that the current ordering of the clauses is the one that's
- * going to end up being used. The above per-RestrictInfo caching would
+ * going to end up being used. The above per-RestrictInfo caching would
* not mix well with trying to re-order clauses anyway.
+ *
+ * Another issue that is entirely ignored here is that if a set-returning
+ * function is below top level in the tree, the functions/operators above
+ * it will need to be evaluated multiple times. In practical use, such
+ * cases arise so seldom as to not be worth the added complexity needed;
+ * moreover, since our rowcount estimates for functions tend to be pretty
+ * phony, the results would also be pretty phony.
*/
if (IsA(node, FuncExpr))
{
else if (IsA(node, AlternativeSubPlan))
{
/*
- * Arbitrarily use the first alternative plan for costing. (We should
+ * Arbitrarily use the first alternative plan for costing. (We should
* certainly only include one alternative, and we don't yet have
* enough information to know which one the executor is most likely to
* use.)
(void *) context);
}
+/*
+ * get_restriction_qual_cost
+ * Compute evaluation costs of a baserel's restriction quals, plus any
+ * movable join quals that have been pushed down to the scan.
+ * Results are returned into *qpqual_cost.
+ *
+ * This is a convenience subroutine that works for seqscans and other cases
+ * where all the given quals will be evaluated the hard way. It's not useful
+ * for cost_index(), for example, where the index machinery takes care of
+ * some of the quals. We assume baserestrictcost was previously set by
+ * set_baserel_size_estimates().
+ */
+static void
+get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
+ ParamPathInfo *param_info,
+ QualCost *qpqual_cost)
+{
+ if (param_info)
+ {
+ /* Include costs of pushed-down clauses */
+ cost_qual_eval(qpqual_cost, param_info->ppi_clauses, root);
+
+ qpqual_cost->startup += baserel->baserestrictcost.startup;
+ qpqual_cost->per_tuple += baserel->baserestrictcost.per_tuple;
+ }
+ else
+ *qpqual_cost = baserel->baserestrictcost;
+}
+
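+/*
+ * For instance, if the pushed-down ppi_clauses cost 0.0025 per tuple and
+ * the rel's baserestrictcost is 0.005 per tuple, a parameterized scan is
+ * charged 0.0075 per tuple here, while an unparameterized scan simply
+ * reuses the cached baserestrictcost.
+ */
+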
/*
* compute_semi_anti_join_factors
* innerrel: inner relation under consideration
* jointype: must be JOIN_SEMI or JOIN_ANTI
* sjinfo: SpecialJoinInfo relevant to this join
- * restrictlist: join quals
+ * restrictlist: join quals
* Output parameters:
* *semifactors is filled in (see relation.h for field definitions)
*/
/* we don't bother trying to make the remaining fields valid */
norm_sjinfo.lhs_strict = false;
norm_sjinfo.delay_upper_joins = false;
- norm_sjinfo.join_quals = NIL;
+ norm_sjinfo.semi_can_btree = false;
+ norm_sjinfo.semi_can_hash = false;
+ norm_sjinfo.semi_operators = NIL;
+ norm_sjinfo.semi_rhs_exprs = NIL;
nselec = clauselist_selectivity(root,
joinquals,
/*
* jselec can be interpreted as the fraction of outer-rel rows that have
* any matches (this is true for both SEMI and ANTI cases). And nselec is
- * the fraction of the Cartesian product that matches. So, the average
+ * the fraction of the Cartesian product that matches. So, the average
* number of matches for each outer-rel row that has at least one match is
* nselec * inner_rows / jselec.
*
* Note: it is correct to use the inner rel's "rows" count here, even
* though we might later be considering a parameterized inner path with
- * fewer rows. This is because we have included all the join clauses
- * in the selectivity estimate.
+ * fewer rows. This is because we have included all the join clauses in
+ * the selectivity estimate.
*/
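/*
 * For example, if jselec = 0.5, nselec = 0.01, and the inner rel has
 * 1000 rows, the expected match_count is 0.01 * 1000 / 0.5 = 20
 * matches per matched outer row.
 */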
if (jselec > 0) /* protect against zero divide */
{
* expensive.
*/
static bool
-has_indexed_join_quals(NestPath *path, List *joinclauses)
+has_indexed_join_quals(NestPath *joinpath)
{
- NodeTag pathtype = path->innerjoinpath->pathtype;
+ Relids joinrelids = joinpath->path.parent->relids;
+ Path *innerpath = joinpath->innerjoinpath;
+ List *indexclauses;
+ bool found_one;
+ ListCell *lc;
+
+ /* If join still has quals to evaluate, it's not fast */
+ if (joinpath->joinrestrictinfo != NIL)
+ return false;
+ /* Nor if the inner path isn't parameterized at all */
+ if (innerpath->param_info == NULL)
+ return false;
- if (pathtype == T_IndexScan ||
- pathtype == T_IndexOnlyScan ||
- pathtype == T_BitmapHeapScan)
+ /* Find the indexclauses list for the inner scan */
+ switch (innerpath->pathtype)
{
- if (path->joinrestrictinfo != NIL)
- {
- /* OK if all those clauses were found to be redundant */
- return (joinclauses == NIL);
- }
- else
- {
- /* a clauseless join does NOT qualify */
+ case T_IndexScan:
+ case T_IndexOnlyScan:
+ indexclauses = ((IndexPath *) innerpath)->indexclauses;
+ break;
+ case T_BitmapHeapScan:
+ {
+ /* Accept only a simple bitmap scan, not AND/OR cases */
+ Path *bmqual = ((BitmapHeapPath *) innerpath)->bitmapqual;
+
+ if (IsA(bmqual, IndexPath))
+ indexclauses = ((IndexPath *) bmqual)->indexclauses;
+ else
+ return false;
+ break;
+ }
+ default:
+
+ /*
+ * If it's not a simple indexscan, it probably doesn't run quickly
+ * for zero rows out, even if it's a parameterized path using all
+ * the joinquals.
+ */
return false;
- }
}
- else
+
+ /*
+ * Examine the inner path's param clauses. Any that are from the outer
+ * path must be found in the indexclauses list, either exactly or in an
+ * equivalent form generated by equivclass.c. Also, we must find at least
+ * one such clause, else it's a clauseless join which isn't fast.
+ */
+ found_one = false;
+ foreach(lc, innerpath->param_info->ppi_clauses)
{
- /*
- * If it's not a simple indexscan, it probably doesn't run quickly for
- * zero rows out, even if it's a parameterized path using all the
- * joinquals.
- */
- return false;
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (join_clause_is_movable_into(rinfo,
+ innerpath->parent->relids,
+ joinrelids))
+ {
+ if (!(list_member_ptr(indexclauses, rinfo) ||
+ is_redundant_derived_clause(rinfo, indexclauses)))
+ return false;
+ found_one = true;
+ }
}
+ return found_one;
}
/* we don't bother trying to make the remaining fields valid */
sjinfo.lhs_strict = false;
sjinfo.delay_upper_joins = false;
- sjinfo.join_quals = NIL;
+ sjinfo.semi_can_btree = false;
+ sjinfo.semi_can_hash = false;
+ sjinfo.semi_operators = NIL;
+ sjinfo.semi_rhs_exprs = NIL;
/* Get the approximate selectivity */
foreach(l, quals)
set_rel_width(root, rel);
}
+/*
+ * get_parameterized_baserel_size
+ * Make a size estimate for a parameterized scan of a base relation.
+ *
+ * 'param_clauses' lists the additional join clauses to be used.
+ *
+ * set_baserel_size_estimates must have been applied already.
+ */
+double
+get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
+ List *param_clauses)
+{
+ List *allclauses;
+ double nrows;
+
+ /*
+ * Estimate the number of rows returned by the parameterized scan, knowing
+ * that it will apply all the extra join clauses as well as the rel's own
+ * restriction clauses. Note that we force the clauses to be treated as
+ * non-join clauses during selectivity estimation.
+ */
+ allclauses = list_concat(list_copy(param_clauses),
+ rel->baserestrictinfo);
+ nrows = rel->tuples *
+ clauselist_selectivity(root,
+ allclauses,
+ rel->relid, /* do not use 0! */
+ JOIN_INNER,
+ NULL);
+ nrows = clamp_row_est(nrows);
+ /* For safety, make sure result is not more than the base estimate */
+ if (nrows > rel->rows)
+ nrows = rel->rows;
+ return nrows;
+}
+
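+/*
+ * For instance, a rel with tuples = 100000 whose own restrictions have
+ * selectivity 0.1 (so rel->rows = 10000), parameterized by one join
+ * clause of selectivity 0.01, gets nrows = 100000 * 0.1 * 0.01 = 100,
+ * well under the clamp against rel->rows.
+ */
+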
/*
* set_joinrel_size_estimates
* Set the size estimates for the given join relation.
* routines don't handle all cases equally well, we might not. But there's
* not much to be done about it. (Would it make sense to repeat the
* calculations for each pair of input rels that's encountered, and somehow
- * average the results? Probably way more trouble than it's worth.)
+ * average the results? Probably way more trouble than it's worth, and
+ * anyway we must keep the rowcount estimate the same for all paths for the
+ * joinrel.)
*
* We set only the rows field here. The width field was already set by
* build_joinrel_tlist, and baserestrictcost is not used for join rels.
}
/*
- * set_joinpath_size_estimate
- * Set the rows estimate for the given join path.
- *
- * If the join is not parameterized by any joinclauses from higher joins, the
- * estimate is the same as previously computed by set_joinrel_size_estimates.
- * Otherwise, we estimate afresh using the identical logic, but with the rows
- * estimates from the input paths (which are typically less than their rels'
- * regular row estimates) and the restriction clauses actually being applied
- * at the join.
+ * get_parameterized_joinrel_size
+ * Make a size estimate for a parameterized scan of a join relation.
+ *
+ * 'rel' is the joinrel under consideration.
+ * 'outer_rows', 'inner_rows' are the sizes of the (probably also
+ * parameterized) join inputs under consideration.
+ * 'sjinfo' is any SpecialJoinInfo relevant to this join.
+ * 'restrict_clauses' lists the join clauses that need to be applied at the
+ * join node (including any movable clauses that were moved down to this join,
+ * and not including any movable clauses that were pushed down into the
+ * child paths).
+ *
+ * set_joinrel_size_estimates must have been applied already.
*/
-static void
-set_joinpath_size_estimate(PlannerInfo *root, JoinPath *path,
- SpecialJoinInfo *sjinfo,
- List *restrictlist)
+double
+get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrict_clauses)
{
- if (path->path.required_outer)
- {
- path->path.rows = calc_joinrel_size_estimate(root,
- path->outerjoinpath->rows,
- path->innerjoinpath->rows,
- sjinfo,
- restrictlist);
- /* For safety, make sure result is not more than the base estimate */
- if (path->path.rows > path->path.parent->rows)
- path->path.rows = path->path.parent->rows;
- }
- else
- path->path.rows = path->path.parent->rows;
+ double nrows;
+
+ /*
+ * Estimate the number of rows returned by the parameterized join as the
+ * sizes of the input paths times the selectivity of the clauses that have
+ * ended up at this join node.
+ *
+ * As with set_joinrel_size_estimates, the rowcount estimate could depend
+ * on the pair of input paths provided, though ideally we'd get the same
+ * estimate for any pair with the same parameterization.
+ */
+ nrows = calc_joinrel_size_estimate(root,
+ outer_rows,
+ inner_rows,
+ sjinfo,
+ restrict_clauses);
+ /* For safety, make sure result is not more than the base estimate */
+ if (nrows > rel->rows)
+ nrows = rel->rows;
+ return nrows;
}
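
/*
 * For example, outer_rows = 50 and inner_rows = 20 with a joinclause
 * selectivity of 0.05 yield nrows = 50 * 20 * 0.05 = 50, which would
 * then be clamped to rel->rows if that were smaller.
 */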
/*
* calc_joinrel_size_estimate
- * Workhorse for set_joinrel_size_estimates and set_joinpath_size_estimate
+ * Workhorse for set_joinrel_size_estimates and
+ * get_parameterized_joinrel_size.
*/
static double
calc_joinrel_size_estimate(PlannerInfo *root,
double nrows;
/*
- * Compute joinclause selectivity. Note that we are only considering
+ * Compute joinclause selectivity. Note that we are only considering
* clauses that become restriction clauses at this join level; we are not
* double-counting them because they were not considered in estimating the
* sizes of the component rels.
*
* If we are doing an outer join, take that into account: the joinqual
* selectivity has to be clamped using the knowledge that the output must
- * be at least as large as the non-nullable input. However, any
+ * be at least as large as the non-nullable input. However, any
* pushed-down quals are applied after the outer join, so their
* selectivity applies fully.
*
/*
* Compute per-output-column width estimates by examining the subquery's
- * targetlist. For any output that is a plain Var, get the width estimate
+ * targetlist. For any output that is a plain Var, get the width estimate
* that was made while planning the subquery. Otherwise, we leave it to
* set_rel_width to fill in a datatype-based default estimate.
*/
if (te->resjunk)
continue;
+ /*
+ * The subquery could be an expansion of a view that's had columns
+ * added to it since the current query was parsed, so that there are
+ * non-junk tlist columns in it that don't correspond to any column
+ * visible at our query level. Ignore such columns.
+ */
+ if (te->resno < rel->min_attr || te->resno > rel->max_attr)
+ continue;
+
/*
* XXX This currently doesn't work for subqueries containing set
* operations, because the Vars in their tlists are bogus references
item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
}
- Assert(te->resno >= rel->min_attr && te->resno <= rel->max_attr);
rel->attr_widths[te->resno - rel->min_attr] = item_width;
}
set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
{
RangeTblEntry *rte;
+ ListCell *lc;
/* Should only be applied to base relations that are functions */
Assert(rel->relid > 0);
rte = planner_rt_fetch(rel->relid, root);
Assert(rte->rtekind == RTE_FUNCTION);
- /* Estimate number of rows the function itself will return */
- rel->tuples = clamp_row_est(expression_returns_set_rows(rte->funcexpr));
+ /*
+ * Estimate number of rows the functions will return. The rowcount of the
+ * node is that of the largest function result.
+ */
+ rel->tuples = 0;
+ foreach(lc, rte->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+ double ntup = expression_returns_set_rows(rtfunc->funcexpr);
+
+ if (ntup > rel->tuples)
+ rel->tuples = ntup;
+ }
/* Now estimate number of output rows, etc */
set_baserel_size_estimates(root, rel);
* of estimating baserestrictcost, so we set that, and we also set up width
* using what will be purely datatype-driven estimates from the targetlist.
* There is no way to do anything sane with the rows value, so we just put
- * a default estimate and hope that the wrapper can improve on it. The
+ * a default estimate and hope that the wrapper can improve on it. The
* wrapper's GetForeignRelSize function will be called momentarily.
*
* The rel's targetlist and restrictinfo list must have been constructed
{
Node *node = (Node *) lfirst(lc);
- if (IsA(node, Var))
+ /*
+ * Ordinarily, a Var in a rel's reltargetlist must belong to that rel;
+ * but there are corner cases involving LATERAL references where that
+ * isn't so. If the Var has the wrong varno, fall through to the
+ * generic case (it doesn't seem worth the trouble to be any smarter).
+ */
+ if (IsA(node, Var) &&
+ ((Var *) node)->varno == rel->relid)
{
Var *var = (Var *) node;
int ndx;
int32 item_width;
- Assert(var->varno == rel->relid);
Assert(var->varattno >= rel->min_attr);
Assert(var->varattno <= rel->max_attr);
{
/*
* We could be looking at an expression pulled up from a subquery,
- * or a ROW() representing a whole-row child Var, etc. Do what we
+ * or a ROW() representing a whole-row child Var, etc. Do what we
* can using the expression type information.
*/
int32 item_width;
/*
* If we have a whole-row reference, estimate its width as the sum of
- * per-column widths plus sizeof(HeapTupleHeaderData).
+ * per-column widths plus heap tuple header overhead.
*/
if (have_wholerow_var)
{
- int32 wholerow_width = sizeof(HeapTupleHeaderData);
+ int32 wholerow_width = MAXALIGN(SizeofHeapTupleHeader);
if (reloid != InvalidOid)
{
static double
relation_byte_size(double tuples, int width)
{
- return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleHeaderData)));
+ return tuples * (MAXALIGN(width) + MAXALIGN(SizeofHeapTupleHeader));
}
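
/*
 * For example, with 8-byte maximum alignment and a 23-byte heap tuple
 * header, a width-30 tuple is charged MAXALIGN(30) + MAXALIGN(23) =
 * 32 + 24 = 56 bytes, so a million such tuples are sized at about 56 MB
 * for spill estimates.
 */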
/*