WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_INT_FIELD(width);
WRITE_BOOL_FIELD(consider_startup);
+ WRITE_BOOL_FIELD(consider_param_startup);
WRITE_NODE_FIELD(reltargetlist);
WRITE_NODE_FIELD(pathlist);
WRITE_NODE_FIELD(ppilist);
do not ignore merge joins entirely, joinpath.c does not fully explore the
space of potential merge joins with parameterized inputs. Also, add_path
treats parameterized paths as having no pathkeys, so that they compete
-only on total cost and rowcount; they don't get preference for producing a
+only on cost and rowcount; they don't get preference for producing a
special sort order. This creates additional bias against merge joins,
since we might discard a path that could have been useful for performing
a merge without an explicit sort step. Since a parameterized path must
output order of a query --- they only make it harder to use a merge join
at a lower level. The savings in planning work justifies that.
+Similarly, parameterized paths do not normally get preference in add_path
+for having cheap startup cost; that's seldom of much value when on the
+inside of a nestloop, so it seems not worth keeping extra paths solely for
+that. An exception occurs for parameterized paths for the RHS relation of
+a SEMI or ANTI join: in those cases, we can stop the inner scan after the
+first match, so it's primarily startup not total cost that we care about.
+
LATERAL subqueries
------------------
join_search_hook_type join_search_hook = NULL;
+static void set_base_rel_consider_startup(PlannerInfo *root);
static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
}
+ /* Mark base rels as to whether we care about fast-start plans */
+ set_base_rel_consider_startup(root);
+
/*
* Generate access paths for the base rels.
*/
return rel;
}
+/*
+ * set_base_rel_consider_startup
+ * Set the consider_[param_]startup flags for each base-relation entry.
+ *
+ * For the moment, we only deal with consider_param_startup here; because the
+ * logic for consider_startup is pretty trivial and is the same for every base
+ * relation, we just let build_simple_rel() initialize that flag correctly to
+ * start with. If that logic ever gets more complicated it would probably
+ * be better to move it here.
+ */
+static void
+set_base_rel_consider_startup(PlannerInfo *root)
+{
+ /*
+ * Since parameterized paths can only be used on the inside of a nestloop
+ * join plan, there is usually little value in considering fast-start
+ * plans for them. However, for relations that are on the RHS of a SEMI
+ * or ANTI join, a fast-start plan can be useful because we're only going
+ * to care about fetching one tuple anyway.
+ *
+ * To minimize growth of planning time, we currently restrict this to
+ * cases where the RHS is a single base relation, not a join; there is no
+ * provision for consider_param_startup to get set at all on joinrels.
+ * Also, we don't worry about appendrels. costsize.c's costing rules for
+ * nestloop semi/antijoins don't consider such cases either.
+ */
+ ListCell *lc;
+
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+ int varno;
+
+ /* Only SEMI/ANTI joins whose RHS is exactly one base rel qualify */
+ if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
+ bms_get_singleton_member(sjinfo->syn_righthand, &varno))
+ {
+ /* NOTE(review): assumes varno names a base rel — find_base_rel
+ * should fail loudly otherwise; confirm against its contract */
+ RelOptInfo *rel = find_base_rel(root, varno);
+
+ rel->consider_param_startup = true;
+ }
+ }
+}
+
/*
* set_base_rel_sizes
* Set the size estimates (rows and widths) for each base-relation entry.
* estimate and getting a tight lower bound. We choose to not examine the
* join quals here, since that's by far the most expensive part of the
* calculations. The end result is that CPU-cost considerations must be
- * left for the second phase.
+ * left for the second phase; and for SEMI/ANTI joins, we must also postpone
+ * incorporation of the inner path's run cost.
*
* 'workspace' is to be filled with startup_cost, total_cost, and perhaps
* other data to be used by final_cost_nestloop
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
- double outer_matched_rows;
- Selectivity inner_scan_frac;
-
/*
* SEMI or ANTI join: executor will stop after first match.
*
- * For an outer-rel row that has at least one match, we can expect the
- * inner scan to stop after a fraction 1/(match_count+1) of the inner
- * rows, if the matches are evenly distributed. Since they probably
- * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
- * that fraction. (If we used a larger fuzz factor, we'd have to
- * clamp inner_scan_frac to at most 1.0; but since match_count is at
- * least 1, no such clamp is needed now.)
- *
- * A complicating factor is that rescans may be cheaper than first
- * scans. If we never scan all the way to the end of the inner rel,
- * it might be (depending on the plan type) that we'd never pay the
- * whole inner first-scan run cost. However it is difficult to
- * estimate whether that will happen, so be conservative and always
- * charge the whole first-scan cost once.
- */
- run_cost += inner_run_cost;
-
- outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
- inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
-
- /* Add inner run cost for additional outer tuples having matches */
- if (outer_matched_rows > 1)
- run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
-
- /*
- * The cost of processing unmatched rows varies depending on the
- * details of the joinclauses, so we leave that part for later.
+ * Getting decent estimates requires inspection of the join quals,
+ * which we choose to postpone to final_cost_nestloop.
*/
/* Save private data for final_cost_nestloop */
- workspace->outer_matched_rows = outer_matched_rows;
- workspace->inner_scan_frac = inner_scan_frac;
+ workspace->inner_run_cost = inner_run_cost;
+ workspace->inner_rescan_run_cost = inner_rescan_run_cost;
}
else
{
workspace->total_cost = startup_cost + run_cost;
/* Save private data for final_cost_nestloop */
workspace->run_cost = run_cost;
- workspace->inner_rescan_run_cost = inner_rescan_run_cost;
}
/*
double inner_path_rows = inner_path->rows;
Cost startup_cost = workspace->startup_cost;
Cost run_cost = workspace->run_cost;
- Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
Cost cpu_per_tuple;
QualCost restrict_qual_cost;
double ntuples;
if (!enable_nestloop)
startup_cost += disable_cost;
- /* cost of source data */
+ /* cost of inner-relation source data (we already dealt with outer rel) */
if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
{
- double outer_matched_rows = workspace->outer_matched_rows;
- Selectivity inner_scan_frac = workspace->inner_scan_frac;
-
/*
* SEMI or ANTI join: executor will stop after first match.
*/
+ Cost inner_run_cost = workspace->inner_run_cost;
+ Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
- /* Compute number of tuples processed (not number emitted!) */
+ /*
+ * For an outer-rel row that has at least one match, we can expect the
+ * inner scan to stop after a fraction 1/(match_count+1) of the inner
+ * rows, if the matches are evenly distributed. Since they probably
+ * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+ * that fraction. (If we used a larger fuzz factor, we'd have to
+ * clamp inner_scan_frac to at most 1.0; but since match_count is at
+ * least 1, no such clamp is needed now.)
+ */
+ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+ inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
+
+ /*
+ * Compute number of tuples processed (not number emitted!). First,
+ * account for successfully-matched outer rows.
+ */
ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
/*
- * For unmatched outer-rel rows, there are two cases. If the inner
- * path is an indexscan using all the joinquals as indexquals, then an
- * unmatched row results in an indexscan returning no rows, which is
- * probably quite cheap. We estimate this case as the same cost to
- * return the first tuple of a nonempty scan. Otherwise, the executor
- * will have to scan the whole inner rel; not so cheap.
+ * Now we need to estimate the actual costs of scanning the inner
+ * relation, which may be quite a bit less than N times inner_run_cost
+ * due to early scan stops. We consider two cases. If the inner path
+ * is an indexscan using all the joinquals as indexquals, then an
+ * unmatched outer row results in an indexscan returning no rows,
+ * which is probably quite cheap. Otherwise, the executor will have
+ * to scan the whole inner rel for an unmatched row; not so cheap.
*/
if (has_indexed_join_quals(path))
{
+ /*
+ * Successfully-matched outer rows will only require scanning
+ * inner_scan_frac of the inner relation. In this case, we don't
+ * need to charge the full inner_run_cost even when that's more
+ * than inner_rescan_run_cost, because we can assume that none of
+ * the inner scans ever scan the whole inner relation. So it's
+ * okay to assume that all the inner scan executions can be
+ * fractions of the full cost, even if materialization is reducing
+ * the rescan cost. At this writing, it's impossible to get here
+ * for a materialized inner scan, so inner_run_cost and
+ * inner_rescan_run_cost will be the same anyway; but just in
+ * case, use inner_run_cost for the first matched tuple and
+ * inner_rescan_run_cost for additional ones.
+ */
+ run_cost += inner_run_cost * inner_scan_frac;
+ if (outer_matched_rows > 1)
+ run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+ /*
+ * Add the cost of inner-scan executions for unmatched outer rows.
+ * We estimate this as the same cost as returning the first tuple
+ * of a nonempty scan. We consider that these are all rescans,
+ * since we used inner_run_cost once already.
+ */
run_cost += (outer_path_rows - outer_matched_rows) *
inner_rescan_run_cost / inner_path_rows;
/*
- * We won't be evaluating any quals at all for these rows, so
+ * We won't be evaluating any quals at all for unmatched rows, so
* don't add them to ntuples.
*/
}
else
{
+ /*
+ * Here, a complicating factor is that rescans may be cheaper than
+ * first scans. If we never scan all the way to the end of the
+ * inner rel, it might be (depending on the plan type) that we'd
+ * never pay the whole inner first-scan run cost. However it is
+ * difficult to estimate whether that will happen (and it could
+ * not happen if there are any unmatched outer rows!), so be
+ * conservative and always charge the whole first-scan cost once.
+ */
+ run_cost += inner_run_cost;
+
+ /* Add inner run cost for additional outer tuples having matches */
+ if (outer_matched_rows > 1)
+ run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+ /* Add inner run cost for unmatched outer tuples */
run_cost += (outer_path_rows - outer_matched_rows) *
inner_rescan_run_cost;
+
+ /* And count the unmatched join tuples as being processed */
ntuples += (outer_path_rows - outer_matched_rows) *
inner_path_rows;
}
* total cost, we just say that their costs are "different", since neither
* dominates the other across the whole performance spectrum.
*
- * If consider_startup is false, then we don't care about keeping paths with
- * good startup cost, so we'll never return COSTS_DIFFERENT.
- *
- * This function also includes special hacks to support a policy enforced
- * by its sole caller, add_path(): paths that have any parameterization
- * cannot win comparisons on the grounds of having cheaper startup cost,
- * since we deem only total cost to be of interest for a parameterized path.
- * (Unparameterized paths are more common, so we check for this case last.)
+ * This function also enforces a policy rule that paths for which the relevant
+ * one of parent->consider_startup and parent->consider_param_startup is false
+ * cannot win comparisons on the grounds of good startup cost, so we never
+ * return COSTS_DIFFERENT when that is true for the total-cost loser.
*/
static PathCostComparison
-compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
- bool consider_startup)
+compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
{
+#define CONSIDER_PATH_STARTUP_COST(p) \
+ ((p)->param_info == NULL ? (p)->parent->consider_startup : (p)->parent->consider_param_startup)
+
/*
* Check total cost first since it's more likely to be different; many
* paths have zero startup cost.
if (path1->total_cost > path2->total_cost * fuzz_factor)
{
/* path1 fuzzily worse on total cost */
- if (consider_startup &&
- path2->startup_cost > path1->startup_cost * fuzz_factor &&
- path1->param_info == NULL)
+ if (CONSIDER_PATH_STARTUP_COST(path1) &&
+ path2->startup_cost > path1->startup_cost * fuzz_factor)
{
/* ... but path2 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
if (path2->total_cost > path1->total_cost * fuzz_factor)
{
/* path2 fuzzily worse on total cost */
- if (consider_startup &&
- path1->startup_cost > path2->startup_cost * fuzz_factor &&
- path2->param_info == NULL)
+ if (CONSIDER_PATH_STARTUP_COST(path2) &&
+ path1->startup_cost > path2->startup_cost * fuzz_factor)
{
/* ... but path1 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
/* else path1 dominates */
return COSTS_BETTER1;
}
- /* fuzzily the same on total cost */
- /* (so we may as well compare startup cost, even if !consider_startup) */
+
+ /*
+ * Fuzzily the same on total cost (so we might as well compare startup
+ * cost, even when that would otherwise be uninteresting; but
+ * parameterized paths aren't allowed to win this way, we'd rather move on
+ * to other comparison heuristics)
+ */
if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
path2->param_info == NULL)
{
}
/* fuzzily the same on both costs */
return COSTS_EQUAL;
+
+#undef CONSIDER_PATH_STARTUP_COST
}
/*
*
* The cheapest_parameterized_paths list collects all parameterized paths
* that have survived the add_path() tournament for this relation. (Since
- * add_path ignores pathkeys and startup cost for a parameterized path,
- * these will be paths that have best total cost or best row count for their
- * parameterization.) cheapest_parameterized_paths always includes the
- * cheapest-total unparameterized path, too, if there is one; the users of
- * that list find it more convenient if that's included.
+ * add_path ignores pathkeys for a parameterized path, these will be paths
+ * that have best cost or best row count for their parameterization.)
+ * cheapest_parameterized_paths always includes the cheapest-total
+ * unparameterized path, too, if there is one; the users of that list find
+ * it more convenient if that's included.
*
* This is normally called only after we've finished constructing the path
* list for the rel node.
* cases do arise, so we make the full set of checks anyway.
*
* There are two policy decisions embedded in this function, along with
- * its sibling add_path_precheck: we treat all parameterized paths as
- * having NIL pathkeys, and we ignore their startup costs, so that they
- * compete only on parameterization, total cost and rowcount. This is to
- * reduce the number of parameterized paths that are kept. See discussion
- * in src/backend/optimizer/README.
+ * its sibling add_path_precheck. First, we treat all parameterized paths
+ * as having NIL pathkeys, so that they cannot win comparisons on the
+ * basis of sort order. This is to reduce the number of parameterized
+ * paths that are kept; see discussion in src/backend/optimizer/README.
*
- * Another policy that is enforced here is that we only consider cheap
- * startup cost to be interesting if parent_rel->consider_startup is true.
+ * Second, we only consider cheap startup cost to be interesting if
+ * parent_rel->consider_startup is true for an unparameterized path, or
+ * parent_rel->consider_param_startup is true for a parameterized one.
+ * Again, this allows discarding useless paths sooner.
*
* The pathlist is kept sorted by total_cost, with cheaper paths
* at the front. Within this routine, that's simply a speed hack:
* Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this
* percentage need to be user-configurable?)
*/
- costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
- parent_rel->consider_startup);
+ costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01);
/*
* If the two paths compare differently for startup and total cost,
accept_new = false; /* old dominates new */
else if (compare_path_costs_fuzzily(new_path,
old_path,
- 1.0000000001,
- parent_rel->consider_startup) == COSTS_BETTER1)
+ 1.0000000001) == COSTS_BETTER1)
remove_old = true; /* new dominates old */
else
accept_new = false; /* old equals or
rel->width = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
rel->consider_startup = (root->tuple_fraction > 0);
+ rel->consider_param_startup = false; /* might get changed later */
rel->reltargetlist = NIL;
rel->pathlist = NIL;
rel->ppilist = NIL;
joinrel->width = 0;
/* cheap startup cost is interesting iff not all tuples to be retrieved */
joinrel->consider_startup = (root->tuple_fraction > 0);
+ joinrel->consider_param_startup = false;
joinrel->reltargetlist = NIL;
joinrel->pathlist = NIL;
joinrel->ppilist = NIL;
* clauses have been applied (ie, output rows of a plan for it)
* width - avg. number of bytes per tuple in the relation after the
* appropriate projections have been done (ie, output width)
- * consider_startup - true if there is any value in keeping paths for
+ * consider_startup - true if there is any value in keeping plain paths for
* this rel on the basis of having cheap startup cost
+ * consider_param_startup - the same for parameterized paths
* reltargetlist - List of Var and PlaceHolderVar nodes for the values
* we need to output from this relation.
* List is in no particular order, but all rels of an
/* per-relation planner control flags */
bool consider_startup; /* keep cheap-startup-cost paths? */
+ bool consider_param_startup; /* ditto, for parameterized paths? */
/* materialization information */
List *reltargetlist; /* Vars to be output by scan of relation */
Cost run_cost; /* non-startup cost components */
/* private for cost_nestloop code */
+ Cost inner_run_cost; /* also used by cost_mergejoin code */
Cost inner_rescan_run_cost;
- double outer_matched_rows;
- Selectivity inner_scan_frac;
/* private for cost_mergejoin code */
- Cost inner_run_cost;
double outer_rows;
double inner_rows;
double outer_skip_rows;