From 3f59be836c555fa679bbe0ec76de50a8b5cb23e0 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 3 Jun 2015 11:58:47 -0400
Subject: [PATCH] Fix planner's cost estimation for SEMI/ANTI joins with inner
 indexscans.

When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick.  The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case.  If the
inner side has low cardinality (many matching rows), this could make an
indexscan plan look far more expensive than it actually is.

To fix, rearrange the work in initial_cost_nestloop/final_cost_nestloop so
that we don't add the inner scan cost until we've inspected the indexquals,
and then we can add either the full-run cost or just the first tuple's cost
as appropriate.

Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths.  That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic.  In the
many-matching-rows cases of interest here, a bitmap scan will do a lot more
work than required, so this is a problem.

To fix, add a per-relation flag consider_param_startup that works like the
existing consider_startup flag, but applies to parameterized paths, and set
it for relations that are the inside of a SEMI or ANTI join.
To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans.  There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.

Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.

Per a complaint from Peter Holzer and analysis by Tomas Vondra.
---
 src/backend/nodes/outfuncs.c          |   1 +
 src/backend/optimizer/README          |   9 +-
 src/backend/optimizer/path/allpaths.c |  47 ++++++++++
 src/backend/optimizer/path/costsize.c | 124 ++++++++++++++++----------
 src/backend/optimizer/util/pathnode.c |  70 ++++++++-------
 src/backend/optimizer/util/relnode.c  |   2 +
 src/include/nodes/relation.h          |   8 +-
 7 files changed, 175 insertions(+), 86 deletions(-)

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 4775acfcfc..87304ba9bf 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1844,6 +1844,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
 	WRITE_FLOAT_FIELD(rows, "%.0f");
 	WRITE_INT_FIELD(width);
 	WRITE_BOOL_FIELD(consider_startup);
+	WRITE_BOOL_FIELD(consider_param_startup);
 	WRITE_NODE_FIELD(reltargetlist);
 	WRITE_NODE_FIELD(pathlist);
 	WRITE_NODE_FIELD(ppilist);
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index d51fd57516..99c300a657 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -795,7 +795,7 @@ a nestloop that provides parameters to the lower join's inputs).  While we
 do not ignore merge joins entirely, joinpath.c does not fully explore the
 space of potential merge joins with parameterized inputs.
 Also, add_path treats parameterized paths as having no pathkeys, so that
 they compete
-only on total cost and rowcount; they don't get preference for producing a
+only on cost and rowcount; they don't get preference for producing a
 special sort order.  This creates additional bias against merge joins,
 since we might discard a path that could have been useful for performing a
 merge without an explicit sort step.  Since a parameterized path must
@@ -804,6 +804,13 @@ uninteresting, these choices do not affect any requirement for the final
 output order of a query --- they only make it harder to use a merge join
 at a lower level.  The savings in planning work justifies that.
 
+Similarly, parameterized paths do not normally get preference in add_path
+for having cheap startup cost; that's seldom of much value when on the
+inside of a nestloop, so it seems not worth keeping extra paths solely for
+that.  An exception occurs for parameterized paths for the RHS relation of
+a SEMI or ANTI join: in those cases, we can stop the inner scan after the
+first match, so it's primarily startup not total cost that we care about.
+
 LATERAL subqueries
 ------------------
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 4e6d90d8d8..0b831891fc 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -61,6 +61,7 @@
 set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
 join_search_hook_type join_search_hook = NULL;
 
+static void set_base_rel_consider_startup(PlannerInfo *root);
 static void set_base_rel_sizes(PlannerInfo *root);
 static void set_base_rel_pathlists(PlannerInfo *root);
 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
@@ -152,6 +153,9 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 		root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
 	}
 
+	/* Mark base rels as to whether we care about fast-start plans */
+	set_base_rel_consider_startup(root);
+
 	/*
 	 * Generate access paths for the base rels.
 	 */
@@ -171,6 +175,49 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	return rel;
 }
 
+/*
+ * set_base_rel_consider_startup
+ *	  Set the consider_[param_]startup flags for each base-relation entry.
+ *
+ * For the moment, we only deal with consider_param_startup here; because the
+ * logic for consider_startup is pretty trivial and is the same for every base
+ * relation, we just let build_simple_rel() initialize that flag correctly to
+ * start with.  If that logic ever gets more complicated it would probably
+ * be better to move it here.
+ */
+static void
+set_base_rel_consider_startup(PlannerInfo *root)
+{
+	/*
+	 * Since parameterized paths can only be used on the inside of a nestloop
+	 * join plan, there is usually little value in considering fast-start
+	 * plans for them.  However, for relations that are on the RHS of a SEMI
+	 * or ANTI join, a fast-start plan can be useful because we're only going
+	 * to care about fetching one tuple anyway.
+	 *
+	 * To minimize growth of planning time, we currently restrict this to
+	 * cases where the RHS is a single base relation, not a join; there is no
+	 * provision for consider_param_startup to get set at all on joinrels.
+	 * Also we don't worry about appendrels.  costsize.c's costing rules for
+	 * nestloop semi/antijoins don't consider such cases either.
+	 */
+	ListCell   *lc;
+
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+		int			varno;
+
+		if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&
+			bms_get_singleton_member(sjinfo->syn_righthand, &varno))
+		{
+			RelOptInfo *rel = find_base_rel(root, varno);
+
+			rel->consider_param_startup = true;
+		}
+	}
+}
+
 /*
  * set_base_rel_sizes
  *	  Set the size estimates (rows and widths) for each base-relation entry.
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ac865be637..0d302f66be 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1767,7 +1767,8 @@ cost_group(Path *path, PlannerInfo *root,
  * estimate and getting a tight lower bound.  We choose to not examine the
  * join quals here, since that's by far the most expensive part of the
  * calculations.  The end result is that CPU-cost considerations must be
- * left for the second phase.
+ * left for the second phase; and for SEMI/ANTI joins, we must also postpone
+ * incorporation of the inner path's run cost.
  *
  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
  * other data to be used by final_cost_nestloop
@@ -1815,44 +1816,16 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 
 	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
 	{
-		double		outer_matched_rows;
-		Selectivity inner_scan_frac;
-
 		/*
 		 * SEMI or ANTI join: executor will stop after first match.
 		 *
-		 * For an outer-rel row that has at least one match, we can expect the
-		 * inner scan to stop after a fraction 1/(match_count+1) of the inner
-		 * rows, if the matches are evenly distributed.  Since they probably
-		 * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
-		 * that fraction.  (If we used a larger fuzz factor, we'd have to
-		 * clamp inner_scan_frac to at most 1.0; but since match_count is at
-		 * least 1, no such clamp is needed now.)
-		 *
-		 * A complicating factor is that rescans may be cheaper than first
-		 * scans.  If we never scan all the way to the end of the inner rel,
-		 * it might be (depending on the plan type) that we'd never pay the
-		 * whole inner first-scan run cost.  However it is difficult to
-		 * estimate whether that will happen, so be conservative and always
-		 * charge the whole first-scan cost once.
-		 */
-		run_cost += inner_run_cost;
-
-		outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
-		inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
-
-		/* Add inner run cost for additional outer tuples having matches */
-		if (outer_matched_rows > 1)
-			run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
-
-		/*
-		 * The cost of processing unmatched rows varies depending on the
-		 * details of the joinclauses, so we leave that part for later.
+		 * Getting decent estimates requires inspection of the join quals,
+		 * which we choose to postpone to final_cost_nestloop.
 		 */
 
 		/* Save private data for final_cost_nestloop */
-		workspace->outer_matched_rows = outer_matched_rows;
-		workspace->inner_scan_frac = inner_scan_frac;
+		workspace->inner_run_cost = inner_run_cost;
+		workspace->inner_rescan_run_cost = inner_rescan_run_cost;
 	}
 	else
 	{
@@ -1869,7 +1842,6 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 	workspace->total_cost = startup_cost + run_cost;
 	/* Save private data for final_cost_nestloop */
 	workspace->run_cost = run_cost;
-	workspace->inner_rescan_run_cost = inner_rescan_run_cost;
 }
 
 /*
@@ -1893,7 +1865,6 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	double		inner_path_rows = inner_path->rows;
 	Cost		startup_cost = workspace->startup_cost;
 	Cost		run_cost = workspace->run_cost;
-	Cost		inner_rescan_run_cost = workspace->inner_rescan_run_cost;
 	Cost		cpu_per_tuple;
 	QualCost	restrict_qual_cost;
 	double		ntuples;
@@ -1912,42 +1883,101 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	if (!enable_nestloop)
 		startup_cost += disable_cost;
 
-	/* cost of source data */
+	/* cost of inner-relation source data (we already dealt with outer rel) */
 	if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
 	{
-		double		outer_matched_rows = workspace->outer_matched_rows;
-		Selectivity inner_scan_frac = workspace->inner_scan_frac;
-
 		/*
 		 * SEMI or ANTI join: executor will stop after first match.
 		 */
+		Cost		inner_run_cost = workspace->inner_run_cost;
+		Cost		inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+		double		outer_matched_rows;
+		Selectivity inner_scan_frac;
 
-		/* Compute number of tuples processed (not number emitted!) */
+		/*
+		 * For an outer-rel row that has at least one match, we can expect the
+		 * inner scan to stop after a fraction 1/(match_count+1) of the inner
+		 * rows, if the matches are evenly distributed.  Since they probably
+		 * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+		 * that fraction.
+		 * (If we used a larger fuzz factor, we'd have to
+		 * clamp inner_scan_frac to at most 1.0; but since match_count is at
+		 * least 1, no such clamp is needed now.)
+		 */
+		outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+		inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
+
+		/*
+		 * Compute number of tuples processed (not number emitted!).  First,
+		 * account for successfully-matched outer rows.
+		 */
 		ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
 
 		/*
-		 * For unmatched outer-rel rows, there are two cases.  If the inner
-		 * path is an indexscan using all the joinquals as indexquals, then an
-		 * unmatched row results in an indexscan returning no rows, which is
-		 * probably quite cheap.  We estimate this case as the same cost to
-		 * return the first tuple of a nonempty scan.  Otherwise, the executor
-		 * will have to scan the whole inner rel; not so cheap.
+		 * Now we need to estimate the actual costs of scanning the inner
+		 * relation, which may be quite a bit less than N times inner_run_cost
+		 * due to early scan stops.  We consider two cases.  If the inner path
+		 * is an indexscan using all the joinquals as indexquals, then an
+		 * unmatched outer row results in an indexscan returning no rows,
+		 * which is probably quite cheap.  Otherwise, the executor will have
+		 * to scan the whole inner rel for an unmatched row; not so cheap.
 		 */
 		if (has_indexed_join_quals(path))
 		{
+			/*
+			 * Successfully-matched outer rows will only require scanning
+			 * inner_scan_frac of the inner relation.  In this case, we don't
+			 * need to charge the full inner_run_cost even when that's more
+			 * than inner_rescan_run_cost, because we can assume that none of
+			 * the inner scans ever scan the whole inner relation.  So it's
+			 * okay to assume that all the inner scan executions can be
+			 * fractions of the full cost, even if materialization is reducing
+			 * the rescan cost.
+			 * At this writing, it's impossible to get here
+			 * for a materialized inner scan, so inner_run_cost and
+			 * inner_rescan_run_cost will be the same anyway; but just in
+			 * case, use inner_run_cost for the first matched tuple and
+			 * inner_rescan_run_cost for additional ones.
+			 */
+			run_cost += inner_run_cost * inner_scan_frac;
+			if (outer_matched_rows > 1)
+				run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+			/*
+			 * Add the cost of inner-scan executions for unmatched outer rows.
+			 * We estimate this as the same cost as returning the first tuple
+			 * of a nonempty scan.  We consider that these are all rescans,
+			 * since we used inner_run_cost once already.
+			 */
 			run_cost += (outer_path_rows - outer_matched_rows) *
 				inner_rescan_run_cost / inner_path_rows;
 
 			/*
-			 * We won't be evaluating any quals at all for these rows, so
+			 * We won't be evaluating any quals at all for unmatched rows, so
 			 * don't add them to ntuples.
 			 */
 		}
 		else
 		{
+			/*
+			 * Here, a complicating factor is that rescans may be cheaper than
+			 * first scans.  If we never scan all the way to the end of the
+			 * inner rel, it might be (depending on the plan type) that we'd
+			 * never pay the whole inner first-scan run cost.  However it is
+			 * difficult to estimate whether that will happen (and it could
+			 * not happen if there are any unmatched outer rows!), so be
+			 * conservative and always charge the whole first-scan cost once.
+			 */
+			run_cost += inner_run_cost;
+
+			/* Add inner run cost for additional outer tuples having matches */
+			if (outer_matched_rows > 1)
+				run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+
+			/* Add inner run cost for unmatched outer tuples */
 			run_cost += (outer_path_rows - outer_matched_rows) *
 				inner_rescan_run_cost;
+
+			/* And count the unmatched join tuples as being processed */
 			ntuples += (outer_path_rows - outer_matched_rows) *
 				inner_path_rows;
 		}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 7f7aa24bb8..5075c8752a 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -138,19 +138,17 @@ compare_fractional_path_costs(Path *path1, Path *path2,
 * total cost, we just say that their costs are "different", since neither
 * dominates the other across the whole performance spectrum.
 *
- * If consider_startup is false, then we don't care about keeping paths with
- * good startup cost, so we'll never return COSTS_DIFFERENT.
- *
- * This function also includes special hacks to support a policy enforced
- * by its sole caller, add_path(): paths that have any parameterization
- * cannot win comparisons on the grounds of having cheaper startup cost,
- * since we deem only total cost to be of interest for a parameterized path.
- * (Unparameterized paths are more common, so we check for this case last.)
+ * This function also enforces a policy rule that paths for which the relevant
+ * one of parent->consider_startup and parent->consider_param_startup is false
+ * cannot win comparisons on the grounds of good startup cost, so we never
+ * return COSTS_DIFFERENT when that is true for the total-cost loser.
 */
 static PathCostComparison
-compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
-						   bool consider_startup)
+compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
 {
+#define CONSIDER_PATH_STARTUP_COST(p)  \
+	((p)->param_info == NULL ? (p)->parent->consider_startup : (p)->parent->consider_param_startup)
+
 	/*
 	 * Check total cost first since it's more likely to be different; many
 	 * paths have zero startup cost.
@@ -158,9 +156,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
 	if (path1->total_cost > path2->total_cost * fuzz_factor)
 	{
 		/* path1 fuzzily worse on total cost */
-		if (consider_startup &&
-			path2->startup_cost > path1->startup_cost * fuzz_factor &&
-			path1->param_info == NULL)
+		if (CONSIDER_PATH_STARTUP_COST(path1) &&
+			path2->startup_cost > path1->startup_cost * fuzz_factor)
 		{
 			/* ... but path2 fuzzily worse on startup, so DIFFERENT */
 			return COSTS_DIFFERENT;
@@ -171,9 +168,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
 	if (path2->total_cost > path1->total_cost * fuzz_factor)
 	{
 		/* path2 fuzzily worse on total cost */
-		if (consider_startup &&
-			path1->startup_cost > path2->startup_cost * fuzz_factor &&
-			path2->param_info == NULL)
+		if (CONSIDER_PATH_STARTUP_COST(path2) &&
+			path1->startup_cost > path2->startup_cost * fuzz_factor)
 		{
 			/* ... but path1 fuzzily worse on startup, so DIFFERENT */
 			return COSTS_DIFFERENT;
@@ -181,8 +177,13 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
 		/* else path1 dominates */
 		return COSTS_BETTER1;
 	}
-	/* fuzzily the same on total cost */
-	/* (so we may as well compare startup cost, even if !consider_startup) */
+
+	/*
+	 * Fuzzily the same on total cost (so we might as well compare startup
+	 * cost, even when that would otherwise be uninteresting; but
+	 * parameterized paths aren't allowed to win this way, we'd rather move on
+	 * to other comparison heuristics)
+	 */
 	if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
 		path2->param_info == NULL)
 	{
@@ -197,6 +198,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
 	}
 	/* fuzzily the same on both costs */
 	return COSTS_EQUAL;
+
+#undef CONSIDER_PATH_STARTUP_COST
 }
 
 /*
@@ -212,11 +215,11 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
  *
  * The cheapest_parameterized_paths list collects all parameterized paths
  * that have survived the add_path() tournament for this relation.  (Since
- * add_path ignores pathkeys and startup cost for a parameterized path,
- * these will be paths that have best total cost or best row count for their
- * parameterization.)  cheapest_parameterized_paths always includes the
- * cheapest-total unparameterized path, too, if there is one; the users of
- * that list find it more convenient if that's included.
+ * add_path ignores pathkeys for a parameterized path, these will be paths
+ * that have best cost or best row count for their parameterization.)
+ * cheapest_parameterized_paths always includes the cheapest-total
+ * unparameterized path, too, if there is one; the users of that list find
+ * it more convenient if that's included.
 *
 * This is normally called only after we've finished constructing the path
 * list for the rel node.
@@ -361,14 +364,15 @@ set_cheapest(RelOptInfo *parent_rel)
 * cases do arise, so we make the full set of checks anyway.
 *
 * There are two policy decisions embedded in this function, along with
- * its sibling add_path_precheck: we treat all parameterized paths as
- * having NIL pathkeys, and we ignore their startup costs, so that they
- * compete only on parameterization, total cost and rowcount.  This is to
- * reduce the number of parameterized paths that are kept.  See discussion
- * in src/backend/optimizer/README.
+ * its sibling add_path_precheck.  First, we treat all parameterized paths
+ * as having NIL pathkeys, so that they cannot win comparisons on the
+ * basis of sort order.  This is to reduce the number of parameterized
+ * paths that are kept; see discussion in src/backend/optimizer/README.
 *
- * Another policy that is enforced here is that we only consider cheap
- * startup cost to be interesting if parent_rel->consider_startup is true.
+ * Second, we only consider cheap startup cost to be interesting if
+ * parent_rel->consider_startup is true for an unparameterized path, or
+ * parent_rel->consider_param_startup is true for a parameterized one.
+ * Again, this allows discarding useless paths sooner.
 *
 * The pathlist is kept sorted by total_cost, with cheaper paths
 * at the front.  Within this routine, that's simply a speed hack:
@@ -433,8 +437,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 	 * Do a fuzzy cost comparison with 1% fuzziness limit.  (XXX does this
 	 * percentage need to be user-configurable?)
 	 */
-	costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
-										 parent_rel->consider_startup);
+	costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01);
 
 	/*
 	 * If the two paths compare differently for startup and total cost,
@@ -501,8 +504,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 			accept_new = false; /* old dominates new */
 		else if (compare_path_costs_fuzzily(new_path, old_path,
-											1.0000000001,
-											parent_rel->consider_startup) == COSTS_BETTER1)
+											1.0000000001) == COSTS_BETTER1)
 			remove_old = true;	/* new dominates old */
 		else
 			accept_new = false; /* old equals or
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 1d635cd6d2..be2ef3becf 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -101,6 +101,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
 	rel->width = 0;
 	/* cheap startup cost is interesting iff not all tuples to be retrieved */
 	rel->consider_startup = (root->tuple_fraction > 0);
+	rel->consider_param_startup = false;	/* might get changed later */
 	rel->reltargetlist = NIL;
 	rel->pathlist = NIL;
 	rel->ppilist = NIL;
@@ -361,6 +362,7 @@ build_join_rel(PlannerInfo *root,
 	joinrel->width = 0;
 	/* cheap startup cost is interesting iff not all tuples to be retrieved */
 	joinrel->consider_startup = (root->tuple_fraction > 0);
+	joinrel->consider_param_startup = false;
 	joinrel->reltargetlist = NIL;
 	joinrel->pathlist = NIL;
 	joinrel->ppilist = NIL;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 279051ed18..33b0874570 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -321,8 +321,9 @@ typedef struct PlannerInfo
 *		clauses have been applied (ie, output rows of a plan for it)
 *		width - avg. number of bytes per tuple in the relation after the
 *				appropriate projections have been done (ie, output width)
- *		consider_startup - true if there is any value in keeping paths for
+ *		consider_startup - true if there is any value in keeping plain paths for
 *				this rel on the basis of having cheap startup cost
+ *		consider_param_startup - the same for parameterized paths
 *		reltargetlist - List of Var and PlaceHolderVar nodes for the values
 *			we need to output from this relation.
 *			List is in no particular order, but all rels of an
@@ -437,6 +438,7 @@ typedef struct RelOptInfo
 
 	/* per-relation planner control flags */
 	bool		consider_startup;	/* keep cheap-startup-cost paths? */
+	bool		consider_param_startup; /* ditto, for parameterized paths? */
 
 	/* materialization information */
 	List	   *reltargetlist;	/* Vars to be output by scan of relation */
@@ -1719,12 +1721,10 @@ typedef struct JoinCostWorkspace
 	Cost		run_cost;		/* non-startup cost components */
 
 	/* private for cost_nestloop code */
+	Cost		inner_run_cost; /* also used by cost_mergejoin code */
 	Cost		inner_rescan_run_cost;
-	double		outer_matched_rows;
-	Selectivity inner_scan_frac;
 
 	/* private for cost_mergejoin code */
-	Cost		inner_run_cost;
 	double		outer_rows;
 	double		inner_rows;
 	double		outer_skip_rows;
-- 
2.40.0