void
amcostestimate (PlannerInfo *root,
IndexPath *path,
- RelOptInfo *outer_rel,
+ double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
</varlistentry>
<varlistentry>
- <term><parameter>outer_rel</></term>
+ <term><parameter>loop_count</></term>
<listitem>
<para>
- If the index is being considered for use in a join inner indexscan,
- the planner's information about the outer side of the join. Otherwise
- <symbol>NULL</>. When non-<symbol>NULL</>, some of the qual clauses
- will be join clauses for joins
- with this rel rather than being simple restriction clauses. Also,
- the cost estimator should expect that the index scan will be repeated
- for each row of the outer rel.
+ The number of repetitions of the index scan that should be factored
+ into the cost estimates. This will typically be greater than one when
+ considering a parameterized scan for use in the inside of a nestloop
+ join. Note that the cost estimates should still be for just one scan;
+ a larger <parameter>loop_count</> means that it may be appropriate
+ to allow for some caching effects across multiple scans.
</para>
</listitem>
</varlistentry>
</para>
<para>
- In the join case, the returned numbers should be averages expected for
- any one scan of the index.
+ When <parameter>loop_count</> is greater than one, the returned numbers
+ should be averages expected for any one scan of the index.
</para>
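+<para>
+ For example (a sketch only, mirroring the generic logic that
+ <function>cost_index</> uses in <filename>costsize.c</>), a cost
+ estimator can amortize physical I/O by estimating the pages fetched
+ across all <parameter>loop_count</> scans and then pro-rating the
+ result for a single scan:
+</para>
+<programlisting>
+/* illustrative fragment; variables as in cost_index() */
+pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
+                                    baserel->pages,
+                                    (double) index->pages,
+                                    root);
+max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+</programlisting>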
<procedure>
</programlisting>
However, the above does not account for amortization of index reads
- across repeated index scans in the join case.
+ across repeated index scans.
</para>
</step>
return true;
}
+/*
+ * bms_subset_compare - compare A and B for equality/subset relationships
+ *
+ * This is more efficient than testing bms_is_subset in both directions.
+ */
+BMS_Comparison
+bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
+{
+ BMS_Comparison result;
+ int shortlen;
+ int longlen;
+ int i;
+
+ /* Handle cases where either input is NULL */
+ if (a == NULL)
+ {
+ if (b == NULL)
+ return BMS_EQUAL;
+ return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
+ }
+ if (b == NULL)
+ return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
+ /* Check common words */
+ result = BMS_EQUAL; /* status so far */
+ shortlen = Min(a->nwords, b->nwords);
+ for (i = 0; i < shortlen; i++)
+ {
+ bitmapword aword = a->words[i];
+ bitmapword bword = b->words[i];
+
+ if ((aword & ~bword) != 0)
+ {
+ /* a is not a subset of b */
+ if (result == BMS_SUBSET1)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET2;
+ }
+ if ((bword & ~aword) != 0)
+ {
+ /* b is not a subset of a */
+ if (result == BMS_SUBSET2)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET1;
+ }
+ }
+ /* Check extra words */
+ if (a->nwords > b->nwords)
+ {
+ longlen = a->nwords;
+ for (; i < longlen; i++)
+ {
+ if (a->words[i] != 0)
+ {
+ /* a is not a subset of b */
+ if (result == BMS_SUBSET1)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET2;
+ }
+ }
+ }
+ else if (a->nwords < b->nwords)
+ {
+ longlen = b->nwords;
+ for (; i < longlen; i++)
+ {
+ if (b->words[i] != 0)
+ {
+ /* b is not a subset of a */
+ if (result == BMS_SUBSET2)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET1;
+ }
+ }
+ }
+ return result;
+}
+
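+/*
+ * Usage sketch (illustrative, not planner code): a single call replaces
+ * testing bms_is_subset in both directions, e.g. when comparing the
+ * parameterization sets of two paths:
+ *
+ *		BMS_Comparison cmp = bms_subset_compare(path1->required_outer,
+ *												path2->required_outer);
+ *
+ *		if (cmp == BMS_SUBSET1 || cmp == BMS_EQUAL)
+ *			... path1 requires no more outer rels than path2 ...
+ */
+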
/*
* bms_is_member - is X a member of A?
*/
WRITE_ENUM_FIELD(pathtype, NodeTag);
appendStringInfo(str, " :parent_relids ");
_outBitmapset(str, node->parent->relids);
+ WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_NODE_FIELD(pathkeys);
+ WRITE_BITMAPSET_FIELD(required_outer);
+ WRITE_NODE_FIELD(param_clauses);
}
/*
WRITE_NODE_FIELD(indexqualcols);
WRITE_NODE_FIELD(indexorderbys);
WRITE_NODE_FIELD(indexorderbycols);
- WRITE_BOOL_FIELD(isjoininner);
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
- WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapqual);
- WRITE_BOOL_FIELD(isjoininner);
- WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_NODE_FIELD(in_operators);
WRITE_NODE_FIELD(uniq_exprs);
- WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
WRITE_NODE_FIELD(parse);
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
+ WRITE_BITMAPSET_FIELD(all_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
WRITE_NODE_FIELD(init_plans);
WRITE_NODE_FIELD(cheapest_startup_path);
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
+ WRITE_NODE_FIELD(cheapest_parameterized_paths);
WRITE_UINT_FIELD(relid);
WRITE_UINT_FIELD(reltablespace);
WRITE_ENUM_FIELD(rtekind, RTEKind);
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
- WRITE_BITMAPSET_FIELD(index_outer_relids);
- WRITE_NODE_FIELD(index_inner_paths);
}
static void
WRITE_OID_FIELD(hashjoinoperator);
}
-static void
-_outInnerIndexscanInfo(StringInfo str, const InnerIndexscanInfo *node)
-{
- WRITE_NODE_TYPE("INNERINDEXSCANINFO");
- WRITE_BITMAPSET_FIELD(other_relids);
- WRITE_BOOL_FIELD(isouterjoin);
- WRITE_NODE_FIELD(cheapest_startup_innerpath);
- WRITE_NODE_FIELD(cheapest_total_innerpath);
-}
-
static void
_outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
{
case T_RestrictInfo:
_outRestrictInfo(str, obj);
break;
- case T_InnerIndexscanInfo:
- _outInnerIndexscanInfo(str, obj);
- break;
case T_PlaceHolderVar:
_outPlaceHolderVar(str, obj);
break;
RelOptInfo.pathlist. (Actually, we discard Paths that are obviously
inferior alternatives before they ever get into the pathlist --- what
ends up in the pathlist is the cheapest way of generating each potentially
-useful sort ordering of the relation.) Also create a RelOptInfo.joininfo
-list including all the join clauses that involve this relation. For
-example, the WHERE clause "tab1.col1 = tab2.col1" generates entries in
-both tab1 and tab2's joininfo lists.
+useful sort ordering and parameterization of the relation.) Also create a
+RelOptInfo.joininfo list including all the join clauses that involve this
+relation. For example, the WHERE clause "tab1.col1 = tab2.col1" generates
+entries in both tab1 and tab2's joininfo lists.
If we have only a single base relation in the query, we are done.
Otherwise we have to figure out how to join the base relations into a
than applying a sort to the cheapest other path).
If the query contains one-sided outer joins (LEFT or RIGHT joins), or
-IN or EXISTS WHERE clauses that were converted to joins, then some of
-the possible join orders may be illegal. These are excluded by having
-join_is_legal consult a side list of such "special" joins to see
-whether a proposed join is illegal. (The same consultation allows it
-to see which join style should be applied for a valid join, ie,
-JOIN_INNER, JOIN_LEFT, etc.)
+IN or EXISTS WHERE clauses that were converted to semijoins or antijoins,
+then some of the possible join orders may be illegal. These are excluded
+by having join_is_legal consult a side list of such "special" joins to see
+whether a proposed join is illegal. (The same consultation allows it to
+see which join style should be applied for a valid join, ie, JOIN_INNER,
+JOIN_LEFT, etc.)
Valid OUTER JOIN Optimizations
are two possible sort orders and two possible PathKey lists it can
generate.)
-Note that a bitmap scan or multi-pass indexscan (OR clause scan) has NIL
-pathkeys since we can say nothing about the overall order of its result.
-Also, an indexscan on an unordered type of index generates NIL pathkeys.
-However, we can always create a pathkey by doing an explicit sort. The
-pathkeys for a Sort plan's output just represent the sort key fields and
-the ordering operators used.
+Note that a bitmap scan has NIL pathkeys since we can say nothing about
+the overall order of its result. Also, an indexscan on an unordered type
+of index generates NIL pathkeys. However, we can always create a pathkey
+by doing an explicit sort. The pathkeys for a Sort plan's output just
+represent the sort key fields and the ordering operators used.
Things get more interesting when we consider joins. Suppose we do a
mergejoin between A and B using the mergeclause A.X = B.Y. The output
with different orderings, for which extra sorts are needed anyway.
+Parameterized Paths
+-------------------
+
+The naive way to join two relations using a clause like WHERE A.X = B.Y
+is to generate a nestloop plan like this:
+
+    NestLoop
+        Filter: A.X = B.Y
+        -> Seq Scan on A
+        -> Seq Scan on B
+
+We can make this better by using a merge or hash join, but it still
+requires scanning all of both input relations. If A is very small and B is
+very large, but there is an index on B.Y, it can be enormously better to do
+something like this:
+
+    NestLoop
+        -> Seq Scan on A
+        -> Index Scan using B_Y_IDX on B
+               Index Condition: B.Y = A.X
+
+Here, we are expecting that for each row scanned from A, the nestloop
+plan node will pass down the current value of A.X into the scan of B.
+That allows the indexscan to treat A.X as a constant for any one
+invocation, and thereby use it as an index key. This is the only plan type
+that can avoid fetching all of B, and for small numbers of rows coming from
+A, that will dominate every other consideration. (As A gets larger, this
+gets less attractive, and eventually a merge or hash join will win instead.
+So we have to cost out all the alternatives to decide what to do.)
+
+It can be useful for the parameter value to be passed down through
+intermediate layers of joins, for example:
+
+    NestLoop
+        -> Seq Scan on A
+        -> Hash Join
+               Join Condition: B.Y = C.W
+               -> Seq Scan on B
+               -> Index Scan using C_Z_IDX on C
+                      Index Condition: C.Z = A.X
+
+If all joins are plain inner joins then this is unnecessary, because
+it's always possible to reorder the joins so that a parameter is used
+immediately below the nestloop node that provides it. But in the
+presence of outer joins, join reordering may not be possible, and then
+this option can be critical. Before version 9.2, Postgres used ad-hoc
+methods for planning and executing such queries, and those methods could
+not handle passing parameters down through multiple join levels.
+
+To plan such queries, we now use a notion of a "parameterized path",
+which is a path that makes use of a join clause to a relation that's not
+scanned by the path. In the example just above, we would construct a
+path representing the possibility of doing this:
+
+    -> Index Scan using C_Z_IDX on C
+           Index Condition: C.Z = A.X
+
+This path will be marked as being parameterized by relation A. (Note that
+this is only one of the possible access paths for C; we'd still have a
+plain unparameterized seqscan, and perhaps other possibilities.) The
+parameterization marker does not prevent joining the path to B, so one of
+the paths generated for the joinrel {B C} will represent
+
+    Hash Join
+        Join Condition: B.Y = C.W
+        -> Seq Scan on B
+        -> Index Scan using C_Z_IDX on C
+               Index Condition: C.Z = A.X
+
+This path is still marked as being parameterized by A. When we attempt to
+join {B C} to A to form the complete join tree, such a path can only be
+used as the inner side of a nestloop join: it will be ignored for other
+possible join types. So we will form a join path representing the query
+plan shown above, and it will compete in the usual way with paths built
+from non-parameterized scans.
+
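+As a concrete sketch of the representation (the fields are the ones this
+patch adds to Path; the specific values are hypothetical), the parameterized
+scan of C above is an ordinary IndexPath whose parameterization is recorded
+as
+
+    path->required_outer = bms_make_singleton(relid_of_A);
+    path->param_clauses = list_make1(rinfo_CZ_eq_AX);
+
+that is, a Relids set naming the rels that must supply parameter values,
+plus the join clauses the scan will apply.
+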
+To limit planning time, we have to avoid generating an unreasonably large
+number of parameterized paths. We do this by only generating parameterized
+relation scan paths for index scans, and then only for indexes for which
+suitable join clauses are available. There are also heuristics in join
+planning that try to limit the number of parameterized paths considered.
+
+In particular, there's been a deliberate policy decision to favor hash
+joins over merge joins for parameterized join steps (those occurring below
+a nestloop that provides parameters to the lower join's inputs). While we
+do not ignore merge joins entirely, joinpath.c does not fully explore the
+space of potential merge joins with parameterized inputs. Also, add_path
+treats parameterized paths as having no pathkeys, so that they compete
+only on cost and don't get preference for producing a special sort order.
+This creates additional bias against merge joins, since we might discard
+a path that could have been useful for performing a merge without an
+explicit sort step. Since a parameterized path must ultimately be used
+on the inside of a nestloop, where its sort order is uninteresting, these
+choices do not affect any requirement for the final output order of a
+query --- they only make it harder to use a merge join at a lower level.
+The savings in planning work justifies that.
+
+
-- bjm & tgl
join_search_hook_type join_search_hook = NULL;
+static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
+static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
+static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
+static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys,
+ Relids required_outer);
static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
-static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
- RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes);
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
+ Index rti;
+
+ /*
+ * Construct the all_baserels Relids set.
+ */
+ root->all_baserels = NULL;
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (brel == NULL)
+ continue;
+
+ Assert(brel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (brel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
+ }
/*
* Generate access paths for the base rels.
*/
+ set_base_rel_sizes(root);
set_base_rel_pathlists(root);
/*
/*
* The result should join all and only the query's base rels.
*/
-#ifdef USE_ASSERT_CHECKING
- {
- int num_base_rels = 0;
- Index rti;
-
- for (rti = 1; rti < root->simple_rel_array_size; rti++)
- {
- RelOptInfo *brel = root->simple_rel_array[rti];
-
- if (brel == NULL)
- continue;
-
- Assert(brel->relid == rti); /* sanity check on array */
-
- /* ignore RTEs that are "other rels" */
- if (brel->reloptkind != RELOPT_BASEREL)
- continue;
-
- Assert(bms_is_member(rti, rel->relids));
- num_base_rels++;
- }
-
- Assert(bms_num_members(rel->relids) == num_base_rels);
- }
-#endif
-
- return rel;
+ Assert(bms_equal(rel->relids, root->all_baserels));
+
+ return rel;
+}
+
+/*
+ * set_base_rel_sizes
+ * Set the size estimates (rows and widths) for each base-relation entry.
+ *
+ * We do this in a separate pass over the base rels so that rowcount
+ * estimates are available for parameterized path generation.
+ */
+static void
+set_base_rel_sizes(PlannerInfo *root)
+{
+ Index rti;
+
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *rel = root->simple_rel_array[rti];
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (rel == NULL)
+ continue;
+
+ Assert(rel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (rel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ set_rel_size(root, rel, rti, root->simple_rte_array[rti]);
+ }
}
/*
}
/*
- * set_rel_pathlist
- * Build access paths for a base relation
+ * set_rel_size
+ * Set size estimates for a base relation
*/
static void
-set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (rel->reloptkind == RELOPT_BASEREL &&
* so set up a single dummy path for it. Here we only check this for
* regular baserels; if it's an otherrel, CE was already checked in
 * set_append_rel_size().
+ *
+ * In this case, we go ahead and set up the relation's path right away
+ * instead of leaving it for set_rel_pathlist to do. This is because
+ * we don't have a convention for marking a rel as dummy except by
+ * assigning a dummy path to it.
*/
set_dummy_rel_pathlist(rel);
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
- set_append_rel_pathlist(root, rel, rti, rte);
+ set_append_rel_size(root, rel, rti, rte);
}
else
{
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* Foreign table */
- set_foreign_pathlist(root, rel, rte);
+ set_foreign_size(root, rel, rte);
}
else
{
/* Plain relation */
- set_plain_rel_pathlist(root, rel, rte);
+ set_plain_rel_size(root, rel, rte);
}
break;
case RTE_SUBQUERY:
- /* Subquery --- generate a separate plan for it */
+ /*
+ * Subqueries don't support parameterized paths, so just go
+ * ahead and build their paths immediately.
+ */
set_subquery_pathlist(root, rel, rti, rte);
break;
case RTE_FUNCTION:
- /* RangeFunction --- generate a suitable path for it */
- set_function_pathlist(root, rel, rte);
+ set_function_size_estimates(root, rel);
break;
case RTE_VALUES:
- /* Values list --- generate a suitable path for it */
- set_values_pathlist(root, rel, rte);
+ set_values_size_estimates(root, rel);
break;
case RTE_CTE:
- /* CTE reference --- generate a suitable path for it */
+ /*
+ * CTEs don't support parameterized paths, so just go ahead
+ * and build their paths immediately.
+ */
if (rte->self_reference)
set_worktable_pathlist(root, rel, rte);
else
break;
}
}
+}
+
+/*
+ * set_rel_pathlist
+ * Build access paths for a base relation
+ */
+static void
+set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ if (IS_DUMMY_REL(rel))
+ {
+ /* We already proved the relation empty, so nothing more to do */
+ }
+ else if (rte->inh)
+ {
+ /* It's an "append relation", process accordingly */
+ set_append_rel_pathlist(root, rel, rti, rte);
+ }
+ else
+ {
+ switch (rel->rtekind)
+ {
+ case RTE_RELATION:
+ if (rte->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ /* Foreign table */
+ set_foreign_pathlist(root, rel, rte);
+ }
+ else
+ {
+ /* Plain relation */
+ set_plain_rel_pathlist(root, rel, rte);
+ }
+ break;
+ case RTE_SUBQUERY:
+ /* Subquery --- fully handled during set_rel_size */
+ break;
+ case RTE_FUNCTION:
+ /* RangeFunction */
+ set_function_pathlist(root, rel, rte);
+ break;
+ case RTE_VALUES:
+ /* Values list */
+ set_values_pathlist(root, rel, rte);
+ break;
+ case RTE_CTE:
+ /* CTE reference --- fully handled during set_rel_size */
+ break;
+ default:
+ elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
+ break;
+ }
+ }
#ifdef OPTIMIZER_DEBUG
 debug_print_rel(root, rel);
#endif
}
/*
- * set_plain_rel_pathlist
- * Build access paths for a plain relation (no subquery, no inheritance)
+ * set_plain_rel_size
+ * Set size estimates for a plain relation (no subquery, no inheritance)
*/
static void
-set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/*
* Test any partial indexes of rel for applicability. We must do this
check_partial_indexes(root, rel);
set_baserel_size_estimates(root, rel);
}
+}
- /*
- * Generate paths and add them to the rel's pathlist.
- *
- * Note: add_path() will discard any paths that are dominated by another
- * available path, keeping only those paths that are superior along at
- * least one dimension of cost or sortedness.
- */
-
+/*
+ * set_plain_rel_pathlist
+ * Build access paths for a plain relation (no subquery, no inheritance)
+ */
+static void
+set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel));
}
/*
- * set_append_rel_pathlist
- * Build access paths for an "append relation"
+ * set_foreign_size
+ * Set size estimates for a foreign table RTE
+ */
+static void
+set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Mark rel with estimated output rows, width, etc */
+ set_foreign_size_estimates(root, rel);
+}
+
+/*
+ * set_foreign_pathlist
+ * Build the (single) access path for a foreign table RTE
+ */
+static void
+set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Generate appropriate path */
+ add_path(rel, (Path *) create_foreignscan_path(root, rel));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(rel);
+}
+
+/*
+ * set_append_rel_size
+ * Set size estimates for an "append relation"
*
* The passed-in rel and RTE represent the entire append relation. The
* relation's contents are computed by appending together the output of
* a good thing because their outputs are not the same size.
*/
static void
-set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
- Index rti, RangeTblEntry *rte)
+set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
- List *live_childrels = NIL;
- List *subpaths = NIL;
- List *all_child_pathkeys = NIL;
double parent_rows;
double parent_size;
double *parent_attrsizes;
nattrs = rel->max_attr - rel->min_attr + 1;
parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
- /*
- * Generate access paths for each member relation, and pick the cheapest
- * path for each one.
- */
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
RelOptInfo *childrel;
List *childquals;
Node *childqual;
- ListCell *lcp;
ListCell *parentvars;
ListCell *childvars;
{
/*
* This child need not be scanned, so we can omit it from the
- * appendrel. Mark it with a dummy cheapest-path though, in case
- * best_appendrel_indexscan() looks at it later.
+ * appendrel.
*/
set_dummy_rel_pathlist(childrel);
continue;
*/
/*
- * Compute the child's access paths.
+ * Compute the child's size.
*/
- set_rel_pathlist(root, childrel, childRTindex, childRTE);
+ set_rel_size(root, childrel, childRTindex, childRTE);
/*
* It is possible that constraint exclusion detected a contradiction
* within a child subquery, even though we didn't prove one above.
- * If what we got back was a dummy path, we can skip this child.
+ * If so, we can skip this child.
*/
- if (IS_DUMMY_PATH(childrel->cheapest_total_path))
+ if (IS_DUMMY_REL(childrel))
continue;
/*
- * Child is live, so add its cheapest access path to the Append path
- * we are constructing for the parent.
- */
- subpaths = accumulate_append_subpath(subpaths,
- childrel->cheapest_total_path);
-
- /* Remember which childrels are live, for MergeAppend logic below */
- live_childrels = lappend(live_childrels, childrel);
-
- /*
- * Collect a list of all the available path orderings for all the
- * children. We use this as a heuristic to indicate which sort
- * orderings we should build MergeAppend paths for.
- */
- foreach(lcp, childrel->pathlist)
- {
- Path *childpath = (Path *) lfirst(lcp);
- List *childkeys = childpath->pathkeys;
- ListCell *lpk;
- bool found = false;
-
- /* Ignore unsorted paths */
- if (childkeys == NIL)
- continue;
-
- /* Have we already seen this ordering? */
- foreach(lpk, all_child_pathkeys)
- {
- List *existing_pathkeys = (List *) lfirst(lpk);
-
- if (compare_pathkeys(existing_pathkeys,
- childkeys) == PATHKEYS_EQUAL)
- {
- found = true;
- break;
- }
- }
- if (!found)
- {
- /* No, so add it to all_child_pathkeys */
- all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
- }
- }
-
- /*
- * Accumulate size information from each child.
+ * Accumulate size information from each live child.
*/
if (childrel->rows > 0)
{
rel->tuples = parent_rows;
pfree(parent_attrsizes);
+}
+
+/*
+ * set_append_rel_pathlist
+ * Build access paths for an "append relation"
+ */
+static void
+set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ int parentRTindex = rti;
+ List *live_childrels = NIL;
+ List *subpaths = NIL;
+ List *all_child_pathkeys = NIL;
+ List *all_child_outers = NIL;
+ ListCell *l;
/*
- * Next, build an unordered Append path for the rel. (Note: this is
- * correct even if we have zero or one live subpath due to constraint
- * exclusion.)
+ * Generate access paths for each member relation, and remember the
+ * cheapest path for each one. Also, identify all pathkeys (orderings)
+ * and parameterizations (required_outer sets) available for the member
+ * relations.
+ */
+ foreach(l, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+ int childRTindex;
+ RangeTblEntry *childRTE;
+ RelOptInfo *childrel;
+ ListCell *lcp;
+
+ /* append_rel_list contains all append rels; ignore others */
+ if (appinfo->parent_relid != parentRTindex)
+ continue;
+
+ /* Re-locate the child RTE and RelOptInfo */
+ childRTindex = appinfo->child_relid;
+ childRTE = root->simple_rte_array[childRTindex];
+ childrel = root->simple_rel_array[childRTindex];
+
+ /*
+ * Compute the child's access paths.
+ */
+ set_rel_pathlist(root, childrel, childRTindex, childRTE);
+
+ /*
+ * If child is dummy, ignore it.
+ */
+ if (IS_DUMMY_REL(childrel))
+ continue;
+
+ /*
+ * Child is live, so add its cheapest access path to the Append path
+ * we are constructing for the parent.
+ */
+ subpaths = accumulate_append_subpath(subpaths,
+ childrel->cheapest_total_path);
+
+ /* Remember which childrels are live, for logic below */
+ live_childrels = lappend(live_childrels, childrel);
+
+ /*
+ * Collect lists of all the available path orderings and
+ * parameterizations for all the children. We use these as a
+ * heuristic to indicate which sort orderings and parameterizations we
+ * should build Append and MergeAppend paths for.
+ */
+ foreach(lcp, childrel->pathlist)
+ {
+ Path *childpath = (Path *) lfirst(lcp);
+ List *childkeys = childpath->pathkeys;
+ Relids childouter = childpath->required_outer;
+
+ /* Unsorted paths don't contribute to pathkey list */
+ if (childkeys != NIL)
+ {
+ ListCell *lpk;
+ bool found = false;
+
+ /* Have we already seen this ordering? */
+ foreach(lpk, all_child_pathkeys)
+ {
+ List *existing_pathkeys = (List *) lfirst(lpk);
+
+ if (compare_pathkeys(existing_pathkeys,
+ childkeys) == PATHKEYS_EQUAL)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_pathkeys */
+ all_child_pathkeys = lappend(all_child_pathkeys,
+ childkeys);
+ }
+ }
+
+ /* Unparameterized paths don't contribute to param-set list */
+ if (childouter)
+ {
+ ListCell *lco;
+ bool found = false;
+
+ /* Have we already seen this param set? */
+ foreach(lco, all_child_outers)
+ {
+ Relids existing_outers = (Relids) lfirst(lco);
+
+ if (bms_equal(existing_outers, childouter))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_outers */
+ all_child_outers = lappend(all_child_outers,
+ childouter);
+ }
+ }
+ }
+ }
+
+ /*
+ * Next, build an unordered, unparameterized Append path for the rel.
+ * (Note: this is correct even if we have zero or one live subpath due to
+ * constraint exclusion.)
*/
add_path(rel, (Path *) create_append_path(rel, subpaths));
/*
- * Next, build MergeAppend paths based on the collected list of child
- * pathkeys. We consider both cheapest-startup and cheapest-total cases,
- * ie, for each interesting ordering, collect all the cheapest startup
- * subpaths and all the cheapest total paths, and build a MergeAppend path
- * for each list.
+ * Build unparameterized MergeAppend paths based on the collected list of
+ * child pathkeys.
+ */
+ generate_mergeappend_paths(root, rel, live_childrels,
+ all_child_pathkeys, NULL);
+
+ /*
+ * Build Append and MergeAppend paths for each parameterization seen
+ * among the child rels. (This may look pretty expensive, but in most
+ * cases of practical interest, the child relations will tend to expose
+ * the same parameterizations and pathkeys, so that not that many cases
+ * actually get considered here.)
*/
- foreach(l, all_child_pathkeys)
+ foreach(l, all_child_outers)
+ {
+ Relids required_outer = (Relids) lfirst(l);
+ ListCell *lcr;
+
+ /* Select the child paths for an Append with this parameterization */
+ subpaths = NIL;
+ foreach(lcr, live_childrels)
+ {
+ RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+ Path *cheapest_total;
+
+ cheapest_total =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ NIL,
+ required_outer,
+ TOTAL_COST);
+ Assert(cheapest_total != NULL);
+
+ subpaths = accumulate_append_subpath(subpaths, cheapest_total);
+ }
+ add_path(rel, (Path *) create_append_path(rel, subpaths));
+
+ /* And build parameterized MergeAppend paths */
+ generate_mergeappend_paths(root, rel, live_childrels,
+ all_child_pathkeys, required_outer);
+ }
+
+ /* Select cheapest paths */
+ set_cheapest(rel);
+}
+
+/*
+ * generate_mergeappend_paths
+ * Generate MergeAppend paths for an append relation
+ *
+ * Generate a path for each ordering (pathkey list) appearing in
+ * all_child_pathkeys. If required_outer isn't NULL, accept paths having
+ * those relations as required outer relations.
+ *
+ * We consider both cheapest-startup and cheapest-total cases, ie, for each
+ * interesting ordering, collect all the cheapest startup subpaths and all the
+ * cheapest total paths, and build a MergeAppend path for each case.
+ */
+static void
+generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys,
+ Relids required_outer)
+{
+ ListCell *lcp;
+
+ foreach(lcp, all_child_pathkeys)
{
- List *pathkeys = (List *) lfirst(l);
+ List *pathkeys = (List *) lfirst(lcp);
List *startup_subpaths = NIL;
List *total_subpaths = NIL;
bool startup_neq_total = false;
cheapest_startup =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
+ required_outer,
STARTUP_COST);
cheapest_total =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
+ required_outer,
TOTAL_COST);
/*
- * If we can't find any paths with the right order just add the
- * cheapest-total path; we'll have to sort it.
+ * If we can't find any paths with the right order just use the
+ * cheapest-total path; we'll have to sort it later. When a
+ * parameterization is required, fall back to the cheapest path having
+ * that parameterization, regardless of its ordering.
*/
- if (cheapest_startup == NULL)
- cheapest_startup = childrel->cheapest_total_path;
- if (cheapest_total == NULL)
- cheapest_total = childrel->cheapest_total_path;
+ if (cheapest_startup == NULL || cheapest_total == NULL)
+ {
+ if (required_outer)
+ cheapest_startup = cheapest_total =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ NIL,
+ required_outer,
+ TOTAL_COST);
+ else
+ cheapest_startup = cheapest_total =
+ childrel->cheapest_total_path;
+ Assert(cheapest_total != NULL);
+ }
/*
* Notice whether we actually have different paths for the
total_subpaths,
pathkeys));
}
-
- /* Select cheapest path */
- set_cheapest(rel);
}
/*
* Build a dummy path for a relation that's been excluded by constraints
*
* Rather than inventing a special "dummy" path type, we represent this as an
- * AppendPath with no members (see also IS_DUMMY_PATH macro).
+ * AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
*/
static void
set_dummy_rel_pathlist(RelOptInfo *rel)
rel->rows = 0;
rel->width = 0;
+ /* Discard any pre-existing paths; no further need for them */
+ rel->pathlist = NIL;
+
add_path(rel, (Path *) create_append_path(rel, NIL));
/* Select cheapest path (pretty easy in this case...) */
/*
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
+ *
+ * There's no need for a separate set_subquery_size phase, since we don't
+ * support parameterized paths for subqueries.
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
- /* Mark rel with estimated output rows, width, etc */
- set_function_size_estimates(root, rel);
-
/* Generate appropriate path */
add_path(rel, create_functionscan_path(root, rel));
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
- /* Mark rel with estimated output rows, width, etc */
- set_values_size_estimates(root, rel);
-
/* Generate appropriate path */
add_path(rel, create_valuesscan_path(root, rel));
/*
* set_cte_pathlist
* Build the (single) access path for a non-self-reference CTE RTE
+ *
+ * There's no need for a separate set_cte_size phase, since we don't
+ * support parameterized paths for CTEs.
*/
static void
set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
* set_worktable_pathlist
* Build the (single) access path for a self-reference CTE RTE
+ *
+ * There's no need for a separate set_worktable_size phase, since we don't
+ * support parameterized paths for CTEs.
*/
static void
set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
set_cheapest(rel);
}
-/*
- * set_foreign_pathlist
- * Build the (single) access path for a foreign table RTE
- */
-static void
-set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
-{
- /* Mark rel with estimated output rows, width, etc */
- set_foreign_size_estimates(root, rel);
-
- /* Generate appropriate path */
- add_path(rel, (Path *) create_foreignscan_path(root, rel));
-
- /* Select cheapest path (pretty easy in this case...) */
- set_cheapest(rel);
-}
-
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
* path's result. A caller can estimate the cost of fetching a partial
* result by interpolating between startup_cost and total_cost. In detail:
* actual_cost = startup_cost +
- * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
+ * (total_cost - startup_cost) * tuples_to_fetch / path->rows;
* Note that a base relation's rows count (and, by extension, plan_rows for
* plan nodes below the LIMIT node) are set without regard to any LIMIT, so
* that this equation works properly. (Also, these routines guarantee not to
* applied as a top-level plan node.
*
* For largely historical reasons, most of the routines in this module use
- * the passed result Path only to store their startup_cost and total_cost
- * results into. All the input data they need is passed as separate
+ * the passed result Path only to store their results (rows, startup_cost and
+ * total_cost) into. All the input data they need is passed as separate
* parameters, even though much of it could be extracted from the Path.
* An exception is made for the cost_XXXjoin() routines, which expect all
- * the non-cost fields of the passed XXXPath to be filled in, and similarly
+ * the other fields of the passed XXXPath to be filled in, and similarly
* cost_index() assumes the passed IndexPath is valid except for its output
* values.
*
#define LOG2(x) (log(x) / 0.693147180559945)
-/*
- * Some Paths return less than the nominal number of rows of their parent
- * relations; join nodes need to do this to get the correct input count:
- */
-#define PATH_ROWS(path) \
- (IsA(path, UniquePath) ? \
- ((UniquePath *) (path))->rows : \
- (path)->parent->rows)
-
double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
static void cost_rescan(PlannerInfo *root, Path *path,
Cost *rescan_startup_cost, Cost *rescan_total_cost);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
- SpecialJoinInfo *sjinfo,
- Selectivity *outer_match_frac,
- Selectivity *match_count,
- bool *indexed_join_quals);
+static bool has_indexed_join_quals(NestPath *path, List *joinclauses);
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
List *quals);
+static void set_joinpath_size_estimate(PlannerInfo *root, JoinPath *path,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+static double calc_joinrel_size_estimate(PlannerInfo *root,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* For now, at least, seqscans are never parameterized */
+ path->rows = baserel->rows;
+
if (!enable_seqscan)
startup_cost += disable_cost;
*
* 'path' describes the indexscan under consideration, and is complete
* except for the fields to be set by this routine
- * 'outer_rel' is the outer relation when we are considering using the index
- * scan as the inside of a nestloop join (hence, some of the indexquals
- * are join clauses, and we should expect repeated scans of the index);
- * NULL for a plain index scan
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior
*
- * In addition to startup_cost and total_cost, cost_index() sets the path's
- * indextotalcost and indexselectivity fields. These values are needed if
- * the IndexPath is used in a BitmapIndexScan.
+ * In addition to rows, startup_cost and total_cost, cost_index() sets the
+ * path's indextotalcost and indexselectivity fields. These values will be
+ * needed if the IndexPath is used in a BitmapIndexScan.
*
* NOTE: path->indexquals must contain only clauses usable as index
* restrictions. Any additional quals evaluated as qpquals may reduce the
* we have to fetch from the table, so they don't reduce the scan cost.
*/
void
-cost_index(IndexPath *path, PlannerInfo *root, RelOptInfo *outer_rel)
+cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
{
IndexOptInfo *index = path->indexinfo;
RelOptInfo *baserel = index->rel;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* Estimate the number of rows returned by the indexscan */
+ if (path->path.required_outer)
+ {
+ /*
+ * The estimate should be less than baserel->rows because of the
+ * additional selectivity of the join clauses. Since indexclauses may
+ * contain both restriction and join clauses, we have to do a set
+ * union to get the full set of clauses that must be considered to
+ * compute the correct selectivity. (Without the union operation, we
+ * might have some restriction clauses appearing twice, which'd
+ * mislead clauselist_selectivity into double-counting their
+ * selectivity. However, since RestrictInfo nodes aren't copied when
+ * linking them into different lists, it should be sufficient to use
+ * pointer comparison to remove duplicates.)
+ *
+ * Note that we force the clauses to be treated as non-join clauses
+ * during selectivity estimation.
+ */
+ List *allclauses;
+
+ allclauses = list_union_ptr(baserel->baserestrictinfo,
+ path->indexclauses);
+ path->path.rows = baserel->tuples *
+ clauselist_selectivity(root,
+ allclauses,
+ baserel->relid, /* do not use 0! */
+ JOIN_INNER,
+ NULL);
+ if (path->path.rows > baserel->rows)
+ path->path.rows = baserel->rows;
+ path->path.rows = clamp_row_est(path->path.rows);
+ }
+ else
+ {
+ /*
+ * The number of rows is the same as the parent rel's estimate, since
+ * this isn't a parameterized path.
+ */
+ path->path.rows = baserel->rows;
+ }
+
if (!enable_indexscan)
startup_cost += disable_cost;
/* we don't need to check enable_indexonlyscan; indxpath.c does that */
OidFunctionCall7(index->amcostestimate,
PointerGetDatum(root),
PointerGetDatum(path),
- PointerGetDatum(outer_rel),
+ Float8GetDatum(loop_count),
PointerGetDatum(&indexStartupCost),
PointerGetDatum(&indexTotalCost),
PointerGetDatum(&indexSelectivity),
* that this query will fetch; but it's not clear how to do better.
*----------
*/
- if (outer_rel != NULL && outer_rel->rows > 1)
+ if (loop_count > 1)
{
/*
* For repeated indexscans, the appropriate estimate for the
* pro-rate the costs for one scan. In this case we assume all the
* fetches are random accesses.
*/
- double num_scans = outer_rel->rows;
-
- pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
+ pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
baserel->pages,
(double) index->pages,
root);
if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
- max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
+ max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
/*
* In the perfectly correlated case, the number of pages touched by
*/
pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
- pages_fetched = index_pages_fetched(pages_fetched * num_scans,
+ pages_fetched = index_pages_fetched(pages_fetched * loop_count,
baserel->pages,
(double) index->pages,
root);
if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
- min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
+ min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
}
else
{
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
- if (outer_rel == NULL)
+ if (path->path.required_outer == NULL)
{
QualCost index_qual_cost;
*
* 'baserel' is the relation to be scanned
* 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
- * 'outer_rel' is the outer relation when we are considering using the bitmap
- * scan as the inside of a nestloop join (hence, some of the indexquals
- * are join clauses, and we should expect repeated scans of the table);
- * NULL for a plain bitmap scan
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior
*
- * Note: if this is a join inner path, the component IndexPaths in bitmapqual
- * should have been costed accordingly.
+ * Note: the component IndexPaths in bitmapqual should have been costed
+ * using the same loop_count.
*/
void
cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- Path *bitmapqual, RelOptInfo *outer_rel)
+ Path *bitmapqual, double loop_count)
{
Cost startup_cost = 0;
Cost run_cost = 0;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* Estimate the number of rows returned by the bitmap scan */
+ if (path->required_outer)
+ {
+ /*
+ * The estimate should be less than baserel->rows because of the
+ * additional selectivity of the join clauses. We make use of the
+ * selectivity estimated for the bitmap to do this; this isn't really
+ * quite right since there may be restriction conditions not included
+ * in the bitmap ...
+ */
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+
+ cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+ path->rows = baserel->tuples * indexSelectivity;
+ if (path->rows > baserel->rows)
+ path->rows = baserel->rows;
+ path->rows = clamp_row_est(path->rows);
+ }
+ else
+ {
+ /*
+ * The number of rows is the same as the parent rel's estimate, since
+ * this isn't a parameterized path.
+ */
+ path->rows = baserel->rows;
+ }
+
if (!enable_bitmapscan)
startup_cost += disable_cost;
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
- if (outer_rel != NULL && outer_rel->rows > 1)
+ if (loop_count > 1)
{
/*
* For repeated bitmap scans, scale up the number of tuples fetched in
* estimate the number of pages fetched by all the scans. Then
* pro-rate for one scan.
*/
- double num_scans = outer_rel->rows;
-
- pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
+ pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
baserel->pages,
get_indexpath_pages(bitmapqual),
root);
- pages_fetched /= num_scans;
+ pages_fetched /= loop_count;
}
else
{
* scan doesn't look to be the same cost as an indexscan to retrieve a
* single tuple.
*/
- *cost += 0.1 * cpu_operator_cost * ((IndexPath *) path)->rows;
+ *cost += 0.1 * cpu_operator_cost * path->rows;
}
else if (IsA(path, BitmapAndPath))
{
* Note that this considers only the costs of index scanning and bitmap
* creation, not the eventual heap access. In that sense the object isn't
* truly a Path, but it has enough path-like properties (costs in particular)
- * to warrant treating it as one.
+ * to warrant treating it as one. We don't bother to set the path rows field,
+ * however.
*/
void
cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
totalCost += 100.0 * cpu_operator_cost;
}
path->bitmapselectivity = selec;
+ path->path.rows = 0; /* per above, not used */
path->path.startup_cost = totalCost;
path->path.total_cost = totalCost;
}
totalCost += 100.0 * cpu_operator_cost;
}
path->bitmapselectivity = Min(selec, 1.0);
+ path->path.rows = 0; /* per above, not used */
path->path.startup_cost = totalCost;
path->path.total_cost = totalCost;
}
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* For now, tidscans are never parameterized */
+ path->rows = baserel->rows;
+
/* Count how many tuples we expect to retrieve */
ntuples = 0;
foreach(l, tidquals)
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_SUBQUERY);
+ /* subqueryscans are never parameterized */
+ path->rows = baserel->rows;
+
/*
* Cost of path is cost of evaluating the subplan, plus cost of evaluating
* any restriction clauses that will be attached to the SubqueryScan node,
rte = planner_rt_fetch(baserel->relid, root);
Assert(rte->rtekind == RTE_FUNCTION);
+ /* functionscans are never parameterized */
+ path->rows = baserel->rows;
+
/*
* Estimate costs of executing the function expression.
*
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_VALUES);
+ /* valuesscans are never parameterized */
+ path->rows = baserel->rows;
+
/*
* For now, estimate list evaluation cost at one operator eval per list
* (probably pretty bogus, but is it worth being smarter?)
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_CTE);
+ /* ctescans are never parameterized */
+ path->rows = baserel->rows;
+
/* Charge one CPU tuple cost per row for tuplestore manipulation */
cpu_per_tuple = cpu_tuple_cost;
if (!enable_sort)
startup_cost += disable_cost;
+ path->rows = tuples;
+
/*
* We want to be sure the cost of a sort is never estimated as zero, even
* if passed-in tuple count is zero. Besides, mustn't do log(0)...
double nbytes = relation_byte_size(tuples, width);
long work_mem_bytes = work_mem * 1024L;
+ path->rows = tuples;
+
/*
* Whether spilling or not, charge 2x cpu_operator_cost per tuple to
* reflect bookkeeping overhead. (This rate must be more than what
Cost input_startup_cost, Cost input_total_cost,
double input_tuples)
{
+ double output_tuples;
Cost startup_cost;
Cost total_cost;
AggClauseCosts dummy_aggcosts;
startup_cost += aggcosts->finalCost;
/* we aren't grouping */
total_cost = startup_cost + cpu_tuple_cost;
+ output_tuples = 1;
}
else if (aggstrategy == AGG_SORTED)
{
total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
total_cost += aggcosts->finalCost * numGroups;
total_cost += cpu_tuple_cost * numGroups;
+ output_tuples = numGroups;
}
else
{
total_cost = startup_cost;
total_cost += aggcosts->finalCost * numGroups;
total_cost += cpu_tuple_cost * numGroups;
+ output_tuples = numGroups;
}
+ path->rows = output_tuples;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
total_cost += cpu_tuple_cost * input_tuples;
+ path->rows = input_tuples;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
*/
total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+ path->rows = numGroups;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
/*
- * If a nestloop's inner path is an indexscan, be sure to use its estimated
- * output row count, which may be lower than the restriction-clause-only row
- * count of its parent. (We don't include this case in the PATH_ROWS macro
- * because it applies *only* to a nestloop's inner relation.) We have to
- * be prepared to recurse through Append or MergeAppend nodes in case of an
- * appendrel. (It's not clear MergeAppend can be seen here, but we may as
- * well handle it if so.)
- */
-static double
-nestloop_inner_path_rows(Path *path)
-{
- double result;
-
- if (IsA(path, IndexPath))
- result = ((IndexPath *) path)->rows;
- else if (IsA(path, BitmapHeapPath))
- result = ((BitmapHeapPath *) path)->rows;
- else if (IsA(path, AppendPath))
- {
- ListCell *l;
-
- result = 0;
- foreach(l, ((AppendPath *) path)->subpaths)
- {
- result += nestloop_inner_path_rows((Path *) lfirst(l));
- }
- }
- else if (IsA(path, MergeAppendPath))
- {
- ListCell *l;
-
- result = 0;
- foreach(l, ((MergeAppendPath *) path)->subpaths)
- {
- result += nestloop_inner_path_rows((Path *) lfirst(l));
- }
- }
- else
- result = PATH_ROWS(path);
-
- return result;
-}
-
-/*
- * cost_nestloop
- * Determines and returns the cost of joining two relations using the
- * nested loop algorithm.
- *
- * 'path' is already filled in except for the cost fields
+ * initial_cost_nestloop
+ * Preliminary estimate of the cost of a nestloop join path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_nestloop will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_nestloop
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here, since that's by far the most expensive part of the
+ * calculations. The end result is that CPU-cost considerations must be
+ * left for the second phase.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_nestloop
+ * 'jointype' is the type of join to be performed
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
*/
void
-cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
{
- Path *outer_path = path->outerjoinpath;
- Path *inner_path = path->innerjoinpath;
Cost startup_cost = 0;
Cost run_cost = 0;
+ double outer_path_rows = outer_path->rows;
Cost inner_rescan_start_cost;
Cost inner_rescan_total_cost;
Cost inner_run_cost;
Cost inner_rescan_run_cost;
- Cost cpu_per_tuple;
- QualCost restrict_qual_cost;
- double outer_path_rows = PATH_ROWS(outer_path);
- double inner_path_rows = nestloop_inner_path_rows(inner_path);
- double ntuples;
- Selectivity outer_match_frac;
- Selectivity match_count;
- bool indexed_join_quals;
-
- if (!enable_nestloop)
- startup_cost += disable_cost;
/* estimate costs to rescan the inner relation */
cost_rescan(root, inner_path,
inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
- if (adjust_semi_join(root, path, sjinfo,
- &outer_match_frac,
- &match_count,
- &indexed_join_quals))
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
double outer_matched_rows;
Selectivity inner_scan_frac;
*/
run_cost += inner_run_cost;
- outer_matched_rows = rint(outer_path_rows * outer_match_frac);
- inner_scan_frac = 2.0 / (match_count + 1.0);
+ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+ inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
/* Add inner run cost for additional outer tuples having matches */
if (outer_matched_rows > 1)
run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+ /*
+ * The cost of processing unmatched rows varies depending on the
+ * details of the joinclauses, so we leave that part for later.
+ */
+
+ /* Save private data for final_cost_nestloop */
+ workspace->outer_matched_rows = outer_matched_rows;
+ workspace->inner_scan_frac = inner_scan_frac;
+ }
+ else
+ {
+ /* Normal case; we'll scan whole input rel for each outer row */
+ run_cost += inner_run_cost;
+ if (outer_path_rows > 1)
+ run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
+ }
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost;
+ /* Save private data for final_cost_nestloop */
+ workspace->run_cost = run_cost;
+ workspace->inner_rescan_run_cost = inner_rescan_run_cost;
+}
+
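+/*
+ * Usage sketch (illustrative; joinpath.c follows this pattern): compute
+ * the lower-bound costs first, discard the candidate early if it cannot
+ * survive add_path_precheck, and only then build the path, at which point
+ * create_nestloop_path performs final_cost_nestloop:
+ *
+ *		JoinCostWorkspace workspace;
+ *
+ *		initial_cost_nestloop(root, &workspace, jointype,
+ *							  outer_path, inner_path, sjinfo, &semifactors);
+ *		if (add_path_precheck(joinrel, workspace.startup_cost,
+ *							  workspace.total_cost, pathkeys, required_outer))
+ *			add_path(joinrel, (Path *) create_nestloop_path(...));
+ */
+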
+/*
+ * final_cost_nestloop
+ * Final estimate of the cost and result size of a nestloop join path.
+ *
+ * 'path' is already filled in except for the rows and cost fields
+ * 'workspace' is the result from initial_cost_nestloop
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
+ */
+void
+final_cost_nestloop(PlannerInfo *root, NestPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
+{
+ Path *outer_path = path->outerjoinpath;
+ Path *inner_path = path->innerjoinpath;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+ Cost cpu_per_tuple;
+ List *joinclauses;
+ QualCost restrict_qual_cost;
+ double ntuples;
+
+ /* Estimate the number of rows returned by the join */
+ if (path->path.required_outer)
+ {
+ /*
+ * The nestloop is (still) parameterized because of upper-level join
+ * clauses used by the input paths. So the rowcount estimate should
+ * be less than the joinrel's row count because of the additional
+ * selectivity of those join clauses. To estimate the size we need
+ * to know which of the joinrestrictinfo clauses nominally associated
+ * with the join have been applied in the inner input path.
+ *
+ * We should also assume that such clauses won't be evaluated at the
+ * join node at runtime, so exclude them from restrict_qual_cost.
+ */
+ joinclauses = select_nonredundant_join_clauses(path->joinrestrictinfo,
+ path->innerjoinpath->param_clauses);
+ set_joinpath_size_estimate(root, path, sjinfo, joinclauses);
+ }
+ else
+ {
+ joinclauses = path->joinrestrictinfo;
+ path->path.rows = path->path.parent->rows;
+ }
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_nestloop)
+ startup_cost += disable_cost;
+
+ /* cost of source data */
+
+ if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
+ {
+ double outer_matched_rows = workspace->outer_matched_rows;
+ Selectivity inner_scan_frac = workspace->inner_scan_frac;
+
+ /*
+ * SEMI or ANTI join: executor will stop after first match.
+ */
+
/* Compute number of tuples processed (not number emitted!) */
ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
* return the first tuple of a nonempty scan. Otherwise, the executor
* will have to scan the whole inner rel; not so cheap.
*/
- if (indexed_join_quals)
+ if (has_indexed_join_quals(path, joinclauses))
{
run_cost += (outer_path_rows - outer_matched_rows) *
inner_rescan_run_cost / inner_path_rows;
}
else
{
- /* Normal case; we'll scan whole input rel for each outer row */
- run_cost += inner_run_cost;
- if (outer_path_rows > 1)
- run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
+ /* Normal-case source costs were included in preliminary estimate */
/* Compute number of tuples processed (not number emitted!) */
ntuples = outer_path_rows * inner_path_rows;
}
/* CPU costs */
- cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
+ cost_qual_eval(&restrict_qual_cost, joinclauses, root);
startup_cost += restrict_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
run_cost += cpu_per_tuple * ntuples;
}
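/*
 * Illustration (not part of the patch), with made-up numbers: if
 * outer_path_rows = 100, semifactors->outer_match_frac = 0.4 and
 * semifactors->match_count = 3, the preliminary pass computed
 * outer_matched_rows = rint(100 * 0.4) = 40 and inner_scan_frac =
 * 2.0 / (3 + 1) = 0.5, so the join quals are charged for ntuples =
 * 40 * inner_path_rows * 0.5 tuples.  When has_indexed_join_quals()
 * holds, each of the 60 unmatched outer rows pays only
 * inner_rescan_run_cost / inner_path_rows, roughly the cost of fetching
 * a single inner tuple rather than rescanning the whole inner rel.
 */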
/*
- * cost_mergejoin
- * Determines and returns the cost of joining two relations using the
- * merge join algorithm.
- *
- * Unlike other costsize functions, this routine makes one actual decision:
- * whether we should materialize the inner path. We do that either because
- * the inner path can't support mark/restore, or because it's cheaper to
- * use an interposed Material node to handle mark/restore. When the decision
- * is cost-based it would be logically cleaner to build and cost two separate
- * paths with and without that flag set; but that would require repeating most
- * of the calculations here, which are not all that cheap. Since the choice
- * will not affect output pathkeys or startup cost, only total cost, there is
- * no possibility of wanting to keep both paths. So it seems best to make
- * the decision here and record it in the path's materialize_inner field.
- *
- * 'path' is already filled in except for the cost fields and materialize_inner
+ * initial_cost_mergejoin
+ * Preliminary estimate of the cost of a mergejoin path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_mergejoin will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_mergejoin
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here, except for obtaining the scan selectivity estimate which
+ * is really essential (but fortunately, use of caching keeps the cost of
+ * getting that down to something reasonable).
+ * We also assume that cost_sort is cheap enough to use here.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_mergejoin
+ * 'jointype' is the type of join to be performed
+ * 'mergeclauses' is the list of joinclauses to be used as merge clauses
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
+ * 'outersortkeys' is the list of sort keys for the outer path
+ * 'innersortkeys' is the list of sort keys for the inner path
* 'sjinfo' is extra info about the join for selectivity estimation
*
- * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
- * outersortkeys and innersortkeys are lists of the keys to be used
- * to sort the outer and inner relations, or NIL if no explicit
- * sort is needed because the source path is already ordered.
+ * Note: outersortkeys and innersortkeys should be NIL if no explicit
+ * sort is needed because the respective source path is already ordered.
*/
void
-cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *mergeclauses,
+ Path *outer_path, Path *inner_path,
+ List *outersortkeys, List *innersortkeys,
+ SpecialJoinInfo *sjinfo)
{
- Path *outer_path = path->jpath.outerjoinpath;
- Path *inner_path = path->jpath.innerjoinpath;
- List *mergeclauses = path->path_mergeclauses;
- List *outersortkeys = path->outersortkeys;
- List *innersortkeys = path->innersortkeys;
Cost startup_cost = 0;
Cost run_cost = 0;
- Cost cpu_per_tuple,
- inner_run_cost,
- bare_inner_cost,
- mat_inner_cost;
- QualCost merge_qual_cost;
- QualCost qp_qual_cost;
- double outer_path_rows = PATH_ROWS(outer_path);
- double inner_path_rows = PATH_ROWS(inner_path);
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ Cost inner_run_cost;
double outer_rows,
inner_rows,
outer_skip_rows,
inner_skip_rows;
- double mergejointuples,
- rescannedtuples;
- double rescanratio;
Selectivity outerstartsel,
outerendsel,
innerstartsel,
if (inner_path_rows <= 0 || isnan(inner_path_rows))
inner_path_rows = 1;
- if (!enable_mergejoin)
- startup_cost += disable_cost;
-
- /*
- * Compute cost of the mergequals and qpquals (other restriction clauses)
- * separately.
- */
- cost_qual_eval(&merge_qual_cost, mergeclauses, root);
- cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
- qp_qual_cost.startup -= merge_qual_cost.startup;
- qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
-
- /*
- * Get approx # tuples passing the mergequals. We use approx_tuple_count
- * here because we need an estimate done with JOIN_INNER semantics.
- */
- mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
-
- /*
- * When there are equal merge keys in the outer relation, the mergejoin
- * must rescan any matching tuples in the inner relation. This means
- * re-fetching inner tuples; we have to estimate how often that happens.
- *
- * For regular inner and outer joins, the number of re-fetches can be
- * estimated approximately as size of merge join output minus size of
- * inner relation. Assume that the distinct key values are 1, 2, ..., and
- * denote the number of values of each key in the outer relation as m1,
- * m2, ...; in the inner relation, n1, n2, ... Then we have
- *
- * size of join = m1 * n1 + m2 * n2 + ...
- *
- * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
- * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
- * relation
- *
- * This equation works correctly for outer tuples having no inner match
- * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
- * are effectively subtracting those from the number of rescanned tuples,
- * when we should not. Can we do better without expensive selectivity
- * computations?
- *
- * The whole issue is moot if we are working from a unique-ified outer
- * input.
- */
- if (IsA(outer_path, UniquePath))
- rescannedtuples = 0;
- else
- {
- rescannedtuples = mergejointuples - inner_path_rows;
- /* Must clamp because of possible underestimate */
- if (rescannedtuples < 0)
- rescannedtuples = 0;
- }
- /* We'll inflate various costs this much to account for rescanning */
- rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
-
/*
* A merge join will stop as soon as it exhausts either input stream
* (unless it's an outer join, in which case the outer side has to be
* mergejoinscansel() is a fairly expensive computation, we cache the
* results in the merge clause RestrictInfo.
*/
- if (mergeclauses && path->jpath.jointype != JOIN_FULL)
+ if (mergeclauses && jointype != JOIN_FULL)
{
RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
List *opathkeys;
innerstartsel = cache->leftstartsel;
innerendsel = cache->leftendsel;
}
- if (path->jpath.jointype == JOIN_LEFT ||
- path->jpath.jointype == JOIN_ANTI)
+ if (jointype == JOIN_LEFT ||
+ jointype == JOIN_ANTI)
{
outerstartsel = 0.0;
outerendsel = 1.0;
}
- else if (path->jpath.jointype == JOIN_RIGHT)
+ else if (jointype == JOIN_RIGHT)
{
innerstartsel = 0.0;
innerendsel = 1.0;
* (innerendsel - innerstartsel);
}
+ /*
+ * We can't yet determine whether rescanning occurs, or whether
+ * materialization of the inner input should be done. The minimum
+ * possible inner input cost, regardless of rescan and materialization
+ * considerations, is inner_run_cost. We include that in
+ * workspace->total_cost, but not yet in run_cost.
+ */
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost + inner_run_cost;
+ /* Save private data for final_cost_mergejoin */
+ workspace->run_cost = run_cost;
+ workspace->inner_run_cost = inner_run_cost;
+ workspace->outer_rows = outer_rows;
+ workspace->inner_rows = inner_rows;
+ workspace->outer_skip_rows = outer_skip_rows;
+ workspace->inner_skip_rows = inner_skip_rows;
+}
+
+/*
+ * final_cost_mergejoin
+ * Final estimate of the cost and result size of a mergejoin path.
+ *
+ * Unlike other costsize functions, this routine makes one actual decision:
+ * whether we should materialize the inner path. We do that either because
+ * the inner path can't support mark/restore, or because it's cheaper to
+ * use an interposed Material node to handle mark/restore. When the decision
+ * is cost-based it would be logically cleaner to build and cost two separate
+ * paths with and without that flag set; but that would require repeating most
+ * of the cost calculations, which are not all that cheap. Since the choice
+ * will not affect output pathkeys or startup cost, only total cost, there is
+ * no possibility of wanting to keep both paths. So it seems best to make
+ * the decision here and record it in the path's materialize_inner field.
+ *
+ * 'path' is already filled in except for the rows and cost fields and
+ * materialize_inner
+ * 'workspace' is the result from initial_cost_mergejoin
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ */
+void
+final_cost_mergejoin(PlannerInfo *root, MergePath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double inner_path_rows = inner_path->rows;
+ List *mergeclauses = path->path_mergeclauses;
+ List *innersortkeys = path->innersortkeys;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ Cost inner_run_cost = workspace->inner_run_cost;
+ double outer_rows = workspace->outer_rows;
+ double inner_rows = workspace->inner_rows;
+ double outer_skip_rows = workspace->outer_skip_rows;
+ double inner_skip_rows = workspace->inner_skip_rows;
+ Cost cpu_per_tuple,
+ bare_inner_cost,
+ mat_inner_cost;
+ QualCost merge_qual_cost;
+ QualCost qp_qual_cost;
+ double mergejointuples,
+ rescannedtuples;
+ double rescanratio;
+
+ /* Protect some assumptions below that rowcounts aren't zero or NaN */
+ if (inner_path_rows <= 0 || isnan(inner_path_rows))
+ inner_path_rows = 1;
+
+ /* Estimate the number of rows returned by the join */
+ set_joinpath_size_estimate(root, &path->jpath, sjinfo,
+ path->jpath.joinrestrictinfo);
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_mergejoin)
+ startup_cost += disable_cost;
+
+ /*
+ * Compute cost of the mergequals and qpquals (other restriction clauses)
+ * separately.
+ */
+ cost_qual_eval(&merge_qual_cost, mergeclauses, root);
+ cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
+ qp_qual_cost.startup -= merge_qual_cost.startup;
+ qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
+
+ /*
+ * Get approx # tuples passing the mergequals. We use approx_tuple_count
+ * here because we need an estimate done with JOIN_INNER semantics.
+ */
+ mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
+
+ /*
+ * When there are equal merge keys in the outer relation, the mergejoin
+ * must rescan any matching tuples in the inner relation. This means
+ * re-fetching inner tuples; we have to estimate how often that happens.
+ *
+ * For regular inner and outer joins, the number of re-fetches can be
+ * estimated approximately as size of merge join output minus size of
+ * inner relation. Assume that the distinct key values are 1, 2, ..., and
+ * denote the number of values of each key in the outer relation as m1,
+ * m2, ...; in the inner relation, n1, n2, ... Then we have
+ *
+ * size of join = m1 * n1 + m2 * n2 + ...
+ *
+ * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
+ * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
+ * relation
+ *
+ * This equation works correctly for outer tuples having no inner match
+ * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
+ * are effectively subtracting those from the number of rescanned tuples,
+ * when we should not. Can we do better without expensive selectivity
+ * computations?
+ *
+ * The whole issue is moot if we are working from a unique-ified outer
+ * input.
+ */
+ if (IsA(outer_path, UniquePath))
+ rescannedtuples = 0;
+ else
+ {
+ rescannedtuples = mergejointuples - inner_path_rows;
+ /* Must clamp because of possible underestimate */
+ if (rescannedtuples < 0)
+ rescannedtuples = 0;
+ }
+ /* We'll inflate various costs this much to account for rescanning */
+ rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
+
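+ /*
+ * For example (illustrative numbers): with outer key multiplicities
+ * m = {2, 3} and inner multiplicities n = {4, 1}, size of join =
+ * 2*4 + 3*1 = 11 and size of inner relation = 5, so rescannedtuples =
+ * 11 - 5 = 6, agreeing with the direct count (2-1)*4 + (3-1)*1 = 6.
+ * Then rescanratio = 1.0 + 6/5 = 2.2: each inner tuple is expected to
+ * be fetched 2.2 times on average.
+ */
+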
/*
* Decide whether we want to materialize the inner input to shield it from
* mark/restore and performing re-fetches. Our cost model for regular
}
/*
- * cost_hashjoin
- * Determines and returns the cost of joining two relations using the
- * hash join algorithm.
- *
- * 'path' is already filled in except for the cost fields
+ * initial_cost_hashjoin
+ * Preliminary estimate of the cost of a hashjoin path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_hashjoin will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_hashjoin
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here (other than by counting the number of hash clauses),
+ * so we can't do much with CPU costs. We do assume that
+ * ExecChooseHashTableSize is cheap enough to use here.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_hashjoin
+ * 'jointype' is the type of join to be performed
+ * 'hashclauses' is the list of joinclauses to be used as hash clauses
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
* 'sjinfo' is extra info about the join for selectivity estimation
- *
- * Note: path's hashclauses should be a subset of the joinrestrictinfo list
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
*/
void
-cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *hashclauses,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
{
- Path *outer_path = path->jpath.outerjoinpath;
- Path *inner_path = path->jpath.innerjoinpath;
- List *hashclauses = path->path_hashclauses;
Cost startup_cost = 0;
Cost run_cost = 0;
- Cost cpu_per_tuple;
- QualCost hash_qual_cost;
- QualCost qp_qual_cost;
- double hashjointuples;
- double outer_path_rows = PATH_ROWS(outer_path);
- double inner_path_rows = PATH_ROWS(inner_path);
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
int num_hashclauses = list_length(hashclauses);
int numbuckets;
int numbatches;
int num_skew_mcvs;
- double virtualbuckets;
- Selectivity innerbucketsize;
- Selectivity outer_match_frac;
- Selectivity match_count;
- ListCell *hcl;
-
- if (!enable_hashjoin)
- startup_cost += disable_cost;
-
- /*
- * Compute cost of the hashquals and qpquals (other restriction clauses)
- * separately.
- */
- cost_qual_eval(&hash_qual_cost, hashclauses, root);
- cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
- qp_qual_cost.startup -= hash_qual_cost.startup;
- qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
/* cost of source data */
startup_cost += outer_path->startup_cost;
&numbuckets,
&numbatches,
&num_skew_mcvs);
- virtualbuckets = (double) numbuckets *(double) numbatches;
+
+ /*
+ * If inner relation is too big then we will need to "batch" the join,
+ * which implies writing and reading most of the tuples to disk an extra
+ * time. Charge seq_page_cost per page, since the I/O should be nice and
+ * sequential. Writing the inner rel counts as startup cost, all the rest
+ * as run cost.
+ */
+ if (numbatches > 1)
+ {
+ double outerpages = page_size(outer_path_rows,
+ outer_path->parent->width);
+ double innerpages = page_size(inner_path_rows,
+ inner_path->parent->width);
+
+ startup_cost += seq_page_cost * innerpages;
+ run_cost += seq_page_cost * (innerpages + 2 * outerpages);
+ }
+
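+ /*
+ * For example (illustrative numbers): with innerpages = 1000 and
+ * outerpages = 4000, a multi-batch join adds 1000 * seq_page_cost to
+ * startup cost (writing out the inner rel) and (1000 + 2*4000) *
+ * seq_page_cost = 9000 * seq_page_cost to run cost (reading the inner
+ * rel back, plus writing and rereading the outer rel).
+ */
+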
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost;
+ /* Save private data for final_cost_hashjoin */
+ workspace->run_cost = run_cost;
+ workspace->numbuckets = numbuckets;
+ workspace->numbatches = numbatches;
+}
+
+/*
+ * final_cost_hashjoin
+ * Final estimate of the cost and result size of a hashjoin path.
+ *
+ * Note: the numbatches estimate is also saved into 'path' for use later
+ *
+ * 'path' is already filled in except for the rows and cost fields and
+ * num_batches
+ * 'workspace' is the result from initial_cost_hashjoin
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
+ */
+void
+final_cost_hashjoin(PlannerInfo *root, HashPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ List *hashclauses = path->path_hashclauses;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ int numbuckets = workspace->numbuckets;
+ int numbatches = workspace->numbatches;
+ Cost cpu_per_tuple;
+ QualCost hash_qual_cost;
+ QualCost qp_qual_cost;
+ double hashjointuples;
+ double virtualbuckets;
+ Selectivity innerbucketsize;
+ ListCell *hcl;
+
+ /* Estimate the number of rows returned by the join */
+ set_joinpath_size_estimate(root, &path->jpath, sjinfo,
+ path->jpath.joinrestrictinfo);
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_hashjoin)
+ startup_cost += disable_cost;
/* mark the path with estimated # of batches */
path->num_batches = numbatches;
+ /* and compute the number of "virtual" buckets in the whole join */
+ virtualbuckets = (double) numbuckets *(double) numbatches;
+
/*
* Determine bucketsize fraction for inner relation. We use the smallest
* bucketsize estimated for any individual hashclause; this is undoubtedly
}
/*
- * If inner relation is too big then we will need to "batch" the join,
- * which implies writing and reading most of the tuples to disk an extra
- * time. Charge seq_page_cost per page, since the I/O should be nice and
- * sequential. Writing the inner rel counts as startup cost, all the rest
- * as run cost.
+ * Compute cost of the hashquals and qpquals (other restriction clauses)
+ * separately.
*/
- if (numbatches > 1)
- {
- double outerpages = page_size(outer_path_rows,
- outer_path->parent->width);
- double innerpages = page_size(inner_path_rows,
- inner_path->parent->width);
-
- startup_cost += seq_page_cost * innerpages;
- run_cost += seq_page_cost * (innerpages + 2 * outerpages);
- }
+ cost_qual_eval(&hash_qual_cost, hashclauses, root);
+ cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
+ qp_qual_cost.startup -= hash_qual_cost.startup;
+ qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
/* CPU costs */
- if (adjust_semi_join(root, &path->jpath, sjinfo,
- &outer_match_frac,
- &match_count,
- NULL))
+ if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI)
{
double outer_matched_rows;
Selectivity inner_scan_frac;
* to clamp inner_scan_frac to at most 1.0; but since match_count is
* at least 1, no such clamp is needed now.)
*/
- outer_matched_rows = rint(outer_path_rows * outer_match_frac);
- inner_scan_frac = 2.0 / (match_count + 1.0);
+ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+ inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
startup_cost += hash_qual_cost.startup;
run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
* cpu_tuple_cost per tuple, unless the result is large enough
* to spill to disk.
*/
- Cost run_cost = cpu_tuple_cost * path->parent->rows;
- double nbytes = relation_byte_size(path->parent->rows,
+ Cost run_cost = cpu_tuple_cost * path->rows;
+ double nbytes = relation_byte_size(path->rows,
path->parent->width);
long work_mem_bytes = work_mem * 1024L;
* the run_cost charge in cost_sort, and also see comments in
* cost_material before you change it.)
*/
- Cost run_cost = cpu_operator_cost * path->parent->rows;
- double nbytes = relation_byte_size(path->parent->rows,
+ Cost run_cost = cpu_operator_cost * path->rows;
+ double nbytes = relation_byte_size(path->rows,
path->parent->width);
long work_mem_bytes = work_mem * 1024L;
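/*
 * Illustration (not part of the patch), with made-up numbers: rescanning
 * a materialized path with path->rows = 10000 at the default
 * cpu_operator_cost = 0.0025 gives run_cost = 25.  relation_byte_size()
 * then estimates the stored size (roughly rows * width plus per-tuple
 * overhead); only when that exceeds work_mem * 1024 bytes is a further
 * seq_page_cost-per-page charge added for the spilled data.
 */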
/*
- * adjust_semi_join
+ * compute_semi_anti_join_factors
* Estimate how much of the inner input a SEMI or ANTI join
* can be expected to scan.
*
* inner rows as soon as it finds a match to the current outer row.
* We should therefore adjust some of the cost components for this effect.
* This function computes some estimates needed for these adjustments.
- *
- * 'path' is already filled in except for the cost fields
- * 'sjinfo' is extra info about the join for selectivity estimation
- *
- * Returns TRUE if this is a SEMI or ANTI join, FALSE if not.
- *
- * Output parameters (set only in TRUE-result case):
- * *outer_match_frac is set to the fraction of the outer tuples that are
- * expected to have at least one match.
- * *match_count is set to the average number of matches expected for
- * outer tuples that have at least one match.
- * *indexed_join_quals is set to TRUE if all the joinquals are used as
- * inner index quals, FALSE if not.
- *
- * indexed_join_quals can be passed as NULL if that information is not
- * relevant (it is only useful for the nestloop case).
+ * These estimates will be the same regardless of the particular paths used
+ * for the outer and inner relation, so we compute these once and then pass
+ * them to all the join cost estimation functions.
+ *
+ * Input parameters:
+ * outerrel: outer relation under consideration
+ * innerrel: inner relation under consideration
+ * jointype: must be JOIN_SEMI or JOIN_ANTI
+ * sjinfo: SpecialJoinInfo relevant to this join
+ * restrictlist: join quals
+ * Output parameters:
+ * *semifactors is filled in (see relation.h for field definitions)
*/
-static bool
-adjust_semi_join(PlannerInfo *root, JoinPath *path, SpecialJoinInfo *sjinfo,
- Selectivity *outer_match_frac,
- Selectivity *match_count,
- bool *indexed_join_quals)
+void
+compute_semi_anti_join_factors(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist,
+ SemiAntiJoinFactors *semifactors)
{
- JoinType jointype = path->jointype;
Selectivity jselec;
Selectivity nselec;
Selectivity avgmatch;
List *joinquals;
ListCell *l;
- /* Fall out if it's not JOIN_SEMI or JOIN_ANTI */
- if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
- return false;
-
- /*
- * Note: it's annoying to repeat this selectivity estimation on each call,
- * when the joinclause list will be the same for all path pairs
- * implementing a given join. clausesel.c will save us from the worst
- * effects of this by caching at the RestrictInfo level; but perhaps it'd
- * be worth finding a way to cache the results at a higher level.
- */
+ /* Should only be called in these cases */
+ Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI);
/*
* In an ANTI join, we must ignore clauses that are "pushed down", since
if (jointype == JOIN_ANTI)
{
joinquals = NIL;
- foreach(l, path->joinrestrictinfo)
+ foreach(l, restrictlist)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
}
}
else
- joinquals = path->joinrestrictinfo;
+ joinquals = restrictlist;
/*
* Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
* Also get the normal inner-join selectivity of the join clauses.
*/
norm_sjinfo.type = T_SpecialJoinInfo;
- norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
- norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
- norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
- norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+ norm_sjinfo.min_lefthand = outerrel->relids;
+ norm_sjinfo.min_righthand = innerrel->relids;
+ norm_sjinfo.syn_lefthand = outerrel->relids;
+ norm_sjinfo.syn_righthand = innerrel->relids;
norm_sjinfo.jointype = JOIN_INNER;
/* we don't bother trying to make the remaining fields valid */
norm_sjinfo.lhs_strict = false;
* number of matches for each outer-rel row that has at least one match is
* nselec * inner_rows / jselec.
*
- * Note: it is correct to use the inner rel's "rows" count here, not
- * PATH_ROWS(), even if the inner path under consideration is an inner
- * indexscan. This is because we have included all the join clauses in
- * the selectivity estimate, even ones used in an inner indexscan.
+ * Note: it is correct to use the inner rel's "rows" count here, even
+ * though we might later be considering a parameterized inner path with
+ * fewer rows. This is because we have included all the join clauses
+ * in the selectivity estimate.
*/
if (jselec > 0) /* protect against zero divide */
{
- avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
+ avgmatch = nselec * innerrel->rows / jselec;
/* Clamp to sane range */
avgmatch = Max(1.0, avgmatch);
}
else
avgmatch = 1.0;
- *outer_match_frac = jselec;
- *match_count = avgmatch;
+ semifactors->outer_match_frac = jselec;
+ semifactors->match_count = avgmatch;
+}
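/*
 * Illustration (not part of the patch), with made-up numbers: if the
 * SEMI-join selectivity of the quals is jselec = 0.4 (40% of outer rows
 * have at least one match), the inner-join selectivity is nselec = 0.012,
 * and innerrel->rows = 100, then avgmatch = 0.012 * 100 / 0.4 = 3 matches
 * per matched outer row.  The cost functions above turn that into
 * inner_scan_frac = 2.0 / (3 + 1) = 0.5, the expected fraction of the
 * inner input scanned before the first match stops the scan.
 */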
- /*
- * If requested, check whether the inner path uses all the joinquals as
- * indexquals. (If that's true, we can assume that an unmatched outer
- * tuple is cheap to process, whereas otherwise it's probably expensive.)
- */
- if (indexed_join_quals)
+/*
+ * has_indexed_join_quals
+ * Check whether all the joinquals of a nestloop join are used as
+ * inner index quals.
+ *
+ * If the inner path of a SEMI/ANTI join is an indexscan (including bitmap
+ * indexscan) that uses all the joinquals as indexquals, we can assume that an
+ * unmatched outer tuple is cheap to process, whereas otherwise it's probably
+ * expensive.
+ */
+static bool
+has_indexed_join_quals(NestPath *path, List *joinclauses)
+{
+ NodeTag pathtype = path->innerjoinpath->pathtype;
+
+ if (pathtype == T_IndexScan ||
+ pathtype == T_IndexOnlyScan ||
+ pathtype == T_BitmapHeapScan)
{
if (path->joinrestrictinfo != NIL)
{
- List *nrclauses;
-
- nrclauses = select_nonredundant_join_clauses(root,
- path->joinrestrictinfo,
- path->innerjoinpath);
- *indexed_join_quals = (nrclauses == NIL);
+ /* OK if all those clauses were found to be redundant */
+ return (joinclauses == NIL);
}
else
{
/* a clauseless join does NOT qualify */
- *indexed_join_quals = false;
+ return false;
}
}
-
- return true;
+ else
+ {
+ /*
+ * If it's not a simple indexscan, it probably doesn't run quickly for
+ * zero rows out, even if it's a parameterized path using all the
+ * joinquals.
+ */
+ return false;
+ }
}
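/*
 * Illustration (not part of the patch): for a nestloop whose inner side
 * is an indexscan on inner.x, with the single joinqual outer.y = inner.x
 * used as an index qual, select_nonredundant_join_clauses() strips that
 * clause as redundant, the joinclauses list arrives here as NIL, and the
 * function returns true: an unmatched outer row ends its inner scan
 * almost immediately.
 */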
approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
{
double tuples;
- double outer_tuples = path->outerjoinpath->parent->rows;
- double inner_tuples = path->innerjoinpath->parent->rows;
+ double outer_tuples = path->outerjoinpath->rows;
+ double inner_tuples = path->innerjoinpath->rows;
SpecialJoinInfo sjinfo;
Selectivity selec = 1.0;
ListCell *l;
RelOptInfo *inner_rel,
SpecialJoinInfo *sjinfo,
List *restrictlist)
+{
+ rel->rows = calc_joinrel_size_estimate(root,
+ outer_rel->rows,
+ inner_rel->rows,
+ sjinfo,
+ restrictlist);
+}
+
+/*
+ * set_joinpath_size_estimate
+ * Set the rows estimate for the given join path.
+ *
+ * If the join is not parameterized by any joinclauses from higher joins, the
+ * estimate is the same as previously computed by set_joinrel_size_estimates.
+ * Otherwise, we estimate afresh using the identical logic, but with the rows
+ * estimates from the input paths (which are typically less than their rels'
+ * regular row estimates) and the restriction clauses actually being applied
+ * at the join.
+ */
+static void
+set_joinpath_size_estimate(PlannerInfo *root, JoinPath *path,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
+{
+ if (path->path.required_outer)
+ {
+ path->path.rows = calc_joinrel_size_estimate(root,
+ path->outerjoinpath->rows,
+ path->innerjoinpath->rows,
+ sjinfo,
+ restrictlist);
+ /* For safety, make sure result is not more than the base estimate */
+ if (path->path.rows > path->path.parent->rows)
+ path->path.rows = path->path.parent->rows;
+ }
+ else
+ path->path.rows = path->path.parent->rows;
+}
+
+/*
+ * calc_joinrel_size_estimate
+ * Workhorse for set_joinrel_size_estimates and set_joinpath_size_estimate
+ */
+static double
+calc_joinrel_size_estimate(PlannerInfo *root,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
{
JoinType jointype = sjinfo->jointype;
Selectivity jselec;
switch (jointype)
{
case JOIN_INNER:
- nrows = outer_rel->rows * inner_rel->rows * jselec;
+ nrows = outer_rows * inner_rows * jselec;
break;
case JOIN_LEFT:
- nrows = outer_rel->rows * inner_rel->rows * jselec;
- if (nrows < outer_rel->rows)
- nrows = outer_rel->rows;
+ nrows = outer_rows * inner_rows * jselec;
+ if (nrows < outer_rows)
+ nrows = outer_rows;
nrows *= pselec;
break;
case JOIN_FULL:
- nrows = outer_rel->rows * inner_rel->rows * jselec;
- if (nrows < outer_rel->rows)
- nrows = outer_rel->rows;
- if (nrows < inner_rel->rows)
- nrows = inner_rel->rows;
+ nrows = outer_rows * inner_rows * jselec;
+ if (nrows < outer_rows)
+ nrows = outer_rows;
+ if (nrows < inner_rows)
+ nrows = inner_rows;
nrows *= pselec;
break;
case JOIN_SEMI:
- nrows = outer_rel->rows * jselec;
+ nrows = outer_rows * jselec;
/* pselec not used */
break;
case JOIN_ANTI:
- nrows = outer_rel->rows * (1.0 - jselec);
+ nrows = outer_rows * (1.0 - jselec);
nrows *= pselec;
break;
default:
break;
}
- rel->rows = clamp_row_est(nrows);
+ return clamp_row_est(nrows);
}
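/*
 * Illustration (not part of the patch), with made-up numbers: given
 * outer_rows = 1000, inner_rows = 50 and jselec = 0.02, the cases above
 * yield JOIN_INNER: 1000 * 50 * 0.02 = 1000 rows; JOIN_SEMI: 1000 * 0.02
 * = 20 rows; JOIN_ANTI: 1000 * (1 - 0.02) = 980 rows (before the pselec
 * factor for pushed-down quals).  JOIN_LEFT and JOIN_FULL first clamp
 * the inner-join figure to at least the outer (and, for FULL, the inner)
 * row count.
 */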
/*
bool outer_on_left);
static bool reconsider_full_join_clause(PlannerInfo *root,
RestrictInfo *rinfo);
+static Index get_parent_relid(PlannerInfo *root, RelOptInfo *rel);
/*
*
* The results are sufficient for use in merge, hash, and plain nestloop join
* methods. We do not worry here about selecting clauses that are optimal
- * for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes
+ * for use in a nestloop-with-parameterized-inner-scan. indxpath.c makes
* its own selections of clauses to use, and if the ones we pick here are
* redundant with those, the extras will be eliminated in createplan.c.
*
/*
- * find_eclass_clauses_for_index_join
- * Create joinclauses usable for a nestloop-with-inner-indexscan
- * scanning the given inner rel with the specified set of outer rels.
+ * generate_implied_equalities_for_indexcol
+ * Create EC-derived joinclauses usable with a specific index column.
+ *
+ * We assume that any given index column could appear in only one EC.
+ * (This should be true in all but the most pathological cases, and if it
+ * isn't, we stop on the first match anyway.) Therefore, what we return
+ * is a redundant list of clauses equating the index column to each of
+ * the other-relation values it is known to be equal to. Any one of
+ * these clauses can be used to create a parameterized indexscan, and there
+ * is no value in using more than one. (But it *is* worthwhile to create
+ * a separate parameterized path for each one, since that leads to different
+ * join orders.)
*/
List *
-find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids)
+generate_implied_equalities_for_indexcol(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol)
{
List *result = NIL;
+ RelOptInfo *rel = index->rel;
bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ Index parent_relid;
ListCell *lc1;
+ /* If it's a child rel, we'll need to know what its parent is */
+ if (is_child_rel)
+ parent_relid = get_parent_relid(root, rel);
+ else
+ parent_relid = 0; /* not used, but keep compiler quiet */
+
foreach(lc1, root->eq_classes)
{
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ EquivalenceMember *cur_em;
ListCell *lc2;
/*
if (!is_child_rel &&
!bms_is_subset(rel->relids, cur_ec->ec_relids))
continue;
- /* ... nor if no overlap with outer_relids */
- if (!bms_overlap(outer_relids, cur_ec->ec_relids))
+
+ /* Scan members, looking for a match to the indexable column */
+ cur_em = NULL;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ cur_em = (EquivalenceMember *) lfirst(lc2);
+ if (bms_equal(cur_em->em_relids, rel->relids) &&
+ eclass_member_matches_indexcol(cur_ec, cur_em,
+ index, indexcol))
+ break;
+ cur_em = NULL;
+ }
+
+ if (!cur_em)
continue;
- /* Scan members, looking for indexable columns */
+ /*
+ * Found our match. Scan the other EC members and attempt to generate
+ * joinclauses.
+ */
foreach(lc2, cur_ec->ec_members)
{
- EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
- EquivalenceMember *best_outer_em = NULL;
- Oid best_eq_op = InvalidOid;
- ListCell *lc3;
+ EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *rinfo;
- if (!bms_equal(cur_em->em_relids, rel->relids) ||
- !eclass_matches_any_index(cur_ec, cur_em, rel))
+ /* Make sure it'll be a join to a different rel */
+ if (other_em == cur_em ||
+ bms_overlap(other_em->em_relids, rel->relids))
continue;
/*
- * Found one, so try to generate a join clause. This is like
- * generate_join_implied_equalities_normal, except simpler since
- * our only preference item is to pick a Var on the outer side. We
- * only need one join clause per index col.
+ * Also, if this is a child rel, avoid generating a useless join
+ * to its parent rel.
*/
- foreach(lc3, cur_ec->ec_members)
- {
- EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc3);
- Oid eq_op;
-
- if (!bms_is_subset(outer_em->em_relids, outer_relids))
- continue;
- eq_op = select_equality_operator(cur_ec,
- cur_em->em_datatype,
- outer_em->em_datatype);
- if (!OidIsValid(eq_op))
- continue;
- best_outer_em = outer_em;
- best_eq_op = eq_op;
- if (IsA(outer_em->em_expr, Var) ||
- (IsA(outer_em->em_expr, RelabelType) &&
- IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
- break; /* no need to look further */
- }
+ if (is_child_rel &&
+ bms_is_member(parent_relid, other_em->em_relids))
+ continue;
- if (best_outer_em)
- {
- /* Found a suitable joinclause */
- RestrictInfo *rinfo;
+ eq_op = select_equality_operator(cur_ec,
+ cur_em->em_datatype,
+ other_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue;
- /* set parent_ec to mark as redundant with other joinclauses */
- rinfo = create_join_clause(root, cur_ec, best_eq_op,
- cur_em, best_outer_em,
- cur_ec);
+ /* set parent_ec to mark as redundant with other joinclauses */
+ rinfo = create_join_clause(root, cur_ec, eq_op,
+ cur_em, other_em,
+ cur_ec);
- result = lappend(result, rinfo);
-
- /*
- * Note: we keep scanning here because we want to provide a
- * clause for every possible indexcol.
- */
- }
+ result = lappend(result, rinfo);
}
+
+ /*
+ * If somehow we failed to create any join clauses, we might as well
+ * keep scanning the ECs for another match. But if we did make any,
+ * we're done, because we don't want to return non-redundant clauses.
+ */
+ if (result)
+ break;
}
return result;
}
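/*
 * Illustration (not part of the patch): given an EquivalenceClass
 * {a.x, b.y, c.z} and an index column on a.x, the loop above returns
 * {a.x = b.y, a.x = c.z}.  Either clause alone can parameterize an
 * indexscan on "a" (by "b" or by "c" respectively), which is why the
 * list is deliberately redundant and why the EC search stops as soon as
 * any clauses have been generated.
 */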
+/*
+ * get_parent_relid
+ * Get the relid of a child rel's parent appendrel
+ *
+ * Possibly this should be somewhere else, but right now the logic is only
+ * needed here.
+ */
+static Index
+get_parent_relid(PlannerInfo *root, RelOptInfo *rel)
+{
+ ListCell *lc;
+
+ foreach(lc, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
+
+ if (appinfo->child_relid == rel->relid)
+ return appinfo->parent_relid;
+ }
+ /* should have found the entry ... */
+ elog(ERROR, "child rel not found in append_rel_list");
+ return 0;
+}
/*
* have_relevant_eclass_joinclause
ST_ANYSCAN /* either is okay */
} ScanTypeControl;
+/* Data structure for collecting qual clauses that match an index */
+typedef struct
+{
+ bool nonempty; /* True if lists are not all empty */
+ /* Lists of RestrictInfos, one per index column */
+ List *indexclauses[INDEX_MAX_KEYS];
+} IndexClauseSet;
+
/* Per-path data used within choose_bitmap_and() */
typedef struct
{
} PathClauseUsage;
-static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel,
- SaOpControl saop_control, ScanTypeControl scantype);
-static List *find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel);
+static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths);
+static void expand_eclass_clause_combinations(PlannerInfo *root,
+ RelOptInfo *rel,
+ IndexOptInfo *index,
+ int thiscol, int lastcol,
+ IndexClauseSet *clauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths);
+static void get_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ List **bitindexpaths);
+static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ bool useful_predicate,
+ SaOpControl saop_control, ScanTypeControl scantype);
+static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses);
+static List *drop_indexable_join_clauses(RelOptInfo *rel, List *clauses);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel);
+ List *paths);
static int path_usage_comparator(const void *a, const void *b);
static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
- Path *ipath, RelOptInfo *outer_rel);
+ Path *ipath);
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel);
+ List *paths);
static PathClauseUsage *classify_index_clause_usage(Path *path,
List **clauselist);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static double get_loop_count(PlannerInfo *root, Relids outer_relids);
+static void match_restriction_clauses_to_index(RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
+static void match_join_clauses_to_index(PlannerInfo *root,
+ RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset,
+ List **joinorclauses);
+static void match_eclass_clauses_to_index(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
static void match_clauses_to_index(IndexOptInfo *index,
- List *clauses, List *outer_clauses,
- Relids outer_relids,
- SaOpControl saop_control,
- List **index_clauses_p,
- List **clause_columns_p,
- bool *found_clause);
+ List *clauses,
+ IndexClauseSet *clauseset);
+static void match_clause_to_index(IndexOptInfo *index,
+ RestrictInfo *rinfo,
+ IndexClauseSet *clauseset);
static bool match_clause_to_indexcol(IndexOptInfo *index,
int indexcol,
- RestrictInfo *rinfo,
- Relids outer_relids,
- SaOpControl saop_control);
+ RestrictInfo *rinfo);
static bool is_indexable_operator(Oid expr_op, Oid opfamily,
bool indexkey_on_left);
static bool match_rowcompare_to_indexcol(IndexOptInfo *index,
int indexcol,
Oid opfamily,
Oid idxcollation,
- RowCompareExpr *clause,
- Relids outer_relids);
+ RowCompareExpr *clause);
static void match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys,
List **orderby_clauses_p,
List **clause_columns_p);
static Expr *match_clause_to_ordering_op(IndexOptInfo *index,
int indexcol, Expr *clause, Oid pk_opfamily);
-static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel);
-static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
- Relids outer_relids);
-static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, bool isouterjoin);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause,
*
* There are two basic kinds of index scans. A "plain" index scan uses
* only restriction clauses (possibly none at all) in its indexqual,
- * so it can be applied in any context. An "innerjoin" index scan uses
+ * so it can be applied in any context. A "parameterized" index scan uses
* join clauses (plus restriction clauses, if available) in its indexqual.
- * Therefore it can only be used as the inner relation of a nestloop
- * join against an outer rel that includes all the other rels mentioned
- * in its join clauses. In that context, values for the other rels'
+ * When joining such a scan to one of the relations supplying the other
+ * variables used in its indexqual, the parameterized scan must appear as
+ * the inner relation of a nestloop join; it can't be used on the outer side,
+ * nor in a merge or hash join. In that context, values for the other rels'
* attributes are available and fixed during any one scan of the indexpath.
*
- * An IndexPath is generated and submitted to add_path() for each plain index
- * scan this routine deems potentially interesting for the current query.
- *
- * We also determine the set of other relids that participate in join
- * clauses that could be used with each index. The actually best innerjoin
- * path will be generated for each outer relation later on, but knowing the
- * set of potential otherrels allows us to identify equivalent outer relations
- * and avoid repeated computation.
+ * An IndexPath is generated and submitted to add_path() for each plain or
+ * parameterized index scan this routine deems potentially interesting for
+ * the current query.
*
* 'rel' is the relation for which we want to generate index paths
*
{
List *indexpaths;
List *bitindexpaths;
- ListCell *l;
+ List *bitjoinpaths;
+ List *joinorclauses;
+ IndexClauseSet rclauseset;
+ IndexClauseSet jclauseset;
+ IndexClauseSet eclauseset;
+ ListCell *ilist;
/* Skip the whole mess if no indexes */
if (rel->indexlist == NIL)
- {
- rel->index_outer_relids = NULL;
return;
+
+ /* Bitmap paths are collected and then dealt with at the end */
+ bitindexpaths = bitjoinpaths = joinorclauses = NIL;
+
+ /* Examine each index in turn */
+ foreach(ilist, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+
+ /* Protect limited-size array in IndexClauseSets */
+ Assert(index->ncolumns <= INDEX_MAX_KEYS);
+
+ /*
+ * Ignore partial indexes that do not match the query.
+ * (generate_bitmap_or_paths() might be able to do something with
+ * them, but that's of no concern here.)
+ */
+ if (index->indpred != NIL && !index->predOK)
+ continue;
+
+ /*
+ * Identify the restriction clauses that can match the index.
+ */
+ MemSet(&rclauseset, 0, sizeof(rclauseset));
+ match_restriction_clauses_to_index(rel, index, &rclauseset);
+
+ /*
+ * Build index paths from the restriction clauses. These will be
+ * non-parameterized paths. Plain paths go directly to add_path(),
+ * bitmap paths are added to bitindexpaths to be handled below.
+ */
+ get_index_paths(root, rel, index, &rclauseset,
+ &bitindexpaths);
+
+ /*
+ * Identify the join clauses that can match the index. For the moment
+ * we keep them separate from the restriction clauses. Note that
+ * this finds only "loose" join clauses that have not been merged
+ * into EquivalenceClasses. Also, collect join OR clauses for later.
+ */
+ MemSet(&jclauseset, 0, sizeof(jclauseset));
+ match_join_clauses_to_index(root, rel, index,
+ &jclauseset, &joinorclauses);
+
+ /*
+ * Look for EquivalenceClasses that can generate joinclauses
+ * matching the index.
+ */
+ MemSet(&eclauseset, 0, sizeof(eclauseset));
+ match_eclass_clauses_to_index(root, index, &eclauseset);
+
+ /*
+ * If we found any plain or eclass join clauses, decide what to
+ * do with 'em.
+ */
+ if (jclauseset.nonempty || eclauseset.nonempty)
+ consider_index_join_clauses(root, rel, index,
+ &rclauseset,
+ &jclauseset,
+ &eclauseset,
+ &bitjoinpaths);
}
/*
- * Examine join clauses to see which ones are potentially usable with
- * indexes of this rel, and generate the set of all other relids that
- * participate in such join clauses. We'll use this set later to
- * recognize outer rels that are equivalent for joining purposes.
+ * Generate BitmapOrPaths for any suitable OR-clauses present in the
+ * restriction list. Add these to bitindexpaths.
*/
- rel->index_outer_relids = indexable_outerrelids(root, rel);
+ indexpaths = generate_bitmap_or_paths(root, rel,
+ rel->baserestrictinfo, NIL,
+ false);
+ bitindexpaths = list_concat(bitindexpaths, indexpaths);
/*
- * Find all the index paths that are directly usable for this relation
- * (ie, are valid without considering OR or JOIN clauses).
+ * Likewise, generate BitmapOrPaths for any suitable OR-clauses present in
+ * the joinclause list. Add these to bitjoinpaths.
*/
- indexpaths = find_usable_indexes(root, rel,
- rel->baserestrictinfo, NIL,
- true, NULL, SAOP_PER_AM, ST_ANYSCAN);
+ indexpaths = generate_bitmap_or_paths(root, rel,
+ joinorclauses, rel->baserestrictinfo,
+ false);
+ bitjoinpaths = list_concat(bitjoinpaths, indexpaths);
+
+ /*
+ * If we found anything usable, generate a BitmapHeapPath for the most
+ * promising combination of restriction bitmap index paths. Note there
+ * will be only one such path no matter how many indexes exist. This
+ * should be sufficient since there's basically only one figure of merit
+ * (total cost) for such a path.
+ */
+ if (bitindexpaths != NIL)
+ {
+ Path *bitmapqual;
+ BitmapHeapPath *bpath;
+
+ bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
+ bpath = create_bitmap_heap_path(root, rel, bitmapqual, 1.0);
+ add_path(rel, (Path *) bpath);
+ }
+
+ /*
+ * Likewise, if we found anything usable, generate a BitmapHeapPath for
+ * the most promising combination of join bitmap index paths. Note there
+ * will be only one such path no matter how many join clauses are
+ * available. (XXX is that good enough, or do we need to consider even
+ * more paths for different subsets of possible join partners? Also,
+ * should we add in restriction bitmap paths as well?)
+ */
+ if (bitjoinpaths != NIL)
+ {
+ Path *bitmapqual;
+ double loop_count;
+ BitmapHeapPath *bpath;
+
+ bitmapqual = choose_bitmap_and(root, rel, bitjoinpaths);
+ loop_count = get_loop_count(root, bitmapqual->required_outer);
+ bpath = create_bitmap_heap_path(root, rel, bitmapqual, loop_count);
+ add_path(rel, (Path *) bpath);
+ }
+}
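/*
 * Illustration (not part of the patch): for a query like
 *     SELECT ... FROM a JOIN b ON a.x = b.y WHERE a.z > 10
 * with an index on a(x), the loop above finds no matching restriction
 * clauses, records a.x = b.y in jclauseset for column 1, and
 * consider_index_join_clauses() then builds an indexscan on "a"
 * parameterized by "b", usable on the inside of a nestloop; any
 * bitmap-capable variants are collected in bitjoinpaths and combined
 * into a single BitmapHeapPath at the end.
 */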
+
+/*
+ * consider_index_join_clauses
+ * Given sets of join clauses for an index, decide which parameterized
+ * index paths to build.
+ *
+ * Plain indexpaths are sent directly to add_path, while potential
+ * bitmap indexpaths are added to *bitindexpaths for later processing.
+ *
+ * 'rel' is the index's heap relation
+ * 'index' is the index for which we want to generate paths
+ * 'rclauseset' is the collection of indexable restriction clauses
+ * 'jclauseset' is the collection of indexable simple join clauses
+ * 'eclauseset' is the collection of indexable clauses from EquivalenceClasses
+ * '*bitindexpaths' is the list to add bitmap paths to
+ *
+ * Note: this changes the clause lists contained in the passed clausesets,
+ * but we don't care since the caller is done with them.
+ */
+static void
+consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths)
+{
+ IndexClauseSet clauseset;
+ int last_eclass_col;
+ int indexcol;
+
+ /*
+ * We can always include any restriction clauses in the index clauses.
+ * However, it's not obvious which subsets of the join clauses are worth
+ * generating paths from, and it's unlikely that considering every
+ * possible subset is worth the cycles. Our current heuristic is based
+ * on the index columns, with the idea that later index columns are less
+ * useful than earlier ones; therefore it's unlikely to be worth trying
+ * combinations that would remove a clause from an earlier index column
+ * while adding one to a later column. Also, we know that all the
+ * eclass clauses for a particular column are redundant, so we should
+ * use only one of them. However, eclass clauses will always represent
+ * equality which is the strongest type of index constraint, so those
+ * are high-value and we should try every available combination when we
+ * have eclass clauses for more than one column. Furthermore, it's
+ * unlikely to be useful to combine an eclass clause with non-eclass
+ * clauses for the same index column. These considerations lead to the
+ * following heuristics:
+ *
+ * First, start with the restriction clauses, and add on all simple join
+ * clauses for column 1. If there are any such join clauses, generate
+ * paths with this collection of clauses. Then, if there are eclass
+ * clauses for column 1, generate paths with each one of them replacing
+ * any other clauses we have for column 1.
+ *
+ * Next, add on all simple join clauses for column 2. If there are any
+ * such join clauses, generate paths with this collection. If there are
+ * eclass clauses for columns 1 or 2, generate paths with each such clause
+ * replacing other clauses for its index column, including cases where we
+ * use restriction or simple join clauses for one column and an eclass
+ * clause for the other.
+ *
+ * Repeat for each additional index column.
+ */
+
+ /* Set up working set with just the restriction clauses */
+ memcpy(&clauseset, rclauseset, sizeof(clauseset));
+ /* Even if it's empty right now, it won't be by the time we use it */
+ clauseset.nonempty = true;
+
+ last_eclass_col = -1;
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ /*
+ * If we don't have either simple join clauses or eclass clauses for
+ * this column, no new paths can be created in this iteration.
+ */
+ if (jclauseset->indexclauses[indexcol] == NIL &&
+ eclauseset->indexclauses[indexcol] == NIL)
+ continue;
+
+ /* Add any simple join clauses to the working set */
+ clauseset.indexclauses[indexcol] =
+ list_concat(clauseset.indexclauses[indexcol],
+ jclauseset->indexclauses[indexcol]);
+
+ /* Set recursion depth to reach last col with eclass clauses */
+ if (eclauseset->indexclauses[indexcol] != NIL)
+ last_eclass_col = indexcol;
+
+ /* Do we have eclass clauses for any column now under consideration? */
+ if (last_eclass_col >= 0)
+ {
+ /* Yes, so recursively generate all eclass clause combinations */
+ expand_eclass_clause_combinations(root, rel, index,
+ 0, last_eclass_col,
+ &clauseset, eclauseset,
+ bitindexpaths);
+ }
+ else
+ {
+ /* No, consider the newly-enlarged set of simple join clauses */
+ get_index_paths(root, rel, index, &clauseset, bitindexpaths);
+ }
+ }
+}
+
+/*
+ * expand_eclass_clause_combinations
+ * Generate all combinations of eclass join clauses for first N columns,
+ * and construct parameterized index paths for each combination.
+ *
+ * Workhorse for consider_index_join_clauses; see notes therein for rationale.
+ * It's convenient to use recursion to implement the enumeration, since we
+ * can have at most INDEX_MAX_KEYS recursion levels.
+ *
+ * 'rel', 'index', 'eclauseset', 'bitindexpaths' as above
+ * 'thiscol' is the current index column number/recursion level
+ * 'lastcol' is the last index column we should consider eclass clauses for
+ * 'clauseset' is the current collection of indexable clauses
+ */
+static void
+expand_eclass_clause_combinations(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ int thiscol, int lastcol,
+ IndexClauseSet *clauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths)
+{
+ List *save_clauses;
+ ListCell *lc;
+
+ /* If past last eclass column, end the recursion and generate paths */
+ if (thiscol > lastcol)
+ {
+ get_index_paths(root, rel, index, clauseset, bitindexpaths);
+ return;
+ }
+
+ /* If no eclass clauses to consider for this column, just recurse */
+ if (eclauseset->indexclauses[thiscol] == NIL)
+ {
+ Assert(thiscol < lastcol);
+ expand_eclass_clause_combinations(root, rel, index,
+ thiscol + 1, lastcol,
+ clauseset, eclauseset,
+ bitindexpaths);
+ return;
+ }
+
+ /* We'll momentarily save and restore the list of non-eclass clauses */
+ save_clauses = clauseset->indexclauses[thiscol];
+
+ /* If we have non-eclass clauses for this column, first try with those */
+ if (save_clauses)
+ expand_eclass_clause_combinations(root, rel, index,
+ thiscol + 1, lastcol,
+ clauseset, eclauseset,
+ bitindexpaths);
+
+ /* For each eclass clause alternative ... */
+ foreach(lc, eclauseset->indexclauses[thiscol])
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Replace any existing clauses with the eclass clause */
+ clauseset->indexclauses[thiscol] = list_make1(rinfo);
+
+ /* Recurse to advance to next column */
+ expand_eclass_clause_combinations(root, rel, index,
+ thiscol + 1, lastcol,
+ clauseset, eclauseset,
+ bitindexpaths);
+ }
+
+ /* Restore previous list contents */
+ clauseset->indexclauses[thiscol] = save_clauses;
+}
+
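/*
 * Illustration (not part of the patch): consider an index on (x, y)
 * with rclauseset = {x > 10}, a simple join clause y = t2.c for column
 * 2, and an eclass clause x = t3.d for column 1.  The column-1 pass
 * sets last_eclass_col = 0 and the recursion emits paths for {x > 10}
 * and {x = t3.d}; the column-2 pass appends y = t2.c and emits
 * {x > 10, y = t2.c} (parameterized by t2) and {x = t3.d, y = t2.c}
 * (parameterized by t2 and t3).  Any duplicate of the already-built
 * restriction-only path simply loses out in add_path().
 */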
+
+/*
+ * get_index_paths
+ * Given an index and a set of index clauses for it, construct IndexPaths.
+ *
+ * Plain indexpaths are sent directly to add_path, while potential
+ * bitmap indexpaths are added to *bitindexpaths for later processing.
+ *
+ * This is a fairly simple frontend to build_index_paths(). Its reason for
+ * existence is mainly to handle ScalarArrayOpExpr quals properly. If the
+ * index AM supports them natively, we should just include them in simple
+ * index paths. If not, we should exclude them while building simple index
+ * paths, and then make a separate attempt to include them in bitmap paths.
+ */
+static void
+get_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ List **bitindexpaths)
+{
+ List *indexpaths;
+ ListCell *lc;
+
+ /*
+ * Build simple index paths using the clauses. Allow ScalarArrayOpExpr
+ * clauses only if the index AM supports them natively.
+ */
+ indexpaths = build_index_paths(root, rel,
+ index, clauses,
+ index->predOK,
+ SAOP_PER_AM, ST_ANYSCAN);
/*
* Submit all the ones that can form plain IndexScan plans to add_path.
- * (A plain IndexPath might represent either a plain IndexScan or an
- * IndexOnlyScan, but for our purposes here the distinction does not
+ * (A plain IndexPath can represent either a plain IndexScan or an
+ * IndexOnlyScan, but for our purposes here that distinction does not
* matter. However, some of the indexes might support only bitmap scans,
- * and those we mustn't submit to add_path here.) Also, pick out the ones
- * that might be useful as bitmap scans. For that, we must discard
- * indexes that don't support bitmap scans, and we also are only
- * interested in paths that have some selectivity; we should discard
- * anything that was generated solely for ordering purposes.
+ * and those we mustn't submit to add_path here.)
+ *
+ * Also, pick out the ones that are usable as bitmap scans. For that,
+ * we must discard indexes that don't support bitmap scans, and we
+ * also are only interested in paths that have some selectivity; we
+ * should discard anything that was generated solely for ordering
+ * purposes.
*/
- bitindexpaths = NIL;
- foreach(l, indexpaths)
+ foreach(lc, indexpaths)
{
- IndexPath *ipath = (IndexPath *) lfirst(l);
+ IndexPath *ipath = (IndexPath *) lfirst(lc);
- if (ipath->indexinfo->amhasgettuple)
+ if (index->amhasgettuple)
add_path(rel, (Path *) ipath);
- if (ipath->indexinfo->amhasgetbitmap &&
+ if (index->amhasgetbitmap &&
(ipath->path.pathkeys == NIL ||
ipath->indexselectivity < 1.0))
- bitindexpaths = lappend(bitindexpaths, ipath);
+ *bitindexpaths = lappend(*bitindexpaths, ipath);
}
/*
- * Generate BitmapOrPaths for any suitable OR-clauses present in the
- * restriction list. Add these to bitindexpaths.
+ * If the index doesn't handle ScalarArrayOpExpr clauses natively, check
+ * to see if there are any such clauses, and if so generate bitmap scan
+ * paths relying on executor-managed ScalarArrayOpExpr handling.
*/
- indexpaths = generate_bitmap_or_paths(root, rel,
- rel->baserestrictinfo, NIL,
- NULL);
- bitindexpaths = list_concat(bitindexpaths, indexpaths);
+ if (!index->amsearcharray)
+ {
+ indexpaths = build_index_paths(root, rel,
+ index, clauses,
+ false,
+ SAOP_REQUIRE, ST_BITMAPSCAN);
+ *bitindexpaths = list_concat(*bitindexpaths, indexpaths);
+ }
+}
+
+/*
+ * build_index_paths
+ * Given an index and a set of index clauses for it, construct zero
+ * or more IndexPaths.
+ *
+ * We return a list of paths because (1) this routine checks some cases
+ * in which no IndexPath should be generated at all, and (2) in some
+ * cases we want to consider both a forward and a backward scan, so as
+ * to obtain both sort orders. Note that the paths are just returned
+ * to the caller and not immediately fed to add_path().
+ *
+ * At top level, useful_predicate should be exactly the index's predOK flag
+ * (ie, true if it has a predicate that was proven from the restriction
+ * clauses). When working on an arm of an OR clause, useful_predicate
+ * should be true if the predicate required the current OR list to be proven.
+ * Note that this routine should never be called at all if the index has an
+ * unprovable predicate.
+ *
+ * saop_control indicates whether ScalarArrayOpExpr clauses can be used.
+ * When it's SAOP_REQUIRE, index paths are created only if we found at least
+ * one ScalarArrayOpExpr clause.
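+ * (SAOP_PER_AM, as passed from get_index_paths(), additionally means a
+ * ScalarArrayOpExpr clause is accepted only if the index AM reports
+ * amsearcharray; see the clause-collection loop below.)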
+ *
+ * scantype indicates whether we want to create plain indexscans, bitmap
+ * indexscans, or both. When it's ST_BITMAPSCAN, we will not consider
+ * index ordering while deciding if a Path is worth generating.
+ *
+ * 'rel' is the index's heap relation
+ * 'index' is the index for which we want to generate paths
+ * 'clauses' is the collection of indexable clauses (RestrictInfo nodes)
+ * 'useful_predicate' indicates whether the index has a useful predicate
+ * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used
+ * 'scantype' indicates whether we need plain or bitmap scan support
+ */
+static List *
+build_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ bool useful_predicate,
+ SaOpControl saop_control, ScanTypeControl scantype)
+{
+ List *result = NIL;
+ IndexPath *ipath;
+ List *index_clauses;
+ List *clause_columns;
+ Relids outer_relids;
+ double loop_count;
+ List *orderbyclauses;
+ List *orderbyclausecols;
+ List *index_pathkeys;
+ List *useful_pathkeys;
+ bool found_clause;
+ bool pathkeys_possibly_useful;
+ bool index_is_ordered;
+ bool index_only_scan;
+ int indexcol;
/*
- * Likewise, generate paths using executor-managed ScalarArrayOpExpr
- * clauses; these can't be simple indexscans but they can be used in
- * bitmap scans.
+ * Check that index supports the desired scan type(s)
*/
- indexpaths = find_saop_paths(root, rel,
- rel->baserestrictinfo, NIL,
- true, NULL);
- bitindexpaths = list_concat(bitindexpaths, indexpaths);
+ switch (scantype)
+ {
+ case ST_INDEXSCAN:
+ if (!index->amhasgettuple)
+ return NIL;
+ break;
+ case ST_BITMAPSCAN:
+ if (!index->amhasgetbitmap)
+ return NIL;
+ break;
+ case ST_ANYSCAN:
+ /* either or both are OK */
+ break;
+ }
/*
- * If we found anything usable, generate a BitmapHeapPath for the most
- * promising combination of bitmap index paths.
+ * 1. Collect the index clauses into a single list.
+ *
+ * We build a list of RestrictInfo nodes for clauses to be used with
+ * this index, along with an integer list of the index column numbers
+ * (zero based) that each clause should be used with. The clauses are
+ * ordered by index key, so that the column numbers form a nondecreasing
+ * sequence. (This order is depended on by btree and possibly other
+ * places.) The lists can be empty, if the index AM allows that.
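+ * For example (hypothetical), clauses matching columns 0, 0, and 2 come
+ * out in exactly that order, with no entries for column 1.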
+ *
+ * found_clause is set true only if there's at least one index clause;
+ * and if saop_control is SAOP_REQUIRE, it has to be a ScalarArrayOpExpr
+ * clause.
+ *
+ * We also build a Relids set showing which outer rels are required
+ * by the selected clauses.
*/
- if (bitindexpaths != NIL)
+ index_clauses = NIL;
+ clause_columns = NIL;
+ found_clause = false;
+ outer_relids = NULL;
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
{
- Path *bitmapqual;
- BitmapHeapPath *bpath;
+ ListCell *lc;
- bitmapqual = choose_bitmap_and(root, rel, bitindexpaths, NULL);
- bpath = create_bitmap_heap_path(root, rel, bitmapqual, NULL);
- add_path(rel, (Path *) bpath);
+ foreach(lc, clauses->indexclauses[indexcol])
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (IsA(rinfo->clause, ScalarArrayOpExpr))
+ {
+ /* Ignore if not supported by index */
+ if (saop_control == SAOP_PER_AM && !index->amsearcharray)
+ continue;
+ found_clause = true;
+ }
+ else
+ {
+ if (saop_control != SAOP_REQUIRE)
+ found_clause = true;
+ }
+ index_clauses = lappend(index_clauses, rinfo);
+ clause_columns = lappend_int(clause_columns, indexcol);
+ outer_relids = bms_add_members(outer_relids,
+ rinfo->clause_relids);
+ }
+
+ /*
+ * If no clauses match the first index column, check for amoptionalkey
+ * restriction. We can't generate a scan over an index with
+ * amoptionalkey = false unless there's at least one index clause.
+ * (When working on columns after the first, this test cannot fail: it
+ * is always okay for later columns to have no clauses, and if the first
+ * column had none with amoptionalkey false, we'd have returned already.)
+ */
+ if (index_clauses == NIL && !index->amoptionalkey)
+ return NIL;
+ }
+
+ /* We do not want the index's rel itself listed in outer_relids */
+ outer_relids = bms_del_member(outer_relids, rel->relid);
+ /* Enforce convention that outer_relids is exactly NULL if empty */
+ if (bms_is_empty(outer_relids))
+ outer_relids = NULL;
+
+ /* Compute loop_count for cost estimation purposes */
+ loop_count = get_loop_count(root, outer_relids);
+
+ /*
+ * 2. Compute pathkeys describing index's ordering, if any, then see how
+ * many of them are actually useful for this query. This is not relevant
+ * if we are only trying to build bitmap indexscans.
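+ * (For example, a hypothetical btree index on (x, y) yields forward-scan
+ * pathkeys on x then y; truncate_useless_pathkeys keeps only the prefix
+ * that is actually useful to the query.)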
+ */
+ pathkeys_possibly_useful = (scantype != ST_BITMAPSCAN &&
+ has_useful_pathkeys(root, rel));
+ index_is_ordered = (index->sortopfamily != NULL);
+ if (index_is_ordered && pathkeys_possibly_useful)
+ {
+ index_pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+ }
+ else if (index->amcanorderbyop && pathkeys_possibly_useful)
+ {
+ /* see if we can generate ordering operators for query_pathkeys */
+ match_pathkeys_to_index(index, root->query_pathkeys,
+ &orderbyclauses,
+ &orderbyclausecols);
+ if (orderbyclauses)
+ useful_pathkeys = root->query_pathkeys;
+ else
+ useful_pathkeys = NIL;
+ }
+ else
+ {
+ useful_pathkeys = NIL;
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+ }
+
+ /*
+ * 3. Check if an index-only scan is possible. If we're not building
+ * plain indexscans, this isn't relevant since bitmap scans don't support
+ * index data retrieval anyway.
+ */
+ index_only_scan = (scantype != ST_BITMAPSCAN &&
+ check_index_only(rel, index));
+
+ /*
+ * 4. Generate an indexscan path if there are relevant restriction clauses
+ * in the current clauses, OR the index ordering is potentially useful for
+ * later merging or final output ordering, OR the index has a useful
+ * predicate, OR an index-only scan is possible.
+ */
+ if (found_clause || useful_pathkeys != NIL || useful_predicate ||
+ index_only_scan)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ clause_columns,
+ orderbyclauses,
+ orderbyclausecols,
+ useful_pathkeys,
+ index_is_ordered ?
+ ForwardScanDirection :
+ NoMovementScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count);
+ result = lappend(result, ipath);
+ }
+
+ /*
+ * 5. If the index is ordered, a backwards scan might be interesting.
+ */
+ if (index_is_ordered && pathkeys_possibly_useful)
+ {
+ index_pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ if (useful_pathkeys != NIL)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ clause_columns,
+ NIL,
+ NIL,
+ useful_pathkeys,
+ BackwardScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count);
+ result = lappend(result, ipath);
+ }
}
-}
+ return result;
+}
-/*----------
- * find_usable_indexes
- * Given a list of restriction clauses, find all the potentially usable
- * indexes for the given relation, and return a list of IndexPaths.
+/*
+ * build_paths_for_OR
+ * Given a list of restriction clauses from one arm of an OR clause,
+ * construct all matching IndexPaths for the relation.
+ *
+ * Here we must scan all indexes of the relation, since a bitmap OR tree
+ * can use multiple indexes.
*
* The caller actually supplies two lists of restriction clauses: some
- * "current" ones and some "outer" ones. Both lists can be used freely
+ * "current" ones and some "other" ones. Both lists can be used freely
* to match keys of the index, but an index must use at least one of the
* "current" clauses to be considered usable. The motivation for this is
* examples like
* When dealing with a partial index, a match of the index predicate to
* one of the "current" clauses also makes the index usable.
*
- * If istoplevel is true (indicating we are considering the top level of a
- * rel's restriction clauses), we will include indexes in the result that
- * have an interesting sort order, even if they have no matching restriction
- * clauses.
- *
* 'rel' is the relation for which we want to generate index paths
* 'clauses' is the current list of clauses (RestrictInfo nodes)
- * 'outer_clauses' is the list of additional upper-level clauses
- * 'istoplevel' is true if clauses are the rel's top-level restriction list
- * (outer_clauses must be NIL when this is true)
- * 'outer_rel' is the outer side of the join if forming an inner indexscan
- * (so some of the given clauses are join clauses); NULL if not
- * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used
- * 'scantype' indicates whether we need plain or bitmap scan support
- *
- * Note: check_partial_indexes() must have been run previously.
- *----------
+ * 'other_clauses' is the list of additional upper-level clauses
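+ *
+ * In the OR case, generate_bitmap_or_paths() passes one arm's clauses as
+ * 'clauses' and the entire surrounding clause list as 'other_clauses'.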
*/
static List *
-find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel,
- SaOpControl saop_control, ScanTypeControl scantype)
+build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses)
{
- Relids outer_relids = outer_rel ? outer_rel->relids : NULL;
- bool possibly_useful_pathkeys = has_useful_pathkeys(root, rel);
List *result = NIL;
List *all_clauses = NIL; /* not computed till needed */
- ListCell *ilist;
+ ListCell *lc;
- foreach(ilist, rel->indexlist)
+ foreach(lc, rel->indexlist)
{
- IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
- IndexPath *ipath;
- List *restrictclauses;
- List *restrictclausecols;
- List *orderbyclauses;
- List *orderbyclausecols;
- List *index_pathkeys;
- List *useful_pathkeys;
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+ IndexClauseSet clauseset;
+ List *indexpaths;
bool useful_predicate;
- bool found_clause;
- bool index_is_ordered;
- bool index_only_scan;
- /*
- * Check that index supports the desired scan type(s)
- */
- switch (scantype)
- {
- case ST_INDEXSCAN:
- if (!index->amhasgettuple)
- continue;
- break;
- case ST_BITMAPSCAN:
- if (!index->amhasgetbitmap)
- continue;
- break;
- case ST_ANYSCAN:
- /* either or both are OK */
- break;
- }
-
- /*
- * If we're doing find_saop_paths(), we can skip indexes that support
- * ScalarArrayOpExpr natively. We already generated all the potential
- * indexpaths for them, so no need to do anything more.
- */
- if (saop_control == SAOP_REQUIRE && index->amsearcharray)
+ /* Ignore index if it doesn't support bitmap scans */
+ if (!index->amhasgetbitmap)
continue;
/*
* Ignore partial indexes that do not match the query. If a partial
- * index is marked predOK then we know it's OK; otherwise, if we are
- * at top level we know it's not OK (since predOK is exactly whether
- * its predicate could be proven from the toplevel clauses).
- * Otherwise, we have to test whether the added clauses are sufficient
- * to imply the predicate. If so, we could use the index in the
- * current context.
+ * index is marked predOK then we know it's OK. Otherwise, we have
+ * to test whether the added clauses are sufficient to imply the
+ * predicate. If so, we can use the index in the current context.
*
* We set useful_predicate to true iff the predicate was proven using
* the current set of clauses. This is needed to prevent matching a
{
if (index->predOK)
{
- if (istoplevel)
- {
- /* we know predicate was proven from these clauses */
- useful_predicate = true;
- }
+ /* Usable, but don't set useful_predicate */
}
else
{
- if (istoplevel)
- continue; /* no point in trying to prove it */
-
/* Form all_clauses if not done already */
if (all_clauses == NIL)
all_clauses = list_concat(list_copy(clauses),
- outer_clauses);
+ other_clauses);
if (!predicate_implied_by(index->indpred, all_clauses))
continue; /* can't use it at all */
- if (!predicate_implied_by(index->indpred, outer_clauses))
+ if (!predicate_implied_by(index->indpred, other_clauses))
useful_predicate = true;
}
}
/*
- * 1. Match the index against the available restriction clauses.
- * found_clause is set true only if at least one of the current
- * clauses was used (and, if saop_control is SAOP_REQUIRE, it has to
- * have been a ScalarArrayOpExpr clause).
+ * Identify the restriction clauses that can match the index.
*/
- match_clauses_to_index(index,
- clauses,
- outer_clauses,
- outer_relids,
- saop_control,
- &restrictclauses,
- &restrictclausecols,
- &found_clause);
+ MemSet(&clauseset, 0, sizeof(clauseset));
+ match_clauses_to_index(index, clauses, &clauseset);
/*
- * Not all index AMs support scans with no restriction clauses. We
- * can't generate a scan over an index with amoptionalkey = false
- * unless there's at least one restriction clause.
+ * If no matches so far, and the index predicate isn't useful,
+ * we don't want it.
*/
- if (restrictclauses == NIL && !index->amoptionalkey)
+ if (!clauseset.nonempty && !useful_predicate)
continue;
/*
- * 2. Compute pathkeys describing index's ordering, if any, then see
- * how many of them are actually useful for this query. This is not
- * relevant unless we are at top level.
+ * Add "other" restriction clauses to the clauseset.
*/
- index_is_ordered = (index->sortopfamily != NULL);
- if (index_is_ordered && possibly_useful_pathkeys &&
- istoplevel && outer_rel == NULL)
- {
- index_pathkeys = build_index_pathkeys(root, index,
- ForwardScanDirection);
- useful_pathkeys = truncate_useless_pathkeys(root, rel,
- index_pathkeys);
- orderbyclauses = NIL;
- orderbyclausecols = NIL;
- }
- else if (index->amcanorderbyop && possibly_useful_pathkeys &&
- istoplevel && outer_rel == NULL && scantype != ST_BITMAPSCAN)
- {
- /* see if we can generate ordering operators for query_pathkeys */
- match_pathkeys_to_index(index, root->query_pathkeys,
- &orderbyclauses,
- &orderbyclausecols);
- if (orderbyclauses)
- useful_pathkeys = root->query_pathkeys;
- else
- useful_pathkeys = NIL;
- }
- else
- {
- useful_pathkeys = NIL;
- orderbyclauses = NIL;
- orderbyclausecols = NIL;
- }
-
- /*
- * 3. Check if an index-only scan is possible.
- */
- index_only_scan = check_index_only(rel, index);
-
- /*
- * 4. Generate an indexscan path if there are relevant restriction
- * clauses in the current clauses, OR the index ordering is
- * potentially useful for later merging or final output ordering, OR
- * the index has a predicate that was proven by the current clauses,
- * OR an index-only scan is possible.
- */
- if (found_clause || useful_pathkeys != NIL || useful_predicate ||
- index_only_scan)
- {
- ipath = create_index_path(root, index,
- restrictclauses,
- restrictclausecols,
- orderbyclauses,
- orderbyclausecols,
- useful_pathkeys,
- index_is_ordered ?
- ForwardScanDirection :
- NoMovementScanDirection,
- index_only_scan,
- outer_rel);
- result = lappend(result, ipath);
- }
+ match_clauses_to_index(index, other_clauses, &clauseset);
/*
- * 5. If the index is ordered, a backwards scan might be interesting.
- * Again, this is only interesting at top level.
+ * Construct paths if possible.
*/
- if (index_is_ordered && possibly_useful_pathkeys &&
- istoplevel && outer_rel == NULL)
- {
- index_pathkeys = build_index_pathkeys(root, index,
- BackwardScanDirection);
- useful_pathkeys = truncate_useless_pathkeys(root, rel,
- index_pathkeys);
- if (useful_pathkeys != NIL)
- {
- ipath = create_index_path(root, index,
- restrictclauses,
- restrictclausecols,
- NIL,
- NIL,
- useful_pathkeys,
- BackwardScanDirection,
- index_only_scan,
- outer_rel);
- result = lappend(result, ipath);
- }
- }
+ indexpaths = build_index_paths(root, rel,
+ index, &clauseset,
+ useful_predicate,
+ SAOP_ALLOW, ST_BITMAPSCAN);
+ result = list_concat(result, indexpaths);
}
return result;
}
-
-/*
- * find_saop_paths
- * Find all the potential indexpaths that make use of executor-managed
- * ScalarArrayOpExpr clauses. The executor only supports these in bitmap
- * scans, not plain indexscans, so we need to segregate them from the
- * normal case. Otherwise, same API as find_usable_indexes().
- * Returns a list of IndexPaths.
- */
-static List *
-find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel)
-{
- bool have_saop = false;
- ListCell *l;
-
- /*
- * Since find_usable_indexes is relatively expensive, don't bother to run
- * it unless there are some top-level ScalarArrayOpExpr clauses.
- */
- foreach(l, clauses)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-
- Assert(IsA(rinfo, RestrictInfo));
- if (IsA(rinfo->clause, ScalarArrayOpExpr))
- {
- have_saop = true;
- break;
- }
- }
- if (!have_saop)
- return NIL;
-
- return find_usable_indexes(root, rel,
- clauses, outer_clauses,
- istoplevel, outer_rel,
- SAOP_REQUIRE, ST_BITMAPSCAN);
-}
-
-
/*
* generate_bitmap_or_paths
* Look through the list of clauses to find OR clauses, and generate
* a BitmapOrPath for each one we can handle that way. Return a list
* of the generated BitmapOrPaths.
*
- * outer_clauses is a list of additional clauses that can be assumed true
+ * other_clauses is a list of additional clauses that can be assumed true
* for the purpose of generating indexquals, but are not to be searched for
- * ORs. (See find_usable_indexes() for motivation.) outer_rel is the outer
- * side when we are considering a nestloop inner indexpath.
+ * ORs. (See build_paths_for_OR() for motivation.)
+ *
+ * If restriction_only is true, ignore OR elements that are join clauses.
+ * When using this feature, it is the caller's responsibility to ensure that
+ * neither clauses nor other_clauses contain any join clauses that are not
+ * ORs, as we do not re-filter those lists.
*/
List *
generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- RelOptInfo *outer_rel)
+ List *clauses, List *other_clauses,
+ bool restriction_only)
{
List *result = NIL;
List *all_clauses;
- ListCell *l;
+ ListCell *lc;
/*
- * We can use both the current and outer clauses as context for
- * find_usable_indexes
+ * We can use both the current and other clauses as context for
+ * build_paths_for_OR; no need to remove ORs from the lists.
*/
- all_clauses = list_concat(list_copy(clauses), outer_clauses);
+ all_clauses = list_concat(list_copy(clauses), other_clauses);
- foreach(l, clauses)
+ foreach(lc, clauses)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
List *pathlist;
Path *bitmapqual;
ListCell *j;
{
List *andargs = ((BoolExpr *) orarg)->args;
- indlist = find_usable_indexes(root, rel,
- andargs,
- all_clauses,
- false,
- outer_rel,
- SAOP_ALLOW,
- ST_BITMAPSCAN);
+ if (restriction_only)
+ andargs = drop_indexable_join_clauses(rel, andargs);
+
+ indlist = build_paths_for_OR(root, rel,
+ andargs,
+ all_clauses);
+
/* Recurse in case there are sub-ORs */
indlist = list_concat(indlist,
generate_bitmap_or_paths(root, rel,
andargs,
all_clauses,
- outer_rel));
+ restriction_only));
}
else
{
+ List *orargs;
+
Assert(IsA(orarg, RestrictInfo));
Assert(!restriction_is_or_clause((RestrictInfo *) orarg));
- indlist = find_usable_indexes(root, rel,
- list_make1(orarg),
- all_clauses,
- false,
- outer_rel,
- SAOP_ALLOW,
- ST_BITMAPSCAN);
+ orargs = list_make1(orarg);
+
+ if (restriction_only)
+ orargs = drop_indexable_join_clauses(rel, orargs);
+
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ all_clauses);
}
/*
* OK, pick the most promising AND combination, and add it to
* pathlist.
*/
- bitmapqual = choose_bitmap_and(root, rel, indlist, outer_rel);
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
pathlist = lappend(pathlist, bitmapqual);
}
return result;
}
+/*
+ * drop_indexable_join_clauses
+ * Remove any indexable join clauses from the list.
+ *
+ * This is a helper for generate_bitmap_or_paths(). We leave OR clauses
+ * in the list whether they are joins or not, since we might be able to
+ * extract a restriction item from an OR list. It's safe to leave such
+ * clauses in the list because match_clauses_to_index() will ignore them,
+ * so there's no harm in passing them to build_paths_for_OR().
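+ *
+ * For example (hypothetical): given OR-arm clauses {a.x = b.y, a.z = 1,
+ * (a.q = b.r OR a.s = 2)}, only the join clause a.x = b.y is dropped;
+ * the plain restriction clause and the OR clause are both kept.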
+ */
+static List *
+drop_indexable_join_clauses(RelOptInfo *rel, List *clauses)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ Assert(IsA(rinfo, RestrictInfo));
+ if (restriction_is_or_clause(rinfo) ||
+ bms_is_subset(rinfo->clause_relids, rel->relids))
+ result = lappend(result, rinfo);
+ }
+ return result;
+}
+
/*
* choose_bitmap_and
* combining multiple inputs.
*/
static Path *
-choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel)
+choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths)
{
int npaths = list_length(paths);
PathClauseUsage **pathinfoarray;
* reduces the total cost. Perhaps someday that code will be smarter and
* we can remove this limitation. (But note that this also defends
* against flat-out duplicate input paths, which can happen because
- * best_inner_indexscan will find the same OR join clauses that
+ * match_join_clauses_to_index will find the same OR join clauses that
* create_or_index_quals has pulled OR restriction clauses out of.)
*
* For the same reason, we reject AND combinations in which an index
pathinfo = pathinfoarray[i];
paths = list_make1(pathinfo->path);
- costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path, outer_rel);
+ costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path);
qualsofar = list_concat(list_copy(pathinfo->quals),
list_copy(pathinfo->preds));
clauseidsofar = bms_copy(pathinfo->clauseids);
}
/* tentatively add new path to paths, so we can estimate cost */
paths = lappend(paths, pathinfo->path);
- newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
+ newcost = bitmap_and_cost_est(root, rel, paths);
if (newcost < costsofar)
{
/* keep new path in paths, update subsidiary variables */
* index path (no BitmapAnd, at least not at this level).
*/
static Cost
-bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
- Path *ipath, RelOptInfo *outer_rel)
+bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath)
{
- Path bpath;
+ BitmapHeapPath bpath;
+
+ /* Set up a dummy BitmapHeapPath */
+ bpath.path.type = T_BitmapHeapPath;
+ bpath.path.pathtype = T_BitmapHeapScan;
+ bpath.path.parent = rel;
+ bpath.path.pathkeys = NIL;
+ bpath.path.required_outer = ipath->required_outer;
+ bpath.path.param_clauses = ipath->param_clauses;
+ bpath.bitmapqual = ipath;
- cost_bitmap_heap_scan(&bpath, root, rel, ipath, outer_rel);
+ cost_bitmap_heap_scan((Path *) &bpath, root, rel, ipath,
+ get_loop_count(root, bpath.path.required_outer));
- return bpath.total_cost;
+ return bpath.path.total_cost;
}
/*
* inputs.
*/
static Cost
-bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel)
+bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths)
{
- BitmapAndPath apath;
- Path bpath;
+ BitmapAndPath *apath;
+ BitmapHeapPath bpath;
+
+ /*
+ * Create a temporary BitmapAndPath. (Because it needs realistic
+ * required_outer and param_clauses values, making a dummy one would
+ * take more code than it's worth.)
+ */
+ apath = create_bitmap_and_path(root, rel, paths);
- /* Set up a dummy BitmapAndPath */
- apath.path.type = T_BitmapAndPath;
- apath.path.parent = rel;
- apath.bitmapquals = paths;
- cost_bitmap_and_node(&apath, root);
+ /* Set up a dummy BitmapHeapPath */
+ bpath.path.type = T_BitmapHeapPath;
+ bpath.path.pathtype = T_BitmapHeapScan;
+ bpath.path.parent = rel;
+ bpath.path.pathkeys = NIL;
+ bpath.path.required_outer = apath->path.required_outer;
+ bpath.path.param_clauses = apath->path.param_clauses;
+ bpath.bitmapqual = (Path *) apath;
/* Now we can do cost_bitmap_heap_scan */
- cost_bitmap_heap_scan(&bpath, root, rel, (Path *) &apath, outer_rel);
+ cost_bitmap_heap_scan((Path *) &bpath, root, rel, (Path *) apath,
+ get_loop_count(root, bpath.path.required_outer));
- return bpath.total_cost;
+ return bpath.path.total_cost;
}
return result;
}
+/*
+ * get_loop_count
+ * Choose the loop count estimate to use for costing a parameterized path
+ * with the given set of outer relids.
+ *
+ * Since we produce parameterized paths before we've begun to generate join
+ * relations, it's impossible to predict exactly how many times a parameterized
+ * path will be iterated; we don't know the size of the relation that will be
+ * on the outside of the nestloop. However, we should try to account for
+ * multiple iterations somehow in costing the path. The heuristic embodied
+ * here is to use the rowcount of the smallest other base relation needed in
+ * the join clauses used by the path. (We could alternatively consider the
+ * largest one, but that seems too optimistic.) This is of course the right
+ * answer for single-other-relation cases, and it seems like a reasonable
+ * zero-order approximation for multiway-join cases.
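+ *
+ * For example (hypothetical numbers): if the chosen clauses reference two
+ * other base rels, estimated at 100 and 10000 rows respectively, we cost
+ * the scan as though it will be repeated 100 times.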
+ *
+ * Note: for this to work, allpaths.c must establish all baserel size
+ * estimates before it begins to compute paths, or at least before it
+ * calls create_index_paths().
+ */
+static double
+get_loop_count(PlannerInfo *root, Relids outer_relids)
+{
+ double result = 1.0;
+
+ /* For a non-parameterized path, just return 1.0 quickly */
+ if (outer_relids != NULL)
+ {
+ int relid;
+
+ /* Need a working copy since bms_first_member is destructive */
+ outer_relids = bms_copy(outer_relids);
+ while ((relid = bms_first_member(outer_relids)) >= 0)
+ {
+ RelOptInfo *outer_rel;
+
+ /* Paranoia: ignore bogus relid indexes */
+ if (relid >= root->simple_rel_array_size)
+ continue;
+ outer_rel = root->simple_rel_array[relid];
+ if (outer_rel == NULL)
+ continue;
+ Assert(outer_rel->relid == relid); /* sanity check on array */
+
+ /* Other relation could be proven empty, if so ignore */
+ if (IS_DUMMY_REL(outer_rel))
+ continue;
+
+ /* Otherwise, rel's rows estimate should be valid by now */
+ Assert(outer_rel->rows > 0);
+
+ /* Remember smallest row count estimate among the outer rels */
+ if (result == 1.0 || result > outer_rel->rows)
+ result = outer_rel->rows;
+ }
+ bms_free(outer_relids);
+ }
+ return result;
+}
+
+
+/****************************************************************************
+ * ---- ROUTINES TO CHECK QUERY CLAUSES ----
+ ****************************************************************************/
+
+/*
+ * match_restriction_clauses_to_index
+ * Identify restriction clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_restriction_clauses_to_index(RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ match_clauses_to_index(index, rel->baserestrictinfo, clauseset);
+}
+
+/*
+ * match_join_clauses_to_index
+ * Identify join clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ * Also, add any potentially usable join OR clauses to *joinorclauses.
+ */
+static void
+match_join_clauses_to_index(PlannerInfo *root,
+ RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset,
+ List **joinorclauses)
+{
+ Relids inner_baserels;
+ ListCell *lc;
+
+ /*
+ * There is no value in considering join clauses for outer joins in which
+ * the indexed relation is on the outside, since there'd be no way to
+ * perform such a join with a parameterized nestloop. So first, identify
+ * all baserels that are on the inside of such joins.
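+ *
+ * For example (hypothetical): in "A LEFT JOIN B ON a.x = b.y", when
+ * matching clauses to A's indexes we must skip a.x = b.y, since A has to
+ * be on the outside of that join; B ends up in inner_baserels.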
+ */
+ inner_baserels = NULL;
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+ if (bms_overlap(rel->relids, sjinfo->min_lefthand))
+ inner_baserels = bms_add_members(inner_baserels,
+ sjinfo->min_righthand);
+ /* full joins constrain both sides symmetrically */
+ if (sjinfo->jointype == JOIN_FULL &&
+ bms_overlap(rel->relids, sjinfo->min_righthand))
+ inner_baserels = bms_add_members(inner_baserels,
+ sjinfo->min_lefthand);
+ }
+
+ /* Now scan the rel's join clauses */
+ foreach(lc, rel->joininfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Ignore if it mentions anything from wrong side of an outer join */
+ if (bms_overlap(rinfo->clause_relids, inner_baserels))
+ continue;
+
+ /*
+ * Note that we ignore required_relids; that's okay because we are
+ * intentionally ignoring the normal rules for placing evaluation of
+ * join clauses. The whole point here is to evaluate join clauses
+ * below their join, even if they would normally be delayed by
+ * outer join rules.
+ *
+ * Instead of considering required_relids, we ignore clauses for which
+ * any referenced rel is in nullable_relids; that means there's an
+ * outer join below the clause and so it can't be checked at the
+ * relation scan level.
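+ * (For example, hypothetically, in "(A LEFT JOIN B) JOIN C ON b.y = c.z",
+ * B is in the clause's nullable_relids: b.y may be nulled by the lower
+ * outer join, so the clause cannot be checked at B's scan.)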
+ *
+ * Note: unlike create_or_index_quals(), we can accept clauses that
+ * are marked !is_pushed_down (ie they are themselves outer-join
+ * clauses). This is OK because any path generated with these clauses
+ * could only be used in the inside of a nestloop join, which will be
+ * the nullable side.
+ */
+ if (bms_overlap(rinfo->clause_relids, rinfo->nullable_relids))
+ continue;
+
+ /* Potentially usable, so see if it matches the index or is an OR */
+ if (restriction_is_or_clause(rinfo))
+ *joinorclauses = lappend(*joinorclauses, rinfo);
+ else
+ match_clause_to_index(index, rinfo, clauseset);
+ }
+}
+
+/*
+ * match_eclass_clauses_to_index
+ * Identify EquivalenceClass join clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ int indexcol;
+
+ /* No work if rel is not in any such ECs */
+ if (!index->rel->has_eclass_joins)
+ return;
+
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ List *clauses;
-/****************************************************************************
- * ---- ROUTINES TO CHECK RESTRICTIONS ----
- ****************************************************************************/
+ clauses = generate_implied_equalities_for_indexcol(root,
+ index,
+ indexcol);
+ /*
+ * We have to check whether the results actually do match the index,
+ * since for non-btree indexes the EC's equality operators might not
+ * be in the index opclass (cf eclass_member_matches_indexcol).
+ */
+ match_clauses_to_index(index, clauses, clauseset);
+ }
+}
/*
* match_clauses_to_index
- * Find restriction clauses that can be used with an index.
- *
- * Returns a list of RestrictInfo nodes for clauses that can be used with
- * this index, along with an integer list of the index column numbers
- * (zero based) that each clause would be used with. The clauses are
- * ordered by index key, so that the column numbers form a nondecreasing
- * sequence. (This order is depended on by btree and possibly other places.)
- * NIL lists are returned if there are no matching clauses.
- *
- * We can use clauses from either the current clauses or outer_clauses lists,
- * but *found_clause is set TRUE only if we used at least one clause from
- * the "current clauses" list. See find_usable_indexes() for motivation.
- *
- * outer_relids determines what Vars will be allowed on the other side
- * of a possible index qual; see match_clause_to_indexcol().
- *
- * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used.
- * When it's SAOP_REQUIRE, *found_clause is set TRUE only if we used at least
- * one ScalarArrayOpExpr from the current clauses list.
- *
- * If the index has amoptionalkey = false, we give up and return NIL when
- * there are no restriction clauses matching the first index key. Otherwise,
- * we return NIL only if there are no restriction clauses matching any index
- * key. There could be unused index keys after the first one in any case.
- *
- * Note: in some circumstances we may find the same RestrictInfos coming
- * from multiple places. Defend against redundant outputs by refusing to
- * match an already-used clause (pointer equality should be a good enough
- * check for this). This also keeps us from matching the same clause to
- * multiple columns of a badly-defined index, which is unlikely to be helpful
- * and is likely to give us an inflated idea of the index's selectivity.
+ * Perform match_clause_to_index() for each clause in a list.
+ * Matching clauses are added to *clauseset.
*/
static void
match_clauses_to_index(IndexOptInfo *index,
- List *clauses, List *outer_clauses,
- Relids outer_relids,
- SaOpControl saop_control,
- List **index_clauses_p,
- List **clause_columns_p,
- bool *found_clause)
+ List *clauses,
+ IndexClauseSet *clauseset)
{
- List *index_clauses = NIL;
- List *clause_columns = NIL;
- int indexcol;
-
- *index_clauses_p = NIL; /* set default results */
- *clause_columns_p = NIL;
- *found_clause = false;
-
- if (clauses == NIL && outer_clauses == NIL)
- return; /* cannot succeed */
+ ListCell *lc;
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ foreach(lc, clauses)
{
- ListCell *l;
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
- /* check the current clauses */
- foreach(l, clauses)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ Assert(IsA(rinfo, RestrictInfo));
+ match_clause_to_index(index, rinfo, clauseset);
+ }
+}
- Assert(IsA(rinfo, RestrictInfo));
- if (list_member_ptr(index_clauses, rinfo))
- continue;
- if (match_clause_to_indexcol(index,
- indexcol,
- rinfo,
- outer_relids,
- saop_control))
- {
- index_clauses = lappend(index_clauses, rinfo);
- clause_columns = lappend_int(clause_columns, indexcol);
- if (saop_control != SAOP_REQUIRE ||
- IsA(rinfo->clause, ScalarArrayOpExpr))
- *found_clause = true;
- }
- }
+/*
+ * match_clause_to_index
+ * Test whether a qual clause can be used with an index.
+ *
+ * If the clause is usable, add it to the appropriate list in *clauseset.
+ * *clauseset must be initialized to zeroes before first call.
+ *
+ * Note: in some circumstances we may find the same RestrictInfos coming from
+ * multiple places. Defend against redundant outputs by refusing to add a
+ * clause twice (pointer equality should be a good enough check for this).
+ *
+ * Note: it's possible that a badly-defined index could have multiple matching
+ * columns. We always select the first match if so; this avoids scenarios
+ * wherein we get an inflated idea of the index's selectivity by using the
+ * same clause multiple times with different index columns.
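+ * (For example, a hypothetical index declared on (x, x) would match a
+ * clause on x only to its first column.)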
+ */
+static void
+match_clause_to_index(IndexOptInfo *index,
+ RestrictInfo *rinfo,
+ IndexClauseSet *clauseset)
+{
+ int indexcol;
- /* check the outer clauses */
- foreach(l, outer_clauses)
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ if (match_clause_to_indexcol(index,
+ indexcol,
+ rinfo))
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-
- Assert(IsA(rinfo, RestrictInfo));
- if (list_member_ptr(index_clauses, rinfo))
- continue;
- if (match_clause_to_indexcol(index,
- indexcol,
- rinfo,
- outer_relids,
- saop_control))
- {
- index_clauses = lappend(index_clauses, rinfo);
- clause_columns = lappend_int(clause_columns, indexcol);
- }
- }
-
- /*
- * If no clauses match this key, check for amoptionalkey restriction.
- */
- if (index_clauses == NIL && !index->amoptionalkey)
+ clauseset->indexclauses[indexcol] =
+ list_append_unique_ptr(clauseset->indexclauses[indexcol],
+ rinfo);
+ clauseset->nonempty = true;
return;
+ }
}
-
- *index_clauses_p = index_clauses;
- *clause_columns_p = clause_columns;
}
-
/*
* match_clause_to_indexcol()
* Determines whether a restriction clause matches a column of an index.
*
- * To match a normal index, the clause:
+ * To match an index normally, the clause:
*
* (1) must be in the form (indexkey op const) or (const op indexkey);
* and
* and
* (3) must match the collation of the index, if collation is relevant.
*
- * Our definition of "const" is pretty liberal: we allow Vars belonging
- * to the caller-specified outer_relids relations (which had better not
- * include the relation whose index is being tested). outer_relids should
- * be NULL when checking simple restriction clauses, and the outer side
- * of the join when building a join inner scan. Other than that, the
- * only thing we don't like is volatile functions.
+ * Our definition of "const" is exceedingly liberal: we allow anything that
+ * doesn't involve a volatile function or a Var of the index's relation.
+ * In particular, Vars belonging to other relations of the query are
+ * accepted here, since a clause of that form can be used in a
+ * parameterized indexscan. It's the responsibility of higher code levels
+ * to manage restriction and join clauses appropriately.
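+ *
+ * For example (hypothetical), "a.f1 = b.f2" can match an index on a.f1,
+ * with b.f2 supplying the parameter value on each repetition of a
+ * parameterized indexscan over A.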
*
- * Note: in most cases we already know that the clause as a whole uses
- * vars from the interesting set of relations. The reason for the
- * outer_relids test is to reject clauses like (a.f1 OP (b.f2 OP a.f3));
- * that's not processable by an indexscan nestloop join on A, whereas
- * (a.f1 OP (b.f2 OP c.f3)) is.
+ * Note: we do need to check for Vars of the index's relation on the
+ * "const" side of the clause, since clauses like (a.f1 OP (b.f2 OP a.f3))
+ * are not processable by a parameterized indexscan on a.f1, whereas
+ * something like (a.f1 OP (b.f2 OP c.f3)) is.
*
* Presently, the executor can only deal with indexquals that have the
* indexkey on the left, so we can only use clauses that have the indexkey
* adjust_rowcompare_for_index().
*
* It is also possible to match ScalarArrayOpExpr clauses to indexes, when
- * the clause is of the form "indexkey op ANY (arrayconst)". Since not
- * all indexes handle these natively, and the executor implements them
- * only in the context of bitmap index scans, our caller specifies whether
- * to allow these or not.
+ * the clause is of the form "indexkey op ANY (arrayconst)".
*
* For boolean indexes, it is also possible to match the clause directly
* to the indexkey; or perhaps the clause is (NOT indexkey).
* 'index' is the index of interest.
* 'indexcol' is a column number of 'index' (counting from 0).
* 'rinfo' is the clause to be tested (as a RestrictInfo node).
- * 'outer_relids' lists rels whose Vars can be considered pseudoconstant.
- * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used.
*
* Returns true if the clause can be used with this index key.
*
static bool
match_clause_to_indexcol(IndexOptInfo *index,
int indexcol,
- RestrictInfo *rinfo,
- Relids outer_relids,
- SaOpControl saop_control)
+ RestrictInfo *rinfo)
{
Expr *clause = rinfo->clause;
+ Index index_relid = index->rel->relid;
Oid opfamily = index->opfamily[indexcol];
Oid idxcollation = index->indexcollations[indexcol];
Node *leftop,
expr_coll = ((OpExpr *) clause)->inputcollid;
plain_op = true;
}
- else if (clause && IsA(clause, ScalarArrayOpExpr) &&
- (index->amsearcharray || saop_control != SAOP_PER_AM))
+ else if (clause && IsA(clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
{
return match_rowcompare_to_indexcol(index, indexcol,
opfamily, idxcollation,
- (RowCompareExpr *) clause,
- outer_relids);
+ (RowCompareExpr *) clause);
}
else if (index->amsearchnulls && IsA(clause, NullTest))
{
* (constant operator indexkey). See above notes about const-ness.
*/
if (match_index_to_operand(leftop, indexcol, index) &&
- bms_is_subset(right_relids, outer_relids) &&
+ !bms_is_member(index_relid, right_relids) &&
!contain_volatile_functions(rightop))
{
if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
* is a "special" indexable operator.
*/
if (plain_op &&
- match_special_index_operator(clause, opfamily, idxcollation, true))
+ match_special_index_operator(clause, opfamily,
+ idxcollation, true))
return true;
return false;
}
if (plain_op &&
match_index_to_operand(rightop, indexcol, index) &&
- bms_is_subset(left_relids, outer_relids) &&
+ !bms_is_member(index_relid, left_relids) &&
!contain_volatile_functions(leftop))
{
if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
* If we didn't find a member of the index's opfamily, see whether it
* is a "special" indexable operator.
*/
- if (match_special_index_operator(clause, opfamily, idxcollation, false))
+ if (match_special_index_operator(clause, opfamily,
+ idxcollation, false))
return true;
return false;
}
int indexcol,
Oid opfamily,
Oid idxcollation,
- RowCompareExpr *clause,
- Relids outer_relids)
+ RowCompareExpr *clause)
{
+ Index index_relid = index->rel->relid;
Node *leftop,
*rightop;
Oid expr_op;
* These syntactic tests are the same as in match_clause_to_indexcol()
*/
if (match_index_to_operand(leftop, indexcol, index) &&
- bms_is_subset(pull_varnos(rightop), outer_relids) &&
+ !bms_is_member(index_relid, pull_varnos(rightop)) &&
!contain_volatile_functions(rightop))
{
/* OK, indexkey is on left */
}
else if (match_index_to_operand(rightop, indexcol, index) &&
- bms_is_subset(pull_varnos(leftop), outer_relids) &&
+ !bms_is_member(index_relid, pull_varnos(leftop)) &&
!contain_volatile_functions(leftop))
{
/* indexkey is on right, so commute the operator */
}
/****************************************************************************
- * ---- ROUTINES TO CHECK JOIN CLAUSES ----
+ * ---- ROUTINES TO CHECK EXTERNALLY-VISIBLE CONDITIONS ----
****************************************************************************/
/*
- * indexable_outerrelids
- * Finds all other relids that participate in any indexable join clause
- * for the specified table. Returns a set of relids.
- */
-static Relids
-indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel)
-{
- Relids outer_relids = NULL;
- bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
- ListCell *lc1;
-
- /*
- * Examine each joinclause in the joininfo list to see if it matches any
- * key of any index. If so, add the clause's other rels to the result.
- */
- foreach(lc1, rel->joininfo)
- {
- RestrictInfo *joininfo = (RestrictInfo *) lfirst(lc1);
- Relids other_rels;
-
- other_rels = bms_difference(joininfo->required_relids, rel->relids);
- if (matches_any_index(joininfo, rel, other_rels))
- outer_relids = bms_join(outer_relids, other_rels);
- else
- bms_free(other_rels);
- }
-
- /*
- * We also have to look through the query's EquivalenceClasses to see if
- * any of them could generate indexable join conditions for this rel.
- */
- if (rel->has_eclass_joins)
- {
- foreach(lc1, root->eq_classes)
- {
- EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
- Relids other_rels = NULL;
- bool found_index = false;
- ListCell *lc2;
-
- /*
- * Won't generate joinclauses if const or single-member (the
- * latter test covers the volatile case too)
- */
- if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
- continue;
-
- /*
- * Note we don't test ec_broken; if we did, we'd need a separate
- * code path to look through ec_sources. Checking the members
- * anyway is OK as a possibly-overoptimistic heuristic.
- */
-
- /*
- * No point in searching if rel not mentioned in eclass (but we
- * can't tell that for a child rel).
- */
- if (!is_child_rel &&
- !bms_is_subset(rel->relids, cur_ec->ec_relids))
- continue;
-
- /*
- * Scan members, looking for both an index match and join
- * candidates
- */
- foreach(lc2, cur_ec->ec_members)
- {
- EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
-
- /* Join candidate? */
- if (!cur_em->em_is_child &&
- !bms_overlap(cur_em->em_relids, rel->relids))
- {
- other_rels = bms_add_members(other_rels,
- cur_em->em_relids);
- continue;
- }
-
- /* Check for index match (only need one) */
- if (!found_index &&
- bms_equal(cur_em->em_relids, rel->relids) &&
- eclass_matches_any_index(cur_ec, cur_em, rel))
- found_index = true;
- }
-
- if (found_index)
- outer_relids = bms_join(outer_relids, other_rels);
- else
- bms_free(other_rels);
- }
- }
-
- return outer_relids;
-}
-
-/*
- * matches_any_index
- * Workhorse for indexable_outerrelids: see if a joinclause can be
- * matched to any index of the given rel.
- */
-static bool
-matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids)
-{
- ListCell *l;
-
- Assert(IsA(rinfo, RestrictInfo));
-
- if (restriction_is_or_clause(rinfo))
- {
- foreach(l, ((BoolExpr *) rinfo->orclause)->args)
- {
- Node *orarg = (Node *) lfirst(l);
-
- /* OR arguments should be ANDs or sub-RestrictInfos */
- if (and_clause(orarg))
- {
- ListCell *j;
-
- /* Recurse to examine AND items and sub-ORs */
- foreach(j, ((BoolExpr *) orarg)->args)
- {
- RestrictInfo *arinfo = (RestrictInfo *) lfirst(j);
-
- if (matches_any_index(arinfo, rel, outer_relids))
- return true;
- }
- }
- else
- {
- /* Recurse to examine simple clause */
- Assert(IsA(orarg, RestrictInfo));
- Assert(!restriction_is_or_clause((RestrictInfo *) orarg));
- if (matches_any_index((RestrictInfo *) orarg, rel,
- outer_relids))
- return true;
- }
- }
-
- return false;
- }
-
- /* Normal case for a simple restriction clause */
- foreach(l, rel->indexlist)
- {
- IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
- int indexcol;
-
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
- {
- if (match_clause_to_indexcol(index,
- indexcol,
- rinfo,
- outer_relids,
- SAOP_ALLOW))
- return true;
- }
- }
-
- return false;
-}
-
-/*
- * eclass_matches_any_index
- * Workhorse for indexable_outerrelids: see if an EquivalenceClass member
- * can be matched to any index column of the given rel.
+ * eclass_member_matches_indexcol
+ * Test whether an EquivalenceClass member matches an index column.
*
- * This is also exported for use by find_eclass_clauses_for_index_join.
+ * This is exported for use by generate_implied_equalities_for_indexcol.
*/
bool
-eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
- RelOptInfo *rel)
-{
- ListCell *l;
-
- foreach(l, rel->indexlist)
- {
- IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
- int indexcol;
-
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
- {
- Oid curFamily = index->opfamily[indexcol];
- Oid curCollation = index->indexcollations[indexcol];
-
- /*
- * If it's a btree index, we can reject it if its opfamily isn't
- * compatible with the EC, since no clause generated from the EC
- * could be used with the index. For non-btree indexes, we can't
- * easily tell whether clauses generated from the EC could be used
- * with the index, so only check for expression match. This might
- * mean we return "true" for a useless index, but that will just
- * cause some wasted planner cycles; it's better than ignoring
- * useful indexes.
- *
- * We insist on collation match for all index types, though.
- */
- if ((index->relam != BTREE_AM_OID ||
- list_member_oid(ec->ec_opfamilies, curFamily)) &&
- IndexCollMatchesExprColl(curCollation, ec->ec_collation) &&
- match_index_to_operand((Node *) em->em_expr, indexcol, index))
- return true;
- }
- }
-
- return false;
-}
-
-
-/*
- * best_inner_indexscan
- * Finds the best available inner indexscans for a nestloop join
- * with the given rel on the inside and the given outer_rel outside.
- *
- * *cheapest_startup gets the path with least startup cost
- * *cheapest_total gets the path with least total cost (often the same path)
- * Both are set to NULL if there are no possible inner indexscans.
- *
- * We ignore ordering considerations, since a nestloop's inner scan's order
- * is uninteresting. Hence startup cost and total cost are the only figures
- * of merit to consider.
- *
- * Note: create_index_paths() must have been run previously for this rel,
- * else the results will always be NULL.
- */
-void
-best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype,
- Path **cheapest_startup, Path **cheapest_total)
-{
- Relids outer_relids;
- bool isouterjoin;
- List *clause_list;
- List *indexpaths;
- List *bitindexpaths;
- List *allindexpaths;
- ListCell *l;
- InnerIndexscanInfo *info;
- MemoryContext oldcontext;
-
- /* Initialize results for failure returns */
- *cheapest_startup = *cheapest_total = NULL;
-
- /*
- * Nestloop only supports inner, left, semi, and anti joins.
- */
- switch (jointype)
- {
- case JOIN_INNER:
- case JOIN_SEMI:
- isouterjoin = false;
- break;
- case JOIN_LEFT:
- case JOIN_ANTI:
- isouterjoin = true;
- break;
- default:
- return;
- }
-
- /*
- * If there are no indexable joinclauses for this rel, exit quickly.
- */
- if (bms_is_empty(rel->index_outer_relids))
- return;
-
- /*
- * Otherwise, we have to do path selection in the main planning context,
- * so that any created path can be safely attached to the rel's cache of
- * best inner paths. (This is not currently an issue for normal planning,
- * but it is an issue for GEQO planning.)
- */
- oldcontext = MemoryContextSwitchTo(root->planner_cxt);
-
- /*
- * Intersect the given outer relids with index_outer_relids to find the
- * set of outer relids actually relevant for this rel. If there are none,
- * again we can fail immediately.
- */
- outer_relids = bms_intersect(rel->index_outer_relids, outer_rel->relids);
- if (bms_is_empty(outer_relids))
- {
- bms_free(outer_relids);
- MemoryContextSwitchTo(oldcontext);
- return;
- }
-
- /*
- * Look to see if we already computed the result for this set of relevant
- * outerrels. (We include the isouterjoin status in the cache lookup key
- * for safety. In practice I suspect this is not necessary because it
- * should always be the same for a given combination of rels.)
- *
- * NOTE: because we cache on outer_relids rather than outer_rel->relids,
- * we will report the same paths and hence path cost for joins with
- * different sets of irrelevant rels on the outside. Now that cost_index
- * is sensitive to outer_rel->rows, this is not really right. However the
- * error is probably not large. Is it worth establishing a separate cache
- * entry for each distinct outer_rel->relids set to get this right?
- */
- foreach(l, rel->index_inner_paths)
- {
- info = (InnerIndexscanInfo *) lfirst(l);
- if (bms_equal(info->other_relids, outer_relids) &&
- info->isouterjoin == isouterjoin)
- {
- bms_free(outer_relids);
- MemoryContextSwitchTo(oldcontext);
- *cheapest_startup = info->cheapest_startup_innerpath;
- *cheapest_total = info->cheapest_total_innerpath;
- return;
- }
- }
-
- /*
- * Find all the relevant restriction and join clauses.
- *
- * Note: because we include restriction clauses, we will find indexscans
- * that could be plain indexscans, ie, they don't require the join context
- * at all. This may seem redundant, but we need to include those scans in
- * the input given to choose_bitmap_and() to be sure we find optimal AND
- * combinations of join and non-join scans. Also, even if the "best inner
- * indexscan" is just a plain indexscan, it will have a different cost
- * estimate because of cache effects.
- */
- clause_list = find_clauses_for_join(root, rel, outer_relids, isouterjoin);
-
- /*
- * Find all the index paths that are usable for this join, except for
- * stuff involving OR and executor-managed ScalarArrayOpExpr clauses.
- */
- allindexpaths = find_usable_indexes(root, rel,
- clause_list, NIL,
- false, outer_rel,
- SAOP_PER_AM,
- ST_ANYSCAN);
-
- /*
- * Include the ones that are usable as plain indexscans in indexpaths, and
- * include the ones that are usable as bitmap scans in bitindexpaths.
- */
- indexpaths = bitindexpaths = NIL;
- foreach(l, allindexpaths)
- {
- IndexPath *ipath = (IndexPath *) lfirst(l);
-
- if (ipath->indexinfo->amhasgettuple)
- indexpaths = lappend(indexpaths, ipath);
-
- if (ipath->indexinfo->amhasgetbitmap)
- bitindexpaths = lappend(bitindexpaths, ipath);
- }
-
- /*
- * Generate BitmapOrPaths for any suitable OR-clauses present in the
- * clause list.
- */
- bitindexpaths = list_concat(bitindexpaths,
- generate_bitmap_or_paths(root, rel,
- clause_list, NIL,
- outer_rel));
-
- /*
- * Likewise, generate paths using executor-managed ScalarArrayOpExpr
- * clauses; these can't be simple indexscans but they can be used in
- * bitmap scans.
- */
- bitindexpaths = list_concat(bitindexpaths,
- find_saop_paths(root, rel,
- clause_list, NIL,
- false, outer_rel));
-
- /*
- * If we found anything usable, generate a BitmapHeapPath for the most
- * promising combination of bitmap index paths.
- */
- if (bitindexpaths != NIL)
- {
- Path *bitmapqual;
- BitmapHeapPath *bpath;
-
- bitmapqual = choose_bitmap_and(root, rel, bitindexpaths, outer_rel);
- bpath = create_bitmap_heap_path(root, rel, bitmapqual, outer_rel);
- indexpaths = lappend(indexpaths, bpath);
- }
-
- /*
- * Now choose the cheapest members of indexpaths.
- */
- if (indexpaths != NIL)
- {
- *cheapest_startup = *cheapest_total = (Path *) linitial(indexpaths);
-
- for_each_cell(l, lnext(list_head(indexpaths)))
- {
- Path *path = (Path *) lfirst(l);
-
- if (compare_path_costs(path, *cheapest_startup, STARTUP_COST) < 0)
- *cheapest_startup = path;
- if (compare_path_costs(path, *cheapest_total, TOTAL_COST) < 0)
- *cheapest_total = path;
- }
- }
-
- /* Cache the results --- whether positive or negative */
- info = makeNode(InnerIndexscanInfo);
- info->other_relids = outer_relids;
- info->isouterjoin = isouterjoin;
- info->cheapest_startup_innerpath = *cheapest_startup;
- info->cheapest_total_innerpath = *cheapest_total;
- rel->index_inner_paths = lcons(info, rel->index_inner_paths);
-
- MemoryContextSwitchTo(oldcontext);
-}
-
-/*
- * find_clauses_for_join
- * Generate a list of clauses that are potentially useful for
- * scanning rel as the inner side of a nestloop join.
- *
- * We consider both join and restriction clauses. Any joinclause that uses
- * only otherrels in the specified outer_relids is fair game. But there must
- * be at least one such joinclause in the final list, otherwise we return NIL
- * indicating that there isn't any potential win here.
- */
-static List *
-find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, bool isouterjoin)
+eclass_member_matches_indexcol(EquivalenceClass *ec, EquivalenceMember *em,
+ IndexOptInfo *index, int indexcol)
{
- List *clause_list = NIL;
- Relids join_relids;
- ListCell *l;
+ Oid curFamily = index->opfamily[indexcol];
+ Oid curCollation = index->indexcollations[indexcol];
/*
- * Look for joinclauses that are usable with given outer_relids. Note
- * we'll take anything that's applicable to the join whether it has
- * anything to do with an index or not; since we're only building a list,
- * it's not worth filtering more finely here.
+ * If it's a btree index, we can reject it if its opfamily isn't
+ * compatible with the EC, since no clause generated from the EC could be
+ * used with the index. For non-btree indexes, we can't easily tell
+ * whether clauses generated from the EC could be used with the index,
+ * so don't check the opfamily. This might mean we return "true" for a
+ * useless EC, so we have to recheck the results of
+ * generate_implied_equalities_for_indexcol; see
+ * match_eclass_clauses_to_index.
*/
- join_relids = bms_union(rel->relids, outer_relids);
-
- foreach(l, rel->joininfo)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-
- /* Can't use pushed-down join clauses in outer join */
- if (isouterjoin && rinfo->is_pushed_down)
- continue;
- if (!bms_is_subset(rinfo->required_relids, join_relids))
- continue;
-
- clause_list = lappend(clause_list, rinfo);
- }
-
- bms_free(join_relids);
-
- /*
- * Also check to see if any EquivalenceClasses can produce a relevant
- * joinclause. Since all such clauses are effectively pushed-down, this
- * doesn't apply to outer joins.
- */
- if (!isouterjoin && rel->has_eclass_joins)
- clause_list = list_concat(clause_list,
- find_eclass_clauses_for_index_join(root,
- rel,
- outer_relids));
-
- /* If no join clause was matched then forget it, per comments above */
- if (clause_list == NIL)
- return NIL;
+ if (index->relam == BTREE_AM_OID &&
+ !list_member_oid(ec->ec_opfamilies, curFamily))
+ return false;
- /* We can also use any plain restriction clauses for the rel */
- clause_list = list_concat(list_copy(rel->baserestrictinfo), clause_list);
+ /* We insist on collation match for all index types, though */
+ if (!IndexCollMatchesExprColl(curCollation, ec->ec_collation))
+ return false;
- return clause_list;
+ return match_index_to_operand((Node *) em->em_expr, indexcol, index);
}
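
The collation test just above relies on the IndexCollMatchesExprColl() macro, which is not shown in this excerpt. A minimal sketch of the semantics the check depends on (an assumption inferred from the surrounding code, not a quote of the real header) is:

/* Sketch only: a collation-less index column matches any expression
 * collation; otherwise the two collations must be identical. */
#define IndexCollMatchesExprColl(idxcollation, exprcollation) \
    ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation))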
/*
*
* Note that we aren't interested in collations here; the caller must check
* for a collation match, if it's dealing with an operator where that matters.
+ *
+ * This is exported for use in selfuncs.c.
*/
bool
match_index_to_operand(Node *operand,
* ---- ROUTINES FOR "SPECIAL" INDEXABLE OPERATORS ----
****************************************************************************/
-/*----------
+/*
* These routines handle special optimization of operators that can be
* used with index scans even though they are not known to the executor's
* indexscan machinery. The key idea is that these operators allow us
* the index's opfamily this transformation is a no-op, but clauses recognized
* by match_special_index_operator() or match_boolean_index_clause() must be
* converted into one or more "regular" indexqual conditions.
- *----------
*/
/*
/*
* See how many of the remaining columns match some index column in the
- * same way. A note about rel membership tests: we assume that the clause
- * as a whole is already known to use only Vars from the indexed relation
- * and possibly some acceptable outer relations. So the "other" side of
+ * same way. As in match_clause_to_indexcol(), the "other" side of
* any potential index condition is OK as long as it doesn't use Vars from
* the indexed relation.
*/
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
- JoinType jointype, SpecialJoinInfo *sjinfo);
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ Relids param_source_rels);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
- JoinType jointype, SpecialJoinInfo *sjinfo);
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist,
- JoinType jointype, SpecialJoinInfo *sjinfo);
-static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype);
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels);
static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
{
List *mergeclause_list = NIL;
bool mergejoin_allowed = true;
+ SemiAntiJoinFactors semifactors;
+ Relids param_source_rels = NULL;
+ ListCell *lc;
/*
* Find potential mergejoin clauses. We can skip this if we are not
jointype,
&mergejoin_allowed);
+ /*
+ * If it's SEMI or ANTI join, compute correction factors for cost
+ * estimation. These will be the same for all paths.
+ */
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+ compute_semi_anti_join_factors(root, outerrel, innerrel,
+ jointype, sjinfo, restrictlist,
+ &semifactors);
+
+ /*
+ * Decide whether it's sensible to generate parameterized paths for this
+ * joinrel, and if so, which relations such paths should require. There
+ * is no need to create a parameterized result path unless there is a join
+ * order restriction that prevents joining one of our input rels directly
+ * to the parameter source rel instead of joining to the other input rel.
+ * This restriction reduces the number of parameterized paths we have to
+ * deal with at higher join levels, without compromising the quality of
+ * the resulting plan. We express the restriction as a Relids set that
+ * must overlap the parameterization of any proposed join path.
+ */
+ foreach(lc, root->join_info_list)
+ {
+ /* named sjinfo2 to avoid shadowing this function's sjinfo parameter */
+ SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
+
+ /*
+ * SJ is relevant to this join if we have some part of its RHS
+ * (possibly not all of it), and haven't yet joined to its LHS. (This
+ * test is pretty simplistic, but should be sufficient considering the
+ * join has already been proven legal.) If the SJ is relevant, it
+ * presents constraints for joining to anything not in its RHS.
+ */
+ if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
+ !bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
+ param_source_rels = bms_join(param_source_rels,
+ bms_difference(root->all_baserels,
+ sjinfo2->min_righthand));
+
+ /* full joins constrain both sides symmetrically */
+ if (sjinfo2->jointype == JOIN_FULL &&
+ bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
+ !bms_overlap(joinrel->relids, sjinfo2->min_righthand))
+ param_source_rels = bms_join(param_source_rels,
+ bms_difference(root->all_baserels,
+ sjinfo2->min_lefthand));
+ }
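
To make the restriction concrete, consider a hypothetical query of the form a LEFT JOIN (b JOIN c ON b.y = c.y) ON a.x = b.x. When this function is called for the joinrel {b, c}, we hold the special join's entire RHS {b, c} and none of its LHS {a}, so param_source_rels comes out as everything outside the RHS, i.e. {a}. That is precisely the useful case: the outer join's order restriction forbids joining b directly to a first, so the only way to exploit an index on b.x in a nestloop with a on the outside is a {b, c} join path parameterized by a.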
+
/*
* 1. Consider mergejoin paths where both relations must be explicitly
* sorted. Skip this if we can't mergejoin.
*/
if (mergejoin_allowed)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ restrictlist, mergeclause_list, jointype,
+ sjinfo, param_source_rels);
/*
* 2. Consider paths where the outer relation need not be explicitly
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ restrictlist, mergeclause_list, jointype,
+ sjinfo, &semifactors, param_source_rels);
#ifdef NOT_USED
*/
if (mergejoin_allowed)
match_unsorted_inner(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ restrictlist, mergeclause_list, jointype,
+ sjinfo, &semifactors, param_source_rels);
#endif
/*
*/
if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, jointype, sjinfo);
+ restrictlist, jointype,
+ sjinfo, &semifactors, param_source_rels);
+}
+
+/*
+ * try_nestloop_path
+ * Consider a nestloop join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_nestloop_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels,
+ Path *outer_path,
+ Path *inner_path,
+ List *restrict_clauses,
+ List *pathkeys)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if
+ * the parameterization wouldn't be sensible.
+ */
+ required_outer = calc_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * Do a precheck to quickly eliminate obviously-inferior paths. We
+ * calculate a cheap lower bound on the path's cost and then use
+ * add_path_precheck() to see if the path is clearly going to be dominated
+ * by some existing path for the joinrel. If not, do the full pushup with
+ * creating a fully valid path structure and submitting it to add_path().
+ * The latter two steps are expensive enough to make this two-phase
+ * methodology worthwhile.
+ */
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_path,
+ sjinfo, semifactors);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ sjinfo,
+ semifactors,
+ outer_path,
+ inner_path,
+ restrict_clauses,
+ pathkeys,
+ required_outer));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
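
calc_nestloop_required_outer() itself is not part of this excerpt. The behavior the check above depends on is roughly the following sketch (an assumption about the pathnode.c helper, not a quote of it): the nestloop's requirement is the union of its inputs' requirements, minus whatever the outer input itself supplies, normalized to NULL when empty.

/*
 * Sketch of the expected semantics.  For a nestloop, the inner path may
 * be parameterized by the outer path's relation; such requirements are
 * satisfied inside the join and must not propagate upward.
 */
Relids
calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
{
    Relids      required_outer;

    /* inner_path can require rels from outer_path, but not vice versa */
    Assert(!bms_overlap(outer_path->required_outer,
                        inner_path->parent->relids));
    /* easy case if inner path is not parameterized */
    if (!inner_path->required_outer)
        return bms_copy(outer_path->required_outer);
    /* else, form the union of the requirements ... */
    required_outer = bms_union(outer_path->required_outer,
                               inner_path->required_outer);
    /* ... and remove any mention of now-satisfied outer rels */
    required_outer = bms_del_members(required_outer,
                                     outer_path->parent->relids);
    /* maintain invariant that required_outer is exactly NULL if empty */
    if (bms_is_empty(required_outer))
    {
        bms_free(required_outer);
        required_outer = NULL;
    }
    return required_outer;
}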
+
+/*
+ * try_mergejoin_path
+ * Consider a merge join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_mergejoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ Relids param_source_rels,
+ Path *outer_path,
+ Path *inner_path,
+ List *restrict_clauses,
+ List *pathkeys,
+ List *mergeclauses,
+ List *outersortkeys,
+ List *innersortkeys)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if
+ * the parameterization wouldn't be sensible.
+ */
+ required_outer = calc_non_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * If the given paths are already well enough ordered, we can skip doing
+ * an explicit sort.
+ */
+ if (outersortkeys &&
+ pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
+ outersortkeys = NIL;
+ if (innersortkeys &&
+ pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
+ innersortkeys = NIL;
+
+ /*
+ * See comments in try_nestloop_path().
+ */
+ initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
+ outer_path, inner_path,
+ outersortkeys, innersortkeys,
+ sjinfo);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_mergejoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ sjinfo,
+ outer_path,
+ inner_path,
+ restrict_clauses,
+ pathkeys,
+ required_outer,
+ mergeclauses,
+ outersortkeys,
+ innersortkeys));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
+
+/*
+ * try_hashjoin_path
+ * Consider a hash join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_hashjoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels,
+ Path *outer_path,
+ Path *inner_path,
+ List *restrict_clauses,
+ List *hashclauses)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if
+ * the parameterization wouldn't be sensible.
+ */
+ required_outer = calc_non_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * See comments in try_nestloop_path(). Also note that hashjoin paths
+ * never have any output pathkeys, per comments in create_hashjoin_path.
+ */
+ initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
+ outer_path, inner_path,
+ sjinfo, semifactors);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ NIL, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_hashjoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ sjinfo,
+ semifactors,
+ outer_path,
+ inner_path,
+ restrict_clauses,
+ required_outer,
+ hashclauses));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
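
Its sibling calc_non_nestloop_required_outer() is likewise not shown. Since neither input of a merge or hash join can supply parameter values to the other, a sketch of the expected behavior (again an assumption, not the real function) is simply the union of the two inputs' requirements:

/*
 * Sketch only.  Neither input of a merge or hash join may be
 * parameterized by the other's relation, so the join just requires
 * everything both inputs require.
 */
Relids
calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
{
    Relids      required_outer;

    /* neither path can require rels from the other */
    Assert(!bms_overlap(outer_path->required_outer,
                        inner_path->parent->relids));
    Assert(!bms_overlap(inner_path->required_outer,
                        outer_path->parent->relids));
    /* bms_union returns NULL when both inputs are NULL, as required */
    required_outer = bms_union(outer_path->required_outer,
                               inner_path->required_outer);
    return required_outer;
}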
/*
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
sort_inner_and_outer(PlannerInfo *root,
List *restrictlist,
List *mergeclause_list,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ Relids param_source_rels)
{
Path *outer_path;
Path *inner_path;
* cheapest-startup-cost input paths later, and only if they don't need a
* sort.
*
+ * This function intentionally does not consider parameterized input paths
+ * (implicit in the fact that it only looks at cheapest_total_path, which
+ * is always unparameterized). If we did so, we'd have a combinatorial
+ * explosion of mergejoin paths of dubious value. This interacts with
+ * decisions elsewhere that also discriminate against mergejoins with
+ * parameterized inputs; see comments in src/backend/optimizer/README.
+ *
* If unique-ification is requested, do it and then handle as a plain
* inner join.
*/
* And now we can make the path.
*
* Note: it's possible that the cheapest paths will already be sorted
- * properly. create_mergejoin_path will detect that case and suppress
+ * properly. try_mergejoin_path will detect that case and suppress
* an explicit sort step, so we needn't do so here.
*/
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outer_path,
- inner_path,
- restrictlist,
- merge_pathkeys,
- cur_mergeclauses,
- outerkeys,
- innerkeys));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outer_path,
+ inner_path,
+ restrictlist,
+ merge_pathkeys,
+ cur_mergeclauses,
+ outerkeys,
+ innerkeys);
}
}
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
match_unsorted_outer(PlannerInfo *root,
List *restrictlist,
List *mergeclause_list,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels)
{
JoinType save_jointype = jointype;
bool nestjoinOK;
bool useallclauses;
- Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
- Path *index_cheapest_startup = NULL;
- Path *index_cheapest_total = NULL;
- ListCell *l;
+ ListCell *lc1;
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
/*
* If we need to unique-ify the inner path, we will consider only the
- * cheapest inner.
+ * cheapest-total inner.
*/
if (save_jointype == JOIN_UNIQUE_INNER)
{
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
- inner_cheapest_startup = inner_cheapest_total;
}
else if (nestjoinOK)
{
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
matpath = (Path *)
create_material_path(innerrel, inner_cheapest_total);
-
- /*
- * Get the best innerjoin indexpaths (if any) for this outer rel.
- * They're the same for all outer paths.
- */
- if (innerrel->reloptkind != RELOPT_JOINREL)
- {
- if (IsA(inner_cheapest_total, AppendPath))
- index_cheapest_total = best_appendrel_indexscan(root,
- innerrel,
- outerrel,
- jointype);
- else if (innerrel->rtekind == RTE_RELATION)
- best_inner_indexscan(root, innerrel, outerrel, jointype,
- &index_cheapest_startup,
- &index_cheapest_total);
- }
}
- foreach(l, outerrel->pathlist)
+ foreach(lc1, outerrel->pathlist)
{
- Path *outerpath = (Path *) lfirst(l);
+ Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
List *mergeclauses;
List *innersortkeys;
int num_sortkeys;
int sortkeycnt;
+ /*
+ * We cannot use an outer path that is parameterized by the inner rel.
+ */
+ if (bms_overlap(outerpath->required_outer, innerrel->relids))
+ continue;
+
/*
* If we need to unique-ify the outer path, it's pointless to consider
- * any but the cheapest outer.
+ * any but the cheapest outer. (XXX we don't consider parameterized
+ * outers, nor inners, for unique-ified cases. Should we?)
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
- if (nestjoinOK)
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ {
+ /*
+ * Consider nestloop join, but only with the unique-ified cheapest
+ * inner path
+ */
+ try_nestloop_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ inner_cheapest_total,
+ restrictlist,
+ merge_pathkeys);
+ }
+ else if (nestjoinOK)
{
/*
- * Always consider a nestloop join with this outer and
- * cheapest-total-cost inner. When appropriate, also consider
- * using the materialized form of the cheapest inner, the
- * cheapest-startup-cost inner path, and the cheapest innerjoin
- * indexpaths.
+ * Consider nestloop joins using this outer path and various
+ * available paths for the inner relation. We consider the
+ * cheapest-total paths for each available parameterization of
+ * the inner relation, including the unparameterized case.
*/
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- inner_cheapest_total,
- restrictlist,
- merge_pathkeys));
+ ListCell *lc2;
+
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
+
+ try_nestloop_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ merge_pathkeys);
+ }
+
+ /* Also consider materialized form of the cheapest inner path */
if (matpath != NULL)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- matpath,
- restrictlist,
- merge_pathkeys));
- if (inner_cheapest_startup != inner_cheapest_total)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- inner_cheapest_startup,
- restrictlist,
- merge_pathkeys));
- if (index_cheapest_total != NULL)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- index_cheapest_total,
- restrictlist,
- merge_pathkeys));
- if (index_cheapest_startup != NULL &&
- index_cheapest_startup != index_cheapest_total)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- index_cheapest_startup,
- restrictlist,
- merge_pathkeys));
+ try_nestloop_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ matpath,
+ restrictlist,
+ merge_pathkeys);
}
/* Can't do anything else if outer path needs to be unique'd */
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
* Since a sort will be needed, only cheapest total cost matters. (But
- * create_mergejoin_path will do the right thing if
+ * try_mergejoin_path will do the right thing if
* inner_cheapest_total is already correctly sorted.)
*/
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- inner_cheapest_total,
- restrictlist,
- merge_pathkeys,
- mergeclauses,
- NIL,
- innersortkeys));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outerpath,
+ inner_cheapest_total,
+ restrictlist,
+ merge_pathkeys,
+ mergeclauses,
+ NIL,
+ innersortkeys);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
* mergejoin using a subset of the merge clauses. Here, we consider
* both cheap startup cost and cheap total cost.
*
+ * Currently we do not consider parameterized inner paths here.
+ * This interacts with decisions elsewhere that also discriminate
+ * against mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
+ *
* As we shorten the sortkey list, we should consider only paths that
* are strictly cheaper than (in particular, not the same as) any path
* found in an earlier iteration. Otherwise we'd be intentionally
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
+ NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
}
else
newclauses = mergeclauses;
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- innerpath,
- restrictlist,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
+ NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
else
newclauses = mergeclauses;
}
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- innerpath,
- restrictlist,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL);
}
cheapest_startup_inner = innerpath;
}
* clauses that apply to this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
hash_inner_and_outer(PlannerInfo *root,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels)
{
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
ListCell *l;
/*
- * We need to build only one hashpath for any given pair of outer and
- * inner relations; all of the hashable clauses will be used as keys.
+ * We need to build only one hashclauses list for any given pair of outer
+ * and inner relations; all of the hashable clauses will be used as keys.
*
* Scan the join's restrictinfo list to find hashjoinable clauses that are
* usable with this pair of sub-relations.
hashclauses = lappend(hashclauses, restrictinfo);
}
- /* If we found any usable hashclauses, make a path */
+ /* If we found any usable hashclauses, make paths */
if (hashclauses)
{
/*
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
- /* Unique-ify if need be */
+ /* Unique-ify if need be; we ignore parameterized possibilities */
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel,
cheapest_total_outer, sjinfo);
Assert(cheapest_total_outer);
- cheapest_startup_outer = cheapest_total_outer;
jointype = JOIN_INNER;
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_total_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
+ /* no possibility of cheap startup here */
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner, sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_total_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
+ if (cheapest_startup_outer != cheapest_total_outer)
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
}
+ else
+ {
+ /*
+ * For other jointypes, we consider the cheapest startup outer
+ * together with the cheapest total inner, and then consider
+ * pairings of cheapest-total paths including parameterized ones.
+ * There is no use in generating parameterized paths on the basis
+ * of possibly cheap startup cost, so this is sufficient.
+ */
+ ListCell *lc1;
+ ListCell *lc2;
+
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
+
+ foreach(lc1, outerrel->cheapest_parameterized_paths)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
- add_path(joinrel, (Path *)
- create_hashjoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- cheapest_total_outer,
- cheapest_total_inner,
- restrictlist,
- hashclauses));
- if (cheapest_startup_outer != cheapest_total_outer)
- add_path(joinrel, (Path *)
- create_hashjoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- cheapest_startup_outer,
- cheapest_total_inner,
- restrictlist,
- hashclauses));
- }
-}
-
-/*
- * best_appendrel_indexscan
- * Finds the best available set of inner indexscans for a nestloop join
- * with the given append relation on the inside and the given outer_rel
- * outside. Returns an AppendPath comprising the best inner scans, or
- * NULL if there are no possible inner indexscans.
- *
- * Note that we currently consider only cheapest-total-cost. It's not
- * very clear what cheapest-startup-cost might mean for an AppendPath.
- */
-static Path *
-best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype)
-{
- int parentRTindex = rel->relid;
- List *append_paths = NIL;
- bool found_indexscan = false;
- ListCell *l;
-
- foreach(l, root->append_rel_list)
- {
- AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
- int childRTindex;
- RelOptInfo *childrel;
- Path *index_cheapest_startup;
- Path *index_cheapest_total;
-
- /* append_rel_list contains all append rels; ignore others */
- if (appinfo->parent_relid != parentRTindex)
- continue;
-
- childRTindex = appinfo->child_relid;
- childrel = find_base_rel(root, childRTindex);
- Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ /*
+ * We cannot use an outer path that is parameterized by the
+ * inner rel.
+ */
+ if (bms_overlap(outerpath->required_outer, innerrel->relids))
+ continue;
- /*
- * Check to see if child was rejected by constraint exclusion. If so,
- * it will have a cheapest_total_path that's a "dummy" path.
- */
- if (IS_DUMMY_PATH(childrel->cheapest_total_path))
- continue; /* OK, we can ignore it */
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
- /*
- * Get the best innerjoin indexpaths (if any) for this child rel.
- */
- best_inner_indexscan(root, childrel, outer_rel, jointype,
- &index_cheapest_startup, &index_cheapest_total);
+ /*
+ * We cannot use an inner path that is parameterized by
+ * the outer rel, either.
+ */
+ if (bms_overlap(innerpath->required_outer,
+ outerrel->relids))
+ continue;
- /*
- * If no luck on an indexpath for this rel, we'll still consider an
- * Append substituting the cheapest-total inner path. However we must
- * find at least one indexpath, else there's not going to be any
- * improvement over the base path for the appendrel.
- */
- if (index_cheapest_total)
- found_indexscan = true;
- else
- index_cheapest_total = childrel->cheapest_total_path;
+ if (outerpath == cheapest_startup_outer &&
+ innerpath == cheapest_total_inner)
+ continue; /* already tried it */
- append_paths = lappend(append_paths, index_cheapest_total);
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ hashclauses);
+ }
+ }
+ }
}
-
- if (!found_indexscan)
- return NULL;
-
- /* Form and return the completed Append path. */
- return (Path *) create_append_path(rel, append_paths);
}
/*
/*
* is_dummy_rel --- has relation been proven empty?
- *
- * If so, it will have a single path that is dummy.
*/
static bool
is_dummy_rel(RelOptInfo *rel)
{
- return (rel->cheapest_total_path != NULL &&
- IS_DUMMY_PATH(rel->cheapest_total_path));
+ return IS_DUMMY_REL(rel);
}
/*
/* Set up the dummy path */
add_path(rel, (Path *) create_append_path(rel, NIL));
- /* Set or update cheapest_total_path */
+ /* Set or update cheapest_total_path and related fields */
set_cheapest(rel);
MemoryContextSwitchTo(oldcontext);
orig_selec;
ListCell *i;
+ /* Skip the whole mess if no indexes */
+ if (rel->indexlist == NIL)
+ return false;
+
/*
* Find potentially interesting OR joinclauses.
*
* Use the generate_bitmap_or_paths() machinery to estimate the
* value of each OR clause. We can use regular restriction
* clauses along with the OR clause contents to generate
- * indexquals. We pass outer_rel = NULL so that sub-clauses that
- * are actually joins will be ignored.
+ * indexquals. We pass restriction_only = true so that any
+ * sub-clauses that are actually joins will be ignored.
*/
List *orpaths;
ListCell *k;
orpaths = generate_bitmap_or_paths(root, rel,
list_make1(rinfo),
rel->baserestrictinfo,
- NULL);
+ true);
/* Locate the cheapest OR path */
foreach(k, orpaths)
/*
* get_cheapest_path_for_pathkeys
* Find the cheapest path (according to the specified criterion) that
- * satisfies the given pathkeys. Return NULL if no such path.
+ * satisfies the given pathkeys and parameterization.
+ * Return NULL if no such path.
*
* 'paths' is a list of possible paths that all generate the same relation
* 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'required_outer' denotes allowable outer relations for parameterized paths
* 'cost_criterion' is STARTUP_COST or TOTAL_COST
*/
Path *
get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+ Relids required_outer,
CostSelector cost_criterion)
{
Path *matched_path = NULL;
compare_path_costs(matched_path, path, cost_criterion) <= 0)
continue;
- if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ bms_is_subset(path->required_outer, required_outer))
matched_path = path;
}
return matched_path;
/*
* get_cheapest_fractional_path_for_pathkeys
* Find the cheapest path (for retrieving a specified fraction of all
- * the tuples) that satisfies the given pathkeys.
+ * the tuples) that satisfies the given pathkeys and parameterization.
* Return NULL if no such path.
*
* See compare_fractional_path_costs() for the interpretation of the fraction
*
* 'paths' is a list of possible paths that all generate the same relation
* 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'required_outer' denotes allowable outer relations for parameterized paths
* 'fraction' is the fraction of the total tuples expected to be retrieved
*/
Path *
get_cheapest_fractional_path_for_pathkeys(List *paths,
List *pathkeys,
+ Relids required_outer,
double fraction)
{
Path *matched_path = NULL;
/*
* Since cost comparison is a lot cheaper than pathkey comparison, do
- * that first.
+ * that first. (XXX is that still true?)
*/
if (matched_path != NULL &&
compare_fractional_path_costs(matched_path, path, fraction) <= 0)
continue;
- if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ bms_is_subset(path->required_outer, required_outer))
matched_path = path;
}
return matched_path;
long numGroups;
Oid *groupOperators;
- numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
+ numGroups = (long) Min(best_path->path.rows, (double) LONG_MAX);
/*
* Get the hashable equality operators for the Agg node to use.
}
/* Adjust output size estimate (other fields should be OK already) */
- plan->plan_rows = best_path->rows;
+ plan->plan_rows = best_path->path.rows;
return plan;
}
fixed_indexorderbys = fix_indexorderby_references(root, best_path);
/*
- * If this is an innerjoin scan, the indexclauses will contain join
+ * If this is a parameterized scan, the indexclauses will contain join
* clauses that are not present in scan_clauses (since the passed-in value
* is just the rel's baserestrictinfo list). We must add these clauses to
* scan_clauses to ensure they get checked. In most cases we will remove
* Note: pointer comparison should be enough to determine RestrictInfo
* matches.
*/
- if (best_path->isjoininner)
+ if (best_path->path.required_outer)
scan_clauses = list_union_ptr(scan_clauses, best_path->indexclauses);
/*
* it'd break the comparisons to predicates above ... (or would it? Those
* wouldn't have outer refs)
*/
- if (best_path->isjoininner)
+ if (best_path->path.required_outer)
{
stripped_indexquals = (List *)
replace_nestloop_params(root, (Node *) stripped_indexquals);
best_path->indexscandir);
copy_path_costsize(&scan_plan->plan, &best_path->path);
- /* use the indexscan-specific rows estimate, not the parent rel's */
- scan_plan->plan.plan_rows = best_path->rows;
return scan_plan;
}
scan_clauses = extract_actual_clauses(scan_clauses, false);
/*
- * If this is a innerjoin scan, the indexclauses will contain join clauses
+ * If this is a parameterized scan, the indexclauses will contain join clauses
* that are not present in scan_clauses (since the passed-in value is just
* the rel's baserestrictinfo list). We must add these clauses to
* scan_clauses to ensure they get checked. In most cases we will remove
* the join clauses again below, but if a join clause contains a special
* operator, we need to make sure it gets into the scan_clauses.
*/
- if (best_path->isjoininner)
+ if (best_path->path.required_outer)
{
scan_clauses = list_concat_unique(scan_clauses, bitmapqualorig);
}
baserelid);
copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
- /* use the indexscan-specific rows estimate, not the parent rel's */
- scan_plan->scan.plan.plan_rows = best_path->rows;
return scan_plan;
}
* Replace outer-relation variables with nestloop params, but only
* after doing the above comparisons to index predicates.
*/
- if (ipath->isjoininner)
+ if (ipath->path.required_outer)
{
*qual = (List *)
replace_nestloop_params(root, (Node *) *qual);
ListCell *next;
/*
- * If the inner path is a nestloop inner indexscan, it might be using some
- * of the join quals as index quals, in which case we don't have to check
- * them again at the join node. Remove any join quals that are redundant.
+ * If the inner path is parameterized, it might have already used some of
+ * the join quals, in which case we don't have to check them again at the
+ * join node. Remove any join quals that are redundant.
*/
joinrestrictclauses =
- select_nonredundant_join_clauses(root,
- joinrestrictclauses,
- best_path->innerjoinpath);
+ select_nonredundant_join_clauses(joinrestrictclauses,
+ best_path->innerjoinpath->param_clauses);
/* Sort join qual clauses into best execution order */
joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
/*
* We assume the materialize will not spill to disk, and therefore
* charge just cpu_operator_cost per tuple. (Keep this estimate in
- * sync with cost_mergejoin.)
+ * sync with final_cost_mergejoin.)
*/
copy_plan_costsize(matplan, inner_plan);
matplan->total_cost += cpu_operator_cost * matplan->plan_rows;
{
dest->startup_cost = src->startup_cost;
dest->total_cost = src->total_cost;
- dest->plan_rows = src->parent->rows;
+ dest->plan_rows = src->rows;
dest->plan_width = src->parent->width;
}
else
sortedpath =
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
root->query_pathkeys,
+ NULL,
tuple_fraction);
/* Don't return same path in both guises; just wastes effort */
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
- ForwardScanDirection, false, NULL);
+ ForwardScanDirection, false,
+ NULL, 1.0);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
#include "utils/selfuncs.h"
+typedef enum
+{
+ COSTS_EQUAL, /* path costs are fuzzily equal */
+ COSTS_BETTER1, /* first path is cheaper than second */
+ COSTS_BETTER2, /* second path is cheaper than first */
+ COSTS_DIFFERENT /* neither path dominates the other on cost */
+} PathCostComparison;
+
+static void add_parameterized_path(RelOptInfo *parent_rel, Path *new_path);
static List *translate_sub_tlist(List *tlist, int relid);
static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
return 0;
}
-/*
- * compare_fuzzy_path_costs
- * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
- * or more expensive than path2 for the specified criterion.
- *
- * This differs from compare_path_costs in that we consider the costs the
- * same if they agree to within a "fuzz factor". This is used by add_path
- * to avoid keeping both of a pair of paths that really have insignificantly
- * different cost.
- */
-static int
-compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
-{
- /*
- * We use a fuzz factor of 1% of the smaller cost.
- *
- * XXX does this percentage need to be user-configurable?
- */
- if (criterion == STARTUP_COST)
- {
- if (path1->startup_cost > path2->startup_cost * 1.01)
- return +1;
- if (path2->startup_cost > path1->startup_cost * 1.01)
- return -1;
-
- /*
- * If paths have the same startup cost (not at all unlikely), order
- * them by total cost.
- */
- if (path1->total_cost > path2->total_cost * 1.01)
- return +1;
- if (path2->total_cost > path1->total_cost * 1.01)
- return -1;
- }
- else
- {
- if (path1->total_cost > path2->total_cost * 1.01)
- return +1;
- if (path2->total_cost > path1->total_cost * 1.01)
- return -1;
-
- /*
- * If paths have the same total cost, order them by startup cost.
- */
- if (path1->startup_cost > path2->startup_cost * 1.01)
- return +1;
- if (path2->startup_cost > path1->startup_cost * 1.01)
- return -1;
- }
- return 0;
-}
-
/*
* compare_path_fractional_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
return 0;
}
+/*
+ * compare_path_costs_fuzzily
+ * Compare the costs of two paths to see if either can be said to
+ * dominate the other.
+ *
+ * We use fuzzy comparisons so that add_path() can avoid keeping both of
+ * a pair of paths that really have insignificantly different cost.
+ * The fuzz factor is 1% of the smaller cost. (XXX does this percentage
+ * need to be user-configurable?)
+ *
+ * The two paths are said to have "equal" costs if both startup and total
+ * costs are fuzzily the same. Path1 is said to be better than path2 if
+ * it has fuzzily better startup cost and fuzzily no worse total cost,
+ * or if it has fuzzily better total cost and fuzzily no worse startup cost.
+ * Path2 is better than path1 if the reverse holds. Finally, if one path
+ * is fuzzily better than the other on startup cost and fuzzily worse on
+ * total cost, we just say that their costs are "different", since neither
+ * dominates the other across the whole performance spectrum.
+ */
+static PathCostComparison
+compare_path_costs_fuzzily(Path *path1, Path *path2)
+{
+ /*
+ * Check total cost first since it's more likely to be different; many
+ * paths have zero startup cost.
+ */
+ if (path1->total_cost > path2->total_cost * 1.01)
+ {
+ /* path1 fuzzily worse on total cost */
+ if (path2->startup_cost > path1->startup_cost * 1.01)
+ {
+ /* ... but path2 fuzzily worse on startup, so DIFFERENT */
+ return COSTS_DIFFERENT;
+ }
+ /* else path2 dominates */
+ return COSTS_BETTER2;
+ }
+ if (path2->total_cost > path1->total_cost * 1.01)
+ {
+ /* path2 fuzzily worse on total cost */
+ if (path1->startup_cost > path2->startup_cost * 1.01)
+ {
+ /* ... but path1 fuzzily worse on startup, so DIFFERENT */
+ return COSTS_DIFFERENT;
+ }
+ /* else path1 dominates */
+ return COSTS_BETTER1;
+ }
+ /* fuzzily the same on total cost */
+ if (path1->startup_cost > path2->startup_cost * 1.01)
+ {
+ /* ... but path1 fuzzily worse on startup, so path2 wins */
+ return COSTS_BETTER2;
+ }
+ if (path2->startup_cost > path1->startup_cost * 1.01)
+ {
+ /* ... but path2 fuzzily worse on startup, so path1 wins */
+ return COSTS_BETTER1;
+ }
+ /* fuzzily the same on both costs */
+ return COSTS_EQUAL;
+}
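
To see how the 1% fuzz factor plays out, here is a small self-contained illustration with made-up costs (hypothetical, not part of the patch); fuzzy_cmp() mirrors the decision structure above:

/* Hypothetical standalone illustration of the fuzzy-comparison rules. */
#include <stdio.h>

struct cost { double startup, total; };

static const char *
fuzzy_cmp(struct cost a, struct cost b)
{
    if (a.total > b.total * 1.01)
        return (b.startup > a.startup * 1.01) ? "DIFFERENT" : "BETTER2";
    if (b.total > a.total * 1.01)
        return (a.startup > b.startup * 1.01) ? "DIFFERENT" : "BETTER1";
    if (a.startup > b.startup * 1.01)
        return "BETTER2";
    if (b.startup > a.startup * 1.01)
        return "BETTER1";
    return "EQUAL";
}

int
main(void)
{
    struct cost seqscan = {0.0, 100.0};
    struct cost indexscan = {5.0, 100.5};  /* within 1% on total cost */
    struct cost sorted = {90.0, 95.0};     /* cheap total, costly startup */

    /* totals are fuzzily equal, so startup decides: prints BETTER1 */
    printf("%s\n", fuzzy_cmp(seqscan, indexscan));
    /* seqscan wins startup, sorted wins total: prints DIFFERENT */
    printf("%s\n", fuzzy_cmp(seqscan, sorted));
    return 0;
}

The second case is the one that motivates COSTS_DIFFERENT: neither path dominates, so add_path() keeps both.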
+
/*
* set_cheapest
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
+ * Only unparameterized paths are considered candidates for cheapest_startup
+ * and cheapest_total. The cheapest_parameterized_paths list collects paths
+ * that are cheapest-total for their parameterization (i.e., there is no
+ * cheaper path with the same or weaker parameterization). This list always
+ * includes the unparameterized cheapest-total path, too.
+ *
* This is normally called only after we've finished constructing the path
* list for the rel node.
- *
- * If we find two paths of identical costs, try to keep the better-sorted one.
- * The paths might have unrelated sort orderings, in which case we can only
- * guess which might be better to keep, but if one is superior then we
- * definitely should keep it.
*/
void
set_cheapest(RelOptInfo *parent_rel)
{
- List *pathlist = parent_rel->pathlist;
- ListCell *p;
Path *cheapest_startup_path;
Path *cheapest_total_path;
+ bool have_parameterized_paths;
+ ListCell *p;
Assert(IsA(parent_rel, RelOptInfo));
- if (pathlist == NIL)
- elog(ERROR, "could not devise a query plan for the given query");
-
- cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
+ cheapest_startup_path = cheapest_total_path = NULL;
+ have_parameterized_paths = false;
- for_each_cell(p, lnext(list_head(pathlist)))
+ foreach(p, parent_rel->pathlist)
{
Path *path = (Path *) lfirst(p);
int cmp;
+ /* We only consider unparameterized paths in this step */
+ if (path->required_outer)
+ {
+ have_parameterized_paths = true;
+ continue;
+ }
+
+ if (cheapest_total_path == NULL)
+ {
+ cheapest_startup_path = cheapest_total_path = path;
+ continue;
+ }
+
+ /*
+ * If we find two paths of identical costs, try to keep the
+ * better-sorted one. The paths might have unrelated sort orderings,
+ * in which case we can only guess which might be better to keep, but
+ * if one is superior then we definitely should keep that one.
+ */
cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
if (cmp > 0 ||
(cmp == 0 &&
cheapest_total_path = path;
}
+ if (cheapest_total_path == NULL)
+ elog(ERROR, "could not devise a query plan for the given query");
+
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
+
+ /* Seed the parameterized-paths list with the cheapest total */
+ parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path);
+
+ /* And, if there are any parameterized paths, add them in one at a time */
+ if (have_parameterized_paths)
+ {
+ foreach(p, parent_rel->pathlist)
+ {
+ Path *path = (Path *) lfirst(p);
+
+ if (path->required_outer)
+ add_parameterized_path(parent_rel, path);
+ }
+ }
}
/*
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
* A path is worthy if it has either a better sort order (better pathkeys)
- * or cheaper cost (on either dimension) than any of the existing old paths.
+ * or cheaper cost (on either dimension) than any of the existing old paths
+ * that have the same or superset required_outer rels.
*
* We also remove from the rel's pathlist any old paths that are dominated
- * by new_path --- that is, new_path is both cheaper and at least as well
- * ordered.
+ * by new_path --- that is, new_path is cheaper, at least as well ordered,
+ * and requires no outer rels not required by old path.
+ *
+ * There is one policy decision embedded in this function, along with its
+ * sibling add_path_precheck: we treat all parameterized paths as having
+ * NIL pathkeys, so that they compete only on cost. This is to reduce
+ * the number of parameterized paths that are kept. See discussion in
+ * src/backend/optimizer/README.
*
- * The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
- * at the front. No code depends on that for correctness; it's simply
- * a speed hack within this routine. Doing it that way makes it more
- * likely that we will reject an inferior path after a few comparisons,
- * rather than many comparisons.
+ * The pathlist is kept sorted by total_cost, with cheaper paths
+ * at the front. Within this routine, that's simply a speed hack:
+ * doing it that way makes it more likely that we will reject an inferior
+ * path after a few comparisons, rather than many comparisons.
+ * However, add_path_precheck relies on this ordering to exit early
+ * when possible.
*
* NOTE: discarded Path objects are immediately pfree'd to reduce planner
* memory consumption. We dare not try to free the substructure of a Path,
{
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
+ List *new_path_pathkeys;
ListCell *p1;
ListCell *p1_prev;
ListCell *p1_next;
*/
CHECK_FOR_INTERRUPTS();
+ /* Pretend parameterized paths have no pathkeys, per comment above */
+ new_path_pathkeys = new_path->required_outer ? NIL : new_path->pathkeys;
+
/*
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
- int costcmp;
+ PathCostComparison costcmp;
+ PathKeysComparison keyscmp;
+ BMS_Comparison outercmp;
p1_next = lnext(p1);
- /*
- * As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
- * cycles keeping paths that are really not significantly different in
- * cost.
- */
- costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
+ costcmp = compare_path_costs_fuzzily(new_path, old_path);
/*
* If the two paths compare differently for startup and total cost,
- * then we want to keep both, and we can skip the (much slower)
- * comparison of pathkeys. If they compare the same, proceed with the
- * pathkeys comparison. Note: this test relies on the fact that
- * compare_fuzzy_path_costs will only return 0 if both costs are
- * effectively equal (and, therefore, there's no need to call it twice
- * in that case).
+ * then we want to keep both, and we can skip comparing pathkeys and
+ * required_outer rels. If they compare the same, proceed with the
+ * other comparisons. (We make the tests in this order because the
+ * cost comparison is most likely to turn out "different", and the
+ * pathkeys comparison next most likely.)
*/
- if (costcmp == 0 ||
- costcmp == compare_fuzzy_path_costs(new_path, old_path,
- STARTUP_COST))
+ if (costcmp != COSTS_DIFFERENT)
{
- switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
+ /* Similarly check to see if either dominates on pathkeys */
+ List *old_path_pathkeys;
+
+ old_path_pathkeys = old_path->required_outer ? NIL : old_path->pathkeys;
+ keyscmp = compare_pathkeys(new_path_pathkeys,
+ old_path_pathkeys);
+ if (keyscmp != PATHKEYS_DIFFERENT)
{
- case PATHKEYS_EQUAL:
- if (costcmp < 0)
- remove_old = true; /* new dominates old */
- else if (costcmp > 0)
- accept_new = false; /* old dominates new */
- else
- {
+ switch (costcmp)
+ {
+ case COSTS_EQUAL:
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (keyscmp == PATHKEYS_BETTER1)
+ {
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ }
+ else if (keyscmp == PATHKEYS_BETTER2)
+ {
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ }
+ else /* keyscmp == PATHKEYS_EQUAL */
+ {
+ if (outercmp == BMS_EQUAL)
+ {
+ /*
+ * Same pathkeys and outer rels, and fuzzily
+ * the same cost, so keep just one --- but
+ * we'll do an exact cost comparison to decide
+ * which.
+ */
+ if (compare_path_costs(new_path, old_path,
+ TOTAL_COST) < 0)
+ remove_old = true; /* new dominates old */
+ else
+ accept_new = false; /* old equals or dominates new */
+ }
+ else if (outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ else if (outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ /* else different parameterizations, keep both */
+ }
+ break;
+ case COSTS_BETTER1:
+ if (keyscmp != PATHKEYS_BETTER2)
+ {
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ }
+ break;
+ case COSTS_BETTER2:
+ if (keyscmp != PATHKEYS_BETTER1)
+ {
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ }
+ break;
+ case COSTS_DIFFERENT:
/*
- * Same pathkeys, and fuzzily the same cost, so keep
- * just one --- but we'll do an exact cost comparison
- * to decide which.
+ * can't get here, but keep this case to keep compiler
+ * quiet
*/
- if (compare_path_costs(new_path, old_path,
- TOTAL_COST) < 0)
- remove_old = true; /* new dominates old */
- else
- accept_new = false; /* old equals or dominates new */
- }
- break;
- case PATHKEYS_BETTER1:
- if (costcmp <= 0)
- remove_old = true; /* new dominates old */
- break;
- case PATHKEYS_BETTER2:
- if (costcmp >= 0)
- accept_new = false; /* old dominates new */
- break;
- case PATHKEYS_DIFFERENT:
- /* keep both paths, since they have different ordering */
- break;
+ break;
+ }
}
}
else
{
/* new belongs after this old path if it has cost >= old's */
- if (costcmp >= 0)
+ if (new_path->total_cost >= old_path->total_cost)
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
}
}
+/*
+ * add_path_precheck
+ * Check whether a proposed new path could possibly get accepted.
+ * We assume we know the path's pathkeys and parameterization accurately,
+ * and have lower bounds for its costs.
+ *
+ * At the time this is called, we haven't actually built a Path structure,
+ * so the required information has to be passed piecemeal.
+ */
+bool
+add_path_precheck(RelOptInfo *parent_rel,
+ Cost startup_cost, Cost total_cost,
+ List *pathkeys, Relids required_outer)
+{
+ List *new_path_pathkeys;
+ ListCell *p1;
+
+ /* Pretend parameterized paths have no pathkeys, per add_path's policy comment */
+ new_path_pathkeys = required_outer ? NIL : pathkeys;
+
+ foreach(p1, parent_rel->pathlist)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ PathKeysComparison keyscmp;
+ BMS_Comparison outercmp;
+
+ /*
+ * We are looking for an old_path that dominates the new path across
+ * all four metrics (startup cost, total cost, pathkeys, and
+ * parameterization). If we find one, we can reject the new path.
+ *
+ * For speed, we make exact rather than fuzzy cost comparisons.
+ * If an old path dominates the new path exactly on both costs, it
+ * will surely do so fuzzily.
+ */
+ if (total_cost >= old_path->total_cost)
+ {
+ if (startup_cost >= old_path->startup_cost)
+ {
+ List *old_path_pathkeys;
+
+ old_path_pathkeys = old_path->required_outer ? NIL : old_path->pathkeys;
+ keyscmp = compare_pathkeys(new_path_pathkeys,
+ old_path_pathkeys);
+ if (keyscmp == PATHKEYS_EQUAL ||
+ keyscmp == PATHKEYS_BETTER2)
+ {
+ outercmp = bms_subset_compare(required_outer,
+ old_path->required_outer);
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2)
+ return false;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Since the pathlist is sorted by total_cost, we can stop
+ * looking once we reach a path with a total_cost larger
+ * than the new path's.
+ */
+ break;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * add_parameterized_path
+ * Consider a parameterized implementation path for the specified rel,
+ * and add it to the rel's cheapest_parameterized_paths list if it
+ * belongs there, removing any old entries that it dominates.
+ *
+ * This is essentially a cut-down form of add_path(): we do not care about
+ * startup cost or sort ordering, only total cost and parameterization.
+ * Also, we must not recycle rejected paths, since they will still be
+ * present in the rel's pathlist.
+ *
+ * 'parent_rel' is the relation entry to which the path corresponds.
+ * 'new_path' is a parameterized path for parent_rel.
+ *
+ * Returns nothing, but modifies parent_rel->cheapest_parameterized_paths.
+ */
+static void
+add_parameterized_path(RelOptInfo *parent_rel, Path *new_path)
+{
+ bool accept_new = true; /* unless we find a superior old path */
+ ListCell *insert_after = NULL; /* where to insert new item */
+ ListCell *p1;
+ ListCell *p1_prev;
+ ListCell *p1_next;
+
+ /*
+ * Loop to check proposed new path against old paths. Note it is possible
+ * for more than one old path to be tossed out because new_path dominates
+ * it.
+ *
+ * We can't use foreach here because the loop body may delete the current
+ * list cell.
+ */
+ p1_prev = NULL;
+ for (p1 = list_head(parent_rel->cheapest_parameterized_paths);
+ p1 != NULL; p1 = p1_next)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ bool remove_old = false; /* unless new proves superior */
+ int costcmp;
+ BMS_Comparison outercmp;
+
+ p1_next = lnext(p1);
+
+ costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (outercmp != BMS_DIFFERENT)
+ {
+ if (costcmp < 0)
+ {
+ if (outercmp != BMS_SUBSET2)
+ remove_old = true; /* new dominates old */
+ }
+ else if (costcmp > 0)
+ {
+ if (outercmp != BMS_SUBSET1)
+ accept_new = false; /* old dominates new */
+ }
+ else if (outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ else if (outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ else
+ {
+ /* Same cost and outer rels, arbitrarily keep the old */
+ accept_new = false; /* old equals or dominates new */
+ }
+ }
+
+ /*
+ * Remove current element from cheapest_parameterized_paths if
+ * dominated by new.
+ */
+ if (remove_old)
+ {
+ parent_rel->cheapest_parameterized_paths =
+ list_delete_cell(parent_rel->cheapest_parameterized_paths,
+ p1, p1_prev);
+ /* p1_prev does not advance */
+ }
+ else
+ {
+ /* new belongs after this old path if it has cost >= old's */
+ if (costcmp >= 0)
+ insert_after = p1;
+ /* p1_prev advances */
+ p1_prev = p1;
+ }
+
+ /*
+ * If we found an old path that dominates new_path, we can quit
+ * scanning the list; we will not add new_path, and we assume
+ * new_path cannot dominate any other elements of the list.
+ */
+ if (!accept_new)
+ break;
+ }
+
+ if (accept_new)
+ {
+ /* Accept the new path: insert it at proper place in list */
+ if (insert_after)
+ lappend_cell(parent_rel->cheapest_parameterized_paths,
+ insert_after, new_path);
+ else
+ parent_rel->cheapest_parameterized_paths =
+ lcons(new_path, parent_rel->cheapest_parameterized_paths);
+ }
+}
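
The dominance rule above can be illustrated with a deliberately simplified standalone sketch (hypothetical paths; bitmasks stand in for Relids, and exact cost ties are glossed over, which the real code resolves by keeping the old path):

/* Hypothetical illustration: keep a path only if no rival is both
 * no-more-parameterized and no more expensive. */
#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned params; double total_cost; } MiniPath;

static bool
dominated(MiniPath a, MiniPath b)
{
    /* b's parameterization is a subset (or equal) of a's ... */
    bool b_subset = (b.params & ~a.params) == 0;

    /* ... and b costs no more, so a is dominated */
    return b_subset && b.total_cost <= a.total_cost;
}

int
main(void)
{
    MiniPath p1 = {0x1, 10.0};  /* requires rel A */
    MiniPath p2 = {0x3, 8.0};   /* requires rels A and B, but cheaper */
    MiniPath p3 = {0x1, 12.0};  /* requires rel A, more expensive */

    printf("p2 dominated by p1? %d\n", dominated(p2, p1)); /* 0: keep both */
    printf("p3 dominated by p1? %d\n", dominated(p3, p1)); /* 1: discard p3 */
    return 0;
}

So a more heavily parameterized path survives only by being strictly cheaper, which is exactly why cheapest_parameterized_paths stays short.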
+
/*****************************************************************************
* PATH NODE CREATION ROUTINES
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_seqscan(pathnode, root, rel);
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
* 'indexonly' is true if an index-only scan is wanted.
- * 'outer_rel' is the outer relation if this is a join inner indexscan path.
- * (pathkeys and indexscandir are ignored if so.) NULL if not.
+ * 'required_outer' is the set of outer relids referenced in indexclauses.
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior.
*
* Returns the new path node.
*/
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
- RelOptInfo *outer_rel)
+ Relids required_outer,
+ double loop_count)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
List *indexquals,
*indexqualcols;
- /*
- * For a join inner scan, there's no point in marking the path with any
- * pathkeys, since it will only ever be used as the inner path of a
- * nestloop, and so its ordering does not matter. For the same reason we
- * don't really care what order it's scanned in. (We could expect the
- * caller to supply the correct values, but it's easier to force it here.)
- */
- if (outer_rel != NULL)
- {
- pathkeys = NIL;
- indexscandir = NoMovementScanDirection;
- }
-
pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
+ pathnode->path.required_outer = required_outer;
+ if (required_outer)
+ {
+ /* Identify index clauses that are join clauses */
+ List *jclauses = NIL;
+ ListCell *lc;
+
+ foreach(lc, indexclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (!bms_is_subset(rinfo->clause_relids, rel->relids))
+ jclauses = lappend(jclauses, rinfo);
+ }
+ pathnode->path.param_clauses = jclauses;
+ }
+ else
+ pathnode->path.param_clauses = NIL;
/* Convert clauses to indexquals the executor can handle */
expand_indexqual_conditions(index, indexclauses, indexclausecols,
pathnode->indexqualcols = indexqualcols;
pathnode->indexorderbys = indexorderbys;
pathnode->indexorderbycols = indexorderbycols;
-
- pathnode->isjoininner = (outer_rel != NULL);
pathnode->indexscandir = indexscandir;
- if (outer_rel != NULL)
- {
- /*
- * We must compute the estimated number of output rows for the
- * indexscan. This is less than rel->rows because of the additional
- * selectivity of the join clauses. Since indexclauses may contain
- * both restriction and join clauses, we have to do a set union to get
- * the full set of clauses that must be considered to compute the
- * correct selectivity. (Without the union operation, we might have
- * some restriction clauses appearing twice, which'd mislead
- * clauselist_selectivity into double-counting their selectivity.
- * However, since RestrictInfo nodes aren't copied when linking them
- * into different lists, it should be sufficient to use pointer
- * comparison to remove duplicates.)
- *
- * Note that we force the clauses to be treated as non-join clauses
- * during selectivity estimation.
- */
- List *allclauses;
-
- allclauses = list_union_ptr(rel->baserestrictinfo, indexclauses);
- pathnode->rows = rel->tuples *
- clauselist_selectivity(root,
- allclauses,
- rel->relid, /* do not use 0! */
- JOIN_INNER,
- NULL);
- /* Like costsize.c, force estimate to be at least one row */
- pathnode->rows = clamp_row_est(pathnode->rows);
- }
- else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a join inner indexscan.
- */
- pathnode->rows = rel->rows;
- }
-
- cost_index(pathnode, root, outer_rel);
+ cost_index(pathnode, root, loop_count);
return pathnode;
}
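As a rough illustration of the loop_count contract (the returned costs are still per-scan, but caching is amortized across repetitions), here is a sketch along the lines of what genericcostestimate() does with it. The function name and the cost formula are simplified assumptions, not the patch's exact code:

/*
 * Illustrative sketch, not part of the patch.  index_pages_fetched()
 * applies the Mackert-Lohman formula, so fetching tuples_per_scan
 * tuples in each of loop_count scans touches fewer new pages per scan
 * than loop_count independent scans would.  The total is then averaged
 * back to a single scan, per the API contract.  (The real code also
 * charges CPU costs and uses per-tablespace page costs.)
 */
#include "postgres.h"
#include "optimizer/cost.h"

static Cost
per_scan_page_cost_sketch(PlannerInfo *root, double tuples_per_scan,
                          BlockNumber pages, double index_pages,
                          double loop_count)
{
    double      pages_fetched;

    /* total page fetches over all repetitions, with cache effects */
    pages_fetched = index_pages_fetched(tuples_per_scan * loop_count,
                                        pages, index_pages, root);

    /* report the average cost of just one scan */
    return (pages_fetched * random_page_cost) / loop_count;
}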
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior.
*
- * If this is a join inner indexscan path, 'outer_rel' is the outer relation,
- * and all the component IndexPaths should have been costed accordingly.
+ * loop_count should match the value used when creating the component
+ * IndexPaths.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
- RelOptInfo *outer_rel)
+ double loop_count)
{
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.required_outer = bitmapqual->required_outer;
+ pathnode->path.param_clauses = bitmapqual->param_clauses;
pathnode->bitmapqual = bitmapqual;
- pathnode->isjoininner = (outer_rel != NULL);
-
- if (pathnode->isjoininner)
- {
- /*
- * We must compute the estimated number of output rows for the
- * indexscan. This is less than rel->rows because of the additional
- * selectivity of the join clauses. We make use of the selectivity
- * estimated for the bitmap to do this; this isn't really quite right
- * since there may be restriction conditions not included in the
- * bitmap ...
- */
- Cost indexTotalCost;
- Selectivity indexSelectivity;
-
- cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
- pathnode->rows = rel->tuples * indexSelectivity;
- if (pathnode->rows > rel->rows)
- pathnode->rows = rel->rows;
- /* Like costsize.c, force estimate to be at least one row */
- pathnode->rows = clamp_row_est(pathnode->rows);
- }
- else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a join inner indexscan.
- */
- pathnode->rows = rel->rows;
- }
- cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
+ cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, loop_count);
return pathnode;
}
List *bitmapquals)
{
BitmapAndPath *pathnode = makeNode(BitmapAndPath);
+ ListCell *lc;
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->bitmapquals = bitmapquals;
+ /* required_outer and param_clauses are the union of the inputs' values */
+ foreach(lc, bitmapquals)
+ {
+ Path *bpath = (Path *) lfirst(lc);
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ bpath->required_outer);
+ pathnode->path.param_clauses =
+ list_concat(pathnode->path.param_clauses,
+ list_copy(bpath->param_clauses));
+ }
+
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_and_node(pathnode, root);
List *bitmapquals)
{
BitmapOrPath *pathnode = makeNode(BitmapOrPath);
+ ListCell *lc;
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->bitmapquals = bitmapquals;
+ /* required_outer and param_clauses are the union of the inputs' values */
+ foreach(lc, bitmapquals)
+ {
+ Path *bpath = (Path *) lfirst(lc);
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ bpath->required_outer);
+ pathnode->path.param_clauses =
+ list_concat(pathnode->path.param_clauses,
+ list_copy(bpath->param_clauses));
+ }
+
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_or_node(pathnode, root);
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL;
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->tidquals = tidquals;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
+ pathnode->path.required_outer = NULL; /* updated below */
+ pathnode->path.param_clauses = NIL; /* XXX see below */
pathnode->subpaths = subpaths;
/*
- * Compute cost as sum of subplan costs. We charge nothing extra for the
- * Append itself, which perhaps is too optimistic, but since it doesn't do
- * any selection or projection, it is a pretty cheap node.
+ * We don't bother to invent a cost_append(); we just compute the costs here.
+ *
+ * Compute rows and costs as sums of subplan rows and costs. We charge
+ * nothing extra for the Append itself, which perhaps is too optimistic,
+ * but since it doesn't do any selection or projection, it is a pretty
+ * cheap node. If you change this, see also make_append().
*
- * If you change this, see also make_append().
+ * We also compute the correct required_outer set, namely the union of
+ * the input paths' requirements.
+ *
+ * XXX We should also compute a proper param_clauses list, but that
+ * will require identifying which joinclauses are enforced by all the
+ * subplans, as well as locating the original parent RestrictInfo from
+ * which they were generated. For the moment we punt and leave the list
+ * as NIL. This will result in uselessly rechecking such joinclauses
+ * at the parameter-supplying nestloop join, which is slightly annoying,
+ * as well as overestimating the sizes of any intermediate joins, which
+ * is significantly more annoying.
*/
+ pathnode->path.rows = 0;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ pathnode->path.rows += subpath->rows;
+
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ subpath->required_outer);
}
return pathnode;
pathnode->path.pathtype = T_MergeAppend;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
+ pathnode->path.required_outer = NULL; /* updated below */
+ pathnode->path.param_clauses = NIL; /* XXX see below */
pathnode->subpaths = subpaths;
/*
}
}
- /* Add up all the costs of the input paths */
+ /*
+ * Add up the sizes and costs of the input paths, and also compute the
+ * real required_outer value.
+ *
+ * XXX as in create_append_path(), we should compute param_clauses but
+ * it will require more work.
+ */
+ pathnode->path.rows = 0;
input_startup_cost = 0;
input_total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ pathnode->path.rows += subpath->rows;
+
if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
{
/* Subpath is adequately ordered, we won't need to sort it */
input_startup_cost += sort_path.startup_cost;
input_total_cost += sort_path.total_cost;
}
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ subpath->required_outer);
}
/* Now we can compute total costs of the MergeAppend */
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
pathnode->path.pathkeys = NIL;
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->quals = quals;
- /* Ideally should define cost_result(), but I'm too lazy */
+ /* Hardly worth defining a cost_result() function ... just do it */
+ pathnode->path.rows = 1;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
-
pathnode->path.pathkeys = subpath->pathkeys;
+ pathnode->path.required_outer = subpath->required_outer;
+ pathnode->path.param_clauses = subpath->param_clauses;
pathnode->subpath = subpath;
cost_material(&pathnode->path,
subpath->startup_cost,
subpath->total_cost,
- rel->rows,
+ subpath->rows,
rel->width);
return pathnode;
/*
* We must ensure path struct and subsidiary data are allocated in main
* planning context; otherwise GEQO memory management causes trouble.
- * (Compare best_inner_indexscan().)
*/
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
* to represent it. (This might get overridden below.)
*/
pathnode->path.pathkeys = NIL;
+ pathnode->path.required_outer = subpath->required_outer;
+ pathnode->path.param_clauses = subpath->param_clauses;
pathnode->subpath = subpath;
pathnode->in_operators = in_operators;
uniq_exprs, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->rows = rel->rows;
+ pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
sub_tlist_colnos, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->rows = rel->rows;
+ pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
}
/* Estimate number of output rows */
- pathnode->rows = estimate_num_groups(root, uniq_exprs, rel->rows);
+ pathnode->path.rows = estimate_num_groups(root, uniq_exprs, rel->rows);
numCols = list_length(uniq_exprs);
if (all_btree)
*/
int hashentrysize = rel->width + 64;
- if (hashentrysize * pathnode->rows > work_mem * 1024L)
+ if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
all_hash = false; /* don't try to hash */
else
cost_agg(&agg_path, root,
AGG_HASHED, NULL,
- numCols, pathnode->rows,
+ numCols, pathnode->path.rows,
subpath->startup_cost,
subpath->total_cost,
rel->rows);
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathkeys = pathkeys;
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_subqueryscan(pathnode, rel);
pathnode->pathtype = T_FunctionScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* for now, assume unordered result */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_functionscan(pathnode, root, rel);
pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_valuesscan(pathnode, root, rel);
pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_ctescan(pathnode, root, rel);
pathnode->pathtype = T_WorkTableScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
/* Cost is the same as for a regular CTE scan */
cost_ctescan(pathnode, root, rel);
pathnode->path.pathtype = T_ForeignScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* result is always unordered */
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
/* Get FDW's callback info */
rte = planner_rt_fetch(rel->relid, root);
pathnode->fdwplan = fdwplan;
/* use costs estimated by FDW */
+ pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = fdwplan->startup_cost;
pathnode->path.total_cost = fdwplan->total_cost;
return pathnode;
}
+/*
+ * calc_nestloop_required_outer
+ * Compute the required_outer set for a nestloop join path
+ *
+ * Note: result must not share storage with either input
+ */
+Relids
+calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
+{
+ Relids required_outer;
+
+ /* inner_path can require rels from outer path, but not vice versa */
+ Assert(!bms_overlap(outer_path->required_outer,
+ inner_path->parent->relids));
+ /* easy case if inner path is not parameterized */
+ if (!inner_path->required_outer)
+ return bms_copy(outer_path->required_outer);
+ /* else, form the union ... */
+ required_outer = bms_union(outer_path->required_outer,
+ inner_path->required_outer);
+ /* ... and remove any mention of now-satisfied outer rels */
+ required_outer = bms_del_members(required_outer,
+ outer_path->parent->relids);
+ /* maintain invariant that required_outer is exactly NULL if empty */
+ if (bms_is_empty(required_outer))
+ {
+ bms_free(required_outer);
+ required_outer = NULL;
+ }
+ return required_outer;
+}
+
+/*
+ * calc_non_nestloop_required_outer
+ * Compute the required_outer set for a merge or hash join path
+ *
+ * Note: result must not share storage with either input
+ */
+Relids
+calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
+{
+ Relids required_outer;
+
+ /* neither path can require rels from the other */
+ Assert(!bms_overlap(outer_path->required_outer,
+ inner_path->parent->relids));
+ Assert(!bms_overlap(inner_path->required_outer,
+ outer_path->parent->relids));
+ /* form the union ... */
+ required_outer = bms_union(outer_path->required_outer,
+ inner_path->required_outer);
+ /* we do not need an explicit test for empty; bms_union gets it right */
+ return required_outer;
+}
+
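A worked example of the nestloop set algebra above, as a sketch with hypothetical relids (not part of the patch):

/*
 * Illustrative sketch, not part of the patch.  Suppose the outer path
 * scans rel {1} and is itself parameterized by rel {3}, while the inner
 * path scans rel {2} and is parameterized by rels {1,3}.  The join
 * supplies rel 1 itself, so only rel 3 remains required from outside,
 * matching what calc_nestloop_required_outer computes.
 */
#include "postgres.h"
#include "nodes/relation.h"

static void
nestloop_required_outer_demo(void)
{
    Relids      outer_parent = bms_make_singleton(1);
    Relids      outer_req = bms_make_singleton(3);
    Relids      inner_req = bms_add_member(bms_make_singleton(3), 1);
    Relids      required_outer;

    required_outer = bms_union(outer_req, inner_req);                /* {1,3} */
    required_outer = bms_del_members(required_outer, outer_parent);  /* {3} */

    Assert(bms_equal(required_outer, bms_make_singleton(3)));
}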
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two
*
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_nestloop
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
+ * 'required_outer' is the set of required outer rels
*
* Returns the resulting path node.
*/
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
- List *pathkeys)
+ List *pathkeys,
+ Relids required_outer)
{
NestPath *pathnode = makeNode(NestPath);
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
+ pathnode->path.pathkeys = pathkeys;
+ pathnode->path.required_outer = required_outer;
+ if (pathnode->path.required_outer)
+ {
+ /* Identify parameter clauses not yet applied here */
+ List *jclauses;
+ ListCell *lc;
+
+ /* LHS clauses could not be satisfied here */
+ jclauses = list_copy(outer_path->param_clauses);
+ foreach(lc, inner_path->param_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (!bms_is_subset(rinfo->clause_relids, joinrel->relids))
+ jclauses = lappend(jclauses, rinfo);
+ }
+ pathnode->path.param_clauses = jclauses;
+ }
+ else
+ pathnode->path.param_clauses = NIL;
pathnode->jointype = jointype;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
- pathnode->path.pathkeys = pathkeys;
- cost_nestloop(pathnode, root, sjinfo);
+ final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_mergejoin
* 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
+ * 'required_outer' is the set of required outer rels
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrict_clauses list)
* 'outersortkeys' are the sort varkeys for the outer relation
create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
+ Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
MergePath *pathnode = makeNode(MergePath);
- /*
- * If the given paths are already well enough ordered, we can skip doing
- * an explicit sort.
- */
- if (outersortkeys &&
- pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
- outersortkeys = NIL;
- if (innersortkeys &&
- pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
- innersortkeys = NIL;
-
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
+ pathnode->jpath.path.pathkeys = pathkeys;
+ pathnode->jpath.path.required_outer = required_outer;
+ pathnode->jpath.path.param_clauses =
+ list_concat(list_copy(outer_path->param_clauses),
+ inner_path->param_clauses);
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
- pathnode->jpath.path.pathkeys = pathkeys;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
- /* pathnode->materialize_inner will be set by cost_mergejoin */
- cost_mergejoin(pathnode, root, sjinfo);
+ /* pathnode->materialize_inner will be set by final_cost_mergejoin */
+ final_cost_mergejoin(root, pathnode, workspace, sjinfo);
return pathnode;
}
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_hashjoin
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
+ * 'required_outer' is the set of required outer rels
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
+ Relids required_outer,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
- pathnode->jpath.jointype = jointype;
- pathnode->jpath.outerjoinpath = outer_path;
- pathnode->jpath.innerjoinpath = inner_path;
- pathnode->jpath.joinrestrictinfo = restrict_clauses;
/*
* A hashjoin never has pathkeys, since its output ordering is
* outer rel than it does now.)
*/
pathnode->jpath.path.pathkeys = NIL;
+ pathnode->jpath.path.required_outer = required_outer;
+ pathnode->jpath.path.param_clauses =
+ list_concat(list_copy(outer_path->param_clauses),
+ inner_path->param_clauses);
+ pathnode->jpath.jointype = jointype;
+ pathnode->jpath.outerjoinpath = outer_path;
+ pathnode->jpath.innerjoinpath = inner_path;
+ pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_hashclauses = hashclauses;
- /* cost_hashjoin will fill in pathnode->num_batches */
- cost_hashjoin(pathnode, root, sjinfo);
+ /* final_cost_hashjoin will fill in pathnode->num_batches */
+ final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
rel->cheapest_unique_path = NULL;
+ rel->cheapest_parameterized_paths = NIL;
rel->relid = relid;
rel->rtekind = rte->rtekind;
/* min_attr, max_attr, attr_needed, attr_widths are set below */
rel->baserestrictcost.per_tuple = 0;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
- rel->index_outer_relids = NULL;
- rel->index_inner_paths = NIL;
/* Check type of rtable entry */
switch (rte->rtekind)
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
+ joinrel->cheapest_parameterized_paths = NIL;
joinrel->relid = 0; /* indicates not a baserel */
joinrel->rtekind = RTE_JOIN;
joinrel->min_attr = 0;
joinrel->baserestrictcost.per_tuple = 0;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
- joinrel->index_outer_relids = NULL;
- joinrel->index_inner_paths = NIL;
/*
* Create a new tlist containing just the vars that need to be output from
bool pseudoconstant,
Relids required_relids,
Relids nullable_relids);
-static List *select_nonredundant_join_list(List *restrictinfo_list,
- List *reference_list);
static bool join_clause_is_redundant(RestrictInfo *rinfo,
List *reference_list);
/*
* select_nonredundant_join_clauses
+ * Select the members of restrictinfo_list that are not redundant with
+ * any member of reference_list.
*
* Given a list of RestrictInfo clauses that are to be applied in a join,
- * select the ones that are not redundant with any clause that's enforced
- * by the inner_path. This is used for nestloop joins, wherein any clause
- * being used in an inner indexscan need not be checked again at the join.
+ * select the ones that are not redundant with any clause that's listed in
+ * reference_list. This is used, for example, to avoid checking joinclauses
+ * again at a nestloop join when they've already been enforced by a
+ * parameterized inner path.
*
* "Redundant" means either equal() or derived from the same EquivalenceClass.
* We have to check the latter because indxpath.c may select different derived
* restrictinfo_list; that should have been handled elsewhere.
*/
List *
-select_nonredundant_join_clauses(PlannerInfo *root,
- List *restrictinfo_list,
- Path *inner_path)
-{
- if (IsA(inner_path, IndexPath))
- {
- /*
- * Check the index quals to see if any of them are join clauses.
- *
- * We can skip this if the index path is an ordinary indexpath and not
- * a special innerjoin path, since it then wouldn't be using any join
- * clauses.
- */
- IndexPath *innerpath = (IndexPath *) inner_path;
-
- if (innerpath->isjoininner)
- restrictinfo_list =
- select_nonredundant_join_list(restrictinfo_list,
- innerpath->indexclauses);
- }
- else if (IsA(inner_path, BitmapHeapPath))
- {
- /*
- * Same deal for bitmapped index scans.
- *
- * Note: both here and above, we ignore any implicit index
- * restrictions associated with the use of partial indexes. This is
- * OK because we're only trying to prove we can dispense with some
- * join quals; failing to prove that doesn't result in an incorrect
- * plan. It's quite unlikely that a join qual could be proven
- * redundant by an index predicate anyway. (Also, if we did manage to
- * prove it, we'd have to have a special case for update targets; see
- * notes about EvalPlanQual testing in create_indexscan_plan().)
- */
- BitmapHeapPath *innerpath = (BitmapHeapPath *) inner_path;
-
- if (innerpath->isjoininner)
- {
- List *bitmapclauses;
-
- bitmapclauses =
- make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
- true,
- false);
- restrictinfo_list =
- select_nonredundant_join_list(restrictinfo_list,
- bitmapclauses);
- }
- }
-
- /*
- * XXX the inner path of a nestloop could also be an append relation whose
- * elements use join quals. However, they might each use different quals;
- * we could only remove join quals that are enforced by all the appendrel
- * members. For the moment we don't bother to try.
- */
-
- return restrictinfo_list;
-}
-
-/*
- * select_nonredundant_join_list
- * Select the members of restrictinfo_list that are not redundant with
- * any member of reference_list. See above for more info.
- */
-static List *
-select_nonredundant_join_list(List *restrictinfo_list,
- List *reference_list)
+select_nonredundant_join_clauses(List *restrictinfo_list,
+ List *reference_list)
{
List *result = NIL;
ListCell *item;
+ /* Quick out if nothing could be removed */
+ if (reference_list == NIL)
+ return restrictinfo_list;
+
foreach(item, restrictinfo_list)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
static void
genericcostestimate(PlannerInfo *root,
IndexPath *path,
- RelOptInfo *outer_rel,
+ double loop_count,
double numIndexTuples,
Cost *indexStartupCost,
Cost *indexTotalCost,
* Note that we are counting pages not tuples anymore, so we take N = T =
* index size, as if there were one "tuple" per page.
*/
- if (outer_rel != NULL && outer_rel->rows > 1)
- {
- num_outer_scans = outer_rel->rows;
- num_scans = num_sa_scans * num_outer_scans;
- }
- else
- {
- num_outer_scans = 1;
- num_scans = num_sa_scans;
- }
+ num_outer_scans = loop_count;
+ num_scans = num_sa_scans * num_outer_scans;
if (num_scans > 1)
{
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
numIndexTuples = rint(numIndexTuples / num_sa_scans);
}
- genericcostestimate(root, path, outer_rel,
+ genericcostestimate(root, path, loop_count,
numIndexTuples,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
- genericcostestimate(root, path, outer_rel, 0.0,
+ genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
- genericcostestimate(root, path, outer_rel, 0.0,
+ genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
- genericcostestimate(root, path, outer_rel, 0.0,
+ genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
}
/* Will we have more than one iteration of a nestloop scan? */
- if (outer_rel != NULL && outer_rel->rows > 1)
- outer_scans = outer_rel->rows;
- else
- outer_scans = 1;
+ outer_scans = loop_count;
/*
* Compute cost to begin scan, first of all, pay attention to pending list.
} Bitmapset; /* VARIABLE LENGTH STRUCT */
+/* result of bms_subset_compare */
+typedef enum
+{
+ BMS_EQUAL, /* sets are equal */
+ BMS_SUBSET1, /* first set is a subset of the second */
+ BMS_SUBSET2, /* second set is a subset of the first */
+ BMS_DIFFERENT /* neither set is a subset of the other */
+} BMS_Comparison;
+
/* result of bms_membership */
typedef enum
{
extern Bitmapset *bms_intersect(const Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
+extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
T_EquivalenceMember,
T_PathKey,
T_RestrictInfo,
- T_InnerIndexscanInfo,
T_PlaceHolderVar,
T_SpecialJoinInfo,
T_AppendRelInfo,
*/
RangeTblEntry **simple_rte_array; /* rangetable as an array */
+ /*
+ * all_baserels is a Relids set of all base relids (but not "other"
+ * relids) in the query; that is, the Relids identifier of the final
+ * join we need to form.
+ */
+ Relids all_baserels;
+
/*
* join_rel_list is a list of all join-relation RelOptInfos we have
* considered in this planning run. For small problems we just scan the
* pathlist - List of Path nodes, one for each potentially useful
* method of generating the relation
* cheapest_startup_path - the pathlist member with lowest startup cost
- * (regardless of its ordering)
+ * (regardless of its ordering; but must be
+ * unparameterized)
* cheapest_total_path - the pathlist member with lowest total cost
- * (regardless of its ordering)
+ * (regardless of its ordering; but must be
+ * unparameterized)
* cheapest_unique_path - for caching cheapest path to produce unique
* (no duplicates) output from relation
+ * cheapest_parameterized_paths - paths with cheapest total costs for
+ * their parameterizations; always includes
+ * cheapest_total_path
*
* If the relation is a base relation it will have these fields set:
*
* note this excludes clauses that might be derivable from
* EquivalenceClasses)
* has_eclass_joins - flag that EquivalenceClass joins are possible
- * index_outer_relids - only used for base rels; set of outer relids
- * that participate in indexable joinclauses for this rel
- * index_inner_paths - only used for base rels; list of InnerIndexscanInfo
- * nodes showing best indexpaths for various subsets of
- * index_outer_relids.
*
* Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
* base rels, because for a join rel the set of clauses that are treated as
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
struct Path *cheapest_unique_path;
+ List *cheapest_parameterized_paths;
/* information about a base rel (not set for join rels!) */
Index relid;
List *joininfo; /* RestrictInfo structures for join clauses
* involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */
-
- /* cached info about inner indexscan paths for relation: */
- Relids index_outer_relids; /* other relids in indexable join
- * clauses */
- List *index_inner_paths; /* InnerIndexscanInfo nodes */
-
- /*
- * Inner indexscans are not in the main pathlist because they are not
- * usable except in specific join contexts. We use the index_inner_paths
- * list just to avoid recomputing the best inner indexscan repeatedly for
- * similar outer relations. See comments for InnerIndexscanInfo.
- */
} RelOptInfo;
/*
* BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable
* index types will use btree-compatible strategy numbers.
*/
-
typedef struct PathKey
{
NodeTag type;
* simple plan types that we don't need any extra information in the path for.
* For other path types it is the first component of a larger struct.
*
- * Note: "pathtype" is the NodeTag of the Plan node we could build from this
- * Path. It is partially redundant with the Path's NodeTag, but allows us
- * to use the same Path type for multiple Plan types where there is no need
- * to distinguish the Plan type during path processing.
+ * "pathtype" is the NodeTag of the Plan node we could build from this Path.
+ * It is partially redundant with the Path's NodeTag, but allows us to use
+ * the same Path type for multiple Plan types when there is no need to
+ * distinguish the Plan type during path processing.
+ *
+ * "rows" is the same as parent->rows in simple paths, but in parameterized
+ * paths and UniquePaths it can be less than parent->rows, reflecting the
+ * fact that we've filtered by extra join conditions or removed duplicates.
+ *
+ * "pathkeys" is a List of PathKey nodes (see above), describing the sort
+ * ordering of the path's output rows.
+ *
+ * "required_outer", if not NULL, contains the relids of one or more relations
+ * that must provide parameter values to each scan of this path, because the
+ * path relies on join clauses using those rels. That means this path can only
+ * be joined to those rels by means of nestloop joins with this path on the
+ * inside. Note: for a normal unparameterized path, required_outer must be
+ * NULL, not an empty-but-not-null Bitmapset.
+ *
+ * "param_clauses" is a List of RestrictInfo nodes, containing the join
+ * clauses used by a parameterized path. Ideally param_clauses should be NIL
+ * if and only if required_outer is NULL. XXX for the moment, however, we do
+ * not compute param_clauses for Append and MergeAppend paths, so the list
+ * is inaccurate in those paths and possibly paths above them.
*/
-
typedef struct Path
{
NodeTag type;
RelOptInfo *parent; /* the relation this path can build */
- /* estimated execution costs for path (see costsize.c for more info) */
+ /* estimated size/costs for path (see costsize.c for more info) */
+ double rows; /* estimated number of result tuples */
Cost startup_cost; /* cost expended before fetching any tuples */
Cost total_cost; /* total cost (assuming all tuples fetched) */
List *pathkeys; /* sort ordering of path's output */
- /* pathkeys is a List of PathKey nodes; see above */
+
+ Relids required_outer; /* rels supplying parameters used by path */
+ List *param_clauses; /* join clauses that use such parameters */
} Path;
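The invariants stated above can be restated as a hypothetical sanity check; this is a sketch, not part of the patch:

/*
 * Illustrative sketch, not part of the patch.  The converse check,
 * that required_outer != NULL implies a non-NIL param_clauses, would
 * not hold yet; see the XXX above about Append and MergeAppend.
 */
#include "postgres.h"
#include "nodes/relation.h"

static void
check_path_parameterization(const Path *path)
{
    if (path->required_outer == NULL)
        Assert(path->param_clauses == NIL);             /* unparameterized */
    else
        Assert(!bms_is_empty(path->required_outer));    /* never empty-but-not-NULL */
}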
/*----------
* ORDER BY expression is meant to be used with. (There is no restriction
* on which index column each ORDER BY can be used with.)
*
- * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
- * some of the index conditions are join rather than restriction clauses).
- * Note that the path costs will be calculated differently from a plain
- * indexscan in this case, and in addition there's a special 'rows' value
- * different from the parent RelOptInfo's (see below).
- *
* 'indexscandir' is one of:
* ForwardScanDirection: forward scan of an ordered index
* BackwardScanDirection: backward scan of an ordered index
* we need not recompute them when considering using the same index in a
* bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath
* itself represent the costs of an IndexScan or IndexOnlyScan plan type.
- *
- * 'rows' is the estimated result tuple count for the indexscan. This
- * is the same as path.parent->rows for a simple indexscan, but it is
- * different for a nestloop inner scan, because the additional indexquals
- * coming from join clauses make the scan more selective than the parent
- * rel's restrict clauses alone would do.
*----------
*/
typedef struct IndexPath
List *indexqualcols;
List *indexorderbys;
List *indexorderbycols;
- bool isjoininner;
ScanDirection indexscandir;
Cost indextotalcost;
Selectivity indexselectivity;
- double rows; /* estimated number of result tuples */
} IndexPath;
/*
* always represent the costs to use it as a regular (or index-only)
* IndexScan. The costs of a BitmapIndexScan can be computed using the
* IndexPath's indextotalcost and indexselectivity.
- *
- * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and
- * rows fields serve the same purpose as for plain IndexPaths.
*/
typedef struct BitmapHeapPath
{
Path path;
Path *bitmapqual; /* IndexPath, BitmapAndPath, BitmapOrPath */
- bool isjoininner; /* T if it's a nestloop inner scan */
- double rows; /* estimated number of result tuples */
} BitmapHeapPath;
/*
#define IS_DUMMY_PATH(p) \
(IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL)
+/* A relation that's been proven empty will have one path that is dummy */
+#define IS_DUMMY_REL(r) \
+ ((r)->cheapest_total_path != NULL && \
+ IS_DUMMY_PATH((r)->cheapest_total_path))
+
/*
* MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
* results from several member plans to produce similarly-sorted output.
UniquePathMethod umethod;
List *in_operators; /* equality operators of the IN clause */
List *uniq_exprs; /* expressions to be made unique */
- double rows; /* estimated number of result tuples */
} UniquePath;
/*
Selectivity rightendsel; /* last-join fraction for clause right side */
} MergeScanSelCache;
-/*
- * Inner indexscan info.
- *
- * An inner indexscan is one that uses one or more joinclauses as index
- * conditions (perhaps in addition to plain restriction clauses). So it
- * can only be used as the inner path of a nestloop join where the outer
- * relation includes all other relids appearing in those joinclauses.
- * The set of usable joinclauses, and thus the best inner indexscan,
- * thus varies depending on which outer relation we consider; so we have
- * to recompute the best such paths for every join. To avoid lots of
- * redundant computation, we cache the results of such searches. For
- * each relation we compute the set of possible otherrelids (all relids
- * appearing in joinquals that could become indexquals for this table).
- * Two outer relations whose relids have the same intersection with this
- * set will have the same set of available joinclauses and thus the same
- * best inner indexscans for the inner relation. By taking the intersection
- * before scanning the cache, we avoid recomputing when considering
- * join rels that differ only by the inclusion of irrelevant other rels.
- *
- * The search key also includes a bool showing whether the join being
- * considered is an outer join. Since we constrain the join order for
- * outer joins, I believe that this bool can only have one possible value
- * for any particular lookup key; but store it anyway to avoid confusion.
- */
-
-typedef struct InnerIndexscanInfo
-{
- NodeTag type;
- /* The lookup key: */
- Relids other_relids; /* a set of relevant other relids */
- bool isouterjoin; /* true if join is outer */
- /* Best paths for this lookup key (NULL if no available indexscans): */
- Path *cheapest_startup_innerpath; /* cheapest startup cost */
- Path *cheapest_total_innerpath; /* cheapest total cost */
-} InnerIndexscanInfo;
-
/*
* Placeholder node for an expression to be evaluated below the top level
* of a plan tree. This is used during planning to represent the contained
Index abslevel; /* its absolute query level */
} PlannerParamItem;
+/*
+ * When making cost estimates for a SEMI or ANTI join, there are some
+ * correction factors that are needed in both nestloop and hash joins
+ * to account for the fact that the executor can stop scanning inner rows
+ * as soon as it finds a match to the current outer row. These numbers
+ * depend only on the selected outer and inner join relations, not on the
+ * particular paths used for them, so it's worthwhile to calculate them
+ * just once per relation pair not once per considered path. This struct
+ * is filled by compute_semi_anti_join_factors and must be passed along
+ * to the join cost estimation functions.
+ *
+ * outer_match_frac is the fraction of the outer tuples that are
+ * expected to have at least one match.
+ * match_count is the average number of matches expected for
+ * outer tuples that have at least one match.
+ */
+typedef struct SemiAntiJoinFactors
+{
+ Selectivity outer_match_frac;
+ Selectivity match_count;
+} SemiAntiJoinFactors;
+
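To show how these factors enter a cost estimate, here is a simplified sketch of the matched-rows term, mirroring the inner_scan_frac computation in this patch's nestloop cost code; the function name is hypothetical and the unmatched-rows term is omitted:

/*
 * Illustrative sketch, not part of the patch.  For SEMI/ANTI joins the
 * executor stops the inner scan at the first match, so matched outer
 * rows are charged only a fraction of an inner scan.  With match_count
 * matches spread uniformly, the patch uses the fraction
 * 2 / (match_count + 1); unmatched outer rows still pay for a whole
 * (failed) inner scan, not shown here.
 */
#include "postgres.h"
#include "nodes/relation.h"

static Cost
matched_rows_inner_cost(double outer_rows, Cost inner_run_cost,
                        const SemiAntiJoinFactors *semifactors)
{
    double      outer_matched_rows =
        outer_rows * semifactors->outer_match_frac;
    Selectivity inner_scan_frac =
        2.0 / (semifactors->match_count + 1.0);

    return outer_matched_rows * inner_run_cost * inner_scan_frac;
}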
+/*
+ * For speed reasons, cost estimation for join paths is performed in two
+ * phases: the first phase tries to quickly derive a lower bound for the
+ * join cost, and then we check if that's sufficient to reject the path.
+ * If not, we come back for a more refined cost estimate. The first phase
+ * fills a JoinCostWorkspace struct with its preliminary cost estimates
+ * and possibly additional intermediate values. The second phase takes
+ * these values as inputs to avoid repeating work.
+ *
+ * (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h,
+ * so seems best to put it here.)
+ */
+typedef struct JoinCostWorkspace
+{
+ /* Preliminary cost estimates --- must not be larger than final ones! */
+ Cost startup_cost; /* cost expended before fetching any tuples */
+ Cost total_cost; /* total cost (assuming all tuples fetched) */
+
+ /* Fields below here should be treated as private to costsize.c */
+ Cost run_cost; /* non-startup cost components */
+
+ /* private for cost_nestloop code */
+ Cost inner_rescan_run_cost;
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
+
+ /* private for cost_mergejoin code */
+ Cost inner_run_cost;
+ double outer_rows;
+ double inner_rows;
+ double outer_skip_rows;
+ double inner_skip_rows;
+
+ /* private for cost_hashjoin code */
+ int numbuckets;
+ int numbatches;
+} JoinCostWorkspace;
+
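To show how JoinCostWorkspace ties the pieces together, here is a sketch of the intended two-phase calling protocol. The wrapper function is hypothetical, but each call matches a declaration added elsewhere in this patch:

/*
 * Illustrative sketch, not part of the patch: phase 1 computes a cheap
 * lower bound; add_path_precheck rejects hopeless candidates before we
 * spend effort building the path and refining its cost in phase 2.
 */
#include "postgres.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"

static void
try_nestloop_path_sketch(PlannerInfo *root, RelOptInfo *joinrel,
                         JoinType jointype, SpecialJoinInfo *sjinfo,
                         SemiAntiJoinFactors *semifactors,
                         Path *outer_path, Path *inner_path,
                         List *restrict_clauses, List *pathkeys)
{
    Relids      required_outer;
    JoinCostWorkspace workspace;

    required_outer = calc_nestloop_required_outer(outer_path, inner_path);

    /* Phase 1: quick lower-bound estimate */
    initial_cost_nestloop(root, &workspace, jointype,
                          outer_path, inner_path,
                          sjinfo, semifactors);

    /* Build the path only if the lower bound might survive add_path */
    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          pathkeys, required_outer))
    {
        /* Phase 2: final_cost_nestloop refines the estimate inside here */
        add_path(joinrel, (Path *)
                 create_nestloop_path(root, joinrel, jointype,
                                      &workspace, sjinfo, semifactors,
                                      outer_path, inner_path,
                                      restrict_clauses, pathkeys,
                                      required_outer));
    }
    else
        bms_free(required_outer);   /* waste no memory on rejected paths */
}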
#endif /* RELATION_H */
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
extern void cost_index(IndexPath *path, PlannerInfo *root,
- RelOptInfo *outer_rel);
+ double loop_count);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- Path *bitmapqual, RelOptInfo *outer_rel);
+ Path *bitmapqual, double loop_count);
extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
int numGroupCols, double numGroups,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples);
-extern void cost_nestloop(NestPath *path, PlannerInfo *root,
- SpecialJoinInfo *sjinfo);
-extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
- SpecialJoinInfo *sjinfo);
-extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
- SpecialJoinInfo *sjinfo);
+extern void initial_cost_nestloop(PlannerInfo *root,
+ JoinCostWorkspace *workspace,
+ JoinType jointype,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
+extern void final_cost_nestloop(PlannerInfo *root, NestPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
+extern void initial_cost_mergejoin(PlannerInfo *root,
+ JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *mergeclauses,
+ Path *outer_path, Path *inner_path,
+ List *outersortkeys, List *innersortkeys,
+ SpecialJoinInfo *sjinfo);
+extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo);
+extern void initial_cost_hashjoin(PlannerInfo *root,
+ JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *hashclauses,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
+extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
+extern void compute_semi_anti_join_factors(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist,
+ SemiAntiJoinFactors *semifactors);
extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel,
double fraction);
extern void set_cheapest(RelOptInfo *parent_rel);
extern void add_path(RelOptInfo *parent_rel, Path *new_path);
+extern bool add_path_precheck(RelOptInfo *parent_rel,
+ Cost startup_cost, Cost total_cost,
+ List *pathkeys, Relids required_outer);
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel);
extern IndexPath *create_index_path(PlannerInfo *root,
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
- RelOptInfo *outer_rel);
+ Relids required_outer,
+ double loop_count);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
- RelOptInfo *outer_rel);
+ double loop_count);
extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals);
extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel);
extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel);
+extern Relids calc_nestloop_required_outer(Path *outer_path, Path *inner_path);
+extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path);
+
extern NestPath *create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
- List *pathkeys);
+ List *pathkeys,
+ Relids required_outer);
extern MergePath *create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
+ Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys);
extern HashPath *create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
+ Relids required_outer,
List *hashclauses);
/*
*/
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- RelOptInfo *outer_rel);
-extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype,
- Path **cheapest_startup, Path **cheapest_total);
+ List *clauses, List *other_clauses,
+ bool restriction_only);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
List *exprlist, List *oprlist);
-extern bool eclass_matches_any_index(EquivalenceClass *ec,
- EquivalenceMember *em,
- RelOptInfo *rel);
+extern bool eclass_member_matches_indexcol(EquivalenceClass *ec,
+ EquivalenceMember *em,
+ IndexOptInfo *index, int indexcol);
extern bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
extern void expand_indexqual_conditions(IndexOptInfo *index,
extern void mutate_eclass_expressions(PlannerInfo *root,
Node *(*mutator) (),
void *context);
-extern List *find_eclass_clauses_for_index_join(PlannerInfo *root,
- RelOptInfo *rel,
- Relids outer_relids);
+extern List *generate_implied_equalities_for_indexcol(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol);
extern bool have_relevant_eclass_joinclause(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2);
extern bool has_relevant_eclass_joinclause(PlannerInfo *root,
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+ Relids required_outer,
CostSelector cost_criterion);
extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
List *pathkeys,
+ Relids required_outer,
double fraction);
extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
ScanDirection scandir);
extern void extract_actual_join_clauses(List *restrictinfo_list,
List **joinquals,
List **otherquals);
-extern List *select_nonredundant_join_clauses(PlannerInfo *root,
- List *restrictinfo_list,
- Path *inner_path);
+extern List *select_nonredundant_join_clauses(List *restrictinfo_list,
+ List *reference_list);
#endif /* RESTRICTINFO_H */