#include "optimizer/paths.h"
+#define PATH_PARAM_BY_REL(path, rel) \
+ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
+
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
* cheapest-startup-cost input paths later, and only if they don't need a
* sort.
*
- * This function intentionally does not consider parameterized input paths
- * (implicit in the fact that it only looks at cheapest_total_path, which
- * is always unparameterized). If we did so, we'd have a combinatorial
- * explosion of mergejoin paths of dubious value. This interacts with
- * decisions elsewhere that also discriminate against mergejoins with
- * parameterized inputs; see comments in src/backend/optimizer/README.
+ * This function intentionally does not consider parameterized input
+ * paths, except when the cheapest-total is parameterized. If we did so,
+ * we'd have a combinatorial explosion of mergejoin paths of dubious
+ * value. This interacts with decisions elsewhere that also discriminate
+ * against mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
*/
outer_path = outerrel->cheapest_total_path;
inner_path = innerrel->cheapest_total_path;
- /* Punt if either rel has only parameterized paths */
- if (!outer_path || !inner_path)
+ /*
+ * If either cheapest-total path is parameterized by the other rel, we
+ * can't use a mergejoin. (There's no use looking for alternative input
+ * paths, since these should already be the least-parameterized available
+ * paths.)
+ */
+ if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
+ PATH_PARAM_BY_REL(inner_path, outerrel))
return;
/*
break;
}
+ /*
+ * If inner_cheapest_total is parameterized by the outer rel, ignore it;
+ * we will consider it below as a member of cheapest_parameterized_paths,
+ * but the other possibilities considered in this routine aren't usable.
+ */
+ if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+ inner_cheapest_total = NULL;
+
/*
* If we need to unique-ify the inner path, we will consider only the
* cheapest-total inner.
*/
if (save_jointype == JOIN_UNIQUE_INNER)
{
- /* XXX for the moment, don't crash on LATERAL --- rethink this */
+ /* No way to do this with an inner path parameterized by outer rel */
if (inner_cheapest_total == NULL)
return;
-
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
/*
* We cannot use an outer path that is parameterized by the inner rel.
*/
- if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids))
+ if (PATH_PARAM_BY_REL(outerpath, innerrel))
continue;
/*
* If we need to unique-ify the outer path, it's pointless to consider
* any but the cheapest outer. (XXX we don't consider parameterized
* outers, nor inners, for unique-ified cases. Should we?)
- *
- * XXX does nothing for LATERAL, rethink
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
if (save_jointype == JOIN_UNIQUE_OUTER)
continue;
- /* Can't do anything else if inner has no unparameterized paths */
- if (!inner_cheapest_total)
+ /* Can't do anything else if inner rel is parameterized by outer */
+ if (inner_cheapest_total == NULL)
continue;
/* Look for useful mergeclauses (if any) */
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
- /* Punt if either rel has only parameterized paths */
- if (!cheapest_startup_outer ||
- !cheapest_total_outer ||
- !cheapest_total_inner)
+ /*
+ * If either cheapest-total path is parameterized by the other rel, we
+ * can't use a hashjoin. (There's no use looking for alternative
+ * input paths, since these should already be the least-parameterized
+ * available paths.)
+ */
+ if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
+ PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
return;
/* Unique-ify if need be; we ignore parameterized possibilities */
cheapest_total_inner,
restrictlist,
hashclauses);
- if (cheapest_startup_outer != cheapest_total_outer)
+ if (cheapest_startup_outer != NULL &&
+ cheapest_startup_outer != cheapest_total_outer)
try_hashjoin_path(root,
joinrel,
jointype,
ListCell *lc1;
ListCell *lc2;
- try_hashjoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- cheapest_startup_outer,
- cheapest_total_inner,
- restrictlist,
- hashclauses);
+ if (cheapest_startup_outer != NULL)
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
foreach(lc1, outerrel->cheapest_parameterized_paths)
{
* We cannot use an outer path that is parameterized by the
* inner rel.
*/
- if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids))
+ if (PATH_PARAM_BY_REL(outerpath, innerrel))
continue;
foreach(lc2, innerrel->cheapest_parameterized_paths)
* We cannot use an inner path that is parameterized by
* the outer rel, either.
*/
- if (bms_overlap(PATH_REQ_OUTER(innerpath),
- outerrel->relids))
+ if (PATH_PARAM_BY_REL(innerpath, outerrel))
continue;
if (outerpath == cheapest_startup_outer &&
COSTS_DIFFERENT /* neither path dominates the other on cost */
} PathCostComparison;
-static void add_parameterized_path(RelOptInfo *parent_rel, Path *new_path);
static List *translate_sub_tlist(List *tlist, int relid);
static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
* is fuzzily better than the other on startup cost and fuzzily worse on
* total cost, we just say that their costs are "different", since neither
* dominates the other across the whole performance spectrum.
+ *
+ * This function also includes special hacks to support a policy enforced
+ * by its sole caller, add_path(): paths that have any parameterization
+ * cannot win comparisons on the grounds of having cheaper startup cost,
+ * since we deem only total cost to be of interest for a parameterized path.
+ * (Unparameterized paths are more common, so we check for this case last.)
*/
static PathCostComparison
compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
if (path1->total_cost > path2->total_cost * fuzz_factor)
{
/* path1 fuzzily worse on total cost */
- if (path2->startup_cost > path1->startup_cost * fuzz_factor)
+ if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+ path1->param_info == NULL)
{
/* ... but path2 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
if (path2->total_cost > path1->total_cost * fuzz_factor)
{
/* path2 fuzzily worse on total cost */
- if (path1->startup_cost > path2->startup_cost * fuzz_factor)
+ if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+ path2->param_info == NULL)
{
/* ... but path1 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
return COSTS_BETTER1;
}
/* fuzzily the same on total cost */
- if (path1->startup_cost > path2->startup_cost * fuzz_factor)
+ if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+ path2->param_info == NULL)
{
/* ... but path1 fuzzily worse on startup, so path2 wins */
return COSTS_BETTER2;
}
- if (path2->startup_cost > path1->startup_cost * fuzz_factor)
+ if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+ path1->param_info == NULL)
{
/* ... but path2 fuzzily worse on startup, so path1 wins */
return COSTS_BETTER1;
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
- * Only unparameterized paths are considered candidates for cheapest_startup
- * and cheapest_total. The cheapest_parameterized_paths list collects paths
- * that are cheapest-total for their parameterization (i.e., there is no
- * cheaper path with the same or weaker parameterization). This list always
- * includes the unparameterized cheapest-total path, too, if there is one.
+ * cheapest_total_path is normally the cheapest-total-cost unparameterized
+ * path; but if there are no unparameterized paths, we assign it to be the
+ * best (cheapest least-parameterized) parameterized path. However, only
+ * unparameterized paths are considered candidates for cheapest_startup_path,
+ * so that will be NULL if there are no unparameterized paths.
+ *
+ * The cheapest_parameterized_paths list collects all parameterized paths
+ * that have survived the add_path() tournament for this relation. (Since
+ * add_path ignores pathkeys and startup cost for a parameterized path,
+ * these will be paths that have best total cost or best row count for their
+ * parameterization.) cheapest_parameterized_paths always includes the
+ * cheapest-total unparameterized path, too, if there is one; the users of
+ * that list find it more convenient if that's included.
*
* This is normally called only after we've finished constructing the path
* list for the rel node.
{
Path *cheapest_startup_path;
Path *cheapest_total_path;
- bool have_parameterized_paths;
+ Path *best_param_path;
+ List *parameterized_paths;
ListCell *p;
Assert(IsA(parent_rel, RelOptInfo));
- cheapest_startup_path = cheapest_total_path = NULL;
- have_parameterized_paths = false;
+ if (parent_rel->pathlist == NIL)
+ elog(ERROR, "could not devise a query plan for the given query");
+
+ cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
+ parameterized_paths = NIL;
foreach(p, parent_rel->pathlist)
{
Path *path = (Path *) lfirst(p);
int cmp;
- /* We only consider unparameterized paths in this step */
if (path->param_info)
{
- have_parameterized_paths = true;
- continue;
- }
+ /* Parameterized path, so add it to parameterized_paths */
+ parameterized_paths = lappend(parameterized_paths, path);
- if (cheapest_total_path == NULL)
- {
- cheapest_startup_path = cheapest_total_path = path;
- continue;
+ /*
+ * If we have an unparameterized cheapest-total, we no longer care
+ * about finding the best parameterized path, so move on.
+ */
+ if (cheapest_total_path)
+ continue;
+
+ /*
+ * Otherwise, track the best parameterized path, which is the one
+ * with least total cost among those of the minimum
+ * parameterization.
+ */
+ if (best_param_path == NULL)
+ best_param_path = path;
+ else
+ {
+ switch (bms_subset_compare(PATH_REQ_OUTER(path),
+ PATH_REQ_OUTER(best_param_path)))
+ {
+ case BMS_EQUAL:
+ /* keep the cheaper one */
+ if (compare_path_costs(path, best_param_path,
+ TOTAL_COST) < 0)
+ best_param_path = path;
+ break;
+ case BMS_SUBSET1:
+ /* new path is less-parameterized */
+ best_param_path = path;
+ break;
+ case BMS_SUBSET2:
+ /* old path is less-parameterized, keep it */
+ break;
+ case BMS_DIFFERENT:
+ /*
+ * This means that neither path has the least possible
+ * parameterization for the rel. We'll sit on the old
+ * path until something better comes along.
+ */
+ break;
+ }
+ }
}
+ else
+ {
+ /* Unparameterized path, so consider it for cheapest slots */
+ if (cheapest_total_path == NULL)
+ {
+ cheapest_startup_path = cheapest_total_path = path;
+ continue;
+ }
- /*
- * If we find two paths of identical costs, try to keep the
- * better-sorted one. The paths might have unrelated sort orderings,
- * in which case we can only guess which might be better to keep, but
- * if one is superior then we definitely should keep that one.
- */
- cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
- if (cmp > 0 ||
- (cmp == 0 &&
- compare_pathkeys(cheapest_startup_path->pathkeys,
- path->pathkeys) == PATHKEYS_BETTER2))
- cheapest_startup_path = path;
-
- cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
- if (cmp > 0 ||
- (cmp == 0 &&
- compare_pathkeys(cheapest_total_path->pathkeys,
- path->pathkeys) == PATHKEYS_BETTER2))
- cheapest_total_path = path;
+ /*
+ * If we find two paths of identical costs, try to keep the
+ * better-sorted one. The paths might have unrelated sort
+ * orderings, in which case we can only guess which might be
+ * better to keep, but if one is superior then we definitely
+ * should keep that one.
+ */
+ cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
+ if (cmp > 0 ||
+ (cmp == 0 &&
+ compare_pathkeys(cheapest_startup_path->pathkeys,
+ path->pathkeys) == PATHKEYS_BETTER2))
+ cheapest_startup_path = path;
+
+ cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
+ if (cmp > 0 ||
+ (cmp == 0 &&
+ compare_pathkeys(cheapest_total_path->pathkeys,
+ path->pathkeys) == PATHKEYS_BETTER2))
+ cheapest_total_path = path;
+ }
}
- if (cheapest_total_path == NULL && !have_parameterized_paths)
- elog(ERROR, "could not devise a query plan for the given query");
+ /* Add cheapest unparameterized path, if any, to parameterized_paths */
+ if (cheapest_total_path)
+ parameterized_paths = lcons(cheapest_total_path, parameterized_paths);
+
+ /*
+ * If there is no unparameterized path, use the best parameterized path
+ * as cheapest_total_path (but not as cheapest_startup_path).
+ */
+ if (cheapest_total_path == NULL)
+ cheapest_total_path = best_param_path;
+ Assert(cheapest_total_path != NULL);
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
-
- /* Seed the parameterized-paths list with the cheapest total, if any */
- if (cheapest_total_path)
- parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path);
- else
- parent_rel->cheapest_parameterized_paths = NIL;
-
- /* And, if there are any parameterized paths, add them in one at a time */
- if (have_parameterized_paths)
- {
- foreach(p, parent_rel->pathlist)
- {
- Path *path = (Path *) lfirst(p);
-
- if (path->param_info)
- add_parameterized_path(parent_rel, path);
- }
- }
+ parent_rel->cheapest_parameterized_paths = parameterized_paths;
}
/*
* one parameterization can seldom dominate a path of another. But such
* cases do arise, so we make the full set of checks anyway.
*
- * There is one policy decision embedded in this function, along with its
- * sibling add_path_precheck: we treat all parameterized paths as having
- * NIL pathkeys, so that they compete only on cost. This is to reduce
- * the number of parameterized paths that are kept. See discussion in
- * src/backend/optimizer/README.
+ * There are two policy decisions embedded in this function, along with
+ * its sibling add_path_precheck: we treat all parameterized paths as
+ * having NIL pathkeys, and we ignore their startup costs, so that they
+ * compete only on parameterization, total cost and rowcount. This is to
+ * reduce the number of parameterized paths that are kept. See discussion
+ * in src/backend/optimizer/README.
*
* The pathlist is kept sorted by total_cost, with cheaper paths
* at the front. Within this routine, that's simply a speed hack:
List *new_path_pathkeys;
ListCell *p1;
- /* Pretend parameterized paths have no pathkeys, per add_path comment */
+ /* Pretend parameterized paths have no pathkeys, per add_path policy */
new_path_pathkeys = required_outer ? NIL : pathkeys;
foreach(p1, parent_rel->pathlist)
*/
if (total_cost >= old_path->total_cost)
{
- if (startup_cost >= old_path->startup_cost)
+ /* can win on startup cost only if unparameterized */
+ if (startup_cost >= old_path->startup_cost || required_outer)
{
+ /* new path does not win on cost, so check pathkeys... */
List *old_path_pathkeys;
old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
if (keyscmp == PATHKEYS_EQUAL ||
keyscmp == PATHKEYS_BETTER2)
{
+ /* new path does not win on pathkeys... */
if (bms_equal(required_outer, PATH_REQ_OUTER(old_path)))
{
/* Found an old path that dominates the new one */
return true;
}
-/*
- * add_parameterized_path
- * Consider a parameterized implementation path for the specified rel,
- * and add it to the rel's cheapest_parameterized_paths list if it
- * belongs there, removing any old entries that it dominates.
- *
- * This is essentially a cut-down form of add_path(): we do not care
- * about startup cost or sort ordering, only total cost, rowcount, and
- * parameterization. Also, we must not recycle rejected paths, since
- * they will still be present in the rel's pathlist.
- *
- * 'parent_rel' is the relation entry to which the path corresponds.
- * 'new_path' is a parameterized path for parent_rel.
- *
- * Returns nothing, but modifies parent_rel->cheapest_parameterized_paths.
- */
-static void
-add_parameterized_path(RelOptInfo *parent_rel, Path *new_path)
-{
- bool accept_new = true; /* unless we find a superior old path */
- ListCell *insert_after = NULL; /* where to insert new item */
- ListCell *p1;
- ListCell *p1_prev;
- ListCell *p1_next;
-
- /*
- * Loop to check proposed new path against old paths. Note it is possible
- * for more than one old path to be tossed out because new_path dominates
- * it.
- *
- * We can't use foreach here because the loop body may delete the current
- * list cell.
- */
- p1_prev = NULL;
- for (p1 = list_head(parent_rel->cheapest_parameterized_paths);
- p1 != NULL; p1 = p1_next)
- {
- Path *old_path = (Path *) lfirst(p1);
- bool remove_old = false; /* unless new proves superior */
- int costcmp;
- BMS_Comparison outercmp;
-
- p1_next = lnext(p1);
-
- costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
- outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
- PATH_REQ_OUTER(old_path));
- if (outercmp != BMS_DIFFERENT)
- {
- if (costcmp < 0)
- {
- if (outercmp != BMS_SUBSET2 &&
- new_path->rows <= old_path->rows)
- remove_old = true; /* new dominates old */
- }
- else if (costcmp > 0)
- {
- if (outercmp != BMS_SUBSET1 &&
- new_path->rows >= old_path->rows)
- accept_new = false; /* old dominates new */
- }
- else if (outercmp == BMS_SUBSET1 &&
- new_path->rows <= old_path->rows)
- remove_old = true; /* new dominates old */
- else if (outercmp == BMS_SUBSET2 &&
- new_path->rows >= old_path->rows)
- accept_new = false; /* old dominates new */
- else if (new_path->rows < old_path->rows)
- remove_old = true; /* new dominates old */
- else
- {
- /* Same cost, rows, and param rels; arbitrarily keep old */
- accept_new = false; /* old equals or dominates new */
- }
- }
-
- /*
- * Remove current element from cheapest_parameterized_paths if
- * dominated by new.
- */
- if (remove_old)
- {
- parent_rel->cheapest_parameterized_paths =
- list_delete_cell(parent_rel->cheapest_parameterized_paths,
- p1, p1_prev);
- /* p1_prev does not advance */
- }
- else
- {
- /* new belongs after this old path if it has cost >= old's */
- if (costcmp >= 0)
- insert_after = p1;
- /* p1_prev advances */
- p1_prev = p1;
- }
-
- /*
- * If we found an old path that dominates new_path, we can quit
- * scanning the list; we will not add new_path, and we assume new_path
- * cannot dominate any other elements of the list.
- */
- if (!accept_new)
- break;
- }
-
- if (accept_new)
- {
- /* Accept the new path: insert it at proper place in list */
- if (insert_after)
- lappend_cell(parent_rel->cheapest_parameterized_paths,
- insert_after, new_path);
- else
- parent_rel->cheapest_parameterized_paths =
- lcons(new_path, parent_rel->cheapest_parameterized_paths);
- }
-}
-
/*****************************************************************************
* PATH NODE CREATION ROUTINES
int numCols;
ListCell *lc;
- /* XXX temporary band-aid to not crash on LATERAL queries */
- if (subpath == NULL)
- {
- Assert(subpath == rel->cheapest_total_path);
- return NULL;
- }
-
/* Caller made a mistake if subpath isn't cheapest_total ... */
Assert(subpath == rel->cheapest_total_path);
Assert(subpath->parent == rel);