List *restrictlist,
JoinType jointype,
bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *innerrel,
+ Path *outerpath,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool useallclauses,
+ Path *inner_cheapest_total,
+ List *merge_pathkeys);
/*
}
}
+/*
+ * generate_mergejoin_paths
+ * Creates possible mergejoin paths for input outerpath.
+ *
+ * We generate mergejoins if mergejoin clauses are available. We have
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge. If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements). So, we consider truncations of the
+ * mergeclause list as well as the full list. (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
+ *
+ * 'root' is handed through to the mergeclause/pathkey helper routines
+ * 'joinrel' is handed through to try_mergejoin_path for every candidate
+ * 'innerrel' supplies the pathlist that is searched for presorted inner paths
+ * 'outerpath' is the outer path; its pathkeys select the usable mergeclauses
+ * 'jointype' is the join type as given by the caller; JOIN_UNIQUE_OUTER
+ * and JOIN_UNIQUE_INNER are replaced by JOIN_INNER before any path is
+ * built, but the original value is still consulted to stop early for
+ * JOIN_UNIQUE_INNER
+ * 'extra' supplies mergeclause_list and is handed to try_mergejoin_path
+ * 'useallclauses' means give up unless every clause in
+ * extra->mergeclause_list is usable, and never try truncated key lists
+ * 'inner_cheapest_total' is the inner path used for the explicit-sort
+ * variant; its pathkeys are read unconditionally, so it must not be NULL
+ * 'merge_pathkeys' is handed through unchanged to try_mergejoin_path
+ * (presumably the ordering of the join output --- confirm in the caller)
+ *
+ * Nothing is returned; every candidate is submitted via try_mergejoin_path.
+ */
+static void
+generate_mergejoin_paths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *innerrel,
+ Path *outerpath,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool useallclauses,
+ Path *inner_cheapest_total,
+ List *merge_pathkeys)
+{
+ List *mergeclauses;
+ List *innersortkeys;
+ List *trialsortkeys;
+ Path *cheapest_startup_inner;
+ Path *cheapest_total_inner;
+ JoinType save_jointype = jointype; /* caller's value; jointype itself
+ * may be overwritten just below */
+ int num_sortkeys;
+ int sortkeycnt;
+
+ if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+ jointype = JOIN_INNER; /* paths below are built as plain inner
+ * joins; save_jointype keeps the original */
+
+ /*
+ * Look for useful mergeclauses (if any). The candidates come from
+ * extra->mergeclause_list and are matched against the outer path's
+ * pathkeys.
+ */
+ mergeclauses = find_mergeclauses_for_pathkeys(root,
+ outerpath->pathkeys,
+ true,
+ extra->mergeclause_list);
+
+ /*
+ * Done with this outer path if no chance for a mergejoin.
+ *
+ * Special corner case: for "x FULL JOIN y ON true", there will be no join
+ * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
+ * but since mergejoin is our only join type that supports FULL JOIN
+ * without any join clauses, it's necessary to generate a clauseless
+ * mergejoin path instead.
+ */
+ if (mergeclauses == NIL)
+ {
+ if (jointype == JOIN_FULL)
+ /* okay to try for mergejoin */ ;
+ else
+ return;
+ }
+ if (useallclauses &&
+ list_length(mergeclauses) != list_length(extra->mergeclause_list))
+ return; /* all clauses were required, but this outer
+ * ordering matched a different number */
+
+ /*
+ * Compute the required ordering of the inner path. This is derived from
+ * the selected mergeclauses together with the outer path's pathkeys.
+ */
+ innersortkeys = make_inner_pathkeys_for_merge(root,
+ mergeclauses,
+ outerpath->pathkeys);
+
+ /*
+ * Generate a mergejoin on the basis of sorting the cheapest inner. Since
+ * a sort will be needed, only cheapest total cost matters. (But
+ * try_mergejoin_path will do the right thing if inner_cheapest_total is
+ * already correctly sorted.)
+ *
+ * NOTE(review): the NIL / innersortkeys pair below is presumably "no
+ * outer sort keys" / "inner sort keys" --- confirm against the
+ * try_mergejoin_path signature.
+ */
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ inner_cheapest_total,
+ merge_pathkeys,
+ mergeclauses,
+ NIL,
+ innersortkeys,
+ jointype,
+ extra);
+
+ /*
+ * Can't do anything else if inner path needs to be unique'd. Note that
+ * this tests save_jointype, the caller's original value, because
+ * jointype was already replaced by JOIN_INNER above.
+ */
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ return;
+
+ /*
+ * Look for presorted inner paths that satisfy the innersortkey list ---
+ * or any truncation thereof, if we are allowed to build a mergejoin using
+ * a subset of the merge clauses. Here, we consider both cheap startup
+ * cost and cheap total cost.
+ *
+ * Currently we do not consider parameterized inner paths here. This
+ * interacts with decisions elsewhere that also discriminate against
+ * mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
+ *
+ * As we shorten the sortkey list, we should consider only paths that are
+ * strictly cheaper than (in particular, not the same as) any path found
+ * in an earlier iteration. Otherwise we'd be intentionally using fewer
+ * merge keys than a given path allows (treating the rest as plain
+ * joinquals), which is unlikely to be a good idea. Also, eliminating
+ * paths here on the basis of compare_path_costs is a lot cheaper than
+ * building the mergejoin path only to throw it away.
+ *
+ * If inner_cheapest_total is well enough sorted to have not required a
+ * sort in the path made above, we shouldn't make a duplicate path with
+ * it, either. We handle that case with the same logic that handles the
+ * previous consideration, by initializing the variables that track
+ * cheapest-so-far properly. Note that we do NOT reject
+ * inner_cheapest_total if we find it matches some shorter set of
+ * pathkeys. That case corresponds to using fewer mergekeys to avoid
+ * sorting inner_cheapest_total, whereas we did sort it above, so the
+ * plans being considered are different.
+ */
+ if (pathkeys_contained_in(innersortkeys,
+ inner_cheapest_total->pathkeys))
+ {
+ /* inner_cheapest_total didn't require a sort */
+ cheapest_startup_inner = inner_cheapest_total;
+ cheapest_total_inner = inner_cheapest_total;
+ }
+ else
+ {
+ /* it did require a sort, at least for the full set of keys */
+ cheapest_startup_inner = NULL;
+ cheapest_total_inner = NULL;
+ }
+ num_sortkeys = list_length(innersortkeys);
+ if (num_sortkeys > 1 && !useallclauses)
+ trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
+ else
+ trialsortkeys = innersortkeys; /* won't really truncate */
+
+ for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
+ {
+ Path *innerpath;
+ List *newclauses = NIL; /* NIL = not yet chosen for this
+ * sortkeycnt; shared by both the
+ * total-cost and startup-cost
+ * branches below */
+
+ /*
+ * Look for an inner path ordered well enough for the first
+ * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
+ * destructively, which is why we made a copy...
+ */
+ trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+ trialsortkeys,
+ NULL,
+ TOTAL_COST);
+ if (innerpath != NULL &&
+ (cheapest_total_inner == NULL ||
+ compare_path_costs(innerpath, cheapest_total_inner,
+ TOTAL_COST) < 0))
+ {
+ /* Found a cheap (or even-cheaper) sorted path */
+ /* Select the right mergeclauses, if we didn't already */
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ find_mergeclauses_for_pathkeys(root,
+ trialsortkeys,
+ false,
+ mergeclauses);
+ Assert(newclauses != NIL);
+ }
+ else
+ newclauses = mergeclauses;
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL,
+ jointype,
+ extra);
+ cheapest_total_inner = innerpath; /* later, shorter key lists
+ * must beat this one */
+ }
+ /* Same on the basis of cheapest startup cost ... */
+ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+ trialsortkeys,
+ NULL,
+ STARTUP_COST);
+ if (innerpath != NULL &&
+ (cheapest_startup_inner == NULL ||
+ compare_path_costs(innerpath, cheapest_startup_inner,
+ STARTUP_COST) < 0))
+ {
+ /*
+ * Found a cheap (or even-cheaper) sorted path. If it is the very
+ * same path as the current total-cost winner, a mergejoin has
+ * already been tried with it, so don't build a duplicate.
+ */
+ if (innerpath != cheapest_total_inner)
+ {
+ /*
+ * Avoid rebuilding clause list if we already made one; saves
+ * memory in big join trees...
+ */
+ if (newclauses == NIL)
+ {
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ find_mergeclauses_for_pathkeys(root,
+ trialsortkeys,
+ false,
+ mergeclauses);
+ Assert(newclauses != NIL);
+ }
+ else
+ newclauses = mergeclauses;
+ }
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL,
+ jointype,
+ extra);
+ }
+ cheapest_startup_inner = innerpath; /* recorded even when the
+ * duplicate build was skipped */
+ }
+
+ /*
+ * Don't consider truncated sortkeys if we need all clauses.
+ */
+ if (useallclauses)
+ break;
+ }
+}
+
/*
* match_unsorted_outer
* Creates possible join paths for processing a single join relation
* cheapest-total inner-indexscan path (if any), and one on the
* cheapest-startup inner-indexscan path (if different).
*
- * We also consider mergejoins if mergejoin clauses are available. We have
- * two ways to generate the inner path for a mergejoin: sort the cheapest
- * inner path, or use an inner path that is already suitably ordered for the
- * merge. If we have several mergeclauses, it could be that there is no inner
- * path (or only a very expensive one) for the full list of mergeclauses, but
- * better paths exist if we truncate the mergeclause list (thereby discarding
- * some sort key requirements). So, we consider truncations of the
- * mergeclause list as well as the full list. (Ideally we'd consider all
- * subsets of the mergeclause list, but that seems way too expensive.)
+ * We also consider mergejoins if mergejoin clauses are available. See
+ * detailed comments in generate_mergejoin_paths.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
{
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
- List *mergeclauses;
- List *innersortkeys;
- List *trialsortkeys;
- Path *cheapest_startup_inner;
- Path *cheapest_total_inner;
- int num_sortkeys;
- int sortkeycnt;
/*
* We cannot use an outer path that is parameterized by the inner rel.
if (inner_cheapest_total == NULL)
continue;
- /* Look for useful mergeclauses (if any) */
- mergeclauses = find_mergeclauses_for_pathkeys(root,
- outerpath->pathkeys,
- true,
- extra->mergeclause_list);
-
- /*
- * Done with this outer path if no chance for a mergejoin.
- *
- * Special corner case: for "x FULL JOIN y ON true", there will be no
- * join clauses at all. Ordinarily we'd generate a clauseless
- * nestloop path, but since mergejoin is our only join type that
- * supports FULL JOIN without any join clauses, it's necessary to
- * generate a clauseless mergejoin path instead.
- */
- if (mergeclauses == NIL)
- {
- if (jointype == JOIN_FULL)
- /* okay to try for mergejoin */ ;
- else
- continue;
- }
- if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
- continue;
-
- /* Compute the required ordering of the inner path */
- innersortkeys = make_inner_pathkeys_for_merge(root,
- mergeclauses,
- outerpath->pathkeys);
-
- /*
- * Generate a mergejoin on the basis of sorting the cheapest inner.
- * Since a sort will be needed, only cheapest total cost matters. (But
- * try_mergejoin_path will do the right thing if inner_cheapest_total
- * is already correctly sorted.)
- */
- try_mergejoin_path(root,
- joinrel,
- outerpath,
- inner_cheapest_total,
- merge_pathkeys,
- mergeclauses,
- NIL,
- innersortkeys,
- jointype,
- extra);
-
- /* Can't do anything else if inner path needs to be unique'd */
- if (save_jointype == JOIN_UNIQUE_INNER)
- continue;
-
- /*
- * Look for presorted inner paths that satisfy the innersortkey list
- * --- or any truncation thereof, if we are allowed to build a
- * mergejoin using a subset of the merge clauses. Here, we consider
- * both cheap startup cost and cheap total cost.
- *
- * Currently we do not consider parameterized inner paths here. This
- * interacts with decisions elsewhere that also discriminate against
- * mergejoins with parameterized inputs; see comments in
- * src/backend/optimizer/README.
- *
- * As we shorten the sortkey list, we should consider only paths that
- * are strictly cheaper than (in particular, not the same as) any path
- * found in an earlier iteration. Otherwise we'd be intentionally
- * using fewer merge keys than a given path allows (treating the rest
- * as plain joinquals), which is unlikely to be a good idea. Also,
- * eliminating paths here on the basis of compare_path_costs is a lot
- * cheaper than building the mergejoin path only to throw it away.
- *
- * If inner_cheapest_total is well enough sorted to have not required
- * a sort in the path made above, we shouldn't make a duplicate path
- * with it, either. We handle that case with the same logic that
- * handles the previous consideration, by initializing the variables
- * that track cheapest-so-far properly. Note that we do NOT reject
- * inner_cheapest_total if we find it matches some shorter set of
- * pathkeys. That case corresponds to using fewer mergekeys to avoid
- * sorting inner_cheapest_total, whereas we did sort it above, so the
- * plans being considered are different.
- */
- if (pathkeys_contained_in(innersortkeys,
- inner_cheapest_total->pathkeys))
- {
- /* inner_cheapest_total didn't require a sort */
- cheapest_startup_inner = inner_cheapest_total;
- cheapest_total_inner = inner_cheapest_total;
- }
- else
- {
- /* it did require a sort, at least for the full set of keys */
- cheapest_startup_inner = NULL;
- cheapest_total_inner = NULL;
- }
- num_sortkeys = list_length(innersortkeys);
- if (num_sortkeys > 1 && !useallclauses)
- trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
- else
- trialsortkeys = innersortkeys; /* won't really truncate */
-
- for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
- {
- Path *innerpath;
- List *newclauses = NIL;
-
- /*
- * Look for an inner path ordered well enough for the first
- * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
- * destructively, which is why we made a copy...
- */
- trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
- innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
- trialsortkeys,
- NULL,
- TOTAL_COST);
- if (innerpath != NULL &&
- (cheapest_total_inner == NULL ||
- compare_path_costs(innerpath, cheapest_total_inner,
- TOTAL_COST) < 0))
- {
- /* Found a cheap (or even-cheaper) sorted path */
- /* Select the right mergeclauses, if we didn't already */
- if (sortkeycnt < num_sortkeys)
- {
- newclauses =
- find_mergeclauses_for_pathkeys(root,
- trialsortkeys,
- false,
- mergeclauses);
- Assert(newclauses != NIL);
- }
- else
- newclauses = mergeclauses;
- try_mergejoin_path(root,
- joinrel,
- outerpath,
- innerpath,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL,
- jointype,
- extra);
- cheapest_total_inner = innerpath;
- }
- /* Same on the basis of cheapest startup cost ... */
- innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
- trialsortkeys,
- NULL,
- STARTUP_COST);
- if (innerpath != NULL &&
- (cheapest_startup_inner == NULL ||
- compare_path_costs(innerpath, cheapest_startup_inner,
- STARTUP_COST) < 0))
- {
- /* Found a cheap (or even-cheaper) sorted path */
- if (innerpath != cheapest_total_inner)
- {
- /*
- * Avoid rebuilding clause list if we already made one;
- * saves memory in big join trees...
- */
- if (newclauses == NIL)
- {
- if (sortkeycnt < num_sortkeys)
- {
- newclauses =
- find_mergeclauses_for_pathkeys(root,
- trialsortkeys,
- false,
- mergeclauses);
- Assert(newclauses != NIL);
- }
- else
- newclauses = mergeclauses;
- }
- try_mergejoin_path(root,
- joinrel,
- outerpath,
- innerpath,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL,
- jointype,
- extra);
- }
- cheapest_startup_inner = innerpath;
- }
-
- /*
- * Don't consider truncated sortkeys if we need all clauses.
- */
- if (useallclauses)
- break;
- }
+ /* Generate merge join paths */
+ generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
+ save_jointype, extra, useallclauses,
+ inner_cheapest_total, merge_pathkeys);
}
/*