]> granicus.if.org Git - postgresql/commitdiff
Refactor merge path generation code.
authorRobert Haas <rhaas@postgresql.org>
Wed, 21 Dec 2016 14:44:33 +0000 (09:44 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 21 Dec 2016 14:45:50 +0000 (09:45 -0500)
This shouldn't change the set of paths that get generated in any
way, but it is preparatory work for further changes to allow a
partial path to be merge-joined witih a non-partial path to produce
a partial join path.

Dilip Kumar, with cosmetic adjustments by me.

src/backend/optimizer/path/joinpath.c

index 96f00fca5bf5cc9315b18d639cd82fe35e5e120c..b5cbcf4d0494e9de14f4637155c6f5eb96539ac9 100644 (file)
@@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
                                                 List *restrictlist,
                                                 JoinType jointype,
                                                 bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root,
+                                                RelOptInfo *joinrel,
+                                                RelOptInfo *innerrel,
+                                                Path *outerpath,
+                                                JoinType jointype,
+                                                JoinPathExtraData *extra,
+                                                bool useallclauses,
+                                                Path *inner_cheapest_total,
+                                                List *merge_pathkeys);
 
 
 /*
@@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root,
        }
 }
 
+/*
+ * generate_mergejoin_paths
+ *     Creates possible mergejoin paths for input outerpath.
+ *
+ * We generate mergejoins if mergejoin clauses are available.  We have
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge.  If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements).  So, we consider truncations of the
+ * mergeclause list as well as the full list.  (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
+ */
+static void
+generate_mergejoin_paths(PlannerInfo *root,
+                                                RelOptInfo *joinrel,
+                                                RelOptInfo *innerrel,
+                                                Path *outerpath,
+                                                JoinType jointype,
+                                                JoinPathExtraData *extra,
+                                                bool useallclauses,
+                                                Path *inner_cheapest_total,
+                                                List *merge_pathkeys)
+{
+       List       *mergeclauses;
+       List       *innersortkeys;
+       List       *trialsortkeys;
+       Path       *cheapest_startup_inner;
+       Path       *cheapest_total_inner;
+       JoinType        save_jointype = jointype;
+       int                     num_sortkeys;
+       int                     sortkeycnt;
+
+       if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+               jointype = JOIN_INNER;
+
+       /* Look for useful mergeclauses (if any) */
+       mergeclauses = find_mergeclauses_for_pathkeys(root,
+                                                                                                 outerpath->pathkeys,
+                                                                                                 true,
+                                                                                                 extra->mergeclause_list);
+
+       /*
+        * Done with this outer path if no chance for a mergejoin.
+        *
+        * Special corner case: for "x FULL JOIN y ON true", there will be no join
+        * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
+        * but since mergejoin is our only join type that supports FULL JOIN
+        * without any join clauses, it's necessary to generate a clauseless
+        * mergejoin path instead.
+        */
+       if (mergeclauses == NIL)
+       {
+               if (jointype == JOIN_FULL)
+                        /* okay to try for mergejoin */ ;
+               else
+                       return;
+       }
+       if (useallclauses &&
+               list_length(mergeclauses) != list_length(extra->mergeclause_list))
+               return;
+
+       /* Compute the required ordering of the inner path */
+       innersortkeys = make_inner_pathkeys_for_merge(root,
+                                                                                                 mergeclauses,
+                                                                                                 outerpath->pathkeys);
+
+       /*
+        * Generate a mergejoin on the basis of sorting the cheapest inner. Since
+        * a sort will be needed, only cheapest total cost matters. (But
+        * try_mergejoin_path will do the right thing if inner_cheapest_total is
+        * already correctly sorted.)
+        */
+       try_mergejoin_path(root,
+                                          joinrel,
+                                          outerpath,
+                                          inner_cheapest_total,
+                                          merge_pathkeys,
+                                          mergeclauses,
+                                          NIL,
+                                          innersortkeys,
+                                          jointype,
+                                          extra);
+
+       /* Can't do anything else if inner path needs to be unique'd */
+       if (save_jointype == JOIN_UNIQUE_INNER)
+               return;
+
+       /*
+        * Look for presorted inner paths that satisfy the innersortkey list ---
+        * or any truncation thereof, if we are allowed to build a mergejoin using
+        * a subset of the merge clauses.  Here, we consider both cheap startup
+        * cost and cheap total cost.
+        *
+        * Currently we do not consider parameterized inner paths here. This
+        * interacts with decisions elsewhere that also discriminate against
+        * mergejoins with parameterized inputs; see comments in
+        * src/backend/optimizer/README.
+        *
+        * As we shorten the sortkey list, we should consider only paths that are
+        * strictly cheaper than (in particular, not the same as) any path found
+        * in an earlier iteration.  Otherwise we'd be intentionally using fewer
+        * merge keys than a given path allows (treating the rest as plain
+        * joinquals), which is unlikely to be a good idea.  Also, eliminating
+        * paths here on the basis of compare_path_costs is a lot cheaper than
+        * building the mergejoin path only to throw it away.
+        *
+        * If inner_cheapest_total is well enough sorted to have not required a
+        * sort in the path made above, we shouldn't make a duplicate path with
+        * it, either.  We handle that case with the same logic that handles the
+        * previous consideration, by initializing the variables that track
+        * cheapest-so-far properly.  Note that we do NOT reject
+        * inner_cheapest_total if we find it matches some shorter set of
+        * pathkeys.  That case corresponds to using fewer mergekeys to avoid
+        * sorting inner_cheapest_total, whereas we did sort it above, so the
+        * plans being considered are different.
+        */
+       if (pathkeys_contained_in(innersortkeys,
+                                                         inner_cheapest_total->pathkeys))
+       {
+               /* inner_cheapest_total didn't require a sort */
+               cheapest_startup_inner = inner_cheapest_total;
+               cheapest_total_inner = inner_cheapest_total;
+       }
+       else
+       {
+               /* it did require a sort, at least for the full set of keys */
+               cheapest_startup_inner = NULL;
+               cheapest_total_inner = NULL;
+       }
+       num_sortkeys = list_length(innersortkeys);
+       if (num_sortkeys > 1 && !useallclauses)
+               trialsortkeys = list_copy(innersortkeys);               /* need modifiable copy */
+       else
+               trialsortkeys = innersortkeys;  /* won't really truncate */
+
+       for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
+       {
+               Path       *innerpath;
+               List       *newclauses = NIL;
+
+               /*
+                * Look for an inner path ordered well enough for the first
+                * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
+                * destructively, which is why we made a copy...
+                */
+               trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+               innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+                                                                                                  trialsortkeys,
+                                                                                                  NULL,
+                                                                                                  TOTAL_COST);
+               if (innerpath != NULL &&
+                       (cheapest_total_inner == NULL ||
+                        compare_path_costs(innerpath, cheapest_total_inner,
+                                                               TOTAL_COST) < 0))
+               {
+                       /* Found a cheap (or even-cheaper) sorted path */
+                       /* Select the right mergeclauses, if we didn't already */
+                       if (sortkeycnt < num_sortkeys)
+                       {
+                               newclauses =
+                                       find_mergeclauses_for_pathkeys(root,
+                                                                                                  trialsortkeys,
+                                                                                                  false,
+                                                                                                  mergeclauses);
+                               Assert(newclauses != NIL);
+                       }
+                       else
+                               newclauses = mergeclauses;
+                       try_mergejoin_path(root,
+                                                          joinrel,
+                                                          outerpath,
+                                                          innerpath,
+                                                          merge_pathkeys,
+                                                          newclauses,
+                                                          NIL,
+                                                          NIL,
+                                                          jointype,
+                                                          extra);
+                       cheapest_total_inner = innerpath;
+               }
+               /* Same on the basis of cheapest startup cost ... */
+               innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+                                                                                                  trialsortkeys,
+                                                                                                  NULL,
+                                                                                                  STARTUP_COST);
+               if (innerpath != NULL &&
+                       (cheapest_startup_inner == NULL ||
+                        compare_path_costs(innerpath, cheapest_startup_inner,
+                                                               STARTUP_COST) < 0))
+               {
+                       /* Found a cheap (or even-cheaper) sorted path */
+                       if (innerpath != cheapest_total_inner)
+                       {
+                               /*
+                                * Avoid rebuilding clause list if we already made one; saves
+                                * memory in big join trees...
+                                */
+                               if (newclauses == NIL)
+                               {
+                                       if (sortkeycnt < num_sortkeys)
+                                       {
+                                               newclauses =
+                                                       find_mergeclauses_for_pathkeys(root,
+                                                                                                                  trialsortkeys,
+                                                                                                                  false,
+                                                                                                                  mergeclauses);
+                                               Assert(newclauses != NIL);
+                                       }
+                                       else
+                                               newclauses = mergeclauses;
+                               }
+                               try_mergejoin_path(root,
+                                                                  joinrel,
+                                                                  outerpath,
+                                                                  innerpath,
+                                                                  merge_pathkeys,
+                                                                  newclauses,
+                                                                  NIL,
+                                                                  NIL,
+                                                                  jointype,
+                                                                  extra);
+                       }
+                       cheapest_startup_inner = innerpath;
+               }
+
+               /*
+                * Don't consider truncated sortkeys if we need all clauses.
+                */
+               if (useallclauses)
+                       break;
+       }
+}
+
 /*
  * match_unsorted_outer
  *       Creates possible join paths for processing a single join relation
@@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
  * cheapest-total inner-indexscan path (if any), and one on the
  * cheapest-startup inner-indexscan path (if different).
  *
- * We also consider mergejoins if mergejoin clauses are available.  We have
- * two ways to generate the inner path for a mergejoin: sort the cheapest
- * inner path, or use an inner path that is already suitably ordered for the
- * merge.  If we have several mergeclauses, it could be that there is no inner
- * path (or only a very expensive one) for the full list of mergeclauses, but
- * better paths exist if we truncate the mergeclause list (thereby discarding
- * some sort key requirements).  So, we consider truncations of the
- * mergeclause list as well as the full list.  (Ideally we'd consider all
- * subsets of the mergeclause list, but that seems way too expensive.)
+ * We also consider mergejoins if mergejoin clauses are available.  See
+ * detailed comments in generate_mergejoin_paths.
  *
  * 'joinrel' is the join relation
  * 'outerrel' is the outer join relation
@@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
        {
                Path       *outerpath = (Path *) lfirst(lc1);
                List       *merge_pathkeys;
-               List       *mergeclauses;
-               List       *innersortkeys;
-               List       *trialsortkeys;
-               Path       *cheapest_startup_inner;
-               Path       *cheapest_total_inner;
-               int                     num_sortkeys;
-               int                     sortkeycnt;
 
                /*
                 * We cannot use an outer path that is parameterized by the inner rel.
@@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
                if (inner_cheapest_total == NULL)
                        continue;
 
-               /* Look for useful mergeclauses (if any) */
-               mergeclauses = find_mergeclauses_for_pathkeys(root,
-                                                                                                         outerpath->pathkeys,
-                                                                                                         true,
-                                                                                                       extra->mergeclause_list);
-
-               /*
-                * Done with this outer path if no chance for a mergejoin.
-                *
-                * Special corner case: for "x FULL JOIN y ON true", there will be no
-                * join clauses at all.  Ordinarily we'd generate a clauseless
-                * nestloop path, but since mergejoin is our only join type that
-                * supports FULL JOIN without any join clauses, it's necessary to
-                * generate a clauseless mergejoin path instead.
-                */
-               if (mergeclauses == NIL)
-               {
-                       if (jointype == JOIN_FULL)
-                                /* okay to try for mergejoin */ ;
-                       else
-                               continue;
-               }
-               if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
-                       continue;
-
-               /* Compute the required ordering of the inner path */
-               innersortkeys = make_inner_pathkeys_for_merge(root,
-                                                                                                         mergeclauses,
-                                                                                                         outerpath->pathkeys);
-
-               /*
-                * Generate a mergejoin on the basis of sorting the cheapest inner.
-                * Since a sort will be needed, only cheapest total cost matters. (But
-                * try_mergejoin_path will do the right thing if inner_cheapest_total
-                * is already correctly sorted.)
-                */
-               try_mergejoin_path(root,
-                                                  joinrel,
-                                                  outerpath,
-                                                  inner_cheapest_total,
-                                                  merge_pathkeys,
-                                                  mergeclauses,
-                                                  NIL,
-                                                  innersortkeys,
-                                                  jointype,
-                                                  extra);
-
-               /* Can't do anything else if inner path needs to be unique'd */
-               if (save_jointype == JOIN_UNIQUE_INNER)
-                       continue;
-
-               /*
-                * Look for presorted inner paths that satisfy the innersortkey list
-                * --- or any truncation thereof, if we are allowed to build a
-                * mergejoin using a subset of the merge clauses.  Here, we consider
-                * both cheap startup cost and cheap total cost.
-                *
-                * Currently we do not consider parameterized inner paths here. This
-                * interacts with decisions elsewhere that also discriminate against
-                * mergejoins with parameterized inputs; see comments in
-                * src/backend/optimizer/README.
-                *
-                * As we shorten the sortkey list, we should consider only paths that
-                * are strictly cheaper than (in particular, not the same as) any path
-                * found in an earlier iteration.  Otherwise we'd be intentionally
-                * using fewer merge keys than a given path allows (treating the rest
-                * as plain joinquals), which is unlikely to be a good idea.  Also,
-                * eliminating paths here on the basis of compare_path_costs is a lot
-                * cheaper than building the mergejoin path only to throw it away.
-                *
-                * If inner_cheapest_total is well enough sorted to have not required
-                * a sort in the path made above, we shouldn't make a duplicate path
-                * with it, either.  We handle that case with the same logic that
-                * handles the previous consideration, by initializing the variables
-                * that track cheapest-so-far properly.  Note that we do NOT reject
-                * inner_cheapest_total if we find it matches some shorter set of
-                * pathkeys.  That case corresponds to using fewer mergekeys to avoid
-                * sorting inner_cheapest_total, whereas we did sort it above, so the
-                * plans being considered are different.
-                */
-               if (pathkeys_contained_in(innersortkeys,
-                                                                 inner_cheapest_total->pathkeys))
-               {
-                       /* inner_cheapest_total didn't require a sort */
-                       cheapest_startup_inner = inner_cheapest_total;
-                       cheapest_total_inner = inner_cheapest_total;
-               }
-               else
-               {
-                       /* it did require a sort, at least for the full set of keys */
-                       cheapest_startup_inner = NULL;
-                       cheapest_total_inner = NULL;
-               }
-               num_sortkeys = list_length(innersortkeys);
-               if (num_sortkeys > 1 && !useallclauses)
-                       trialsortkeys = list_copy(innersortkeys);       /* need modifiable copy */
-               else
-                       trialsortkeys = innersortkeys;          /* won't really truncate */
-
-               for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
-               {
-                       Path       *innerpath;
-                       List       *newclauses = NIL;
-
-                       /*
-                        * Look for an inner path ordered well enough for the first
-                        * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
-                        * destructively, which is why we made a copy...
-                        */
-                       trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
-                       innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
-                                                                                                          trialsortkeys,
-                                                                                                          NULL,
-                                                                                                          TOTAL_COST);
-                       if (innerpath != NULL &&
-                               (cheapest_total_inner == NULL ||
-                                compare_path_costs(innerpath, cheapest_total_inner,
-                                                                       TOTAL_COST) < 0))
-                       {
-                               /* Found a cheap (or even-cheaper) sorted path */
-                               /* Select the right mergeclauses, if we didn't already */
-                               if (sortkeycnt < num_sortkeys)
-                               {
-                                       newclauses =
-                                               find_mergeclauses_for_pathkeys(root,
-                                                                                                          trialsortkeys,
-                                                                                                          false,
-                                                                                                          mergeclauses);
-                                       Assert(newclauses != NIL);
-                               }
-                               else
-                                       newclauses = mergeclauses;
-                               try_mergejoin_path(root,
-                                                                  joinrel,
-                                                                  outerpath,
-                                                                  innerpath,
-                                                                  merge_pathkeys,
-                                                                  newclauses,
-                                                                  NIL,
-                                                                  NIL,
-                                                                  jointype,
-                                                                  extra);
-                               cheapest_total_inner = innerpath;
-                       }
-                       /* Same on the basis of cheapest startup cost ... */
-                       innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
-                                                                                                          trialsortkeys,
-                                                                                                          NULL,
-                                                                                                          STARTUP_COST);
-                       if (innerpath != NULL &&
-                               (cheapest_startup_inner == NULL ||
-                                compare_path_costs(innerpath, cheapest_startup_inner,
-                                                                       STARTUP_COST) < 0))
-                       {
-                               /* Found a cheap (or even-cheaper) sorted path */
-                               if (innerpath != cheapest_total_inner)
-                               {
-                                       /*
-                                        * Avoid rebuilding clause list if we already made one;
-                                        * saves memory in big join trees...
-                                        */
-                                       if (newclauses == NIL)
-                                       {
-                                               if (sortkeycnt < num_sortkeys)
-                                               {
-                                                       newclauses =
-                                                               find_mergeclauses_for_pathkeys(root,
-                                                                                                                          trialsortkeys,
-                                                                                                                          false,
-                                                                                                                          mergeclauses);
-                                                       Assert(newclauses != NIL);
-                                               }
-                                               else
-                                                       newclauses = mergeclauses;
-                                       }
-                                       try_mergejoin_path(root,
-                                                                          joinrel,
-                                                                          outerpath,
-                                                                          innerpath,
-                                                                          merge_pathkeys,
-                                                                          newclauses,
-                                                                          NIL,
-                                                                          NIL,
-                                                                          jointype,
-                                                                          extra);
-                               }
-                               cheapest_startup_inner = innerpath;
-                       }
-
-                       /*
-                        * Don't consider truncated sortkeys if we need all clauses.
-                        */
-                       if (useallclauses)
-                               break;
-               }
+               /* Generate merge join paths */
+               generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
+                                                                save_jointype, extra, useallclauses,
+                                                                inner_cheapest_total, merge_pathkeys);
        }
 
        /*