granicus.if.org Git - postgresql/commitdiff
Adjust definition of cheapest_total_path to work better with LATERAL.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 30 Aug 2012 02:05:27 +0000 (22:05 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 30 Aug 2012 02:06:07 +0000 (22:06 -0400)
In the initial cut at LATERAL, I kept the rule that cheapest_total_path
was always unparameterized, which meant it had to be NULL if the relation
has no unparameterized paths.  It turns out to work much more nicely if
we always have *some* path nominated as cheapest-total for each relation.
In particular, let's still say it's the cheapest unparameterized path if
there is one; if not, take the cheapest-total-cost path among those of
the minimum available parameterization.  (The first rule is actually
a special case of the second.)

This allows reversion of some temporary lobotomizations I'd put in place.
In particular, the planner can now consider hash and merge joins for
joins below a parameter-supplying nestloop, even if there aren't any
unparameterized paths available.  This should bring planning of
LATERAL-containing queries to the same level as queries not using that
feature.

Along the way, simplify management of parameterized paths in add_path()
and friends.  In the original coding for parameterized paths in 9.2,
I tried to minimize the logic changes in add_path(), so it just treated
parameterization as yet another dimension of comparison for paths.
We later made it ignore pathkeys (sort ordering) of parameterized paths,
on the grounds that ordering isn't a useful property for the path on the
inside of a nestloop, so we might as well get rid of useless parameterized
paths as quickly as possible.  But we didn't take that reasoning as far as
we should have.  Startup cost isn't a useful property inside a nestloop
either, so add_path() ought to discount startup cost of parameterized paths
as well.  Having done that, the secondary sorting I'd implemented (in
add_parameterized_path) is no longer needed --- any parameterized path that
survives add_path() at all is worth considering at higher levels.  So this
should be a bit faster as well as simpler.

src/backend/optimizer/README
src/backend/optimizer/geqo/geqo_eval.c
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/plan/planmain.c
src/backend/optimizer/util/pathnode.c
src/include/nodes/relation.h
src/test/regress/expected/join.out

index 1aa0dbcdbb447d17d7ddf63f77210df79e37742c..438c2ca1826da788d4b40ecaa9d51006efb6c985 100644 (file)
@@ -789,7 +789,7 @@ a nestloop that provides parameters to the lower join's inputs).  While we
 do not ignore merge joins entirely, joinpath.c does not fully explore the
 space of potential merge joins with parameterized inputs.  Also, add_path
 treats parameterized paths as having no pathkeys, so that they compete
-only on cost and rowcount; they don't get preference for producing a
+only on total cost and rowcount; they don't get preference for producing a
 special sort order.  This creates additional bias against merge joins,
 since we might discard a path that could have been useful for performing
 a merge without an explicit sort step.  Since a parameterized path must
@@ -799,4 +799,19 @@ output order of a query --- they only make it harder to use a merge join
 at a lower level.  The savings in planning work justifies that.
 
 
+LATERAL subqueries
+------------------
+
+As of 9.3 we support SQL-standard LATERAL references from subqueries in
+FROM (and also functions in FROM).  The planner implements these by
+generating parameterized paths for any RTE that contains lateral
+references.  In such cases, *all* paths for that relation will be
+parameterized by at least the set of relations used in its lateral
+references.  (And in turn, join relations including such a subquery might
+not have any unparameterized paths.)  All the other comments made above for
+parameterized paths still apply, though; in particular, each such path is
+still expected to enforce any join clauses that can be pushed down to it,
+so that all paths of the same parameterization have the same rowcount.
+
+
 -- bjm & tgl
index 57eb2c39a4f315dbc8df8595c8690cadf9b6653c..0ded23c11a16078ca7e5142962993059819b6b46 100644 (file)
@@ -102,18 +102,12 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
        joinrel = gimme_tree(root, tour, num_gene);
        best_path = joinrel->cheapest_total_path;
 
-       /*
-        * If no unparameterized path, use the cheapest parameterized path for
-        * costing purposes.  XXX revisit this after LATERAL dust settles
-        */
-       if (!best_path)
-               best_path = linitial(joinrel->cheapest_parameterized_paths);
-
        /*
         * compute fitness
         *
         * XXX geqo does not currently support optimization for partial result
-        * retrieval --- how to fix?
+        * retrieval, nor do we take any cognizance of possible use of
+        * parameterized paths --- how to fix?
         */
        fitness = best_path->total_cost;
 
index 6369da9ef46106648079e54151e7503a1e075a8e..69a1b93b33746370457bff2daf4d4ece66535803 100644 (file)
@@ -722,7 +722,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 * the unparameterized Append path we are constructing for the parent.
                 * If not, there's no workable unparameterized path.
                 */
-               if (childrel->cheapest_total_path)
+               if (childrel->cheapest_total_path->param_info == NULL)
                        subpaths = accumulate_append_subpath(subpaths,
                                                                                         childrel->cheapest_total_path);
                else
@@ -932,7 +932,6 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
                                cheapest_startup = cheapest_total =
                                        childrel->cheapest_total_path;
                                /* Assert we do have an unparameterized path for this child */
-                               Assert(cheapest_total != NULL);
                                Assert(cheapest_total->param_info == NULL);
                        }
 
index d87ba4640147e483d620bbc07ad2732bbbe7cb34..5a17cb05d8ab6372628d3358bf62143da044bafa 100644 (file)
@@ -22,6 +22,9 @@
 #include "optimizer/paths.h"
 
 
+#define PATH_PARAM_BY_REL(path, rel)  \
+       ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
+
 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
                                         RelOptInfo *outerrel, RelOptInfo *innerrel,
                                         List *restrictlist, List *mergeclause_list,
@@ -503,18 +506,24 @@ sort_inner_and_outer(PlannerInfo *root,
         * cheapest-startup-cost input paths later, and only if they don't need a
         * sort.
         *
-        * This function intentionally does not consider parameterized input paths
-        * (implicit in the fact that it only looks at cheapest_total_path, which
-        * is always unparameterized).  If we did so, we'd have a combinatorial
-        * explosion of mergejoin paths of dubious value.  This interacts with
-        * decisions elsewhere that also discriminate against mergejoins with
-        * parameterized inputs; see comments in src/backend/optimizer/README.
+        * This function intentionally does not consider parameterized input
+        * paths, except when the cheapest-total is parameterized.  If we did so,
+        * we'd have a combinatorial explosion of mergejoin paths of dubious
+        * value.  This interacts with decisions elsewhere that also discriminate
+        * against mergejoins with parameterized inputs; see comments in
+        * src/backend/optimizer/README.
         */
        outer_path = outerrel->cheapest_total_path;
        inner_path = innerrel->cheapest_total_path;
 
-       /* Punt if either rel has only parameterized paths */
-       if (!outer_path || !inner_path)
+       /*
+        * If either cheapest-total path is parameterized by the other rel, we
+        * can't use a mergejoin.  (There's no use looking for alternative input
+        * paths, since these should already be the least-parameterized available
+        * paths.)
+        */
+       if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
+               PATH_PARAM_BY_REL(inner_path, outerrel))
                return;
 
        /*
@@ -714,16 +723,23 @@ match_unsorted_outer(PlannerInfo *root,
                        break;
        }
 
+       /*
+        * If inner_cheapest_total is parameterized by the outer rel, ignore it;
+        * we will consider it below as a member of cheapest_parameterized_paths,
+        * but the other possibilities considered in this routine aren't usable.
+        */
+       if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+               inner_cheapest_total = NULL;
+
        /*
         * If we need to unique-ify the inner path, we will consider only the
         * cheapest-total inner.
         */
        if (save_jointype == JOIN_UNIQUE_INNER)
        {
-               /* XXX for the moment, don't crash on LATERAL --- rethink this */
+               /* No way to do this with an inner path parameterized by outer rel */
                if (inner_cheapest_total == NULL)
                        return;
-
                inner_cheapest_total = (Path *)
                        create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
                Assert(inner_cheapest_total);
@@ -756,15 +772,13 @@ match_unsorted_outer(PlannerInfo *root,
                /*
                 * We cannot use an outer path that is parameterized by the inner rel.
                 */
-               if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids))
+               if (PATH_PARAM_BY_REL(outerpath, innerrel))
                        continue;
 
                /*
                 * If we need to unique-ify the outer path, it's pointless to consider
                 * any but the cheapest outer.  (XXX we don't consider parameterized
                 * outers, nor inners, for unique-ified cases.  Should we?)
-                *
-                * XXX does nothing for LATERAL, rethink
                 */
                if (save_jointype == JOIN_UNIQUE_OUTER)
                {
@@ -844,8 +858,8 @@ match_unsorted_outer(PlannerInfo *root,
                if (save_jointype == JOIN_UNIQUE_OUTER)
                        continue;
 
-               /* Can't do anything else if inner has no unparameterized paths */
-               if (!inner_cheapest_total)
+               /* Can't do anything else if inner rel is parameterized by outer */
+               if (inner_cheapest_total == NULL)
                        continue;
 
                /* Look for useful mergeclauses (if any) */
@@ -1126,10 +1140,14 @@ hash_inner_and_outer(PlannerInfo *root,
                Path       *cheapest_total_outer = outerrel->cheapest_total_path;
                Path       *cheapest_total_inner = innerrel->cheapest_total_path;
 
-               /* Punt if either rel has only parameterized paths */
-               if (!cheapest_startup_outer ||
-                       !cheapest_total_outer ||
-                       !cheapest_total_inner)
+               /*
+                * If either cheapest-total path is parameterized by the other rel, we
+                * can't use a hashjoin.  (There's no use looking for alternative
+                * input paths, since these should already be the least-parameterized
+                * available paths.)
+                */
+               if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
+                       PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
                        return;
 
                /* Unique-ify if need be; we ignore parameterized possibilities */
@@ -1169,7 +1187,8 @@ hash_inner_and_outer(PlannerInfo *root,
                                                          cheapest_total_inner,
                                                          restrictlist,
                                                          hashclauses);
-                       if (cheapest_startup_outer != cheapest_total_outer)
+                       if (cheapest_startup_outer != NULL &&
+                               cheapest_startup_outer != cheapest_total_outer)
                                try_hashjoin_path(root,
                                                                  joinrel,
                                                                  jointype,
@@ -1193,16 +1212,17 @@ hash_inner_and_outer(PlannerInfo *root,
                        ListCell   *lc1;
                        ListCell   *lc2;
 
-                       try_hashjoin_path(root,
-                                                         joinrel,
-                                                         jointype,
-                                                         sjinfo,
-                                                         semifactors,
-                                                         param_source_rels,
-                                                         cheapest_startup_outer,
-                                                         cheapest_total_inner,
-                                                         restrictlist,
-                                                         hashclauses);
+                       if (cheapest_startup_outer != NULL)
+                               try_hashjoin_path(root,
+                                                                 joinrel,
+                                                                 jointype,
+                                                                 sjinfo,
+                                                                 semifactors,
+                                                                 param_source_rels,
+                                                                 cheapest_startup_outer,
+                                                                 cheapest_total_inner,
+                                                                 restrictlist,
+                                                                 hashclauses);
 
                        foreach(lc1, outerrel->cheapest_parameterized_paths)
                        {
@@ -1212,7 +1232,7 @@ hash_inner_and_outer(PlannerInfo *root,
                                 * We cannot use an outer path that is parameterized by the
                                 * inner rel.
                                 */
-                               if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids))
+                               if (PATH_PARAM_BY_REL(outerpath, innerrel))
                                        continue;
 
                                foreach(lc2, innerrel->cheapest_parameterized_paths)
@@ -1223,8 +1243,7 @@ hash_inner_and_outer(PlannerInfo *root,
                                         * We cannot use an inner path that is parameterized by
                                         * the outer rel, either.
                                         */
-                                       if (bms_overlap(PATH_REQ_OUTER(innerpath),
-                                                                       outerrel->relids))
+                                       if (PATH_PARAM_BY_REL(innerpath, outerrel))
                                                continue;
 
                                        if (outerpath == cheapest_startup_outer &&
index 04acc006f53e328a932c0879e90cba7f2c116c18..c2488a4923bd24e697c620af7da6b9288065be27 100644 (file)
@@ -267,7 +267,8 @@ query_planner(PlannerInfo *root, List *tlist,
         */
        final_rel = make_one_rel(root, joinlist);
 
-       if (!final_rel || !final_rel->cheapest_total_path)
+       if (!final_rel || !final_rel->cheapest_total_path ||
+               final_rel->cheapest_total_path->param_info != NULL)
                elog(ERROR, "failed to construct the join relation");
 
        /*
index 691afbf0ed6d80fd0926c7d35ececeedbac6f58d..8677ff20fd545c805825e0a14769d9dfb10d6a00 100644 (file)
@@ -37,7 +37,6 @@ typedef enum
        COSTS_DIFFERENT                         /* neither path dominates the other on cost */
 } PathCostComparison;
 
-static void add_parameterized_path(RelOptInfo *parent_rel, Path *new_path);
 static List *translate_sub_tlist(List *tlist, int relid);
 static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 static Oid     distinct_col_search(int colno, List *colnos, List *opids);
@@ -139,6 +138,12 @@ compare_fractional_path_costs(Path *path1, Path *path2,
  * is fuzzily better than the other on startup cost and fuzzily worse on
  * total cost, we just say that their costs are "different", since neither
  * dominates the other across the whole performance spectrum.
+ *
+ * This function also includes special hacks to support a policy enforced
+ * by its sole caller, add_path(): paths that have any parameterization
+ * cannot win comparisons on the grounds of having cheaper startup cost,
+ * since we deem only total cost to be of interest for a parameterized path.
+ * (Unparameterized paths are more common, so we check for this case last.)
  */
 static PathCostComparison
 compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
@@ -150,7 +155,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
        if (path1->total_cost > path2->total_cost * fuzz_factor)
        {
                /* path1 fuzzily worse on total cost */
-               if (path2->startup_cost > path1->startup_cost * fuzz_factor)
+               if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+                       path1->param_info == NULL)
                {
                        /* ... but path2 fuzzily worse on startup, so DIFFERENT */
                        return COSTS_DIFFERENT;
@@ -161,7 +167,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
        if (path2->total_cost > path1->total_cost * fuzz_factor)
        {
                /* path2 fuzzily worse on total cost */
-               if (path1->startup_cost > path2->startup_cost * fuzz_factor)
+               if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+                       path2->param_info == NULL)
                {
                        /* ... but path1 fuzzily worse on startup, so DIFFERENT */
                        return COSTS_DIFFERENT;
@@ -170,12 +177,14 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
                return COSTS_BETTER1;
        }
        /* fuzzily the same on total cost */
-       if (path1->startup_cost > path2->startup_cost * fuzz_factor)
+       if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+               path2->param_info == NULL)
        {
                /* ... but path1 fuzzily worse on startup, so path2 wins */
                return COSTS_BETTER2;
        }
-       if (path2->startup_cost > path1->startup_cost * fuzz_factor)
+       if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+               path1->param_info == NULL)
        {
                /* ... but path2 fuzzily worse on startup, so path1 wins */
                return COSTS_BETTER1;
@@ -189,11 +198,19 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
  *       Find the minimum-cost paths from among a relation's paths,
  *       and save them in the rel's cheapest-path fields.
  *
- * Only unparameterized paths are considered candidates for cheapest_startup
- * and cheapest_total. The cheapest_parameterized_paths list collects paths
- * that are cheapest-total for their parameterization (i.e., there is no
- * cheaper path with the same or weaker parameterization).     This list always
- * includes the unparameterized cheapest-total path, too, if there is one.
+ * cheapest_total_path is normally the cheapest-total-cost unparameterized
+ * path; but if there are no unparameterized paths, we assign it to be the
+ * best (cheapest least-parameterized) parameterized path.  However, only
+ * unparameterized paths are considered candidates for cheapest_startup_path,
+ * so that will be NULL if there are no unparameterized paths.
+ *
+ * The cheapest_parameterized_paths list collects all parameterized paths
+ * that have survived the add_path() tournament for this relation.  (Since
+ * add_path ignores pathkeys and startup cost for a parameterized path,
+ * these will be paths that have best total cost or best row count for their
+ * parameterization.)  cheapest_parameterized_paths always includes the
+ * cheapest-total unparameterized path, too, if there is one; the users of
+ * that list find it more convenient if that's included.
  *
  * This is normally called only after we've finished constructing the path
  * list for the rel node.
@@ -203,77 +220,118 @@ set_cheapest(RelOptInfo *parent_rel)
 {
        Path       *cheapest_startup_path;
        Path       *cheapest_total_path;
-       bool            have_parameterized_paths;
+       Path       *best_param_path;
+       List       *parameterized_paths;
        ListCell   *p;
 
        Assert(IsA(parent_rel, RelOptInfo));
 
-       cheapest_startup_path = cheapest_total_path = NULL;
-       have_parameterized_paths = false;
+       if (parent_rel->pathlist == NIL)
+               elog(ERROR, "could not devise a query plan for the given query");
+
+       cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
+       parameterized_paths = NIL;
 
        foreach(p, parent_rel->pathlist)
        {
                Path       *path = (Path *) lfirst(p);
                int                     cmp;
 
-               /* We only consider unparameterized paths in this step */
                if (path->param_info)
                {
-                       have_parameterized_paths = true;
-                       continue;
-               }
+                       /* Parameterized path, so add it to parameterized_paths */
+                       parameterized_paths = lappend(parameterized_paths, path);
 
-               if (cheapest_total_path == NULL)
-               {
-                       cheapest_startup_path = cheapest_total_path = path;
-                       continue;
+                       /*
+                        * If we have an unparameterized cheapest-total, we no longer care
+                        * about finding the best parameterized path, so move on.
+                        */
+                       if (cheapest_total_path)
+                               continue;
+
+                       /*
+                        * Otherwise, track the best parameterized path, which is the one
+                        * with least total cost among those of the minimum
+                        * parameterization.
+                        */
+                       if (best_param_path == NULL)
+                               best_param_path = path;
+                       else
+                       {
+                               switch (bms_subset_compare(PATH_REQ_OUTER(path),
+                                                                                  PATH_REQ_OUTER(best_param_path)))
+                               {
+                                       case BMS_EQUAL:
+                                               /* keep the cheaper one */
+                                               if (compare_path_costs(path, best_param_path,
+                                                                                          TOTAL_COST) < 0)
+                                                       best_param_path = path;
+                                               break;
+                                       case BMS_SUBSET1:
+                                               /* new path is less-parameterized */
+                                               best_param_path = path;
+                                               break;
+                                       case BMS_SUBSET2:
+                                               /* old path is less-parameterized, keep it */
+                                               break;
+                                       case BMS_DIFFERENT:
+                                               /*
+                                                * This means that neither path has the least possible
+                                                * parameterization for the rel.  We'll sit on the old
+                                                * path until something better comes along.
+                                                */
+                                               break;
+                               }
+                       }
                }
+               else
+               {
+                       /* Unparameterized path, so consider it for cheapest slots */
+                       if (cheapest_total_path == NULL)
+                       {
+                               cheapest_startup_path = cheapest_total_path = path;
+                               continue;
+                       }
 
-               /*
-                * If we find two paths of identical costs, try to keep the
-                * better-sorted one.  The paths might have unrelated sort orderings,
-                * in which case we can only guess which might be better to keep, but
-                * if one is superior then we definitely should keep that one.
-                */
-               cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
-               if (cmp > 0 ||
-                       (cmp == 0 &&
-                        compare_pathkeys(cheapest_startup_path->pathkeys,
-                                                         path->pathkeys) == PATHKEYS_BETTER2))
-                       cheapest_startup_path = path;
-
-               cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
-               if (cmp > 0 ||
-                       (cmp == 0 &&
-                        compare_pathkeys(cheapest_total_path->pathkeys,
-                                                         path->pathkeys) == PATHKEYS_BETTER2))
-                       cheapest_total_path = path;
+                       /*
+                        * If we find two paths of identical costs, try to keep the
+                        * better-sorted one.  The paths might have unrelated sort
+                        * orderings, in which case we can only guess which might be
+                        * better to keep, but if one is superior then we definitely
+                        * should keep that one.
+                        */
+                       cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
+                       if (cmp > 0 ||
+                               (cmp == 0 &&
+                                compare_pathkeys(cheapest_startup_path->pathkeys,
+                                                                 path->pathkeys) == PATHKEYS_BETTER2))
+                               cheapest_startup_path = path;
+
+                       cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
+                       if (cmp > 0 ||
+                               (cmp == 0 &&
+                                compare_pathkeys(cheapest_total_path->pathkeys,
+                                                                 path->pathkeys) == PATHKEYS_BETTER2))
+                               cheapest_total_path = path;
+               }
        }
 
-       if (cheapest_total_path == NULL && !have_parameterized_paths)
-               elog(ERROR, "could not devise a query plan for the given query");
+       /* Add cheapest unparameterized path, if any, to parameterized_paths */
+       if (cheapest_total_path)
+               parameterized_paths = lcons(cheapest_total_path, parameterized_paths);
+
+       /*
+        * If there is no unparameterized path, use the best parameterized path
+        * as cheapest_total_path (but not as cheapest_startup_path).
+        */
+       if (cheapest_total_path == NULL)
+               cheapest_total_path = best_param_path;
+       Assert(cheapest_total_path != NULL);
 
        parent_rel->cheapest_startup_path = cheapest_startup_path;
        parent_rel->cheapest_total_path = cheapest_total_path;
        parent_rel->cheapest_unique_path = NULL;        /* computed only if needed */
-
-       /* Seed the parameterized-paths list with the cheapest total, if any */
-       if (cheapest_total_path)
-               parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path);
-       else
-               parent_rel->cheapest_parameterized_paths = NIL;
-
-       /* And, if there are any parameterized paths, add them in one at a time */
-       if (have_parameterized_paths)
-       {
-               foreach(p, parent_rel->pathlist)
-               {
-                       Path       *path = (Path *) lfirst(p);
-
-                       if (path->param_info)
-                               add_parameterized_path(parent_rel, path);
-               }
-       }
+       parent_rel->cheapest_parameterized_paths = parameterized_paths;
 }
 
 /*
@@ -295,11 +353,12 @@ set_cheapest(RelOptInfo *parent_rel)
  *       one parameterization can seldom dominate a path of another.  But such
  *       cases do arise, so we make the full set of checks anyway.
  *
- *       There is one policy decision embedded in this function, along with its
- *       sibling add_path_precheck: we treat all parameterized paths as having
- *       NIL pathkeys, so that they compete only on cost.      This is to reduce
- *       the number of parameterized paths that are kept.      See discussion in
- *       src/backend/optimizer/README.
+ *       There are two policy decisions embedded in this function, along with
+ *       its sibling add_path_precheck: we treat all parameterized paths as
+ *       having NIL pathkeys, and we ignore their startup costs, so that they
+ *       compete only on parameterization, total cost and rowcount.  This is to
+ *       reduce the number of parameterized paths that are kept.  See discussion
+ *       in src/backend/optimizer/README.
  *
  *       The pathlist is kept sorted by total_cost, with cheaper paths
  *       at the front.  Within this routine, that's simply a speed hack:
@@ -552,7 +611,7 @@ add_path_precheck(RelOptInfo *parent_rel,
        List       *new_path_pathkeys;
        ListCell   *p1;
 
-       /* Pretend parameterized paths have no pathkeys, per add_path comment */
+       /* Pretend parameterized paths have no pathkeys, per add_path policy */
        new_path_pathkeys = required_outer ? NIL : pathkeys;
 
        foreach(p1, parent_rel->pathlist)
@@ -572,8 +631,10 @@ add_path_precheck(RelOptInfo *parent_rel,
                 */
                if (total_cost >= old_path->total_cost)
                {
-                       if (startup_cost >= old_path->startup_cost)
+                       /* can win on startup cost only if unparameterized */
+                       if (startup_cost >= old_path->startup_cost || required_outer)
                        {
+                               /* new path does not win on cost, so check pathkeys... */
                                List       *old_path_pathkeys;
 
                                old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
@@ -582,6 +643,7 @@ add_path_precheck(RelOptInfo *parent_rel,
                                if (keyscmp == PATHKEYS_EQUAL ||
                                        keyscmp == PATHKEYS_BETTER2)
                                {
+                                       /* new path does not win on pathkeys... */
                                        if (bms_equal(required_outer, PATH_REQ_OUTER(old_path)))
                                        {
                                                /* Found an old path that dominates the new one */
@@ -604,123 +666,6 @@ add_path_precheck(RelOptInfo *parent_rel,
        return true;
 }
 
-/*
- * add_parameterized_path
- *       Consider a parameterized implementation path for the specified rel,
- *       and add it to the rel's cheapest_parameterized_paths list if it
- *       belongs there, removing any old entries that it dominates.
- *
- *       This is essentially a cut-down form of add_path(): we do not care
- *       about startup cost or sort ordering, only total cost, rowcount, and
- *       parameterization.  Also, we must not recycle rejected paths, since
- *       they will still be present in the rel's pathlist.
- *
- * 'parent_rel' is the relation entry to which the path corresponds.
- * 'new_path' is a parameterized path for parent_rel.
- *
- * Returns nothing, but modifies parent_rel->cheapest_parameterized_paths.
- */
-static void
-add_parameterized_path(RelOptInfo *parent_rel, Path *new_path)
-{
-       bool            accept_new = true;              /* unless we find a superior old path */
-       ListCell   *insert_after = NULL;        /* where to insert new item */
-       ListCell   *p1;
-       ListCell   *p1_prev;
-       ListCell   *p1_next;
-
-       /*
-        * Loop to check proposed new path against old paths.  Note it is possible
-        * for more than one old path to be tossed out because new_path dominates
-        * it.
-        *
-        * We can't use foreach here because the loop body may delete the current
-        * list cell.
-        */
-       p1_prev = NULL;
-       for (p1 = list_head(parent_rel->cheapest_parameterized_paths);
-                p1 != NULL; p1 = p1_next)
-       {
-               Path       *old_path = (Path *) lfirst(p1);
-               bool            remove_old = false; /* unless new proves superior */
-               int                     costcmp;
-               BMS_Comparison outercmp;
-
-               p1_next = lnext(p1);
-
-               costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
-               outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
-                                                                         PATH_REQ_OUTER(old_path));
-               if (outercmp != BMS_DIFFERENT)
-               {
-                       if (costcmp < 0)
-                       {
-                               if (outercmp != BMS_SUBSET2 &&
-                                       new_path->rows <= old_path->rows)
-                                       remove_old = true;      /* new dominates old */
-                       }
-                       else if (costcmp > 0)
-                       {
-                               if (outercmp != BMS_SUBSET1 &&
-                                       new_path->rows >= old_path->rows)
-                                       accept_new = false; /* old dominates new */
-                       }
-                       else if (outercmp == BMS_SUBSET1 &&
-                                        new_path->rows <= old_path->rows)
-                               remove_old = true;              /* new dominates old */
-                       else if (outercmp == BMS_SUBSET2 &&
-                                        new_path->rows >= old_path->rows)
-                               accept_new = false;             /* old dominates new */
-                       else if (new_path->rows < old_path->rows)
-                               remove_old = true;              /* new dominates old */
-                       else
-                       {
-                               /* Same cost, rows, and param rels; arbitrarily keep old */
-                               accept_new = false;             /* old equals or dominates new */
-                       }
-               }
-
-               /*
-                * Remove current element from cheapest_parameterized_paths if
-                * dominated by new.
-                */
-               if (remove_old)
-               {
-                       parent_rel->cheapest_parameterized_paths =
-                               list_delete_cell(parent_rel->cheapest_parameterized_paths,
-                                                                p1, p1_prev);
-                       /* p1_prev does not advance */
-               }
-               else
-               {
-                       /* new belongs after this old path if it has cost >= old's */
-                       if (costcmp >= 0)
-                               insert_after = p1;
-                       /* p1_prev advances */
-                       p1_prev = p1;
-               }
-
-               /*
-                * If we found an old path that dominates new_path, we can quit
-                * scanning the list; we will not add new_path, and we assume new_path
-                * cannot dominate any other elements of the list.
-                */
-               if (!accept_new)
-                       break;
-       }
-
-       if (accept_new)
-       {
-               /* Accept the new path: insert it at proper place in list */
-               if (insert_after)
-                       lappend_cell(parent_rel->cheapest_parameterized_paths,
-                                                insert_after, new_path);
-               else
-                       parent_rel->cheapest_parameterized_paths =
-                               lcons(new_path, parent_rel->cheapest_parameterized_paths);
-       }
-}
-
 
 /*****************************************************************************
  *             PATH NODE CREATION ROUTINES
@@ -1137,13 +1082,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
        int                     numCols;
        ListCell   *lc;
 
-       /* XXX temporary band-aid to not crash on LATERAL queries */
-       if (subpath == NULL)
-       {
-               Assert(subpath == rel->cheapest_total_path);
-               return NULL;
-       }
-
        /* Caller made a mistake if subpath isn't cheapest_total ... */
        Assert(subpath == rel->cheapest_total_path);
        Assert(subpath->parent == rel);
index 43c2956fdf54e13ba3c12622e04a633fcf24b92c..7b4eaf4f5e540158ac2f88eba232ad91e17dc8f2 100644 (file)
@@ -309,18 +309,16 @@ typedef struct PlannerInfo
  *                                method of generating the relation
  *             ppilist - ParamPathInfo nodes for parameterized Paths, if any
  *             cheapest_startup_path - the pathlist member with lowest startup cost
- *                                                             (regardless of its ordering; but must be
- *                                                              unparameterized; hence will be NULL for
- *                                                              a LATERAL subquery)
+ *                     (regardless of ordering) among the unparameterized paths;
+ *                     or NULL if there is no unparameterized path
  *             cheapest_total_path - the pathlist member with lowest total cost
- *                                                       (regardless of its ordering; but must be
- *                                                        unparameterized; hence will be NULL for
- *                                                        a LATERAL subquery)
+ *                     (regardless of ordering) among the unparameterized paths;
+ *                     or if there is no unparameterized path, the path with lowest
+ *                     total cost among the paths with minimum parameterization
  *             cheapest_unique_path - for caching cheapest path to produce unique
- *                                                        (no duplicates) output from relation
- *             cheapest_parameterized_paths - paths with cheapest total costs for
- *                                                              their parameterizations; always includes
- *                                                              cheapest_total_path, if that exists
+ *                     (no duplicates) output from relation; NULL if not yet requested
+ *             cheapest_parameterized_paths - best paths for their parameterizations;
+ *                     always includes cheapest_total_path, even if that's unparameterized
  *
  * If the relation is a base relation it will have these fields set:
  *
index 264d89666051c3ef726d3a2ef83d2934dc620273..2e54ede838709c1dc5b917fef2d13891660b7540 100644 (file)
@@ -3167,15 +3167,16 @@ explain (costs off)
   select * from int8_tbl a,
     int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z)
       on x.q2 = ss.z;
-             QUERY PLAN             
-------------------------------------
+                QUERY PLAN                
+------------------------------------------
  Nested Loop
    ->  Seq Scan on int8_tbl a
-   ->  Nested Loop Left Join
-         Join Filter: (x.q2 = ($0))
+   ->  Hash Left Join
+         Hash Cond: (x.q2 = ($0))
          ->  Seq Scan on int8_tbl x
-         ->  Seq Scan on int4_tbl y
-(6 rows)
+         ->  Hash
+               ->  Seq Scan on int4_tbl y
+(7 rows)
 
 select * from int8_tbl a,
   int8_tbl x left join lateral (select a.q1 from int4_tbl y) ss(z)