]> granicus.if.org Git - postgresql/commitdiff
Still more fixes for planner's handling of LATERAL references.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 11 Dec 2015 19:22:20 +0000 (14:22 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 11 Dec 2015 19:22:20 +0000 (14:22 -0500)
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references.  If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too.  The planner did not understand this and would generate
intermediate joins that could not be used.  While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.

To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent.  This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.

Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations.  This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.

Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1.  Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures.  The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen.  I chose to move the
supporting logic into joinrels.c where the latter functions are.

Back-patch to 9.3 where LATERAL support was introduced.

src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/plan/initsplan.c
src/backend/optimizer/util/relnode.c
src/include/nodes/relation.h
src/include/optimizer/pathnode.h
src/include/optimizer/paths.h
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index 0f04033166588adc157d18f5d50ce1c1f07ddb30..53d8fdd6e5bb28c34b3867580c6f037b818c548c 100644 (file)
@@ -254,54 +254,6 @@ allow_star_schema_join(PlannerInfo *root,
                        bms_nonempty_difference(innerparams, outerrelids));
 }
 
-/*
- * There's a pitfall for creating parameterized nestloops: suppose the inner
- * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
- * minimum eval_at set includes the outer rel (B) and some third rel (C).
- * We might think we could create a B/A nestloop join that's parameterized by
- * C.  But we would end up with a plan in which the PHV's expression has to be
- * evaluated as a nestloop parameter at the B/A join; and the executor is only
- * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
- * and overhead to the executor for such corner cases, it seems better to
- * forbid the join.  (Note that existence of such a PHV probably means there
- * is a join order constraint that will cause us to consider joining B and C
- * directly; so we can still make use of A's parameterized path with B+C.)
- * So we check whether any PHVs used in the query could pose such a hazard.
- * We don't have any simple way of checking whether a risky PHV would actually
- * be used in the inner plan, and the case is so unusual that it doesn't seem
- * worth working very hard on it.
- *
- * This case can occur whether or not the join's remaining parameterization
- * overlaps param_source_rels, so we have to check for it separately from
- * allow_star_schema_join, even though it looks much like a star-schema case.
- */
-static inline bool
-check_hazardous_phv(PlannerInfo *root,
-                                       Path *outer_path,
-                                       Path *inner_path)
-{
-       Relids          innerparams = PATH_REQ_OUTER(inner_path);
-       Relids          outerrelids = outer_path->parent->relids;
-       ListCell   *lc;
-
-       foreach(lc, root->placeholder_list)
-       {
-               PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
-
-               if (!bms_is_subset(phinfo->ph_eval_at, innerparams))
-                       continue;                       /* ignore, could not be a nestloop param */
-               if (!bms_overlap(phinfo->ph_eval_at, outerrelids))
-                       continue;                       /* ignore, not relevant to this join */
-               if (bms_is_subset(phinfo->ph_eval_at, outerrelids))
-                       continue;                       /* safe, it can be eval'd within outerrel */
-               /* Otherwise, it's potentially unsafe, so reject the join */
-               return false;
-       }
-
-       /* OK to perform the join */
-       return true;
-}
-
 /*
  * try_nestloop_path
  *       Consider a nestloop join path; if it appears useful, push it into
@@ -322,31 +274,24 @@ try_nestloop_path(PlannerInfo *root,
        /*
         * Check to see if proposed path is still parameterized, and reject if the
         * parameterization wouldn't be sensible --- unless allow_star_schema_join
-        * says to allow it anyway.  Also, we must reject if check_hazardous_phv
-        * doesn't like the look of it.
+        * says to allow it anyway.  Also, we must reject if have_dangerous_phv
+        * doesn't like the look of it, which could only happen if the nestloop is
+        * still parameterized.
         */
        required_outer = calc_nestloop_required_outer(outer_path,
                                                                                                  inner_path);
        if (required_outer &&
                ((!bms_overlap(required_outer, extra->param_source_rels) &&
                  !allow_star_schema_join(root, outer_path, inner_path)) ||
-                !check_hazardous_phv(root, outer_path, inner_path)))
+                have_dangerous_phv(root,
+                                                       outer_path->parent->relids,
+                                                       PATH_REQ_OUTER(inner_path))))
        {
                /* Waste no memory when we reject a path here */
                bms_free(required_outer);
                return;
        }
 
-       /*
-        * Independently of that, add parameterization needed for any
-        * PlaceHolderVars that need to be computed at the join.  We can handle
-        * that just by adding joinrel->lateral_relids; that might include some
-        * rels that are already in required_outer, but no harm done.  (Note that
-        * lateral_relids is exactly NULL if empty, so this will not break the
-        * property that required_outer is too.)
-        */
-       required_outer = bms_add_members(required_outer, joinrel->lateral_relids);
-
        /*
         * Do a precheck to quickly eliminate obviously-inferior paths.  We
         * calculate a cheap lower bound on the path's cost and then use
@@ -418,12 +363,6 @@ try_mergejoin_path(PlannerInfo *root,
                return;
        }
 
-       /*
-        * Independently of that, add parameterization needed for any
-        * PlaceHolderVars that need to be computed at the join.
-        */
-       required_outer = bms_add_members(required_outer, joinrel->lateral_relids);
-
        /*
         * If the given paths are already well enough ordered, we can skip doing
         * an explicit sort.
@@ -500,12 +439,6 @@ try_hashjoin_path(PlannerInfo *root,
                return;
        }
 
-       /*
-        * Independently of that, add parameterization needed for any
-        * PlaceHolderVars that need to be computed at the join.
-        */
-       required_outer = bms_add_members(required_outer, joinrel->lateral_relids);
-
        /*
         * See comments in try_nestloop_path().  Also note that hashjoin paths
         * never have any output pathkeys, per comments in create_hashjoin_path.
index 9f0212fad23b0cefc05ee7a8c8d5a038206defe4..47837e1eda8762635166a290aa38b975af6920f0 100644 (file)
@@ -332,9 +332,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
        bool            reversed;
        bool            unique_ified;
        bool            must_be_leftjoin;
-       bool            lateral_fwd;
-       bool            lateral_rev;
-       Relids          join_lateral_rels;
        ListCell   *l;
 
        /*
@@ -527,75 +524,126 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 
        /*
         * We also have to check for constraints imposed by LATERAL references.
-        * The proposed rels could each contain lateral references to the other,
-        * in which case the join is impossible.  If there are lateral references
-        * in just one direction, then the join has to be done with a nestloop
-        * with the lateral referencer on the inside.  If the join matches an SJ
-        * that cannot be implemented by such a nestloop, the join is impossible.
         */
-       lateral_fwd = lateral_rev = false;
-       foreach(l, root->lateral_info_list)
+       if (root->hasLateralRTEs)
        {
-               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+               bool            lateral_fwd;
+               bool            lateral_rev;
+               Relids          join_lateral_rels;
 
-               if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
-                       bms_overlap(ljinfo->lateral_lhs, rel1->relids))
+               /*
+                * The proposed rels could each contain lateral references to the
+                * other, in which case the join is impossible.  If there are lateral
+                * references in just one direction, then the join has to be done with
+                * a nestloop with the lateral referencer on the inside.  If the join
+                * matches an SJ that cannot be implemented by such a nestloop, the
+                * join is impossible.
+                *
+                * Also, if the lateral reference is only indirect, we should reject
+                * the join; whatever rel(s) the reference chain goes through must be
+                * joined to first.
+                *
+                * Another case that might keep us from building a valid plan is the
+                * implementation restriction described by have_dangerous_phv().
+                */
+               lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
+               lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
+               if (lateral_fwd && lateral_rev)
+                       return false;           /* have lateral refs in both directions */
+               if (lateral_fwd)
                {
                        /* has to be implemented as nestloop with rel1 on left */
-                       if (lateral_rev)
-                               return false;   /* have lateral refs in both directions */
-                       lateral_fwd = true;
-                       if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
-                               return false;   /* rel1 can't compute the required parameter */
                        if (match_sjinfo &&
                                (reversed ||
                                 unique_ified ||
                                 match_sjinfo->jointype == JOIN_FULL))
                                return false;   /* not implementable as nestloop */
+                       /* check there is a direct reference from rel2 to rel1 */
+                       foreach(l, root->lateral_info_list)
+                       {
+                               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+
+                               if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
+                                       bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
+                                       break;
+                       }
+                       if (l == NULL)
+                               return false;   /* only indirect refs, so reject */
+                       /* check we won't have a dangerous PHV */
+                       if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+                               return false;   /* might be unable to handle required PHV */
                }
-               if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
-                       bms_overlap(ljinfo->lateral_lhs, rel2->relids))
+               else if (lateral_rev)
                {
                        /* has to be implemented as nestloop with rel2 on left */
-                       if (lateral_fwd)
-                               return false;   /* have lateral refs in both directions */
-                       lateral_rev = true;
-                       if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
-                               return false;   /* rel2 can't compute the required parameter */
                        if (match_sjinfo &&
                                (!reversed ||
                                 unique_ified ||
                                 match_sjinfo->jointype == JOIN_FULL))
                                return false;   /* not implementable as nestloop */
+                       /* check there is a direct reference from rel1 to rel2 */
+                       foreach(l, root->lateral_info_list)
+                       {
+                               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+
+                               if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
+                                       bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
+                                       break;
+                       }
+                       if (l == NULL)
+                               return false;   /* only indirect refs, so reject */
+                       /* check we won't have a dangerous PHV */
+                       if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+                               return false;   /* might be unable to handle required PHV */
                }
-       }
 
-       /*
-        * LATERAL references could also cause problems later on if we accept this
-        * join: if the join's minimum parameterization includes any rels that
-        * would have to be on the inside of an outer join with this join rel,
-        * then it's never going to be possible to build the complete query using
-        * this join.  We should reject this join not only because it'll save
-        * work, but because if we don't, the clauseless-join heuristics might
-        * think that legality of this join means that some other join rel need
-        * not be formed, and that could lead to failure to find any plan at all.
-        * It seems best not to merge this check into the main loop above, because
-        * it is concerned with SJs that are not otherwise relevant to this join.
-        */
-       join_lateral_rels = min_join_parameterization(root, joinrelids);
-       if (join_lateral_rels)
-       {
-               foreach(l, root->join_info_list)
+               /*
+                * LATERAL references could also cause problems later on if we accept
+                * this join: if the join's minimum parameterization includes any rels
+                * that would have to be on the inside of an outer join with this join
+                * rel, then it's never going to be possible to build the complete
+                * query using this join.  We should reject this join not only because
+                * it'll save work, but because if we don't, the clauseless-join
+                * heuristics might think that legality of this join means that some
+                * other join rel need not be formed, and that could lead to failure
+                * to find any plan at all.  We have to consider not only rels that
+                * are directly on the inner side of an OJ with the joinrel, but also
+                * ones that are indirectly so, so search to find all such rels.
+                */
+               join_lateral_rels = min_join_parameterization(root, joinrelids,
+                                                                                                         rel1, rel2);
+               if (join_lateral_rels)
                {
-                       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
-
-                       if (bms_overlap(sjinfo->min_righthand, join_lateral_rels) &&
-                               bms_overlap(sjinfo->min_lefthand, joinrelids))
-                               return false;   /* will not be able to join to min_righthand */
-                       if (sjinfo->jointype == JOIN_FULL &&
-                               bms_overlap(sjinfo->min_lefthand, join_lateral_rels) &&
-                               bms_overlap(sjinfo->min_righthand, joinrelids))
-                               return false;   /* will not be able to join to min_lefthand */
+                       Relids          join_plus_rhs = bms_copy(joinrelids);
+                       bool            more;
+
+                       do
+                       {
+                               more = false;
+                               foreach(l, root->join_info_list)
+                               {
+                                       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+                                       if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
+                                               !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
+                                       {
+                                               join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                                                                         sjinfo->min_righthand);
+                                               more = true;
+                                       }
+                                       /* full joins constrain both sides symmetrically */
+                                       if (sjinfo->jointype == JOIN_FULL &&
+                                               bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
+                                               !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
+                                       {
+                                               join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                                                                               sjinfo->min_lefthand);
+                                               more = true;
+                                       }
+                               }
+                       } while (more);
+                       if (bms_overlap(join_plus_rhs, join_lateral_rels))
+                               return false;   /* will not be able to join to some RHS rel */
                }
        }
 
@@ -852,8 +900,9 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
  * could be merged with that function, but it seems clearer to separate the
  * two concerns.  We need this test because there are degenerate cases where
  * a clauseless join must be performed to satisfy join-order restrictions.
- * Also, if one rel has a lateral reference to the other, we should consider
- * joining them even if the join would be clauseless.
+ * Also, if one rel has a lateral reference to the other, or both are needed
+ * to compute some PHV, we should consider joining them even if the join would
+ * be clauseless.
  *
  * Note: this is only a problem if one side of a degenerate outer join
  * contains multiple rels, or a clauseless join is required within an
@@ -870,8 +919,8 @@ have_join_order_restriction(PlannerInfo *root,
        ListCell   *l;
 
        /*
-        * If either side has a lateral reference to the other, attempt the join
-        * regardless of outer-join considerations.
+        * If either side has a direct lateral reference to the other, attempt the
+        * join regardless of outer-join considerations.
         */
        foreach(l, root->lateral_info_list)
        {
@@ -885,6 +934,22 @@ have_join_order_restriction(PlannerInfo *root,
                        return true;
        }
 
+       /*
+        * Likewise, if both rels are needed to compute some PlaceHolderVar,
+        * attempt the join regardless of outer-join considerations.  (This is not
+        * very desirable, because a PHV with a large eval_at set will cause a lot
+        * of probably-useless joins to be considered, but failing to do this can
+        * cause us to fail to construct a plan at all.)
+        */
+       foreach(l, root->placeholder_list)
+       {
+               PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+
+               if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
+                       bms_is_subset(rel2->relids, phinfo->ph_eval_at))
+                       return true;
+       }
+
        /*
         * It's possible that the rels correspond to the left and right sides of a
         * degenerate outer join, that is, one with no joinclause mentioning the
@@ -960,7 +1025,7 @@ have_join_order_restriction(PlannerInfo *root,
  * has_join_restriction
  *             Detect whether the specified relation has join-order restrictions,
  *             due to being inside an outer join or an IN (sub-SELECT),
- *             or participating in any LATERAL references.
+ *             or participating in any LATERAL references or multi-rel PHVs.
  *
  * Essentially, this tests whether have_join_order_restriction() could
  * succeed with this rel and some other one.  It's OK if we sometimes
@@ -972,12 +1037,15 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
        ListCell   *l;
 
-       foreach(l, root->lateral_info_list)
+       if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
+               return true;
+
+       foreach(l, root->placeholder_list)
        {
-               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+               PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
 
-               if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) ||
-                       bms_overlap(ljinfo->lateral_lhs, rel->relids))
+               if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
+                       !bms_equal(rel->relids, phinfo->ph_eval_at))
                        return true;
        }
 
@@ -1058,6 +1126,57 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
 }
 
 
+/*
+ * There's a pitfall for creating parameterized nestloops: suppose the inner
+ * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
+ * minimum eval_at set includes the outer rel (B) and some third rel (C).
+ * We might think we could create a B/A nestloop join that's parameterized by
+ * C.  But we would end up with a plan in which the PHV's expression has to be
+ * evaluated as a nestloop parameter at the B/A join; and the executor is only
+ * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
+ * and overhead to the executor for such corner cases, it seems better to
+ * forbid the join.  (Note that we can still make use of A's parameterized
+ * path with pre-joined B+C as the outer rel.  have_join_order_restriction()
+ * ensures that we will consider making such a join even if there are not
+ * other reasons to do so.)
+ *
+ * So we check whether any PHVs used in the query could pose such a hazard.
+ * We don't have any simple way of checking whether a risky PHV would actually
+ * be used in the inner plan, and the case is so unusual that it doesn't seem
+ * worth working very hard on it.
+ *
+ * This needs to be checked in two places.  If the inner rel's minimum
+ * parameterization would trigger the restriction, then join_is_legal() should
+ * reject the join altogether, because there will be no workable paths for it.
+ * But joinpath.c has to check again for every proposed nestloop path, because
+ * the inner path might have more than the minimum parameterization, causing
+ * some PHV to be dangerous for it that otherwise wouldn't be.
+ */
+bool
+have_dangerous_phv(PlannerInfo *root,
+                                  Relids outer_relids, Relids inner_params)
+{
+       ListCell   *lc;
+
+       foreach(lc, root->placeholder_list)
+       {
+               PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
+
+               if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
+                       continue;                       /* ignore, could not be a nestloop param */
+               if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
+                       continue;                       /* ignore, not relevant to this join */
+               if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
+                       continue;                       /* safe, it can be eval'd within outerrel */
+               /* Otherwise, it's potentially unsafe, so reject the join */
+               return true;
+       }
+
+       /* OK to perform the join */
+       return false;
+}
+
+
 /*
  * is_dummy_rel --- has relation been proven empty?
  */
index 2f4e8181eb5a70bc4183ec9270c2215acf9f8d4e..dded896064d05667832cbf353527b4a51ab95703 100644 (file)
@@ -449,9 +449,7 @@ create_lateral_join_info(PlannerInfo *root)
                                Assert(false);
                }
 
-               /* We now know all the relids needed for lateral refs in this rel */
-               if (bms_is_empty(lateral_relids))
-                       continue;                       /* ensure lateral_relids is NULL if empty */
+               /* We now have all the direct lateral refs from this rel */
                brel->lateral_relids = lateral_relids;
        }
 
@@ -459,6 +457,14 @@ create_lateral_join_info(PlannerInfo *root)
         * Now check for lateral references within PlaceHolderVars, and make
         * LateralJoinInfos describing each such reference.  Unlike references in
         * unflattened LATERAL RTEs, the referencing location could be a join.
+        *
+        * For a PHV that is due to be evaluated at a join, we mark each of the
+        * join's member baserels as having the PHV's lateral references too. Even
+        * though the baserels could be scanned without considering those lateral
+        * refs, we will never be able to form the join except as a path
+        * parameterized by the lateral refs, so there is no point in considering
+        * unparameterized paths for the baserels; and we mustn't try to join any
+        * of those baserels to the lateral refs too soon, either.
         */
        foreach(lc, root->placeholder_list)
        {
@@ -471,6 +477,7 @@ create_lateral_join_info(PlannerInfo *root)
                                                                                           PVC_RECURSE_AGGREGATES,
                                                                                           PVC_INCLUDE_PLACEHOLDERS);
                        ListCell   *lc2;
+                       int                     ev_at;
 
                        foreach(lc2, vars)
                        {
@@ -500,6 +507,15 @@ create_lateral_join_info(PlannerInfo *root)
                        }
 
                        list_free(vars);
+
+                       ev_at = -1;
+                       while ((ev_at = bms_next_member(eval_at, ev_at)) >= 0)
+                       {
+                               RelOptInfo *brel = find_base_rel(root, ev_at);
+
+                               brel->lateral_relids = bms_add_members(brel->lateral_relids,
+                                                                                                          phinfo->ph_lateral);
+                       }
                }
        }
 
@@ -508,46 +524,103 @@ create_lateral_join_info(PlannerInfo *root)
                return;
 
        /*
-        * Now that we've identified all lateral references, make a second pass in
-        * which we mark each baserel with the set of relids of rels that
-        * reference it laterally (essentially, the inverse mapping of
-        * lateral_relids).  We'll need this for join_clause_is_movable_to().
+        * At this point the lateral_relids sets represent only direct lateral
+        * references.  Replace them by their transitive closure, so that they
+        * describe both direct and indirect lateral references.  If relation X
+        * references Y laterally, and Y references Z laterally, then we will have
+        * to scan X on the inside of a nestloop with Z, so for all intents and
+        * purposes X is laterally dependent on Z too.
         *
-        * Also, propagate lateral_relids and lateral_referencers from appendrel
-        * parent rels to their child rels.  We intentionally give each child rel
-        * the same minimum parameterization, even though it's quite possible that
-        * some don't reference all the lateral rels.  This is because any append
-        * path for the parent will have to have the same parameterization for
-        * every child anyway, and there's no value in forcing extra
-        * reparameterize_path() calls.  Similarly, a lateral reference to the
-        * parent prevents use of otherwise-movable join rels for each child.
+        * This code is essentially Warshall's algorithm for transitive closure.
+        * The outer loop considers each baserel, and propagates its lateral
+        * dependencies to those baserels that have a lateral dependency on it.
         */
        for (rti = 1; rti < root->simple_rel_array_size; rti++)
        {
                RelOptInfo *brel = root->simple_rel_array[rti];
-               Relids          lateral_referencers;
+               Relids          outer_lateral_relids;
+               Index           rti2;
 
-               if (brel == NULL)
+               if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
                        continue;
-               if (brel->reloptkind != RELOPT_BASEREL)
+
+               /* need not consider baserel further if it has no lateral refs */
+               outer_lateral_relids = brel->lateral_relids;
+               if (outer_lateral_relids == NULL)
                        continue;
 
-               /* Compute lateral_referencers using the finished lateral_info_list */
-               lateral_referencers = NULL;
-               foreach(lc, root->lateral_info_list)
+               /* else scan all baserels */
+               for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++)
                {
-                       LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
+                       RelOptInfo *brel2 = root->simple_rel_array[rti2];
+
+                       if (brel2 == NULL || brel2->reloptkind != RELOPT_BASEREL)
+                               continue;
 
-                       if (bms_is_member(brel->relid, ljinfo->lateral_lhs))
-                               lateral_referencers = bms_add_members(lateral_referencers,
-                                                                                                         ljinfo->lateral_rhs);
+                       /* if brel2 has lateral ref to brel, propagate brel's refs */
+                       if (bms_is_member(rti, brel2->lateral_relids))
+                               brel2->lateral_relids = bms_add_members(brel2->lateral_relids,
+                                                                                                               outer_lateral_relids);
                }
-               brel->lateral_referencers = lateral_referencers;
+       }
+
+       /*
+        * Now that we've identified all lateral references, mark each baserel
+        * with the set of relids of rels that reference it laterally (possibly
+        * indirectly) --- that is, the inverse mapping of lateral_relids.
+        */
+       for (rti = 1; rti < root->simple_rel_array_size; rti++)
+       {
+               RelOptInfo *brel = root->simple_rel_array[rti];
+               Relids          lateral_relids;
+               int                     rti2;
+
+               if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
+                       continue;
+
+               /* Nothing to do at rels with no lateral refs */
+               lateral_relids = brel->lateral_relids;
+               if (lateral_relids == NULL)
+                       continue;
 
                /*
-                * If it's an appendrel parent, copy its lateral_relids and
-                * lateral_referencers to each child rel.
+                * We should not have broken the invariant that lateral_relids is
+                * exactly NULL if empty.
                 */
+               Assert(!bms_is_empty(lateral_relids));
+
+               /* Also, no rel should have a lateral dependency on itself */
+               Assert(!bms_is_member(rti, lateral_relids));
+
+               /* Mark this rel's referencees */
+               rti2 = -1;
+               while ((rti2 = bms_next_member(lateral_relids, rti2)) >= 0)
+               {
+                       RelOptInfo *brel2 = root->simple_rel_array[rti2];
+
+                       Assert(brel2 != NULL && brel2->reloptkind == RELOPT_BASEREL);
+                       brel2->lateral_referencers =
+                               bms_add_member(brel2->lateral_referencers, rti);
+               }
+       }
+
+       /*
+        * Lastly, propagate lateral_relids and lateral_referencers from appendrel
+        * parent rels to their child rels.  We intentionally give each child rel
+        * the same minimum parameterization, even though it's quite possible that
+        * some don't reference all the lateral rels.  This is because any append
+        * path for the parent will have to have the same parameterization for
+        * every child anyway, and there's no value in forcing extra
+        * reparameterize_path() calls.  Similarly, a lateral reference to the
+        * parent prevents use of otherwise-movable join rels for each child.
+        */
+       for (rti = 1; rti < root->simple_rel_array_size; rti++)
+       {
+               RelOptInfo *brel = root->simple_rel_array[rti];
+
+               if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
+                       continue;
+
                if (root->simple_rte_array[rti]->inh)
                {
                        foreach(lc, root->append_rel_list)
index b197f144117f3b0e479919f33c203266c55b89b7..a2be2ede85e1dbf6512137bd59156e9a47f33908 100644 (file)
@@ -373,7 +373,8 @@ build_join_rel(PlannerInfo *root,
        joinrel->cheapest_total_path = NULL;
        joinrel->cheapest_unique_path = NULL;
        joinrel->cheapest_parameterized_paths = NIL;
-       joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids);
+       joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids,
+                                                                                                               outer_rel, inner_rel);
        joinrel->relid = 0;                     /* indicates not a baserel */
        joinrel->rtekind = RTE_JOIN;
        joinrel->min_attr = 0;
@@ -508,29 +509,27 @@ build_join_rel(PlannerInfo *root,
  * because join_is_legal() needs the value to check a prospective join.
  */
 Relids
-min_join_parameterization(PlannerInfo *root, Relids joinrelids)
+min_join_parameterization(PlannerInfo *root,
+                                                 Relids joinrelids,
+                                                 RelOptInfo *outer_rel,
+                                                 RelOptInfo *inner_rel)
 {
        Relids          result;
-       ListCell   *lc;
-
-       /* Easy if there are no lateral references */
-       if (root->lateral_info_list == NIL)
-               return NULL;
 
        /*
-        * Scan lateral_info_list to find all the lateral references occurring in
-        * or below this join.
+        * Basically we just need the union of the inputs' lateral_relids, less
+        * whatever is already in the join.
+        *
+        * It's not immediately obvious that this is a valid way to compute the
+        * result, because it might seem that we're ignoring possible lateral refs
+        * of PlaceHolderVars that are due to be computed at the join but not in
+        * either input.  However, because create_lateral_join_info() already
+        * charged all such PHV refs to each member baserel of the join, they'll
+        * be accounted for already in the inputs' lateral_relids.  Likewise, we
+        * do not need to worry about doing transitive closure here, because that
+        * was already accounted for in the original baserel lateral_relids.
         */
-       result = NULL;
-       foreach(lc, root->lateral_info_list)
-       {
-               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
-
-               if (bms_is_subset(ljinfo->lateral_rhs, joinrelids))
-                       result = bms_add_members(result, ljinfo->lateral_lhs);
-       }
-
-       /* Remove any rels that are already included in the join */
+       result = bms_union(outer_rel->lateral_relids, inner_rel->lateral_relids);
        result = bms_del_members(result, joinrelids);
 
        /* Maintain invariant that result is exactly NULL if empty */
index 32321238ae4c4f6b8412de17cd65ad5f95ef3dc8..f7796f8efbeaef07ccff4ef65793d52772da3508 100644 (file)
@@ -358,6 +358,7 @@ typedef struct PlannerInfo
  *             cheapest_parameterized_paths - best paths for their parameterizations;
  *                     always includes cheapest_total_path, even if that's unparameterized
  *             lateral_relids - required outer rels for LATERAL, as a Relids set
+ *                     (includes both direct and indirect lateral references)
  *
  * If the relation is a base relation it will have these fields set:
  *
@@ -373,6 +374,7 @@ typedef struct PlannerInfo
  *             lateral_vars - lateral cross-references of rel, if any (list of
  *                                        Vars and PlaceHolderVars)
  *             lateral_referencers - relids of rels that reference this one laterally
+ *                             (includes both direct and indirect lateral references)
  *             indexlist - list of IndexOptInfo nodes for relation's indexes
  *                                     (always NIL if it's not a table)
  *             pages - number of disk pages in relation (zero if not a table)
index f41847f4093ba19bb11cb97fa71a2265a1810ed0..8fb9eda01b67e2b529124f41f9a50117f05a48a0 100644 (file)
@@ -148,7 +148,10 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
                           RelOptInfo *inner_rel,
                           SpecialJoinInfo *sjinfo,
                           List **restrictlist_ptr);
-extern Relids min_join_parameterization(PlannerInfo *root, Relids joinrelids);
+extern Relids min_join_parameterization(PlannerInfo *root,
+                                                 Relids joinrelids,
+                                                 RelOptInfo *outer_rel,
+                                                 RelOptInfo *inner_rel);
 extern RelOptInfo *build_empty_join_rel(PlannerInfo *root);
 extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root,
                                                        RelOptInfo *rel);
index 87123a57295d271f9b69a7581190721f31e41ace..7757741c10467ad621f5be3d13ae086f4b8144e3 100644 (file)
@@ -98,6 +98,8 @@ extern RelOptInfo *make_join_rel(PlannerInfo *root,
                          RelOptInfo *rel1, RelOptInfo *rel2);
 extern bool have_join_order_restriction(PlannerInfo *root,
                                                        RelOptInfo *rel1, RelOptInfo *rel2);
+extern bool have_dangerous_phv(PlannerInfo *root,
+                                  Relids outer_relids, Relids inner_params);
 
 /*
  * equivclass.c
index dba5d2dbab383c2e4578fea12324982bd279ceb1..64f046ee0bb83235cc7708eee24893411b08a1f4 100644 (file)
@@ -3620,6 +3620,159 @@ where t1.f1 = ss.f1;
  doh! | 4567890123456789 | 123 | 4567890123456789 | doh!
 (1 row)
 
+explain (verbose, costs off)
+select * from
+  text_tbl t1
+  left join int8_tbl i8
+  on i8.q2 = 123,
+  lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+  lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+where t1.f1 = ss2.f1;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Nested Loop
+   Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1)
+   Join Filter: (t1.f1 = (t2.f1))
+   ->  Nested Loop
+         Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1
+         ->  Nested Loop Left Join
+               Output: t1.f1, i8.q1, i8.q2
+               ->  Seq Scan on public.text_tbl t1
+                     Output: t1.f1
+               ->  Materialize
+                     Output: i8.q1, i8.q2
+                     ->  Seq Scan on public.int8_tbl i8
+                           Output: i8.q1, i8.q2
+                           Filter: (i8.q2 = 123)
+         ->  Limit
+               Output: (i8.q1), t2.f1
+               ->  Seq Scan on public.text_tbl t2
+                     Output: i8.q1, t2.f1
+   ->  Limit
+         Output: ((i8.q1)), (t2.f1)
+         ->  Seq Scan on public.text_tbl t3
+               Output: (i8.q1), t2.f1
+(22 rows)
+
+select * from
+  text_tbl t1
+  left join int8_tbl i8
+  on i8.q2 = 123,
+  lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+  lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+where t1.f1 = ss2.f1;
+  f1  |        q1        | q2  |        q1        |  f1  |        q1        |  f1  
+------+------------------+-----+------------------+------+------------------+------
+ doh! | 4567890123456789 | 123 | 4567890123456789 | doh! | 4567890123456789 | doh!
+(1 row)
+
+explain (verbose, costs off)
+select 1 from
+  text_tbl as tt1
+  inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+  left join text_tbl as tt3 on (tt3.f1 = 'foo')
+  left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+  lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+where tt1.f1 = ss1.c0;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop
+   Output: 1
+   ->  Nested Loop Left Join
+         Output: tt1.f1, tt4.f1
+         ->  Nested Loop
+               Output: tt1.f1
+               ->  Seq Scan on public.text_tbl tt1
+                     Output: tt1.f1
+                     Filter: (tt1.f1 = 'foo'::text)
+               ->  Seq Scan on public.text_tbl tt2
+                     Output: tt2.f1
+         ->  Materialize
+               Output: tt4.f1
+               ->  Nested Loop Left Join
+                     Output: tt4.f1
+                     Join Filter: (tt3.f1 = tt4.f1)
+                     ->  Seq Scan on public.text_tbl tt3
+                           Output: tt3.f1
+                           Filter: (tt3.f1 = 'foo'::text)
+                     ->  Seq Scan on public.text_tbl tt4
+                           Output: tt4.f1
+                           Filter: (tt4.f1 = 'foo'::text)
+   ->  Subquery Scan on ss1
+         Output: ss1.c0
+         Filter: (ss1.c0 = 'foo'::text)
+         ->  Limit
+               Output: (tt4.f1)
+               ->  Seq Scan on public.text_tbl tt5
+                     Output: tt4.f1
+(29 rows)
+
+select 1 from
+  text_tbl as tt1
+  inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+  left join text_tbl as tt3 on (tt3.f1 = 'foo')
+  left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+  lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+where tt1.f1 = ss1.c0;
+ ?column? 
+----------
+(0 rows)
+
+--
+-- check a case in which a PlaceHolderVar forces join order
+--
+explain (verbose, costs off)
+select ss2.* from
+  int4_tbl i41
+  left join int8_tbl i8
+    join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+          from int4_tbl i42, int4_tbl i43) ss1
+    on i8.q1 = ss1.c2
+  on i41.f1 = ss1.c1,
+  lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+where ss1.c2 = 0;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
+   ->  Hash Join
+         Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42
+         Hash Cond: (i41.f1 = i42.f1)
+         ->  Nested Loop
+               Output: i8.q1, i8.q2, i43.f1, i41.f1
+               ->  Nested Loop
+                     Output: i8.q1, i8.q2, i43.f1
+                     ->  Seq Scan on public.int8_tbl i8
+                           Output: i8.q1, i8.q2
+                           Filter: (i8.q1 = 0)
+                     ->  Seq Scan on public.int4_tbl i43
+                           Output: i43.f1
+                           Filter: (i43.f1 = 0)
+               ->  Seq Scan on public.int4_tbl i41
+                     Output: i41.f1
+         ->  Hash
+               Output: i42.f1
+               ->  Seq Scan on public.int4_tbl i42
+                     Output: i42.f1
+   ->  Limit
+         Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42))
+         ->  Seq Scan on public.text_tbl
+               Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42)
+(25 rows)
+
+select ss2.* from
+  int4_tbl i41
+  left join int8_tbl i8
+    join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+          from int4_tbl i42, int4_tbl i43) ss1
+    on i8.q1 = ss1.c2
+  on i41.f1 = ss1.c1,
+  lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+where ss1.c2 = 0;
+ f1 | q1 | q2 | c1 | c2 | c3 
+----+----+----+----+----+----
+(0 rows)
+
 --
 -- test ability to push constants through outer join clauses
 --
@@ -4741,14 +4894,12 @@ select * from
          Output: a.q1, a.q2
    ->  Nested Loop
          Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1)
-         Join Filter: (a.q2 = b.q1)
          ->  Seq Scan on public.int8_tbl b
                Output: b.q1, b.q2
-         ->  Materialize
-               Output: c.q1
-               ->  Seq Scan on public.int8_tbl c
-                     Output: c.q1
-(13 rows)
+               Filter: (a.q2 = b.q1)
+         ->  Seq Scan on public.int8_tbl c
+               Output: c.q1, c.q2
+(11 rows)
 
 select * from
   int8_tbl a left join lateral
index fdd4e78cf281da3a009fcc5b1daebceef2aabf5a..0358d00e4045055dae952686bfd19a1a204014f6 100644 (file)
@@ -1134,6 +1134,65 @@ select * from
   lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss
 where t1.f1 = ss.f1;
 
+explain (verbose, costs off)
+select * from
+  text_tbl t1
+  left join int8_tbl i8
+  on i8.q2 = 123,
+  lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+  lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+where t1.f1 = ss2.f1;
+
+select * from
+  text_tbl t1
+  left join int8_tbl i8
+  on i8.q2 = 123,
+  lateral (select i8.q1, t2.f1 from text_tbl t2 limit 1) as ss1,
+  lateral (select ss1.* from text_tbl t3 limit 1) as ss2
+where t1.f1 = ss2.f1;
+
+explain (verbose, costs off)
+select 1 from
+  text_tbl as tt1
+  inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+  left join text_tbl as tt3 on (tt3.f1 = 'foo')
+  left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+  lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+where tt1.f1 = ss1.c0;
+
+select 1 from
+  text_tbl as tt1
+  inner join text_tbl as tt2 on (tt1.f1 = 'foo')
+  left join text_tbl as tt3 on (tt3.f1 = 'foo')
+  left join text_tbl as tt4 on (tt3.f1 = tt4.f1),
+  lateral (select tt4.f1 as c0 from text_tbl as tt5 limit 1) as ss1
+where tt1.f1 = ss1.c0;
+
+--
+-- check a case in which a PlaceHolderVar forces join order
+--
+
+explain (verbose, costs off)
+select ss2.* from
+  int4_tbl i41
+  left join int8_tbl i8
+    join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+          from int4_tbl i42, int4_tbl i43) ss1
+    on i8.q1 = ss1.c2
+  on i41.f1 = ss1.c1,
+  lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+where ss1.c2 = 0;
+
+select ss2.* from
+  int4_tbl i41
+  left join int8_tbl i8
+    join (select i42.f1 as c1, i43.f1 as c2, 42 as c3
+          from int4_tbl i42, int4_tbl i43) ss1
+    on i8.q1 = ss1.c2
+  on i41.f1 = ss1.c1,
+  lateral (select i41.*, i8.*, ss1.* from text_tbl limit 1) ss2
+where ss1.c2 = 0;
+
 --
 -- test ability to push constants through outer join clauses
 --