From 75c85bd1997f6fba1cb9cdf0fa48fab220b0e745 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 27 Feb 2009 22:41:38 +0000 Subject: [PATCH] Tighten up join ordering rules to account for recent more-careful analysis of the associativity of antijoins. Also improve optimizer/README discussion of outer join ordering rules. --- src/backend/optimizer/README | 24 +++++++++++++++++------ src/backend/optimizer/path/joinrels.c | 6 +++--- src/backend/optimizer/plan/initsplan.c | 27 ++++++++++++++++---------- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 18763841ae..a3ce21b690 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.48 2008/08/14 18:47:59 tgl Exp $ +$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.49 2009/02/27 22:41:37 tgl Exp $ Optimizer ========= @@ -205,8 +205,7 @@ strict, the first form might produce some rows with nonnull C columns where the second form would make those entries null. RIGHT JOIN is equivalent to LEFT JOIN after switching the two input -tables, so the same identities work for right joins. Only FULL JOIN -cannot be re-ordered at all. +tables, so the same identities work for right joins. An example of a case that does *not* work is moving an innerjoin into or out of the nullable side of an outer join: @@ -214,11 +213,24 @@ out of the nullable side of an outer join: A leftjoin (B join C on (Pbc)) on (Pab) != (A leftjoin B on (Pab)) join C on (Pbc) +SEMI joins work a little bit differently. A semijoin can be reassociated +into or out of the lefthand side of another semijoin, but not into or out +of the righthand side. Likewise, an inner join, left join, or antijoin +can be reassociated into or out of the lefthand side of a semijoin, but +not into or out of the righthand side. + +ANTI joins work approximately like LEFT joins, except that identity 3 +fails if the join to C is an antijoin (even if Pbc is strict, and in +both the cases where the other join is a leftjoin and where it is an +antijoin). So we can't reorder antijoins into or out of the RHS of a +leftjoin or antijoin, even if the relevant clause is strict. + +The current code does not attempt to re-order FULL JOINs at all. FULL JOIN ordering is enforced by not collapsing FULL JOIN nodes when -translating the jointree to "joinlist" representation. LEFT and RIGHT +translating the jointree to "joinlist" representation. Other types of JOIN nodes are normally collapsed so that they participate fully in the join order search. To avoid generating illegal join orders, the planner -creates a SpecialJoinInfo node for each outer join, and join_is_legal +creates a SpecialJoinInfo node for each non-inner join, and join_is_legal checks this list to decide if a proposed join is legal. What we store in SpecialJoinInfo nodes are the minimum sets of Relids @@ -226,7 +238,7 @@ required on each side of the join to form the outer join. Note that these are minimums; there's no explicit maximum, since joining other rels to the OJ's syntactic rels may be legal. Per identities 1 and 2, non-FULL joins can be freely associated into the lefthand side of an -OJ, but in general they can't be associated into the righthand side. +OJ, but in some cases they can't be associated into the righthand side. So the restriction enforced by join_is_legal is that a proposed join can't join a rel within or partly within an RHS boundary to one outside the boundary, unless the join validly implements some outer join. diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index d3287b687c..7c38607db6 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.98 2009/02/19 20:32:45 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.99 2009/02/27 22:41:37 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -185,7 +185,7 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels) * Last-ditch effort: if we failed to find any usable joins so far, force * a set of cartesian-product joins to be generated. This handles the * special case where all the available rels have join clauses but we - * cannot use any of the joins yet. An example is + * cannot use any of those clauses yet. An example is * * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0; * @@ -737,7 +737,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) * * In practice this is always used with have_relevant_joinclause(), and so * could be merged with that function, but it seems clearer to separate the - * two concerns. We need these tests because there are degenerate cases where + * two concerns. We need this test because there are degenerate cases where * a clauseless join must be performed to satisfy join-order restrictions. * * Note: this is only a problem if one side of a degenerate outer join diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 25117716c3..cf05f033b9 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.148 2009/02/25 03:30:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.149 2009/02/27 22:41:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -628,26 +628,32 @@ make_outerjoininfo(PlannerInfo *root, * min_lefthand. (We must use its full syntactic relset, not just its * min_lefthand + min_righthand. This is because there might be other * OJs below this one that this one can commute with, but we cannot - * commute with them if we don't with this one.) + * commute with them if we don't with this one.) Also, if the + * current join is an antijoin, we must preserve ordering regardless + * of strictness. * * Note: I believe we have to insist on being strict for at least one * rel in the lower OJ's min_righthand, not its whole syn_righthand. */ - if (bms_overlap(left_rels, otherinfo->syn_righthand) && - bms_overlap(clause_relids, otherinfo->syn_righthand) && - !bms_overlap(strict_relids, otherinfo->min_righthand)) + if (bms_overlap(left_rels, otherinfo->syn_righthand)) { - min_lefthand = bms_add_members(min_lefthand, - otherinfo->syn_lefthand); - min_lefthand = bms_add_members(min_lefthand, - otherinfo->syn_righthand); + if (bms_overlap(clause_relids, otherinfo->syn_righthand) && + (jointype == JOIN_ANTI || + !bms_overlap(strict_relids, otherinfo->min_righthand))) + { + min_lefthand = bms_add_members(min_lefthand, + otherinfo->syn_lefthand); + min_lefthand = bms_add_members(min_lefthand, + otherinfo->syn_righthand); + } } /* * For a lower OJ in our RHS, if our join condition does not use the * lower join's RHS and the lower OJ's join condition is strict, we * can interchange the ordering of the two OJs; otherwise we must add - * lower OJ's full syntactic relset to min_righthand. + * lower OJ's full syntactic relset to min_righthand. Here, we must + * preserve ordering anyway if the lower OJ is an antijoin. * * Here, we have to consider that "our join condition" includes any * clauses that syntactically appeared above the lower OJ and below @@ -663,6 +669,7 @@ make_outerjoininfo(PlannerInfo *root, if (bms_overlap(right_rels, otherinfo->syn_righthand)) { if (bms_overlap(clause_relids, otherinfo->syn_righthand) || + otherinfo->jointype == JOIN_ANTI || !otherinfo->lhs_strict || otherinfo->delay_upper_joins) { min_righthand = bms_add_members(min_righthand, -- 2.40.0