From: Tom Lane Date: Tue, 22 May 2007 23:23:58 +0000 (+0000) Subject: Repair planner bug introduced in 8.2 by ability to rearrange outer joins: X-Git-Tag: REL8_3_BETA1~652 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=11086f2f2bfec3a5fd37daf3c2e44b51abaf9b69;p=postgresql Repair planner bug introduced in 8.2 by ability to rearrange outer joins: in cases where a sub-SELECT inserts a WHERE clause between two outer joins, that clause may prevent us from re-ordering the two outer joins. The code was considering only the joins' own ON-conditions in determining reordering safety, which is not good enough. Add a "delay_upper_joins" flag to OuterJoinInfo to flag that we have detected such a clause and higher-level outer joins shouldn't be permitted to commute with this one. (This might seem overly coarse, but given the current rules for OJ reordering, it's sufficient AFAICT.) The failure case is actually pretty narrow: it needs a WHERE clause within the RHS of a left join that checks the RHS of a lower left join, but is not strict for that RHS (else we'd have simplified the lower join to a plain join). Even then no failure will be manifest unless the planner chooses to rearrange the join order. Per bug report from Adam Terrey. --- diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 62853461a8..34ef2808fb 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.375 2007/04/27 22:05:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.376 2007/05/22 23:23:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1425,6 +1425,7 @@ _copyOuterJoinInfo(OuterJoinInfo *from) COPY_BITMAPSET_FIELD(min_righthand); COPY_SCALAR_FIELD(is_full_join); COPY_SCALAR_FIELD(lhs_strict); + COPY_SCALAR_FIELD(delay_upper_joins); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 99fb8f18b6..bd237cd6f4 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.306 2007/04/27 22:05:47 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.307 2007/05/22 23:23:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -680,6 +680,7 @@ _equalOuterJoinInfo(OuterJoinInfo *a, OuterJoinInfo *b) COMPARE_BITMAPSET_FIELD(min_righthand); COMPARE_SCALAR_FIELD(is_full_join); COMPARE_SCALAR_FIELD(lhs_strict); + COMPARE_SCALAR_FIELD(delay_upper_joins); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 4bd923a38b..4bf6764c18 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.307 2007/05/22 01:40:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.308 2007/05/22 23:23:56 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1453,6 +1453,7 @@ _outOuterJoinInfo(StringInfo str, OuterJoinInfo *node) WRITE_BITMAPSET_FIELD(min_righthand); WRITE_BOOL_FIELD(is_full_join); WRITE_BOOL_FIELD(lhs_strict); + WRITE_BOOL_FIELD(delay_upper_joins); } static void diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 2ccc53bb68..fe08dfcc9c 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.131 2007/02/16 20:57:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.132 2007/05/22 23:23:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,7 +48,8 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, Relids qualscope, Relids ojscope, Relids outerjoin_nonnullable); -static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p); +static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p, + bool is_pushed_down); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); @@ -492,6 +493,9 @@ make_outerjoininfo(PlannerInfo *root, errmsg("SELECT FOR UPDATE/SHARE cannot be applied to the nullable side of an outer join"))); } + /* this always starts out false */ + ojinfo->delay_upper_joins = false; + /* If it's a full join, no need to be very smart */ ojinfo->is_full_join = is_full_join; if (is_full_join) @@ -561,10 +565,21 @@ make_outerjoininfo(PlannerInfo *root, * lower join's RHS and the lower OJ's join condition is strict, we * can interchange the ordering of the two OJs, so exclude the lower * RHS from our min_righthand. + * + * Here, we have to consider that "our join condition" includes + * any clauses that syntactically appeared above the lower OJ and + * below ours; those are equivalent to degenerate clauses in our + * OJ and must be treated as such. Such clauses obviously can't + * reference our LHS, and they must be non-strict for the lower OJ's + * RHS (else reduce_outer_joins would have reduced the lower OJ to + * a plain join). Hence the other ways in which we handle clauses + * within our join condition are not affected by them. The net + * effect is therefore sufficiently represented by the + * delay_upper_joins flag saved for us by check_outerjoin_delay. */ if (bms_overlap(ojinfo->min_righthand, otherinfo->min_righthand) && !bms_overlap(clause_relids, otherinfo->min_righthand) && - otherinfo->lhs_strict) + otherinfo->lhs_strict && !otherinfo->delay_upper_joins) { ojinfo->min_righthand = bms_del_members(ojinfo->min_righthand, otherinfo->min_righthand); @@ -749,7 +764,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * clauses. */ maybe_equivalence = false; - maybe_outer_join = !check_outerjoin_delay(root, &relids); + maybe_outer_join = !check_outerjoin_delay(root, &relids, false); /* * Now force the qual to be evaluated exactly at the level of joining @@ -776,7 +791,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, is_pushed_down = true; /* Check to see if must be delayed by outer join */ - outerjoin_delayed = check_outerjoin_delay(root, &relids); + outerjoin_delayed = check_outerjoin_delay(root, &relids, true); if (outerjoin_delayed) { @@ -918,10 +933,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, /* * check_outerjoin_delay * Detect whether a qual referencing the given relids must be delayed - * in application due to the presence of a lower outer join. + * in application due to the presence of a lower outer join, and/or + * may force extra delay of higher-level outer joins. * - * If so, add relids to *relids_p to reflect the lowest safe level for - * evaluating the qual, and return TRUE. + * If the qual must be delayed, add relids to *relids_p to reflect the lowest + * safe level for evaluating the qual, and return TRUE. Any extra delay for + * higher-level joins is reflected by setting delay_upper_joins to TRUE in + * OuterJoinInfo structs. * * For an is_pushed_down qual, we can evaluate the qual as soon as (1) we have * all the rels it mentions, and (2) we are at or above any outer joins that @@ -946,9 +964,23 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * For a non-pushed-down qual, this isn't going to determine where we place the * qual, but we need to determine outerjoin_delayed anyway so we can decide * whether the qual is potentially useful for equivalence deductions. + * + * Lastly, a pushed-down qual that references the nullable side of any current + * oj_info_list member and has to be evaluated above that OJ (because its + * required relids overlap the LHS too) causes that OJ's delay_upper_joins + * flag to be set TRUE. This will prevent any higher-level OJs from + * being interchanged with that OJ, which would result in not having any + * correct place to evaluate the qual. (The case we care about here is a + * sub-select WHERE clause within the RHS of some outer join. The WHERE + * clause must effectively be treated as a degenerate clause of that outer + * join's condition. Rather than trying to match such clauses with joins + * directly, we set delay_upper_joins here, and when the upper outer join + * is processed by make_outerjoininfo, it will refrain from allowing the + * two OJs to commute.) */ static bool -check_outerjoin_delay(PlannerInfo *root, Relids *relids_p) +check_outerjoin_delay(PlannerInfo *root, Relids *relids_p, + bool is_pushed_down) { Relids relids = *relids_p; bool outerjoin_delayed; @@ -979,6 +1011,10 @@ check_outerjoin_delay(PlannerInfo *root, Relids *relids_p) /* we'll need another iteration */ found_some = true; } + /* set delay_upper_joins if needed */ + if (is_pushed_down && !ojinfo->is_full_join && + bms_overlap(relids, ojinfo->min_lefthand)) + ojinfo->delay_upper_joins = true; } } } while (found_some); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index c7225a1e69..d6b13ca8e3 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.142 2007/05/22 01:40:33 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.143 2007/05/22 23:23:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1042,6 +1042,12 @@ typedef struct InnerIndexscanInfo * It is not valid for either min_lefthand or min_righthand to be empty sets; * if they were, this would break the logic that enforces join order. * + * delay_upper_joins is set TRUE if we detect a pushed-down clause that has + * to be evaluated after this join is formed (because it references the RHS). + * Any outer joins that have such a clause and this join in their RHS cannot + * commute with this join, because that would leave noplace to check the + * pushed-down clause. (We don't track this for FULL JOINs, either.) + * * Note: OuterJoinInfo directly represents only LEFT JOIN and FULL JOIN; * RIGHT JOIN is handled by switching the inputs to make it a LEFT JOIN. * We make an OuterJoinInfo for FULL JOINs even though there is no flexibility @@ -1055,6 +1061,7 @@ typedef struct OuterJoinInfo Relids min_righthand; /* base relids in minimum RHS for join */ bool is_full_join; /* it's a FULL OUTER JOIN */ bool lhs_strict; /* joinclause is strict for some LHS rel */ + bool delay_upper_joins; /* can't commute with upper RHS */ } OuterJoinInfo; /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 245e22dcd0..8b6716def8 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2223,3 +2223,30 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol; 2 | | | (3 rows) +reset enable_hashjoin; +reset enable_nestloop; +-- +-- regression test for 8.2 bug with improper re-ordering of left joins +-- +create temp table tt3(f1 int, f2 text); +insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x; +create index tt3i on tt3(f1); +analyze tt3; +create temp table tt4(f1 int); +insert into tt4 values (0),(1),(9999); +analyze tt4; +SELECT a.f1 +FROM tt4 a +LEFT JOIN ( + SELECT b.f1 + FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1) + WHERE c.f1 IS NULL +) AS d ON (a.f1 = d.f1) +WHERE d.f1 IS NULL; + f1 +------ + 0 + 1 + 9999 +(3 rows) + diff --git a/src/test/regress/expected/join_1.out b/src/test/regress/expected/join_1.out index 9677408707..8e7e4de014 100644 --- a/src/test/regress/expected/join_1.out +++ b/src/test/regress/expected/join_1.out @@ -2223,3 +2223,30 @@ select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol; 2 | | | (3 rows) +reset enable_hashjoin; +reset enable_nestloop; +-- +-- regression test for 8.2 bug with improper re-ordering of left joins +-- +create temp table tt3(f1 int, f2 text); +insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x; +create index tt3i on tt3(f1); +analyze tt3; +create temp table tt4(f1 int); +insert into tt4 values (0),(1),(9999); +analyze tt4; +SELECT a.f1 +FROM tt4 a +LEFT JOIN ( + SELECT b.f1 + FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1) + WHERE c.f1 IS NULL +) AS d ON (a.f1 = d.f1) +WHERE d.f1 IS NULL; + f1 +------ + 0 + 1 + 9999 +(3 rows) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 71208d2d2d..d3433effd1 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -399,3 +399,28 @@ set enable_nestloop to off; select tt1.*, tt2.* from tt1 left join tt2 on tt1.joincol = tt2.joincol; select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol; + +reset enable_hashjoin; +reset enable_nestloop; + +-- +-- regression test for 8.2 bug with improper re-ordering of left joins +-- + +create temp table tt3(f1 int, f2 text); +insert into tt3 select x, repeat('xyzzy', 100) from generate_series(1,10000) x; +create index tt3i on tt3(f1); +analyze tt3; + +create temp table tt4(f1 int); +insert into tt4 values (0),(1),(9999); +analyze tt4; + +SELECT a.f1 +FROM tt4 a +LEFT JOIN ( + SELECT b.f1 + FROM tt3 b LEFT JOIN tt3 c ON (b.f1 = c.f1) + WHERE c.f1 IS NULL +) AS d ON (a.f1 = d.f1) +WHERE d.f1 IS NULL;