From 6a6522529fb0c1b42050f322d37d91df14d7994c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 9 Jan 2008 20:42:29 +0000 Subject: [PATCH] Fix some planner issues found while investigating Kevin Grittner's report of poorer planning in 8.3 than 8.2: 1. After pushing a constant across an outer join --- ie, given "a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is sort of equal to 42, in the sense that we needn't fetch any b rows where it isn't 42 --- loop to see if any additional deductions can be made. Previous releases did that by recursing, but I had mistakenly thought that this was no longer necessary given the EquivalenceClass machinery. 2. Allow pushing constants across outer join conditions even if the condition is outerjoin_delayed due to a lower outer join. This is safe as long as the condition is strict and we re-test it at the upper join. 3. Keep the outer-join clause even if we successfully push a constant across it. This is *necessary* in the outerjoin_delayed case, but even in the simple case, it seems better to do this to ensure that the join search order heuristics will consider the join as reasonable to make. Mark such a clause as having selectivity 1.0, though, since it's not going to eliminate very many rows after application of the constant condition. 4. Tweak have_relevant_eclass_joinclause to report that two relations are joinable when they have vars that are equated to the same constant. We won't actually generate any joinclause from such an EquivalenceClass, but again it seems that in such a case it's a good idea to consider the join as worth costing out. 5. Fix a bug in select_mergejoin_clauses that was exposed by these changes: we have to reject candidate mergejoin clauses if either side was equated to a constant, because we can't construct a canonical pathkey list for such a clause. This is an implementation restriction that might be worth fixing someday, but it doesn't seem critical to get it done for 8.3. --- src/backend/optimizer/path/equivclass.c | 221 ++++++++++++++++++------ src/backend/optimizer/path/joinpath.c | 40 ++++- src/backend/optimizer/path/orindxpath.c | 12 +- src/backend/optimizer/path/pathkeys.c | 13 +- src/backend/optimizer/plan/initsplan.c | 25 +-- src/include/nodes/relation.h | 33 ++-- src/test/regress/expected/join.out | 31 ++++ src/test/regress/expected/join_1.out | 31 ++++ src/test/regress/sql/join.sql | 23 +++ 9 files changed, 336 insertions(+), 93 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index c438d974c2..1bc6d15a3e 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.8 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.9 2008/01/09 20:42:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,10 +52,10 @@ static RestrictInfo *create_join_clause(PlannerInfo *root, EquivalenceMember *leftem, EquivalenceMember *rightem, EquivalenceClass *parent_ec); -static void reconsider_outer_join_clause(PlannerInfo *root, +static bool reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, bool outer_on_left); -static void reconsider_full_join_clause(PlannerInfo *root, +static bool reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo); @@ -1094,8 +1094,9 @@ create_join_clause(PlannerInfo *root, /* * reconsider_outer_join_clauses * Re-examine any outer-join clauses that were set aside by - * distribute_qual_to_rels(), and either create EquivalenceClasses - * to replace them or push them out into the regular join-clause lists. + * distribute_qual_to_rels(), and see if we can derive any + * EquivalenceClasses from them. Then, if they were not made + * redundant, push them out into the regular join-clause lists. * * When we have mergejoinable clauses A = B that are outer-join clauses, * we can't blindly combine them with other clauses A = C to deduce B = C, @@ -1112,8 +1113,8 @@ create_join_clause(PlannerInfo *root, * equivalence clause.) * * Note that the above rule does not work for full outer joins; nor is it - * very interesting to consider cases where the equivalence clause involves - * relations entirely outside the outer join, since such clauses couldn't + * very interesting to consider cases where the generated equivalence clause + * would involve relations outside the outer join, since such clauses couldn't * be pushed into the inner side's scan anyway. So the restriction to * outervar = pseudoconstant is not really giving up anything. * @@ -1141,57 +1142,161 @@ create_join_clause(PlannerInfo *root, * that could result in propagating constant restrictions from * INNERVAR to OUTERVAR, which would be very wrong. * + * It's possible that the INNERVAR is also an OUTERVAR for some other + * outer-join clause, in which case the process can be repeated. So we repeat + * looping over the lists of clauses until no further deductions can be made. + * Whenever we do make a deduction, we remove the generating clause from the + * lists, since we don't want to make the same deduction twice. + * * If we don't find any match for a set-aside outer join clause, we must * throw it back into the regular joinclause processing by passing it to - * distribute_restrictinfo_to_rels(). + * distribute_restrictinfo_to_rels(). If we do generate a derived clause, + * however, the outer-join clause is redundant. We still throw it back, + * because otherwise the join will be seen as a clauseless join and avoided + * during join order searching; but we mark it as redundant to keep from + * messing up the joinrel's size estimate. (This behavior means that the + * API for this routine is uselessly complex: we could have just put all + * the clauses into the regular processing initially. We keep it because + * someday we might want to do something else, such as inserting "dummy" + * joinclauses instead of real ones.) + * + * Outer join clauses that are marked outerjoin_delayed are special: this + * condition means that one or both VARs might go to null due to a lower + * outer join. We can still push a constant through the clause, but only + * if its operator is strict; and we *have to* throw the clause back into + * regular joinclause processing. By keeping the strict join clause, + * we ensure that any null-extended rows that are mistakenly generated due + * to suppressing rows not matching the constant will be rejected at the + * upper outer join. (This doesn't work for full-join clauses.) */ void reconsider_outer_join_clauses(PlannerInfo *root) { - ListCell *lc; + bool found; + ListCell *cell; + ListCell *prev; + ListCell *next; + + /* Outer loop repeats until we find no more deductions */ + do + { + found = false; + + /* Process the LEFT JOIN clauses */ + prev = NULL; + for (cell = list_head(root->left_join_clauses); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + next = lnext(cell); + if (reconsider_outer_join_clause(root, rinfo, true)) + { + found = true; + /* remove it from the list */ + root->left_join_clauses = + list_delete_cell(root->left_join_clauses, cell, prev); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->this_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + else + prev = cell; + } + + /* Process the RIGHT JOIN clauses */ + prev = NULL; + for (cell = list_head(root->right_join_clauses); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + next = lnext(cell); + if (reconsider_outer_join_clause(root, rinfo, false)) + { + found = true; + /* remove it from the list */ + root->right_join_clauses = + list_delete_cell(root->right_join_clauses, cell, prev); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->this_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + else + prev = cell; + } + + /* Process the FULL JOIN clauses */ + prev = NULL; + for (cell = list_head(root->full_join_clauses); cell; cell = next) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + + next = lnext(cell); + if (reconsider_full_join_clause(root, rinfo)) + { + found = true; + /* remove it from the list */ + root->full_join_clauses = + list_delete_cell(root->full_join_clauses, cell, prev); + /* we throw it back anyway (see notes above) */ + /* but the thrown-back clause has no extra selectivity */ + rinfo->this_selec = 1.0; + distribute_restrictinfo_to_rels(root, rinfo); + } + else + prev = cell; + } + } while (found); - /* Process the LEFT JOIN clauses */ - foreach(lc, root->left_join_clauses) + /* Now, any remaining clauses have to be thrown back */ + foreach(cell, root->left_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - reconsider_outer_join_clause(root, rinfo, true); + distribute_restrictinfo_to_rels(root, rinfo); } - /* And the RIGHT JOIN clauses */ - foreach(lc, root->right_join_clauses) + foreach(cell, root->right_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - reconsider_outer_join_clause(root, rinfo, false); + distribute_restrictinfo_to_rels(root, rinfo); } - /* And the FULL JOIN clauses */ - foreach(lc, root->full_join_clauses) + foreach(cell, root->full_join_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - reconsider_full_join_clause(root, rinfo); + distribute_restrictinfo_to_rels(root, rinfo); } } /* * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause + * + * Returns TRUE if we were able to propagate a constant through the clause. */ -static void +static bool reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, bool outer_on_left) { Expr *outervar, *innervar; - Oid left_type, + Oid opno, + left_type, right_type, inner_datatype; Relids inner_relids; ListCell *lc1; - /* Extract needed info from the clause */ Assert(is_opclause(rinfo->clause)); - op_input_types(((OpExpr *) rinfo->clause)->opno, - &left_type, &right_type); + opno = ((OpExpr *) rinfo->clause)->opno; + + /* If clause is outerjoin_delayed, operator must be strict */ + if (rinfo->outerjoin_delayed && !op_strict(opno)) + return false; + + /* Extract needed info from the clause */ + op_input_types(opno, &left_type, &right_type); if (outer_on_left) { outervar = (Expr *) get_leftop(rinfo->clause); @@ -1266,41 +1371,46 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, } /* - * If we were able to equate INNERVAR to any constant, we're done, and - * we can throw away the outer-join clause as redundant. Otherwise, - * fall out of the search loop, since we know the OUTERVAR appears in - * at most one EC. + * If we were able to equate INNERVAR to any constant, report success. + * Otherwise, fall out of the search loop, since we know the OUTERVAR + * appears in at most one EC. */ if (match) - return; + return true; else break; } - /* We did not find a match, so throw it back into regular processing */ - distribute_restrictinfo_to_rels(root, rinfo); + return false; /* failed to make any deduction */ } /* * reconsider_outer_join_clauses for a single FULL JOIN clause + * + * Returns TRUE if we were able to propagate a constant through the clause. */ -static void +static bool reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) { Expr *leftvar; Expr *rightvar; - Oid left_type, + Oid opno, + left_type, right_type; Relids left_relids, right_relids; ListCell *lc1; + /* Can't use an outerjoin_delayed clause here */ + if (rinfo->outerjoin_delayed) + return false; + /* Extract needed info from the clause */ Assert(is_opclause(rinfo->clause)); + opno = ((OpExpr *) rinfo->clause)->opno; + op_input_types(opno, &left_type, &right_type); leftvar = (Expr *) get_leftop(rinfo->clause); rightvar = (Expr *) get_rightop(rinfo->clause); - op_input_types(((OpExpr *) rinfo->clause)->opno, - &left_type, &right_type); left_relids = rinfo->left_relids; right_relids = rinfo->right_relids; @@ -1412,7 +1522,7 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) if (matchleft && matchright) { cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em); - return; + return true; } /* @@ -1423,8 +1533,7 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) break; } - /* We did not find a match, so throw it back into regular processing */ - distribute_restrictinfo_to_rels(root, rinfo); + return false; /* failed to make any deduction */ } @@ -1651,16 +1760,22 @@ have_relevant_eclass_joinclause(PlannerInfo *root, ListCell *lc2; /* - * Won't generate joinclauses if const or single-member (the latter - * test covers the volatile case too) + * Won't generate joinclauses if single-member (this test covers the + * volatile case too) */ - if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + if (list_length(ec->ec_members) <= 1) continue; /* * Note we don't test ec_broken; if we did, we'd need a separate code * path to look through ec_sources. Checking the members anyway is OK * as a possibly-overoptimistic heuristic. + * + * We don't test ec_has_const either, even though a const eclass + * won't generate real join clauses. This is because if we had + * "WHERE a.x = b.y and a.x = 42", it is worth considering a join + * between a and b, since the join result is likely to be small even + * though it'll end up being an unqualified nestloop. */ /* Needn't scan if it couldn't contain members from each rel */ @@ -1674,8 +1789,8 @@ have_relevant_eclass_joinclause(PlannerInfo *root, { EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); - if (cur_em->em_is_child) - continue; /* ignore children here */ + if (cur_em->em_is_const || cur_em->em_is_child) + continue; /* ignore consts and children here */ if (bms_is_subset(cur_em->em_relids, rel1->relids)) { has_rel1 = true; @@ -1719,16 +1834,22 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) ListCell *lc2; /* - * Won't generate joinclauses if const or single-member (the latter - * test covers the volatile case too) + * Won't generate joinclauses if single-member (this test covers the + * volatile case too) */ - if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + if (list_length(ec->ec_members) <= 1) continue; /* * Note we don't test ec_broken; if we did, we'd need a separate code * path to look through ec_sources. Checking the members anyway is OK * as a possibly-overoptimistic heuristic. + * + * We don't test ec_has_const either, even though a const eclass + * won't generate real join clauses. This is because if we had + * "WHERE a.x = b.y and a.x = 42", it is worth considering a join + * between a and b, since the join result is likely to be small even + * though it'll end up being an unqualified nestloop. */ /* Needn't scan if it couldn't contain members from each rel */ @@ -1742,8 +1863,8 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) { EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); - if (cur_em->em_is_child) - continue; /* ignore children here */ + if (cur_em->em_is_const || cur_em->em_is_child) + continue; /* ignore consts and children here */ if (bms_is_subset(cur_em->em_relids, rel1->relids)) { has_rel1 = true; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 5d595ce215..d5e2506d7a 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.114 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.115 2008/01/09 20:42:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,7 +34,8 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, List *restrictlist, JoinType jointype); static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, JoinType jointype); -static List *select_mergejoin_clauses(RelOptInfo *joinrel, +static List *select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, @@ -69,7 +70,8 @@ add_paths_to_joinrel(PlannerInfo *root, * disable if it's a full join. */ if (enable_mergejoin || jointype == JOIN_FULL) - mergeclause_list = select_mergejoin_clauses(joinrel, + mergeclause_list = select_mergejoin_clauses(root, + joinrel, outerrel, innerrel, restrictlist, @@ -883,7 +885,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, * currently of interest. */ static List * -select_mergejoin_clauses(RelOptInfo *joinrel, +select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, @@ -937,6 +940,35 @@ select_mergejoin_clauses(RelOptInfo *joinrel, continue; /* no good for these input relations */ } + /* + * Insist that each side have a non-redundant eclass. This + * restriction is needed because various bits of the planner expect + * that each clause in a merge be associatable with some pathkey in a + * canonical pathkey list, but redundant eclasses can't appear in + * canonical sort orderings. (XXX it might be worth relaxing this, + * but not enough time to address it for 8.3.) + * + * Note: it would be bad if this condition failed for an otherwise + * mergejoinable FULL JOIN clause, since that would result in + * undesirable planner failure. I believe that is not possible + * however; a variable involved in a full join could only appear + * in below_outer_join eclasses, which aren't considered redundant. + * + * This case *can* happen for left/right join clauses: the + * outer-side variable could be equated to a constant. Because we + * will propagate that constant across the join clause, the loss of + * ability to do a mergejoin is not really all that big a deal, and + * so it's not clear that improving this is important. + */ + cache_mergeclause_eclasses(root, restrictinfo); + + if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) || + EC_MUST_BE_REDUNDANT(restrictinfo->right_ec)) + { + have_nonmergeable_joinclause = true; + continue; /* can't handle redundant eclasses */ + } + result_list = lappend(result_list, restrictinfo); } diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c index ba332b5ead..1556bf74df 100644 --- a/src/backend/optimizer/path/orindxpath.c +++ b/src/backend/optimizer/path/orindxpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.83 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -90,16 +90,18 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) /* * Find potentially interesting OR joinclauses. Note we must ignore any - * joinclauses that are marked outerjoin_delayed, because they cannot be - * pushed down to the per-relation level due to outer-join rules. (XXX in - * some cases it might be possible to allow this, but it would require - * substantially more bookkeeping about where the clause came from.) + * joinclauses that are marked outerjoin_delayed or !is_pushed_down, + * because they cannot be pushed down to the per-relation level due to + * outer-join rules. (XXX in some cases it might be possible to allow + * this, but it would require substantially more bookkeeping about where + * the clause came from.) */ foreach(i, rel->joininfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); if (restriction_is_or_clause(rinfo) && + rinfo->is_pushed_down && !rinfo->outerjoin_delayed) { /* diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 1ef44ddfa4..f0141b2366 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.92 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.93 2008/01/09 20:42:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,13 +30,6 @@ #include "utils/lsyscache.h" -/* - * If an EC contains a const and isn't below-outer-join, any PathKey depending - * on it must be redundant, since there's only one possible value of the key. - */ -#define MUST_BE_REDUNDANT(eclass) \ - ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) - static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first); static PathKey *make_canonical_pathkey(PlannerInfo *root, @@ -164,7 +157,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) Assert(!new_ec->ec_merged); /* Check for EC containing a constant --- unconditionally redundant */ - if (MUST_BE_REDUNDANT(new_ec)) + if (EC_MUST_BE_REDUNDANT(new_ec)) return true; /* If same EC already used in list, then redundant */ @@ -211,7 +204,7 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) * pathkey_is_redundant would notice that, but we needn't even bother * constructing the node... */ - if (MUST_BE_REDUNDANT(eclass)) + if (EC_MUST_BE_REDUNDANT(eclass)) continue; /* OK, build a canonicalized PathKey struct */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 42a202b950..0255b646a3 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.137 2008/01/01 19:45:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.138 2008/01/09 20:42:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -778,13 +778,13 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, if (ojscope) { /* clause is attached to outer join, eval it there */ - relids = ojscope; + relids = bms_copy(ojscope); /* mustn't use as gating qual, so don't mark pseudoconstant */ } else { /* eval at original syntactic level */ - relids = qualscope; + relids = bms_copy(qualscope); if (!contain_volatile_functions(clause)) { /* mark as gating qual */ @@ -849,12 +849,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * We can't use such a clause to deduce equivalence (the left and * right sides might be unequal above the join because one of them has * gone to NULL) ... but we might be able to use it for more limited - * deductions, if there are no lower outer joins that delay its - * application. If so, consider adding it to the lists of set-aside - * clauses. + * deductions, if it is mergejoinable. So consider adding it to the + * lists of set-aside outer-join clauses. */ + is_pushed_down = false; maybe_equivalence = false; - maybe_outer_join = !check_outerjoin_delay(root, &relids, false); + maybe_outer_join = true; + + /* Check to see if must be delayed by lower outer join */ + outerjoin_delayed = check_outerjoin_delay(root, &relids, false); /* * Now force the qual to be evaluated exactly at the level of joining @@ -868,8 +871,6 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, */ Assert(ojscope); relids = ojscope; - is_pushed_down = false; - outerjoin_delayed = true; Assert(!pseudoconstant); } else @@ -880,7 +881,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, */ is_pushed_down = true; - /* Check to see if must be delayed by outer join */ + /* Check to see if must be delayed by lower outer join */ outerjoin_delayed = check_outerjoin_delay(root, &relids, true); if (outerjoin_delayed) @@ -1052,8 +1053,8 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * mentioning only C cannot be applied below the join to A. * * For a non-pushed-down qual, this isn't going to determine where we place the - * qual, but we need to determine outerjoin_delayed anyway so we can decide - * whether the qual is potentially useful for equivalence deductions. + * qual, but we need to determine outerjoin_delayed anyway for possible use + * in reconsider_outer_join_clauses(). * * Lastly, a pushed-down qual that references the nullable side of any current * oj_info_list member and has to be evaluated above that OJ (because its diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 204bcb7f24..f01f1e9e51 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.152 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.153 2008/01/09 20:42:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -479,6 +479,13 @@ typedef struct EquivalenceClass struct EquivalenceClass *ec_merged; /* set if merged into another EC */ } EquivalenceClass; +/* + * If an EC contains a const and isn't below-outer-join, any PathKey depending + * on it must be redundant, since there's only one possible value of the key. + */ +#define EC_MUST_BE_REDUNDANT(eclass) \ + ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) + /* * EquivalenceMember - one member expression of an EquivalenceClass * @@ -856,17 +863,17 @@ typedef struct HashPath * * When dealing with outer joins we have to be very careful about pushing qual * clauses up and down the tree. An outer join's own JOIN/ON conditions must - * be evaluated exactly at that join node, and any quals appearing in WHERE or - * in a JOIN above the outer join cannot be pushed down below the outer join. - * Otherwise the outer join will produce wrong results because it will see the - * wrong sets of input rows. All quals are stored as RestrictInfo nodes - * during planning, but there's a flag to indicate whether a qual has been + * be evaluated exactly at that join node, unless they are "degenerate" + * conditions that reference only Vars from the nullable side of the join. + * Quals appearing in WHERE or in a JOIN above the outer join cannot be pushed + * down below the outer join, if they reference any nullable Vars. + * RestrictInfo nodes contain a flag to indicate whether a qual has been * pushed down to a lower level than its original syntactic placement in the * join tree would suggest. If an outer join prevents us from pushing a qual * down to its "natural" semantic level (the level associated with just the * base rels used in the qual) then we mark the qual with a "required_relids" * value including more than just the base rels it actually uses. By - * pretending that the qual references all the rels appearing in the outer + * pretending that the qual references all the rels required to form the outer * join, we prevent it from being evaluated below the outer join's joinrel. * When we do form the outer join's joinrel, we still need to distinguish * those quals that are actually in that join's JOIN/ON condition from those @@ -878,11 +885,13 @@ typedef struct HashPath * It's possible for an OUTER JOIN clause to be marked is_pushed_down too, * if we decide that it can be pushed down into the nullable side of the join. * In that case it acts as a plain filter qual for wherever it gets evaluated. + * (In short, is_pushed_down is only false for non-degenerate outer join + * conditions. Possibly we should rename it to reflect that meaning?) * - * When application of a qual must be delayed by outer join, we also mark it - * with outerjoin_delayed = true. This isn't redundant with required_relids - * because that might equal clause_relids whether or not it's an outer-join - * clause. + * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true + * if the clause's applicability must be delayed due to any outer joins + * appearing below its own syntactic level (ie, it references any Vars from + * the nullable side of any lower outer join). * * In general, the referenced clause might be arbitrarily complex. The * kinds of clauses we can handle as indexscan quals, mergejoin clauses, @@ -932,7 +941,7 @@ typedef struct RestrictInfo bool is_pushed_down; /* TRUE if clause was pushed down in level */ - bool outerjoin_delayed; /* TRUE if delayed by outer join */ + bool outerjoin_delayed; /* TRUE if delayed by lower outer join */ bool can_join; /* see comment above */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 3dd4f5ed7d..bd33cb5c12 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2290,3 +2290,34 @@ from yy 301 | | | | 1 (3 rows) +-- +-- regression test for improper pushing of constants across outer-join clauses +-- (as seen in early 8.2.x releases) +-- +create temp table zt1 (f1 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1" +create temp table zt2 (f2 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2" +create temp table zt3 (f3 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3" +insert into zt1 values(53); +insert into zt2 values(53); +select * from + zt2 left join zt3 on (f2 = f3) + left join zt1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 +----+----+---- + 53 | | +(1 row) + +create temp view zv1 as select *,'dummy'::text AS junk from zt1; +select * from + zt2 left join zt3 on (f2 = f3) + left join zv1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 | junk +----+----+----+------ + 53 | | | +(1 row) + diff --git a/src/test/regress/expected/join_1.out b/src/test/regress/expected/join_1.out index 9e10c73acb..4690b71140 100644 --- a/src/test/regress/expected/join_1.out +++ b/src/test/regress/expected/join_1.out @@ -2290,3 +2290,34 @@ from yy 301 | | | | 1 (3 rows) +-- +-- regression test for improper pushing of constants across outer-join clauses +-- (as seen in early 8.2.x releases) +-- +create temp table zt1 (f1 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt1_pkey" for table "zt1" +create temp table zt2 (f2 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt2_pkey" for table "zt2" +create temp table zt3 (f3 int primary key); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "zt3_pkey" for table "zt3" +insert into zt1 values(53); +insert into zt2 values(53); +select * from + zt2 left join zt3 on (f2 = f3) + left join zt1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 +----+----+---- + 53 | | +(1 row) + +create temp view zv1 as select *,'dummy'::text AS junk from zt1; +select * from + zt2 left join zt3 on (f2 = f3) + left join zv1 on (f3 = f1) +where f2 = 53; + f2 | f3 | f1 | junk +----+----+----+------ + 53 | | | +(1 row) + diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 4efb3c7283..96e15b526c 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -462,3 +462,26 @@ from yy left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy left join xx xxa on yya.pkxx = xxa.pkxx left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx; + +-- +-- regression test for improper pushing of constants across outer-join clauses +-- (as seen in early 8.2.x releases) +-- + +create temp table zt1 (f1 int primary key); +create temp table zt2 (f2 int primary key); +create temp table zt3 (f3 int primary key); +insert into zt1 values(53); +insert into zt2 values(53); + +select * from + zt2 left join zt3 on (f2 = f3) + left join zt1 on (f3 = f1) +where f2 = 53; + +create temp view zv1 as select *,'dummy'::text AS junk from zt1; + +select * from + zt2 left join zt3 on (f2 = f3) + left join zv1 on (f3 = f1) +where f2 = 53; -- 2.40.0