From 64ad85860ce6b3a7ce01a392f90a322bf61d068f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 20 Apr 2018 15:19:17 -0400 Subject: [PATCH] Change more places to be less trusting of RestrictInfo.is_pushed_down. On further reflection, commit e5d83995e didn't go far enough: pretty much everywhere in the planner that examines a clause's is_pushed_down flag ought to be changed to use the more complicated behavior where we also check the clause's required_relids. Otherwise we could make incorrect decisions about whether, say, a clause is safe to use as a hash clause. Some (many?) of these places are safe as-is, either because they are never reached while considering a parameterized path, or because there are additional checks that would reject a pushed-down clause anyway. However, it seems smarter to just code them all the same way rather than rely on easily-broken reasoning of that sort. In support of that, invent a new macro RINFO_IS_PUSHED_DOWN that should be used in place of direct tests on the is_pushed_down flag. Like the previous patch, back-patch to all supported branches. Discussion: https://postgr.es/m/f8128b11-c5bf-3539-48cd-234178b2314d@proxel.se --- src/backend/optimizer/path/costsize.c | 10 ++++++-- src/backend/optimizer/path/joinpath.c | 4 +-- src/backend/optimizer/path/joinrels.c | 30 +++++++++++++---------- src/backend/optimizer/plan/analyzejoins.c | 6 ++--- src/backend/optimizer/plan/initsplan.c | 5 ++++ src/backend/optimizer/util/restrictinfo.c | 12 ++------- src/include/nodes/relation.h | 17 ++++++++++++- 7 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index fa9b322610..d89b72e07a 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -147,6 +147,7 @@ static bool has_indexed_join_quals(NestPath *joinpath); static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals); static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, double outer_rows, @@ -3542,13 +3543,15 @@ compute_semi_anti_join_factors(PlannerInfo *root, */ if (jointype == JOIN_ANTI) { + Relids joinrelids = bms_union(outerrel->relids, innerrel->relids); + joinquals = NIL; foreach(l, restrictlist) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); Assert(IsA(rinfo, RestrictInfo)); - if (!rinfo->is_pushed_down) + if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) joinquals = lappend(joinquals, rinfo); } } @@ -3863,6 +3866,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { rel->rows = calc_joinrel_size_estimate(root, + rel, outer_rel, inner_rel, outer_rel->rows, @@ -3905,6 +3909,7 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, * estimate for any pair with the same parameterization. */ nrows = calc_joinrel_size_estimate(root, + rel, outer_path->parent, inner_path->parent, outer_path->rows, @@ -3928,6 +3933,7 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, */ static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, double outer_rows, @@ -3981,7 +3987,7 @@ calc_joinrel_size_estimate(PlannerInfo *root, RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); Assert(IsA(rinfo, RestrictInfo)); - if (rinfo->is_pushed_down) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) pushedquals = lappend(pushedquals, rinfo); else joinquals = lappend(joinquals, rinfo); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 9c69e73cce..83abb585c2 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -1308,7 +1308,7 @@ hash_inner_and_outer(PlannerInfo *root, * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ - if (isouterjoin && restrictinfo->is_pushed_down) + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) continue; if (!restrictinfo->can_join || @@ -1547,7 +1547,7 @@ select_mergejoin_clauses(PlannerInfo *root, * we don't set have_nonmergeable_joinclause here because pushed-down * clauses will become otherquals not joinquals.) */ - if (isouterjoin && restrictinfo->is_pushed_down) + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) continue; /* Check that clause is a mergeable operator clause */ diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 01d4fea78c..95d47d9c62 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -31,6 +31,7 @@ static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool is_dummy_rel(RelOptInfo *rel); static void mark_dummy_rel(RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, bool only_pushed_down); @@ -746,7 +747,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) { case JOIN_INNER: if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -760,12 +761,12 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) break; case JOIN_LEFT: if (is_dummy_rel(rel1) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist, false) && + if (restriction_is_constant_false(restrictlist, joinrel, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -777,7 +778,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) break; case JOIN_FULL: if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; @@ -813,7 +814,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) bms_is_subset(sjinfo->min_righthand, rel2->relids)) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -836,7 +837,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) sjinfo) != NULL) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -851,12 +852,12 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) break; case JOIN_ANTI: if (is_dummy_rel(rel1) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist, false) && + if (restriction_is_constant_false(restrictlist, joinrel, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -1207,18 +1208,21 @@ mark_dummy_rel(RelOptInfo *rel) /* - * restriction_is_constant_false --- is a restrictlist just FALSE? + * restriction_is_constant_false --- is a restrictlist just false? * - * In cases where a qual is provably constant FALSE, eval_const_expressions + * In cases where a qual is provably constant false, eval_const_expressions * will generally have thrown away anything that's ANDed with it. In outer * join situations this will leave us computing cartesian products only to * decide there's no match for an outer row, which is pretty stupid. So, * we need to detect the case. * - * If only_pushed_down is TRUE, then consider only pushed-down quals. + * If only_pushed_down is true, then consider only quals that are pushed-down + * from the point of view of the joinrel. */ static bool -restriction_is_constant_false(List *restrictlist, bool only_pushed_down) +restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, + bool only_pushed_down) { ListCell *lc; @@ -1233,7 +1237,7 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down) RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); Assert(IsA(rinfo, RestrictInfo)); - if (only_pushed_down && !rinfo->is_pushed_down) + if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) continue; if (rinfo->clause && IsA(rinfo->clause, Const)) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 614fd293ed..37f11f87fb 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -248,8 +248,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * above the outer join, even if it references no other rels (it might * be from WHERE, for example). */ - if (restrictinfo->is_pushed_down || - !bms_equal(restrictinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) { /* * If such a clause actually references the inner rel then join @@ -417,8 +416,7 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); - if (rinfo->is_pushed_down || - !bms_equal(rinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) { /* Recheck that qual doesn't actually reference the target rel */ Assert(!bms_is_member(relid, rinfo->clause_relids)); diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 84ce6b3125..0e470ae96a 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1676,6 +1676,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * attach quals to the lowest level where they can be evaluated. But * if we were ever to re-introduce a mechanism for delaying evaluation * of "expensive" quals, this area would need work. + * + * Note: generally, use of is_pushed_down has to go through the macro + * RINFO_IS_PUSHED_DOWN, because that flag alone is not always sufficient + * to tell whether a clause must be treated as pushed-down in context. + * This seems like another reason why it should perhaps be rethought. *---------- */ if (is_deduced) diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index cc0b747008..19d4c36f05 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -410,7 +410,7 @@ extract_actual_clauses(List *restrictinfo_list, * extract_actual_join_clauses * * Extract bare clauses from 'restrictinfo_list', separating those that - * syntactically match the join level from those that were pushed down. + * semantically match the join level from those that were pushed down. * Pseudoconstant clauses are excluded from the results. * * This is only used at outer joins, since for plain joins we don't care @@ -433,15 +433,7 @@ extract_actual_join_clauses(List *restrictinfo_list, Assert(IsA(rinfo, RestrictInfo)); - /* - * We must check both is_pushed_down and required_relids, since an - * outer-join clause that's been pushed down to some lower join level - * via path parameterization will not be marked is_pushed_down; - * nonetheless, it must be treated as a filter clause not a join - * clause so far as the lower join level is concerned. - */ - if (rinfo->is_pushed_down || - !bms_is_subset(rinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) { if (!rinfo->pseudoconstant) *otherquals = lappend(*otherquals, rinfo->clause); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 48862d93ae..7b215f0075 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1537,7 +1537,8 @@ typedef struct LimitPath * if we decide that it can be pushed down into the nullable side of the join. * In that case it acts as a plain filter qual for wherever it gets evaluated. * (In short, is_pushed_down is only false for non-degenerate outer join - * conditions. Possibly we should rename it to reflect that meaning?) + * conditions. Possibly we should rename it to reflect that meaning? But + * see also the comments for RINFO_IS_PUSHED_DOWN, below.) * * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true * if the clause's applicability must be delayed due to any outer joins @@ -1664,6 +1665,20 @@ typedef struct RestrictInfo Selectivity right_bucketsize; /* avg bucketsize of right side */ } RestrictInfo; +/* + * This macro embodies the correct way to test whether a RestrictInfo is + * "pushed down" to a given outer join, that is, should be treated as a filter + * clause rather than a join clause at that outer join. This is certainly so + * if is_pushed_down is true; but examining that is not sufficient anymore, + * because outer-join clauses will get pushed down to lower outer joins when + * we generate a path for the lower outer join that is parameterized by the + * LHS of the upper one. We can detect such a clause by noting that its + * required_relids exceed the scope of the join. + */ +#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids) \ + ((rinfo)->is_pushed_down || \ + !bms_is_subset((rinfo)->required_relids, joinrelids)) + /* * Since mergejoinscansel() is a relatively expensive function, and would * otherwise be invoked many times while planning a large join tree, -- 2.40.0