From: Tom Lane Date: Fri, 20 Apr 2018 19:19:16 +0000 (-0400) Subject: Change more places to be less trusting of RestrictInfo.is_pushed_down. X-Git-Tag: REL_10_4~27 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8b6294c7a560c115fb9027e9cc5a3eee17fdf419;p=postgresql Change more places to be less trusting of RestrictInfo.is_pushed_down. On further reflection, commit e5d83995e didn't go far enough: pretty much everywhere in the planner that examines a clause's is_pushed_down flag ought to be changed to use the more complicated behavior where we also check the clause's required_relids. Otherwise we could make incorrect decisions about whether, say, a clause is safe to use as a hash clause. Some (many?) of these places are safe as-is, either because they are never reached while considering a parameterized path, or because there are additional checks that would reject a pushed-down clause anyway. However, it seems smarter to just code them all the same way rather than rely on easily-broken reasoning of that sort. In support of that, invent a new macro RINFO_IS_PUSHED_DOWN that should be used in place of direct tests on the is_pushed_down flag. Like the previous patch, back-patch to all supported branches. Discussion: https://postgr.es/m/f8128b11-c5bf-3539-48cd-234178b2314d@proxel.se --- diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index b653187a77..d8ae029faa 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4127,7 +4127,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, bool is_remote_clause = is_foreign_expr(root, joinrel, rinfo->clause); - if (IS_OUTER_JOIN(jointype) && !rinfo->is_pushed_down) + if (IS_OUTER_JOIN(jointype) && + !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) { if (!is_remote_clause) return false; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b35acb7bdc..0cb9a88790 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -148,6 +148,7 @@ static bool has_indexed_join_quals(NestPath *joinpath); static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals); static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, double outer_rows, @@ -3776,12 +3777,14 @@ compute_semi_anti_join_factors(PlannerInfo *root, */ if (IS_OUTER_JOIN(jointype)) { + Relids joinrelids = bms_union(outerrel->relids, innerrel->relids); + joinquals = NIL; foreach(l, restrictlist) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - if (!rinfo->is_pushed_down) + if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) joinquals = lappend(joinquals, rinfo); } } @@ -4096,6 +4099,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { rel->rows = calc_joinrel_size_estimate(root, + rel, outer_rel, inner_rel, outer_rel->rows, @@ -4138,6 +4142,7 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, * estimate for any pair with the same parameterization. */ nrows = calc_joinrel_size_estimate(root, + rel, outer_path->parent, inner_path->parent, outer_path->rows, @@ -4161,6 +4166,7 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, */ static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, double outer_rows, @@ -4213,7 +4219,7 @@ calc_joinrel_size_estimate(PlannerInfo *root, { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - if (rinfo->is_pushed_down) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) pushedquals = lappend(pushedquals, rinfo); else joinquals = lappend(joinquals, rinfo); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index bb1b3cf0a8..2fb94ccdc5 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -1606,7 +1606,7 @@ hash_inner_and_outer(PlannerInfo *root, * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ - if (isouterjoin && restrictinfo->is_pushed_down) + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) continue; if (!restrictinfo->can_join || @@ -1832,7 +1832,7 @@ select_mergejoin_clauses(PlannerInfo *root, * we don't set have_nonmergeable_joinclause here because pushed-down * clauses will become otherquals not joinquals.) */ - if (isouterjoin && restrictinfo->is_pushed_down) + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) continue; /* Check that clause is a mergeable operator clause */ diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 6ee23509c5..4bbad04c05 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -31,6 +31,7 @@ static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool is_dummy_rel(RelOptInfo *rel); static void mark_dummy_rel(RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, bool only_pushed_down); static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, @@ -770,7 +771,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, { case JOIN_INNER: if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -784,12 +785,12 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, break; case JOIN_LEFT: if (is_dummy_rel(rel1) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist, false) && + if (restriction_is_constant_false(restrictlist, joinrel, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -801,7 +802,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, break; case JOIN_FULL: if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; @@ -837,7 +838,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, bms_is_subset(sjinfo->min_righthand, rel2->relids)) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -860,7 +861,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, sjinfo) != NULL) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -875,12 +876,12 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, break; case JOIN_ANTI: if (is_dummy_rel(rel1) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist, false) && + if (restriction_is_constant_false(restrictlist, joinrel, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -1227,18 +1228,21 @@ mark_dummy_rel(RelOptInfo *rel) /* - * restriction_is_constant_false --- is a restrictlist just FALSE? + * restriction_is_constant_false --- is a restrictlist just false? * - * In cases where a qual is provably constant FALSE, eval_const_expressions + * In cases where a qual is provably constant false, eval_const_expressions * will generally have thrown away anything that's ANDed with it. In outer * join situations this will leave us computing cartesian products only to * decide there's no match for an outer row, which is pretty stupid. So, * we need to detect the case. * - * If only_pushed_down is TRUE, then consider only pushed-down quals. + * If only_pushed_down is true, then consider only quals that are pushed-down + * from the point of view of the joinrel. */ static bool -restriction_is_constant_false(List *restrictlist, bool only_pushed_down) +restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, + bool only_pushed_down) { ListCell *lc; @@ -1252,7 +1256,7 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - if (only_pushed_down && !rinfo->is_pushed_down) + if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) continue; if (rinfo->clause && IsA(rinfo->clause, Const)) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 7dfce5bb55..30bfde45ce 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -253,8 +253,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * above the outer join, even if it references no other rels (it might * be from WHERE, for example). */ - if (restrictinfo->is_pushed_down || - !bms_equal(restrictinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) { /* * If such a clause actually references the inner rel then join @@ -422,8 +421,7 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); - if (rinfo->is_pushed_down || - !bms_equal(rinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) { /* Recheck that qual doesn't actually reference the target rel */ Assert(!bms_is_member(relid, rinfo->clause_relids)); @@ -1080,6 +1078,7 @@ is_innerrel_unique_for(PlannerInfo *root, JoinType jointype, List *restrictlist) { + Relids joinrelids = bms_union(outerrelids, innerrel->relids); List *clause_list = NIL; ListCell *lc; @@ -1098,7 +1097,8 @@ is_innerrel_unique_for(PlannerInfo *root, * As noted above, if it's a pushed-down clause and we're at an outer * join, we can't use it. */ - if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype)) + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) continue; /* Ignore if it's not a mergejoinable clause */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 987c20ac9f..4d0cc2a1c6 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1754,6 +1754,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * attach quals to the lowest level where they can be evaluated. But * if we were ever to re-introduce a mechanism for delaying evaluation * of "expensive" quals, this area would need work. + * + * Note: generally, use of is_pushed_down has to go through the macro + * RINFO_IS_PUSHED_DOWN, because that flag alone is not always sufficient + * to tell whether a clause must be treated as pushed-down in context. + * This seems like another reason why it should perhaps be rethought. *---------- */ if (is_deduced) diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index fd66c1303d..d5652b0f72 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -371,7 +371,7 @@ extract_actual_clauses(List *restrictinfo_list, * extract_actual_join_clauses * * Extract bare clauses from 'restrictinfo_list', separating those that - * syntactically match the join level from those that were pushed down. + * semantically match the join level from those that were pushed down. * Pseudoconstant clauses are excluded from the results. * * This is only used at outer joins, since for plain joins we don't care @@ -392,15 +392,7 @@ extract_actual_join_clauses(List *restrictinfo_list, { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - /* - * We must check both is_pushed_down and required_relids, since an - * outer-join clause that's been pushed down to some lower join level - * via path parameterization will not be marked is_pushed_down; - * nonetheless, it must be treated as a filter clause not a join - * clause so far as the lower join level is concerned. - */ - if (rinfo->is_pushed_down || - !bms_is_subset(rinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) { if (!rinfo->pseudoconstant) *otherquals = lappend(*otherquals, rinfo->clause); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 1e1c69155b..9f789e78c1 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1669,7 +1669,8 @@ typedef struct LimitPath * if we decide that it can be pushed down into the nullable side of the join. * In that case it acts as a plain filter qual for wherever it gets evaluated. * (In short, is_pushed_down is only false for non-degenerate outer join - * conditions. Possibly we should rename it to reflect that meaning?) + * conditions. Possibly we should rename it to reflect that meaning? But + * see also the comments for RINFO_IS_PUSHED_DOWN, below.) * * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true * if the clause's applicability must be delayed due to any outer joins @@ -1809,6 +1810,20 @@ typedef struct RestrictInfo Selectivity right_bucketsize; /* avg bucketsize of right side */ } RestrictInfo; +/* + * This macro embodies the correct way to test whether a RestrictInfo is + * "pushed down" to a given outer join, that is, should be treated as a filter + * clause rather than a join clause at that outer join. This is certainly so + * if is_pushed_down is true; but examining that is not sufficient anymore, + * because outer-join clauses will get pushed down to lower outer joins when + * we generate a path for the lower outer join that is parameterized by the + * LHS of the upper one. We can detect such a clause by noting that its + * required_relids exceed the scope of the join. + */ +#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids) \ + ((rinfo)->is_pushed_down || \ + !bms_is_subset((rinfo)->required_relids, joinrelids)) + /* * Since mergejoinscansel() is a relatively expensive function, and would * otherwise be invoked many times while planning a large join tree,