From c792c7db41466ff02107e3233ec9d92d8e3df866 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 20 Apr 2018 15:19:16 -0400 Subject: [PATCH] Change more places to be less trusting of RestrictInfo.is_pushed_down. On further reflection, commit e5d83995e didn't go far enough: pretty much everywhere in the planner that examines a clause's is_pushed_down flag ought to be changed to use the more complicated behavior where we also check the clause's required_relids. Otherwise we could make incorrect decisions about whether, say, a clause is safe to use as a hash clause. Some (many?) of these places are safe as-is, either because they are never reached while considering a parameterized path, or because there are additional checks that would reject a pushed-down clause anyway. However, it seems smarter to just code them all the same way rather than rely on easily-broken reasoning of that sort. In support of that, invent a new macro RINFO_IS_PUSHED_DOWN that should be used in place of direct tests on the is_pushed_down flag. Like the previous patch, back-patch to all supported branches. Discussion: https://postgr.es/m/f8128b11-c5bf-3539-48cd-234178b2314d@proxel.se --- contrib/postgres_fdw/postgres_fdw.c | 3 +- src/backend/optimizer/path/costsize.c | 10 +++++-- src/backend/optimizer/path/joinpath.c | 4 +-- src/backend/optimizer/path/joinrels.c | 34 +++++++++++++---------- src/backend/optimizer/plan/analyzejoins.c | 10 +++---- src/backend/optimizer/plan/initsplan.c | 5 ++++ src/backend/optimizer/util/relnode.c | 3 +- src/backend/optimizer/util/restrictinfo.c | 12 ++------ src/include/nodes/relation.h | 17 +++++++++++- src/include/optimizer/paths.h | 3 +- 10 files changed, 64 insertions(+), 37 deletions(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 30e572632e..a46160df7c 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4705,7 +4705,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, bool is_remote_clause = is_foreign_expr(root, joinrel, rinfo->clause); - if (IS_OUTER_JOIN(jointype) && !rinfo->is_pushed_down) + if (IS_OUTER_JOIN(jointype) && + !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) { if (!is_remote_clause) return false; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 47729de896..10d41141f2 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -159,6 +159,7 @@ static bool has_indexed_join_quals(NestPath *joinpath); static double approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals); static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, double outer_rows, @@ -4055,12 +4056,14 @@ compute_semi_anti_join_factors(PlannerInfo *root, */ if (IS_OUTER_JOIN(jointype)) { + Relids joinrelids = bms_union(outerrel->relids, innerrel->relids); + joinquals = NIL; foreach(l, restrictlist) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - if (!rinfo->is_pushed_down) + if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) joinquals = lappend(joinquals, rinfo); } } @@ -4375,6 +4378,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, List *restrictlist) { rel->rows = calc_joinrel_size_estimate(root, + rel, outer_rel, inner_rel, outer_rel->rows, @@ -4417,6 +4421,7 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, * estimate for any pair with the same parameterization. */ nrows = calc_joinrel_size_estimate(root, + rel, outer_path->parent, inner_path->parent, outer_path->rows, @@ -4440,6 +4445,7 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel, */ static double calc_joinrel_size_estimate(PlannerInfo *root, + RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, double outer_rows, @@ -4492,7 +4498,7 @@ calc_joinrel_size_estimate(PlannerInfo *root, { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - if (rinfo->is_pushed_down) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) pushedquals = lappend(pushedquals, rinfo); else joinquals = lappend(joinquals, rinfo); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 3fd3cc7670..f47dd8185b 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -1700,7 +1700,7 @@ hash_inner_and_outer(PlannerInfo *root, * If processing an outer join, only use its own join clauses for * hashing. For inner joins we need not be so picky. */ - if (isouterjoin && restrictinfo->is_pushed_down) + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) continue; if (!restrictinfo->can_join || @@ -1947,7 +1947,7 @@ select_mergejoin_clauses(PlannerInfo *root, * we don't set have_nonmergeable_joinclause here because pushed-down * clauses will become otherquals not joinquals.) */ - if (isouterjoin && restrictinfo->is_pushed_down) + if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) continue; /* Check that clause is a mergeable operator clause */ diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 5d66cfb87b..6d41d307ea 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -35,6 +35,7 @@ static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool is_dummy_rel(RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, bool only_pushed_down); static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, @@ -780,7 +781,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, { case JOIN_INNER: if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -794,12 +795,12 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, break; case JOIN_LEFT: if (is_dummy_rel(rel1) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist, false) && + if (restriction_is_constant_false(restrictlist, joinrel, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -811,7 +812,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, break; case JOIN_FULL: if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; @@ -847,7 +848,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, bms_is_subset(sjinfo->min_righthand, rel2->relids)) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -870,7 +871,7 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, sjinfo) != NULL) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist, false)) + restriction_is_constant_false(restrictlist, joinrel, false)) { mark_dummy_rel(joinrel); break; @@ -885,12 +886,12 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, break; case JOIN_ANTI: if (is_dummy_rel(rel1) || - restriction_is_constant_false(restrictlist, true)) + restriction_is_constant_false(restrictlist, joinrel, true)) { mark_dummy_rel(joinrel); break; } - if (restriction_is_constant_false(restrictlist, false) && + if (restriction_is_constant_false(restrictlist, joinrel, false) && bms_is_subset(rel2->relids, sjinfo->syn_righthand)) mark_dummy_rel(rel2); add_paths_to_joinrel(root, joinrel, rel1, rel2, @@ -1249,10 +1250,13 @@ mark_dummy_rel(RelOptInfo *rel) * decide there's no match for an outer row, which is pretty stupid. So, * we need to detect the case. * - * If only_pushed_down is true, then consider only pushed-down quals. + * If only_pushed_down is true, then consider only quals that are pushed-down + * from the point of view of the joinrel. */ static bool -restriction_is_constant_false(List *restrictlist, bool only_pushed_down) +restriction_is_constant_false(List *restrictlist, + RelOptInfo *joinrel, + bool only_pushed_down) { ListCell *lc; @@ -1266,7 +1270,7 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - if (only_pushed_down && !rinfo->is_pushed_down) + if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) continue; if (rinfo->clause && IsA(rinfo->clause, Const)) @@ -1411,8 +1415,9 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, * partition keys from given relations being joined. */ bool -have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, - List *restrictlist) +have_partkey_equi_join(RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, + JoinType jointype, List *restrictlist) { PartitionScheme part_scheme = rel1->part_scheme; ListCell *lc; @@ -1438,7 +1443,8 @@ have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, int ipk2; /* If processing an outer join, only use its own join clauses. */ - if (IS_OUTER_JOIN(jointype) && rinfo->is_pushed_down) + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) continue; /* Skip clauses which can not be used for a join. */ diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index ef25fefa45..c5c4362609 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -253,8 +253,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * above the outer join, even if it references no other rels (it might * be from WHERE, for example). */ - if (restrictinfo->is_pushed_down || - !bms_equal(restrictinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) { /* * If such a clause actually references the inner rel then join @@ -422,8 +421,7 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); - if (rinfo->is_pushed_down || - !bms_equal(rinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) { /* Recheck that qual doesn't actually reference the target rel */ Assert(!bms_is_member(relid, rinfo->clause_relids)); @@ -1080,6 +1078,7 @@ is_innerrel_unique_for(PlannerInfo *root, JoinType jointype, List *restrictlist) { + Relids joinrelids = bms_union(outerrelids, innerrel->relids); List *clause_list = NIL; ListCell *lc; @@ -1098,7 +1097,8 @@ is_innerrel_unique_for(PlannerInfo *root, * As noted above, if it's a pushed-down clause and we're at an outer * join, we can't use it. */ - if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype)) + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids)) continue; /* Ignore if it's not a mergejoinable clause */ diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index a436b53806..01335db511 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1775,6 +1775,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * attach quals to the lowest level where they can be evaluated. But * if we were ever to re-introduce a mechanism for delaying evaluation * of "expensive" quals, this area would need work. + * + * Note: generally, use of is_pushed_down has to go through the macro + * RINFO_IS_PUSHED_DOWN, because that flag alone is not always sufficient + * to tell whether a clause must be treated as pushed-down in context. + * This seems like another reason why it should perhaps be rethought. *---------- */ if (is_deduced) diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index fc19f892c5..82b78420e7 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -1629,7 +1629,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, */ if (!IS_PARTITIONED_REL(outer_rel) || !IS_PARTITIONED_REL(inner_rel) || outer_rel->part_scheme != inner_rel->part_scheme || - !have_partkey_equi_join(outer_rel, inner_rel, jointype, restrictlist)) + !have_partkey_equi_join(joinrel, outer_rel, inner_rel, + jointype, restrictlist)) { Assert(!IS_PARTITIONED_REL(joinrel)); return; diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 65c1abcfe1..edf5a4807f 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -373,7 +373,7 @@ extract_actual_clauses(List *restrictinfo_list, * extract_actual_join_clauses * * Extract bare clauses from 'restrictinfo_list', separating those that - * syntactically match the join level from those that were pushed down. + * semantically match the join level from those that were pushed down. * Pseudoconstant clauses are excluded from the results. * * This is only used at outer joins, since for plain joins we don't care @@ -394,15 +394,7 @@ extract_actual_join_clauses(List *restrictinfo_list, { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - /* - * We must check both is_pushed_down and required_relids, since an - * outer-join clause that's been pushed down to some lower join level - * via path parameterization will not be marked is_pushed_down; - * nonetheless, it must be treated as a filter clause not a join - * clause so far as the lower join level is concerned. - */ - if (rinfo->is_pushed_down || - !bms_is_subset(rinfo->required_relids, joinrelids)) + if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) { if (!rinfo->pseudoconstant) *otherquals = lappend(*otherquals, rinfo->clause); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d6dbf15a4b..2108b6ab1d 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1789,7 +1789,8 @@ typedef struct LimitPath * if we decide that it can be pushed down into the nullable side of the join. * In that case it acts as a plain filter qual for wherever it gets evaluated. * (In short, is_pushed_down is only false for non-degenerate outer join - * conditions. Possibly we should rename it to reflect that meaning?) + * conditions. Possibly we should rename it to reflect that meaning? But + * see also the comments for RINFO_IS_PUSHED_DOWN, below.) * * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true * if the clause's applicability must be delayed due to any outer joins @@ -1931,6 +1932,20 @@ typedef struct RestrictInfo Selectivity right_mcvfreq; /* right side's most common val's freq */ } RestrictInfo; +/* + * This macro embodies the correct way to test whether a RestrictInfo is + * "pushed down" to a given outer join, that is, should be treated as a filter + * clause rather than a join clause at that outer join. This is certainly so + * if is_pushed_down is true; but examining that is not sufficient anymore, + * because outer-join clauses will get pushed down to lower outer joins when + * we generate a path for the lower outer join that is parameterized by the + * LHS of the upper one. We can detect such a clause by noting that its + * required_relids exceed the scope of the join. + */ +#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids) \ + ((rinfo)->is_pushed_down || \ + !bms_is_subset((rinfo)->required_relids, joinrelids)) + /* * Since mergejoinscansel() is a relatively expensive function, and would * otherwise be invoked many times while planning a large join tree, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 50e180c554..f181586a53 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -115,7 +115,8 @@ extern bool have_join_order_restriction(PlannerInfo *root, extern bool have_dangerous_phv(PlannerInfo *root, Relids outer_relids, Relids inner_params); extern void mark_dummy_rel(RelOptInfo *rel); -extern bool have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, +extern bool have_partkey_equi_join(RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, List *restrictlist); /* -- 2.40.0