From 6c5f68ea1a836f8c5cf9711f24d285067e67f23f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 14 Aug 2013 18:38:36 -0400 Subject: [PATCH] Remove ph_may_need from PlaceHolderInfo, with attendant simplifications. The planner logic that attempted to make a preliminary estimate of the ph_needed levels for PlaceHolderVars seems to be completely broken by lateral references. Fortunately, the potential join order optimization that this code supported seems to be of relatively little value in practice; so let's just get rid of it rather than trying to fix it. Getting rid of this allows fairly substantial simplifications in placeholder.c, too, so planning in such cases should be a bit faster. Issue noted while pursuing bugs reported by Jeremy Evans and Antonin Houska, though this doesn't in itself fix either of their reported cases. What this does do is prevent an Assert crash in the kind of query illustrated by the added regression test. (I'm not sure that the plan for that query is stable enough across platforms to be usable as a regression test output ... but we'll soon find out from the buildfarm.) Back-patch to 9.3. The problem case can't arise without LATERAL, so no need to touch older branches. --- src/backend/nodes/copyfuncs.c | 1 - src/backend/nodes/equalfuncs.c | 1 - src/backend/nodes/outfuncs.c | 1 - src/backend/optimizer/plan/analyzejoins.c | 2 - src/backend/optimizer/plan/initsplan.c | 36 ++----- src/backend/optimizer/util/placeholder.c | 114 +++++----------------- src/include/nodes/relation.h | 16 +-- src/include/optimizer/placeholder.h | 2 - src/test/regress/expected/join.out | 52 ++++++++++ src/test/regress/sql/join.sql | 13 +++ 10 files changed, 103 insertions(+), 135 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index b5b8d63cff..525e3fa2d1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1953,7 +1953,6 @@ _copyPlaceHolderInfo(const PlaceHolderInfo *from) COPY_NODE_FIELD(ph_var); COPY_BITMAPSET_FIELD(ph_eval_at); COPY_BITMAPSET_FIELD(ph_needed); - COPY_BITMAPSET_FIELD(ph_may_need); COPY_SCALAR_FIELD(ph_width); return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3f96595e8e..379cbc0c20 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -820,7 +820,6 @@ _equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b) COMPARE_NODE_FIELD(ph_var); COMPARE_BITMAPSET_FIELD(ph_eval_at); COMPARE_BITMAPSET_FIELD(ph_needed); - COMPARE_BITMAPSET_FIELD(ph_may_need); COMPARE_SCALAR_FIELD(ph_width); return true; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b2183f4213..67aeb7e65c 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1935,7 +1935,6 @@ _outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node) WRITE_NODE_FIELD(ph_var); WRITE_BITMAPSET_FIELD(ph_eval_at); WRITE_BITMAPSET_FIELD(ph_needed); - WRITE_BITMAPSET_FIELD(ph_may_need); WRITE_INT_FIELD(ph_width); } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index a7db69c85b..daab355b1d 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -388,8 +388,6 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids) phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid); phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); - /* ph_may_need probably isn't used after this, but fix it anyway */ - phinfo->ph_may_need = bms_del_member(phinfo->ph_may_need, relid); } /* diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 8efb94b44d..07c4dddd24 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -155,10 +155,9 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist) * The list may also contain PlaceHolderVars. These don't necessarily * have a single owning relation; we keep their attr_needed info in * root->placeholder_list instead. If create_new_ph is true, it's OK - * to create new PlaceHolderInfos, and we also have to update ph_may_need; - * otherwise, the PlaceHolderInfos must already exist, and we should only - * update their ph_needed. (It should be true before deconstruct_jointree - * begins, and false after that.) + * to create new PlaceHolderInfos; otherwise, the PlaceHolderInfos must + * already exist, and we should only update their ph_needed. (This should + * be true before deconstruct_jointree begins, and false after that.) */ void add_vars_to_targetlist(PlannerInfo *root, List *vars, @@ -196,20 +195,8 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, create_new_ph); - /* Always adjust ph_needed */ phinfo->ph_needed = bms_add_members(phinfo->ph_needed, where_needed); - - /* - * If we are creating PlaceHolderInfos, mark them with the correct - * maybe-needed locations. Otherwise, it's too late to change - * that, so we'd better not have set ph_needed to more than - * ph_may_need. - */ - if (create_new_ph) - mark_placeholder_maybe_needed(root, phinfo, where_needed); - else - Assert(bms_is_subset(phinfo->ph_needed, phinfo->ph_may_need)); } else elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); @@ -235,7 +222,7 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, * means setting suitable where_needed values for them. * * This has to run before deconstruct_jointree, since it might result in - * creation of PlaceHolderInfos or extension of their ph_may_need sets. + * creation of PlaceHolderInfos. */ void find_lateral_references(PlannerInfo *root) @@ -1005,11 +992,11 @@ make_outerjoininfo(PlannerInfo *root, /* * Examine PlaceHolderVars. If a PHV is supposed to be evaluated within - * this join's nullable side, and it may get used above this join, then - * ensure that min_righthand contains the full eval_at set of the PHV. - * This ensures that the PHV actually can be evaluated within the RHS. - * Note that this works only because we should already have determined the - * final eval_at level for any PHV syntactically within this join. + * this join's nullable side, then ensure that min_righthand contains the + * full eval_at set of the PHV. This ensures that the PHV actually can be + * evaluated within the RHS. Note that this works only because we should + * already have determined the final eval_at level for any PHV + * syntactically within this join. */ foreach(l, root->placeholder_list) { @@ -1020,11 +1007,6 @@ make_outerjoininfo(PlannerInfo *root, if (!bms_is_subset(ph_syn_level, right_rels)) continue; - /* We can also ignore it if it's certainly not used above this join */ - /* XXX this test is probably overly conservative */ - if (bms_is_subset(phinfo->ph_may_need, min_righthand)) - continue; - /* Else, prevent join from being formed before we eval the PHV */ min_righthand = bms_add_members(min_righthand, phinfo->ph_eval_at); } diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index c2ff2229e2..da92497689 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -23,9 +23,8 @@ #include "utils/lsyscache.h" /* Local functions */ -static Relids find_placeholders_recurse(PlannerInfo *root, Node *jtnode); -static void mark_placeholders_in_expr(PlannerInfo *root, Node *expr, - Relids relids); +static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode); +static void find_placeholders_in_expr(PlannerInfo *root, Node *expr); /* @@ -90,16 +89,23 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv, phinfo->phid = phv->phid; phinfo->ph_var = copyObject(phv); + /* initialize ph_eval_at as the set of contained relids */ phinfo->ph_eval_at = pull_varnos((Node *) phv); /* ph_eval_at may change later, see update_placeholder_eval_levels */ phinfo->ph_needed = NULL; /* initially it's unused */ - phinfo->ph_may_need = NULL; /* for the moment, estimate width using just the datatype info */ phinfo->ph_width = get_typavgwidth(exprType((Node *) phv->phexpr), exprTypmod((Node *) phv->phexpr)); root->placeholder_list = lappend(root->placeholder_list, phinfo); + /* + * The PHV's contained expression may contain other, lower-level PHVs. We + * now know we need to get those into the PlaceHolderInfo list, too, so we + * may as well do that immediately. + */ + find_placeholders_in_expr(root, (Node *) phinfo->ph_var->phexpr); + return phinfo; } @@ -119,7 +125,7 @@ find_placeholders_in_jointree(PlannerInfo *root) /* Start recursion at top of jointree */ Assert(root->parse->jointree != NULL && IsA(root->parse->jointree, FromExpr)); - (void) find_placeholders_recurse(root, (Node *) root->parse->jointree); + find_placeholders_recurse(root, (Node *) root->parse->jointree); } } @@ -128,23 +134,15 @@ find_placeholders_in_jointree(PlannerInfo *root) * One recursion level of find_placeholders_in_jointree. * * jtnode is the current jointree node to examine. - * - * The result is the set of base Relids contained in or below jtnode. - * This is just an internal convenience, it's not used at the top level. */ -static Relids +static void find_placeholders_recurse(PlannerInfo *root, Node *jtnode) { - Relids jtrelids; - if (jtnode == NULL) - return NULL; + return; if (IsA(jtnode, RangeTblRef)) { - int varno = ((RangeTblRef *) jtnode)->rtindex; - - /* No quals to deal with, just return correct result */ - jtrelids = bms_make_singleton(varno); + /* No quals to deal with here */ } else if (IsA(jtnode, FromExpr)) { @@ -152,57 +150,43 @@ find_placeholders_recurse(PlannerInfo *root, Node *jtnode) ListCell *l; /* - * First, recurse to handle child joins, and form their relid set. + * First, recurse to handle child joins. */ - jtrelids = NULL; foreach(l, f->fromlist) { - Relids sub_relids; - - sub_relids = find_placeholders_recurse(root, lfirst(l)); - jtrelids = bms_join(jtrelids, sub_relids); + find_placeholders_recurse(root, lfirst(l)); } /* * Now process the top-level quals. */ - mark_placeholders_in_expr(root, f->quals, jtrelids); + find_placeholders_in_expr(root, f->quals); } else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; - Relids leftids, - rightids; /* - * First, recurse to handle child joins, and form their relid set. + * First, recurse to handle child joins. */ - leftids = find_placeholders_recurse(root, j->larg); - rightids = find_placeholders_recurse(root, j->rarg); - jtrelids = bms_join(leftids, rightids); + find_placeholders_recurse(root, j->larg); + find_placeholders_recurse(root, j->rarg); /* Process the qual clauses */ - mark_placeholders_in_expr(root, j->quals, jtrelids); + find_placeholders_in_expr(root, j->quals); } else - { elog(ERROR, "unrecognized node type: %d", (int) nodeTag(jtnode)); - jtrelids = NULL; /* keep compiler quiet */ - } - return jtrelids; } /* - * mark_placeholders_in_expr - * Find all PlaceHolderVars in the given expression, and mark them - * as possibly needed at the specified join level. - * - * relids is the syntactic join level to mark as the "maybe needed" level - * for each PlaceHolderVar found in the expression. + * find_placeholders_in_expr + * Find all PlaceHolderVars in the given expression, and create + * PlaceHolderInfo entries for them. */ static void -mark_placeholders_in_expr(PlannerInfo *root, Node *expr, Relids relids) +find_placeholders_in_expr(PlannerInfo *root, Node *expr) { List *vars; ListCell *vl; @@ -217,63 +201,17 @@ mark_placeholders_in_expr(PlannerInfo *root, Node *expr, Relids relids) foreach(vl, vars) { PlaceHolderVar *phv = (PlaceHolderVar *) lfirst(vl); - PlaceHolderInfo *phinfo; /* Ignore any plain Vars */ if (!IsA(phv, PlaceHolderVar)) continue; /* Create a PlaceHolderInfo entry if there's not one already */ - phinfo = find_placeholder_info(root, phv, true); - - /* Mark it, and recursively process any contained placeholders */ - mark_placeholder_maybe_needed(root, phinfo, relids); + (void) find_placeholder_info(root, phv, true); } list_free(vars); } -/* - * mark_placeholder_maybe_needed - * Mark a placeholder as possibly needed at the specified join level. - * - * relids is the syntactic join level to mark as the "maybe needed" level - * for the placeholder. - * - * This is called during an initial scan of the query's targetlist and quals - * before we begin deconstruct_jointree. Once we begin deconstruct_jointree, - * all active placeholders must be present in root->placeholder_list with - * their correct ph_may_need values, because make_outerjoininfo and - * update_placeholder_eval_levels require this info to be available while - * we crawl up the join tree. - */ -void -mark_placeholder_maybe_needed(PlannerInfo *root, PlaceHolderInfo *phinfo, - Relids relids) -{ - Relids est_eval_level; - - /* Mark the PHV as possibly needed at the given syntactic level */ - phinfo->ph_may_need = bms_add_members(phinfo->ph_may_need, relids); - - /* - * This is a bit tricky: the PHV's contained expression may contain other, - * lower-level PHVs. We need to get those into the PlaceHolderInfo list, - * but they aren't going to be needed where the outer PHV is referenced. - * Rather, they'll be needed where the outer PHV is evaluated. We can - * estimate that conservatively as the syntactic location of the PHV's - * expression, but not less than the level of any Vars it contains. - * (Normally the Vars would come from below the syntactic location anyway, - * but this might not be true if the PHV contains any LATERAL references.) - */ - est_eval_level = bms_union(phinfo->ph_var->phrels, phinfo->ph_eval_at); - - /* Now recurse to take care of any such PHVs */ - mark_placeholders_in_expr(root, (Node *) phinfo->ph_var->phexpr, - est_eval_level); - - bms_free(est_eval_level); -} - /* * update_placeholder_eval_levels * Adjust the target evaluation levels for placeholders diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index c0a636b9d7..b611e0b905 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1465,18 +1465,9 @@ typedef struct AppendRelInfo * then allow it to bubble up like a Var until the ph_needed join level. * ph_needed has the same definition as attr_needed for a regular Var. * - * ph_may_need is an initial estimate of ph_needed, formed using the - * syntactic locations of references to the PHV. We need this in order to - * determine whether the PHV reference forces a join ordering constraint: - * if the PHV has to be evaluated below the nullable side of an outer join, - * and then used above that outer join, we must constrain join order to ensure - * there's a valid place to evaluate the PHV below the join. The final - * actual ph_needed level might be lower than ph_may_need, but we can't - * determine that until later on. Fortunately this doesn't matter for what - * we need ph_may_need for: if there's a PHV reference syntactically - * above the outer join, it's not going to be allowed to drop below the outer - * join, so we would come to the same conclusions about join order even if - * we had the final ph_needed value to compare to. + * Notice that when ph_eval_at is a join rather than a single baserel, the + * PlaceHolderInfo may create constraints on join order: the ph_eval_at join + * has to be formed below any outer joins that should null the PlaceHolderVar. * * We create a PlaceHolderInfo only after determining that the PlaceHolderVar * is actually referenced in the plan tree, so that unreferenced placeholders @@ -1491,7 +1482,6 @@ typedef struct PlaceHolderInfo PlaceHolderVar *ph_var; /* copy of PlaceHolderVar tree */ Relids ph_eval_at; /* lowest level we can evaluate value at */ Relids ph_needed; /* highest level the value is needed at */ - Relids ph_may_need; /* highest level it might be needed at */ int32 ph_width; /* estimated attribute width */ } PlaceHolderInfo; diff --git a/src/include/optimizer/placeholder.h b/src/include/optimizer/placeholder.h index c99ab0f610..a45b17d64c 100644 --- a/src/include/optimizer/placeholder.h +++ b/src/include/optimizer/placeholder.h @@ -22,8 +22,6 @@ extern PlaceHolderVar *make_placeholder_expr(PlannerInfo *root, Expr *expr, extern PlaceHolderInfo *find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv, bool create_new_ph); extern void find_placeholders_in_jointree(PlannerInfo *root); -extern void mark_placeholder_maybe_needed(PlannerInfo *root, - PlaceHolderInfo *phinfo, Relids relids); extern void update_placeholder_eval_levels(PlannerInfo *root, SpecialJoinInfo *new_sjinfo); extern void fix_placeholder_input_needed_levels(PlannerInfo *root); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 31c2a320a6..814ddd8046 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3617,6 +3617,58 @@ select * from Output: (COALESCE((COALESCE(b.q2, 42::bigint)), d.q2)) (26 rows) +-- case that breaks the old ph_may_need optimization +explain (verbose, costs off) +select c.*,a.*,ss1.q1,ss2.q1,ss3.* from + int8_tbl c left join ( + int8_tbl a left join + (select q1, coalesce(q2,f1) as x from int8_tbl b, int4_tbl b2 + where q1 < f1) ss1 + on a.q2 = ss1.q1 + cross join + lateral (select q1, coalesce(ss1.x,q2) as y from int8_tbl d) ss2 + ) on c.q2 = ss2.q1, + lateral (select * from int4_tbl i where ss2.y > f1) ss3; + QUERY PLAN +------------------------------------------------------------------------------------------- + Hash Right Join + Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1 + Hash Cond: (d.q1 = c.q2) + Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1) + -> Nested Loop + Output: a.q1, a.q2, b.q1, d.q1, COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2) + -> Hash Right Join + Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) + Hash Cond: (b.q1 = a.q2) + -> Nested Loop + Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint) + Join Filter: (b.q1 < b2.f1) + -> Seq Scan on public.int8_tbl b + Output: b.q1, b.q2 + -> Materialize + Output: b2.f1 + -> Seq Scan on public.int4_tbl b2 + Output: b2.f1 + -> Hash + Output: a.q1, a.q2 + -> Seq Scan on public.int8_tbl a + Output: a.q1, a.q2 + -> Materialize + Output: d.q1, d.q2 + -> Seq Scan on public.int8_tbl d + Output: d.q1, d.q2 + -> Hash + Output: c.q1, c.q2, i.f1 + -> Nested Loop + Output: c.q1, c.q2, i.f1 + -> Seq Scan on public.int8_tbl c + Output: c.q1, c.q2 + -> Materialize + Output: i.f1 + -> Seq Scan on public.int4_tbl i + Output: i.f1 +(36 rows) + -- test some error cases where LATERAL should have been used but wasn't select f1,g from int4_tbl a, (select f1 as g) ss; ERROR: column "f1" does not exist diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 656766acd3..7ec2cbeea4 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1006,6 +1006,19 @@ select * from ) on c.q2 = ss2.q1, lateral (select ss2.y) ss3; +-- case that breaks the old ph_may_need optimization +explain (verbose, costs off) +select c.*,a.*,ss1.q1,ss2.q1,ss3.* from + int8_tbl c left join ( + int8_tbl a left join + (select q1, coalesce(q2,f1) as x from int8_tbl b, int4_tbl b2 + where q1 < f1) ss1 + on a.q2 = ss1.q1 + cross join + lateral (select q1, coalesce(ss1.x,q2) as y from int8_tbl d) ss2 + ) on c.q2 = ss2.q1, + lateral (select * from int4_tbl i where ss2.y > f1) ss3; + -- test some error cases where LATERAL should have been used but wasn't select f1,g from int4_tbl a, (select f1 as g) ss; select f1,g from int4_tbl a, (select a.f1 as g) ss; -- 2.40.0