From c0aa210f6ebab06ca3933c735c7c6d2b8bdd024e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 22 Nov 2013 14:37:29 -0500 Subject: [PATCH] Flatten join alias Vars before pulling up targetlist items from a subquery. pullup_replace_vars()'s decisions about whether a pulled-up replacement expression needs to be wrapped in a PlaceHolderVar depend on the assumption that what looks like a Var behaves like a Var. However, if the Var is a join alias reference, later flattening of join aliases might replace the Var with something that's not a Var at all, and should have been wrapped. To fix, do a forcible pass of flatten_join_alias_vars() on the subquery targetlist before we start to copy items out of it. We'll re-run that processing on the pulled-up expressions later, but that's harmless. Per report from Ken Tanzer; the added regression test case is based on his example. This bug has been there since the PlaceHolderVar mechanism was invented, but has escaped detection because the circumstances that trigger it are fairly narrow. You need a flattenable query underneath an outer join, which contains another flattenable query inside a join of its own, with a dangerous expression (a constant or something else non-strict) in that one's targetlist. Having seen this, I'm wondering if it wouldn't be prudent to do all alias-variable flattening earlier, perhaps even in the rewriter. But that would probably not be a back-patchable change. --- src/backend/optimizer/prep/prepjointree.c | 12 ++++++ src/test/regress/expected/join.out | 52 +++++++++++++++++++++++ src/test/regress/sql/join.sql | 31 ++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 40627a3693..ba16e0b8fe 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -818,6 +818,18 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, return jtnode; } + /* + * We must flatten any join alias Vars in the subquery's targetlist, + * because pulling up the subquery's subqueries might have changed their + * expansions into arbitrary expressions, which could affect + * pullup_replace_vars' decisions about whether PlaceHolderVar wrappers + * are needed for tlist entries. (Likely it'd be better to do + * flatten_join_alias_vars on the whole query tree at some earlier stage, + * maybe even in the rewriter; but for now let's just fix this case here.) + */ + subquery->targetList = (List *) + flatten_join_alias_vars(subroot, (Node *) subquery->targetList); + /* * Adjust level-0 varnos in subquery so that we can append its rangetable * to upper query's. We have to fix the subquery's append_rel_list as diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 9a60ccf4ed..8ea15e8627 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2934,6 +2934,58 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) ---------+---------+---------+---------- (0 rows) +-- +-- check handling of join aliases when flattening multiple levels of subquery +-- +explain (verbose, costs off) +select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from + (values (0),(1)) foo1(join_key) +left join + (select join_key, bug_field from + (select ss1.join_key, ss1.bug_field from + (select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1 + ) foo2 + left join + (select unique2 as join_key from tenk1 i2) ss2 + using (join_key) + ) foo3 +using (join_key); + QUERY PLAN +-------------------------------------------------------------------------- + Nested Loop Left Join + Output: "*VALUES*".column1, i1.f1, (666) + Join Filter: ("*VALUES*".column1 = i1.f1) + -> Values Scan on "*VALUES*" + Output: "*VALUES*".column1 + -> Materialize + Output: i1.f1, (666) + -> Nested Loop Left Join + Output: i1.f1, 666 + -> Seq Scan on public.int4_tbl i1 + Output: i1.f1 + -> Index Only Scan using tenk1_unique2 on public.tenk1 i2 + Output: i2.unique2 + Index Cond: (i2.unique2 = i1.f1) +(14 rows) + +select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from + (values (0),(1)) foo1(join_key) +left join + (select join_key, bug_field from + (select ss1.join_key, ss1.bug_field from + (select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1 + ) foo2 + left join + (select unique2 as join_key from tenk1 i2) ss2 + using (join_key) + ) foo3 +using (join_key); + foo1_id | foo3_id | bug_field +---------+---------+----------- + 0 | 0 | 666 + 1 | | +(2 rows) + -- -- test ability to push constants through outer join clauses -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index eb1a2cd228..1868b6d8e2 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -801,6 +801,37 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44; +-- +-- check handling of join aliases when flattening multiple levels of subquery +-- + +explain (verbose, costs off) +select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from + (values (0),(1)) foo1(join_key) +left join + (select join_key, bug_field from + (select ss1.join_key, ss1.bug_field from + (select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1 + ) foo2 + left join + (select unique2 as join_key from tenk1 i2) ss2 + using (join_key) + ) foo3 +using (join_key); + +select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from + (values (0),(1)) foo1(join_key) +left join + (select join_key, bug_field from + (select ss1.join_key, ss1.bug_field from + (select f1 as join_key, 666 as bug_field from int4_tbl i1) ss1 + ) foo2 + left join + (select unique2 as join_key from tenk1 i2) ss2 + using (join_key) + ) foo3 +using (join_key); + -- -- test ability to push constants through outer join clauses -- -- 2.40.0