From 7f5cfe914de49130d86b2e1e9636b7135e577ef1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 23 Jul 2013 17:54:24 -0400 Subject: [PATCH] Further hacking on ruleutils' new column-alias-assignment code. After further thought about implicit coercions appearing in a joinaliasvars list, I realized that they represent an additional reason why we might need to reference the join output column directly instead of referencing an underlying column. Consider SELECT x FROM t1 LEFT JOIN t2 USING (x) where t1.x is of type date while t2.x is of type timestamptz. The merged output variable is of type timestamptz, but it won't go to null when t2 does, therefore neither t1.x nor t2.x is a valid substitute reference. The code in get_variable() actually gets this case right, since it knows it shouldn't look through a coercion, but we failed to ensure that the unqualified output column name would be globally unique. To fix, modify the code that trawls for a dangerous situation so that it actually scans through an unnamed join's joinaliasvars list to see if there are any non-simple-Var entries. --- src/backend/utils/adt/ruleutils.c | 92 +++++++++++++---------- src/test/regress/expected/create_view.out | 28 +++++++ src/test/regress/sql/create_view.sql | 13 ++++ 3 files changed, 94 insertions(+), 39 deletions(-) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 628b83a67f..a7de92f306 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -164,10 +164,10 @@ typedef struct * since they just inherit column names from their input RTEs, and we can't * rename the columns at the join level. Most of the time this isn't an issue * because we don't need to reference the join's output columns as such; we - * can reference the input columns instead. That approach fails for merged - * FULL JOIN USING columns, however, so when we have one of those in an - * unnamed join, we have to make that column's alias globally unique across - * the whole query to ensure it can be referenced unambiguously. + * can reference the input columns instead. That approach can fail for merged + * JOIN USING columns, however, so when we have one of those in an unnamed + * join, we have to make that column's alias globally unique across the whole + * query to ensure it can be referenced unambiguously. * * Another problem is that a JOIN USING clause requires the columns to be * merged to have the same aliases in both input RTEs. To handle that, we do @@ -301,7 +301,7 @@ static bool refname_is_unique(char *refname, deparse_namespace *dpns, static void set_deparse_for_query(deparse_namespace *dpns, Query *query, List *parent_namespaces); static void set_simple_column_names(deparse_namespace *dpns); -static bool has_unnamed_full_join_using(Node *jtnode); +static bool has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode); static void set_using_names(deparse_namespace *dpns, Node *jtnode); static void set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte, @@ -2570,7 +2570,7 @@ set_deparse_for_query(deparse_namespace *dpns, Query *query, { /* Detect whether global uniqueness of USING names is needed */ dpns->unique_using = - has_unnamed_full_join_using((Node *) query->jointree); + has_dangerous_join_using(dpns, (Node *) query->jointree); /* * Select names for columns merged by USING, via a recursive pass over @@ -2630,25 +2630,26 @@ set_simple_column_names(deparse_namespace *dpns) } /* - * has_unnamed_full_join_using: search jointree for unnamed FULL JOIN USING + * has_dangerous_join_using: search jointree for unnamed JOIN USING * - * Merged columns of a FULL JOIN USING act differently from either of the - * input columns, so they have to be referenced as columns of the JOIN not - * as columns of either input. And this is problematic if the join is - * unnamed (alias-less): we cannot qualify the column's name with an RTE - * name, since there is none. (Forcibly assigning an alias to the join is - * not a solution, since that will prevent legal references to tables below - * the join.) To ensure that every column in the query is unambiguously - * referenceable, we must assign such merged columns names that are globally - * unique across the whole query, aliasing other columns out of the way as - * necessary. + * Merged columns of a JOIN USING may act differently from either of the input + * columns, either because they are merged with COALESCE (in a FULL JOIN) or + * because an implicit coercion of the underlying input column is required. + * In such a case the column must be referenced as a column of the JOIN not as + * a column of either input. And this is problematic if the join is unnamed + * (alias-less): we cannot qualify the column's name with an RTE name, since + * there is none. (Forcibly assigning an alias to the join is not a solution, + * since that will prevent legal references to tables below the join.) + * To ensure that every column in the query is unambiguously referenceable, + * we must assign such merged columns names that are globally unique across + * the whole query, aliasing other columns out of the way as necessary. * * Because the ensuing re-aliasing is fairly damaging to the readability of * the query, we don't do this unless we have to. So, we must pre-scan * the join tree to see if we have to, before starting set_using_names(). */ static bool -has_unnamed_full_join_using(Node *jtnode) +has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode) { if (IsA(jtnode, RangeTblRef)) { @@ -2661,7 +2662,7 @@ has_unnamed_full_join_using(Node *jtnode) foreach(lc, f->fromlist) { - if (has_unnamed_full_join_using((Node *) lfirst(lc))) + if (has_dangerous_join_using(dpns, (Node *) lfirst(lc))) return true; } } @@ -2669,16 +2670,30 @@ has_unnamed_full_join_using(Node *jtnode) { JoinExpr *j = (JoinExpr *) jtnode; - /* Is it an unnamed FULL JOIN with USING? */ - if (j->alias == NULL && - j->jointype == JOIN_FULL && - j->usingClause) - return true; + /* Is it an unnamed JOIN with USING? */ + if (j->alias == NULL && j->usingClause) + { + /* + * Yes, so check each join alias var to see if any of them are not + * simple references to underlying columns. If so, we have a + * dangerous situation and must pick unique aliases. + */ + RangeTblEntry *jrte = rt_fetch(j->rtindex, dpns->rtable); + ListCell *lc; + + foreach(lc, jrte->joinaliasvars) + { + Var *aliasvar = (Var *) lfirst(lc); + + if (aliasvar != NULL && !IsA(aliasvar, Var)) + return true; + } + } /* Nope, but inspect children */ - if (has_unnamed_full_join_using(j->larg)) + if (has_dangerous_join_using(dpns, j->larg)) return true; - if (has_unnamed_full_join_using(j->rarg)) + if (has_dangerous_join_using(dpns, j->rarg)) return true; } else @@ -2768,16 +2783,16 @@ set_using_names(deparse_namespace *dpns, Node *jtnode) * * If dpns->unique_using is TRUE, we force all USING names to be * unique across the whole query level. In principle we'd only need - * the names of USING columns in unnamed full joins to be globally - * unique, but to safely assign all USING names in a single pass, we - * have to enforce the same uniqueness rule for all of them. However, - * if a USING column's name has been pushed down from the parent, we - * should use it as-is rather than making a uniqueness adjustment. - * This is necessary when we're at an unnamed join, and it creates no - * risk of ambiguity. Also, if there's a user-written output alias - * for a merged column, we prefer to use that rather than the input - * name; this simplifies the logic and seems likely to lead to less - * aliasing overall. + * the names of dangerous USING columns to be globally unique, but to + * safely assign all USING names in a single pass, we have to enforce + * the same uniqueness rule for all of them. However, if a USING + * column's name has been pushed down from the parent, we should use + * it as-is rather than making a uniqueness adjustment. This is + * necessary when we're at an unnamed join, and it creates no risk of + * ambiguity. Also, if there's a user-written output alias for a + * merged column, we prefer to use that rather than the input name; + * this simplifies the logic and seems likely to lead to less aliasing + * overall. * * If dpns->unique_using is FALSE, we only need USING names to be * unique within their own join RTE. We still need to honor @@ -5344,9 +5359,8 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) * If it's a simple reference to one of the input vars, then recursively * print the name of that var instead. When it's not a simple reference, * we have to just print the unqualified join column name. (This can only - * happen with columns that were merged by USING or NATURAL clauses in a - * FULL JOIN; we took pains previously to make the unqualified column name - * unique in such cases.) + * happen with "dangerous" merged columns in a JOIN USING; we took pains + * previously to make the unqualified column name unique in such cases.) * * This wouldn't work in decompiling plan trees, because we don't store * joinaliasvars lists after planning; but a plan tree should never diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out index 3ca8a16ec8..f1fdd50d6a 100644 --- a/src/test/regress/expected/create_view.out +++ b/src/test/regress/expected/create_view.out @@ -1243,6 +1243,34 @@ select pg_get_viewdef('vv4', true); FULL JOIN tt8 tt8y(x_1, z, z2) USING (x_1); (1 row) +-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN +create table tt7a (x date, xx int, y int); +alter table tt7a drop column xx; +create table tt8a (x timestamptz, z int); +create view vv2a as +select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e) +union all +select * from tt7a left join tt8a using (x), tt8a tt8ax; +select pg_get_viewdef('vv2a', true); + pg_get_viewdef +---------------------------------------------------------------- + SELECT v.a, + + v.b, + + v.c, + + v.d, + + v.e + + FROM ( VALUES (now(),2,3,now(),5)) v(a, b, c, d, e)+ + UNION ALL + + SELECT x AS a, + + tt7a.y AS b, + + tt8a.z AS c, + + tt8ax.x_1 AS d, + + tt8ax.z AS e + + FROM tt7a + + LEFT JOIN tt8a USING (x), + + tt8a tt8ax(x_1, z); +(1 row) + -- -- Also check dropping a column that existed when the view was made -- diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql index 4fbd5a5e6f..234a4214b2 100644 --- a/src/test/regress/sql/create_view.sql +++ b/src/test/regress/sql/create_view.sql @@ -389,6 +389,19 @@ select pg_get_viewdef('vv2', true); select pg_get_viewdef('vv3', true); select pg_get_viewdef('vv4', true); +-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN + +create table tt7a (x date, xx int, y int); +alter table tt7a drop column xx; +create table tt8a (x timestamptz, z int); + +create view vv2a as +select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e) +union all +select * from tt7a left join tt8a using (x), tt8a tt8ax; + +select pg_get_viewdef('vv2a', true); + -- -- Also check dropping a column that existed when the view was made -- -- 2.40.0