Further hacking on ruleutils' new column-alias-assignment code.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 23 Jul 2013 21:54:24 +0000 (17:54 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 23 Jul 2013 21:55:23 +0000 (17:55 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 23 Jul 2013 21:54:24 +0000 (17:54 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 23 Jul 2013 21:55:23 +0000 (17:55 -0400)
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index 628b83a67f6da4cad394cde6a52ec53b8da04862..a7de92f3066942a1a22ffca4bf3c3e67c327577d 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -164,10 +164,10 @@ typedef struct
   * since they just inherit column names from their input RTEs, and we can't
   * rename the columns at the join level.  Most of the time this isn't an issue
   * because we don't need to reference the join's output columns as such; we
- * can reference the input columns instead.  That approach fails for merged
- * FULL JOIN USING columns, however, so when we have one of those in an
- * unnamed join, we have to make that column's alias globally unique across
- * the whole query to ensure it can be referenced unambiguously.
+ * can reference the input columns instead.  That approach can fail for merged
+ * JOIN USING columns, however, so when we have one of those in an unnamed
+ * join, we have to make that column's alias globally unique across the whole
+ * query to ensure it can be referenced unambiguously.
   *
   * Another problem is that a JOIN USING clause requires the columns to be
   * merged to have the same aliases in both input RTEs. To handle that, we do
@@ -301,7 +301,7 @@ static bool refname_is_unique(char *refname, deparse_namespace *dpns,
  static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
                                           List *parent_namespaces);
  static void set_simple_column_names(deparse_namespace *dpns);
-static bool has_unnamed_full_join_using(Node *jtnode);
+static bool has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode);
  static void set_using_names(deparse_namespace *dpns, Node *jtnode);
  static void set_relation_column_names(deparse_namespace *dpns,
                                                   RangeTblEntry *rte,
@@ -2570,7 +2570,7 @@ set_deparse_for_query(deparse_namespace *dpns, Query *query,
         {
                 /* Detect whether global uniqueness of USING names is needed */
                 dpns->unique_using =
-                       has_unnamed_full_join_using((Node *) query->jointree);
+                       has_dangerous_join_using(dpns, (Node *) query->jointree);
  
                 /*
                  * Select names for columns merged by USING, via a recursive pass over
@@ -2630,25 +2630,26 @@ set_simple_column_names(deparse_namespace *dpns)
  }
  
  /*
- * has_unnamed_full_join_using: search jointree for unnamed FULL JOIN USING
+ * has_dangerous_join_using: search jointree for unnamed JOIN USING
   *
- * Merged columns of a FULL JOIN USING act differently from either of the
- * input columns, so they have to be referenced as columns of the JOIN not
- * as columns of either input. And this is problematic if the join is
- * unnamed (alias-less): we cannot qualify the column's name with an RTE
- * name, since there is none.  (Forcibly assigning an alias to the join is
- * not a solution, since that will prevent legal references to tables below
- * the join.)  To ensure that every column in the query is unambiguously
- * referenceable, we must assign such merged columns names that are globally
- * unique across the whole query, aliasing other columns out of the way as
- * necessary.
+ * Merged columns of a JOIN USING may act differently from either of the input
+ * columns, either because they are merged with COALESCE (in a FULL JOIN) or
+ * because an implicit coercion of the underlying input column is required.
+ * In such a case the column must be referenced as a column of the JOIN not as
+ * a column of either input.  And this is problematic if the join is unnamed
+ * (alias-less): we cannot qualify the column's name with an RTE name, since
+ * there is none.  (Forcibly assigning an alias to the join is not a solution,
+ * since that will prevent legal references to tables below the join.)
+ * To ensure that every column in the query is unambiguously referenceable,
+ * we must assign such merged columns names that are globally unique across
+ * the whole query, aliasing other columns out of the way as necessary.
   *
   * Because the ensuing re-aliasing is fairly damaging to the readability of
   * the query, we don't do this unless we have to.  So, we must pre-scan
   * the join tree to see if we have to, before starting set_using_names().
   */
  static bool
-has_unnamed_full_join_using(Node *jtnode)
+has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
  {
         if (IsA(jtnode, RangeTblRef))
         {
@@ -2661,7 +2662,7 @@ has_unnamed_full_join_using(Node *jtnode)
  
                 foreach(lc, f->fromlist)
                 {
-                       if (has_unnamed_full_join_using((Node *) lfirst(lc)))
+                       if (has_dangerous_join_using(dpns, (Node *) lfirst(lc)))
                                 return true;
                 }
         }
@@ -2669,16 +2670,30 @@ has_unnamed_full_join_using(Node *jtnode)
         {
                 JoinExpr   *j = (JoinExpr *) jtnode;
  
-               /* Is it an unnamed FULL JOIN with USING? */
-               if (j->alias == NULL &&
-                       j->jointype == JOIN_FULL &&
-                       j->usingClause)
-                       return true;
+               /* Is it an unnamed JOIN with USING? */
+               if (j->alias == NULL && j->usingClause)
+               {
+                       /*
+                        * Yes, so check each join alias var to see if any of them are not
+                        * simple references to underlying columns.  If so, we have a
+                        * dangerous situation and must pick unique aliases.
+                        */
+                       RangeTblEntry *jrte = rt_fetch(j->rtindex, dpns->rtable);
+                       ListCell   *lc;
+
+                       foreach(lc, jrte->joinaliasvars)
+                       {
+                               Var                *aliasvar = (Var *) lfirst(lc);
+
+                               if (aliasvar != NULL && !IsA(aliasvar, Var))
+                                       return true;
+                       }
+               }
  
                 /* Nope, but inspect children */
-               if (has_unnamed_full_join_using(j->larg))
+               if (has_dangerous_join_using(dpns, j->larg))
                         return true;
-               if (has_unnamed_full_join_using(j->rarg))
+               if (has_dangerous_join_using(dpns, j->rarg))
                         return true;
         }
         else
@@ -2768,16 +2783,16 @@ set_using_names(deparse_namespace *dpns, Node *jtnode)
                  *
                  * If dpns->unique_using is TRUE, we force all USING names to be
                  * unique across the whole query level.  In principle we'd only need
-                * the names of USING columns in unnamed full joins to be globally
-                * unique, but to safely assign all USING names in a single pass, we
-                * have to enforce the same uniqueness rule for all of them.  However,
-                * if a USING column's name has been pushed down from the parent, we
-                * should use it as-is rather than making a uniqueness adjustment.
-                * This is necessary when we're at an unnamed join, and it creates no
-                * risk of ambiguity.  Also, if there's a user-written output alias
-                * for a merged column, we prefer to use that rather than the input
-                * name; this simplifies the logic and seems likely to lead to less
-                * aliasing overall.
+                * the names of dangerous USING columns to be globally unique, but to
+                * safely assign all USING names in a single pass, we have to enforce
+                * the same uniqueness rule for all of them.  However, if a USING
+                * column's name has been pushed down from the parent, we should use
+                * it as-is rather than making a uniqueness adjustment.  This is
+                * necessary when we're at an unnamed join, and it creates no risk of
+                * ambiguity.  Also, if there's a user-written output alias for a
+                * merged column, we prefer to use that rather than the input name;
+                * this simplifies the logic and seems likely to lead to less aliasing
+                * overall.
                  *
                  * If dpns->unique_using is FALSE, we only need USING names to be
                  * unique within their own join RTE.  We still need to honor
@@ -5344,9 +5359,8 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
          * If it's a simple reference to one of the input vars, then recursively
          * print the name of that var instead.  When it's not a simple reference,
          * we have to just print the unqualified join column name.      (This can only
-        * happen with columns that were merged by USING or NATURAL clauses in a
-        * FULL JOIN; we took pains previously to make the unqualified column name
-        * unique in such cases.)
+        * happen with "dangerous" merged columns in a JOIN USING; we took pains
+        * previously to make the unqualified column name unique in such cases.)
          *
          * This wouldn't work in decompiling plan trees, because we don't store
          * joinaliasvars lists after planning; but a plan tree should never
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out

index 3ca8a16ec86b1005fc40f075ec0c3345e5e5d8fd..f1fdd50d6a4c9072855284d13208848c0ecb88c3 100644 (file)
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -1243,6 +1243,34 @@ select pg_get_viewdef('vv4', true);
      FULL JOIN tt8 tt8y(x_1, z, z2) USING (x_1);
  (1 row)
  
+-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN
+create table tt7a (x date, xx int, y int);
+alter table tt7a drop column xx;
+create table tt8a (x timestamptz, z int);
+create view vv2a as
+select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e)
+union all
+select * from tt7a left join tt8a using (x), tt8a tt8ax;
+select pg_get_viewdef('vv2a', true);
+                         pg_get_viewdef                         
+----------------------------------------------------------------
+          SELECT v.a,                                          +
+             v.b,                                              +
+             v.c,                                              +
+             v.d,                                              +
+             v.e                                               +
+            FROM ( VALUES (now(),2,3,now(),5)) v(a, b, c, d, e)+
+ UNION ALL                                                     +
+          SELECT x AS a,                                       +
+             tt7a.y AS b,                                      +
+             tt8a.z AS c,                                      +
+             tt8ax.x_1 AS d,                                   +
+             tt8ax.z AS e                                      +
+            FROM tt7a                                          +
+       LEFT JOIN tt8a USING (x),                               +
+        tt8a tt8ax(x_1, z);
+(1 row)
+
  --
  -- Also check dropping a column that existed when the view was made
  --
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql

index 4fbd5a5e6f8baa3d41f1a201fbb99ad4bc3bf3da..234a4214b27f87fd8635122a5e72353369b35a6f 100644 (file)
--- a/src/test/regress/sql/create_view.sql
+++ b/src/test/regress/sql/create_view.sql
@@ -389,6 +389,19 @@ select pg_get_viewdef('vv2', true);
  select pg_get_viewdef('vv3', true);
  select pg_get_viewdef('vv4', true);
  
+-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN
+
+create table tt7a (x date, xx int, y int);
+alter table tt7a drop column xx;
+create table tt8a (x timestamptz, z int);
+
+create view vv2a as
+select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e)
+union all
+select * from tt7a left join tt8a using (x), tt8a tt8ax;
+
+select pg_get_viewdef('vv2a', true);
+
  --
  -- Also check dropping a column that existed when the view was made
  --
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 23 Jul 2013 21:54:24 +0000 (17:54 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 23 Jul 2013 21:55:23 +0000 (17:55 -0400)
src/backend/utils/adt/ruleutils.c		patch \| blob \| history
src/test/regress/expected/create_view.out		patch \| blob \| history
src/test/regress/sql/create_view.sql		patch \| blob \| history