Prevent pushing down WHERE clauses into unsafe UNION/INTERSECT nests.

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 6 Jun 2013 03:44:02 +0000 (23:44 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 6 Jun 2013 03:45:11 +0000 (23:45 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 6 Jun 2013 03:44:02 +0000 (23:44 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 6 Jun 2013 03:45:11 +0000 (23:45 -0400)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 742177f45707c657d4ffa7e915be896eded2b6dd..b84bbba68c9e51a3eed5b0913d8156bd93168712 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -82,13 +82,14 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                            RangeTblEntry *rte);
  static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
  static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
-                                                 bool *differentTypes);
+                                                 bool *unsafeColumns);
  static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
-                                         bool *differentTypes);
+                                         bool *unsafeColumns);
+static void check_output_expressions(Query *subquery, bool *unsafeColumns);
  static void compare_tlist_datatypes(List *tlist, List *colTypes,
-                                               bool *differentTypes);
+                                               bool *unsafeColumns);
  static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
-                                         bool *differentTypes);
+                                         bool *unsafeColumns);
  static void subquery_push_qual(Query *subquery,
                                    RangeTblEntry *rte, Index rti, Node *qual);
  static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -1048,7 +1049,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
         Query      *parse = root->parse;
         Query      *subquery = rte->subquery;
         Relids          required_outer;
-       bool       *differentTypes;
+       bool       *unsafeColumns;
         double          tuple_fraction;
         PlannerInfo *subroot;
         List       *pathkeys;
@@ -1067,8 +1068,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
          */
         required_outer = rel->lateral_relids;
  
-       /* We need a workspace for keeping track of set-op type coercions */
-       differentTypes = (bool *)
+       /*
+        * We need a workspace for keeping track of unsafe-to-reference columns.
+        * unsafeColumns[i] is set TRUE if we've found that output column i of the
+        * subquery is unsafe to use in a pushed-down qual.
+        */
+       unsafeColumns = (bool *)
                 palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
  
         /*
@@ -1096,7 +1101,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
          * push down a pushable qual, because it'd result in a worse plan?
          */
         if (rel->baserestrictinfo != NIL &&
-               subquery_is_pushdown_safe(subquery, subquery, differentTypes))
+               subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
         {
                 /* OK to consider pushing down individual quals */
                 List       *upperrestrictlist = NIL;
@@ -1110,7 +1115,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                         if (!rinfo->pseudoconstant &&
                                 (!rte->security_barrier ||
                                  !contain_leaky_functions(clause)) &&
-                               qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
+                               qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
                         {
                                 /* Push it down */
                                 subquery_push_qual(subquery, rte, rti, clause);
@@ -1124,7 +1129,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 rel->baserestrictinfo = upperrestrictlist;
         }
  
-       pfree(differentTypes);
+       pfree(unsafeColumns);
  
         /*
          * We can safely pass the outer tuple_fraction down to the subquery if the
@@ -1553,17 +1558,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
   * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
   * quals into it, because that could change the results.
   *
- * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
- * push quals into each component query, but the quals can only reference
- * subquery columns that suffer no type coercions in the set operation.
- * Otherwise there are possible semantic gotchas.  So, we check the
- * component queries to see if any of them have different output types;
- * differentTypes[k] is set true if column k has different type in any
- * component.
+ * In addition, we make several checks on the subquery's output columns
+ * to see if it is safe to reference them in pushed-down quals.  If output
+ * column k is found to be unsafe to reference, we set unsafeColumns[k] to
+ * TRUE, but we don't reject the subquery overall since column k might
+ * not be referenced by some/all quals.  The unsafeColumns[] array will be
+ * consulted later by qual_is_pushdown_safe(). It's better to do it this
+ * way than to make the checks directly in qual_is_pushdown_safe(), because
+ * when the subquery involves set operations we have to check the output
+ * expressions in each arm of the set op.
   */
  static bool
  subquery_is_pushdown_safe(Query *subquery, Query *topquery,
-                                                 bool *differentTypes)
+                                                 bool *unsafeColumns)
  {
         SetOperationStmt *topop;
  
@@ -1575,13 +1582,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
         if (subquery->hasWindowFuncs)
                 return false;
  
+       /*
+        * If we're at a leaf query, check for unsafe expressions in its target
+        * list, and mark any unsafe ones in unsafeColumns[].  (Non-leaf nodes in
+        * setop trees have only simple Vars in their tlists, so no need to check
+        * them.)
+        */
+       if (subquery->setOperations == NULL)
+               check_output_expressions(subquery, unsafeColumns);
+
         /* Are we at top level, or looking at a setop component? */
         if (subquery == topquery)
         {
                 /* Top level, so check any component queries */
                 if (subquery->setOperations != NULL)
                         if (!recurse_pushdown_safe(subquery->setOperations, topquery,
-                                                                          differentTypes))
+                                                                          unsafeColumns))
                                 return false;
         }
         else
@@ -1594,7 +1610,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
                 Assert(topop && IsA(topop, SetOperationStmt));
                 compare_tlist_datatypes(subquery->targetList,
                                                                 topop->colTypes,
-                                                               differentTypes);
+                                                               unsafeColumns);
         }
         return true;
  }
@@ -1604,7 +1620,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
   */
  static bool
  recurse_pushdown_safe(Node *setOp, Query *topquery,
-                                         bool *differentTypes)
+                                         bool *unsafeColumns)
  {
         if (IsA(setOp, RangeTblRef))
         {
@@ -1613,19 +1629,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
                 Query      *subquery = rte->subquery;
  
                 Assert(subquery != NULL);
-               return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
+               return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
         }
         else if (IsA(setOp, SetOperationStmt))
         {
                 SetOperationStmt *op = (SetOperationStmt *) setOp;
  
-               /* EXCEPT is no good */
+               /* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
                 if (op->op == SETOP_EXCEPT)
                         return false;
                 /* Else recurse */
-               if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
+               if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
                         return false;
-               if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
+               if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
                         return false;
         }
         else
@@ -1637,17 +1653,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
  }
  
  /*
- * Compare tlist's datatypes against the list of set-operation result types.
- * For any items that are different, mark the appropriate element of
- * differentTypes[] to show that this column will have type conversions.
+ * check_output_expressions - check subquery's output expressions for safety
+ *
+ * There are several cases in which it's unsafe to push down an upper-level
+ * qual if it references a particular output column of a subquery.     We check
+ * each output column of the subquery and set unsafeColumns[k] to TRUE if
+ * that column is unsafe for a pushed-down qual to reference.  The conditions
+ * checked here are:
+ *
+ * 1. We must not push down any quals that refer to subselect outputs that
+ * return sets, else we'd introduce functions-returning-sets into the
+ * subquery's WHERE/HAVING quals.
+ *
+ * 2. We must not push down any quals that refer to subselect outputs that
+ * contain volatile functions, for fear of introducing strange results due
+ * to multiple evaluation of a volatile function.
+ *
+ * 3. If the subquery uses DISTINCT ON, we must not push down any quals that
+ * refer to non-DISTINCT output columns, because that could change the set
+ * of rows returned.  (This condition is vacuous for DISTINCT, because then
+ * there are no non-DISTINCT output columns, so we needn't check.  But note
+ * we are assuming that the qual can't distinguish values that the DISTINCT
+ * operator sees as equal.     This is a bit shaky but we have no way to test
+ * for the case, and it's unlikely enough that we shouldn't refuse the
+ * optimization just because it could theoretically happen.)
+ */
+static void
+check_output_expressions(Query *subquery, bool *unsafeColumns)
+{
+       ListCell   *lc;
+
+       foreach(lc, subquery->targetList)
+       {
+               TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+               if (tle->resjunk)
+                       continue;                       /* ignore resjunk columns */
+
+               /* We need not check further if output col is already known unsafe */
+               if (unsafeColumns[tle->resno])
+                       continue;
+
+               /* Functions returning sets are unsafe (point 1) */
+               if (expression_returns_set((Node *) tle->expr))
+               {
+                       unsafeColumns[tle->resno] = true;
+                       continue;
+               }
+
+               /* Volatile functions are unsafe (point 2) */
+               if (contain_volatile_functions((Node *) tle->expr))
+               {
+                       unsafeColumns[tle->resno] = true;
+                       continue;
+               }
+
+               /* If subquery uses DISTINCT ON, check point 3 */
+               if (subquery->hasDistinctOn &&
+                       !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
+               {
+                       /* non-DISTINCT column, so mark it unsafe */
+                       unsafeColumns[tle->resno] = true;
+                       continue;
+               }
+       }
+}
+
+/*
+ * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
+ * push quals into each component query, but the quals can only reference
+ * subquery columns that suffer no type coercions in the set operation.
+ * Otherwise there are possible semantic gotchas.  So, we check the
+ * component queries to see if any of them have output types different from
+ * the top-level setop outputs.  unsafeColumns[k] is set true if column k
+ * has different type in any component.
   *
   * We don't have to care about typmods here: the only allowed difference
   * between set-op input and output typmods is input is a specific typmod
   * and output is -1, and that does not require a coercion.
+ *
+ * tlist is a subquery tlist.
+ * colTypes is an OID list of the top-level setop's output column types.
+ * unsafeColumns[] is the result array.
   */
  static void
  compare_tlist_datatypes(List *tlist, List *colTypes,
-                                               bool *differentTypes)
+                                               bool *unsafeColumns)
  {
         ListCell   *l;
         ListCell   *colType = list_head(colTypes);
@@ -1661,7 +1752,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
                 if (colType == NULL)
                         elog(ERROR, "wrong number of tlist entries");
                 if (exprType((Node *) tle->expr) != lfirst_oid(colType))
-                       differentTypes[tle->resno] = true;
+                       unsafeColumns[tle->resno] = true;
                 colType = lnext(colType);
         }
         if (colType != NULL)
@@ -1684,34 +1775,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
   * (since there is no easy way to name that within the subquery itself).
   *
   * 3. The qual must not refer to any subquery output columns that were
- * found to have inconsistent types across a set operation tree by
- * subquery_is_pushdown_safe().
- *
- * 4. If the subquery uses DISTINCT ON, we must not push down any quals that
- * refer to non-DISTINCT output columns, because that could change the set
- * of rows returned.  (This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, so we needn't check.  But note
- * we are assuming that the qual can't distinguish values that the DISTINCT
- * operator sees as equal.     This is a bit shaky but we have no way to test
- * for the case, and it's unlikely enough that we shouldn't refuse the
- * optimization just because it could theoretically happen.)
- *
- * 5. We must not push down any quals that refer to subselect outputs that
- * return sets, else we'd introduce functions-returning-sets into the
- * subquery's WHERE/HAVING quals.
- *
- * 6. We must not push down any quals that refer to subselect outputs that
- * contain volatile functions, for fear of introducing strange results due
- * to multiple evaluation of a volatile function.
+ * found to be unsafe to reference by subquery_is_pushdown_safe().
   */
  static bool
  qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
-                                         bool *differentTypes)
+                                         bool *unsafeColumns)
  {
         bool            safe = true;
         List       *vars;
         ListCell   *vl;
-       Bitmapset  *tested = NULL;
  
         /* Refuse subselects (point 1) */
         if (contain_subplans(qual))
@@ -1734,7 +1806,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
         foreach(vl, vars)
         {
                 Var                *var = (Var *) lfirst(vl);
-               TargetEntry *tle;
  
                 /*
                  * XXX Punt if we find any PlaceHolderVars in the restriction clause.
@@ -1750,6 +1821,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
                 }
  
                 Assert(var->varno == rti);
+               Assert(var->varattno >= 0);
  
                 /* Check point 2 */
                 if (var->varattno == 0)
@@ -1758,45 +1830,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
                         break;
                 }
  
-               /*
-                * We use a bitmapset to avoid testing the same attno more than once.
-                * (NB: this only works because subquery outputs can't have negative
-                * attnos.)
-                */
-               if (bms_is_member(var->varattno, tested))
-                       continue;
-               tested = bms_add_member(tested, var->varattno);
-
                 /* Check point 3 */
-               if (differentTypes[var->varattno])
-               {
-                       safe = false;
-                       break;
-               }
-
-               /* Must find the tlist element referenced by the Var */
-               tle = get_tle_by_resno(subquery->targetList, var->varattno);
-               Assert(tle != NULL);
-               Assert(!tle->resjunk);
-
-               /* If subquery uses DISTINCT ON, check point 4 */
-               if (subquery->hasDistinctOn &&
-                       !targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
-               {
-                       /* non-DISTINCT column, so fail */
-                       safe = false;
-                       break;
-               }
-
-               /* Refuse functions returning sets (point 5) */
-               if (expression_returns_set((Node *) tle->expr))
-               {
-                       safe = false;
-                       break;
-               }
-
-               /* Refuse volatile functions (point 6) */
-               if (contain_volatile_functions((Node *) tle->expr))
+               if (unsafeColumns[var->varattno])
                 {
                         safe = false;
                         break;
@@ -1804,7 +1839,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
         }
  
         list_free(vars);
-       bms_free(tested);
  
         return safe;
  }
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index 031be5389eb372883708e0d23ce8d51c4f24624b..435f50d050ddd072ec50e2112e44157f2bd55bc0 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -515,3 +515,91 @@ explain (costs off)
     ->  Seq Scan on tenk1 b
  (2 rows)
  
+-- Test that we push quals into UNION sub-selects only when it's safe
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+                 QUERY PLAN                 
+--------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: (1), (2)
+         ->  Append
+               ->  Result
+               ->  Result
+                     One-Time Filter: false
+(7 rows)
+
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+ t | x 
+---+---
+ 1 | 2
+(1 row)
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+           QUERY PLAN           
+--------------------------------
+ Sort
+   Sort Key: ss.x
+   ->  Subquery Scan on ss
+         Filter: (ss.x < 4)
+         ->  HashAggregate
+               ->  Append
+                     ->  Result
+                     ->  Result
+(8 rows)
+
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+ t | x 
+---+---
+ 1 | 1
+ 1 | 2
+ 1 | 3
+(3 rows)
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Subquery Scan on ss
+   Filter: (ss.x > 3)
+   ->  Unique
+         ->  Sort
+               Sort Key: (1), (((random() * 3::double precision))::integer)
+               ->  Append
+                     ->  Result
+                     ->  Result
+(8 rows)
+
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+ t | x 
+---+---
+ 2 | 4
+(1 row)
+
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql

index 752ae470f0d1b38a1aeda87ae31d6b63e3e5cb3c..f3c9d113827eb240d055bfc32a31a60d89038dbc 100644 (file)
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -207,3 +207,45 @@ explain (costs off)
     UNION ALL
     SELECT 2 AS t, * FROM tenk1 b) c
   WHERE t = 2;
+
+-- Test that we push quals into UNION sub-selects only when it's safe
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+
+SELECT * FROM
+  (SELECT 1 AS t, 2 AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4;
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+
+SELECT * FROM
+  (SELECT 1 AS t, generate_series(1,10) AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x < 4
+ORDER BY x;
+
+explain (costs off)
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
+
+SELECT * FROM
+  (SELECT 1 AS t, (random()*3)::int AS x
+   UNION
+   SELECT 2 AS t, 4 AS x) ss
+WHERE x > 3;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 6 Jun 2013 03:44:02 +0000 (23:44 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 6 Jun 2013 03:45:11 +0000 (23:45 -0400)
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/test/regress/expected/union.out		patch \| blob \| history
src/test/regress/sql/union.sql		patch \| blob \| history