From f753c9d596d835d47770d1b3eed6ea9c1d3af4a8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 5 Jun 2013 23:44:14 -0400 Subject: [PATCH] Prevent pushing down WHERE clauses into unsafe UNION/INTERSECT nests. The planner is aware that it mustn't push down upper-level quals into subqueries if the quals reference subquery output columns that contain set-returning functions or volatile functions, or are non-DISTINCT outputs of a DISTINCT ON subquery. However, it missed making this check when there were one or more levels of UNION or INTERSECT above the dangerous expression. This could lead to "set-valued function called in context that cannot accept a set" errors, as seen in bug #8213 from Eric Soroos, or to silently wrong answers in the other cases. To fix, refactor the checks so that we make the column-is-unsafe checks during subquery_is_pushdown_safe(), which already has to recursively inspect all arms of a set-operation tree. This makes qual_is_pushdown_safe() considerably simpler, at the cost that we will spend some cycles checking output columns that possibly aren't referenced in any upper qual. But the cases where this code gets executed at all are already nontrivial queries, so it's unlikely anybody will notice any slowdown of planning. This has been broken since commit 05f916e6add9726bf4ee046e4060c1b03c9961f2, which makes the bug over ten years old. A bit surprising nobody noticed it before now. --- src/backend/optimizer/path/allpaths.c | 216 +++++++++++++++----------- src/test/regress/expected/union.out | 90 +++++++++++ src/test/regress/sql/union.sql | 42 +++++ 3 files changed, 257 insertions(+), 91 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 7e3f166d15..08acb1d01b 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -69,13 +69,14 @@ static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes); + bool *unsafeColumns); static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes); + bool *unsafeColumns); +static void check_output_expressions(Query *subquery, bool *unsafeColumns); static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes); + bool *unsafeColumns); static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes); + bool *unsafeColumns); static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, @@ -705,7 +706,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, { Query *parse = root->parse; Query *subquery = rte->subquery; - bool *differentTypes; + bool *unsafeColumns; double tuple_fraction; PlannerInfo *subroot; List *pathkeys; @@ -717,8 +718,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, */ subquery = copyObject(subquery); - /* We need a workspace for keeping track of set-op type coercions */ - differentTypes = (bool *) + /* + * We need a workspace for keeping track of unsafe-to-reference columns. + * unsafeColumns[i] is set TRUE if we've found that output column i of the + * subquery is unsafe to use in a pushed-down qual. + */ + unsafeColumns = (bool *) palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); /* @@ -742,7 +747,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, * push down a pushable qual, because it'd result in a worse plan? */ if (rel->baserestrictinfo != NIL && - subquery_is_pushdown_safe(subquery, subquery, differentTypes)) + subquery_is_pushdown_safe(subquery, subquery, unsafeColumns)) { /* OK to consider pushing down individual quals */ List *upperrestrictlist = NIL; @@ -754,7 +759,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Node *clause = (Node *) rinfo->clause; if (!rinfo->pseudoconstant && - qual_is_pushdown_safe(subquery, rti, clause, differentTypes)) + qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns)) { /* Push it down */ subquery_push_qual(subquery, rte, rti, clause); @@ -768,7 +773,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, rel->baserestrictinfo = upperrestrictlist; } - pfree(differentTypes); + pfree(unsafeColumns); /* * We can safely pass the outer tuple_fraction down to the subquery if the @@ -1169,17 +1174,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * quals into it, because that could change the results. * - * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can - * push quals into each component query, but the quals can only reference - * subquery columns that suffer no type coercions in the set operation. - * Otherwise there are possible semantic gotchas. So, we check the - * component queries to see if any of them have different output types; - * differentTypes[k] is set true if column k has different type in any - * component. + * In addition, we make several checks on the subquery's output columns + * to see if it is safe to reference them in pushed-down quals. If output + * column k is found to be unsafe to reference, we set unsafeColumns[k] to + * TRUE, but we don't reject the subquery overall since column k might + * not be referenced by some/all quals. The unsafeColumns[] array will be + * consulted later by qual_is_pushdown_safe(). It's better to do it this + * way than to make the checks directly in qual_is_pushdown_safe(), because + * when the subquery involves set operations we have to check the output + * expressions in each arm of the set op. */ static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, - bool *differentTypes) + bool *unsafeColumns) { SetOperationStmt *topop; @@ -1191,13 +1198,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->hasWindowFuncs) return false; + /* + * If we're at a leaf query, check for unsafe expressions in its target + * list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in + * setop trees have only simple Vars in their tlists, so no need to check + * them.) + */ + if (subquery->setOperations == NULL) + check_output_expressions(subquery, unsafeColumns); + /* Are we at top level, or looking at a setop component? */ if (subquery == topquery) { /* Top level, so check any component queries */ if (subquery->setOperations != NULL) if (!recurse_pushdown_safe(subquery->setOperations, topquery, - differentTypes)) + unsafeColumns)) return false; } else @@ -1210,7 +1226,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, Assert(topop && IsA(topop, SetOperationStmt)); compare_tlist_datatypes(subquery->targetList, topop->colTypes, - differentTypes); + unsafeColumns); } return true; } @@ -1220,7 +1236,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, */ static bool recurse_pushdown_safe(Node *setOp, Query *topquery, - bool *differentTypes) + bool *unsafeColumns) { if (IsA(setOp, RangeTblRef)) { @@ -1229,19 +1245,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, Query *subquery = rte->subquery; Assert(subquery != NULL); - return subquery_is_pushdown_safe(subquery, topquery, differentTypes); + return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns); } else if (IsA(setOp, SetOperationStmt)) { SetOperationStmt *op = (SetOperationStmt *) setOp; - /* EXCEPT is no good */ + /* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */ if (op->op == SETOP_EXCEPT) return false; /* Else recurse */ - if (!recurse_pushdown_safe(op->larg, topquery, differentTypes)) + if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns)) return false; - if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes)) + if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns)) return false; } else @@ -1253,17 +1269,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery, } /* - * Compare tlist's datatypes against the list of set-operation result types. - * For any items that are different, mark the appropriate element of - * differentTypes[] to show that this column will have type conversions. + * check_output_expressions - check subquery's output expressions for safety + * + * There are several cases in which it's unsafe to push down an upper-level + * qual if it references a particular output column of a subquery. We check + * each output column of the subquery and set unsafeColumns[k] to TRUE if + * that column is unsafe for a pushed-down qual to reference. The conditions + * checked here are: + * + * 1. We must not push down any quals that refer to subselect outputs that + * return sets, else we'd introduce functions-returning-sets into the + * subquery's WHERE/HAVING quals. + * + * 2. We must not push down any quals that refer to subselect outputs that + * contain volatile functions, for fear of introducing strange results due + * to multiple evaluation of a volatile function. + * + * 3. If the subquery uses DISTINCT ON, we must not push down any quals that + * refer to non-DISTINCT output columns, because that could change the set + * of rows returned. (This condition is vacuous for DISTINCT, because then + * there are no non-DISTINCT output columns, so we needn't check. But note + * we are assuming that the qual can't distinguish values that the DISTINCT + * operator sees as equal. This is a bit shaky but we have no way to test + * for the case, and it's unlikely enough that we shouldn't refuse the + * optimization just because it could theoretically happen.) + */ +static void +check_output_expressions(Query *subquery, bool *unsafeColumns) +{ + ListCell *lc; + + foreach(lc, subquery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; /* ignore resjunk columns */ + + /* We need not check further if output col is already known unsafe */ + if (unsafeColumns[tle->resno]) + continue; + + /* Functions returning sets are unsafe (point 1) */ + if (expression_returns_set((Node *) tle->expr)) + { + unsafeColumns[tle->resno] = true; + continue; + } + + /* Volatile functions are unsafe (point 2) */ + if (contain_volatile_functions((Node *) tle->expr)) + { + unsafeColumns[tle->resno] = true; + continue; + } + + /* If subquery uses DISTINCT ON, check point 3 */ + if (subquery->hasDistinctOn && + !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) + { + /* non-DISTINCT column, so mark it unsafe */ + unsafeColumns[tle->resno] = true; + continue; + } + } +} + +/* + * For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can + * push quals into each component query, but the quals can only reference + * subquery columns that suffer no type coercions in the set operation. + * Otherwise there are possible semantic gotchas. So, we check the + * component queries to see if any of them have output types different from + * the top-level setop outputs. unsafeColumns[k] is set true if column k + * has different type in any component. * * We don't have to care about typmods here: the only allowed difference * between set-op input and output typmods is input is a specific typmod * and output is -1, and that does not require a coercion. + * + * tlist is a subquery tlist. + * colTypes is an OID list of the top-level setop's output column types. + * unsafeColumns[] is the result array. */ static void compare_tlist_datatypes(List *tlist, List *colTypes, - bool *differentTypes) + bool *unsafeColumns) { ListCell *l; ListCell *colType = list_head(colTypes); @@ -1277,7 +1368,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes, if (colType == NULL) elog(ERROR, "wrong number of tlist entries"); if (exprType((Node *) tle->expr) != lfirst_oid(colType)) - differentTypes[tle->resno] = true; + unsafeColumns[tle->resno] = true; colType = lnext(colType); } if (colType != NULL) @@ -1300,34 +1391,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes, * (since there is no easy way to name that within the subquery itself). * * 3. The qual must not refer to any subquery output columns that were - * found to have inconsistent types across a set operation tree by - * subquery_is_pushdown_safe(). - * - * 4. If the subquery uses DISTINCT ON, we must not push down any quals that - * refer to non-DISTINCT output columns, because that could change the set - * of rows returned. (This condition is vacuous for DISTINCT, because then - * there are no non-DISTINCT output columns, so we needn't check. But note - * we are assuming that the qual can't distinguish values that the DISTINCT - * operator sees as equal. This is a bit shaky but we have no way to test - * for the case, and it's unlikely enough that we shouldn't refuse the - * optimization just because it could theoretically happen.) - * - * 5. We must not push down any quals that refer to subselect outputs that - * return sets, else we'd introduce functions-returning-sets into the - * subquery's WHERE/HAVING quals. - * - * 6. We must not push down any quals that refer to subselect outputs that - * contain volatile functions, for fear of introducing strange results due - * to multiple evaluation of a volatile function. + * found to be unsafe to reference by subquery_is_pushdown_safe(). */ static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, - bool *differentTypes) + bool *unsafeColumns) { bool safe = true; List *vars; ListCell *vl; - Bitmapset *tested = NULL; /* Refuse subselects (point 1) */ if (contain_subplans(qual)) @@ -1350,7 +1422,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, foreach(vl, vars) { Var *var = (Var *) lfirst(vl); - TargetEntry *tle; /* * XXX Punt if we find any PlaceHolderVars in the restriction clause. @@ -1366,6 +1437,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, } Assert(var->varno == rti); + Assert(var->varattno >= 0); /* Check point 2 */ if (var->varattno == 0) @@ -1374,45 +1446,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, break; } - /* - * We use a bitmapset to avoid testing the same attno more than once. - * (NB: this only works because subquery outputs can't have negative - * attnos.) - */ - if (bms_is_member(var->varattno, tested)) - continue; - tested = bms_add_member(tested, var->varattno); - /* Check point 3 */ - if (differentTypes[var->varattno]) - { - safe = false; - break; - } - - /* Must find the tlist element referenced by the Var */ - tle = get_tle_by_resno(subquery->targetList, var->varattno); - Assert(tle != NULL); - Assert(!tle->resjunk); - - /* If subquery uses DISTINCT ON, check point 4 */ - if (subquery->hasDistinctOn && - !targetIsInSortList(tle, InvalidOid, subquery->distinctClause)) - { - /* non-DISTINCT column, so fail */ - safe = false; - break; - } - - /* Refuse functions returning sets (point 5) */ - if (expression_returns_set((Node *) tle->expr)) - { - safe = false; - break; - } - - /* Refuse volatile functions (point 6) */ - if (contain_volatile_functions((Node *) tle->expr)) + if (unsafeColumns[var->varattno]) { safe = false; break; @@ -1420,7 +1455,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual, } list_free(vars); - bms_free(tested); return safe; } diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 8db4b6d02a..2c30e485c8 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -517,3 +517,93 @@ explain (costs off) -> Seq Scan on tenk1 b (3 rows) +-- Test that we push quals into UNION sub-selects only when it's safe +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + QUERY PLAN +-------------------------------------------- + Unique + -> Sort + Sort Key: (1), (2) + -> Append + -> Result + -> Result + One-Time Filter: false +(7 rows) + +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + t | x +---+--- + 1 | 2 +(1 row) + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: ss.x + -> Subquery Scan on ss + Filter: (ss.x < 4) + -> Unique + -> Sort + Sort Key: (1), (generate_series(1, 10)) + -> Append + -> Result + -> Result +(10 rows) + +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + t | x +---+--- + 1 | 1 + 1 | 2 + 1 | 3 +(3 rows) + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; + QUERY PLAN +---------------------------------------------------------------------------- + Subquery Scan on ss + Filter: (ss.x > 3) + -> Unique + -> Sort + Sort Key: (1), (((random() * 3::double precision))::integer) + -> Append + -> Result + -> Result +(8 rows) + +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; + t | x +---+--- + 2 | 4 +(1 row) + diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index 752ae470f0..f3c9d11382 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -207,3 +207,45 @@ explain (costs off) UNION ALL SELECT 2 AS t, * FROM tenk1 b) c WHERE t = 2; + +-- Test that we push quals into UNION sub-selects only when it's safe +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + +SELECT * FROM + (SELECT 1 AS t, 2 AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4; + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + +SELECT * FROM + (SELECT 1 AS t, generate_series(1,10) AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x < 4 +ORDER BY x; + +explain (costs off) +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; + +SELECT * FROM + (SELECT 1 AS t, (random()*3)::int AS x + UNION + SELECT 2 AS t, 4 AS x) ss +WHERE x > 3; -- 2.40.0