From: Tom Lane Date: Tue, 8 Jul 2014 18:03:16 +0000 (-0400) Subject: Don't assume a subquery's output is unique if there's a SRF in its tlist. X-Git-Tag: REL9_4_BETA2~33 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ac45aa1dde2dd690682bf74f7535ba6d1a87bff8;p=postgresql Don't assume a subquery's output is unique if there's a SRF in its tlist. While the x output of "select x from t group by x" can be presumed unique, this does not hold for "select x, generate_series(1,10) from t group by x", because we may expand the set-returning function after the grouping step. (Perhaps that should be re-thought; but considering all the other oddities involved with SRFs in targetlists, it seems unlikely we'll change it.) Put a check in query_is_distinct_for() so it's not fooled by such cases. Back-patch to all supported branches. David Rowley --- diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 4e05dcd246..d129f8d65e 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1473,6 +1473,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) Assert(list_length(colnos) == list_length(opids)); + /* + * A set-returning function in the query's targetlist can result in + * returning duplicate rows, if the SRF is evaluated after the + * de-duplication step; so we play it safe and say "no" if there are any + * SRFs. (We could be certain that it's okay if SRFs appear only in the + * specified columns, since those must be evaluated before de-duplication; + * but it doesn't presently seem worth the complication to check that.) + */ + if (expression_returns_set((Node *) query->targetList)) + return false; + /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the * columns in the DISTINCT clause appear in colnos and operator semantics diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 0f070ef93c..d85a7170f3 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -739,6 +739,41 @@ select * from int4_tbl where 0 (1 row) +-- +-- Check for incorrect optimization when IN subquery contains a SRF +-- +explain (verbose, costs off) +select * from int4_tbl o where (f1, f1) in + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + QUERY PLAN +---------------------------------------------------------------------- + Hash Join + Output: o.f1 + Hash Cond: (o.f1 = "ANY_subquery".f1) + -> Seq Scan on public.int4_tbl o + Output: o.f1 + -> Hash + Output: "ANY_subquery".f1, "ANY_subquery".g + -> HashAggregate + Output: "ANY_subquery".f1, "ANY_subquery".g + Group Key: "ANY_subquery".f1, "ANY_subquery".g + -> Subquery Scan on "ANY_subquery" + Output: "ANY_subquery".f1, "ANY_subquery".g + Filter: ("ANY_subquery".f1 = "ANY_subquery".g) + -> HashAggregate + Output: i.f1, (generate_series(1, 2) / 10) + Group Key: i.f1 + -> Seq Scan on public.int4_tbl i + Output: i.f1 +(18 rows) + +select * from int4_tbl o where (f1, f1) in + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + f1 +---- + 0 +(1 row) + -- -- Check that volatile quals aren't pushed down past a DISTINCT: -- nextval() should not be called more than the nominal number of times diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index b3fb03c97f..c3b4773488 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -423,6 +423,15 @@ select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in (select ten from tenk1 b); +-- +-- Check for incorrect optimization when IN subquery contains a SRF +-- +explain (verbose, costs off) +select * from int4_tbl o where (f1, f1) in + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); +select * from int4_tbl o where (f1, f1) in + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + -- -- Check that volatile quals aren't pushed down past a DISTINCT: -- nextval() should not be called more than the nominal number of times