From: Tom Lane Date: Mon, 8 Jul 2013 02:37:24 +0000 (-0400) Subject: Fix planning of parameterized appendrel paths with expensive join quals. X-Git-Tag: REL9_4_BETA1~1351 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5372275b4b5fc183c6c6dd4517cfd74d5b641446;p=postgresql Fix planning of parameterized appendrel paths with expensive join quals. The code in set_append_rel_pathlist() for building parameterized paths for append relations (inheritance and UNION ALL combinations) supposed that the cheapest regular path for a child relation would still be cheapest when reparameterized. Which might not be the case, particularly if the added join conditions are expensive to compute, as in a recent example from Jeff Janes. Fix it to compare child path costs *after* reparameterizing. We can short-circuit that if the cheapest pre-existing path is already parameterized correctly, which seems likely to be true often enough to be worth checking for. Back-patch to 9.2 where parameterized paths were introduced. --- diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b84bbba68c..4b8a73d60f 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -68,6 +68,9 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, List *all_child_pathkeys); +static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, + RelOptInfo *rel, + Relids required_outer); static List *accumulate_append_subpath(List *subpaths, Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -831,28 +834,18 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, foreach(lcr, live_childrels) { RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); - Path *cheapest_total; + Path *subpath; - cheapest_total = - get_cheapest_path_for_pathkeys(childrel->pathlist, - NIL, - required_outer, - TOTAL_COST); - Assert(cheapest_total != NULL); - - /* Children must have exactly the desired parameterization */ - if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer)) + subpath = get_cheapest_parameterized_child_path(root, + childrel, + required_outer); + if (subpath == NULL) { - cheapest_total = reparameterize_path(root, cheapest_total, - required_outer, 1.0); - if (cheapest_total == NULL) - { - subpaths_valid = false; - break; - } + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; } - - subpaths = accumulate_append_subpath(subpaths, cheapest_total); + subpaths = accumulate_append_subpath(subpaths, subpath); } if (subpaths_valid) @@ -962,6 +955,79 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, } } +/* + * get_cheapest_parameterized_child_path + * Get cheapest path for this relation that has exactly the requested + * parameterization. + * + * Returns NULL if unable to create such a path. + */ +static Path * +get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *cheapest; + ListCell *lc; + + /* + * Look up the cheapest existing path with no more than the needed + * parameterization. If it has exactly the needed parameterization, we're + * done. + */ + cheapest = get_cheapest_path_for_pathkeys(rel->pathlist, + NIL, + required_outer, + TOTAL_COST); + Assert(cheapest != NULL); + if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer)) + return cheapest; + + /* + * Otherwise, we can "reparameterize" an existing path to match the given + * parameterization, which effectively means pushing down additional + * joinquals to be checked within the path's scan. However, some existing + * paths might check the available joinquals already while others don't; + * therefore, it's not clear which existing path will be cheapest after + * reparameterization. We have to go through them all and find out. + */ + cheapest = NULL; + foreach(lc, rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* Can't use it if it needs more than requested parameterization */ + if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + continue; + + /* + * Reparameterization can only increase the path's cost, so if it's + * already more expensive than the current cheapest, forget it. + */ + if (cheapest != NULL && + compare_path_costs(cheapest, path, TOTAL_COST) <= 0) + continue; + + /* Reparameterize if needed, then recheck cost */ + if (!bms_equal(PATH_REQ_OUTER(path), required_outer)) + { + path = reparameterize_path(root, path, required_outer, 1.0); + if (path == NULL) + continue; /* failed to reparameterize this one */ + Assert(bms_equal(PATH_REQ_OUTER(path), required_outer)); + + if (cheapest != NULL && + compare_path_costs(cheapest, path, TOTAL_COST) <= 0) + continue; + } + + /* We have a new best path */ + cheapest = path; + } + + /* Return the best path, or NULL if we found no suitable candidate */ + return cheapest; +} + /* * accumulate_append_subpath * Add a subpath to the list being built for an Append or MergeAppend diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 435f50d050..bf8f1bcaf7 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -603,3 +603,37 @@ WHERE x > 3; 2 | 4 (1 row) +-- Test proper handling of parameterized appendrel paths when the +-- potential join qual is expensive +create function expensivefunc(int) returns int +language plpgsql immutable strict cost 10000 +as $$begin return $1; end$$; +create temp table t3 as select generate_series(-1000,1000) as x; +create index t3i on t3 (expensivefunc(x)); +analyze t3; +explain (costs off) +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); + QUERY PLAN +------------------------------------------------------------ + Nested Loop + -> Seq Scan on int4_tbl + -> Append + -> Index Scan using t3i on t3 a + Index Cond: (expensivefunc(x) = int4_tbl.f1) + -> Index Scan using t3i on t3 b + Index Cond: (expensivefunc(x) = int4_tbl.f1) +(7 rows) + +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); + x | f1 +---+---- + 0 | 0 + 0 | 0 +(2 rows) + +drop table t3; +drop function expensivefunc(int); diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index f3c9d11382..6194ce4751 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -249,3 +249,24 @@ SELECT * FROM UNION SELECT 2 AS t, 4 AS x) ss WHERE x > 3; + +-- Test proper handling of parameterized appendrel paths when the +-- potential join qual is expensive +create function expensivefunc(int) returns int +language plpgsql immutable strict cost 10000 +as $$begin return $1; end$$; + +create temp table t3 as select generate_series(-1000,1000) as x; +create index t3i on t3 (expensivefunc(x)); +analyze t3; + +explain (costs off) +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); +select * from + (select * from t3 a union all select * from t3 b) ss + join int4_tbl on f1 = expensivefunc(x); + +drop table t3; +drop function expensivefunc(int);