]> granicus.if.org Git - postgresql/commitdiff
Fix planning of parameterized appendrel paths with expensive join quals.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 8 Jul 2013 02:37:28 +0000 (22:37 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 8 Jul 2013 02:37:28 +0000 (22:37 -0400)
The code in set_append_rel_pathlist() for building parameterized paths
for append relations (inheritance and UNION ALL combinations) supposed
that the cheapest regular path for a child relation would still be cheapest
when reparameterized.  Which might not be the case, particularly if the
added join conditions are expensive to compute, as in a recent example from
Jeff Janes.  Fix it to compare child path costs *after* reparameterizing.
We can short-circuit that if the cheapest pre-existing path is already
parameterized correctly, which seems likely to be true often enough to be
worth checking for.

Back-patch to 9.2 where parameterized paths were introduced.

src/backend/optimizer/path/allpaths.c
src/test/regress/expected/union.out
src/test/regress/sql/union.sql

index b84bbba68c9e51a3eed5b0913d8156bd93168712..4b8a73d60f42a414b49a481e9911a810e7caeed1 100644 (file)
@@ -68,6 +68,9 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
                                                   List *live_childrels,
                                                   List *all_child_pathkeys);
+static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
+                                                                         RelOptInfo *rel,
+                                                                         Relids required_outer);
 static List *accumulate_append_subpath(List *subpaths, Path *path);
 static void set_dummy_rel_pathlist(RelOptInfo *rel);
 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -831,28 +834,18 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                foreach(lcr, live_childrels)
                {
                        RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
-                       Path       *cheapest_total;
+                       Path       *subpath;
 
-                       cheapest_total =
-                               get_cheapest_path_for_pathkeys(childrel->pathlist,
-                                                                                          NIL,
-                                                                                          required_outer,
-                                                                                          TOTAL_COST);
-                       Assert(cheapest_total != NULL);
-
-                       /* Children must have exactly the desired parameterization */
-                       if (!bms_equal(PATH_REQ_OUTER(cheapest_total), required_outer))
+                       subpath = get_cheapest_parameterized_child_path(root,
+                                                                                                                       childrel,
+                                                                                                                       required_outer);
+                       if (subpath == NULL)
                        {
-                               cheapest_total = reparameterize_path(root, cheapest_total,
-                                                                                                        required_outer, 1.0);
-                               if (cheapest_total == NULL)
-                               {
-                                       subpaths_valid = false;
-                                       break;
-                               }
+                               /* failed to make a suitable path for this child */
+                               subpaths_valid = false;
+                               break;
                        }
-
-                       subpaths = accumulate_append_subpath(subpaths, cheapest_total);
+                       subpaths = accumulate_append_subpath(subpaths, subpath);
                }
 
                if (subpaths_valid)
@@ -962,6 +955,79 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
        }
 }
 
+/*
+ * get_cheapest_parameterized_child_path
+ *             Get cheapest path for this relation that has exactly the requested
+ *             parameterization.
+ *
+ * Returns NULL if unable to create such a path.
+ */
+static Path *
+get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
+                                                                         Relids required_outer)
+{
+       Path       *cheapest;
+       ListCell   *lc;
+
+       /*
+        * Look up the cheapest existing path with no more than the needed
+        * parameterization.  If it has exactly the needed parameterization, we're
+        * done.
+        */
+       cheapest = get_cheapest_path_for_pathkeys(rel->pathlist,
+                                                                                         NIL,
+                                                                                         required_outer,
+                                                                                         TOTAL_COST);
+       Assert(cheapest != NULL);
+       if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer))
+               return cheapest;
+
+       /*
+        * Otherwise, we can "reparameterize" an existing path to match the given
+        * parameterization, which effectively means pushing down additional
+        * joinquals to be checked within the path's scan.  However, some existing
+        * paths might check the available joinquals already while others don't;
+        * therefore, it's not clear which existing path will be cheapest after
+        * reparameterization.  We have to go through them all and find out.
+        */
+       cheapest = NULL;
+       foreach(lc, rel->pathlist)
+       {
+               Path       *path = (Path *) lfirst(lc);
+
+               /* Can't use it if it needs more than requested parameterization */
+               if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+                       continue;
+
+               /*
+                * Reparameterization can only increase the path's cost, so if it's
+                * already more expensive than the current cheapest, forget it.
+                */
+               if (cheapest != NULL &&
+                       compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+                       continue;
+
+               /* Reparameterize if needed, then recheck cost */
+               if (!bms_equal(PATH_REQ_OUTER(path), required_outer))
+               {
+                       path = reparameterize_path(root, path, required_outer, 1.0);
+                       if (path == NULL)
+                               continue;               /* failed to reparameterize this one */
+                       Assert(bms_equal(PATH_REQ_OUTER(path), required_outer));
+
+                       if (cheapest != NULL &&
+                               compare_path_costs(cheapest, path, TOTAL_COST) <= 0)
+                               continue;
+               }
+
+               /* We have a new best path */
+               cheapest = path;
+       }
+
+       /* Return the best path, or NULL if we found no suitable candidate */
+       return cheapest;
+}
+
 /*
  * accumulate_append_subpath
  *             Add a subpath to the list being built for an Append or MergeAppend
index 435f50d050ddd072ec50e2112e44157f2bd55bc0..bf8f1bcaf77f7751a45e72693d66d6857ecb995d 100644 (file)
@@ -603,3 +603,37 @@ WHERE x > 3;
  2 | 4
 (1 row)
 
+-- Test proper handling of parameterized appendrel paths when the
+-- potential join qual is expensive
+create function expensivefunc(int) returns int
+language plpgsql immutable strict cost 10000
+as $$begin return $1; end$$;
+create temp table t3 as select generate_series(-1000,1000) as x;
+create index t3i on t3 (expensivefunc(x));
+analyze t3;
+explain (costs off)
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on int4_tbl
+   ->  Append
+         ->  Index Scan using t3i on t3 a
+               Index Cond: (expensivefunc(x) = int4_tbl.f1)
+         ->  Index Scan using t3i on t3 b
+               Index Cond: (expensivefunc(x) = int4_tbl.f1)
+(7 rows)
+
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+ x | f1 
+---+----
+ 0 |  0
+ 0 |  0
+(2 rows)
+
+drop table t3;
+drop function expensivefunc(int);
index f3c9d113827eb240d055bfc32a31a60d89038dbc..6194ce475110944abb69951b70c66ef368a8834a 100644 (file)
@@ -249,3 +249,24 @@ SELECT * FROM
    UNION
    SELECT 2 AS t, 4 AS x) ss
 WHERE x > 3;
+
+-- Test proper handling of parameterized appendrel paths when the
+-- potential join qual is expensive
+create function expensivefunc(int) returns int
+language plpgsql immutable strict cost 10000
+as $$begin return $1; end$$;
+
+create temp table t3 as select generate_series(-1000,1000) as x;
+create index t3i on t3 (expensivefunc(x));
+analyze t3;
+
+explain (costs off)
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+select * from
+  (select * from t3 a union all select * from t3 b) ss
+  join int4_tbl on f1 = expensivefunc(x);
+
+drop table t3;
+drop function expensivefunc(int);