Drop cheap-startup-cost paths during add_path() if we don't need them.

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 01f381e9bada55257ff7451ceeb1feff79804b94..1f2bb6cc72f1242f14d55eee7cdc8e0e0d0775a9 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1733,6 +1733,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
         WRITE_BITMAPSET_FIELD(relids);
         WRITE_FLOAT_FIELD(rows, "%.0f");
         WRITE_INT_FIELD(width);
+       WRITE_BOOL_FIELD(consider_startup);
         WRITE_NODE_FIELD(reltargetlist);
         WRITE_NODE_FIELD(pathlist);
         WRITE_NODE_FIELD(ppilist);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 8677ff20fd545c805825e0a14769d9dfb10d6a00..d827b4959b6e910aefa9bfae87b9eada2eb98d56 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -139,6 +139,9 @@ compare_fractional_path_costs(Path *path1, Path *path2,
   * total cost, we just say that their costs are "different", since neither
   * dominates the other across the whole performance spectrum.
   *
+ * If consider_startup is false, then we don't care about keeping paths with
+ * good startup cost, so we'll never return COSTS_DIFFERENT.
+ *
   * This function also includes special hacks to support a policy enforced
   * by its sole caller, add_path(): paths that have any parameterization
   * cannot win comparisons on the grounds of having cheaper startup cost,
@@ -146,7 +149,8 @@ compare_fractional_path_costs(Path *path1, Path *path2,
   * (Unparameterized paths are more common, so we check for this case last.)
   */
  static PathCostComparison
-compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
+compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
+                                                  bool consider_startup)
  {
         /*
          * Check total cost first since it's more likely to be different; many
@@ -155,7 +159,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
         if (path1->total_cost > path2->total_cost * fuzz_factor)
         {
                 /* path1 fuzzily worse on total cost */
-               if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+               if (consider_startup &&
+                       path2->startup_cost > path1->startup_cost * fuzz_factor &&
                         path1->param_info == NULL)
                 {
                         /* ... but path2 fuzzily worse on startup, so DIFFERENT */
@@ -167,7 +172,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
         if (path2->total_cost > path1->total_cost * fuzz_factor)
         {
                 /* path2 fuzzily worse on total cost */
-               if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+               if (consider_startup &&
+                       path1->startup_cost > path2->startup_cost * fuzz_factor &&
                         path2->param_info == NULL)
                 {
                         /* ... but path1 fuzzily worse on startup, so DIFFERENT */
@@ -177,6 +183,7 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
                 return COSTS_BETTER1;
         }
         /* fuzzily the same on total cost */
+       /* (so we may as well compare startup cost, even if !consider_startup) */
         if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
                 path2->param_info == NULL)
         {
@@ -360,6 +367,9 @@ set_cheapest(RelOptInfo *parent_rel)
   *       reduce the number of parameterized paths that are kept.  See discussion
   *       in src/backend/optimizer/README.
   *
+ *       Another policy that is enforced here is that we only consider cheap
+ *       startup cost to be interesting if parent_rel->consider_startup is true.
+ *
   *       The pathlist is kept sorted by total_cost, with cheaper paths
   *       at the front.  Within this routine, that's simply a speed hack:
   *       doing it that way makes it more likely that we will reject an inferior
@@ -423,7 +433,8 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                  * Do a fuzzy cost comparison with 1% fuzziness limit.  (XXX does this
                  * percentage need to be user-configurable?)
                  */
-               costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01);
+               costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
+                                                                                        parent_rel->consider_startup);
  
                 /*
                  * If the two paths compare differently for startup and total cost,
@@ -488,8 +499,10 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                                                                         remove_old = true;      /* new dominates old */
                                                                 else if (new_path->rows > old_path->rows)
                                                                         accept_new = false; /* old dominates new */
-                                                               else if (compare_path_costs_fuzzily(new_path, old_path,
-                                                                                         1.0000000001) == COSTS_BETTER1)
+                                                               else if (compare_path_costs_fuzzily(new_path,
+                                                                                                                                       old_path,
+                                                                                                                                       1.0000000001,
+                                                                                                                                       parent_rel->consider_startup) == COSTS_BETTER1)
                                                                         remove_old = true;      /* new dominates old */
                                                                 else
                                                                         accept_new = false; /* old equals or
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c

index 680184c0e9f2a9ca9dbb9f17be72b2d17a6f273f..3878c3752fef56e43bbe12d8c1613698993d0d1d 100644 (file)
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -99,6 +99,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
         rel->relids = bms_make_singleton(relid);
         rel->rows = 0;
         rel->width = 0;
+       /* cheap startup cost is interesting iff not all tuples to be retrieved */
+       rel->consider_startup = (root->tuple_fraction > 0);
         rel->reltargetlist = NIL;
         rel->pathlist = NIL;
         rel->ppilist = NIL;
@@ -354,6 +356,8 @@ build_join_rel(PlannerInfo *root,
         joinrel->relids = bms_copy(joinrelids);
         joinrel->rows = 0;
         joinrel->width = 0;
+       /* cheap startup cost is interesting iff not all tuples to be retrieved */
+       joinrel->consider_startup = (root->tuple_fraction > 0);
         joinrel->reltargetlist = NIL;
         joinrel->pathlist = NIL;
         joinrel->ppilist = NIL;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 7b4eaf4f5e540158ac2f88eba232ad91e17dc8f2..af9425ad9b309fbecd65d1a83a229ddfd447f382 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -299,6 +299,8 @@ typedef struct PlannerInfo
   *                        clauses have been applied (ie, output rows of a plan for it)
   *             width - avg. number of bytes per tuple in the relation after the
   *                             appropriate projections have been done (ie, output width)
+ *             consider_startup - true if there is any value in keeping paths for
+ *                                                this rel on the basis of having cheap startup cost
   *             reltargetlist - List of Var and PlaceHolderVar nodes for the values
   *                                             we need to output from this relation.
   *                                             List is in no particular order, but all rels of an
@@ -405,6 +407,9 @@ typedef struct RelOptInfo
         double          rows;                   /* estimated number of result tuples */
         int                     width;                  /* estimated avg width of result tuples */
  
+       /* per-relation planner control flags */
+       bool            consider_startup;       /* keep cheap-startup-cost paths? */
+
         /* materialization information */
         List       *reltargetlist;      /* Vars to be output by scan of relation */
         List       *pathlist;           /* Path structures */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 914a6fd84473fa97302e3c3ca8320d35dea3fcb3..299ea06de37385579b29d68cab41987a715b015f 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3437,6 +3437,7 @@ select v.* from
  
  create temp table dual();
  insert into dual default values;
+analyze dual;
  select v.* from
    (int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
    left join int4_tbl z on z.f1 = x.q2,
@@ -3446,21 +3447,21 @@ select v.* from
                 123 |                  
                 456 |                  
                 123 |  4567890123456789
-  4567890123456789 |               123
+  4567890123456789 | -4567890123456789
                 123 |  4567890123456789
    4567890123456789 |  4567890123456789
                 123 |  4567890123456789
-  4567890123456789 | -4567890123456789
    4567890123456789 |               123
-               123 |               456
    4567890123456789 |               123
                 123 |  4567890123456789
-  4567890123456789 |  4567890123456789
    4567890123456789 |               123
+               123 |               456
    4567890123456789 |  4567890123456789
+  4567890123456789 | -4567890123456789
    4567890123456789 |  4567890123456789
    4567890123456789 |  4567890123456789
-  4567890123456789 | -4567890123456789
+  4567890123456789 |  4567890123456789
+  4567890123456789 |               123
    4567890123456789 |                  
   -4567890123456789 |                  
  (20 rows)
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index fcc6572709de95739c65760d20e07437389335fc..c425be917aa916ae37d0544d725c5c775fe59630 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -937,6 +937,7 @@ select v.* from
    lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
  create temp table dual();
  insert into dual default values;
+analyze dual;
  select v.* from
    (int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
    left join int4_tbl z on z.f1 = x.q2,
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
src/backend/nodes/outfuncs.c		patch | blob | history
src/backend/optimizer/util/pathnode.c		patch | blob | history
src/backend/optimizer/util/relnode.c		patch | blob | history
src/include/nodes/relation.h		patch | blob | history
src/test/regress/expected/join.out		patch | blob | history
src/test/regress/sql/join.sql		patch | blob | history