granicus.if.org Git - postgresql/commitdiff
Drop cheap-startup-cost paths during add_path() if we don't need them.
author: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 1 Sep 2012 22:16:24 +0000 (18:16 -0400)
We can detect whether the planner top level is going to care at all about
cheap startup cost (it will only do so if query_planner's tuple_fraction
argument is greater than zero).  If it isn't, we might as well immediately
discard paths whose only advantage over others is cheap startup cost.
This turns out to get rid of quite a lot of paths in complex queries ---
I saw planner runtime reduction of more than a third on one large query.

Since add_path isn't currently passed the PlannerInfo "root", the easiest
way to tell it whether to do this was to add a bool flag to RelOptInfo.
That's a bit redundant, since all relations in a given query level will
have the same setting.  But in the future it's possible that we'd refine
the control decision to work on a per-relation basis, so this seems like
a good arrangement anyway.

Per my suggestion of a few months ago.

src/backend/nodes/outfuncs.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/relnode.c
src/include/nodes/relation.h
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index 01f381e9bada55257ff7451ceeb1feff79804b94..1f2bb6cc72f1242f14d55eee7cdc8e0e0d0775a9 100644 (file)
@@ -1733,6 +1733,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
        WRITE_BITMAPSET_FIELD(relids);
        WRITE_FLOAT_FIELD(rows, "%.0f");
        WRITE_INT_FIELD(width);
+       WRITE_BOOL_FIELD(consider_startup);
        WRITE_NODE_FIELD(reltargetlist);
        WRITE_NODE_FIELD(pathlist);
        WRITE_NODE_FIELD(ppilist);
index 8677ff20fd545c805825e0a14769d9dfb10d6a00..d827b4959b6e910aefa9bfae87b9eada2eb98d56 100644 (file)
@@ -139,6 +139,9 @@ compare_fractional_path_costs(Path *path1, Path *path2,
  * total cost, we just say that their costs are "different", since neither
  * dominates the other across the whole performance spectrum.
  *
+ * If consider_startup is false, then we don't care about keeping paths with
+ * good startup cost, so we'll never return COSTS_DIFFERENT.
+ *
  * This function also includes special hacks to support a policy enforced
  * by its sole caller, add_path(): paths that have any parameterization
  * cannot win comparisons on the grounds of having cheaper startup cost,
@@ -146,7 +149,8 @@ compare_fractional_path_costs(Path *path1, Path *path2,
  * (Unparameterized paths are more common, so we check for this case last.)
  */
 static PathCostComparison
-compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
+compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
+                                                  bool consider_startup)
 {
        /*
         * Check total cost first since it's more likely to be different; many
@@ -155,7 +159,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
        if (path1->total_cost > path2->total_cost * fuzz_factor)
        {
                /* path1 fuzzily worse on total cost */
-               if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+               if (consider_startup &&
+                       path2->startup_cost > path1->startup_cost * fuzz_factor &&
                        path1->param_info == NULL)
                {
                        /* ... but path2 fuzzily worse on startup, so DIFFERENT */
@@ -167,7 +172,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
        if (path2->total_cost > path1->total_cost * fuzz_factor)
        {
                /* path2 fuzzily worse on total cost */
-               if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+               if (consider_startup &&
+                       path1->startup_cost > path2->startup_cost * fuzz_factor &&
                        path2->param_info == NULL)
                {
                        /* ... but path1 fuzzily worse on startup, so DIFFERENT */
@@ -177,6 +183,7 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
                return COSTS_BETTER1;
        }
        /* fuzzily the same on total cost */
+       /* (so we may as well compare startup cost, even if !consider_startup) */
        if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
                path2->param_info == NULL)
        {
@@ -360,6 +367,9 @@ set_cheapest(RelOptInfo *parent_rel)
  *       reduce the number of parameterized paths that are kept.  See discussion
  *       in src/backend/optimizer/README.
  *
+ *       Another policy that is enforced here is that we only consider cheap
+ *       startup cost to be interesting if parent_rel->consider_startup is true.
+ *
  *       The pathlist is kept sorted by total_cost, with cheaper paths
  *       at the front.  Within this routine, that's simply a speed hack:
  *       doing it that way makes it more likely that we will reject an inferior
@@ -423,7 +433,8 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                 * Do a fuzzy cost comparison with 1% fuzziness limit.  (XXX does this
                 * percentage need to be user-configurable?)
                 */
-               costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01);
+               costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
+                                                                                        parent_rel->consider_startup);
 
                /*
                 * If the two paths compare differently for startup and total cost,
@@ -488,8 +499,10 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                                                                        remove_old = true;      /* new dominates old */
                                                                else if (new_path->rows > old_path->rows)
                                                                        accept_new = false; /* old dominates new */
-                                                               else if (compare_path_costs_fuzzily(new_path, old_path,
-                                                                                         1.0000000001) == COSTS_BETTER1)
+                                                               else if (compare_path_costs_fuzzily(new_path,
+                                                                                                                                       old_path,
+                                                                                                                                       1.0000000001,
+                                                                                                                                       parent_rel->consider_startup) == COSTS_BETTER1)
                                                                        remove_old = true;      /* new dominates old */
                                                                else
                                                                        accept_new = false; /* old equals or
index 680184c0e9f2a9ca9dbb9f17be72b2d17a6f273f..3878c3752fef56e43bbe12d8c1613698993d0d1d 100644 (file)
@@ -99,6 +99,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
        rel->relids = bms_make_singleton(relid);
        rel->rows = 0;
        rel->width = 0;
+       /* cheap startup cost is interesting iff not all tuples to be retrieved */
+       rel->consider_startup = (root->tuple_fraction > 0);
        rel->reltargetlist = NIL;
        rel->pathlist = NIL;
        rel->ppilist = NIL;
@@ -354,6 +356,8 @@ build_join_rel(PlannerInfo *root,
        joinrel->relids = bms_copy(joinrelids);
        joinrel->rows = 0;
        joinrel->width = 0;
+       /* cheap startup cost is interesting iff not all tuples to be retrieved */
+       joinrel->consider_startup = (root->tuple_fraction > 0);
        joinrel->reltargetlist = NIL;
        joinrel->pathlist = NIL;
        joinrel->ppilist = NIL;
index 7b4eaf4f5e540158ac2f88eba232ad91e17dc8f2..af9425ad9b309fbecd65d1a83a229ddfd447f382 100644 (file)
@@ -299,6 +299,8 @@ typedef struct PlannerInfo
  *                        clauses have been applied (ie, output rows of a plan for it)
  *             width - avg. number of bytes per tuple in the relation after the
  *                             appropriate projections have been done (ie, output width)
+ *             consider_startup - true if there is any value in keeping paths for
+ *                                                this rel on the basis of having cheap startup cost
  *             reltargetlist - List of Var and PlaceHolderVar nodes for the values
  *                                             we need to output from this relation.
  *                                             List is in no particular order, but all rels of an
@@ -405,6 +407,9 @@ typedef struct RelOptInfo
        double          rows;                   /* estimated number of result tuples */
        int                     width;                  /* estimated avg width of result tuples */
 
+       /* per-relation planner control flags */
+       bool            consider_startup;       /* keep cheap-startup-cost paths? */
+
        /* materialization information */
        List       *reltargetlist;      /* Vars to be output by scan of relation */
        List       *pathlist;           /* Path structures */
index 914a6fd84473fa97302e3c3ca8320d35dea3fcb3..299ea06de37385579b29d68cab41987a715b015f 100644 (file)
@@ -3437,6 +3437,7 @@ select v.* from
 
 create temp table dual();
 insert into dual default values;
+analyze dual;
 select v.* from
   (int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
   left join int4_tbl z on z.f1 = x.q2,
@@ -3446,21 +3447,21 @@ select v.* from
                123 |                  
                456 |                  
                123 |  4567890123456789
-  4567890123456789 |               123
+  4567890123456789 | -4567890123456789
                123 |  4567890123456789
   4567890123456789 |  4567890123456789
                123 |  4567890123456789
-  4567890123456789 | -4567890123456789
   4567890123456789 |               123
-               123 |               456
   4567890123456789 |               123
                123 |  4567890123456789
-  4567890123456789 |  4567890123456789
   4567890123456789 |               123
+               123 |               456
   4567890123456789 |  4567890123456789
+  4567890123456789 | -4567890123456789
   4567890123456789 |  4567890123456789
   4567890123456789 |  4567890123456789
-  4567890123456789 | -4567890123456789
+  4567890123456789 |  4567890123456789
+  4567890123456789 |               123
   4567890123456789 |                  
  -4567890123456789 |                  
 (20 rows)
index fcc6572709de95739c65760d20e07437389335fc..c425be917aa916ae37d0544d725c5c775fe59630 100644 (file)
@@ -937,6 +937,7 @@ select v.* from
   lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
 create temp table dual();
 insert into dual default values;
+analyze dual;
 select v.* from
   (int8_tbl x left join (select q1,(select coalesce(q2,0)) q2 from int8_tbl) y on x.q2 = y.q1)
   left join int4_tbl z on z.f1 = x.q2,