granicus.if.org Git - postgresql/commitdiff
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)

Improve planning of Materialize nodes inserted atop the inner input of a
mergejoin to shield it from doing mark/restore and refetches.  Put an explicit
flag in MergePath so we can centralize the logic that knows about this,
and add costing logic that considers using Materialize even when it's not
forced by the previously-existing considerations.  This is in response to
a discussion back in August that suggested that materializing an inner
indexscan can be helpful when the refetch percentage is high enough.

src/backend/nodes/outfuncs.c
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/pathnode.c
src/include/nodes/relation.h
src/include/optimizer/cost.h

index 7abee7a2d84903239a6c891fefcd507680bdb221..4e1c96271a94866fc1162f0035acab0504f7eefb 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.371 2009/10/28 14:55:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.372 2009/11/15 02:45:34 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1501,6 +1501,7 @@ _outMergePath(StringInfo str, MergePath *node)
        WRITE_NODE_FIELD(path_mergeclauses);
        WRITE_NODE_FIELD(outersortkeys);
        WRITE_NODE_FIELD(innersortkeys);
+       WRITE_BOOL_FIELD(materialize_inner);
 }
 
 static void
index 4d402ca7202a3a18897ef17bad91b54ea237ea62..c225d7e28879e6b9536c08856ca1c288c9b125c9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.188 2009/10/26 02:26:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.189 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1443,13 +1443,12 @@ print_path(PlannerInfo *root, Path *path, int indent)
                {
                        MergePath  *mp = (MergePath *) path;
 
-                       if (mp->outersortkeys || mp->innersortkeys)
-                       {
-                               for (i = 0; i < indent; i++)
-                                       printf("\t");
-                               printf("  sortouter=%d sortinner=%d\n",
-                                          ((mp->outersortkeys) ? 1 : 0),
-                                          ((mp->innersortkeys) ? 1 : 0));
+                       for (i = 0; i < indent; i++)
+                               printf("\t");
+                       printf("  sortouter=%d sortinner=%d materializeinner=%d\n",
+                                  ((mp->outersortkeys) ? 1 : 0),
+                                  ((mp->innersortkeys) ? 1 : 0),
+                                  ((mp->materialize_inner) ? 1 : 0));
                        }
                }
 
index 6acc5ae34b0a4bfacb2227ed87f2ad0ac18fd321..ffbd9afbbaeb7261297c283884cecdcdb4412d57 100644 (file)
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.211 2009/09/12 22:12:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.212 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1166,23 +1166,6 @@ cost_sort(Path *path, PlannerInfo *root,
        path->total_cost = startup_cost + run_cost;
 }
 
-/*
- * sort_exceeds_work_mem
- *       Given a finished Sort plan node, detect whether it is expected to
- *       spill to disk (ie, will need more than work_mem workspace)
- *
- * This assumes there will be no available LIMIT.
- */
-bool
-sort_exceeds_work_mem(Sort *sort)
-{
-       double          input_bytes = relation_byte_size(sort->plan.plan_rows,
-                                                                                                sort->plan.plan_width);
-       long            work_mem_bytes = work_mem * 1024L;
-
-       return (input_bytes > work_mem_bytes);
-}
-
 /*
  * cost_material
  *       Determines and returns the cost of materializing a relation, including
@@ -1543,7 +1526,18 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
  *       Determines and returns the cost of joining two relations using the
  *       merge join algorithm.
  *
- * 'path' is already filled in except for the cost fields
+ * Unlike other costsize functions, this routine makes one actual decision:
+ * whether we should materialize the inner path.  We do that either because
+ * the inner path can't support mark/restore, or because it's cheaper to
+ * use an interposed Material node to handle mark/restore.  When the decision
+ * is cost-based it would be logically cleaner to build and cost two separate
+ * paths with and without that flag set; but that would require repeating most
+ * of the calculations here, which are not all that cheap.  Since the choice
+ * will not affect output pathkeys or startup cost, only total cost, there is
+ * no possibility of wanting to keep both paths.  So it seems best to make
+ * the decision here and record it in the path's materialize_inner field.
+ *
+ * 'path' is already filled in except for the cost fields and materialize_inner
  * 'sjinfo' is extra info about the join for selectivity estimation
  *
  * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
@@ -1561,7 +1555,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
        List       *innersortkeys = path->innersortkeys;
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
-       Cost            cpu_per_tuple;
+       Cost            cpu_per_tuple,
+                               inner_run_cost,
+                               bare_inner_cost,
+                               mat_inner_cost;
        QualCost        merge_qual_cost;
        QualCost        qp_qual_cost;
        double          outer_path_rows = PATH_ROWS(outer_path);
@@ -1606,10 +1603,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
        /*
         * When there are equal merge keys in the outer relation, the mergejoin
         * must rescan any matching tuples in the inner relation. This means
-        * re-fetching inner tuples.  Our cost model for this is that a re-fetch
-        * costs the same as an original fetch, which is probably an overestimate;
-        * but on the other hand we ignore the bookkeeping costs of mark/restore.
-        * Not clear if it's worth developing a more refined model.
+        * re-fetching inner tuples; we have to estimate how often that happens.
         *
         * For regular inner and outer joins, the number of re-fetches can be
         * estimated approximately as size of merge join output minus size of
@@ -1641,7 +1635,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                if (rescannedtuples < 0)
                        rescannedtuples = 0;
        }
-       /* We'll inflate inner run cost this much to account for rescanning */
+       /* We'll inflate various costs this much to account for rescanning */
        rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
 
        /*
@@ -1778,32 +1772,83 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                                  -1.0);
                startup_cost += sort_path.startup_cost;
                startup_cost += (sort_path.total_cost - sort_path.startup_cost)
-                       * innerstartsel * rescanratio;
-               run_cost += (sort_path.total_cost - sort_path.startup_cost)
-                       * (innerendsel - innerstartsel) * rescanratio;
-
-               /*
-                * If the inner sort is expected to spill to disk, we want to add a
-                * materialize node to shield it from the need to handle mark/restore.
-                * This will allow it to perform the last merge pass on-the-fly, while
-                * in most cases not requiring the materialize to spill to disk.
-                * Charge an extra cpu_tuple_cost per tuple to account for the
-                * materialize node.  (Keep this estimate in sync with similar ones in
-                * create_mergejoin_path and create_mergejoin_plan.)
-                */
-               if (relation_byte_size(inner_path_rows, inner_path->parent->width) >
-                       (work_mem * 1024L))
-                       run_cost += cpu_tuple_cost * inner_path_rows;
+                       * innerstartsel;
+               inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
+                       * (innerendsel - innerstartsel);
        }
        else
        {
                startup_cost += inner_path->startup_cost;
                startup_cost += (inner_path->total_cost - inner_path->startup_cost)
-                       * innerstartsel * rescanratio;
-               run_cost += (inner_path->total_cost - inner_path->startup_cost)
-                       * (innerendsel - innerstartsel) * rescanratio;
+                       * innerstartsel;
+               inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
+                       * (innerendsel - innerstartsel);
        }
 
+       /*
+        * Decide whether we want to materialize the inner input to shield it from
+        * mark/restore and performing re-fetches.  Our cost model for regular
+        * re-fetches is that a re-fetch costs the same as an original fetch,
+        * which is probably an overestimate; but on the other hand we ignore the
+        * bookkeeping costs of mark/restore.  Not clear if it's worth developing
+        * a more refined model.  So we just need to inflate the inner run cost
+        * by rescanratio.
+        */
+       bare_inner_cost = inner_run_cost * rescanratio;
+       /*
+        * When we interpose a Material node the re-fetch cost is assumed to be
+        * just cpu_tuple_cost per tuple, independently of the underlying plan's
+        * cost; but we have to charge an extra cpu_tuple_cost per original fetch
+        * as well.  Note that we're assuming the materialize node will never
+        * spill to disk, since it only has to remember tuples back to the last
+        * mark.  (If there are a huge number of duplicates, our other cost
+        * factors will make the path so expensive that it probably won't get
+        * chosen anyway.)  So we don't use cost_rescan here.
+        *
+        * Note: keep this estimate in sync with create_mergejoin_plan's labeling
+        * of the generated Material node.
+        */
+       mat_inner_cost = inner_run_cost +
+               cpu_tuple_cost * inner_path_rows * rescanratio;
+
+       /* Prefer materializing if it looks cheaper */
+       if (mat_inner_cost < bare_inner_cost)
+               path->materialize_inner = true;
+       /*
+        * Even if materializing doesn't look cheaper, we *must* do it if the
+        * inner path is to be used directly (without sorting) and it doesn't
+        * support mark/restore.
+        *
+        * Since the inner side must be ordered, and only Sorts and IndexScans can
+        * create order to begin with, and they both support mark/restore, you
+        * might think there's no problem --- but you'd be wrong.  Nestloop and
+        * merge joins can *preserve* the order of their inputs, so they can be
+        * selected as the input of a mergejoin, and they don't support
+        * mark/restore at present.
+        */
+       else if (innersortkeys == NIL &&
+                        !ExecSupportsMarkRestore(inner_path->pathtype))
+               path->materialize_inner = true;
+       /*
+        * Also, force materializing if the inner path is to be sorted and the
+        * sort is expected to spill to disk.  This is because the final merge
+        * pass can be done on-the-fly if it doesn't have to support mark/restore.
+        * We don't try to adjust the cost estimates for this consideration,
+        * though.
+        */
+       else if (innersortkeys != NIL &&
+                        relation_byte_size(inner_path_rows, inner_path->parent->width) >
+                        (work_mem * 1024L))
+               path->materialize_inner = true;
+       else
+               path->materialize_inner = false;
+
+       /* Charge the right incremental cost for the chosen case */
+       if (path->materialize_inner)
+               run_cost += mat_inner_cost;
+       else
+               run_cost += bare_inner_cost;
+
        /* CPU costs */
 
        /*
index b068d2f3f83346710da8deba4d1201e7b5569437..41238b7c0e6e48e4045a8de662e7db2eb433501c 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.266 2009/10/26 02:26:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.267 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1664,9 +1664,8 @@ create_mergejoin_plan(PlannerInfo *root,
                                                         best_path->jpath.outerjoinpath->parent->relids);
 
        /*
-        * Create explicit sort nodes for the outer and inner join paths if
-        * necessary.  The sort cost was already accounted for in the path. Make
-        * sure there are no excess columns in the inputs if sorting.
+        * Create explicit sort nodes for the outer and inner paths if necessary.
+        * Make sure there are no excess columns in the inputs if sorting.
         */
        if (best_path->outersortkeys)
        {
@@ -1695,23 +1694,17 @@ create_mergejoin_plan(PlannerInfo *root,
                innerpathkeys = best_path->jpath.innerjoinpath->pathkeys;
 
        /*
-        * If inner plan is a sort that is expected to spill to disk, add a
-        * materialize node to shield it from the need to handle mark/restore.
-        * This will allow it to perform the last merge pass on-the-fly, while in
-        * most cases not requiring the materialize to spill to disk.
-        *
-        * XXX really, Sort oughta do this for itself, probably, to avoid the
-        * overhead of a separate plan node.
+        * If specified, add a materialize node to shield the inner plan from
+        * the need to handle mark/restore.
         */
-       if (IsA(inner_plan, Sort) &&
-               sort_exceeds_work_mem((Sort *) inner_plan))
+       if (best_path->materialize_inner)
        {
                Plan       *matplan = (Plan *) make_material(inner_plan);
 
                /*
                 * We assume the materialize will not spill to disk, and therefore
                 * charge just cpu_tuple_cost per tuple.  (Keep this estimate in sync
-                * with similar ones in cost_mergejoin and create_mergejoin_path.)
+                * with cost_mergejoin.)
                 */
                copy_plan_costsize(matplan, inner_plan);
                matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
@@ -1887,6 +1880,7 @@ create_mergejoin_plan(PlannerInfo *root,
                                                           inner_plan,
                                                           best_path->jpath.jointype);
 
+       /* Costs of sort and material steps are included in path cost already */
        copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path);
 
        return join_plan;
index e9733892cc216df70e194b5b74b81fe7a7414804..62169e589b933ccf0d04e5910052cdd56fe54f9e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.154 2009/09/17 20:49:29 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.155 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,7 +17,6 @@
 #include <math.h>
 
 #include "catalog/pg_operator.h"
-#include "executor/executor.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@@ -1414,47 +1413,6 @@ create_mergejoin_path(PlannerInfo *root,
                pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
                innersortkeys = NIL;
 
-       /*
-        * If we are not sorting the inner path, we may need a materialize node to
-        * ensure it can be marked/restored.
-        *
-        * Since the inner side must be ordered, and only Sorts and IndexScans can
-        * create order to begin with, and they both support mark/restore, you
-        * might think there's no problem --- but you'd be wrong.  Nestloop and
-        * merge joins can *preserve* the order of their inputs, so they can be
-        * selected as the input of a mergejoin, and they don't support
-        * mark/restore at present.
-        *
-        * Note: Sort supports mark/restore, so no materialize is really needed in
-        * that case; but one may be desirable anyway to optimize the sort.
-        * However, since we aren't representing the sort step separately in the
-        * Path tree, we can't explicitly represent the materialize either. So
-        * that case is not handled here.  Instead, cost_mergejoin has to factor
-        * in the cost and create_mergejoin_plan has to add the plan node.
-        */
-       if (innersortkeys == NIL &&
-               !ExecSupportsMarkRestore(inner_path->pathtype))
-       {
-               Path       *mpath;
-
-               mpath = (Path *) create_material_path(inner_path->parent, inner_path);
-
-               /*
-                * We expect the materialize won't spill to disk (it could only do so
-                * if there were a whole lot of duplicate tuples, which is a case
-                * cost_mergejoin will avoid choosing anyway).  Therefore
-                * cost_material's cost estimate is bogus and we should charge just
-                * cpu_tuple_cost per tuple.  (Keep this estimate in sync with similar
-                * ones in cost_mergejoin and create_mergejoin_plan; also see
-                * cost_rescan.)
-                */
-               mpath->startup_cost = inner_path->startup_cost;
-               mpath->total_cost = inner_path->total_cost;
-               mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows;
-
-               inner_path = mpath;
-       }
-
        pathnode->jpath.path.pathtype = T_MergeJoin;
        pathnode->jpath.path.parent = joinrel;
        pathnode->jpath.jointype = jointype;
@@ -1465,6 +1423,7 @@ create_mergejoin_path(PlannerInfo *root,
        pathnode->path_mergeclauses = mergeclauses;
        pathnode->outersortkeys = outersortkeys;
        pathnode->innersortkeys = innersortkeys;
+       /* pathnode->materialize_inner will be set by cost_mergejoin */
 
        cost_mergejoin(pathnode, root, sjinfo);
 
index 59f83e856282ac8d3ae2837209548f150e6d0284..d54770eabc446ade48cac7e426e62792d0096b73 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.178 2009/10/26 02:26:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.179 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -835,6 +835,14 @@ typedef JoinPath NestPath;
 /*
  * A mergejoin path has these fields.
  *
+ * Unlike other path types, a MergePath node doesn't represent just a single
+ * run-time plan node: it can represent up to four.  Aside from the MergeJoin
+ * node itself, there can be a Sort node for the outer input, a Sort node
+ * for the inner input, and/or a Material node for the inner input.  We could
+ * represent these nodes by separate path nodes, but considering how many
+ * different merge paths are investigated during a complex join problem,
+ * it seems better to avoid unnecessary palloc overhead.
+ *
  * path_mergeclauses lists the clauses (in the form of RestrictInfos)
  * that will be used in the merge.
  *
@@ -846,15 +854,19 @@ typedef JoinPath NestPath;
  * outersortkeys (resp. innersortkeys) is NIL if the outer path
  * (resp. inner path) is already ordered appropriately for the
  * mergejoin.  If it is not NIL then it is a PathKeys list describing
- * the ordering that must be created by an explicit sort step.
+ * the ordering that must be created by an explicit Sort node.
+ *
+ * materialize_inner is TRUE if a Material node should be placed atop the
+ * inner input.  This may appear with or without an inner Sort step.
  */
 
 typedef struct MergePath
 {
        JoinPath        jpath;
        List       *path_mergeclauses;          /* join clauses to be used for merge */
-       List       *outersortkeys;      /* keys for explicit sort, if any */
-       List       *innersortkeys;      /* keys for explicit sort, if any */
+       List       *outersortkeys;                      /* keys for explicit sort, if any */
+       List       *innersortkeys;                      /* keys for explicit sort, if any */
+       bool            materialize_inner;              /* add Materialize to inner? */
 } MergePath;
 
 /*
index f862979c0847baf971417e1ebf1cdc3ff7219549..041b7e8f9de55076655640e096d4e1240629b572 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.98 2009/09/12 22:12:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.99 2009/11/15 02:45:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,7 +84,6 @@ extern void cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm);
 extern void cost_sort(Path *path, PlannerInfo *root,
                  List *pathkeys, Cost input_cost, double tuples, int width,
                  double limit_tuples);
-extern bool sort_exceeds_work_mem(Sort *sort);
 extern void cost_material(Path *path,
                          Cost input_startup_cost, Cost input_total_cost,
                          double tuples, int width);