Improve planning of Materialize nodes inserted atop the inner input of a mergejoin.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 7abee7a2d84903239a6c891fefcd507680bdb221..4e1c96271a94866fc1162f0035acab0504f7eefb 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.371 2009/10/28 14:55:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.372 2009/11/15 02:45:34 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1501,6 +1501,7 @@ _outMergePath(StringInfo str, MergePath *node)
         WRITE_NODE_FIELD(path_mergeclauses);
         WRITE_NODE_FIELD(outersortkeys);
         WRITE_NODE_FIELD(innersortkeys);
+       WRITE_BOOL_FIELD(materialize_inner);
  }
  
  static void
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 4d402ca7202a3a18897ef17bad91b54ea237ea62..c225d7e28879e6b9536c08856ca1c288c9b125c9 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.188 2009/10/26 02:26:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.189 2009/11/15 02:45:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1443,13 +1443,12 @@ print_path(PlannerInfo *root, Path *path, int indent)
                 {
                         MergePath  *mp = (MergePath *) path;
  
-                       if (mp->outersortkeys || mp->innersortkeys)
-                       {
-                               for (i = 0; i < indent; i++)
-                                       printf("\t");
-                               printf("  sortouter=%d sortinner=%d\n",
-                                          ((mp->outersortkeys) ? 1 : 0),
-                                          ((mp->innersortkeys) ? 1 : 0));
+                       for (i = 0; i < indent; i++)
+                               printf("\t");
+                       printf("  sortouter=%d sortinner=%d materializeinner=%d\n",
+                                  ((mp->outersortkeys) ? 1 : 0),
+                                  ((mp->innersortkeys) ? 1 : 0),
+                                  ((mp->materialize_inner) ? 1 : 0));
                         }
                 }
  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 6acc5ae34b0a4bfacb2227ed87f2ad0ac18fd321..ffbd9afbbaeb7261297c283884cecdcdb4412d57 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.211 2009/09/12 22:12:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.212 2009/11/15 02:45:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1166,23 +1166,6 @@ cost_sort(Path *path, PlannerInfo *root,
         path->total_cost = startup_cost + run_cost;
  }
  
-/*
- * sort_exceeds_work_mem
- *       Given a finished Sort plan node, detect whether it is expected to
- *       spill to disk (ie, will need more than work_mem workspace)
- *
- * This assumes there will be no available LIMIT.
- */
-bool
-sort_exceeds_work_mem(Sort *sort)
-{
-       double          input_bytes = relation_byte_size(sort->plan.plan_rows,
-                                                                                                sort->plan.plan_width);
-       long            work_mem_bytes = work_mem * 1024L;
-
-       return (input_bytes > work_mem_bytes);
-}
-
  /*
   * cost_material
   *       Determines and returns the cost of materializing a relation, including
@@ -1543,7 +1526,18 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
   *       Determines and returns the cost of joining two relations using the
   *       merge join algorithm.
   *
- * 'path' is already filled in except for the cost fields
+ * Unlike other costsize functions, this routine makes one actual decision:
+ * whether we should materialize the inner path.  We do that either because
+ * the inner path can't support mark/restore, or because it's cheaper to
+ * use an interposed Material node to handle mark/restore.  When the decision
+ * is cost-based it would be logically cleaner to build and cost two separate
+ * paths with and without that flag set; but that would require repeating most
+ * of the calculations here, which are not all that cheap.  Since the choice
+ * will not affect output pathkeys or startup cost, only total cost, there is
+ * no possibility of wanting to keep both paths.  So it seems best to make
+ * the decision here and record it in the path's materialize_inner field.
+ *
+ * 'path' is already filled in except for the cost fields and materialize_inner
   * 'sjinfo' is extra info about the join for selectivity estimation
   *
   * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
@@ -1561,7 +1555,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         List       *innersortkeys = path->innersortkeys;
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
-       Cost            cpu_per_tuple;
+       Cost            cpu_per_tuple,
+                               inner_run_cost,
+                               bare_inner_cost,
+                               mat_inner_cost;
         QualCost        merge_qual_cost;
         QualCost        qp_qual_cost;
         double          outer_path_rows = PATH_ROWS(outer_path);
@@ -1606,10 +1603,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
         /*
          * When there are equal merge keys in the outer relation, the mergejoin
          * must rescan any matching tuples in the inner relation. This means
-        * re-fetching inner tuples.  Our cost model for this is that a re-fetch
-        * costs the same as an original fetch, which is probably an overestimate;
-        * but on the other hand we ignore the bookkeeping costs of mark/restore.
-        * Not clear if it's worth developing a more refined model.
+        * re-fetching inner tuples; we have to estimate how often that happens.
          *
          * For regular inner and outer joins, the number of re-fetches can be
          * estimated approximately as size of merge join output minus size of
@@ -1641,7 +1635,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                 if (rescannedtuples < 0)
                         rescannedtuples = 0;
         }
-       /* We'll inflate inner run cost this much to account for rescanning */
+       /* We'll inflate various costs this much to account for rescanning */
         rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
  
         /*
@@ -1778,32 +1772,83 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                                   -1.0);
                 startup_cost += sort_path.startup_cost;
                 startup_cost += (sort_path.total_cost - sort_path.startup_cost)
-                       * innerstartsel * rescanratio;
-               run_cost += (sort_path.total_cost - sort_path.startup_cost)
-                       * (innerendsel - innerstartsel) * rescanratio;
-
-               /*
-                * If the inner sort is expected to spill to disk, we want to add a
-                * materialize node to shield it from the need to handle mark/restore.
-                * This will allow it to perform the last merge pass on-the-fly, while
-                * in most cases not requiring the materialize to spill to disk.
-                * Charge an extra cpu_tuple_cost per tuple to account for the
-                * materialize node.  (Keep this estimate in sync with similar ones in
-                * create_mergejoin_path and create_mergejoin_plan.)
-                */
-               if (relation_byte_size(inner_path_rows, inner_path->parent->width) >
-                       (work_mem * 1024L))
-                       run_cost += cpu_tuple_cost * inner_path_rows;
+                       * innerstartsel;
+               inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
+                       * (innerendsel - innerstartsel);
         }
         else
         {
                 startup_cost += inner_path->startup_cost;
                 startup_cost += (inner_path->total_cost - inner_path->startup_cost)
-                       * innerstartsel * rescanratio;
-               run_cost += (inner_path->total_cost - inner_path->startup_cost)
-                       * (innerendsel - innerstartsel) * rescanratio;
+                       * innerstartsel;
+               inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
+                       * (innerendsel - innerstartsel);
         }
  
+       /*
+        * Decide whether we want to materialize the inner input to shield it from
+        * mark/restore and performing re-fetches.  Our cost model for regular
+        * re-fetches is that a re-fetch costs the same as an original fetch,
+        * which is probably an overestimate; but on the other hand we ignore the
+        * bookkeeping costs of mark/restore.  Not clear if it's worth developing
+        * a more refined model.  So we just need to inflate the inner run cost
+        * by rescanratio.
+        */
+       bare_inner_cost = inner_run_cost * rescanratio;
+       /*
+        * When we interpose a Material node the re-fetch cost is assumed to be
+        * just cpu_tuple_cost per tuple, independently of the underlying plan's
+        * cost; but we have to charge an extra cpu_tuple_cost per original fetch
+        * as well.  Note that we're assuming the materialize node will never
+        * spill to disk, since it only has to remember tuples back to the last
+        * mark.  (If there are a huge number of duplicates, our other cost
+        * factors will make the path so expensive that it probably won't get
+        * chosen anyway.)  So we don't use cost_rescan here.
+        *
+        * Note: keep this estimate in sync with create_mergejoin_plan's labeling
+        * of the generated Material node.
+        */
+       mat_inner_cost = inner_run_cost +
+               cpu_tuple_cost * inner_path_rows * rescanratio;
+
+       /* Prefer materializing if it looks cheaper */
+       if (mat_inner_cost < bare_inner_cost)
+               path->materialize_inner = true;
+       /*
+        * Even if materializing doesn't look cheaper, we *must* do it if the
+        * inner path is to be used directly (without sorting) and it doesn't
+        * support mark/restore.
+        *
+        * Since the inner side must be ordered, and only Sorts and IndexScans can
+        * create order to begin with, and they both support mark/restore, you
+        * might think there's no problem --- but you'd be wrong.  Nestloop and
+        * merge joins can *preserve* the order of their inputs, so they can be
+        * selected as the input of a mergejoin, and they don't support
+        * mark/restore at present.
+        */
+       else if (innersortkeys == NIL &&
+                        !ExecSupportsMarkRestore(inner_path->pathtype))
+               path->materialize_inner = true;
+       /*
+        * Also, force materializing if the inner path is to be sorted and the
+        * sort is expected to spill to disk.  This is because the final merge
+        * pass can be done on-the-fly if it doesn't have to support mark/restore.
+        * We don't try to adjust the cost estimates for this consideration,
+        * though.
+        */
+       else if (innersortkeys != NIL &&
+                        relation_byte_size(inner_path_rows, inner_path->parent->width) >
+                        (work_mem * 1024L))
+               path->materialize_inner = true;
+       else
+               path->materialize_inner = false;
+
+       /* Charge the right incremental cost for the chosen case */
+       if (path->materialize_inner)
+               run_cost += mat_inner_cost;
+       else
+               run_cost += bare_inner_cost;
+
         /* CPU costs */
  
         /*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index b068d2f3f83346710da8deba4d1201e7b5569437..41238b7c0e6e48e4045a8de662e7db2eb433501c 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.266 2009/10/26 02:26:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.267 2009/11/15 02:45:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1664,9 +1664,8 @@ create_mergejoin_plan(PlannerInfo *root,
                                                          best_path->jpath.outerjoinpath->parent->relids);
  
         /*
-        * Create explicit sort nodes for the outer and inner join paths if
-        * necessary.  The sort cost was already accounted for in the path. Make
-        * sure there are no excess columns in the inputs if sorting.
+        * Create explicit sort nodes for the outer and inner paths if necessary.
+        * Make sure there are no excess columns in the inputs if sorting.
          */
         if (best_path->outersortkeys)
         {
@@ -1695,23 +1694,17 @@ create_mergejoin_plan(PlannerInfo *root,
                 innerpathkeys = best_path->jpath.innerjoinpath->pathkeys;
  
         /*
-        * If inner plan is a sort that is expected to spill to disk, add a
-        * materialize node to shield it from the need to handle mark/restore.
-        * This will allow it to perform the last merge pass on-the-fly, while in
-        * most cases not requiring the materialize to spill to disk.
-        *
-        * XXX really, Sort oughta do this for itself, probably, to avoid the
-        * overhead of a separate plan node.
+        * If specified, add a materialize node to shield the inner plan from
+        * the need to handle mark/restore.
          */
-       if (IsA(inner_plan, Sort) &&
-               sort_exceeds_work_mem((Sort *) inner_plan))
+       if (best_path->materialize_inner)
         {
                 Plan       *matplan = (Plan *) make_material(inner_plan);
  
                 /*
                  * We assume the materialize will not spill to disk, and therefore
                  * charge just cpu_tuple_cost per tuple.  (Keep this estimate in sync
-                * with similar ones in cost_mergejoin and create_mergejoin_path.)
+                * with cost_mergejoin.)
                  */
                 copy_plan_costsize(matplan, inner_plan);
                 matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
@@ -1887,6 +1880,7 @@ create_mergejoin_plan(PlannerInfo *root,
                                                            inner_plan,
                                                            best_path->jpath.jointype);
  
+       /* Costs of sort and material steps are included in path cost already */
         copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path);
  
         return join_plan;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index e9733892cc216df70e194b5b74b81fe7a7414804..62169e589b933ccf0d04e5910052cdd56fe54f9e 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.154 2009/09/17 20:49:29 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.155 2009/11/15 02:45:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,7 +17,6 @@
  #include <math.h>
  
  #include "catalog/pg_operator.h"
-#include "executor/executor.h"
  #include "miscadmin.h"
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
@@ -1414,47 +1413,6 @@ create_mergejoin_path(PlannerInfo *root,
                 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
                 innersortkeys = NIL;
  
-       /*
-        * If we are not sorting the inner path, we may need a materialize node to
-        * ensure it can be marked/restored.
-        *
-        * Since the inner side must be ordered, and only Sorts and IndexScans can
-        * create order to begin with, and they both support mark/restore, you
-        * might think there's no problem --- but you'd be wrong.  Nestloop and
-        * merge joins can *preserve* the order of their inputs, so they can be
-        * selected as the input of a mergejoin, and they don't support
-        * mark/restore at present.
-        *
-        * Note: Sort supports mark/restore, so no materialize is really needed in
-        * that case; but one may be desirable anyway to optimize the sort.
-        * However, since we aren't representing the sort step separately in the
-        * Path tree, we can't explicitly represent the materialize either. So
-        * that case is not handled here.  Instead, cost_mergejoin has to factor
-        * in the cost and create_mergejoin_plan has to add the plan node.
-        */
-       if (innersortkeys == NIL &&
-               !ExecSupportsMarkRestore(inner_path->pathtype))
-       {
-               Path       *mpath;
-
-               mpath = (Path *) create_material_path(inner_path->parent, inner_path);
-
-               /*
-                * We expect the materialize won't spill to disk (it could only do so
-                * if there were a whole lot of duplicate tuples, which is a case
-                * cost_mergejoin will avoid choosing anyway).  Therefore
-                * cost_material's cost estimate is bogus and we should charge just
-                * cpu_tuple_cost per tuple.  (Keep this estimate in sync with similar
-                * ones in cost_mergejoin and create_mergejoin_plan; also see
-                * cost_rescan.)
-                */
-               mpath->startup_cost = inner_path->startup_cost;
-               mpath->total_cost = inner_path->total_cost;
-               mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows;
-
-               inner_path = mpath;
-       }
-
         pathnode->jpath.path.pathtype = T_MergeJoin;
         pathnode->jpath.path.parent = joinrel;
         pathnode->jpath.jointype = jointype;
@@ -1465,6 +1423,7 @@ create_mergejoin_path(PlannerInfo *root,
         pathnode->path_mergeclauses = mergeclauses;
         pathnode->outersortkeys = outersortkeys;
         pathnode->innersortkeys = innersortkeys;
+       /* pathnode->materialize_inner will be set by cost_mergejoin */
  
         cost_mergejoin(pathnode, root, sjinfo);
  
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 59f83e856282ac8d3ae2837209548f150e6d0284..d54770eabc446ade48cac7e426e62792d0096b73 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.178 2009/10/26 02:26:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.179 2009/11/15 02:45:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -835,6 +835,14 @@ typedef JoinPath NestPath;
  /*
   * A mergejoin path has these fields.
   *
+ * Unlike other path types, a MergePath node doesn't represent just a single
+ * run-time plan node: it can represent up to four.  Aside from the MergeJoin
+ * node itself, there can be a Sort node for the outer input, a Sort node
+ * for the inner input, and/or a Material node for the inner input.  We could
+ * represent these nodes by separate path nodes, but considering how many
+ * different merge paths are investigated during a complex join problem,
+ * it seems better to avoid unnecessary palloc overhead.
+ *
   * path_mergeclauses lists the clauses (in the form of RestrictInfos)
   * that will be used in the merge.
   *
@@ -846,15 +854,19 @@ typedef JoinPath NestPath;
   * outersortkeys (resp. innersortkeys) is NIL if the outer path
   * (resp. inner path) is already ordered appropriately for the
   * mergejoin.  If it is not NIL then it is a PathKeys list describing
- * the ordering that must be created by an explicit sort step.
+ * the ordering that must be created by an explicit Sort node.
+ *
+ * materialize_inner is TRUE if a Material node should be placed atop the
+ * inner input.  This may appear with or without an inner Sort step.
   */
  
  typedef struct MergePath
  {
         JoinPath        jpath;
         List       *path_mergeclauses;          /* join clauses to be used for merge */
-       List       *outersortkeys;      /* keys for explicit sort, if any */
-       List       *innersortkeys;      /* keys for explicit sort, if any */
+       List       *outersortkeys;                      /* keys for explicit sort, if any */
+       List       *innersortkeys;                      /* keys for explicit sort, if any */
+       bool            materialize_inner;              /* add Materialize to inner? */
  } MergePath;
  
  /*
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index f862979c0847baf971417e1ebf1cdc3ff7219549..041b7e8f9de55076655640e096d4e1240629b572 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.98 2009/09/12 22:12:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.99 2009/11/15 02:45:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -84,7 +84,6 @@ extern void cost_recursive_union(Plan *runion, Plan *nrterm, Plan *rterm);
  extern void cost_sort(Path *path, PlannerInfo *root,
                   List *pathkeys, Cost input_cost, double tuples, int width,
                   double limit_tuples);
-extern bool sort_exceeds_work_mem(Sort *sort);
  extern void cost_material(Path *path,
                           Cost input_startup_cost, Cost input_total_cost,
                           double tuples, int width);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 15 Nov 2009 02:45:35 +0000 (02:45 +0000)
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history