Refactor planning of projection steps that don't need a Result plan node.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 21 Jun 2016 22:38:20 +0000 (18:38 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 21 Jun 2016 22:38:20 +0000 (18:38 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 21 Jun 2016 22:38:20 +0000 (18:38 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 21 Jun 2016 22:38:20 +0000 (18:38 -0400)
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index b6754478dcf3e8fd6ae69d4efe6cb95f07864ff5..b0a28f515f19e3e8f615935a4c2e1f1b05fec630 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1809,6 +1809,7 @@ _outProjectionPath(StringInfo str, const ProjectionPath *node)
         _outPathInfo(str, (const Path *) node);
  
         WRITE_NODE_FIELD(subpath);
+       WRITE_BOOL_FIELD(dummypp);
  }
  
  static void
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index ab8df76a6edd09282c934ea86663d7d495e6a2dd..b2db6e8d0355c8f9e0ebe8c351fab985c1623d6b 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1409,8 +1409,9 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path)
  /*
   * create_projection_plan
   *
- *       Create a Result node to do a projection step and (recursively) plans
- *       for its subpaths.
+ *       Create a plan tree to do a projection step and (recursively) plans
+ *       for its subpaths.  We may need a Result node for the projection,
+ *       but sometimes we can just let the subplan do the work.
   */
  static Plan *
  create_projection_plan(PlannerInfo *root, ProjectionPath *best_path)
@@ -1425,31 +1426,37 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path)
         tlist = build_path_tlist(root, &best_path->path);
  
         /*
-        * We might not really need a Result node here.  There are several ways
-        * that this can happen.  For example, MergeAppend doesn't project, so we
-        * would have thought that we needed a projection to attach resjunk sort
-        * columns to its output ... but create_merge_append_plan might have added
-        * those same resjunk sort columns to both MergeAppend and its children.
-        * Alternatively, apply_projection_to_path might have created a projection
-        * path as the subpath of a Gather node even though the subpath was
-        * projection-capable.  So, if the subpath is capable of projection or the
-        * desired tlist is the same expression-wise as the subplan's, just jam it
-        * in there.  We'll have charged for a Result that doesn't actually appear
-        * in the plan, but that's better than having a Result we don't need.
+        * We might not really need a Result node here, either because the subplan
+        * can project or because it's returning the right list of expressions
+        * anyway.  Usually create_projection_path will have detected that and set
+        * dummypp if we don't need a Result; but its decision can't be final,
+        * because some createplan.c routines change the tlists of their nodes.
+        * (An example is that create_merge_append_plan might add resjunk sort
+        * columns to a MergeAppend.)  So we have to recheck here.  If we do
+        * arrive at a different answer than create_projection_path did, we'll
+        * have made slightly wrong cost estimates; but label the plan with the
+        * cost estimates we actually used, not "corrected" ones.  (XXX this could
+        * be cleaned up if we moved more of the sortcolumn setup logic into Path
+        * creation, but that would add expense to creating Paths we might end up
+        * not using.)
          */
         if (is_projection_capable_path(best_path->subpath) ||
                 tlist_same_exprs(tlist, subplan->targetlist))
         {
+               /* Don't need a separate Result, just assign tlist to subplan */
                 plan = subplan;
                 plan->targetlist = tlist;
  
-               /* Adjust cost to match what we thought during planning */
+               /* Label plan with the estimated costs we actually used */
                 plan->startup_cost = best_path->path.startup_cost;
                 plan->total_cost = best_path->path.total_cost;
+               plan->plan_rows = best_path->path.rows;
+               plan->plan_width = best_path->path.pathtarget->width;
                 /* ... but be careful not to munge subplan's parallel-aware flag */
         }
         else
         {
+               /* We need a Result node */
                 plan = (Plan *) make_result(tlist, NULL, subplan);
  
                 copy_generic_path_info(plan, (Path *) best_path);
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c

index 0434a5a8e73e16e1a79cef46fa3758f5000004e2..cefec7bdf1083cbc3f0ee3b47db63893ff41f755 100644 (file)
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -465,8 +465,7 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
          * cheapest path.)
          */
         sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
-                                                                                  create_pathtarget(subroot, tlist),
-                                                                                  false);
+                                                                                  create_pathtarget(subroot, tlist));
  
         /*
          * Determine cost to get just the first row of the presorted path.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 094c7082b141faf6956337a21b9561e6f7ac0030..2372311d40996a68e64cff0c457f603720a694ff 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1500,7 +1500,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                 PathTarget *grouping_target;
                 PathTarget *scanjoin_target;
                 bool            have_grouping;
-               bool            scanjoin_target_parallel_safe = false;
                 WindowFuncLists *wflists = NULL;
                 List       *activeWindows = NIL;
                 List       *rollup_lists = NIL;
@@ -1730,14 +1729,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                 else
                         scanjoin_target = grouping_target;
  
-               /*
-                * Check whether scan/join target is parallel safe ... unless there
-                * are no partial paths, in which case we don't care.
-                */
-               if (current_rel->partial_pathlist &&
-                       !has_parallel_hazard((Node *) scanjoin_target->exprs, false))
-                       scanjoin_target_parallel_safe = true;
-
                 /*
                  * Forcibly apply scan/join target to all the Paths for the scan/join
                  * rel.
@@ -1756,8 +1747,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
  
                         Assert(subpath->param_info == NULL);
                         path = apply_projection_to_path(root, current_rel,
-                                                                                       subpath, scanjoin_target,
-                                                                                       scanjoin_target_parallel_safe);
+                                                                                       subpath, scanjoin_target);
                         /* If we had to add a Result, path is different from subpath */
                         if (path != subpath)
                         {
@@ -1774,15 +1764,13 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                  * partial pathlist will expect partial paths for that rel to produce
                  * the same output as complete paths ... and we just changed the
                  * output for the complete paths, so we'll need to do the same thing
-                * for partial paths.
+                * for partial paths.  But only parallel-safe expressions can be
+                * computed by partial paths.
                  */
-               if (scanjoin_target_parallel_safe)
+               if (current_rel->partial_pathlist &&
+                       !has_parallel_hazard((Node *) scanjoin_target->exprs, false))
                 {
-                       /*
-                        * Apply the scan/join target to each partial path.  Otherwise,
-                        * anything that attempts to use the partial paths for further
-                        * upper planning may go wrong.
-                        */
+                       /* Apply the scan/join target to each partial path */
                         foreach(lc, current_rel->partial_pathlist)
                         {
                                 Path       *subpath = (Path *) lfirst(lc);
@@ -1792,36 +1780,14 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
                                 Assert(subpath->param_info == NULL);
  
                                 /*
-                                * We can't use apply_projection_to_path() here, because there
-                                * could already be pointers to these paths, and therefore we
-                                * dare not modify them in place.  Instead, we must use
-                                * create_projection_path() unconditionally.
+                                * Don't use apply_projection_to_path() here, because there
+                                * could be other pointers to these paths, and therefore we
+                                * mustn't modify them in place.
                                  */
                                 newpath = (Path *) create_projection_path(root,
                                                                                                                   current_rel,
                                                                                                                   subpath,
                                                                                                                   scanjoin_target);
-
-                               /*
-                                * Although create_projection_path() inserts a ProjectionPath
-                                * unconditionally, create_projection_plan() will only insert
-                                * a Result node if the subpath is not projection-capable, so
-                                * we should discount the cost of that node if it will not
-                                * actually get inserted.  (This is pretty grotty but we can
-                                * improve it later if it seems important.)
-                                */
-                               if (equal(scanjoin_target->exprs, subpath->pathtarget->exprs))
-                               {
-                                       /* at most we need a relabeling of the subpath */
-                                       newpath->startup_cost = subpath->startup_cost;
-                                       newpath->total_cost = subpath->total_cost;
-                               }
-                               else if (is_projection_capable_path(subpath))
-                               {
-                                       /* need to project, but we don't need a Result */
-                                       newpath->total_cost -= cpu_tuple_cost * subpath->rows;
-                               }
-
                                 lfirst(lc) = newpath;
                         }
                 }
@@ -4231,7 +4197,7 @@ create_ordered_paths(PlannerInfo *root,
                         /* Add projection step if needed */
                         if (path->pathtarget != target)
                                 path = apply_projection_to_path(root, ordered_rel,
-                                                                                               path, target, false);
+                                                                                               path, target);
  
                         add_path(ordered_rel, path);
                 }
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 30975e0106a564a0f2282405f1529e650691daed..552b756b8b1b5da51445c02183a304889b439ee1 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -325,8 +325,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                                                                          refnames_tlist);
  
                 path = apply_projection_to_path(root, rel, path,
-                                                                               create_pathtarget(root, tlist),
-                                                                               false);
+                                                                               create_pathtarget(root, tlist));
  
                 /* Return the fully-fledged tlist to caller, too */
                 *pTargetList = tlist;
@@ -395,8 +394,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
                                                                                         path->parent,
                                                                                         path,
                                                                                         create_pathtarget(root,
-                                                                                                                         *pTargetList),
-                                                                                       false);
+                                                                                                                         *pTargetList));
                 }
                 return path;
         }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 0ff353fa11d0e39b40d644baa27ee987b6000b16..8fd933fd6bdff4efa87b899b6e0792ec04d21afb 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2168,6 +2168,7 @@ create_projection_path(PlannerInfo *root,
                                            PathTarget *target)
  {
         ProjectionPath *pathnode = makeNode(ProjectionPath);
+       PathTarget *oldtarget = subpath->pathtarget;
  
         pathnode->path.pathtype = T_Result;
         pathnode->path.parent = rel;
@@ -2184,13 +2185,46 @@ create_projection_path(PlannerInfo *root,
         pathnode->subpath = subpath;
  
         /*
-        * The Result node's cost is cpu_tuple_cost per row, plus the cost of
-        * evaluating the tlist.  There is no qual to worry about.
+        * We might not need a separate Result node.  If the input plan node type
+        * can project, we can just tell it to project something else.  Or, if it
+        * can't project but the desired target has the same expression list as
+        * what the input will produce anyway, we can still give it the desired
+        * tlist (possibly changing its ressortgroupref labels, but nothing else).
+        * Note: in the latter case, create_projection_plan has to recheck our
+        * conclusion; see comments therein.
          */
-       pathnode->path.rows = subpath->rows;
-       pathnode->path.startup_cost = subpath->startup_cost + target->cost.startup;
-       pathnode->path.total_cost = subpath->total_cost + target->cost.startup +
-               (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows;
+       if (is_projection_capable_path(subpath) ||
+               equal(oldtarget->exprs, target->exprs))
+       {
+               /* No separate Result node needed */
+               pathnode->dummypp = true;
+
+               /*
+                * Set cost of plan as subpath's cost, adjusted for tlist replacement.
+                */
+               pathnode->path.rows = subpath->rows;
+               pathnode->path.startup_cost = subpath->startup_cost +
+                       (target->cost.startup - oldtarget->cost.startup);
+               pathnode->path.total_cost = subpath->total_cost +
+                       (target->cost.startup - oldtarget->cost.startup) +
+                       (target->cost.per_tuple - oldtarget->cost.per_tuple) * subpath->rows;
+       }
+       else
+       {
+               /* We really do need the Result node */
+               pathnode->dummypp = false;
+
+               /*
+                * The Result node's cost is cpu_tuple_cost per row, plus the cost of
+                * evaluating the tlist.  There is no qual to worry about.
+                */
+               pathnode->path.rows = subpath->rows;
+               pathnode->path.startup_cost = subpath->startup_cost +
+                       target->cost.startup;
+               pathnode->path.total_cost = subpath->total_cost +
+                       target->cost.startup +
+                       (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows;
+       }
  
         return pathnode;
  }
@@ -2199,38 +2233,37 @@ create_projection_path(PlannerInfo *root,
   * apply_projection_to_path
   *       Add a projection step, or just apply the target directly to given path.
   *
- * Most plan types include ExecProject, so we can implement a new projection
- * without an extra plan node: just replace the given path's pathtarget with
- * the desired one.  If the given path can't project, add a ProjectionPath.
+ * This has the same net effect as create_projection_path(), except that if
+ * a separate Result plan node isn't needed, we just replace the given path's
+ * pathtarget with the desired one.  This must be used only when the caller
+ * knows that the given path isn't referenced elsewhere and so can be modified
+ * in-place.
   *
- * We can also short-circuit cases where the targetlist expressions are
- * actually equal; this is not an uncommon case, since it may arise from
- * trying to apply a PathTarget with sortgroupref labeling to a derived
- * path without such labeling.
+ * If the input path is a GatherPath, we try to push the new target down to
+ * its input as well; this is a yet more invasive modification of the input
+ * path, which create_projection_path() can't do.
   *
- * This requires knowing that the source path won't be referenced for other
- * purposes (e.g., other possible paths), since we modify it in-place.  Note
- * also that we mustn't change the source path's parent link; so when it is
+ * Note that we mustn't change the source path's parent link; so when it is
   * add_path'd to "rel" things will be a bit inconsistent.  So far that has
   * not caused any trouble.
   *
   * 'rel' is the parent relation associated with the result
   * 'path' is the path representing the source of data
   * 'target' is the PathTarget to be computed
- * 'target_parallel' indicates that target expressions are all parallel-safe
   */
  Path *
  apply_projection_to_path(PlannerInfo *root,
                                                  RelOptInfo *rel,
                                                  Path *path,
-                                                PathTarget *target,
-                                                bool target_parallel)
+                                                PathTarget *target)
  {
         QualCost        oldcost;
  
-       /* Make a separate ProjectionPath if needed */
-       if (!is_projection_capable_path(path) &&
-               !equal(path->pathtarget->exprs, target->exprs))
+       /*
+        * If given path can't project, we might need a Result node, so make a
+        * separate ProjectionPath.
+        */
+       if (!is_projection_capable_path(path))
                 return (Path *) create_projection_path(root, rel, path, target);
  
         /*
@@ -2247,10 +2280,11 @@ apply_projection_to_path(PlannerInfo *root,
         /*
          * If the path happens to be a Gather path, we'd like to arrange for the
          * subpath to return the required target list so that workers can help
-        * project. But if there is something that is not parallel-safe in the
+        * project.  But if there is something that is not parallel-safe in the
          * target expressions, then we can't.
          */
-       if (IsA(path, GatherPath) &&target_parallel)
+       if (IsA(path, GatherPath) &&
+               !has_parallel_hazard((Node *) target->exprs, false))
         {
                 GatherPath *gpath = (GatherPath *) path;
  
@@ -2258,14 +2292,12 @@ apply_projection_to_path(PlannerInfo *root,
                  * We always use create_projection_path here, even if the subpath is
                  * projection-capable, so as to avoid modifying the subpath in place.
                  * It seems unlikely at present that there could be any other
-                * references to the subpath anyway, but better safe than sorry.
-                * (create_projection_plan will only insert a Result node if the
-                * subpath is not projection-capable, so we only include the cost of
-                * that node if it will actually be inserted.  This is a bit grotty
-                * but we can improve it later if it seems important.)
+                * references to the subpath, but better safe than sorry.
+                *
+                * Note that we don't change the GatherPath's cost estimates; it might
+                * be appropriate to do so, to reflect the fact that the bulk of the
+                * target evaluation will happen in workers.
                  */
-               if (!is_projection_capable_path(gpath->subpath))
-                       gpath->path.total_cost += cpu_tuple_cost * gpath->subpath->rows;
                 gpath->subpath = (Path *)
                         create_projection_path(root,
                                                                    gpath->subpath->parent,
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 57747fc4e556f86b3963bc1a12d23b244304fca7..9470df626cc86c45417951c138b104e3930e8f74 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1274,15 +1274,22 @@ typedef struct HashPath
  /*
   * ProjectionPath represents a projection (that is, targetlist computation)
   *
- * This path node represents using a Result plan node to do a projection.
- * It's only needed atop a node that doesn't support projection (such as
- * Sort); otherwise we just jam the new desired PathTarget into the lower
- * path node, and adjust that node's estimated cost accordingly.
+ * Nominally, this path node represents using a Result plan node to do a
+ * projection step.  However, if the input plan node supports projection,
+ * we can just modify its output targetlist to do the required calculations
+ * directly, and not need a Result.  In some places in the planner we can just
+ * jam the desired PathTarget into the input path node (and adjust its cost
+ * accordingly), so we don't need a ProjectionPath.  But in other places
+ * it's necessary to not modify the input path node, so we need a separate
+ * ProjectionPath node, which is marked dummy to indicate that we intend to
+ * assign the work to the input plan node.  The estimated cost for the
+ * ProjectionPath node will account for whether a Result will be used or not.
   */
  typedef struct ProjectionPath
  {
         Path            path;
         Path       *subpath;            /* path representing input source */
+       bool            dummypp;                /* true if no separate Result is needed */
  } ProjectionPath;
  
  /*
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 586ecdd9b8714a9fc4953e0ffb93282b017af3ef..5de4c34a2b73c2fa26fd3541f8592ff1d66ac7f4 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -143,8 +143,7 @@ extern ProjectionPath *create_projection_path(PlannerInfo *root,
  extern Path *apply_projection_to_path(PlannerInfo *root,
                                                  RelOptInfo *rel,
                                                  Path *path,
-                                                PathTarget *target,
-                                                bool target_parallel);
+                                                PathTarget *target);
  extern SortPath *create_sort_path(PlannerInfo *root,
                                  RelOptInfo *rel,
                                  Path *subpath,
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 21 Jun 2016 22:38:20 +0000 (18:38 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 21 Jun 2016 22:38:20 +0000 (18:38 -0400)
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/plan/planagg.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/optimizer/pathnode.h		patch \| blob \| history