*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.371 2009/10/28 14:55:38 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.372 2009/11/15 02:45:34 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
WRITE_NODE_FIELD(path_mergeclauses);
WRITE_NODE_FIELD(outersortkeys);
WRITE_NODE_FIELD(innersortkeys);
+ WRITE_BOOL_FIELD(materialize_inner);
}
static void
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.188 2009/10/26 02:26:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.189 2009/11/15 02:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
MergePath *mp = (MergePath *) path;
- if (mp->outersortkeys || mp->innersortkeys)
- {
- for (i = 0; i < indent; i++)
- printf("\t");
- printf(" sortouter=%d sortinner=%d\n",
- ((mp->outersortkeys) ? 1 : 0),
- ((mp->innersortkeys) ? 1 : 0));
+ for (i = 0; i < indent; i++)
+ printf("\t");
+ printf(" sortouter=%d sortinner=%d materializeinner=%d\n",
+ ((mp->outersortkeys) ? 1 : 0),
+ ((mp->innersortkeys) ? 1 : 0),
+ ((mp->materialize_inner) ? 1 : 0));
}
}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.211 2009/09/12 22:12:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.212 2009/11/15 02:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
path->total_cost = startup_cost + run_cost;
}
-/*
- * sort_exceeds_work_mem
- * Given a finished Sort plan node, detect whether it is expected to
- * spill to disk (ie, will need more than work_mem workspace)
- *
- * This assumes there will be no available LIMIT.
- */
-bool
-sort_exceeds_work_mem(Sort *sort)
-{
- double input_bytes = relation_byte_size(sort->plan.plan_rows,
- sort->plan.plan_width);
- long work_mem_bytes = work_mem * 1024L;
-
- return (input_bytes > work_mem_bytes);
-}
-
/*
* cost_material
* Determines and returns the cost of materializing a relation, including
* Determines and returns the cost of joining two relations using the
* merge join algorithm.
*
- * 'path' is already filled in except for the cost fields
+ * Unlike other costsize functions, this routine makes one actual decision:
+ * whether we should materialize the inner path. We do that either because
+ * the inner path can't support mark/restore, or because it's cheaper to
+ * use an interposed Material node to handle mark/restore. When the decision
+ * is cost-based it would be logically cleaner to build and cost two separate
+ * paths with and without that flag set; but that would require repeating most
+ * of the calculations here, which are not all that cheap. Since the choice
+ * will not affect output pathkeys or startup cost, only total cost, there is
+ * no possibility of wanting to keep both paths. So it seems best to make
+ * the decision here and record it in the path's materialize_inner field.
+ *
+ * 'path' is already filled in except for the cost fields and materialize_inner
* 'sjinfo' is extra info about the join for selectivity estimation
*
* Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
List *innersortkeys = path->innersortkeys;
Cost startup_cost = 0;
Cost run_cost = 0;
- Cost cpu_per_tuple;
+ Cost cpu_per_tuple,
+ inner_run_cost,
+ bare_inner_cost,
+ mat_inner_cost;
QualCost merge_qual_cost;
QualCost qp_qual_cost;
double outer_path_rows = PATH_ROWS(outer_path);
/*
* When there are equal merge keys in the outer relation, the mergejoin
* must rescan any matching tuples in the inner relation. This means
- * re-fetching inner tuples. Our cost model for this is that a re-fetch
- * costs the same as an original fetch, which is probably an overestimate;
- * but on the other hand we ignore the bookkeeping costs of mark/restore.
- * Not clear if it's worth developing a more refined model.
+ * re-fetching inner tuples; we have to estimate how often that happens.
*
* For regular inner and outer joins, the number of re-fetches can be
* estimated approximately as size of merge join output minus size of
if (rescannedtuples < 0)
rescannedtuples = 0;
}
- /* We'll inflate inner run cost this much to account for rescanning */
+ /* We'll inflate various costs this much to account for rescanning */
rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
/*
-1.0);
startup_cost += sort_path.startup_cost;
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
- * innerstartsel * rescanratio;
- run_cost += (sort_path.total_cost - sort_path.startup_cost)
- * (innerendsel - innerstartsel) * rescanratio;
-
- /*
- * If the inner sort is expected to spill to disk, we want to add a
- * materialize node to shield it from the need to handle mark/restore.
- * This will allow it to perform the last merge pass on-the-fly, while
- * in most cases not requiring the materialize to spill to disk.
- * Charge an extra cpu_tuple_cost per tuple to account for the
- * materialize node. (Keep this estimate in sync with similar ones in
- * create_mergejoin_path and create_mergejoin_plan.)
- */
- if (relation_byte_size(inner_path_rows, inner_path->parent->width) >
- (work_mem * 1024L))
- run_cost += cpu_tuple_cost * inner_path_rows;
+ * innerstartsel;
+ inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
+ * (innerendsel - innerstartsel);
}
else
{
startup_cost += inner_path->startup_cost;
startup_cost += (inner_path->total_cost - inner_path->startup_cost)
- * innerstartsel * rescanratio;
- run_cost += (inner_path->total_cost - inner_path->startup_cost)
- * (innerendsel - innerstartsel) * rescanratio;
+ * innerstartsel;
+ inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
+ * (innerendsel - innerstartsel);
}
+ /*
+ * Decide whether we want to materialize the inner input to shield it from
+ * mark/restore and performing re-fetches. Our cost model for regular
+ * re-fetches is that a re-fetch costs the same as an original fetch,
+ * which is probably an overestimate; but on the other hand we ignore the
+ * bookkeeping costs of mark/restore. Not clear if it's worth developing
+ * a more refined model. So we just need to inflate the inner run cost
+ * by rescanratio.
+ */
+ bare_inner_cost = inner_run_cost * rescanratio;
+ /*
+ * When we interpose a Material node the re-fetch cost is assumed to be
+ * just cpu_tuple_cost per tuple, independently of the underlying plan's
+ * cost; but we have to charge an extra cpu_tuple_cost per original fetch
+ * as well. Note that we're assuming the materialize node will never
+ * spill to disk, since it only has to remember tuples back to the last
+ * mark. (If there are a huge number of duplicates, our other cost
+ * factors will make the path so expensive that it probably won't get
+ * chosen anyway.) So we don't use cost_rescan here.
+ *
+ * Note: keep this estimate in sync with create_mergejoin_plan's labeling
+ * of the generated Material node.
+ */
+ mat_inner_cost = inner_run_cost +
+ cpu_tuple_cost * inner_path_rows * rescanratio;
+
+ /* Prefer materializing if it looks cheaper */
+ if (mat_inner_cost < bare_inner_cost)
+ path->materialize_inner = true;
+ /*
+ * Even if materializing doesn't look cheaper, we *must* do it if the
+ * inner path is to be used directly (without sorting) and it doesn't
+ * support mark/restore.
+ *
+ * Since the inner side must be ordered, and only Sorts and IndexScans can
+ * create order to begin with, and they both support mark/restore, you
+ * might think there's no problem --- but you'd be wrong. Nestloop and
+ * merge joins can *preserve* the order of their inputs, so they can be
+ * selected as the input of a mergejoin, and they don't support
+ * mark/restore at present.
+ */
+ else if (innersortkeys == NIL &&
+ !ExecSupportsMarkRestore(inner_path->pathtype))
+ path->materialize_inner = true;
+ /*
+ * Also, force materializing if the inner path is to be sorted and the
+ * sort is expected to spill to disk. This is because the final merge
+ * pass can be done on-the-fly if it doesn't have to support mark/restore.
+ * We don't try to adjust the cost estimates for this consideration,
+ * though.
+ */
+ else if (innersortkeys != NIL &&
+ relation_byte_size(inner_path_rows, inner_path->parent->width) >
+ (work_mem * 1024L))
+ path->materialize_inner = true;
+ else
+ path->materialize_inner = false;
+
+ /* Charge the right incremental cost for the chosen case */
+ if (path->materialize_inner)
+ run_cost += mat_inner_cost;
+ else
+ run_cost += bare_inner_cost;
+
/* CPU costs */
/*
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.266 2009/10/26 02:26:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.267 2009/11/15 02:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
best_path->jpath.outerjoinpath->parent->relids);
/*
- * Create explicit sort nodes for the outer and inner join paths if
- * necessary. The sort cost was already accounted for in the path. Make
- * sure there are no excess columns in the inputs if sorting.
+ * Create explicit sort nodes for the outer and inner paths if necessary.
+ * Make sure there are no excess columns in the inputs if sorting.
*/
if (best_path->outersortkeys)
{
innerpathkeys = best_path->jpath.innerjoinpath->pathkeys;
/*
- * If inner plan is a sort that is expected to spill to disk, add a
- * materialize node to shield it from the need to handle mark/restore.
- * This will allow it to perform the last merge pass on-the-fly, while in
- * most cases not requiring the materialize to spill to disk.
- *
- * XXX really, Sort oughta do this for itself, probably, to avoid the
- * overhead of a separate plan node.
+ * If specified, add a materialize node to shield the inner plan from
+ * the need to handle mark/restore.
*/
- if (IsA(inner_plan, Sort) &&
- sort_exceeds_work_mem((Sort *) inner_plan))
+ if (best_path->materialize_inner)
{
Plan *matplan = (Plan *) make_material(inner_plan);
/*
* We assume the materialize will not spill to disk, and therefore
* charge just cpu_tuple_cost per tuple. (Keep this estimate in sync
- * with similar ones in cost_mergejoin and create_mergejoin_path.)
+ * with cost_mergejoin.)
*/
copy_plan_costsize(matplan, inner_plan);
matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
inner_plan,
best_path->jpath.jointype);
+ /* Costs of sort and material steps are included in path cost already */
copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path);
return join_plan;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.154 2009/09/17 20:49:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.155 2009/11/15 02:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include <math.h>
#include "catalog/pg_operator.h"
-#include "executor/executor.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
- /*
- * If we are not sorting the inner path, we may need a materialize node to
- * ensure it can be marked/restored.
- *
- * Since the inner side must be ordered, and only Sorts and IndexScans can
- * create order to begin with, and they both support mark/restore, you
- * might think there's no problem --- but you'd be wrong. Nestloop and
- * merge joins can *preserve* the order of their inputs, so they can be
- * selected as the input of a mergejoin, and they don't support
- * mark/restore at present.
- *
- * Note: Sort supports mark/restore, so no materialize is really needed in
- * that case; but one may be desirable anyway to optimize the sort.
- * However, since we aren't representing the sort step separately in the
- * Path tree, we can't explicitly represent the materialize either. So
- * that case is not handled here. Instead, cost_mergejoin has to factor
- * in the cost and create_mergejoin_plan has to add the plan node.
- */
- if (innersortkeys == NIL &&
- !ExecSupportsMarkRestore(inner_path->pathtype))
- {
- Path *mpath;
-
- mpath = (Path *) create_material_path(inner_path->parent, inner_path);
-
- /*
- * We expect the materialize won't spill to disk (it could only do so
- * if there were a whole lot of duplicate tuples, which is a case
- * cost_mergejoin will avoid choosing anyway). Therefore
- * cost_material's cost estimate is bogus and we should charge just
- * cpu_tuple_cost per tuple. (Keep this estimate in sync with similar
- * ones in cost_mergejoin and create_mergejoin_plan; also see
- * cost_rescan.)
- */
- mpath->startup_cost = inner_path->startup_cost;
- mpath->total_cost = inner_path->total_cost;
- mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows;
-
- inner_path = mpath;
- }
-
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
+ /* pathnode->materialize_inner will be set by cost_mergejoin */
cost_mergejoin(pathnode, root, sjinfo);
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.178 2009/10/26 02:26:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.179 2009/11/15 02:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* A mergejoin path has these fields.
*
+ * Unlike other path types, a MergePath node doesn't represent just a single
+ * run-time plan node: it can represent up to four. Aside from the MergeJoin
+ * node itself, there can be a Sort node for the outer input, a Sort node
+ * for the inner input, and/or a Material node for the inner input. We could
+ * represent these nodes by separate path nodes, but considering how many
+ * different merge paths are investigated during a complex join problem,
+ * it seems better to avoid unnecessary palloc overhead.
+ *
* path_mergeclauses lists the clauses (in the form of RestrictInfos)
* that will be used in the merge.
*
* outersortkeys (resp. innersortkeys) is NIL if the outer path
* (resp. inner path) is already ordered appropriately for the
* mergejoin. If it is not NIL then it is a PathKeys list describing
- * the ordering that must be created by an explicit sort step.
+ * the ordering that must be created by an explicit Sort node.
+ *
+ * materialize_inner is TRUE if a Material node should be placed atop the
+ * inner input. This may appear with or without an inner Sort step.
*/
typedef struct MergePath
{
JoinPath jpath;
List *path_mergeclauses; /* join clauses to be used for merge */
- List *outersortkeys; /* keys for explicit sort, if any */
- List *innersortkeys; /* keys for explicit sort, if any */
+ List *outersortkeys; /* keys for explicit sort, if any */
+ List *innersortkeys; /* keys for explicit sort, if any */
+ bool materialize_inner; /* add Materialize to inner? */
} MergePath;
/*
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.98 2009/09/12 22:12:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.99 2009/11/15 02:45:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void cost_sort(Path *path, PlannerInfo *root,
List *pathkeys, Cost input_cost, double tuples, int width,
double limit_tuples);
-extern bool sort_exceeds_work_mem(Sort *sort);
extern void cost_material(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double tuples, int width);