* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.196 2008/08/25 22:42:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.197 2008/09/05 21:07:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* innerstartsel * rescanratio;
run_cost += (sort_path.total_cost - sort_path.startup_cost)
* (innerendsel - innerstartsel) * rescanratio;
+
+ /*
+ * If the inner sort is expected to spill to disk, we want to add a
+ * materialize node to shield it from the need to handle mark/restore.
+ * This will allow it to perform the last merge pass on-the-fly, while
+ * in most cases not requiring the materialize to spill to disk.
+ * Charge an extra cpu_tuple_cost per tuple to account for the
+ * materialize node. (Keep this estimate in sync with similar ones in
+ * create_mergejoin_path and create_mergejoin_plan.)
+ */
+ if (relation_byte_size(inner_path_rows, inner_path->parent->width) >
+ (work_mem * 1024L))
+ run_cost += cpu_tuple_cost * inner_path_rows;
}
else
{
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.247 2008/08/28 23:09:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.248 2008/09/05 21:07:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* We assume the materialize will not spill to disk, and therefore
- * charge just cpu_tuple_cost per tuple.
+ * charge just cpu_tuple_cost per tuple. (Keep this estimate in sync
+ * with similar ones in cost_mergejoin and create_mergejoin_path.)
*/
copy_plan_costsize(matplan, inner_plan);
matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.146 2008/08/14 18:47:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.147 2008/09/05 21:07:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* If we are not sorting the inner path, we may need a materialize node to
- * ensure it can be marked/restored. (Sort does support mark/restore, so
- * no materialize is needed in that case.)
+ * ensure it can be marked/restored.
*
* Since the inner side must be ordered, and only Sorts and IndexScans can
- * create order to begin with, you might think there's no problem --- but
- * you'd be wrong. Nestloop and merge joins can *preserve* the order of
- * their inputs, so they can be selected as the input of a mergejoin, and
- * they don't support mark/restore at present.
+ * create order to begin with, and they both support mark/restore, you
+ * might think there's no problem --- but you'd be wrong. Nestloop and
+ * merge joins can *preserve* the order of their inputs, so they can be
+ * selected as the input of a mergejoin, and they don't support
+ * mark/restore at present.
+ *
+ * Note: Sort supports mark/restore, so no materialize is really needed
+ * in that case; but one may be desirable anyway to optimize the sort.
+ * However, since we aren't representing the sort step separately in
+ * the Path tree, we can't explicitly represent the materialize either.
+ * So that case is not handled here. Instead, cost_mergejoin has to
+ * factor in the cost and create_mergejoin_plan has to add the plan node.
*/
if (innersortkeys == NIL &&
!ExecSupportsMarkRestore(inner_path->pathtype))
- inner_path = (Path *)
- create_material_path(inner_path->parent, inner_path);
+ {
+ Path *mpath;
+
+ mpath = (Path *) create_material_path(inner_path->parent, inner_path);
+
+ /*
+ * We expect the materialize won't spill to disk (it could only do
+ * so if there were a whole lot of duplicate tuples, which is a case
+ * cost_mergejoin will avoid choosing anyway). Therefore
+ * cost_material's cost estimate is bogus and we should charge
+ * just cpu_tuple_cost per tuple. (Keep this estimate in sync with
+ * similar ones in cost_mergejoin and create_mergejoin_plan.)
+ */
+ mpath->startup_cost = inner_path->startup_cost;
+ mpath->total_cost = inner_path->total_cost;
+ mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows;
+
+ inner_path = mpath;
+ }
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;