X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Foptimizer%2Fplan%2Fplanner.c;h=6ebbc5b7bc3109cbb4d28a5874ae5ef147cab672;hb=63247bec284a935b3145d5302c834967049e5dea;hp=3bb603a0f614bb53ff4cbb0eebefee770afbd819;hpb=9cbd0c155d1602aad879f510256b626c58942080;p=postgresql

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 3bb603a0f6..6ebbc5b7bc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3,12 +3,12 @@
  * planner.c
  *	  The query optimizer external interface.
  *
- * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.214 2007/02/20 17:32:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,6 +42,13 @@
 #include "utils/syscache.h"
 
 
+/* GUC parameter */
+double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
+
+/* Hook for plugins to get control in planner() */
+planner_hook_type planner_hook = NULL;
+
+
 /* Expression kind codes for preprocess_expression */
 #define EXPRKIND_QUAL		0
 #define EXPRKIND_TARGET		1
@@ -61,7 +68,8 @@ static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
 static Oid *extract_grouping_ops(List *groupClause);
-static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+static bool choose_hashed_grouping(PlannerInfo *root,
+					   double tuple_fraction, double limit_tuples,
 					   Path *cheapest_path, Path *sorted_path,
 					   Oid *groupOperators, double dNumGroups,
 					   AggClauseCounts *agg_counts);
@@ -78,16 +86,42 @@ static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
  *
  *	   Query optimizer entry point
  *
+ * To support loadable plugins that monitor or modify planner behavior,
+ * we provide a hook variable that lets a plugin get control before and
+ * after the standard planning process.  The plugin would normally call
+ * standard_planner().
+ *
+ * Note to plugin authors: standard_planner() scribbles on its Query input,
+ * so you'd better copy that data structure if you want to plan more than once.
+ *
  *****************************************************************************/
 PlannedStmt *
-planner(Query *parse, bool isCursor, int cursorOptions,
-		ParamListInfo boundParams)
+planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
+{
+	PlannedStmt *result;
+
+	if (planner_hook)
+		result = (*planner_hook) (parse, cursorOptions, boundParams);
+	else
+		result = standard_planner(parse, cursorOptions, boundParams);
+	return result;
+}
+
+PlannedStmt *
+standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 {
 	PlannedStmt *result;
 	PlannerGlobal *glob;
 	double		tuple_fraction;
 	PlannerInfo *root;
 	Plan	   *top_plan;
+	ListCell   *lp,
+			   *lr;
+
+	/* Cursor options may come from caller or from DECLARE CURSOR stmt */
+	if (parse->utilityStmt &&
+		IsA(parse->utilityStmt, DeclareCursorStmt))
+		cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;
 
 	/*
 	 * Set up global state for this planner invocation.  This data is needed
@@ -99,18 +133,35 @@ planner(Query *parse, bool isCursor, int cursorOptions,
 
 	glob->boundParams = boundParams;
 	glob->paramlist = NIL;
-	glob->next_plan_id = 0;
+	glob->subplans = NIL;
+	glob->subrtables = NIL;
+	glob->rewindPlanIDs = NULL;
+	glob->finalrtable = NIL;
+	glob->relationOids = NIL;
+	glob->transientPlan = false;
 
 	/* Determine what fraction of the plan is likely to be scanned */
-	if (isCursor)
+	if (cursorOptions & CURSOR_OPT_FAST_PLAN)
 	{
 		/*
 		 * We have no real idea how many tuples the user will ultimately FETCH
-		 * from a cursor, but it seems a good bet that he doesn't want 'em
-		 * all.  Optimize for 10% retrieval (you gotta better number?  Should
-		 * this be a SETtable parameter?)
+		 * from a cursor, but it is often the case that he doesn't want 'em
+		 * all, or would prefer a fast-start plan anyway so that he can
+		 * process some of the tuples sooner.  Use a GUC parameter to decide
+		 * what fraction to optimize for.
+		 */
+		tuple_fraction = cursor_tuple_fraction;
+
+		/*
+		 * We document cursor_tuple_fraction as simply being a fraction,
+		 * which means the edge cases 0 and 1 have to be treated specially
+		 * here.  We convert 1 to 0 ("all the tuples") and 0 to a very small
+		 * fraction.
 		 */
-		tuple_fraction = 0.10;
+		if (tuple_fraction >= 1.0)
+			tuple_fraction = 0.0;
+		else if (tuple_fraction <= 0.0)
+			tuple_fraction = 1e-10;
 	}
 	else
 	{
@@ -125,26 +176,41 @@ planner(Query *parse, bool isCursor, int cursorOptions,
 	 * If creating a plan for a scrollable cursor, make sure it can run
 	 * backwards on demand.  Add a Material node at the top at need.
 	 */
-	if (isCursor && (cursorOptions & CURSOR_OPT_SCROLL))
+	if (cursorOptions & CURSOR_OPT_SCROLL)
 	{
 		if (!ExecSupportsBackwardScan(top_plan))
 			top_plan = materialize_finished_plan(top_plan);
 	}
 
 	/* final cleanup of the plan */
-	top_plan = set_plan_references(top_plan, parse->rtable);
+	Assert(glob->finalrtable == NIL);
+	top_plan = set_plan_references(glob, top_plan, root->parse->rtable);
+	/* ... and the subplans (both regular subplans and initplans) */
+	Assert(list_length(glob->subplans) == list_length(glob->subrtables));
+	forboth(lp, glob->subplans, lr, glob->subrtables)
+	{
+		Plan	   *subplan = (Plan *) lfirst(lp);
+		List	   *subrtable = (List *) lfirst(lr);
+
+		lfirst(lp) = set_plan_references(glob, subplan, subrtable);
+	}
 
 	/* build the PlannedStmt result */
 	result = makeNode(PlannedStmt);
 
 	result->commandType = parse->commandType;
 	result->canSetTag = parse->canSetTag;
+	result->transientPlan = glob->transientPlan;
 	result->planTree = top_plan;
-	result->rtable = parse->rtable;
+	result->rtable = glob->finalrtable;
 	result->resultRelations = root->resultRelations;
-	result->into = parse->into;
+	result->utilityStmt = parse->utilityStmt;
+	result->intoClause = parse->intoClause;
+	result->subplans = glob->subplans;
+	result->rewindPlanIDs = glob->rewindPlanIDs;
 	result->returningLists = root->returningLists;
 	result->rowMarks = parse->rowMarks;
+	result->relationOids = glob->relationOids;
 	result->nParamExec = list_length(glob->paramlist);
 
 	return result;
@@ -182,7 +248,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 				 Index level, double tuple_fraction,
 				 PlannerInfo **subroot)
 {
-	int			saved_plan_id = glob->next_plan_id;
+	int			num_old_subplans = list_length(glob->subplans);
 	PlannerInfo *root;
 	Plan	   *plan;
 	List	   *newHaving;
@@ -202,13 +268,20 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	/*
 	 * Look for IN clauses at the top level of WHERE, and transform them into
 	 * joins.  Note that this step only handles IN clauses originally at top
-	 * level of WHERE; if we pull up any subqueries in the next step, their
-	 * INs are processed just before pulling them up.
+	 * level of WHERE; if we pull up any subqueries below, their INs are
+	 * processed just before pulling them up.
 	 */
 	if (parse->hasSubLinks)
 		parse->jointree->quals = pull_up_IN_clauses(root,
 													parse->jointree->quals);
 
+	/*
+	 * Scan the rangetable for set-returning functions, and inline them
+	 * if possible (producing subqueries that might get pulled up next).
+	 * Recursion issues here are handled in the same way as for IN clauses.
+	 */
+	inline_set_returning_functions(root);
+
 	/*
 	 * Check to see if any subqueries in the rangetable can be merged into
 	 * this query.
@@ -384,8 +457,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	 * initPlan list and extParam/allParam sets for plan nodes, and attach the
 	 * initPlans to the top plan node.
 	 */
-	if (root->glob->next_plan_id != saved_plan_id || root->query_level > 1)
-		SS_finalize_plan(root, plan);
+	if (list_length(glob->subplans) != num_old_subplans ||
+		root->query_level > 1)
+		SS_finalize_plan(root, plan, true);
 
 	/* Return internal info if caller wants it */
 	if (subroot)
@@ -444,7 +518,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 		(root->parse->jointree->fromlist != NIL ||
 		 kind == EXPRKIND_QUAL ||
 		 root->query_level > 1))
-		expr = eval_const_expressions(expr);
+		expr = eval_const_expressions(root, expr);
 
 	/*
 	 * If it's a qual or havingQual, canonicalize it.
@@ -621,10 +695,16 @@ inheritance_planner(PlannerInfo *root)
 	 * If we managed to exclude every child rel, return a dummy plan
 	 */
 	if (subplans == NIL)
-		return (Plan *) make_result(tlist,
+	{
+		root->resultRelations = list_make1_int(parentRTindex);
+		/* although dummy, it must have a valid tlist for executor */
+		tlist = preprocess_targetlist(root, parse->targetList);
+		return (Plan *) make_result(root,
+									tlist,
 									(Node *) list_make1(makeBoolConst(false,
 																	  false)),
 									NULL);
+	}
 
 	/*
 	 * Planning might have modified the rangetable, due to changes of the
@@ -639,6 +719,10 @@ inheritance_planner(PlannerInfo *root)
 	 */
 	parse->rtable = rtable;
 
+	/* Suppress Append if there's only one surviving child rel */
+	if (list_length(subplans) == 1)
+		return (Plan *) linitial(subplans);
+
 	return (Plan *) make_append(subplans, true, tlist);
 }
 
@@ -668,6 +752,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	List	   *tlist = parse->targetList;
 	int64		offset_est = 0;
 	int64		count_est = 0;
+	double		limit_tuples = -1.0;
 	Plan	   *result_plan;
 	List	   *current_pathkeys;
 	List	   *sort_pathkeys;
@@ -675,9 +760,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
 	if (parse->limitCount || parse->limitOffset)
+	{
 		tuple_fraction = preprocess_limit(root, tuple_fraction,
 										  &offset_est, &count_est);
 
+		/*
+		 * If we have a known LIMIT, and don't have an unknown OFFSET, we can
+		 * estimate the effects of using a bounded sort.
+		 */
+		if (count_est > 0 && offset_est >= 0)
+			limit_tuples = (double) count_est + (double) offset_est;
+	}
+
 	if (parse->setOperations)
 	{
 		List	   *set_sortclauses;
@@ -706,7 +800,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 */
 		current_pathkeys = make_pathkeys_for_sortclauses(root,
 														 set_sortclauses,
-													result_plan->targetlist,
+													 result_plan->targetlist,
 														 true);
 
 		/*
@@ -732,6 +826,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		/*
 		 * Calculate pathkeys that represent result ordering requirements
 		 */
+		Assert(parse->distinctClause == NIL);
 		sort_pathkeys = make_pathkeys_for_sortclauses(root,
 													  parse->sortClause,
 													  tlist,
@@ -770,17 +865,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 * Calculate pathkeys that represent grouping/ordering requirements.
 		 * Stash them in PlannerInfo so that query_planner can canonicalize
 		 * them after EquivalenceClasses have been formed.
+		 *
+		 * Note: for the moment, DISTINCT is always implemented via sort/uniq,
+		 * and we set the sort_pathkeys to be the more rigorous of the
+		 * DISTINCT and ORDER BY requirements.  This should be changed
+		 * someday, but DISTINCT ON is a bit of a problem ...
 		 */
 		root->group_pathkeys =
 			make_pathkeys_for_sortclauses(root,
 										  parse->groupClause,
 										  tlist,
 										  false);
-		root->sort_pathkeys =
-			make_pathkeys_for_sortclauses(root,
-										  parse->sortClause,
-										  tlist,
-										  false);
+		if (list_length(parse->distinctClause) > list_length(parse->sortClause))
+			root->sort_pathkeys =
+				make_pathkeys_for_sortclauses(root,
+											  parse->distinctClause,
+											  tlist,
+											  false);
+		else
+			root->sort_pathkeys =
+				make_pathkeys_for_sortclauses(root,
+											  parse->sortClause,
+											  tlist,
+											  false);
 
 		/*
 		 * Will need actual number of aggregates for estimating costs.
@@ -809,9 +916,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 * by ORDER BY --- but that might just leave us failing to exploit an
 		 * available sort order at all. Needs more thought...)
 		 */
-		if (parse->groupClause)
+		if (root->group_pathkeys)
 			root->query_pathkeys = root->group_pathkeys;
-		else if (parse->sortClause)
+		else if (root->sort_pathkeys)
 			root->query_pathkeys = root->sort_pathkeys;
 		else
 			root->query_pathkeys = NIL;
@@ -822,7 +929,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 * estimate the number of groups in the query, and canonicalize all
 		 * the pathkeys.
 		 */
-		query_planner(root, sub_tlist, tuple_fraction,
+		query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
 					  &cheapest_path, &sorted_path, &dNumGroups);
 
 		group_pathkeys = root->group_pathkeys;
@@ -836,7 +943,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		{
 			groupOperators = extract_grouping_ops(parse->groupClause);
 			use_hashed_grouping =
-				choose_hashed_grouping(root, tuple_fraction,
+				choose_hashed_grouping(root, tuple_fraction, limit_tuples,
 									   cheapest_path, sorted_path,
 									   groupOperators, dNumGroups,
 									   &agg_counts);
@@ -878,9 +985,23 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			 * Normal case --- create a plan according to query_planner's
 			 * results.
 			 */
+			bool	need_sort_for_grouping = false;
+
 			result_plan = create_plan(root, best_path);
 			current_pathkeys = best_path->pathkeys;
 
+			/* Detect if we'll need an explicit sort for grouping */
+			if (parse->groupClause && !use_hashed_grouping &&
+				!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+			{
+				need_sort_for_grouping = true;
+				/*
+				 * Always override query_planner's tlist, so that we don't
+				 * sort useless data from a "physical" tlist.
+				 */
+				need_tlist_eval = true;
+			}
+
 			/*
 			 * create_plan() returns a plan with just a "flat" tlist of
 			 * required Vars.  Usually we need to insert the sub_tlist as the
@@ -897,7 +1018,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				 */
 				if (!is_projection_capable_plan(result_plan))
 				{
-					result_plan = (Plan *) make_result(sub_tlist, NULL,
+					result_plan = (Plan *) make_result(root,
+													   sub_tlist,
+													   NULL,
 													   result_plan);
 				}
 				else
@@ -928,7 +1051,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				 * tuples) --- so make_agg() and make_group() are responsible
 				 * for computing the added cost.
 				 */
-				cost_qual_eval(&tlist_cost, sub_tlist);
+				cost_qual_eval(&tlist_cost, sub_tlist, root);
 				result_plan->startup_cost += tlist_cost.startup;
 				result_plan->total_cost += tlist_cost.startup +
 					tlist_cost.per_tuple * result_plan->plan_rows;
@@ -973,8 +1096,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 				if (parse->groupClause)
 				{
-					if (!pathkeys_contained_in(group_pathkeys,
-											   current_pathkeys))
+					if (need_sort_for_grouping)
 					{
 						result_plan = (Plan *)
 							make_sort_from_groupcols(root,
@@ -1017,7 +1139,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				 * Add an explicit sort if we couldn't make the path come out
 				 * the way the GROUP node needs it.
 				 */
-				if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+				if (need_sort_for_grouping)
 				{
 					result_plan = (Plan *)
 						make_sort_from_groupcols(root,
@@ -1051,7 +1173,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 				 * this routine to avoid having to generate the plan in the
 				 * first place.
 				 */
-				result_plan = (Plan *) make_result(tlist,
+				result_plan = (Plan *) make_result(root,
+												   tlist,
 												   parse->havingQual,
 												   NULL);
 			}
@@ -1062,13 +1185,14 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	 * If we were not able to make the plan come out in the right order, add
 	 * an explicit sort step.
 	 */
-	if (parse->sortClause)
+	if (sort_pathkeys)
 	{
 		if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
 		{
 			result_plan = (Plan *) make_sort_from_pathkeys(root,
 														   result_plan,
-														   sort_pathkeys);
+														   sort_pathkeys,
+														   limit_tuples);
 			current_pathkeys = sort_pathkeys;
 		}
 	}
@@ -1110,7 +1234,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 	{
 		List	   *rlist;
 
-		rlist = set_returning_clause_references(parse->returningList,
+		Assert(parse->resultRelation);
+		rlist = set_returning_clause_references(root->glob,
+												parse->returningList,
 												result_plan,
 												parse->resultRelation);
 		root->returningLists = list_make1(rlist);
@@ -1369,7 +1495,7 @@ extract_grouping_ops(List *groupClause)
 		GroupClause *groupcl = (GroupClause *) lfirst(glitem);
 
 		groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
-		if (!OidIsValid(groupOperators[colno]))		/* shouldn't happen */
+		if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
 			elog(ERROR, "could not find equality operator for ordering operator %u",
 				 groupcl->sortop);
 		colno++;
@@ -1382,7 +1508,8 @@ extract_grouping_ops(List *groupClause)
  * choose_hashed_grouping - should we use hashed grouping?
  */
 static bool
-choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+choose_hashed_grouping(PlannerInfo *root,
+					   double tuple_fraction, double limit_tuples,
 					   Path *cheapest_path, Path *sorted_path,
 					   Oid *groupOperators, double dNumGroups,
 					   AggClauseCounts *agg_counts)
@@ -1399,8 +1526,8 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 	/*
 	 * Check can't-do-it conditions, including whether the grouping operators
 	 * are hashjoinable.  (We assume hashing is OK if they are marked
-	 * oprcanhash.  If there isn't actually a supporting hash function,
-	 * the executor will complain at runtime.)
+	 * oprcanhash.	If there isn't actually a supporting hash function, the
+	 * executor will complain at runtime.)
 	 *
 	 * Executor doesn't support hashed aggregation with DISTINCT aggregates.
 	 * (Doing so would imply storing *all* the input values in the hash table,
@@ -1467,7 +1594,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 	/* Result of hashed agg is always unsorted */
 	if (root->sort_pathkeys)
 		cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
-				  dNumGroups, cheapest_path_width);
+				  dNumGroups, cheapest_path_width, limit_tuples);
 
 	if (sorted_path)
 	{
@@ -1484,7 +1611,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 	if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
 	{
 		cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
-				  cheapest_path_rows, cheapest_path_width);
+				  cheapest_path_rows, cheapest_path_width, -1.0);
 		current_pathkeys = root->group_pathkeys;
 	}
 
@@ -1501,7 +1628,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
 	if (root->sort_pathkeys &&
 		!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
 		cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
-				  dNumGroups, cheapest_path_width);
+				  dNumGroups, cheapest_path_width, limit_tuples);
 
 	/*
 	 * Now make the decision using the top-level tuple fraction.  First we
@@ -1544,7 +1671,7 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
  * pass down only c,d,a+b, but it's not really worth the trouble to
  * eliminate simple var references from the subplan.  We will avoid doing
  * the extra computation to recompute a+b at the outer level; see
- * replace_vars_with_subplan_refs() in setrefs.c.)
+ * fix_upper_expr() in setrefs.c.)
  *
  * If we are grouping or aggregating, *and* there are no non-Var grouping
  * expressions, then the returned tlist is effectively dummy; we do not