* planner.c
* The query optimizer external interface.
*
- * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.214 2007/02/20 17:32:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.235 2008/07/31 22:47:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "utils/syscache.h"
+/* GUC parameter */
+double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
+
+/* Hook for plugins to get control in planner() */
+planner_hook_type planner_hook = NULL;
+
+
/* Expression kind codes for preprocess_expression */
#define EXPRKIND_QUAL 0
#define EXPRKIND_TARGET 1
double tuple_fraction,
int64 *offset_est, int64 *count_est);
static Oid *extract_grouping_ops(List *groupClause);
-static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+static bool choose_hashed_grouping(PlannerInfo *root,
+ double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups,
AggClauseCounts *agg_counts);
*
* Query optimizer entry point
*
+ * To support loadable plugins that monitor or modify planner behavior,
+ * we provide a hook variable that lets a plugin get control before and
+ * after the standard planning process. The plugin would normally call
+ * standard_planner().
+ *
+ * Note to plugin authors: standard_planner() scribbles on its Query input,
+ * so you'd better copy that data structure if you want to plan more than once.
+ *
*****************************************************************************/
PlannedStmt *
-planner(Query *parse, bool isCursor, int cursorOptions,
- ParamListInfo boundParams)
+planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
+{
+ PlannedStmt *result;
+
+ if (planner_hook)
+ result = (*planner_hook) (parse, cursorOptions, boundParams);
+ else
+ result = standard_planner(parse, cursorOptions, boundParams);
+ return result;
+}
+
+PlannedStmt *
+standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
PlannedStmt *result;
PlannerGlobal *glob;
double tuple_fraction;
PlannerInfo *root;
Plan *top_plan;
+ ListCell *lp,
+ *lr;
+
+ /* Cursor options may come from caller or from DECLARE CURSOR stmt */
+ if (parse->utilityStmt &&
+ IsA(parse->utilityStmt, DeclareCursorStmt))
+ cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;
/*
* Set up global state for this planner invocation. This data is needed
glob->boundParams = boundParams;
glob->paramlist = NIL;
- glob->next_plan_id = 0;
+ glob->subplans = NIL;
+ glob->subrtables = NIL;
+ glob->rewindPlanIDs = NULL;
+ glob->finalrtable = NIL;
+ glob->relationOids = NIL;
+ glob->transientPlan = false;
/* Determine what fraction of the plan is likely to be scanned */
- if (isCursor)
+ if (cursorOptions & CURSOR_OPT_FAST_PLAN)
{
/*
* We have no real idea how many tuples the user will ultimately FETCH
- * from a cursor, but it seems a good bet that he doesn't want 'em
- * all. Optimize for 10% retrieval (you gotta better number? Should
- * this be a SETtable parameter?)
+ * from a cursor, but it is often the case that he doesn't want 'em
+ * all, or would prefer a fast-start plan anyway so that he can
+ * process some of the tuples sooner. Use a GUC parameter to decide
+ * what fraction to optimize for.
+ */
+ tuple_fraction = cursor_tuple_fraction;
+
+ /*
+ * We document cursor_tuple_fraction as simply being a fraction,
+ * which means the edge cases 0 and 1 have to be treated specially
+ * here. We convert 1 to 0 ("all the tuples") and 0 to a very small
+ * fraction.
*/
- tuple_fraction = 0.10;
+ if (tuple_fraction >= 1.0)
+ tuple_fraction = 0.0;
+ else if (tuple_fraction <= 0.0)
+ tuple_fraction = 1e-10;
}
else
{
* If creating a plan for a scrollable cursor, make sure it can run
* backwards on demand. Add a Material node at the top at need.
*/
- if (isCursor && (cursorOptions & CURSOR_OPT_SCROLL))
+ if (cursorOptions & CURSOR_OPT_SCROLL)
{
if (!ExecSupportsBackwardScan(top_plan))
top_plan = materialize_finished_plan(top_plan);
}
/* final cleanup of the plan */
- top_plan = set_plan_references(top_plan, parse->rtable);
+ Assert(glob->finalrtable == NIL);
+ top_plan = set_plan_references(glob, top_plan, root->parse->rtable);
+ /* ... and the subplans (both regular subplans and initplans) */
+ Assert(list_length(glob->subplans) == list_length(glob->subrtables));
+ forboth(lp, glob->subplans, lr, glob->subrtables)
+ {
+ Plan *subplan = (Plan *) lfirst(lp);
+ List *subrtable = (List *) lfirst(lr);
+
+ lfirst(lp) = set_plan_references(glob, subplan, subrtable);
+ }
/* build the PlannedStmt result */
result = makeNode(PlannedStmt);
result->commandType = parse->commandType;
result->canSetTag = parse->canSetTag;
+ result->transientPlan = glob->transientPlan;
result->planTree = top_plan;
- result->rtable = parse->rtable;
+ result->rtable = glob->finalrtable;
result->resultRelations = root->resultRelations;
- result->into = parse->into;
+ result->utilityStmt = parse->utilityStmt;
+ result->intoClause = parse->intoClause;
+ result->subplans = glob->subplans;
+ result->rewindPlanIDs = glob->rewindPlanIDs;
result->returningLists = root->returningLists;
result->rowMarks = parse->rowMarks;
+ result->relationOids = glob->relationOids;
result->nParamExec = list_length(glob->paramlist);
return result;
Index level, double tuple_fraction,
PlannerInfo **subroot)
{
- int saved_plan_id = glob->next_plan_id;
+ int num_old_subplans = list_length(glob->subplans);
PlannerInfo *root;
Plan *plan;
List *newHaving;
/*
* Look for IN clauses at the top level of WHERE, and transform them into
* joins. Note that this step only handles IN clauses originally at top
- * level of WHERE; if we pull up any subqueries in the next step, their
- * INs are processed just before pulling them up.
+ * level of WHERE; if we pull up any subqueries below, their INs are
+ * processed just before pulling them up.
*/
if (parse->hasSubLinks)
parse->jointree->quals = pull_up_IN_clauses(root,
parse->jointree->quals);
+ /*
+ * Scan the rangetable for set-returning functions, and inline them
+ * if possible (producing subqueries that might get pulled up next).
+ * Recursion issues here are handled in the same way as for IN clauses.
+ */
+ inline_set_returning_functions(root);
+
/*
* Check to see if any subqueries in the rangetable can be merged into
* this query.
* initPlan list and extParam/allParam sets for plan nodes, and attach the
* initPlans to the top plan node.
*/
- if (root->glob->next_plan_id != saved_plan_id || root->query_level > 1)
- SS_finalize_plan(root, plan);
+ if (list_length(glob->subplans) != num_old_subplans ||
+ root->query_level > 1)
+ SS_finalize_plan(root, plan, true);
/* Return internal info if caller wants it */
if (subroot)
(root->parse->jointree->fromlist != NIL ||
kind == EXPRKIND_QUAL ||
root->query_level > 1))
- expr = eval_const_expressions(expr);
+ expr = eval_const_expressions(root, expr);
/*
* If it's a qual or havingQual, canonicalize it.
* If we managed to exclude every child rel, return a dummy plan
*/
if (subplans == NIL)
- return (Plan *) make_result(tlist,
+ {
+ root->resultRelations = list_make1_int(parentRTindex);
+ /* although dummy, it must have a valid tlist for executor */
+ tlist = preprocess_targetlist(root, parse->targetList);
+ return (Plan *) make_result(root,
+ tlist,
(Node *) list_make1(makeBoolConst(false,
false)),
NULL);
+ }
/*
* Planning might have modified the rangetable, due to changes of the
*/
parse->rtable = rtable;
+ /* Suppress Append if there's only one surviving child rel */
+ if (list_length(subplans) == 1)
+ return (Plan *) linitial(subplans);
+
return (Plan *) make_append(subplans, true, tlist);
}
List *tlist = parse->targetList;
int64 offset_est = 0;
int64 count_est = 0;
+ double limit_tuples = -1.0;
Plan *result_plan;
List *current_pathkeys;
List *sort_pathkeys;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
+ {
tuple_fraction = preprocess_limit(root, tuple_fraction,
&offset_est, &count_est);
+ /*
+ * If we have a known LIMIT, and don't have an unknown OFFSET, we can
+ * estimate the effects of using a bounded sort.
+ */
+ if (count_est > 0 && offset_est >= 0)
+ limit_tuples = (double) count_est + (double) offset_est;
+ }
+
if (parse->setOperations)
{
List *set_sortclauses;
*/
current_pathkeys = make_pathkeys_for_sortclauses(root,
set_sortclauses,
- result_plan->targetlist,
+ result_plan->targetlist,
true);
/*
/*
* Calculate pathkeys that represent result ordering requirements
*/
+ Assert(parse->distinctClause == NIL);
sort_pathkeys = make_pathkeys_for_sortclauses(root,
parse->sortClause,
tlist,
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
* them after EquivalenceClasses have been formed.
+ *
+ * Note: for the moment, DISTINCT is always implemented via sort/uniq,
+ * and we set the sort_pathkeys to be the more rigorous of the
+ * DISTINCT and ORDER BY requirements. This should be changed
+ * someday, but DISTINCT ON is a bit of a problem ...
*/
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->groupClause,
tlist,
false);
- root->sort_pathkeys =
- make_pathkeys_for_sortclauses(root,
- parse->sortClause,
- tlist,
- false);
+ if (list_length(parse->distinctClause) > list_length(parse->sortClause))
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->distinctClause,
+ tlist,
+ false);
+ else
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->sortClause,
+ tlist,
+ false);
/*
* Will need actual number of aggregates for estimating costs.
* by ORDER BY --- but that might just leave us failing to exploit an
* available sort order at all. Needs more thought...)
*/
- if (parse->groupClause)
+ if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
- else if (parse->sortClause)
+ else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else
root->query_pathkeys = NIL;
* estimate the number of groups in the query, and canonicalize all
* the pathkeys.
*/
- query_planner(root, sub_tlist, tuple_fraction,
+ query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
&cheapest_path, &sorted_path, &dNumGroups);
group_pathkeys = root->group_pathkeys;
{
groupOperators = extract_grouping_ops(parse->groupClause);
use_hashed_grouping =
- choose_hashed_grouping(root, tuple_fraction,
+ choose_hashed_grouping(root, tuple_fraction, limit_tuples,
cheapest_path, sorted_path,
groupOperators, dNumGroups,
&agg_counts);
* Normal case --- create a plan according to query_planner's
* results.
*/
+ bool need_sort_for_grouping = false;
+
result_plan = create_plan(root, best_path);
current_pathkeys = best_path->pathkeys;
+ /* Detect if we'll need an explicit sort for grouping */
+ if (parse->groupClause && !use_hashed_grouping &&
+ !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+ {
+ need_sort_for_grouping = true;
+ /*
+ * Always override query_planner's tlist, so that we don't
+ * sort useless data from a "physical" tlist.
+ */
+ need_tlist_eval = true;
+ }
+
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
*/
if (!is_projection_capable_plan(result_plan))
{
- result_plan = (Plan *) make_result(sub_tlist, NULL,
+ result_plan = (Plan *) make_result(root,
+ sub_tlist,
+ NULL,
result_plan);
}
else
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
*/
- cost_qual_eval(&tlist_cost, sub_tlist);
+ cost_qual_eval(&tlist_cost, sub_tlist, root);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
if (parse->groupClause)
{
- if (!pathkeys_contained_in(group_pathkeys,
- current_pathkeys))
+ if (need_sort_for_grouping)
{
result_plan = (Plan *)
make_sort_from_groupcols(root,
* Add an explicit sort if we couldn't make the path come out
* the way the GROUP node needs it.
*/
- if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+ if (need_sort_for_grouping)
{
result_plan = (Plan *)
make_sort_from_groupcols(root,
* this routine to avoid having to generate the plan in the
* first place.
*/
- result_plan = (Plan *) make_result(tlist,
+ result_plan = (Plan *) make_result(root,
+ tlist,
parse->havingQual,
NULL);
}
* If we were not able to make the plan come out in the right order, add
* an explicit sort step.
*/
- if (parse->sortClause)
+ if (sort_pathkeys)
{
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
{
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
- sort_pathkeys);
+ sort_pathkeys,
+ limit_tuples);
current_pathkeys = sort_pathkeys;
}
}
{
List *rlist;
- rlist = set_returning_clause_references(parse->returningList,
+ Assert(parse->resultRelation);
+ rlist = set_returning_clause_references(root->glob,
+ parse->returningList,
result_plan,
parse->resultRelation);
root->returningLists = list_make1(rlist);
GroupClause *groupcl = (GroupClause *) lfirst(glitem);
groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
- if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
+ if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
elog(ERROR, "could not find equality operator for ordering operator %u",
groupcl->sortop);
colno++;
* choose_hashed_grouping - should we use hashed grouping?
*/
static bool
-choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
+choose_hashed_grouping(PlannerInfo *root,
+ double tuple_fraction, double limit_tuples,
Path *cheapest_path, Path *sorted_path,
Oid *groupOperators, double dNumGroups,
AggClauseCounts *agg_counts)
/*
* Check can't-do-it conditions, including whether the grouping operators
* are hashjoinable. (We assume hashing is OK if they are marked
- * oprcanhash. If there isn't actually a supporting hash function,
- * the executor will complain at runtime.)
+ * oprcanhash. If there isn't actually a supporting hash function, the
+ * executor will complain at runtime.)
*
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
* (Doing so would imply storing *all* the input values in the hash table,
/* Result of hashed agg is always unsorted */
if (root->sort_pathkeys)
cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
- dNumGroups, cheapest_path_width);
+ dNumGroups, cheapest_path_width, limit_tuples);
if (sorted_path)
{
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
{
cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
- cheapest_path_rows, cheapest_path_width);
+ cheapest_path_rows, cheapest_path_width, -1.0);
current_pathkeys = root->group_pathkeys;
}
if (root->sort_pathkeys &&
!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
- dNumGroups, cheapest_path_width);
+ dNumGroups, cheapest_path_width, limit_tuples);
/*
* Now make the decision using the top-level tuple fraction. First we
* pass down only c,d,a+b, but it's not really worth the trouble to
* eliminate simple var references from the subplan. We will avoid doing
* the extra computation to recompute a+b at the outer level; see
- * replace_vars_with_subplan_refs() in setrefs.c.)
+ * fix_upper_expr() in setrefs.c.)
*
* If we are grouping or aggregating, *and* there are no non-Var grouping
* expressions, then the returned tlist is effectively dummy; we do not