*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.227 2008/03/18 22:04:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "utils/syscache.h"
+/* GUC parameter */
+double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
+
/* Hook for plugins to get control in planner() */
planner_hook_type planner_hook = NULL;
static double preprocess_limit(PlannerInfo *root,
double tuple_fraction,
int64 *offset_est, int64 *count_est);
+static void preprocess_groupclause(PlannerInfo *root);
static Oid *extract_grouping_ops(List *groupClause);
static bool choose_hashed_grouping(PlannerInfo *root,
double tuple_fraction, double limit_tuples,
{
/*
* We have no real idea how many tuples the user will ultimately FETCH
- * from a cursor, but it seems a good bet that he doesn't want 'em
- * all. Optimize for 10% retrieval (you gotta better number? Should
- * this be a SETtable parameter?)
+ * from a cursor, but it is often the case that he doesn't want 'em
+ * all, or would prefer a fast-start plan anyway so that he can
+ * process some of the tuples sooner. Use a GUC parameter to decide
+ * what fraction to optimize for.
+ */
+ tuple_fraction = cursor_tuple_fraction;
+
+ /*
+ * We document cursor_tuple_fraction as simply being a fraction,
+ * which means the edge cases 0 and 1 have to be treated specially
+ * here. We convert 1 to 0 ("all the tuples") and 0 to a very small
+ * fraction.
*/
- tuple_fraction = 0.10;
+ if (tuple_fraction >= 1.0)
+ tuple_fraction = 0.0;
+ else if (tuple_fraction <= 0.0)
+ tuple_fraction = 1e-10;
}
else
{
*/
if (list_length(glob->subplans) != num_old_subplans ||
root->query_level > 1)
- SS_finalize_plan(root, plan);
+ SS_finalize_plan(root, plan, true);
/* Return internal info if caller wants it */
if (subroot)
(root->parse->jointree->fromlist != NIL ||
kind == EXPRKIND_QUAL ||
root->query_level > 1))
- expr = eval_const_expressions(expr);
+ expr = eval_const_expressions(root, expr);
/*
* If it's a qual or havingQual, canonicalize it.
/*
* Calculate pathkeys that represent result ordering requirements
*/
+ Assert(parse->distinctClause == NIL);
sort_pathkeys = make_pathkeys_for_sortclauses(root,
parse->sortClause,
tlist,
Path *best_path;
long numGroups = 0;
AggClauseCounts agg_counts;
- int numGroupCols = list_length(parse->groupClause);
+ int numGroupCols;
bool use_hashed_grouping = false;
MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+ /* Preprocess GROUP BY clause, if any */
+ if (parse->groupClause)
+ preprocess_groupclause(root);
+ numGroupCols = list_length(parse->groupClause);
+
/* Preprocess targetlist */
tlist = preprocess_targetlist(root, tlist);
* Calculate pathkeys that represent grouping/ordering requirements.
* Stash them in PlannerInfo so that query_planner can canonicalize
* them after EquivalenceClasses have been formed.
+ *
+ * Note: for the moment, DISTINCT is always implemented via sort/uniq,
+ * and we set the sort_pathkeys to be the more rigorous of the
+ * DISTINCT and ORDER BY requirements. This should be changed
+ * someday, but DISTINCT ON is a bit of a problem ...
*/
root->group_pathkeys =
make_pathkeys_for_sortclauses(root,
parse->groupClause,
tlist,
false);
- root->sort_pathkeys =
- make_pathkeys_for_sortclauses(root,
- parse->sortClause,
- tlist,
- false);
+ if (list_length(parse->distinctClause) > list_length(parse->sortClause))
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->distinctClause,
+ tlist,
+ false);
+ else
+ root->sort_pathkeys =
+ make_pathkeys_for_sortclauses(root,
+ parse->sortClause,
+ tlist,
+ false);
/*
* Will need actual number of aggregates for estimating costs.
* by ORDER BY --- but that might just leave us failing to exploit an
* available sort order at all. Needs more thought...)
*/
- if (parse->groupClause)
+ if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
- else if (parse->sortClause)
+ else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
else
root->query_pathkeys = NIL;
* Normal case --- create a plan according to query_planner's
* results.
*/
+ bool need_sort_for_grouping = false;
+
result_plan = create_plan(root, best_path);
current_pathkeys = best_path->pathkeys;
+ /* Detect if we'll need an explicit sort for grouping */
+ if (parse->groupClause && !use_hashed_grouping &&
+ !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+ {
+ need_sort_for_grouping = true;
+ /*
+ * Always override query_planner's tlist, so that we don't
+ * sort useless data from a "physical" tlist.
+ */
+ need_tlist_eval = true;
+ }
+
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
if (parse->groupClause)
{
- if (!pathkeys_contained_in(group_pathkeys,
- current_pathkeys))
+ if (need_sort_for_grouping)
{
result_plan = (Plan *)
make_sort_from_groupcols(root,
* Add an explicit sort if we couldn't make the path come out
* the way the GROUP node needs it.
*/
- if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+ if (need_sort_for_grouping)
{
result_plan = (Plan *)
make_sort_from_groupcols(root,
* If we were not able to make the plan come out in the right order, add
* an explicit sort step.
*/
- if (parse->sortClause)
+ if (sort_pathkeys)
{
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
{
return tuple_fraction;
}
+
+/*
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ *
+ * In principle it might be interesting to consider other orderings of the
+ * GROUP BY elements, which could match the sort ordering of other
+ * possible plans (eg an indexscan) and thereby reduce cost. We don't
+ * bother with that, though. Hashed grouping will frequently win anyway.
+ */
+static void
+preprocess_groupclause(PlannerInfo *root)
+{
+ Query *parse = root->parse;
+ List *new_groupclause;
+ bool partial_match;
+ ListCell *sl;
+ ListCell *gl;
+
+ /* If no ORDER BY, nothing useful to do here anyway */
+ if (parse->sortClause == NIL)
+ return;
+
+ /*
+ * Scan the ORDER BY clause and construct a list of matching GROUP BY
+ * items, but only as far as we can make a matching prefix.
+ *
+ * This code assumes that the sortClause contains no duplicate items.
+ */
+ new_groupclause = NIL;
+ foreach(sl, parse->sortClause)
+ {
+ SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+
+ foreach(gl, parse->groupClause)
+ {
+ SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+ if (equal(gc, sc))
+ {
+ new_groupclause = lappend(new_groupclause, gc);
+ break;
+ }
+ }
+ if (gl == NULL)
+ break; /* no match, so stop scanning */
+ }
+
+ /* Did we match all of the ORDER BY list, or just some of it? */
+ partial_match = (sl != NULL);
+
+ /* If no match at all, no point in reordering GROUP BY */
+ if (new_groupclause == NIL)
+ return;
+
+ /*
+ * Add any remaining GROUP BY items to the new list, but only if we
+ * were able to make a complete match. In other words, we only
+ * rearrange the GROUP BY list if the result is that one list is a
+ * prefix of the other --- otherwise there's no possibility of a
+ * common sort.
+ */
+ foreach(gl, parse->groupClause)
+ {
+ SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+ if (list_member_ptr(new_groupclause, gc))
+ continue; /* it matched an ORDER BY item */
+ if (partial_match)
+ return; /* give up, no common sort possible */
+ new_groupclause = lappend(new_groupclause, gc);
+ }
+
+ /* Success --- install the rearranged GROUP BY list */
+ Assert(list_length(parse->groupClause) == list_length(new_groupclause));
+ parse->groupClause = new_groupclause;
+}
+
/*
* extract_grouping_ops - make an array of the equality operator OIDs
* for the GROUP BY clause
foreach(glitem, groupClause)
{
- GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+ SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
- groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
- if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
- elog(ERROR, "could not find equality operator for ordering operator %u",
- groupcl->sortop);
+ groupOperators[colno] = groupcl->eqop;
+ Assert(OidIsValid(groupOperators[colno]));
colno++;
}
foreach(gl, parse->groupClause)
{
- GroupClause *grpcl = (GroupClause *) lfirst(gl);
+ SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry *te = NULL;
ListCell *sl;
foreach(gl, root->parse->groupClause)
{
- GroupClause *grpcl = (GroupClause *) lfirst(gl);
+ SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
TargetEntry *te = NULL;
ListCell *sl;