]> granicus.if.org Git - postgresql/blobdiff - src/backend/optimizer/plan/planner.c
Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items
[postgresql] / src / backend / optimizer / plan / planner.c
index 2f469bd924e1c56d5344f8edee5a1f49d2c8fd7d..735a77ac4e1eef2fed42ec0fb5e48c79cf7aa0a9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.227 2008/03/18 22:04:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,6 +42,9 @@
 #include "utils/syscache.h"
 
 
+/* GUC parameter */
+double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
+
 /* Hook for plugins to get control in planner() */
 planner_hook_type planner_hook = NULL;
 
@@ -64,6 +67,7 @@ static bool is_dummy_plan(Plan *plan);
 static double preprocess_limit(PlannerInfo *root,
                                 double tuple_fraction,
                                 int64 *offset_est, int64 *count_est);
+static void preprocess_groupclause(PlannerInfo *root);
 static Oid *extract_grouping_ops(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
                                           double tuple_fraction, double limit_tuples,
@@ -142,11 +146,23 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
        {
                /*
                 * We have no real idea how many tuples the user will ultimately FETCH
-                * from a cursor, but it seems a good bet that he doesn't want 'em
-                * all.  Optimize for 10% retrieval (you gotta better number?  Should
-                * this be a SETtable parameter?)
+                * from a cursor, but it is often the case that he doesn't want 'em
+                * all, or would prefer a fast-start plan anyway so that he can
+                * process some of the tuples sooner.  Use a GUC parameter to decide
+                * what fraction to optimize for.
+                */
+               tuple_fraction = cursor_tuple_fraction;
+
+               /*
+                * We document cursor_tuple_fraction as simply being a fraction,
+                * which means the edge cases 0 and 1 have to be treated specially
+                * here.  We convert 1 to 0 ("all the tuples") and 0 to a very small
+                * fraction.
                 */
-               tuple_fraction = 0.10;
+               if (tuple_fraction >= 1.0)
+                       tuple_fraction = 0.0;
+               else if (tuple_fraction <= 0.0)
+                       tuple_fraction = 1e-10;
        }
        else
        {
@@ -444,7 +460,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
         */
        if (list_length(glob->subplans) != num_old_subplans ||
                root->query_level > 1)
-               SS_finalize_plan(root, plan);
+               SS_finalize_plan(root, plan, true);
 
        /* Return internal info if caller wants it */
        if (subroot)
@@ -503,7 +519,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
                (root->parse->jointree->fromlist != NIL ||
                 kind == EXPRKIND_QUAL ||
                 root->query_level > 1))
-               expr = eval_const_expressions(expr);
+               expr = eval_const_expressions(root, expr);
 
        /*
         * If it's a qual or havingQual, canonicalize it.
@@ -811,6 +827,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                /*
                 * Calculate pathkeys that represent result ordering requirements
                 */
+               Assert(parse->distinctClause == NIL);
                sort_pathkeys = make_pathkeys_for_sortclauses(root,
                                                                                                          parse->sortClause,
                                                                                                          tlist,
@@ -830,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                Path       *best_path;
                long            numGroups = 0;
                AggClauseCounts agg_counts;
-               int                     numGroupCols = list_length(parse->groupClause);
+               int                     numGroupCols;
                bool            use_hashed_grouping = false;
 
                MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
 
+               /* Preprocess GROUP BY clause, if any */
+               if (parse->groupClause)
+                       preprocess_groupclause(root);
+               numGroupCols = list_length(parse->groupClause);
+
                /* Preprocess targetlist */
                tlist = preprocess_targetlist(root, tlist);
 
@@ -849,17 +871,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 * Calculate pathkeys that represent grouping/ordering requirements.
                 * Stash them in PlannerInfo so that query_planner can canonicalize
                 * them after EquivalenceClasses have been formed.
+                *
+                * Note: for the moment, DISTINCT is always implemented via sort/uniq,
+                * and we set the sort_pathkeys to be the more rigorous of the
+                * DISTINCT and ORDER BY requirements.  This should be changed
+                * someday, but DISTINCT ON is a bit of a problem ...
                 */
                root->group_pathkeys =
                        make_pathkeys_for_sortclauses(root,
                                                                                  parse->groupClause,
                                                                                  tlist,
                                                                                  false);
-               root->sort_pathkeys =
-                       make_pathkeys_for_sortclauses(root,
-                                                                                 parse->sortClause,
-                                                                                 tlist,
-                                                                                 false);
+               if (list_length(parse->distinctClause) > list_length(parse->sortClause))
+                       root->sort_pathkeys =
+                               make_pathkeys_for_sortclauses(root,
+                                                                                         parse->distinctClause,
+                                                                                         tlist,
+                                                                                         false);
+               else
+                       root->sort_pathkeys =
+                               make_pathkeys_for_sortclauses(root,
+                                                                                         parse->sortClause,
+                                                                                         tlist,
+                                                                                         false);
 
                /*
                 * Will need actual number of aggregates for estimating costs.
@@ -888,9 +922,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 * by ORDER BY --- but that might just leave us failing to exploit an
                 * available sort order at all. Needs more thought...)
                 */
-               if (parse->groupClause)
+               if (root->group_pathkeys)
                        root->query_pathkeys = root->group_pathkeys;
-               else if (parse->sortClause)
+               else if (root->sort_pathkeys)
                        root->query_pathkeys = root->sort_pathkeys;
                else
                        root->query_pathkeys = NIL;
@@ -957,9 +991,23 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                         * Normal case --- create a plan according to query_planner's
                         * results.
                         */
+                       bool    need_sort_for_grouping = false;
+
                        result_plan = create_plan(root, best_path);
                        current_pathkeys = best_path->pathkeys;
 
+                       /* Detect if we'll need an explicit sort for grouping */
+                       if (parse->groupClause && !use_hashed_grouping &&
+                               !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                       {
+                               need_sort_for_grouping = true;
+                               /*
+                                * Always override query_planner's tlist, so that we don't
+                                * sort useless data from a "physical" tlist.
+                                */
+                               need_tlist_eval = true;
+                       }
+
                        /*
                         * create_plan() returns a plan with just a "flat" tlist of
                         * required Vars.  Usually we need to insert the sub_tlist as the
@@ -1054,8 +1102,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
                                if (parse->groupClause)
                                {
-                                       if (!pathkeys_contained_in(group_pathkeys,
-                                                                                          current_pathkeys))
+                                       if (need_sort_for_grouping)
                                        {
                                                result_plan = (Plan *)
                                                        make_sort_from_groupcols(root,
@@ -1098,7 +1145,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                 * Add an explicit sort if we couldn't make the path come out
                                 * the way the GROUP node needs it.
                                 */
-                               if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                               if (need_sort_for_grouping)
                                {
                                        result_plan = (Plan *)
                                                make_sort_from_groupcols(root,
@@ -1144,7 +1191,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
         * If we were not able to make the plan come out in the right order, add
         * an explicit sort step.
         */
-       if (parse->sortClause)
+       if (sort_pathkeys)
        {
                if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
                {
@@ -1435,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
        return tuple_fraction;
 }
 
+
+/*
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ *
+ * In principle it might be interesting to consider other orderings of the
+ * GROUP BY elements, which could match the sort ordering of other
+ * possible plans (eg an indexscan) and thereby reduce cost.  We don't
+ * bother with that, though.  Hashed grouping will frequently win anyway.
+ */
+static void
+preprocess_groupclause(PlannerInfo *root)
+{
+       Query      *parse = root->parse;
+       List       *new_groupclause;
+       bool            partial_match;
+       ListCell   *sl;
+       ListCell   *gl;
+
+       /* If no ORDER BY, nothing useful to do here anyway */
+       if (parse->sortClause == NIL)
+               return;
+
+       /*
+        * Scan the ORDER BY clause and construct a list of matching GROUP BY
+        * items, but only as far as we can make a matching prefix.
+        *
+        * This code assumes that the sortClause contains no duplicate items.
+        */
+       new_groupclause = NIL;
+       foreach(sl, parse->sortClause)
+       {
+               SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+
+               foreach(gl, parse->groupClause)
+               {
+                       SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+                       if (equal(gc, sc))
+                       {
+                               new_groupclause = lappend(new_groupclause, gc);
+                               break;
+                       }
+               }
+               if (gl == NULL)
+                       break;                          /* no match, so stop scanning */
+       }
+
+       /* Did we match all of the ORDER BY list, or just some of it? */
+       partial_match = (sl != NULL);
+
+       /* If no match at all, no point in reordering GROUP BY */
+       if (new_groupclause == NIL)
+               return;
+
+       /*
+        * Add any remaining GROUP BY items to the new list, but only if we
+        * were able to make a complete match.  In other words, we only
+        * rearrange the GROUP BY list if the result is that one list is a
+        * prefix of the other --- otherwise there's no possibility of a
+        * common sort.
+        */
+       foreach(gl, parse->groupClause)
+       {
+               SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+               if (list_member_ptr(new_groupclause, gc))
+                       continue;                       /* it matched an ORDER BY item */
+               if (partial_match)
+                       return;                         /* give up, no common sort possible */
+               new_groupclause = lappend(new_groupclause, gc);
+       }
+
+       /* Success --- install the rearranged GROUP BY list */
+       Assert(list_length(parse->groupClause) == list_length(new_groupclause));
+       parse->groupClause = new_groupclause;
+}
+
 /*
  * extract_grouping_ops - make an array of the equality operator OIDs
  *             for the GROUP BY clause
@@ -1451,12 +1580,10 @@ extract_grouping_ops(List *groupClause)
 
        foreach(glitem, groupClause)
        {
-               GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
 
-               groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
-               if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
-                       elog(ERROR, "could not find equality operator for ordering operator %u",
-                                groupcl->sortop);
+               groupOperators[colno] = groupcl->eqop;
+               Assert(OidIsValid(groupOperators[colno]));
                colno++;
        }
 
@@ -1697,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root,
 
                foreach(gl, parse->groupClause)
                {
-                       GroupClause *grpcl = (GroupClause *) lfirst(gl);
+                       SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
                        Node       *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
                        TargetEntry *te = NULL;
                        ListCell   *sl;
@@ -1756,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root,
 
        foreach(gl, root->parse->groupClause)
        {
-               GroupClause *grpcl = (GroupClause *) lfirst(gl);
+               SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
                Node       *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
                TargetEntry *te = NULL;
                ListCell   *sl;