Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items

[postgresql] / src / backend / optimizer / plan / planner.c
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 2f469bd924e1c56d5344f8edee5a1f49d2c8fd7d..735a77ac4e1eef2fed42ec0fb5e48c79cf7aa0a9 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.227 2008/03/18 22:04:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -42,6 +42,9 @@
  #include "utils/syscache.h"
  
  
+/* GUC parameter */
+double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
+
  /* Hook for plugins to get control in planner() */
  planner_hook_type planner_hook = NULL;
  
@@ -64,6 +67,7 @@ static bool is_dummy_plan(Plan *plan);
  static double preprocess_limit(PlannerInfo *root,
                                  double tuple_fraction,
                                  int64 *offset_est, int64 *count_est);
+static void preprocess_groupclause(PlannerInfo *root);
  static Oid *extract_grouping_ops(List *groupClause);
  static bool choose_hashed_grouping(PlannerInfo *root,
                                            double tuple_fraction, double limit_tuples,
@@ -142,11 +146,23 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
         {
                 /*
                  * We have no real idea how many tuples the user will ultimately FETCH
-                * from a cursor, but it seems a good bet that he doesn't want 'em
-                * all.  Optimize for 10% retrieval (you gotta better number?  Should
-                * this be a SETtable parameter?)
+                * from a cursor, but it is often the case that he doesn't want 'em
+                * all, or would prefer a fast-start plan anyway so that he can
+                * process some of the tuples sooner.  Use a GUC parameter to decide
+                * what fraction to optimize for.
+                */
+               tuple_fraction = cursor_tuple_fraction;
+
+               /*
+                * We document cursor_tuple_fraction as simply being a fraction,
+                * which means the edge cases 0 and 1 have to be treated specially
+                * here.  We convert 1 to 0 ("all the tuples") and 0 to a very small
+                * fraction.
                  */
-               tuple_fraction = 0.10;
+               if (tuple_fraction >= 1.0)
+                       tuple_fraction = 0.0;
+               else if (tuple_fraction <= 0.0)
+                       tuple_fraction = 1e-10;
         }
         else
         {
@@ -444,7 +460,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
          */
         if (list_length(glob->subplans) != num_old_subplans ||
                 root->query_level > 1)
-               SS_finalize_plan(root, plan);
+               SS_finalize_plan(root, plan, true);
  
         /* Return internal info if caller wants it */
         if (subroot)
@@ -503,7 +519,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
                 (root->parse->jointree->fromlist != NIL ||
                  kind == EXPRKIND_QUAL ||
                  root->query_level > 1))
-               expr = eval_const_expressions(expr);
+               expr = eval_const_expressions(root, expr);
  
         /*
          * If it's a qual or havingQual, canonicalize it.
@@ -811,6 +827,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 /*
                  * Calculate pathkeys that represent result ordering requirements
                  */
+               Assert(parse->distinctClause == NIL);
                 sort_pathkeys = make_pathkeys_for_sortclauses(root,
                                                                                                           parse->sortClause,
                                                                                                           tlist,
@@ -830,11 +847,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 Path       *best_path;
                 long            numGroups = 0;
                 AggClauseCounts agg_counts;
-               int                     numGroupCols = list_length(parse->groupClause);
+               int                     numGroupCols;
                 bool            use_hashed_grouping = false;
  
                 MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
  
+               /* Preprocess GROUP BY clause, if any */
+               if (parse->groupClause)
+                       preprocess_groupclause(root);
+               numGroupCols = list_length(parse->groupClause);
+
                 /* Preprocess targetlist */
                 tlist = preprocess_targetlist(root, tlist);
  
@@ -849,17 +871,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                  * Calculate pathkeys that represent grouping/ordering requirements.
                  * Stash them in PlannerInfo so that query_planner can canonicalize
                  * them after EquivalenceClasses have been formed.
+                *
+                * Note: for the moment, DISTINCT is always implemented via sort/uniq,
+                * and we set the sort_pathkeys to be the more rigorous of the
+                * DISTINCT and ORDER BY requirements.  This should be changed
+                * someday, but DISTINCT ON is a bit of a problem ...
                  */
                 root->group_pathkeys =
                         make_pathkeys_for_sortclauses(root,
                                                                                   parse->groupClause,
                                                                                   tlist,
                                                                                   false);
-               root->sort_pathkeys =
-                       make_pathkeys_for_sortclauses(root,
-                                                                                 parse->sortClause,
-                                                                                 tlist,
-                                                                                 false);
+               if (list_length(parse->distinctClause) > list_length(parse->sortClause))
+                       root->sort_pathkeys =
+                               make_pathkeys_for_sortclauses(root,
+                                                                                         parse->distinctClause,
+                                                                                         tlist,
+                                                                                         false);
+               else
+                       root->sort_pathkeys =
+                               make_pathkeys_for_sortclauses(root,
+                                                                                         parse->sortClause,
+                                                                                         tlist,
+                                                                                         false);
  
                 /*
                  * Will need actual number of aggregates for estimating costs.
@@ -888,9 +922,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                  * by ORDER BY --- but that might just leave us failing to exploit an
                  * available sort order at all. Needs more thought...)
                  */
-               if (parse->groupClause)
+               if (root->group_pathkeys)
                         root->query_pathkeys = root->group_pathkeys;
-               else if (parse->sortClause)
+               else if (root->sort_pathkeys)
                         root->query_pathkeys = root->sort_pathkeys;
                 else
                         root->query_pathkeys = NIL;
@@ -957,9 +991,23 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                          * Normal case --- create a plan according to query_planner's
                          * results.
                          */
+                       bool    need_sort_for_grouping = false;
+
                         result_plan = create_plan(root, best_path);
                         current_pathkeys = best_path->pathkeys;
  
+                       /* Detect if we'll need an explicit sort for grouping */
+                       if (parse->groupClause && !use_hashed_grouping &&
+                               !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                       {
+                               need_sort_for_grouping = true;
+                               /*
+                                * Always override query_planner's tlist, so that we don't
+                                * sort useless data from a "physical" tlist.
+                                */
+                               need_tlist_eval = true;
+                       }
+
                         /*
                          * create_plan() returns a plan with just a "flat" tlist of
                          * required Vars.  Usually we need to insert the sub_tlist as the
@@ -1054,8 +1102,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
  
                                 if (parse->groupClause)
                                 {
-                                       if (!pathkeys_contained_in(group_pathkeys,
-                                                                                          current_pathkeys))
+                                       if (need_sort_for_grouping)
                                         {
                                                 result_plan = (Plan *)
                                                         make_sort_from_groupcols(root,
@@ -1098,7 +1145,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                  * Add an explicit sort if we couldn't make the path come out
                                  * the way the GROUP node needs it.
                                  */
-                               if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                               if (need_sort_for_grouping)
                                 {
                                         result_plan = (Plan *)
                                                 make_sort_from_groupcols(root,
@@ -1144,7 +1191,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
          * If we were not able to make the plan come out in the right order, add
          * an explicit sort step.
          */
-       if (parse->sortClause)
+       if (sort_pathkeys)
         {
                 if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
                 {
@@ -1435,6 +1482,88 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
         return tuple_fraction;
  }
  
+
+/*
+ * preprocess_groupclause - do preparatory work on GROUP BY clause
+ *
+ * The idea here is to adjust the ordering of the GROUP BY elements
+ * (which in itself is semantically insignificant) to match ORDER BY,
+ * thereby allowing a single sort operation to both implement the ORDER BY
+ * requirement and set up for a Unique step that implements GROUP BY.
+ *
+ * In principle it might be interesting to consider other orderings of the
+ * GROUP BY elements, which could match the sort ordering of other
+ * possible plans (eg an indexscan) and thereby reduce cost.  We don't
+ * bother with that, though.  Hashed grouping will frequently win anyway.
+ */
+static void
+preprocess_groupclause(PlannerInfo *root)
+{
+       Query      *parse = root->parse;
+       List       *new_groupclause;
+       bool            partial_match;
+       ListCell   *sl;
+       ListCell   *gl;
+
+       /* If no ORDER BY, nothing useful to do here anyway */
+       if (parse->sortClause == NIL)
+               return;
+
+       /*
+        * Scan the ORDER BY clause and construct a list of matching GROUP BY
+        * items, but only as far as we can make a matching prefix.
+        *
+        * This code assumes that the sortClause contains no duplicate items.
+        */
+       new_groupclause = NIL;
+       foreach(sl, parse->sortClause)
+       {
+               SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+
+               foreach(gl, parse->groupClause)
+               {
+                       SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+                       if (equal(gc, sc))
+                       {
+                               new_groupclause = lappend(new_groupclause, gc);
+                               break;
+                       }
+               }
+               if (gl == NULL)
+                       break;                          /* no match, so stop scanning */
+       }
+
+       /* Did we match all of the ORDER BY list, or just some of it? */
+       partial_match = (sl != NULL);
+
+       /* If no match at all, no point in reordering GROUP BY */
+       if (new_groupclause == NIL)
+               return;
+
+       /*
+        * Add any remaining GROUP BY items to the new list, but only if we
+        * were able to make a complete match.  In other words, we only
+        * rearrange the GROUP BY list if the result is that one list is a
+        * prefix of the other --- otherwise there's no possibility of a
+        * common sort.
+        */
+       foreach(gl, parse->groupClause)
+       {
+               SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
+
+               if (list_member_ptr(new_groupclause, gc))
+                       continue;                       /* it matched an ORDER BY item */
+               if (partial_match)
+                       return;                         /* give up, no common sort possible */
+               new_groupclause = lappend(new_groupclause, gc);
+       }
+
+       /* Success --- install the rearranged GROUP BY list */
+       Assert(list_length(parse->groupClause) == list_length(new_groupclause));
+       parse->groupClause = new_groupclause;
+}
+
  /*
   * extract_grouping_ops - make an array of the equality operator OIDs
   *             for the GROUP BY clause
@@ -1451,12 +1580,10 @@ extract_grouping_ops(List *groupClause)
  
         foreach(glitem, groupClause)
         {
-               GroupClause *groupcl = (GroupClause *) lfirst(glitem);
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
  
-               groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
-               if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */
-                       elog(ERROR, "could not find equality operator for ordering operator %u",
-                                groupcl->sortop);
+               groupOperators[colno] = groupcl->eqop;
+               Assert(OidIsValid(groupOperators[colno]));
                 colno++;
         }
  
@@ -1697,7 +1824,7 @@ make_subplanTargetList(PlannerInfo *root,
  
                 foreach(gl, parse->groupClause)
                 {
-                       GroupClause *grpcl = (GroupClause *) lfirst(gl);
+                       SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
                         Node       *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
                         TargetEntry *te = NULL;
                         ListCell   *sl;
@@ -1756,7 +1883,7 @@ locate_grouping_columns(PlannerInfo *root,
  
         foreach(gl, root->parse->groupClause)
         {
-               GroupClause *grpcl = (GroupClause *) lfirst(gl);
+               SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
                 Node       *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
                 TargetEntry *te = NULL;
                 ListCell   *sl;