Make GROUP BY work properly for datatypes that only support hashing and not

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index d25a5509b4cef44517ccc2c012420200a8d92093..5e5da6cda4f1bd6eeff833a97ddd3373830375fa 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -288,8 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
                  * levels of sort --- and, therefore, certainly need to read all the
                  * tuples --- unless ORDER BY is a subset of GROUP BY.
                  */
-               if (root->group_pathkeys && root->sort_pathkeys &&
-                       !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+               if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
                         tuple_fraction = 0.0;
         }
         else if (parse->hasAggs || root->hasHavingQual)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 735a77ac4e1eef2fed42ec0fb5e48c79cf7aa0a9..40a9bffaf37e3db17aed317e80d69153fc207497 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
                                  int64 *offset_est, int64 *count_est);
  static void preprocess_groupclause(PlannerInfo *root);
  static Oid *extract_grouping_ops(List *groupClause);
+static bool grouping_is_sortable(List *groupClause);
+static bool grouping_is_hashable(List *groupClause);
  static bool choose_hashed_grouping(PlannerInfo *root,
                                            double tuple_fraction, double limit_tuples,
                                            Path *cheapest_path, Path *sorted_path,
-                                          Oid *groupOperators, double dNumGroups,
-                                          AggClauseCounts *agg_counts);
+                                          double dNumGroups, AggClauseCounts *agg_counts);
  static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
                                            AttrNumber **groupColIdx, bool *need_tlist_eval);
  static void locate_grouping_columns(PlannerInfo *root,
@@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 List       *sub_tlist;
                 List       *group_pathkeys;
                 AttrNumber *groupColIdx = NULL;
-               Oid                *groupOperators = NULL;
                 bool            need_tlist_eval = true;
                 QualCost        tlist_cost;
                 Path       *cheapest_path;
@@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                  * DISTINCT and ORDER BY requirements.  This should be changed
                  * someday, but DISTINCT ON is a bit of a problem ...
                  */
-               root->group_pathkeys =
-                       make_pathkeys_for_sortclauses(root,
-                                                                                 parse->groupClause,
-                                                                                 tlist,
-                                                                                 false);
+               if (parse->groupClause && grouping_is_sortable(parse->groupClause))
+                       root->group_pathkeys =
+                               make_pathkeys_for_sortclauses(root,
+                                                                                         parse->groupClause,
+                                                                                         tlist,
+                                                                                         false);
+               else
+                       root->group_pathkeys = NIL;
+
                 if (list_length(parse->distinctClause) > list_length(parse->sortClause))
                         root->sort_pathkeys =
                                 make_pathkeys_for_sortclauses(root,
@@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 /*
                  * Figure out whether we need a sorted result from query_planner.
                  *
-                * If we have a GROUP BY clause, then we want a result sorted properly
-                * for grouping.  Otherwise, if there is an ORDER BY clause, we want
-                * to sort by the ORDER BY clause.      (Note: if we have both, and ORDER
-                * BY is a superset of GROUP BY, it would be tempting to request sort
-                * by ORDER BY --- but that might just leave us failing to exploit an
-                * available sort order at all. Needs more thought...)
+                * If we have a sortable GROUP BY clause, then we want a result sorted
+                * properly for grouping.  Otherwise, if there is an ORDER BY clause,
+                * we want to sort by the ORDER BY clause. (Note: if we have both, and
+                * ORDER BY is a superset of GROUP BY, it would be tempting to request
+                * sort by ORDER BY --- but that might just leave us failing to
+                * exploit an available sort order at all. Needs more thought...)
                  */
                 if (root->group_pathkeys)
                         root->query_pathkeys = root->group_pathkeys;
@@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 sort_pathkeys = root->sort_pathkeys;
  
                 /*
-                * If grouping, extract the grouping operators and decide whether we
-                * want to use hashed grouping.
+                * If grouping, decide whether to use sorted or hashed grouping.
                  */
                 if (parse->groupClause)
                 {
-                       groupOperators = extract_grouping_ops(parse->groupClause);
-                       use_hashed_grouping =
-                               choose_hashed_grouping(root, tuple_fraction, limit_tuples,
-                                                                          cheapest_path, sorted_path,
-                                                                          groupOperators, dNumGroups,
-                                                                          &agg_counts);
+                       bool    can_hash;
+                       bool    can_sort;
+
+                       /*
+                        * Executor doesn't support hashed aggregation with DISTINCT
+                        * aggregates.  (Doing so would imply storing *all* the input
+                        * values in the hash table, which seems like a certain loser.)
+                        */
+                       can_hash = (agg_counts.numDistinctAggs == 0 &&
+                                               grouping_is_hashable(parse->groupClause));
+                       can_sort = grouping_is_sortable(parse->groupClause);
+                       if (can_hash && can_sort)
+                       {
+                               /* we have a meaningful choice to make ... */
+                               use_hashed_grouping =
+                                       choose_hashed_grouping(root,
+                                                                                  tuple_fraction, limit_tuples,
+                                                                                  cheapest_path, sorted_path,
+                                                                                  dNumGroups, &agg_counts);
+                       }
+                       else if (can_hash)
+                               use_hashed_grouping = true;
+                       else if (can_sort)
+                               use_hashed_grouping = false;
+                       else
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("could not implement GROUP BY"),
+                                                errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
  
                         /* Also convert # groups to long int --- but 'ware overflow! */
                         numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
@@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                 AGG_HASHED,
                                                                                                 numGroupCols,
                                                                                                 groupColIdx,
-                                                                                               groupOperators,
+                                                                       extract_grouping_ops(parse->groupClause),
                                                                                                 numGroups,
                                                                                                 agg_counts.numAggs,
                                                                                                 result_plan);
@@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                 aggstrategy,
                                                                                                 numGroupCols,
                                                                                                 groupColIdx,
-                                                                                               groupOperators,
+                                                                       extract_grouping_ops(parse->groupClause),
                                                                                                 numGroups,
                                                                                                 agg_counts.numAggs,
                                                                                                 result_plan);
@@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                   (List *) parse->havingQual,
                                                                                                   numGroupCols,
                                                                                                   groupColIdx,
-                                                                                                 groupOperators,
+                                                                       extract_grouping_ops(parse->groupClause),
                                                                                                   dNumGroups,
                                                                                                   result_plan);
                                 /* The Group node won't change sort ordering */
@@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
   * GROUP BY elements, which could match the sort ordering of other
   * possible plans (eg an indexscan) and thereby reduce cost.  We don't
   * bother with that, though.  Hashed grouping will frequently win anyway.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
   */
  static void
  preprocess_groupclause(PlannerInfo *root)
@@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
         ListCell   *sl;
         ListCell   *gl;
  
-       /* If no ORDER BY, nothing useful to do here anyway */
+       /* If no ORDER BY, nothing useful to do here */
         if (parse->sortClause == NIL)
                 return;
  
@@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
          * were able to make a complete match.  In other words, we only
          * rearrange the GROUP BY list if the result is that one list is a
          * prefix of the other --- otherwise there's no possibility of a
-        * common sort.
+        * common sort.  Also, give up if there are any non-sortable GROUP BY
+        * items, since then there's no hope anyway.
          */
         foreach(gl, parse->groupClause)
         {
@@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
                         continue;                       /* it matched an ORDER BY item */
                 if (partial_match)
                         return;                         /* give up, no common sort possible */
+               if (!OidIsValid(gc->sortop))
+                       return;                         /* give up, GROUP BY can't be sorted */
                 new_groupclause = lappend(new_groupclause, gc);
         }
  
@@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
  
  /*
   * extract_grouping_ops - make an array of the equality operator OIDs
- *             for the GROUP BY clause
+ *             for a SortGroupClause list
   */
  static Oid *
  extract_grouping_ops(List *groupClause)
@@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
         return groupOperators;
  }
  
+/*
+ * grouping_is_sortable - is it possible to implement grouping list by sorting?
+ *
+ * This is easy since the parser will have included a sortop if one exists.
+ */
+static bool
+grouping_is_sortable(List *groupClause)
+{
+       ListCell   *glitem;
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               if (!OidIsValid(groupcl->sortop))
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * grouping_is_hashable - is it possible to implement grouping list by hashing?
+ *
+ * We assume hashing is OK if the equality operators are marked oprcanhash.
+ * (If there isn't actually a supporting hash function, the executor will
+ * complain at runtime; but this is a misdeclaration of the operator, not
+ * a system bug.)
+ */
+static bool
+grouping_is_hashable(List *groupClause)
+{
+       ListCell   *glitem;
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               if (!op_hashjoinable(groupcl->eqop))
+                       return false;
+       }
+       return true;
+}
+
  /*
   * choose_hashed_grouping - should we use hashed grouping?
+ *
+ * Note: this is only applied when both alternatives are actually feasible.
   */
  static bool
  choose_hashed_grouping(PlannerInfo *root,
                                            double tuple_fraction, double limit_tuples,
                                            Path *cheapest_path, Path *sorted_path,
-                                          Oid *groupOperators, double dNumGroups,
-                                          AggClauseCounts *agg_counts)
+                                          double dNumGroups, AggClauseCounts *agg_counts)
  {
         int                     numGroupCols = list_length(root->parse->groupClause);
         double          cheapest_path_rows;
@@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
         List       *current_pathkeys;
         Path            hashed_p;
         Path            sorted_p;
-       int                     i;
  
-       /*
-        * Check can't-do-it conditions, including whether the grouping operators
-        * are hashjoinable.  (We assume hashing is OK if they are marked
-        * oprcanhash.  If there isn't actually a supporting hash function, the
-        * executor will complain at runtime.)
-        *
-        * Executor doesn't support hashed aggregation with DISTINCT aggregates.
-        * (Doing so would imply storing *all* the input values in the hash table,
-        * which seems like a certain loser.)
-        */
+       /* Prefer sorting when enable_hashagg is off */
         if (!enable_hashagg)
                 return false;
-       if (agg_counts->numDistinctAggs != 0)
-               return false;
-       for (i = 0; i < numGroupCols; i++)
-       {
-               if (!op_hashjoinable(groupOperators[i]))
-                       return false;
-       }
  
         /*
          * Don't do it if it doesn't look like the hashtable will fit into
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c

index 0a3b544b10e6e73f90d8c5b06a74321d5941f580..76e59c82d6edee77066ce70064769b29999ae7bd 100644 (file)
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1351,15 +1351,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
                 /*
                  * If no match in ORDER BY, just add it to the result using
                  * default sort/group semantics.
-                *
-                * XXX for now, the planner requires groupClause to be sortable,
-                * so we have to insist on that here.
                  */
                 if (!found)
                         result = addTargetToGroupList(pstate, tle,
                                                                                   result, *targetlist,
-                                                                                 true, /* XXX for now */
-                                                                                 true);
+                                                                                 false, true);
         }
  
         return result;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
src/backend/optimizer/plan/planmain.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/parser/parse_clause.c		patch \| blob \| history