]> granicus.if.org Git - postgresql/commitdiff
Make GROUP BY work properly for datatypes that only support hashing and not
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 3 Aug 2008 19:10:52 +0000 (19:10 +0000)
sorting.  The infrastructure for this was all in place already; it's only
necessary to fix the planner to not assume that sorting is always an available
option.

src/backend/optimizer/plan/planmain.c
src/backend/optimizer/plan/planner.c
src/backend/parser/parse_clause.c

index d25a5509b4cef44517ccc2c012420200a8d92093..5e5da6cda4f1bd6eeff833a97ddd3373830375fa 100644 (file)
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -288,8 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
                 * levels of sort --- and, therefore, certainly need to read all the
                 * tuples --- unless ORDER BY is a subset of GROUP BY.
                 */
-               if (root->group_pathkeys && root->sort_pathkeys &&
-                       !pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+               if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
                        tuple_fraction = 0.0;
        }
        else if (parse->hasAggs || root->hasHavingQual)
index 735a77ac4e1eef2fed42ec0fb5e48c79cf7aa0a9..40a9bffaf37e3db17aed317e80d69153fc207497 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
                                 int64 *offset_est, int64 *count_est);
 static void preprocess_groupclause(PlannerInfo *root);
 static Oid *extract_grouping_ops(List *groupClause);
+static bool grouping_is_sortable(List *groupClause);
+static bool grouping_is_hashable(List *groupClause);
 static bool choose_hashed_grouping(PlannerInfo *root,
                                           double tuple_fraction, double limit_tuples,
                                           Path *cheapest_path, Path *sorted_path,
-                                          Oid *groupOperators, double dNumGroups,
-                                          AggClauseCounts *agg_counts);
+                                          double dNumGroups, AggClauseCounts *agg_counts);
 static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
                                           AttrNumber **groupColIdx, bool *need_tlist_eval);
 static void locate_grouping_columns(PlannerInfo *root,
@@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                List       *sub_tlist;
                List       *group_pathkeys;
                AttrNumber *groupColIdx = NULL;
-               Oid                *groupOperators = NULL;
                bool            need_tlist_eval = true;
                QualCost        tlist_cost;
                Path       *cheapest_path;
@@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 * DISTINCT and ORDER BY requirements.  This should be changed
                 * someday, but DISTINCT ON is a bit of a problem ...
                 */
-               root->group_pathkeys =
-                       make_pathkeys_for_sortclauses(root,
-                                                                                 parse->groupClause,
-                                                                                 tlist,
-                                                                                 false);
+               if (parse->groupClause && grouping_is_sortable(parse->groupClause))
+                       root->group_pathkeys =
+                               make_pathkeys_for_sortclauses(root,
+                                                                                         parse->groupClause,
+                                                                                         tlist,
+                                                                                         false);
+               else
+                       root->group_pathkeys = NIL;
+
                if (list_length(parse->distinctClause) > list_length(parse->sortClause))
                        root->sort_pathkeys =
                                make_pathkeys_for_sortclauses(root,
@@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                /*
                 * Figure out whether we need a sorted result from query_planner.
                 *
-                * If we have a GROUP BY clause, then we want a result sorted properly
-                * for grouping.  Otherwise, if there is an ORDER BY clause, we want
-                * to sort by the ORDER BY clause.      (Note: if we have both, and ORDER
-                * BY is a superset of GROUP BY, it would be tempting to request sort
-                * by ORDER BY --- but that might just leave us failing to exploit an
-                * available sort order at all. Needs more thought...)
+                * If we have a sortable GROUP BY clause, then we want a result sorted
+                * properly for grouping.  Otherwise, if there is an ORDER BY clause,
+                * we want to sort by the ORDER BY clause. (Note: if we have both, and
+                * ORDER BY is a superset of GROUP BY, it would be tempting to request
+                * sort by ORDER BY --- but that might just leave us failing to
+                * exploit an available sort order at all. Needs more thought...)
                 */
                if (root->group_pathkeys)
                        root->query_pathkeys = root->group_pathkeys;
@@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                sort_pathkeys = root->sort_pathkeys;
 
                /*
-                * If grouping, extract the grouping operators and decide whether we
-                * want to use hashed grouping.
+                * If grouping, decide whether to use sorted or hashed grouping.
                 */
                if (parse->groupClause)
                {
-                       groupOperators = extract_grouping_ops(parse->groupClause);
-                       use_hashed_grouping =
-                               choose_hashed_grouping(root, tuple_fraction, limit_tuples,
-                                                                          cheapest_path, sorted_path,
-                                                                          groupOperators, dNumGroups,
-                                                                          &agg_counts);
+                       bool    can_hash;
+                       bool    can_sort;
+
+                       /*
+                        * Executor doesn't support hashed aggregation with DISTINCT
+                        * aggregates.  (Doing so would imply storing *all* the input
+                        * values in the hash table, which seems like a certain loser.)
+                        */
+                       can_hash = (agg_counts.numDistinctAggs == 0 &&
+                                               grouping_is_hashable(parse->groupClause));
+                       can_sort = grouping_is_sortable(parse->groupClause);
+                       if (can_hash && can_sort)
+                       {
+                               /* we have a meaningful choice to make ... */
+                               use_hashed_grouping =
+                                       choose_hashed_grouping(root,
+                                                                                  tuple_fraction, limit_tuples,
+                                                                                  cheapest_path, sorted_path,
+                                                                                  dNumGroups, &agg_counts);
+                       }
+                       else if (can_hash)
+                               use_hashed_grouping = true;
+                       else if (can_sort)
+                               use_hashed_grouping = false;
+                       else
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("could not implement GROUP BY"),
+                                                errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
 
                        /* Also convert # groups to long int --- but 'ware overflow! */
                        numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
@@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                AGG_HASHED,
                                                                                                numGroupCols,
                                                                                                groupColIdx,
-                                                                                               groupOperators,
+                                                                       extract_grouping_ops(parse->groupClause),
                                                                                                numGroups,
                                                                                                agg_counts.numAggs,
                                                                                                result_plan);
@@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                aggstrategy,
                                                                                                numGroupCols,
                                                                                                groupColIdx,
-                                                                                               groupOperators,
+                                                                       extract_grouping_ops(parse->groupClause),
                                                                                                numGroups,
                                                                                                agg_counts.numAggs,
                                                                                                result_plan);
@@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                  (List *) parse->havingQual,
                                                                                                  numGroupCols,
                                                                                                  groupColIdx,
-                                                                                                 groupOperators,
+                                                                       extract_grouping_ops(parse->groupClause),
                                                                                                  dNumGroups,
                                                                                                  result_plan);
                                /* The Group node won't change sort ordering */
@@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
  * GROUP BY elements, which could match the sort ordering of other
  * possible plans (eg an indexscan) and thereby reduce cost.  We don't
  * bother with that, though.  Hashed grouping will frequently win anyway.
+ *
+ * Note: we need no comparable processing of the distinctClause because
+ * the parser already enforced that that matches ORDER BY.
  */
 static void
 preprocess_groupclause(PlannerInfo *root)
@@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
        ListCell   *sl;
        ListCell   *gl;
 
-       /* If no ORDER BY, nothing useful to do here anyway */
+       /* If no ORDER BY, nothing useful to do here */
        if (parse->sortClause == NIL)
                return;
 
@@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
         * were able to make a complete match.  In other words, we only
         * rearrange the GROUP BY list if the result is that one list is a
         * prefix of the other --- otherwise there's no possibility of a
-        * common sort.
+        * common sort.  Also, give up if there are any non-sortable GROUP BY
+        * items, since then there's no hope anyway.
         */
        foreach(gl, parse->groupClause)
        {
@@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
                        continue;                       /* it matched an ORDER BY item */
                if (partial_match)
                        return;                         /* give up, no common sort possible */
+               if (!OidIsValid(gc->sortop))
+                       return;                         /* give up, GROUP BY can't be sorted */
                new_groupclause = lappend(new_groupclause, gc);
        }
 
@@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
 
 /*
  * extract_grouping_ops - make an array of the equality operator OIDs
- *             for the GROUP BY clause
+ *             for a SortGroupClause list
  */
 static Oid *
 extract_grouping_ops(List *groupClause)
@@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
        return groupOperators;
 }
 
+/*
+ * grouping_is_sortable - is it possible to implement grouping list by sorting?
+ *
+ * This is easy since the parser will have included a sortop if one exists.
+ */
+static bool
+grouping_is_sortable(List *groupClause)
+{
+       ListCell   *glitem;
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               if (!OidIsValid(groupcl->sortop))
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * grouping_is_hashable - is it possible to implement grouping list by hashing?
+ *
+ * We assume hashing is OK if the equality operators are marked oprcanhash.
+ * (If there isn't actually a supporting hash function, the executor will
+ * complain at runtime; but this is a misdeclaration of the operator, not
+ * a system bug.)
+ */
+static bool
+grouping_is_hashable(List *groupClause)
+{
+       ListCell   *glitem;
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+
+               if (!op_hashjoinable(groupcl->eqop))
+                       return false;
+       }
+       return true;
+}
+
 /*
  * choose_hashed_grouping - should we use hashed grouping?
+ *
+ * Note: this is only applied when both alternatives are actually feasible.
  */
 static bool
 choose_hashed_grouping(PlannerInfo *root,
                                           double tuple_fraction, double limit_tuples,
                                           Path *cheapest_path, Path *sorted_path,
-                                          Oid *groupOperators, double dNumGroups,
-                                          AggClauseCounts *agg_counts)
+                                          double dNumGroups, AggClauseCounts *agg_counts)
 {
        int                     numGroupCols = list_length(root->parse->groupClause);
        double          cheapest_path_rows;
@@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
        List       *current_pathkeys;
        Path            hashed_p;
        Path            sorted_p;
-       int                     i;
 
-       /*
-        * Check can't-do-it conditions, including whether the grouping operators
-        * are hashjoinable.  (We assume hashing is OK if they are marked
-        * oprcanhash.  If there isn't actually a supporting hash function, the
-        * executor will complain at runtime.)
-        *
-        * Executor doesn't support hashed aggregation with DISTINCT aggregates.
-        * (Doing so would imply storing *all* the input values in the hash table,
-        * which seems like a certain loser.)
-        */
+       /* Prefer sorting when enable_hashagg is off */
        if (!enable_hashagg)
                return false;
-       if (agg_counts->numDistinctAggs != 0)
-               return false;
-       for (i = 0; i < numGroupCols; i++)
-       {
-               if (!op_hashjoinable(groupOperators[i]))
-                       return false;
-       }
 
        /*
         * Don't do it if it doesn't look like the hashtable will fit into
index 0a3b544b10e6e73f90d8c5b06a74321d5941f580..76e59c82d6edee77066ce70064769b29999ae7bd 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1351,15 +1351,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
                /*
                 * If no match in ORDER BY, just add it to the result using
                 * default sort/group semantics.
-                *
-                * XXX for now, the planner requires groupClause to be sortable,
-                * so we have to insist on that here.
                 */
                if (!found)
                        result = addTargetToGroupList(pstate, tle,
                                                                                  result, *targetlist,
-                                                                                 true, /* XXX for now */
-                                                                                 true);
+                                                                                 false, true);
        }
 
        return result;