Improve SELECT DISTINCT to consider hash aggregation, as well as sort/uniq,

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 5 Aug 2008 02:43:18 +0000 (02:43 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 5 Aug 2008 02:43:18 +0000 (02:43 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 5 Aug 2008 02:43:18 +0000 (02:43 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 5 Aug 2008 02:43:18 +0000 (02:43 +0000)
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 66630ae6612a8acaabbfe43c6f88845f323e2940..408b9b2a757e24e08b1ff44ba473c31f0e545217 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.329 2008/08/02 21:31:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.330 2008/08/05 02:43:17 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1334,6 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
         WRITE_NODE_FIELD(append_rel_list);
         WRITE_NODE_FIELD(query_pathkeys);
         WRITE_NODE_FIELD(group_pathkeys);
+       WRITE_NODE_FIELD(distinct_pathkeys);
         WRITE_NODE_FIELD(sort_pathkeys);
         WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
         WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index 5e5da6cda4f1bd6eeff833a97ddd3373830375fa..081a7c9cebd1f37615cd42e2a49d3d29b706e42e 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.109 2008/08/05 02:43:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -66,9 +66,9 @@
   * PlannerInfo field and not a passed parameter is that the low-level routines
   * in indxpath.c need to see it.)
   *
- * Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys,
- * which like query_pathkeys need to be canonicalized once the info is
- * available.
+ * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
+ * and sort_pathkeys, which like query_pathkeys need to be canonicalized once
+ * the info is available.
   *
   * tuple_fraction is interpreted as follows:
   *       0: expect all tuples to be retrieved (normal case)
@@ -120,6 +120,8 @@ query_planner(PlannerInfo *root, List *tlist,
                                                                                                          root->query_pathkeys);
                 root->group_pathkeys = canonicalize_pathkeys(root,
                                                                                                          root->group_pathkeys);
+               root->distinct_pathkeys = canonicalize_pathkeys(root,
+                                                                                                       root->distinct_pathkeys);
                 root->sort_pathkeys = canonicalize_pathkeys(root,
                                                                                                         root->sort_pathkeys);
                 return;
@@ -237,10 +239,12 @@ query_planner(PlannerInfo *root, List *tlist,
         /*
          * We have completed merging equivalence sets, so it's now possible to
          * convert the requested query_pathkeys to canonical form.      Also
-        * canonicalize the groupClause and sortClause pathkeys for use later.
+        * canonicalize the groupClause, distinctClause and sortClause pathkeys
+        * for use later.
          */
         root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
         root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
+       root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
         root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
  
         /*
@@ -286,9 +290,11 @@ query_planner(PlannerInfo *root, List *tlist,
                 /*
                  * If both GROUP BY and ORDER BY are specified, we will need two
                  * levels of sort --- and, therefore, certainly need to read all the
-                * tuples --- unless ORDER BY is a subset of GROUP BY.
+                * tuples --- unless ORDER BY is a subset of GROUP BY.  Likewise if
+                * we have both DISTINCT and GROUP BY.
                  */
-               if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
+               if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
+                       !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
                         tuple_fraction = 0.0;
         }
         else if (parse->hasAggs || root->hasHavingQual)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 40a9bffaf37e3db17aed317e80d69153fc207497..d8c6942e250aa49d426f51cae270fe38ee40859e 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.238 2008/08/05 02:43:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -69,12 +69,17 @@ static double preprocess_limit(PlannerInfo *root,
                                  int64 *offset_est, int64 *count_est);
  static void preprocess_groupclause(PlannerInfo *root);
  static Oid *extract_grouping_ops(List *groupClause);
+static AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
  static bool grouping_is_sortable(List *groupClause);
  static bool grouping_is_hashable(List *groupClause);
  static bool choose_hashed_grouping(PlannerInfo *root,
                                            double tuple_fraction, double limit_tuples,
                                            Path *cheapest_path, Path *sorted_path,
                                            double dNumGroups, AggClauseCounts *agg_counts);
+static bool choose_hashed_distinct(PlannerInfo *root,
+                                          Plan *input_plan, List *input_pathkeys,
+                                          double tuple_fraction, double limit_tuples,
+                                          double dNumDistinctRows);
  static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
                                            AttrNumber **groupColIdx, bool *need_tlist_eval);
  static void locate_grouping_columns(PlannerInfo *root,
@@ -757,7 +762,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
         double          limit_tuples = -1.0;
         Plan       *result_plan;
         List       *current_pathkeys;
-       List       *sort_pathkeys;
         double          dNumGroups = 0;
  
         /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
@@ -829,16 +833,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                  * Calculate pathkeys that represent result ordering requirements
                  */
                 Assert(parse->distinctClause == NIL);
-               sort_pathkeys = make_pathkeys_for_sortclauses(root,
-                                                                                                         parse->sortClause,
-                                                                                                         tlist,
-                                                                                                         true);
+               root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
+                                                                                                                       parse->sortClause,
+                                                                                                                       tlist,
+                                                                                                                       true);
         }
         else
         {
                 /* No set operations, do regular planning */
                 List       *sub_tlist;
-               List       *group_pathkeys;
                 AttrNumber *groupColIdx = NULL;
                 bool            need_tlist_eval = true;
                 QualCost        tlist_cost;
@@ -870,14 +873,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 /*
                  * Calculate pathkeys that represent grouping/ordering requirements.
                  * Stash them in PlannerInfo so that query_planner can canonicalize
-                * them after EquivalenceClasses have been formed.
-                *
-                * Note: for the moment, DISTINCT is always implemented via sort/uniq,
-                * and we set the sort_pathkeys to be the more rigorous of the
-                * DISTINCT and ORDER BY requirements.  This should be changed
-                * someday, but DISTINCT ON is a bit of a problem ...
+                * them after EquivalenceClasses have been formed.  The sortClause
+                * is certainly sort-able, but GROUP BY and DISTINCT might not be,
+                * in which case we just leave their pathkeys empty.
                  */
-               if (parse->groupClause && grouping_is_sortable(parse->groupClause))
+               if (parse->groupClause &&
+                       grouping_is_sortable(parse->groupClause))
                         root->group_pathkeys =
                                 make_pathkeys_for_sortclauses(root,
                                                                                           parse->groupClause,
@@ -886,18 +887,21 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 else
                         root->group_pathkeys = NIL;
  
-               if (list_length(parse->distinctClause) > list_length(parse->sortClause))
-                       root->sort_pathkeys =
+               if (parse->distinctClause &&
+                       grouping_is_sortable(parse->distinctClause))
+                       root->distinct_pathkeys =
                                 make_pathkeys_for_sortclauses(root,
                                                                                           parse->distinctClause,
                                                                                           tlist,
                                                                                           false);
                 else
-                       root->sort_pathkeys =
-                               make_pathkeys_for_sortclauses(root,
-                                                                                         parse->sortClause,
-                                                                                         tlist,
-                                                                                         false);
+                       root->distinct_pathkeys = NIL;
+
+               root->sort_pathkeys =
+                       make_pathkeys_for_sortclauses(root,
+                                                                                 parse->sortClause,
+                                                                                 tlist,
+                                                                                 false);
  
                 /*
                  * Will need actual number of aggregates for estimating costs.
@@ -917,17 +921,27 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 }
  
                 /*
-                * Figure out whether we need a sorted result from query_planner.
+                * Figure out whether we want a sorted result from query_planner.
                  *
                  * If we have a sortable GROUP BY clause, then we want a result sorted
-                * properly for grouping.  Otherwise, if there is an ORDER BY clause,
-                * we want to sort by the ORDER BY clause. (Note: if we have both, and
-                * ORDER BY is a superset of GROUP BY, it would be tempting to request
-                * sort by ORDER BY --- but that might just leave us failing to
-                * exploit an available sort order at all. Needs more thought...)
+                * properly for grouping.  Otherwise, if there's a sortable DISTINCT
+                * clause that's more rigorous than the ORDER BY clause, we try to
+                * produce output that's sufficiently well sorted for the DISTINCT.
+                * Otherwise, if there is an ORDER BY clause, we want to sort by the
+                * ORDER BY clause.
+                *
+                * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
+                * superset of GROUP BY, it would be tempting to request sort by ORDER
+                * BY --- but that might just leave us failing to exploit an available
+                * sort order at all.  Needs more thought.  The choice for DISTINCT
+                * versus ORDER BY is much easier, since we know that the parser
+                * ensured that one is a superset of the other.
                  */
                 if (root->group_pathkeys)
                         root->query_pathkeys = root->group_pathkeys;
+               else if (list_length(root->distinct_pathkeys) >
+                                list_length(root->sort_pathkeys))
+                       root->query_pathkeys = root->distinct_pathkeys;
                 else if (root->sort_pathkeys)
                         root->query_pathkeys = root->sort_pathkeys;
                 else
@@ -942,9 +956,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
                                           &cheapest_path, &sorted_path, &dNumGroups);
  
-               group_pathkeys = root->group_pathkeys;
-               sort_pathkeys = root->sort_pathkeys;
-
                 /*
                  * If grouping, decide whether to use sorted or hashed grouping.
                  */
@@ -1024,7 +1035,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
  
                         /* Detect if we'll need an explicit sort for grouping */
                         if (parse->groupClause && !use_hashed_grouping &&
-                               !pathkeys_contained_in(group_pathkeys, current_pathkeys))
+                               !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
                         {
                                 need_sort_for_grouping = true;
                                 /*
@@ -1135,7 +1146,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                          parse->groupClause,
                                                                                                          groupColIdx,
                                                                                                          result_plan);
-                                               current_pathkeys = group_pathkeys;
+                                               current_pathkeys = root->group_pathkeys;
                                         }
                                         aggstrategy = AGG_SORTED;
  
@@ -1178,7 +1189,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                                                                                  parse->groupClause,
                                                                                                  groupColIdx,
                                                                                                  result_plan);
-                                       current_pathkeys = group_pathkeys;
+                                       current_pathkeys = root->group_pathkeys;
                                 }
  
                                 result_plan = (Plan *) make_group(root,
@@ -1214,35 +1225,129 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
         }                                                       /* end of if (setOperations) */
  
         /*
-        * If we were not able to make the plan come out in the right order, add
-        * an explicit sort step.
+        * If there is a DISTINCT clause, add the necessary node(s).
          */
-       if (sort_pathkeys)
+       if (parse->distinctClause)
         {
-               if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
+               double  dNumDistinctRows;
+               long    numDistinctRows;
+               bool    use_hashed_distinct;
+               bool    can_sort;
+               bool    can_hash;
+
+               /*
+                * If there was grouping or aggregation, use the current number of
+                * rows as the estimated number of DISTINCT rows (ie, assume the
+                * result was already mostly unique).  If not, use the number of
+                * distinct-groups calculated by query_planner.
+                */
+               if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
+                       dNumDistinctRows = result_plan->plan_rows;
+               else
+                       dNumDistinctRows = dNumGroups;
+
+               /* Also convert to long int --- but 'ware overflow! */
+               numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+               /*
+                * If we have a sortable DISTINCT ON clause, we always use sorting.
+                * This enforces the expected behavior of DISTINCT ON.
+                */
+               can_sort = grouping_is_sortable(parse->distinctClause);
+               if (can_sort && parse->hasDistinctOn)
+                       use_hashed_distinct = false;
+               else
                 {
-                       result_plan = (Plan *) make_sort_from_pathkeys(root,
-                                                                                                                  result_plan,
-                                                                                                                  sort_pathkeys,
-                                                                                                                  limit_tuples);
-                       current_pathkeys = sort_pathkeys;
+                       can_hash = grouping_is_hashable(parse->distinctClause);
+                       if (can_hash && can_sort)
+                       {
+                               /* we have a meaningful choice to make ... */
+                               use_hashed_distinct =
+                                       choose_hashed_distinct(root,
+                                                                                  result_plan, current_pathkeys,
+                                                                                  tuple_fraction, limit_tuples,
+                                                                                  dNumDistinctRows);
+                       }
+                       else if (can_hash)
+                               use_hashed_distinct = true;
+                       else if (can_sort)
+                               use_hashed_distinct = false;
+                       else
+                       {
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("could not implement DISTINCT"),
+                                                errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+                               use_hashed_distinct = false; /* keep compiler quiet */
+                       }
+               }
+
+               if (use_hashed_distinct)
+               {
+                       /* Hashed aggregate plan --- no sort needed */
+                       result_plan = (Plan *) make_agg(root,
+                                                                                       result_plan->targetlist,
+                                                                                       NIL,
+                                                                                       AGG_HASHED,
+                                                                                       list_length(parse->distinctClause),
+                                                                                       extract_grouping_cols(parse->distinctClause,
+                                                                                                                                 result_plan->targetlist),
+                                                                                       extract_grouping_ops(parse->distinctClause),
+                                                                                       numDistinctRows,
+                                                                                       0,
+                                                                                       result_plan);
+                       /* Hashed aggregation produces randomly-ordered results */
+                       current_pathkeys = NIL;
+               }
+               else
+               {
+                       /*
+                        * Use a Unique node to implement DISTINCT.  Add an explicit sort
+                        * if we couldn't make the path come out the way the Unique node
+                        * needs it.  If we do have to sort, sort by the more rigorous
+                        * of DISTINCT and ORDER BY, to avoid a second sort below.
+                        */
+                       if (!pathkeys_contained_in(root->distinct_pathkeys,
+                                                                          current_pathkeys))
+                       {
+                               if (list_length(root->distinct_pathkeys) >=
+                                       list_length(root->sort_pathkeys))
+                                       current_pathkeys = root->distinct_pathkeys;
+                               else
+                               {
+                                       current_pathkeys = root->sort_pathkeys;
+                                       /* Assert checks that parser didn't mess up... */
+                                       Assert(pathkeys_contained_in(root->distinct_pathkeys,
+                                                                                                current_pathkeys));
+                               }
+
+                               result_plan = (Plan *) make_sort_from_pathkeys(root,
+                                                                                                                          result_plan,
+                                                                                                                          current_pathkeys,
+                                                                                                                          -1.0);
+                       }
+
+                       result_plan = (Plan *) make_unique(result_plan,
+                                                                                          parse->distinctClause);
+                       result_plan->plan_rows = dNumDistinctRows;
+                       /* The Unique node won't change sort ordering */
                 }
         }
  
         /*
-        * If there is a DISTINCT clause, add the UNIQUE node.
+        * If ORDER BY was given and we were not able to make the plan come out in
+        * the right order, add an explicit sort step.
          */
-       if (parse->distinctClause)
+       if (parse->sortClause)
         {
-               result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
-
-               /*
-                * If there was grouping or aggregation, leave plan_rows as-is (ie,
-                * assume the result was already mostly unique).  If not, use the
-                * number of distinct-groups calculated by query_planner.
-                */
-               if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs)
-                       result_plan->plan_rows = dNumGroups;
+               if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+               {
+                       result_plan = (Plan *) make_sort_from_pathkeys(root,
+                                                                                                                  result_plan,
+                                                                                                                  root->sort_pathkeys,
+                                                                                                                  limit_tuples);
+                       current_pathkeys = root->sort_pathkeys;
+               }
         }
  
         /*
@@ -1622,6 +1727,31 @@ extract_grouping_ops(List *groupClause)
         return groupOperators;
  }
  
+/*
+ * extract_grouping_cols - make an array of the grouping column resnos
+ *             for a SortGroupClause list
+ */
+static AttrNumber *
+extract_grouping_cols(List *groupClause, List *tlist)
+{
+       AttrNumber *grpColIdx;
+       int                     numCols = list_length(groupClause);
+       int                     colno = 0;
+       ListCell   *glitem;
+
+       grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+
+       foreach(glitem, groupClause)
+       {
+               SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+               TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
+
+               grpColIdx[colno++] = tle->resno;
+       }
+
+       return grpColIdx;
+}
+
  /*
   * grouping_is_sortable - is it possible to implement grouping list by sorting?
   *
@@ -1680,6 +1810,7 @@ choose_hashed_grouping(PlannerInfo *root,
         double          cheapest_path_rows;
         int                     cheapest_path_width;
         Size            hashentrysize;
+       List       *target_pathkeys;
         List       *current_pathkeys;
         Path            hashed_p;
         Path            sorted_p;
@@ -1716,6 +1847,20 @@ choose_hashed_grouping(PlannerInfo *root,
         if (hashentrysize * dNumGroups > work_mem * 1024L)
                 return false;
  
+       /*
+        * When we have both GROUP BY and DISTINCT, use the more-rigorous of
+        * DISTINCT and ORDER BY as the assumed required output sort order.
+        * This is an oversimplification because the DISTINCT might get
+        * implemented via hashing, but it's not clear that the case is common
+        * enough (or that our estimates are good enough) to justify trying to
+        * solve it exactly.
+        */
+       if (list_length(root->distinct_pathkeys) >
+               list_length(root->sort_pathkeys))
+               target_pathkeys = root->distinct_pathkeys;
+       else
+               target_pathkeys = root->sort_pathkeys;
+
         /*
          * See if the estimated cost is no more than doing it the other way. While
          * avoiding the need for sorted input is usually a win, the fact that the
@@ -1737,8 +1882,8 @@ choose_hashed_grouping(PlannerInfo *root,
                          cheapest_path->startup_cost, cheapest_path->total_cost,
                          cheapest_path_rows);
         /* Result of hashed agg is always unsorted */
-       if (root->sort_pathkeys)
-               cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
+       if (target_pathkeys)
+               cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
                                   dNumGroups, cheapest_path_width, limit_tuples);
  
         if (sorted_path)
@@ -1770,9 +1915,9 @@ choose_hashed_grouping(PlannerInfo *root,
                                    sorted_p.startup_cost, sorted_p.total_cost,
                                    cheapest_path_rows);
         /* The Agg or Group node will preserve ordering */
-       if (root->sort_pathkeys &&
-               !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
-               cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
+       if (target_pathkeys &&
+               !pathkeys_contained_in(target_pathkeys, current_pathkeys))
+               cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
                                   dNumGroups, cheapest_path_width, limit_tuples);
  
         /*
@@ -1791,6 +1936,111 @@ choose_hashed_grouping(PlannerInfo *root,
         return false;
  }
  
+/*
+ * choose_hashed_distinct - should we use hashing for DISTINCT?
+ *
+ * This is fairly similar to choose_hashed_grouping, but there are enough
+ * differences that it doesn't seem worth trying to unify the two functions.
+ *
+ * But note that making the two choices independently is a bit bogus in
+ * itself.  If the two could be combined into a single choice operation
+ * it'd probably be better, but that seems far too unwieldy to be practical,
+ * especially considering that the combination of GROUP BY and DISTINCT
+ * isn't very common in real queries.  By separating them, we are giving
+ * extra preference to using a sorting implementation when a common sort key
+ * is available ... and that's not necessarily wrong anyway.
+ *
+ * Note: this is only applied when both alternatives are actually feasible.
+ */
+static bool
+choose_hashed_distinct(PlannerInfo *root,
+                                          Plan *input_plan, List *input_pathkeys,
+                                          double tuple_fraction, double limit_tuples,
+                                          double dNumDistinctRows)
+{
+       int                     numDistinctCols = list_length(root->parse->distinctClause);
+       Size            hashentrysize;
+       List       *current_pathkeys;
+       Path            hashed_p;
+       Path            sorted_p;
+
+       /* Prefer sorting when enable_hashagg is off */
+       if (!enable_hashagg)
+               return false;
+
+       /*
+        * Don't do it if it doesn't look like the hashtable will fit into
+        * work_mem.
+        */
+       hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
+
+       if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
+               return false;
+
+       /*
+        * See if the estimated cost is no more than doing it the other way. While
+        * avoiding the need for sorted input is usually a win, the fact that the
+        * output won't be sorted may be a loss; so we need to do an actual cost
+        * comparison.
+        *
+        * We need to consider input_plan + hashagg [+ final sort] versus
+        * input_plan [+ sort] + group [+ final sort] where brackets indicate
+        * a step that may not be needed.
+        *
+        * These path variables are dummies that just hold cost fields; we don't
+        * make actual Paths for these steps.
+        */
+       cost_agg(&hashed_p, root, AGG_HASHED, 0,
+                        numDistinctCols, dNumDistinctRows,
+                        input_plan->startup_cost, input_plan->total_cost,
+                        input_plan->plan_rows);
+       /*
+        * Result of hashed agg is always unsorted, so if ORDER BY is present
+        * we need to charge for the final sort.
+        */
+       if (root->parse->sortClause)
+               cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
+                                 dNumDistinctRows, input_plan->plan_width, limit_tuples);
+
+       /* Now for the GROUP case ... */
+       sorted_p.startup_cost = input_plan->startup_cost;
+       sorted_p.total_cost = input_plan->total_cost;
+       current_pathkeys = input_pathkeys;
+       if (!pathkeys_contained_in(root->distinct_pathkeys, current_pathkeys))
+       {
+               /* We don't want to sort twice */
+               if (list_length(root->distinct_pathkeys) >=
+                       list_length(root->sort_pathkeys))
+                       current_pathkeys = root->distinct_pathkeys;
+               else
+                       current_pathkeys = root->sort_pathkeys;
+               cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
+                                 input_plan->plan_rows, input_plan->plan_width, -1.0);
+       }
+       cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
+                          sorted_p.startup_cost, sorted_p.total_cost,
+                          input_plan->plan_rows);
+       if (root->parse->sortClause &&
+               !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+               cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
+                                 dNumDistinctRows, input_plan->plan_width, limit_tuples);
+
+       /*
+        * Now make the decision using the top-level tuple fraction.  First we
+        * have to convert an absolute count (LIMIT) into fractional form.
+        */
+       if (tuple_fraction >= 1.0)
+               tuple_fraction /= dNumDistinctRows;
+
+       if (compare_fractional_path_costs(&hashed_p, &sorted_p,
+                                                                         tuple_fraction) < 0)
+       {
+               /* Hashed is cheaper, so use it */
+               return true;
+       }
+       return false;
+}
+
  /*---------------
   * make_subplanTargetList
   *       Generate appropriate target list when grouping is required.
@@ -1857,7 +2107,7 @@ make_subplanTargetList(PlannerInfo *root,
  
         /*
          * Otherwise, start with a "flattened" tlist (having just the vars
-        * mentioned in the targetlist and HAVING qual --- but not upper- level
+        * mentioned in the targetlist and HAVING qual --- but not upper-level
          * Vars; they will be replaced by Params later on).
          */
         sub_tlist = flatten_tlist(tlist);
@@ -1886,16 +2136,28 @@ make_subplanTargetList(PlannerInfo *root,
                         SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
                         Node       *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
                         TargetEntry *te = NULL;
-                       ListCell   *sl;
  
-                       /* Find or make a matching sub_tlist entry */
-                       foreach(sl, sub_tlist)
+                       /*
+                        * Find or make a matching sub_tlist entry.  If the groupexpr
+                        * isn't a Var, no point in searching.  (Note that the parser
+                        * won't make multiple groupClause entries for the same TLE.)
+                        */
+                       if (groupexpr && IsA(groupexpr, Var))
                         {
-                               te = (TargetEntry *) lfirst(sl);
-                               if (equal(groupexpr, te->expr))
-                                       break;
+                               ListCell   *sl;
+
+                               foreach(sl, sub_tlist)
+                               {
+                                       TargetEntry *lte = (TargetEntry *) lfirst(sl);
+
+                                       if (equal(groupexpr, lte->expr))
+                                       {
+                                               te = lte;
+                                               break;
+                                       }
+                               }
                         }
-                       if (!sl)
+                       if (!te)
                         {
                                 te = makeTargetEntry((Expr *) groupexpr,
                                                                          list_length(sub_tlist) + 1,
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c

index 76e59c82d6edee77066ce70064769b29999ae7bd..2b04ee5e33783301dc2450cd9b22c8798e01520e 100644 (file)
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.174 2008/08/05 02:43:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1447,9 +1447,6 @@ transformDistinctClause(ParseState *pstate,
         /*
          * Now add any remaining non-resjunk tlist items, using default
          * sort/group semantics for their data types.
-        *
-        * XXX for now, the planner requires distinctClause to be sortable,
-        * so we have to insist on that here.
          */
         foreach(tlitem, *targetlist)
         {
@@ -1459,8 +1456,7 @@ transformDistinctClause(ParseState *pstate,
                         continue;                       /* ignore junk */
                 result = addTargetToGroupList(pstate, tle,
                                                                           result, *targetlist,
-                                                                         true, /* XXX for now */
-                                                                         true);
+                                                                         false, true);
         }
  
         return result;
@@ -1555,8 +1551,7 @@ transformDistinctOnClause(ParseState *pstate, List *distinctlist,
                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
                 result = addTargetToGroupList(pstate, tle,
                                                                           result, *targetlist,
-                                                                         true, /* someday allow hash-only? */
-                                                                         true);
+                                                                         false, true);
         }
  
         return result;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 8476d7e85c141bc0a06273cd3a5ccbad883f52d5..f8c23071661b50086aa26287915fc5082354a1ae 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -162,8 +162,9 @@ typedef struct PlannerInfo
         List       *query_pathkeys; /* desired pathkeys for query_planner(), and
                                                                  * actual pathkeys afterwards */
  
-       List       *group_pathkeys; /* groupClause pathkeys, if any */
-       List       *sort_pathkeys;      /* sortClause pathkeys, if any */
+       List       *group_pathkeys;             /* groupClause pathkeys, if any */
+       List       *distinct_pathkeys;  /* distinctClause pathkeys, if any */
+       List       *sort_pathkeys;              /* sortClause pathkeys, if any */
  
         List       *initial_rels;       /* RelOptInfos we are now trying to join */
  
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out

index c5ad36fdd32df016b14ccd5ee49823117619660d..d404d9db68121049455a14ab3a97abfbed467187 100644 (file)
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -79,7 +79,7 @@ INSERT INTO TEMP_GROUP
  INSERT INTO TEMP_GROUP
    SELECT 2, i.f1, f.f1
    FROM INT4_TBL i, FLOAT8_TBL f;
-SELECT DISTINCT f1 AS two FROM TEMP_GROUP;
+SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1;
   two 
  -----
     1
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 69f0efb8d42206d2a829cbba1e9eed96b6c15add..533bac3ab6705bc6ff5be990b68a3e84f7e4cf11 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -129,7 +129,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.prorettype < p2.prorettype);
+    (p1.prorettype < p2.prorettype)
+ORDER BY 1, 2;
   prorettype | prorettype 
  ------------+------------
           25 |       1043
@@ -142,7 +143,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[0] < p2.proargtypes[0]);
+    (p1.proargtypes[0] < p2.proargtypes[0])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
            25 |        1042
@@ -158,7 +160,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[1] < p2.proargtypes[1]);
+    (p1.proargtypes[1] < p2.proargtypes[1])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
            23 |          28
@@ -173,7 +176,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[2] < p2.proargtypes[2]);
+    (p1.proargtypes[2] < p2.proargtypes[2])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
          1114 |        1184
@@ -185,7 +189,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[3] < p2.proargtypes[3]);
+    (p1.proargtypes[3] < p2.proargtypes[3])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
          1114 |        1184
@@ -197,7 +202,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[4] < p2.proargtypes[4]);
+    (p1.proargtypes[4] < p2.proargtypes[4])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
  (0 rows)
@@ -208,7 +214,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[5] < p2.proargtypes[5]);
+    (p1.proargtypes[5] < p2.proargtypes[5])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
  (0 rows)
@@ -219,7 +226,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[6] < p2.proargtypes[6]);
+    (p1.proargtypes[6] < p2.proargtypes[6])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
  (0 rows)
@@ -230,7 +238,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[7] < p2.proargtypes[7]);
+    (p1.proargtypes[7] < p2.proargtypes[7])
+ORDER BY 1, 2;
   proargtypes | proargtypes 
  -------------+-------------
  (0 rows)
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out

index abe34ae7ae5966a54d21f216cfdade115667ae43..fe64fe0d9c175909a93ce388b3ffd121a5d46628 100644 (file)
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -14,7 +14,7 @@ SELECT DISTINCT two FROM tmp;
  --
  -- awk '{print $5;}' onek.data | sort -n | uniq
  --
-SELECT DISTINCT ten FROM tmp;
+SELECT DISTINCT ten FROM tmp ORDER BY 1;
   ten 
  -----
     0
@@ -32,7 +32,7 @@ SELECT DISTINCT ten FROM tmp;
  --
  -- awk '{print $16;}' onek.data | sort -d | uniq
  --
-SELECT DISTINCT string4 FROM tmp;
+SELECT DISTINCT string4 FROM tmp ORDER BY 1;
   string4 
  ---------
   AAAAxx
diff --git a/src/test/regress/input/misc.source b/src/test/regress/input/misc.source

index c82b1405978628f002254f10abb0cba52f83fef2..c5813630625ec965e79d482626157d2942fc6fc6 100644 (file)
--- a/src/test/regress/input/misc.source
+++ b/src/test/regress/input/misc.source
@@ -183,7 +183,8 @@ SELECT p.name, name(p.hobbies) FROM person* p;
  -- the next two queries demonstrate how functions generate bogus duplicates.
  -- this is a "feature" ..
  --
-SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r;
+SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r
+  ORDER BY 1,2;
  
  SELECT hobbies_r.name, (hobbies_r.equipment).name FROM hobbies_r;
  
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source

index e409c0a10019c2c99d1f91ba9a853e482616cafa..91e0d2b04d79dab77c3dab10c274e7a496c856f1 100644 (file)
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -469,7 +469,8 @@ SELECT p.name, name(p.hobbies) FROM person* p;
  -- the next two queries demonstrate how functions generate bogus duplicates.
  -- this is a "feature" ..
  --
-SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r;
+SELECT DISTINCT hobbies_r.name, name(hobbies_r.equipment) FROM hobbies_r
+  ORDER BY 1,2;
      name     |     name      
  -------------+---------------
   basketball  | hightops
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql

index 2220fdba38587ea1599124ac01b3a2cc8ddf0dc1..6626cf20ebc5d10a38b093b98bab812e2b9b4972 100644 (file)
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -63,7 +63,7 @@ INSERT INTO TEMP_GROUP
    SELECT 2, i.f1, f.f1
    FROM INT4_TBL i, FLOAT8_TBL f;
  
-SELECT DISTINCT f1 AS two FROM TEMP_GROUP;
+SELECT DISTINCT f1 AS two FROM TEMP_GROUP ORDER BY 1;
  
  SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
    FROM TEMP_GROUP
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql

index e2ab6e57d4a36790b8b5b73db41f7e5ac2a85206..5017849830aa612d14a1359eb8c2cd3f9e622f7f 100644 (file)
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -121,7 +121,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.prorettype < p2.prorettype);
+    (p1.prorettype < p2.prorettype)
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -129,7 +130,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[0] < p2.proargtypes[0]);
+    (p1.proargtypes[0] < p2.proargtypes[0])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -137,7 +139,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[1] < p2.proargtypes[1]);
+    (p1.proargtypes[1] < p2.proargtypes[1])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[2], p2.proargtypes[2]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -145,7 +148,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[2] < p2.proargtypes[2]);
+    (p1.proargtypes[2] < p2.proargtypes[2])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[3], p2.proargtypes[3]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -153,7 +157,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[3] < p2.proargtypes[3]);
+    (p1.proargtypes[3] < p2.proargtypes[3])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[4], p2.proargtypes[4]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -161,7 +166,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[4] < p2.proargtypes[4]);
+    (p1.proargtypes[4] < p2.proargtypes[4])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[5], p2.proargtypes[5]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -169,7 +175,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[5] < p2.proargtypes[5]);
+    (p1.proargtypes[5] < p2.proargtypes[5])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[6], p2.proargtypes[6]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -177,7 +184,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[6] < p2.proargtypes[6]);
+    (p1.proargtypes[6] < p2.proargtypes[6])
+ORDER BY 1, 2;
  
  SELECT DISTINCT p1.proargtypes[7], p2.proargtypes[7]
  FROM pg_proc AS p1, pg_proc AS p2
@@ -185,7 +193,8 @@ WHERE p1.oid != p2.oid AND
      p1.prosrc = p2.prosrc AND
      p1.prolang = 12 AND p2.prolang = 12 AND
      NOT p1.proisagg AND NOT p2.proisagg AND
-    (p1.proargtypes[7] < p2.proargtypes[7]);
+    (p1.proargtypes[7] < p2.proargtypes[7])
+ORDER BY 1, 2;
  
  -- Look for functions that return type "internal" and do not have any
  -- "internal" argument.  Such a function would be a security hole since
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql

index c4a63aaf16fd9788b10523cc4d173bf31520bf65..7416e0194e172090810e7ba70fb4cfc00b618220 100644 (file)
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -10,12 +10,12 @@ SELECT DISTINCT two FROM tmp;
  --
  -- awk '{print $5;}' onek.data | sort -n | uniq
  --
-SELECT DISTINCT ten FROM tmp;
+SELECT DISTINCT ten FROM tmp ORDER BY 1;
  
  --
  -- awk '{print $16;}' onek.data | sort -d | uniq
  --
-SELECT DISTINCT string4 FROM tmp;
+SELECT DISTINCT string4 FROM tmp ORDER BY 1;
  
  --
  -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 5 Aug 2008 02:43:18 +0000 (02:43 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 5 Aug 2008 02:43:18 +0000 (02:43 +0000)
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/parser/parse_clause.c		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/test/regress/expected/numerology.out		patch \| blob \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| history
src/test/regress/expected/select_distinct.out		patch \| blob \| history
src/test/regress/input/misc.source		patch \| blob \| history
src/test/regress/output/misc.source		patch \| blob \| history
src/test/regress/sql/numerology.sql		patch \| blob \| history
src/test/regress/sql/opr_sanity.sql		patch \| blob \| history
src/test/regress/sql/select_distinct.sql		patch \| blob \| history