Improve planner's handling of set-returning functions in grouping columns.

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 25 Nov 2017 16:48:09 +0000 (11:48 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 25 Nov 2017 16:48:09 +0000 (11:48 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Nov 2017 16:48:09 +0000 (11:48 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Nov 2017 16:48:09 +0000 (11:48 -0500)
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c

index 5b0da14748a78bd791927057df9fb5121db5bc6d..5783f90b62e5392c8b7f852a2d6d45ff97fec02e 100644 (file)
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -744,8 +744,8 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
  bool
  query_supports_distinctness(Query *query)
  {
-       /* we don't cope with SRFs, see comment below */
-       if (query->hasTargetSRFs)
+       /* SRFs break distinctness except with DISTINCT, see below */
+       if (query->hasTargetSRFs && query->distinctClause == NIL)
                 return false;
  
         /* check for features we can prove distinctness with */
@@ -786,21 +786,11 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
  
         Assert(list_length(colnos) == list_length(opids));
  
-       /*
-        * A set-returning function in the query's targetlist can result in
-        * returning duplicate rows, if the SRF is evaluated after the
-        * de-duplication step; so we play it safe and say "no" if there are any
-        * SRFs.  (We could be certain that it's okay if SRFs appear only in the
-        * specified columns, since those must be evaluated before de-duplication;
-        * but it doesn't presently seem worth the complication to check that.)
-        */
-       if (query->hasTargetSRFs)
-               return false;
-
         /*
          * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
          * columns in the DISTINCT clause appear in colnos and operator semantics
-        * match.
+        * match.  This is true even if there are SRFs in the DISTINCT columns or
+        * elsewhere in the tlist.
          */
         if (query->distinctClause)
         {
@@ -819,6 +809,16 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
                         return true;
         }
  
+       /*
+        * Otherwise, a set-returning function in the query's targetlist can
+        * result in returning duplicate rows, despite any grouping that might
+        * occur before tlist evaluation.  (If all tlist SRFs are within GROUP BY
+        * columns, it would be safe because they'd be expanded before grouping.
+        * But it doesn't currently seem worth the effort to check for that.)
+        */
+       if (query->hasTargetSRFs)
+               return false;
+
         /*
          * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
          * the grouped columns appear in colnos and operator semantics match.
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 4bbb4a850eb18b171c8b8ddc1e0ed3bc28543107..edff6da41090f81b64d8e4338840d368a64b779d 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3361,6 +3361,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                                         List **pgset)
  {
         List       *varinfos = NIL;
+       double          srf_multiplier = 1.0;
         double          numdistinct;
         ListCell   *l;
         int                     i;
@@ -3394,6 +3395,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
         foreach(l, groupExprs)
         {
                 Node       *groupexpr = (Node *) lfirst(l);
+               double          this_srf_multiplier;
                 VariableStatData vardata;
                 List       *varshere;
                 ListCell   *l2;
@@ -3402,6 +3404,21 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                 if (pgset && !list_member_int(*pgset, i++))
                         continue;
  
+               /*
+                * Set-returning functions in grouping columns are a bit problematic.
+                * The code below will effectively ignore their SRF nature and come up
+                * with a numdistinct estimate as though they were scalar functions.
+                * We compensate by scaling up the end result by the largest SRF
+                * rowcount estimate.  (This will be an overestimate if the SRF
+                * produces multiple copies of any output value, but it seems best to
+                * assume the SRF's outputs are distinct.  In any case, it's probably
+                * pointless to worry too much about this without much better
+                * estimates for SRF output rowcounts than we have today.)
+                */
+               this_srf_multiplier = expression_returns_set_rows(groupexpr);
+               if (srf_multiplier < this_srf_multiplier)
+                       srf_multiplier = this_srf_multiplier;
+
                 /* Short-circuit for expressions returning boolean */
                 if (exprType(groupexpr) == BOOLOID)
                 {
@@ -3467,9 +3484,15 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
          */
         if (varinfos == NIL)
         {
+               /* Apply SRF multiplier as we would do in the long path */
+               numdistinct *= srf_multiplier;
+               /* Round off */
+               numdistinct = ceil(numdistinct);
                 /* Guard against out-of-range answers */
                 if (numdistinct > input_rows)
                         numdistinct = input_rows;
+               if (numdistinct < 1.0)
+                       numdistinct = 1.0;
                 return numdistinct;
         }
  
@@ -3638,6 +3661,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                 varinfos = newvarinfos;
         } while (varinfos != NIL);
  
+       /* Now we can account for the effects of any SRFs */
+       numdistinct *= srf_multiplier;
+
+       /* Round off */
         numdistinct = ceil(numdistinct);
  
         /* Guard against out-of-range answers */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 25 Nov 2017 16:48:09 +0000 (11:48 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 25 Nov 2017 16:48:09 +0000 (11:48 -0500)
src/backend/optimizer/plan/analyzejoins.c		patch \| blob \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| history