bool
query_supports_distinctness(Query *query)
{
- /* we don't cope with SRFs, see comment below */
- if (query->hasTargetSRFs)
+ /* SRFs break distinctness except with DISTINCT, see below */
+ if (query->hasTargetSRFs && query->distinctClause == NIL)
return false;
/* check for features we can prove distinctness with */
Assert(list_length(colnos) == list_length(opids));
- /*
- * A set-returning function in the query's targetlist can result in
- * returning duplicate rows, if the SRF is evaluated after the
- * de-duplication step; so we play it safe and say "no" if there are any
- * SRFs. (We could be certain that it's okay if SRFs appear only in the
- * specified columns, since those must be evaluated before de-duplication;
- * but it doesn't presently seem worth the complication to check that.)
- */
- if (query->hasTargetSRFs)
- return false;
-
/*
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
* columns in the DISTINCT clause appear in colnos and operator semantics
- * match.
+ * match. This is true even if there are SRFs in the DISTINCT columns or
+ * elsewhere in the tlist.
*/
if (query->distinctClause)
{
return true;
}
+ /*
+ * Otherwise, a set-returning function in the query's targetlist can
+ * result in returning duplicate rows, despite any grouping that might
+ * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
+ * columns, it would be safe because they'd be expanded before grouping.
+ * But it doesn't currently seem worth the effort to check for that.)
+ */
+ if (query->hasTargetSRFs)
+ return false;
+
/*
* Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
* the grouped columns appear in colnos and operator semantics match.
List **pgset)
{
List *varinfos = NIL;
+ double srf_multiplier = 1.0;
double numdistinct;
ListCell *l;
int i;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
+ double this_srf_multiplier;
VariableStatData vardata;
List *varshere;
ListCell *l2;
if (pgset && !list_member_int(*pgset, i++))
continue;
+ /*
+ * Set-returning functions in grouping columns are a bit problematic.
+ * The code below will effectively ignore their SRF nature and come up
+ * with a numdistinct estimate as though they were scalar functions.
+ * We compensate by scaling up the end result by the largest SRF
+ * rowcount estimate. (This will be an overestimate if the SRF
+ * produces multiple copies of any output value, but it seems best to
+ * assume the SRF's outputs are distinct. In any case, it's probably
+ * pointless to worry too much about this without much better
+ * estimates for SRF output rowcounts than we have today.)
+ */
+ this_srf_multiplier = expression_returns_set_rows(groupexpr);
+ if (srf_multiplier < this_srf_multiplier)
+ srf_multiplier = this_srf_multiplier;
+
/* Short-circuit for expressions returning boolean */
if (exprType(groupexpr) == BOOLOID)
{
*/
if (varinfos == NIL)
{
+ /* Apply SRF multiplier as we would do in the long path */
+ numdistinct *= srf_multiplier;
+ /* Round off */
+ numdistinct = ceil(numdistinct);
/* Guard against out-of-range answers */
if (numdistinct > input_rows)
numdistinct = input_rows;
+ if (numdistinct < 1.0)
+ numdistinct = 1.0;
return numdistinct;
}
varinfos = newvarinfos;
} while (varinfos != NIL);
+ /* Now we can account for the effects of any SRFs */
+ numdistinct *= srf_multiplier;
+
+ /* Round off */
numdistinct = ceil(numdistinct);
/* Guard against out-of-range answers */