From a3f945a1b2b3451c02534abee8f4e421d3cfbfd3 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 1 Feb 2005 23:08:13 +0000
Subject: [PATCH] Adjust estimate_num_groups() so that the per-relation group
 count estimate is never clamped below the largest number of distinct values
 estimated for any one grouping Var, as suggested by Manfred.  This is
 intuitively right, and what's more it puts the plan choices in the subselect
 regression test back the way they were before ...

---
 src/backend/utils/adt/selfuncs.c        | 18 ++++++++++++++++--
 src/test/regress/expected/subselect.out |  4 ++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 7b020ff489..b4536710eb 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.170 2005/01/28 20:34:25 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.171 2005/02/01 23:07:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2043,6 +2043,7 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 		GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
 		RelOptInfo *rel = varinfo1->rel;
 		double		reldistinct = varinfo1->ndistinct;
+		double		relmaxndistinct = reldistinct;
 		int			relvarcount = 1;
 		List	   *newvarinfos = NIL;
 
@@ -2057,6 +2058,8 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 			if (varinfo2->rel == varinfo1->rel)
 			{
 				reldistinct *= varinfo2->ndistinct;
+				if (relmaxndistinct < varinfo2->ndistinct)
+					relmaxndistinct = varinfo2->ndistinct;
 				relvarcount++;
 			}
 			else
@@ -2075,12 +2078,23 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 			/*
 			 * Clamp to size of rel, or size of rel / 10 if multiple Vars.
 			 * The fudge factor is because the Vars are probably correlated
-			 * but we don't know by how much.
+			 * but we don't know by how much.  We should never clamp to less
+			 * than the largest ndistinct value for any of the Vars, though,
+			 * since there will surely be at least that many groups.
 			 */
 			double		clamp = rel->tuples;
 
 			if (relvarcount > 1)
+			{
 				clamp *= 0.1;
+				if (clamp < relmaxndistinct)
+				{
+					clamp = relmaxndistinct;
+					/* for sanity in case some ndistinct is too large: */
+					if (clamp > rel->tuples)
+						clamp = rel->tuples;
+				}
+			}
 			if (reldistinct > clamp)
 				reldistinct = clamp;
 
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 56bea03598..07e727de48 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -134,11 +134,11 @@ SELECT '' AS five, f1 AS "Correlated Field"
                      WHERE f3 IS NOT NULL);
  five | Correlated Field 
 ------+------------------
+      |                2
       |                3
       |                1
-      |                3
-      |                2
       |                2
+      |                3
 (5 rows)
 
 --
-- 
2.40.0