Rework the stats_ext test

author Alvaro Herrera <alvherre@alvh.no-ip.org>

Mon, 27 Mar 2017 15:40:42 +0000 (12:40 -0300)

committer Alvaro Herrera <alvherre@alvh.no-ip.org>

Mon, 27 Mar 2017 15:43:04 +0000 (12:43 -0300)
author Alvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 27 Mar 2017 15:40:42 +0000 (12:40 -0300)
committer Alvaro Herrera <alvherre@alvh.no-ip.org>
Mon, 27 Mar 2017 15:43:04 +0000 (12:43 -0300)
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out

index 0e30861ab82e30ff5774fca57b128294ac452811..8fe96d68786fcb37ce48fe33074404ee986b60bd 100644 (file)
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -1,4 +1,10 @@
  -- Generic extended statistics support
+-- We will be checking execution plans without/with statistics, so
+-- let's make sure we get simple non-parallel plans. Also set the
+-- work_mem low so that we can use small amounts of data.
+SET max_parallel_workers = 0;
+SET max_parallel_workers_per_gather = 0;
+SET work_mem = '128kB';
  -- Ensure stats are dropped sanely
  CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
  CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
@@ -49,6 +55,67 @@ CREATE TABLE ndistinct (
      c INT,
      d INT
  );
+-- over-estimates when using only per-column statistics
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT i/100, i/100, i/100, cash_words((i/100)::money)
+       FROM generate_series(1,30000) s(i);
+ANALYZE ndistinct;
+-- Group Aggregate, due to over-estimate of the number of groups
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b
+   ->  Sort
+         Sort Key: a, b
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: b, c
+   ->  Sort
+         Sort Key: b, c
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b, c
+   ->  Sort
+         Sort Key: a, b, c
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b, c, d
+   ->  Sort
+         Sort Key: a, b, c, d
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: b, c, d
+   ->  Sort
+         Sort Key: b, c, d
+         ->  Seq Scan on ndistinct
+(5 rows)
+
  -- unknown column
  CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
  ERROR:  column "unknown_column" referenced in statistics does not exist
@@ -63,18 +130,15 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
  ERROR:  duplicate column name in statistics definition
  -- correct command
  CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
--- perfectly correlated groups
-INSERT INTO ndistinct (a, b, c, filler1)
-     SELECT i/100, i/100, i/100, cash_words(i::money)
-       FROM generate_series(1,10000) s(i);
  ANALYZE ndistinct;
  SELECT staenabled, standistinct
    FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
   staenabled |                                          standistinct                                          
  ------------+------------------------------------------------------------------------------------------------
- {d}        | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
+ {d}        | [{(b 3 4), 301.000000}, {(b 3 6), 301.000000}, {(b 4 6), 301.000000}, {(b 3 4 6), 301.000000}]
  (1 row)
  
+-- Hash Aggregate, thanks to estimates improved by the statistic
  EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
           QUERY PLAN          
@@ -85,78 +149,165 @@ EXPLAIN (COSTS off)
  (3 rows)
  
  EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
           QUERY PLAN          
  -----------------------------
   HashAggregate
-   Group Key: a, b, c
+   Group Key: b, c
     ->  Seq Scan on ndistinct
  (3 rows)
  
  EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
           QUERY PLAN          
  -----------------------------
   HashAggregate
-   Group Key: a, b, c, d
+   Group Key: a, b, c
     ->  Seq Scan on ndistinct
  (3 rows)
  
+-- last two plans keep using Group Aggregate, because 'd' is not covered
+-- by the statistic and while it's NULL-only we assume 200 values for it
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b, c, d
+   ->  Sort
+         Sort Key: a, b, c, d
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: b, c, d
+   ->  Sort
+         Sort Key: b, c, d
+         ->  Seq Scan on ndistinct
+(5 rows)
+
  TRUNCATE TABLE ndistinct;
--- partially correlated groups
-INSERT INTO ndistinct (a, b, c)
-     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
+-- under-estimates when using only per-column statistics
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT mod(i,50), mod(i,51), mod(i,32),
+            cash_words(mod(i,33)::int::money)
+       FROM generate_series(1,10000) s(i);
  ANALYZE ndistinct;
  SELECT staenabled, standistinct
    FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
- staenabled |                                          standistinct                                          
-------------+------------------------------------------------------------------------------------------------
- {d}        | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
+ staenabled |                                            standistinct                                            
+------------+----------------------------------------------------------------------------------------------------
+ {d}        | [{(b 3 4), 2550.000000}, {(b 3 6), 800.000000}, {(b 4 6), 1632.000000}, {(b 3 4 6), 10000.000000}]
  (1 row)
  
-EXPLAIN
+-- plans using Group Aggregate, thanks to using correct esimates
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- HashAggregate  (cost=230.00..232.01 rows=201 width=16)
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b
+   ->  Sort
+         Sort Key: a, b
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b, c
+   ->  Sort
+         Sort Key: a, b, c
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+            QUERY PLAN             
+-----------------------------------
+ GroupAggregate
+   Group Key: a, b, c, d
+   ->  Sort
+         Sort Key: a, b, c, d
+         ->  Seq Scan on ndistinct
+(5 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: b, c, d
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, d
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+DROP STATISTICS s10;
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+ staenabled | standistinct 
+------------+--------------
+(0 rows)
+
+-- dropping the statistics switches the plans to Hash Aggregate,
+-- due to under-estimates
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
     Group Key: a, b
-   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=8)
+   ->  Seq Scan on ndistinct
  (3 rows)
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- HashAggregate  (cost=255.00..257.01 rows=201 width=20)
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
     Group Key: a, b, c
-   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=12)
+   ->  Seq Scan on ndistinct
  (3 rows)
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- HashAggregate  (cost=280.00..290.00 rows=1000 width=24)
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
     Group Key: a, b, c, d
-   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=16)
+   ->  Seq Scan on ndistinct
  (3 rows)
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- HashAggregate  (cost=255.00..265.00 rows=1000 width=20)
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
     Group Key: b, c, d
-   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=12)
+   ->  Seq Scan on ndistinct
  (3 rows)
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- HashAggregate  (cost=230.00..240.00 rows=1000 width=16)
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
     Group Key: a, d
-   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=8)
+   ->  Seq Scan on ndistinct
  (3 rows)
  
  DROP TABLE ndistinct;
diff --git a/src/test/regress/expected/stats_ext_1.out b/src/test/regress/expected/stats_ext_1.out

deleted file mode 100644 (file)

index 128afba..0000000
--- a/src/test/regress/expected/stats_ext_1.out
+++ /dev/null
@@ -1,155 +0,0 @@
--- Generic extended statistics support
--- Ensure stats are dropped sanely
-CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
-CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
-DROP STATISTICS ab1_a_b_stats;
-CREATE SCHEMA regress_schema_2;
-CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
-DROP STATISTICS regress_schema_2.ab1_a_b_stats;
--- Ensure statistics are dropped when columns are
-CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
-CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
-CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
-ALTER TABLE ab1 DROP COLUMN a;
-\d ab1
-                Table "public.ab1"
- Column |  Type   | Collation | Nullable | Default 
---------+---------+-----------+----------+---------
- b      | integer |           |          | 
- c      | integer |           |          | 
-Statistics:
-    "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
-
-DROP TABLE ab1;
--- Ensure things work sanely with SET STATISTICS 0
-CREATE TABLE ab1 (a INTEGER, b INTEGER);
-ALTER TABLE ab1 ALTER a SET STATISTICS 0;
-INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
-CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
-ANALYZE ab1;
-ERROR:  extended statistics could not be collected for column "a" of relation public.ab1
-HINT:  Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
-ALTER TABLE ab1 ALTER a SET STATISTICS -1;
-ANALYZE ab1;
-DROP TABLE ab1;
--- n-distinct tests
-CREATE TABLE ndistinct (
-    filler1 TEXT,
-    filler2 NUMERIC,
-    a INT,
-    b INT,
-    filler3 DATE,
-    c INT,
-    d INT
-);
--- unknown column
-CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
-ERROR:  column "unknown_column" referenced in statistics does not exist
--- single column
-CREATE STATISTICS s10 ON (a) FROM ndistinct;
-ERROR:  statistics require at least 2 columns
--- single column, duplicated
-CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
-ERROR:  duplicate column name in statistics definition
--- two columns, one duplicated
-CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
-ERROR:  duplicate column name in statistics definition
--- correct command
-CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
--- perfectly correlated groups
-INSERT INTO ndistinct (a, b, c, filler1)
-     SELECT i/100, i/100, i/100, cash_words(i::money)
-       FROM generate_series(1,10000) s(i);
-ANALYZE ndistinct;
-SELECT staenabled, standistinct
-  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
- staenabled |                                          standistinct                                          
-------------+------------------------------------------------------------------------------------------------
- {d}        | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
-(1 row)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b, c
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b, c, d
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-TRUNCATE TABLE ndistinct;
--- partially correlated groups
-INSERT INTO ndistinct (a, b, c)
-     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
-ANALYZE ndistinct;
-SELECT staenabled, standistinct
-  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
- staenabled |                                          standistinct                                          
-------------+------------------------------------------------------------------------------------------------
- {d}        | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
-(1 row)
-
-EXPLAIN
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- HashAggregate  (cost=225.00..227.01 rows=201 width=16)
-   Group Key: a, b
-   ->  Seq Scan on ndistinct  (cost=0.00..150.00 rows=10000 width=8)
-(3 rows)
-
-EXPLAIN
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- HashAggregate  (cost=250.00..252.01 rows=201 width=20)
-   Group Key: a, b, c
-   ->  Seq Scan on ndistinct  (cost=0.00..150.00 rows=10000 width=12)
-(3 rows)
-
-EXPLAIN
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- HashAggregate  (cost=275.00..285.00 rows=1000 width=24)
-   Group Key: a, b, c, d
-   ->  Seq Scan on ndistinct  (cost=0.00..150.00 rows=10000 width=16)
-(3 rows)
-
-EXPLAIN
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
- HashAggregate  (cost=250.00..260.00 rows=1000 width=20)
-   Group Key: b, c, d
-   ->  Seq Scan on ndistinct  (cost=0.00..150.00 rows=10000 width=12)
-(3 rows)
-
-EXPLAIN
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- HashAggregate  (cost=225.00..235.00 rows=1000 width=16)
-   Group Key: a, d
-   ->  Seq Scan on ndistinct  (cost=0.00..150.00 rows=10000 width=8)
-(3 rows)
-
-DROP TABLE ndistinct;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql

index 8f23653a9b632cc1c9b9dd3e5951e58425f847e2..4faaf88e062c3133a88383b46147c89b592ad594 100644 (file)
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1,5 +1,12 @@
  -- Generic extended statistics support
  
+-- We will be checking execution plans without/with statistics, so
+-- let's make sure we get simple non-parallel plans. Also set the
+-- work_mem low so that we can use small amounts of data.
+SET max_parallel_workers = 0;
+SET max_parallel_workers_per_gather = 0;
+SET work_mem = '128kB';
+
  -- Ensure stats are dropped sanely
  CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
  CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
@@ -43,6 +50,29 @@ CREATE TABLE ndistinct (
      d INT
  );
  
+-- over-estimates when using only per-column statistics
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT i/100, i/100, i/100, cash_words((i/100)::money)
+       FROM generate_series(1,30000) s(i);
+
+ANALYZE ndistinct;
+
+-- Group Aggregate, due to over-estimate of the number of groups
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+
  -- unknown column
  CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
  
@@ -58,9 +88,35 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
  -- correct command
  CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
  
--- perfectly correlated groups
+ANALYZE ndistinct;
+
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+
+-- Hash Aggregate, thanks to estimates improved by the statistic
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+
+-- last two plans keep using Group Aggregate, because 'd' is not covered
+-- by the statistic and while it's NULL-only we assume 200 values for it
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+
+TRUNCATE TABLE ndistinct;
+
+-- under-estimates when using only per-column statistics
  INSERT INTO ndistinct (a, b, c, filler1)
-     SELECT i/100, i/100, i/100, cash_words(i::money)
+     SELECT mod(i,50), mod(i,51), mod(i,32),
+            cash_words(mod(i,33)::int::money)
         FROM generate_series(1,10000) s(i);
  
  ANALYZE ndistinct;
@@ -68,6 +124,7 @@ ANALYZE ndistinct;
  SELECT staenabled, standistinct
    FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
  
+-- plans using Group Aggregate, thanks to using correct esimates
  EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
  
@@ -77,30 +134,32 @@ EXPLAIN (COSTS off)
  EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
  
-TRUNCATE TABLE ndistinct;
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
  
--- partially correlated groups
-INSERT INTO ndistinct (a, b, c)
-     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
  
-ANALYZE ndistinct;
+DROP STATISTICS s10;
  
  SELECT staenabled, standistinct
    FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
  
-EXPLAIN
+-- dropping the statistics switches the plans to Hash Aggregate,
+-- due to under-estimates
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
  
-EXPLAIN
+EXPLAIN (COSTS off)
   SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
  
  DROP TABLE ndistinct;
author	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Mon, 27 Mar 2017 15:40:42 +0000 (12:40 -0300)
committer	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Mon, 27 Mar 2017 15:43:04 +0000 (12:43 -0300)
src/test/regress/expected/stats_ext.out		patch \| blob \| history
src/test/regress/expected/stats_ext_1.out	[deleted file]	patch \| blob \| history
src/test/regress/sql/stats_ext.sql		patch \| blob \| history