When updating reltuples after ANALYZE, just extrapolate from our sample.

author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c

index 3cfbc08649042b46215f865c5029ca54bdce35d6..474c3bd517f30f61b430c0ab4b6124b8f00c3b9c 100644 (file)
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -184,7 +184,7 @@ statapprox_heap(Relation rel, output_type *stat)
  
         stat->table_len = (uint64) nblocks * BLCKSZ;
  
-       stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
+       stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
                                                                                            stat->tuple_count + misc_count);
  
         /*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 5f21fcb5f4086d344a5711b3d8d0899610e6b148..ef93fb4d1722309e0f4c97b52a9650e882933a14 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1249,19 +1249,22 @@ acquire_sample_rows(Relation onerel, int elevel,
                 qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
         /*
-        * Estimate total numbers of rows in relation.  For live rows, use
-        * vac_estimate_reltuples; for dead rows, we have no source of old
-        * information, so we have to assume the density is the same in unseen
-        * pages as in the pages we scanned.
+        * Estimate total numbers of live and dead rows in relation, extrapolating
+        * on the assumption that the average tuple density in pages we didn't
+        * scan is the same as in the pages we did scan.  Since what we scanned is
+        * a random sample of the pages in the relation, this should be a good
+        * assumption.
          */
-       *totalrows = vac_estimate_reltuples(onerel, true,
-                                                                               totalblocks,
-                                                                               bs.m,
-                                                                               liverows);
         if (bs.m > 0)
+       {
+               *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
                 *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+       }
         else
+       {
+               *totalrows = 0.0;
                 *totaldeadrows = 0.0;
+       }
  
         /*
          * Emit some interesting relation info
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 7aca69a0ba0a85bb312cbce65008efe0d07bfb9a..b50c554c517677b9076a4e28fb7d67ff5d98f6c9 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -766,16 +766,14 @@ vacuum_set_xid_limits(Relation rel,
   * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
   *
   *             If we scanned the whole relation then we should just use the count of
- *             live tuples seen; but if we did not, we should not trust the count
- *             unreservedly, especially not in VACUUM, which may have scanned a quite
- *             nonrandom subset of the table.  When we have only partial information,
- *             we take the old value of pg_class.reltuples as a measurement of the
+ *             live tuples seen; but if we did not, we should not blindly extrapolate
+ *             from that number, since VACUUM may have scanned a quite nonrandom
+ *             subset of the table.  When we have only partial information, we take
+ *             the old value of pg_class.reltuples as a measurement of the
   *             tuple density in the unscanned pages.
- *
- *             This routine is shared by VACUUM and ANALYZE.
   */
  double
-vac_estimate_reltuples(Relation relation, bool is_analyze,
+vac_estimate_reltuples(Relation relation,
                                            BlockNumber total_pages,
                                            BlockNumber scanned_pages,
                                            double scanned_tuples)
@@ -783,9 +781,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
         BlockNumber old_rel_pages = relation->rd_rel->relpages;
         double          old_rel_tuples = relation->rd_rel->reltuples;
         double          old_density;
-       double          new_density;
-       double          multiplier;
-       double          updated_density;
+       double          unscanned_pages;
+       double          total_tuples;
  
         /* If we did scan the whole table, just use the count as-is */
         if (scanned_pages >= total_pages)
@@ -809,31 +806,14 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
  
         /*
          * Okay, we've covered the corner cases.  The normal calculation is to
-        * convert the old measurement to a density (tuples per page), then update
-        * the density using an exponential-moving-average approach, and finally
-        * compute reltuples as updated_density * total_pages.
-        *
-        * For ANALYZE, the moving average multiplier is just the fraction of the
-        * table's pages we scanned.  This is equivalent to assuming that the
-        * tuple density in the unscanned pages didn't change.  Of course, it
-        * probably did, if the new density measurement is different. But over
-        * repeated cycles, the value of reltuples will converge towards the
-        * correct value, if repeated measurements show the same new density.
-        *
-        * For VACUUM, the situation is a bit different: we have looked at a
-        * nonrandom sample of pages, but we know for certain that the pages we
-        * didn't look at are precisely the ones that haven't changed lately.
-        * Thus, there is a reasonable argument for doing exactly the same thing
-        * as for the ANALYZE case, that is use the old density measurement as the
-        * value for the unscanned pages.
-        *
-        * This logic could probably use further refinement.
+        * convert the old measurement to a density (tuples per page), then
+        * estimate the number of tuples in the unscanned pages using that figure,
+        * and finally add on the number of tuples in the scanned pages.
          */
         old_density = old_rel_tuples / old_rel_pages;
-       new_density = scanned_tuples / scanned_pages;
-       multiplier = (double) scanned_pages / (double) total_pages;
-       updated_density = old_density + (new_density - old_density) * multiplier;
-       return floor(updated_density * total_pages + 0.5);
+       unscanned_pages = (double) total_pages - (double) scanned_pages;
+       total_tuples = old_density * unscanned_pages + scanned_tuples;
+       return floor(total_tuples + 0.5);
  }
  
  
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c

index cf7f5e116295eb171c41f780052f0bd5b16feaed..9ac84e8293a533e74dff387bf8cc3437bb5e677c 100644 (file)
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -1286,7 +1286,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
         vacrelstats->new_dead_tuples = nkeep;
  
         /* now we can compute the new value for pg_class.reltuples */
-       vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
+       vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel,
                                                                                                                  nblocks,
                                                                                                                  vacrelstats->tupcount_pages,
                                                                                                                  num_tuples);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index 797b6dfec8d152c6326bc6297d689d4cb33d9583..85d472f0a54a60812d20dee23082fd3687a78b78 100644 (file)
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -162,7 +162,7 @@ extern void vacuum(int options, List *relations, VacuumParams *params,
  extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
                                  int *nindexes, Relation **Irel);
  extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
-extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
+extern double vac_estimate_reltuples(Relation relation,
                                            BlockNumber total_pages,
                                            BlockNumber scanned_pages,
                                            double scanned_tuples);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
contrib/pgstattuple/pgstatapprox.c		patch \| blob \| history
src/backend/commands/analyze.c		patch \| blob \| history
src/backend/commands/vacuum.c		patch \| blob \| history
src/backend/commands/vacuumlazy.c		patch \| blob \| history
src/include/commands/vacuum.h		patch \| blob \| history