When updating reltuples after ANALYZE, just extrapolate from our sample.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 848263d3900f6d93c792369d0bbd353aa9b0d0d8..925d06ed8ceeb6920c5b2b25ddb4122fae443442 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1215,19 +1215,22 @@ acquire_sample_rows(Relation onerel, int elevel,
                 qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
         /*
-        * Estimate total numbers of rows in relation.  For live rows, use
-        * vac_estimate_reltuples; for dead rows, we have no source of old
-        * information, so we have to assume the density is the same in unseen
-        * pages as in the pages we scanned.
+        * Estimate total numbers of live and dead rows in relation, extrapolating
+        * on the assumption that the average tuple density in pages we didn't
+        * scan is the same as in the pages we did scan.  Since what we scanned is
+        * a random sample of the pages in the relation, this should be a good
+        * assumption.
          */
-       *totalrows = vac_estimate_reltuples(onerel, true,
-                                                                               totalblocks,
-                                                                               bs.m,
-                                                                               liverows);
         if (bs.m > 0)
+       {
+               *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
                 *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+       }
         else
+       {
+               *totalrows = 0.0;
                 *totaldeadrows = 0.0;
+       }
  
         /*
          * Emit some interesting relation info
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index d533cef6a6c19c5efc154e1f1d616efe3a81013a..dd93ecb60460c8cefc5a7b37fe92d804a514dafc 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -685,13 +685,13 @@ vacuum_set_xid_limits(Relation rel,
   * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
   *
   *             If we scanned the whole relation then we should just use the count of
- *             live tuples seen; but if we did not, we should not trust the count
- *             unreservedly, especially not in VACUUM, which may have scanned a quite
- *             nonrandom subset of the table.  When we have only partial information,
- *             we take the old value of pg_class.reltuples as a measurement of the
+ *             live tuples seen; but if we did not, we should not blindly extrapolate
+ *             from that number, since VACUUM may have scanned a quite nonrandom
+ *             subset of the table.  When we have only partial information, we take
+ *             the old value of pg_class.reltuples as a measurement of the
   *             tuple density in the unscanned pages.
   *
- *             This routine is shared by VACUUM and ANALYZE.
+ *             The is_analyze argument is historical.
   */
  double
  vac_estimate_reltuples(Relation relation, bool is_analyze,
@@ -702,9 +702,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
         BlockNumber old_rel_pages = relation->rd_rel->relpages;
         double          old_rel_tuples = relation->rd_rel->reltuples;
         double          old_density;
-       double          new_density;
-       double          multiplier;
-       double          updated_density;
+       double          unscanned_pages;
+       double          total_tuples;
  
         /* If we did scan the whole table, just use the count as-is */
         if (scanned_pages >= total_pages)
@@ -728,31 +727,14 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
  
         /*
          * Okay, we've covered the corner cases.  The normal calculation is to
-        * convert the old measurement to a density (tuples per page), then update
-        * the density using an exponential-moving-average approach, and finally
-        * compute reltuples as updated_density * total_pages.
-        *
-        * For ANALYZE, the moving average multiplier is just the fraction of the
-        * table's pages we scanned.  This is equivalent to assuming that the
-        * tuple density in the unscanned pages didn't change.  Of course, it
-        * probably did, if the new density measurement is different. But over
-        * repeated cycles, the value of reltuples will converge towards the
-        * correct value, if repeated measurements show the same new density.
-        *
-        * For VACUUM, the situation is a bit different: we have looked at a
-        * nonrandom sample of pages, but we know for certain that the pages we
-        * didn't look at are precisely the ones that haven't changed lately.
-        * Thus, there is a reasonable argument for doing exactly the same thing
-        * as for the ANALYZE case, that is use the old density measurement as the
-        * value for the unscanned pages.
-        *
-        * This logic could probably use further refinement.
+        * convert the old measurement to a density (tuples per page), then
+        * estimate the number of tuples in the unscanned pages using that figure,
+        * and finally add on the number of tuples in the scanned pages.
          */
         old_density = old_rel_tuples / old_rel_pages;
-       new_density = scanned_tuples / scanned_pages;
-       multiplier = (double) scanned_pages / (double) total_pages;
-       updated_density = old_density + (new_density - old_density) * multiplier;
-       return floor(updated_density * total_pages + 0.5);
+       unscanned_pages = (double) total_pages - (double) scanned_pages;
+       total_tuples = old_density * unscanned_pages + scanned_tuples;
+       return floor(total_tuples + 0.5);
  }
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 13 Mar 2018 17:24:27 +0000 (13:24 -0400)
src/backend/commands/analyze.c		patch | blob | history
src/backend/commands/vacuum.c		patch | blob | history