qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
/*
- * Estimate total numbers of rows in relation. For live rows, use
- * vac_estimate_reltuples; for dead rows, we have no source of old
- * information, so we have to assume the density is the same in unseen
- * pages as in the pages we scanned.
+ * Estimate total numbers of live and dead rows in relation, extrapolating
+ * on the assumption that the average tuple density in pages we didn't
+ * scan is the same as in the pages we did scan. Since what we scanned is
+ * a random sample of the pages in the relation, this should be a good
+ * assumption.
*/
- *totalrows = vac_estimate_reltuples(onerel, true,
- totalblocks,
- bs.m,
- liverows);
if (bs.m > 0)
+ {
+ *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
*totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+ }
else
+ {
+ *totalrows = 0.0;
*totaldeadrows = 0.0;
+ }
/*
* Emit some interesting relation info
* vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
*
* If we scanned the whole relation then we should just use the count of
- * live tuples seen; but if we did not, we should not trust the count
- * unreservedly, especially not in VACUUM, which may have scanned a quite
- * nonrandom subset of the table. When we have only partial information,
- * we take the old value of pg_class.reltuples as a measurement of the
+ * live tuples seen; but if we did not, we should not blindly extrapolate
+ * from that number, since VACUUM may have scanned a quite nonrandom
+ * subset of the table. When we have only partial information, we take
+ * the old value of pg_class.reltuples as a measurement of the
* tuple density in the unscanned pages.
- *
- * This routine is shared by VACUUM and ANALYZE.
*/
double
-vac_estimate_reltuples(Relation relation, bool is_analyze,
+vac_estimate_reltuples(Relation relation,
BlockNumber total_pages,
BlockNumber scanned_pages,
double scanned_tuples)
BlockNumber old_rel_pages = relation->rd_rel->relpages;
double old_rel_tuples = relation->rd_rel->reltuples;
double old_density;
- double new_density;
- double multiplier;
- double updated_density;
+ double unscanned_pages;
+ double total_tuples;
/* If we did scan the whole table, just use the count as-is */
if (scanned_pages >= total_pages)
/*
* Okay, we've covered the corner cases. The normal calculation is to
- * convert the old measurement to a density (tuples per page), then update
- * the density using an exponential-moving-average approach, and finally
- * compute reltuples as updated_density * total_pages.
- *
- * For ANALYZE, the moving average multiplier is just the fraction of the
- * table's pages we scanned. This is equivalent to assuming that the
- * tuple density in the unscanned pages didn't change. Of course, it
- * probably did, if the new density measurement is different. But over
- * repeated cycles, the value of reltuples will converge towards the
- * correct value, if repeated measurements show the same new density.
- *
- * For VACUUM, the situation is a bit different: we have looked at a
- * nonrandom sample of pages, but we know for certain that the pages we
- * didn't look at are precisely the ones that haven't changed lately.
- * Thus, there is a reasonable argument for doing exactly the same thing
- * as for the ANALYZE case, that is use the old density measurement as the
- * value for the unscanned pages.
- *
- * This logic could probably use further refinement.
+ * convert the old measurement to a density (tuples per page), then
+ * estimate the number of tuples in the unscanned pages using that figure,
+ * and finally add on the number of tuples in the scanned pages.
*/
old_density = old_rel_tuples / old_rel_pages;
- new_density = scanned_tuples / scanned_pages;
- multiplier = (double) scanned_pages / (double) total_pages;
- updated_density = old_density + (new_density - old_density) * multiplier;
- return floor(updated_density * total_pages + 0.5);
+ unscanned_pages = (double) total_pages - (double) scanned_pages;
+ total_tuples = old_density * unscanned_pages + scanned_tuples;
+ return floor(total_tuples + 0.5);
}