Fix VACUUM so that it always updates pg_class.reltuples/relpages.

author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 30 May 2011 21:05:26 +0000 (17:05 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 30 May 2011 21:06:52 +0000 (17:06 -0400)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 0568a1bcf86281a9b1086d343e7027557295065c..fa84989fc6fa8be90d4eecb9c33e94a232d79880 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -84,8 +84,7 @@ static MemoryContext anl_context = NULL;
  static BufferAccessStrategy vac_strategy;
  
  
-static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
-                          bool update_reltuples, bool inh);
+static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh);
  static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
                                   int samplesize);
  static bool BlockSampler_HasMore(BlockSampler bs);
@@ -115,18 +114,9 @@ static bool std_typanalyze(VacAttrStats *stats);
  
  /*
   *     analyze_rel() -- analyze one relation
- *
- * If update_reltuples is true, we update reltuples and relpages columns
- * in pg_class.  Caller should pass false if we're part of VACUUM ANALYZE,
- * and the VACUUM didn't skip any pages.  We only have an approximate count,
- * so we don't want to overwrite the accurate values already inserted by the
- * VACUUM in that case.  VACUUM always scans all indexes, however, so the
- * pg_class entries for indexes are never updated if we're part of VACUUM
- * ANALYZE.
   */
  void
-analyze_rel(Oid relid, VacuumStmt *vacstmt,
-                       BufferAccessStrategy bstrategy, bool update_reltuples)
+analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
  {
         Relation        onerel;
  
@@ -238,13 +228,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
         /*
          * Do the normal non-recursive ANALYZE.
          */
-       do_analyze_rel(onerel, vacstmt, update_reltuples, false);
+       do_analyze_rel(onerel, vacstmt, false);
  
         /*
          * If there are child tables, do recursive ANALYZE.
          */
         if (onerel->rd_rel->relhassubclass)
-               do_analyze_rel(onerel, vacstmt, false, true);
+               do_analyze_rel(onerel, vacstmt, true);
  
         /*
          * Close source relation now, but keep lock so that no one deletes it
@@ -267,8 +257,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
   *     do_analyze_rel() -- analyze one relation, recursively or not
   */
  static void
-do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
-                          bool update_reltuples, bool inh)
+do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
  {
         int                     attr_cnt,
                                 tcnt,
@@ -437,9 +426,9 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
         }
  
         /*
-        * Quit if no analyzable columns and no pg_class update needed.
+        * Quit if no analyzable columns.
          */
-       if (attr_cnt <= 0 && !analyzableindex && !update_reltuples)
+       if (attr_cnt <= 0 && !analyzableindex)
                 goto cleanup;
  
         /*
@@ -549,10 +538,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
         }
  
         /*
-        * Update pages/tuples stats in pg_class, but not if we're inside a VACUUM
-        * that got a more precise number.
+        * Update pages/tuples stats in pg_class ... but not if we're doing
+        * inherited stats.
          */
-       if (update_reltuples)
+       if (!inh)
                 vac_update_relstats(onerel,
                                                         RelationGetNumberOfBlocks(onerel),
                                                         totalrows, hasindex, InvalidTransactionId);
@@ -562,7 +551,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
          * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
          * VACUUM.
          */
-       if (!(vacstmt->options & VACOPT_VACUUM))
+       if (!inh && !(vacstmt->options & VACOPT_VACUUM))
         {
                 for (ind = 0; ind < nindexes; ind++)
                 {
@@ -577,13 +566,12 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
         }
  
         /*
-        * Report ANALYZE to the stats collector, too; likewise, tell it to adopt
-        * these numbers only if we're not inside a VACUUM that got a better
-        * number.      However, a call with inh = true shouldn't reset the stats.
+        * Report ANALYZE to the stats collector, too.  However, if doing
+        * inherited stats we shouldn't report, because the stats collector only
+        * tracks per-table stats.
          */
         if (!inh)
-               pgstat_report_analyze(onerel, update_reltuples,
-                                                         totalrows, totaldeadrows);
+               pgstat_report_analyze(onerel, totalrows, totaldeadrows);
  
         /* We skip to here if there were no analyzable columns */
  cleanup:
@@ -1243,18 +1231,19 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                 qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
         /*
-        * Estimate total numbers of rows in relation.
+        * Estimate total numbers of rows in relation.  For live rows, use
+        * vac_estimate_reltuples; for dead rows, we have no source of old
+        * information, so we have to assume the density is the same in unseen
+        * pages as in the pages we scanned.
          */
+       *totalrows = vac_estimate_reltuples(onerel, true,
+                                                                               totalblocks,
+                                                                               bs.m,
+                                                                               liverows);
         if (bs.m > 0)
-       {
-               *totalrows = floor((liverows * totalblocks) / bs.m + 0.5);
-               *totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5);
-       }
+               *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
         else
-       {
-               *totalrows = 0.0;
                 *totaldeadrows = 0.0;
-       }
  
         /*
          * Emit some interesting relation info
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 9606569617afafe16c55752183cb2a6de89bcad1..224c34f6e7808f79e704264d8036d045c8a2aeb2 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -20,6 +20,8 @@
   */
  #include "postgres.h"
  
+#include <math.h>
+
  #include "access/clog.h"
  #include "access/genam.h"
  #include "access/heapam.h"
@@ -62,7 +64,7 @@ static BufferAccessStrategy vac_strategy;
  static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
  static void vac_truncate_clog(TransactionId frozenXID);
  static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
-                  bool for_wraparound, bool *scanned_all);
+                  bool for_wraparound);
  
  
  /*
@@ -219,12 +221,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
                 foreach(cur, relations)
                 {
                         Oid                     relid = lfirst_oid(cur);
-                       bool            scanned_all = false;
  
                         if (vacstmt->options & VACOPT_VACUUM)
                         {
-                               if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
-                                                               &scanned_all))
+                               if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
                                         continue;
                         }
  
@@ -241,7 +241,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
                                         PushActiveSnapshot(GetTransactionSnapshot());
                                 }
  
-                               analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
+                               analyze_rel(relid, vacstmt, vac_strategy);
  
                                 if (use_own_xacts)
                                 {
@@ -453,6 +453,79 @@ vacuum_set_xid_limits(int freeze_min_age,
  }
  
  
+/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ *             If we scanned the whole relation then we should just use the count of
+ *             live tuples seen; but if we did not, we should not trust the count
+ *             unreservedly, especially not in VACUUM, which may have scanned a quite
+ *             nonrandom subset of the table.  When we have only partial information,
+ *             we take the old value of pg_class.reltuples as a measurement of the
+ *             tuple density in the unscanned pages.
+ *
+ *             This routine is shared by VACUUM and ANALYZE.
+ */
+double
+vac_estimate_reltuples(Relation relation, bool is_analyze,
+                                          BlockNumber total_pages,
+                                          BlockNumber scanned_pages,
+                                          double scanned_tuples)
+{
+       BlockNumber     old_rel_pages = relation->rd_rel->relpages;
+       double          old_rel_tuples = relation->rd_rel->reltuples;
+       double          old_density;
+       double          new_density;
+       double          multiplier;
+       double          updated_density;
+
+       /* If we did scan the whole table, just use the count as-is */
+       if (scanned_pages >= total_pages)
+               return scanned_tuples;
+
+       /*
+        * If scanned_pages is zero but total_pages isn't, keep the existing
+        * value of reltuples.
+        */
+       if (scanned_pages == 0)
+               return old_rel_tuples;
+
+       /*
+        * If old value of relpages is zero, old density is indeterminate; we
+        * can't do much except scale up scanned_tuples to match total_pages.
+        */
+       if (old_rel_pages == 0)
+               return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+
+       /*
+        * Okay, we've covered the corner cases.  The normal calculation is to
+        * convert the old measurement to a density (tuples per page), then
+        * update the density using an exponential-moving-average approach,
+        * and finally compute reltuples as updated_density * total_pages.
+        *
+        * For ANALYZE, the moving average multiplier is just the fraction of
+        * the table's pages we scanned.  This is equivalent to assuming
+        * that the tuple density in the unscanned pages didn't change.  Of
+        * course, it probably did, if the new density measurement is different.
+        * But over repeated cycles, the value of reltuples will converge towards
+        * the correct value, if repeated measurements show the same new density.
+        *
+        * For VACUUM, the situation is a bit different: we have looked at a
+        * nonrandom sample of pages, but we know for certain that the pages we
+        * didn't look at are precisely the ones that haven't changed lately.
+        * Thus, there is a reasonable argument for doing exactly the same thing
+        * as for the ANALYZE case, that is use the old density measurement as
+        * the value for the unscanned pages.
+        *
+        * This logic could probably use further refinement.
+        */
+       old_density = old_rel_tuples / old_rel_pages;
+       new_density = scanned_tuples / scanned_pages;
+       multiplier = (double) scanned_pages / (double) total_pages;
+       updated_density = old_density + (new_density - old_density) * multiplier;
+       return floor(updated_density * total_pages + 0.5);
+}
+
+
  /*
   *     vac_update_relstats() -- update statistics for one relation
   *
@@ -480,7 +553,7 @@ vacuum_set_xid_limits(int freeze_min_age,
   *             somebody vacuuming pg_class might think they could delete a tuple
   *             marked with xmin = our xid.
   *
- *             This routine is shared by VACUUM and stand-alone ANALYZE.
+ *             This routine is shared by VACUUM and ANALYZE.
   */
  void
  vac_update_relstats(Relation relation,
@@ -758,14 +831,10 @@ vac_truncate_clog(TransactionId frozenXID)
   *             many small transactions.  Otherwise, two-phase locking would require
   *             us to lock the entire database during one pass of the vacuum cleaner.
   *
- *             We'll return true in *scanned_all if the vacuum scanned all heap
- *             pages, and updated pg_class.
- *
   *             At entry and exit, we are not inside a transaction.
   */
  static bool
-vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
-                  bool *scanned_all)
+vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
  {
         LOCKMODE        lmode;
         Relation        onerel;
@@ -775,9 +844,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
         int                     save_sec_context;
         int                     save_nestlevel;
  
-       if (scanned_all)
-               *scanned_all = false;
-
         /* Begin a transaction for vacuuming this relation */
         StartTransactionCommand();
  
@@ -971,7 +1037,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
                                         vacstmt->freeze_min_age, vacstmt->freeze_table_age);
         }
         else
-               lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
+               lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
  
         /* Roll back any GUC changes executed by index functions */
         AtEOXact_GUC(false, save_nestlevel);
@@ -997,7 +1063,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
          * totally unimportant for toast relations.
          */
         if (toast_relid != InvalidOid)
-               vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
+               vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
  
         /*
          * Now release the session-level lock on the master table.
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c

index 9393fa0727aaad7508e1163623322b4066412257..ce5fa180662ed1474849b8e94817cff8ba149dd6 100644 (file)
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -77,17 +77,18 @@
   * Before we consider skipping a page that's marked as clean in
   * visibility map, we must've seen at least this many clean pages.
   */
-#define SKIP_PAGES_THRESHOLD   32
+#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)
  
  typedef struct LVRelStats
  {
         /* hasindex = true means two-pass strategy; false means one-pass */
         bool            hasindex;
-       bool            scanned_all;    /* have we scanned all pages (this far)? */
         /* Overall statistics about rel */
-       BlockNumber rel_pages;
+       BlockNumber rel_pages;          /* total number of pages */
+       BlockNumber scanned_pages;      /* number of pages we examined */
+       double          scanned_tuples; /* counts only tuples on scanned pages */
         double          old_rel_tuples; /* previous value of pg_class.reltuples */
-       double          rel_tuples;             /* counts only tuples on scanned pages */
+       double          new_rel_tuples; /* new estimated total # of tuples */
         BlockNumber pages_removed;
         double          tuples_deleted;
         BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
@@ -143,7 +144,7 @@ static int  vac_cmp_itemptr(const void *left, const void *right);
   */
  void
  lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
-                               BufferAccessStrategy bstrategy, bool *scanned_all)
+                               BufferAccessStrategy bstrategy)
  {
         LVRelStats *vacrelstats;
         Relation   *Irel;
@@ -175,7 +176,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
  
         vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
  
-       vacrelstats->scanned_all = true;        /* will be cleared if we skip a page */
         vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
         vacrelstats->num_index_scans = 0;
  
@@ -205,24 +205,20 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
         FreeSpaceMapVacuum(onerel);
  
         /*
-        * Update statistics in pg_class.  But only if we didn't skip any pages;
-        * the tuple count only includes tuples from the pages we've visited, and
-        * we haven't frozen tuples in unvisited pages either.  The page count is
-        * accurate in any case, but because we use the reltuples / relpages ratio
-        * in the planner, it's better to not update relpages either if we can't
-        * update reltuples.
+        * Update statistics in pg_class.  But don't change relfrozenxid if we
+        * skipped any pages.
          */
-       if (vacrelstats->scanned_all)
-               vac_update_relstats(onerel,
-                                                       vacrelstats->rel_pages, vacrelstats->rel_tuples,
-                                                       vacrelstats->hasindex,
-                                                       FreezeLimit);
+       vac_update_relstats(onerel,
+                                               vacrelstats->rel_pages, vacrelstats->new_rel_tuples,
+                                               vacrelstats->hasindex,
+                                               (vacrelstats->scanned_pages < vacrelstats->rel_pages) ?
+                                               InvalidTransactionId :
+                                               FreezeLimit);
  
         /* report results to the stats collector, too */
         pgstat_report_vacuum(RelationGetRelid(onerel),
                                                  onerel->rd_rel->relisshared,
-                                                vacrelstats->scanned_all,
-                                                vacrelstats->rel_tuples);
+                                                vacrelstats->new_rel_tuples);
  
         /* and log the action if appropriate */
         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
@@ -239,13 +235,12 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
                                                         get_namespace_name(RelationGetNamespace(onerel)),
                                                         RelationGetRelationName(onerel),
                                                         vacrelstats->num_index_scans,
-                                                 vacrelstats->pages_removed, vacrelstats->rel_pages,
-                                               vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
+                                                       vacrelstats->pages_removed,
+                                                       vacrelstats->rel_pages,
+                                                       vacrelstats->tuples_deleted,
+                                                       vacrelstats->new_rel_tuples,
                                                         pg_rusage_show(&ru0))));
         }
-
-       if (scanned_all)
-               *scanned_all = vacrelstats->scanned_all;
  }
  
  /*
@@ -301,7 +296,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
         HeapTupleData tuple;
         char       *relname;
         BlockNumber empty_pages,
-                               scanned_pages,
                                 vacuumed_pages;
         double          num_tuples,
                                 tups_vacuumed,
@@ -311,7 +305,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
         int                     i;
         PGRUsage        ru0;
         Buffer          vmbuffer = InvalidBuffer;
-       BlockNumber all_visible_streak;
+       BlockNumber next_not_all_visible_block;
+       bool            skipping_all_visible_blocks;
  
         pg_rusage_init(&ru0);
  
@@ -321,7 +316,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                                         get_namespace_name(RelationGetNamespace(onerel)),
                                         relname)));
  
-       empty_pages = vacuumed_pages = scanned_pages = 0;
+       empty_pages = vacuumed_pages = 0;
         num_tuples = tups_vacuumed = nkeep = nunused = 0;
  
         indstats = (IndexBulkDeleteResult **)
@@ -329,12 +324,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
  
         nblocks = RelationGetNumberOfBlocks(onerel);
         vacrelstats->rel_pages = nblocks;
+       vacrelstats->scanned_pages = 0;
         vacrelstats->nonempty_pages = 0;
         vacrelstats->latestRemovedXid = InvalidTransactionId;
  
         lazy_space_alloc(vacrelstats, nblocks);
  
-       all_visible_streak = 0;
+       /*
+        * We want to skip pages that don't require vacuuming according to the
+        * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
+        * consecutive pages.  Since we're reading sequentially, the OS should be
+        * doing readahead for us, so there's no gain in skipping a page now and
+        * then; that's likely to disable readahead and so be counterproductive.
+        * Also, skipping even a single page means that we can't update
+        * relfrozenxid, so we only want to do it if we can skip a goodly number
+        * of pages.
+        *
+        * Before entering the main loop, establish the invariant that
+        * next_not_all_visible_block is the next block number >= blkno that's
+        * not all-visible according to the visibility map, or nblocks if there's
+        * no such block.  Also, we set up the skipping_all_visible_blocks flag,
+        * which is needed because we need hysteresis in the decision: once we've
+        * started skipping blocks, we may as well skip everything up to the next
+        * not-all-visible block.
+        *
+        * Note: if scan_all is true, we won't actually skip any pages; but we
+        * maintain next_not_all_visible_block anyway, so as to set up the
+        * all_visible_according_to_vm flag correctly for each page.
+        */
+       for (next_not_all_visible_block = 0;
+                next_not_all_visible_block < nblocks;
+                next_not_all_visible_block++)
+       {
+               if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+                       break;
+               vacuum_delay_point();
+       }
+       if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
+               skipping_all_visible_blocks = true;
+       else
+               skipping_all_visible_blocks = false;
+
         for (blkno = 0; blkno < nblocks; blkno++)
         {
                 Buffer          buf;
@@ -347,41 +377,45 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                 OffsetNumber frozen[MaxOffsetNumber];
                 int                     nfrozen;
                 Size            freespace;
-               bool            all_visible_according_to_vm = false;
+               bool            all_visible_according_to_vm;
                 bool            all_visible;
                 bool            has_dead_tuples;
  
-               /*
-                * Skip pages that don't require vacuuming according to the visibility
-                * map. But only if we've seen a streak of at least
-                * SKIP_PAGES_THRESHOLD pages marked as clean. Since we're reading
-                * sequentially, the OS should be doing readahead for us and there's
-                * no gain in skipping a page now and then. You need a longer run of
-                * consecutive skipped pages before it's worthwhile. Also, skipping
-                * even a single page means that we can't update relfrozenxid or
-                * reltuples, so we only want to do it if there's a good chance to
-                * skip a goodly number of pages.
-                */
-               if (!scan_all)
+               if (blkno == next_not_all_visible_block)
                 {
-                       all_visible_according_to_vm =
-                               visibilitymap_test(onerel, blkno, &vmbuffer);
-                       if (all_visible_according_to_vm)
+                       /* Time to advance next_not_all_visible_block */
+                       for (next_not_all_visible_block++;
+                                next_not_all_visible_block < nblocks;
+                                next_not_all_visible_block++)
                         {
-                               all_visible_streak++;
-                               if (all_visible_streak >= SKIP_PAGES_THRESHOLD)
-                               {
-                                       vacrelstats->scanned_all = false;
-                                       continue;
-                               }
+                               if (!visibilitymap_test(onerel, next_not_all_visible_block,
+                                                                               &vmbuffer))
+                                       break;
+                               vacuum_delay_point();
                         }
+
+                       /*
+                        * We know we can't skip the current block.  But set up
+                        * skipping_all_visible_blocks to do the right thing at the
+                        * following blocks.
+                        */
+                       if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
+                               skipping_all_visible_blocks = true;
                         else
-                               all_visible_streak = 0;
+                               skipping_all_visible_blocks = false;
+                       all_visible_according_to_vm = false;
+               }
+               else
+               {
+                       /* Current block is all-visible */
+                       if (skipping_all_visible_blocks && !scan_all)
+                               continue;
+                       all_visible_according_to_vm = true;
                 }
  
                 vacuum_delay_point();
  
-               scanned_pages++;
+               vacrelstats->scanned_pages++;
  
                 /*
                  * If we are close to overrunning the available space for dead-tuple
@@ -764,9 +798,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
         }
  
         /* save stats for use later */
-       vacrelstats->rel_tuples = num_tuples;
+       vacrelstats->scanned_tuples = num_tuples;
         vacrelstats->tuples_deleted = tups_vacuumed;
  
+       /* now we can compute the new value for pg_class.reltuples */
+       vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
+                                                                                                                nblocks,
+                                                                                                                vacrelstats->scanned_pages,
+                                                                                                                num_tuples);
+
         /* If any tuples need to be deleted, perform final vacuum cycle */
         /* XXX put a threshold on min number of tuples here? */
         if (vacrelstats->num_dead_tuples > 0)
@@ -805,7 +845,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
         ereport(elevel,
                         (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
                                         RelationGetRelationName(onerel),
-                                       tups_vacuumed, num_tuples, scanned_pages, nblocks),
+                                       tups_vacuumed, num_tuples,
+                                       vacrelstats->scanned_pages, nblocks),
                          errdetail("%.0f dead row versions cannot be removed yet.\n"
                                            "There were %.0f unused item pointers.\n"
                                            "%u pages are entirely empty.\n"
@@ -977,10 +1018,9 @@ lazy_cleanup_index(Relation indrel,
  
         ivinfo.index = indrel;
         ivinfo.analyze_only = false;
-       ivinfo.estimated_count = !vacrelstats->scanned_all;
+       ivinfo.estimated_count = (vacrelstats->scanned_pages < vacrelstats->rel_pages);
         ivinfo.message_level = elevel;
-       /* use rel_tuples only if we scanned all pages, else fall back */
-       ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
+       ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
         ivinfo.strategy = vac_strategy;
  
         stats = index_vacuum_cleanup(&ivinfo, stats);
@@ -1041,8 +1081,13 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
         new_rel_pages = RelationGetNumberOfBlocks(onerel);
         if (new_rel_pages != old_rel_pages)
         {
-               /* might as well use the latest news when we update pg_class stats */
-               vacrelstats->rel_pages = new_rel_pages;
+               /*
+                * Note: we intentionally don't update vacrelstats->rel_pages with
+                * the new rel size here.  If we did, it would amount to assuming that
+                * the new pages are empty, which is unlikely.  Leaving the numbers
+                * alone amounts to assuming that the new pages have the same tuple
+                * density as existing ones, which is less unlikely.
+                */
                 UnlockRelation(onerel, AccessExclusiveLock);
                 return;
         }
@@ -1076,7 +1121,11 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
          */
         UnlockRelation(onerel, AccessExclusiveLock);
  
-       /* update statistics */
+       /*
+        * Update statistics.  Here, it *is* correct to adjust rel_pages without
+        * also touching reltuples, since the tuple count wasn't changed by the
+        * truncation.
+        */
         vacrelstats->rel_pages = new_rel_pages;
         vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
  
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c

index 5ed6e8337c11fdd599a0deb219e1cb0285228a88..1d80c311d879d9cf9009621860cda3ab19c6dea9 100644 (file)
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -1246,8 +1246,7 @@ pgstat_report_autovac(Oid dboid)
   * ---------
   */
  void
-pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
-                                        PgStat_Counter tuples)
+pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter tuples)
  {
         PgStat_MsgVacuum msg;
  
@@ -1257,7 +1256,6 @@ pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
         msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
         msg.m_tableoid = tableoid;
-       msg.m_adopt_counts = adopt_counts;
         msg.m_autovacuum = IsAutoVacuumWorkerProcess();
         msg.m_vacuumtime = GetCurrentTimestamp();
         msg.m_tuples = tuples;
@@ -1271,7 +1269,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
   * --------
   */
  void
-pgstat_report_analyze(Relation rel, bool adopt_counts,
+pgstat_report_analyze(Relation rel,
                                           PgStat_Counter livetuples, PgStat_Counter deadtuples)
  {
         PgStat_MsgAnalyze msg;
@@ -1308,7 +1306,6 @@ pgstat_report_analyze(Relation rel, bool adopt_counts,
         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
         msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
         msg.m_tableoid = RelationGetRelid(rel);
-       msg.m_adopt_counts = adopt_counts;
         msg.m_autovacuum = IsAutoVacuumWorkerProcess();
         msg.m_analyzetime = GetCurrentTimestamp();
         msg.m_live_tuples = livetuples;
@@ -4197,8 +4194,7 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
  
         tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
  
-       if (msg->m_adopt_counts)
-               tabentry->n_live_tuples = msg->m_tuples;
+       tabentry->n_live_tuples = msg->m_tuples;
         /* Resetting dead_tuples to 0 is an approximation ... */
         tabentry->n_dead_tuples = 0;
  
@@ -4233,11 +4229,8 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
  
         tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
  
-       if (msg->m_adopt_counts)
-       {
-               tabentry->n_live_tuples = msg->m_live_tuples;
-               tabentry->n_dead_tuples = msg->m_dead_tuples;
-       }
+       tabentry->n_live_tuples = msg->m_live_tuples;
+       tabentry->n_dead_tuples = msg->m_dead_tuples;
  
         /*
          * We reset changes_since_analyze to zero, forgetting any changes that
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index 79c9f5d90fb674ca8c778a65ca540ef62bded0af..cfbe0c43924029843f636845d87a09996d706af3 100644 (file)
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -142,6 +142,10 @@ extern void vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
  extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
                                  int *nindexes, Relation **Irel);
  extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
+extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
+                                          BlockNumber total_pages,
+                                          BlockNumber scanned_pages,
+                                          double scanned_tuples);
  extern void vac_update_relstats(Relation relation,
                                         BlockNumber num_pages,
                                         double num_tuples,
@@ -157,10 +161,10 @@ extern void vacuum_delay_point(void);
  
  /* in commands/vacuumlazy.c */
  extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
-                               BufferAccessStrategy bstrategy, bool *scanned_all);
+                               BufferAccessStrategy bstrategy);
  
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
-                       BufferAccessStrategy bstrategy, bool update_reltuples);
+                       BufferAccessStrategy bstrategy);
  
  #endif   /* VACUUM_H */
diff --git a/src/include/pgstat.h b/src/include/pgstat.h

index f04be95b459b2053c66495979b95f0b697a2af13..5446fa04409ebefc31a3a1f282e7dd1224a5d734 100644 (file)
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -322,7 +322,6 @@ typedef struct PgStat_MsgVacuum
         PgStat_MsgHdr m_hdr;
         Oid                     m_databaseid;
         Oid                     m_tableoid;
-       bool            m_adopt_counts;
         bool            m_autovacuum;
         TimestampTz m_vacuumtime;
         PgStat_Counter m_tuples;
@@ -339,7 +338,6 @@ typedef struct PgStat_MsgAnalyze
         PgStat_MsgHdr m_hdr;
         Oid                     m_databaseid;
         Oid                     m_tableoid;
-       bool            m_adopt_counts;
         bool            m_autovacuum;
         TimestampTz m_analyzetime;
         PgStat_Counter m_live_tuples;
@@ -706,9 +704,9 @@ extern void pgstat_reset_shared_counters(const char *);
  extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
  
  extern void pgstat_report_autovac(Oid dboid);
-extern void pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
+extern void pgstat_report_vacuum(Oid tableoid, bool shared,
                                          PgStat_Counter tuples);
-extern void pgstat_report_analyze(Relation rel, bool adopt_counts,
+extern void pgstat_report_analyze(Relation rel,
                                           PgStat_Counter livetuples, PgStat_Counter deadtuples);
  
  extern void pgstat_report_recovery_conflict(int reason);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 30 May 2011 21:05:26 +0000 (17:05 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 30 May 2011 21:06:52 +0000 (17:06 -0400)
src/backend/commands/analyze.c		patch \| blob \| history
src/backend/commands/vacuum.c		patch \| blob \| history
src/backend/commands/vacuumlazy.c		patch \| blob \| history
src/backend/postmaster/pgstat.c		patch \| blob \| history
src/include/commands/vacuum.h		patch \| blob \| history
src/include/pgstat.h		patch \| blob \| history