granicus.if.org Git - postgresql/commitdiff
Sync up our various ways of estimating pg_class.reltuples.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 22 Mar 2018 19:47:29 +0000 (15:47 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 22 Mar 2018 19:47:41 +0000 (15:47 -0400)
VACUUM thought that reltuples represents the total number of tuples in
the relation, while ANALYZE counted only live tuples.  This can cause
"flapping" in the value when background vacuums and analyzes happen
separately.  The planner's use of reltuples essentially assumes that
it's the count of live (visible) tuples, so let's standardize on having
it mean live tuples.

Another issue is that the definition of "live tuple" isn't totally clear;
what should be done with INSERT_IN_PROGRESS or DELETE_IN_PROGRESS tuples?
ANALYZE's choices in this regard are made on the assumption that if the
originating transaction commits at all, it will happen after ANALYZE
finishes, so we should ignore the effects of the in-progress transaction
--- unless it is our own transaction, and then we should count it.
Let's propagate this definition into VACUUM, too.

Likewise propagate this definition into CREATE INDEX, and into
contrib/pgstattuple's pgstattuple_approx() function.

Tomas Vondra, reviewed by Haribabu Kommi, some corrections by me

Discussion: https://postgr.es/m/16db4468-edfa-830a-f921-39a50498e77e@2ndquadrant.com

contrib/pgstattuple/pgstatapprox.c
doc/src/sgml/catalogs.sgml
src/backend/catalog/index.c
src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c

index 474c3bd517f30f61b430c0ab4b6124b8f00c3b9c..ef33cacec6af983d92547021dabaeebd566faa60 100644 (file)
@@ -68,7 +68,6 @@ statapprox_heap(Relation rel, output_type *stat)
        Buffer          vmbuffer = InvalidBuffer;
        BufferAccessStrategy bstrategy;
        TransactionId OldestXmin;
-       uint64          misc_count = 0;
 
        OldestXmin = GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM);
        bstrategy = GetAccessStrategy(BAS_BULKREAD);
@@ -114,14 +113,15 @@ statapprox_heap(Relation rel, output_type *stat)
                else
                        stat->free_space += BLCKSZ - SizeOfPageHeaderData;
 
+               /* We may count the page as scanned even if it's new/empty */
+               scanned++;
+
                if (PageIsNew(page) || PageIsEmpty(page))
                {
                        UnlockReleaseBuffer(buf);
                        continue;
                }
 
-               scanned++;
-
                /*
                 * Look at each tuple on the page and decide whether it's live or
                 * dead, then count it and its size. Unlike lazy_scan_heap, we can
@@ -153,25 +153,23 @@ statapprox_heap(Relation rel, output_type *stat)
                        tuple.t_tableOid = RelationGetRelid(rel);
 
                        /*
-                        * We count live and dead tuples, but we also need to add up
-                        * others in order to feed vac_estimate_reltuples.
+                        * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
+                        * as "dead" while DELETE_IN_PROGRESS tuples are "live".  We don't
+                        * bother distinguishing tuples inserted/deleted by our own
+                        * transaction.
                         */
                        switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
                        {
-                               case HEAPTUPLE_RECENTLY_DEAD:
-                                       misc_count++;
-                                       /* Fall through */
-                               case HEAPTUPLE_DEAD:
-                                       stat->dead_tuple_len += tuple.t_len;
-                                       stat->dead_tuple_count++;
-                                       break;
                                case HEAPTUPLE_LIVE:
+                               case HEAPTUPLE_DELETE_IN_PROGRESS:
                                        stat->tuple_len += tuple.t_len;
                                        stat->tuple_count++;
                                        break;
+                               case HEAPTUPLE_DEAD:
+                               case HEAPTUPLE_RECENTLY_DEAD:
                                case HEAPTUPLE_INSERT_IN_PROGRESS:
-                               case HEAPTUPLE_DELETE_IN_PROGRESS:
-                                       misc_count++;
+                                       stat->dead_tuple_len += tuple.t_len;
+                                       stat->dead_tuple_count++;
                                        break;
                                default:
                                        elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
@@ -184,8 +182,16 @@ statapprox_heap(Relation rel, output_type *stat)
 
        stat->table_len = (uint64) nblocks * BLCKSZ;
 
+       /*
+        * We don't know how many tuples are in the pages we didn't scan, so
+        * extrapolate the live-tuple count to the whole table in the same way
+        * that VACUUM does.  (Like VACUUM, we're not taking a random sample, so
+        * just extrapolating linearly seems unsafe.)  There should be no dead
+        * tuples in all-visible pages, so no correction is needed for that, and
+        * we already accounted for the space in those pages, too.
+        */
        stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
-                                                                                          stat->tuple_count + misc_count);
+                                                                                          stat->tuple_count);
 
        /*
         * Calculate percentages if the relation has one or more pages.
index fc81133f07dbf388445d30112c8c8f6c9546b4cc..c030cdab4be7d6e06bde4fbdcf5f5f9877d926f8 100644 (file)
@@ -1739,8 +1739,8 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
       <entry><type>float4</type></entry>
       <entry></entry>
       <entry>
-       Number of rows in the table.  This is only an estimate used by the
-       planner.  It is updated by <command>VACUUM</command>,
+       Number of live rows in the table.  This is only an estimate used by
+       the planner.  It is updated by <command>VACUUM</command>,
        <command>ANALYZE</command>, and a few DDL commands such as
        <command>CREATE INDEX</command>.
       </entry>
index 9e2dd0e729edbcff18b534a79a4226d602abf481..f4a1efbf549c492a99ae99a5aabe34a6b6aa4166 100644 (file)
@@ -2366,12 +2366,12 @@ index_build(Relation heapRelation,
  * things to add it to the new index.  After we return, the AM's index
  * build procedure does whatever cleanup it needs.
  *
- * The total count of heap tuples is returned.  This is for updating pg_class
- * statistics.  (It's annoying not to be able to do that here, but we want
- * to merge that update with others; see index_update_stats.)  Note that the
- * index AM itself must keep track of the number of index tuples; we don't do
- * so here because the AM might reject some of the tuples for its own reasons,
- * such as being unable to store NULLs.
+ * The total count of live heap tuples is returned.  This is for updating
+ * pg_class statistics.  (It's annoying not to be able to do that here, but we
+ * want to merge that update with others; see index_update_stats.)  Note that
+ * the index AM itself must keep track of the number of index tuples; we don't
+ * do so here because the AM might reject some of the tuples for its own
+ * reasons, such as being unable to store NULLs.
  *
  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
  * any potentially broken HOT chains.  Currently, we set this if there are
@@ -2402,8 +2402,8 @@ IndexBuildHeapScan(Relation heapRelation,
  * to scan cannot be done when requesting syncscan.
  *
  * When "anyvisible" mode is requested, all tuples visible to any transaction
- * are considered, including those inserted or deleted by transactions that are
- * still in progress.
+ * are indexed and counted as live, including those inserted or deleted by
+ * transactions that are still in progress.
  */
 double
 IndexBuildHeapRangeScan(Relation heapRelation,
@@ -2599,6 +2599,12 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                         */
                        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
+                       /*
+                        * The criteria for counting a tuple as live in this block need to
+                        * match what analyze.c's acquire_sample_rows() does, otherwise
+                        * CREATE INDEX and ANALYZE may produce wildly different reltuples
+                        * values, e.g. when there are many recently-dead tuples.
+                        */
                        switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
                                                                                         scan->rs_cbuf))
                        {
@@ -2611,6 +2617,8 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                        /* Normal case, index and unique-check it */
                                        indexIt = true;
                                        tupleIsAlive = true;
+                                       /* Count it as live, too */
+                                       reltuples += 1;
                                        break;
                                case HEAPTUPLE_RECENTLY_DEAD:
 
@@ -2624,6 +2632,9 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                         * the live tuple at the end of the HOT-chain.  Since this
                                         * breaks semantics for pre-existing snapshots, mark the
                                         * index as unusable for them.
+                                        *
+                                        * We don't count recently-dead tuples in reltuples, even
+                                        * if we index them; see acquire_sample_rows().
                                         */
                                        if (HeapTupleIsHotUpdated(heapTuple))
                                        {
@@ -2646,6 +2657,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                        {
                                                indexIt = true;
                                                tupleIsAlive = true;
+                                               reltuples += 1;
                                                break;
                                        }
 
@@ -2683,6 +2695,15 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                                        goto recheck;
                                                }
                                        }
+                                       else
+                                       {
+                                               /*
+                                                * For consistency with acquire_sample_rows(), count
+                                                * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
+                                                * when inserted by our own transaction.
+                                                */
+                                               reltuples += 1;
+                                       }
 
                                        /*
                                         * We must index such tuples, since if the index build
@@ -2702,6 +2723,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                        {
                                                indexIt = true;
                                                tupleIsAlive = false;
+                                               reltuples += 1;
                                                break;
                                        }
 
@@ -2745,6 +2767,14 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                                 * the same as a RECENTLY_DEAD tuple.
                                                 */
                                                indexIt = true;
+
+                                               /*
+                                                * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
+                                                * if they were not deleted by the current
+                                                * transaction.  That's what acquire_sample_rows()
+                                                * does, and we want the behavior to be consistent.
+                                                */
+                                               reltuples += 1;
                                        }
                                        else if (HeapTupleIsHotUpdated(heapTuple))
                                        {
@@ -2762,8 +2792,8 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                                        {
                                                /*
                                                 * It's a regular tuple deleted by our own xact. Index
-                                                * it but don't check for uniqueness, the same as a
-                                                * RECENTLY_DEAD tuple.
+                                                * it, but don't check for uniqueness nor count in
+                                                * reltuples, the same as a RECENTLY_DEAD tuple.
                                                 */
                                                indexIt = true;
                                        }
@@ -2787,8 +2817,6 @@ IndexBuildHeapRangeScan(Relation heapRelation,
                        tupleIsAlive = true;
                }
 
-               reltuples += 1;
-
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
                /* Set up for predicate or expression evaluation */
index 3985c4dd0e50b6c66c61a7016191b22c3b8a2fec..a1782c28742ccad6602fdec1048f484009d86514 100644 (file)
@@ -771,6 +771,9 @@ vacuum_set_xid_limits(Relation rel,
  *             subset of the table.  When we have only partial information, we take
  *             the old value of pg_class.reltuples as a measurement of the
  *             tuple density in the unscanned pages.
+ *
+ *             Note: scanned_tuples should count only *live* tuples, since
+ *             pg_class.reltuples is defined that way.
  */
 double
 vac_estimate_reltuples(Relation relation,
@@ -852,6 +855,9 @@ vac_estimate_reltuples(Relation relation,
  *             transaction.  This is OK since postponing the flag maintenance is
  *             always allowable.
  *
+ *             Note: num_tuples should count only *live* tuples, since
+ *             pg_class.reltuples is defined that way.
+ *
  *             This routine is shared by VACUUM and ANALYZE.
  */
 void
index 9ac84e8293a533e74dff387bf8cc3437bb5e677c..f9da24c491fc460ad70685bf2cd5815be7f3715c 100644 (file)
@@ -114,9 +114,9 @@ typedef struct LVRelStats
        BlockNumber pinskipped_pages;   /* # of pages we skipped due to a pin */
        BlockNumber frozenskipped_pages;        /* # of frozen pages we skipped */
        BlockNumber tupcount_pages; /* pages whose tuples we counted */
-       double          scanned_tuples; /* counts only tuples on tupcount_pages */
-       double          old_rel_tuples; /* previous value of pg_class.reltuples */
+       double          old_live_tuples;        /* previous value of pg_class.reltuples */
        double          new_rel_tuples; /* new estimated total # of tuples */
+       double          new_live_tuples;        /* new estimated total # of live tuples */
        double          new_dead_tuples;        /* new estimated total # of dead tuples */
        BlockNumber pages_removed;
        double          tuples_deleted;
@@ -196,7 +196,6 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
        TransactionId xidFullScanLimit;
        MultiXactId mxactFullScanLimit;
        BlockNumber new_rel_pages;
-       double          new_rel_tuples;
        BlockNumber new_rel_allvisible;
        double          new_live_tuples;
        TransactionId new_frozen_xid;
@@ -245,7 +244,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
        vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
 
        vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
-       vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
+       vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
        vacrelstats->num_index_scans = 0;
        vacrelstats->pages_removed = 0;
        vacrelstats->lock_waiter_detected = false;
@@ -311,11 +310,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
         * since then we don't know for certain that all tuples have a newer xmin.
         */
        new_rel_pages = vacrelstats->rel_pages;
-       new_rel_tuples = vacrelstats->new_rel_tuples;
+       new_live_tuples = vacrelstats->new_live_tuples;
        if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
        {
                new_rel_pages = vacrelstats->old_rel_pages;
-               new_rel_tuples = vacrelstats->old_rel_tuples;
+               new_live_tuples = vacrelstats->old_live_tuples;
        }
 
        visibilitymap_count(onerel, &new_rel_allvisible, NULL);
@@ -327,7 +326,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 
        vac_update_relstats(onerel,
                                                new_rel_pages,
-                                               new_rel_tuples,
+                                               new_live_tuples,
                                                new_rel_allvisible,
                                                vacrelstats->hasindex,
                                                new_frozen_xid,
@@ -335,10 +334,6 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
                                                false);
 
        /* report results to the stats collector, too */
-       new_live_tuples = new_rel_tuples - vacrelstats->new_dead_tuples;
-       if (new_live_tuples < 0)
-               new_live_tuples = 0;    /* just in case */
-
        pgstat_report_vacuum(RelationGetRelid(onerel),
                                                 onerel->rd_rel->relisshared,
                                                 new_live_tuples,
@@ -471,10 +466,11 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
        TransactionId relminmxid = onerel->rd_rel->relminmxid;
        BlockNumber empty_pages,
                                vacuumed_pages;
-       double          num_tuples,
-                               tups_vacuumed,
-                               nkeep,
-                               nunused;
+       double          num_tuples,             /* total number of nonremovable tuples */
+                               live_tuples,    /* live tuples (reltuples estimate) */
+                               tups_vacuumed,  /* tuples cleaned up by vacuum */
+                               nkeep,                  /* dead-but-not-removable tuples */
+                               nunused;                /* unused item pointers */
        IndexBulkDeleteResult **indstats;
        int                     i;
        PGRUsage        ru0;
@@ -505,7 +501,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
                                                relname)));
 
        empty_pages = vacuumed_pages = 0;
-       num_tuples = tups_vacuumed = nkeep = nunused = 0;
+       num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
 
        indstats = (IndexBulkDeleteResult **)
                palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
@@ -988,6 +984,17 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 
                        tupgone = false;
 
+                       /*
+                        * The criteria for counting a tuple as live in this block need to
+                        * match what analyze.c's acquire_sample_rows() does, otherwise
+                        * VACUUM and ANALYZE may produce wildly different reltuples
+                        * values, e.g. when there are many recently-dead tuples.
+                        *
+                        * The logic here is a bit simpler than acquire_sample_rows(), as
+                        * VACUUM can't run inside a transaction block, which makes some
+                        * cases impossible (e.g. in-progress insert from the same
+                        * transaction).
+                        */
                        switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
                        {
                                case HEAPTUPLE_DEAD:
@@ -1028,6 +1035,12 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
                                                elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
                                                         relname, blkno, offnum);
 
+                                       /*
+                                        * Count it as live.  Not only is this natural, but it's
+                                        * also what acquire_sample_rows() does.
+                                        */
+                                       live_tuples += 1;
+
                                        /*
                                         * Is the tuple definitely visible to all transactions?
                                         *
@@ -1073,12 +1086,29 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
                                        all_visible = false;
                                        break;
                                case HEAPTUPLE_INSERT_IN_PROGRESS:
-                                       /* This is an expected case during concurrent vacuum */
+
+                                       /*
+                                        * This is an expected case during concurrent vacuum.
+                                        *
+                                        * We do not count these rows as live, because we expect
+                                        * the inserting transaction to update the counters at
+                                        * commit, and we assume that will happen only after we
+                                        * report our results.  This assumption is a bit shaky,
+                                        * but it is what acquire_sample_rows() does, so be
+                                        * consistent.
+                                        */
                                        all_visible = false;
                                        break;
                                case HEAPTUPLE_DELETE_IN_PROGRESS:
                                        /* This is an expected case during concurrent vacuum */
                                        all_visible = false;
+
+                                       /*
+                                        * Count such rows as live.  As above, we assume the
+                                        * deleting transaction will commit and update the
+                                        * counters after we report.
+                                        */
+                                       live_tuples += 1;
                                        break;
                                default:
                                        elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
@@ -1281,15 +1311,18 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
        pfree(frozen);
 
        /* save stats for use later */
-       vacrelstats->scanned_tuples = num_tuples;
        vacrelstats->tuples_deleted = tups_vacuumed;
        vacrelstats->new_dead_tuples = nkeep;
 
        /* now we can compute the new value for pg_class.reltuples */
-       vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel,
-                                                                                                                nblocks,
-                                                                                                                vacrelstats->tupcount_pages,
-                                                                                                                num_tuples);
+       vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
+                                                                                                                 nblocks,
+                                                                                                                 vacrelstats->tupcount_pages,
+                                                                                                                 live_tuples);
+
+       /* also compute total number of surviving heap entries */
+       vacrelstats->new_rel_tuples =
+               vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
 
        /*
         * Release any remaining pin on visibility map page.
@@ -1625,7 +1658,8 @@ lazy_vacuum_index(Relation indrel,
        ivinfo.analyze_only = false;
        ivinfo.estimated_count = true;
        ivinfo.message_level = elevel;
-       ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
+       /* We can only provide an approximate value of num_heap_tuples here */
+       ivinfo.num_heap_tuples = vacrelstats->old_live_tuples;
        ivinfo.strategy = vac_strategy;
 
        /* Do bulk deletion */
@@ -1656,6 +1690,12 @@ lazy_cleanup_index(Relation indrel,
        ivinfo.analyze_only = false;
        ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
        ivinfo.message_level = elevel;
+
+       /*
+        * Now we can provide a better estimate of total number of surviving
+        * tuples (we assume indexes are more interested in that than in the
+        * number of nominally live tuples).
+        */
        ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
        ivinfo.strategy = vac_strategy;