]> granicus.if.org Git - postgresql/commitdiff
Don't rely on estimates for amcheck Bloom filters.
authorPeter Geoghegan <pg@bowt.ie>
Sat, 20 Jul 2019 18:11:55 +0000 (11:11 -0700)
committerPeter Geoghegan <pg@bowt.ie>
Sat, 20 Jul 2019 18:11:55 +0000 (11:11 -0700)
Solely relying on a relation's reltuples/relpages estimate to size the
Bloom filters used by amcheck verification makes verification less
effective when the estimates are very stale.  In extreme cases,
verification options that use Bloom filters internally could be totally
ineffective, without users receiving any clear indication that certain
types of corruption might easily be missed.

To fix, use RelationGetNumberOfBlocks() instead of relpages to size the
downlink block Bloom filter.  Use the same RelationGetNumberOfBlocks()
value to derive a minimum size for the heapallindexed Bloom filter,
rather than completely trusting reltuples.  Verification will still be
reasonably effective when the projected/estimated number of Bloom filter
elements is at least 1/5 of the final number of elements, which is
assured by the new sizing logic.

Reported-By: Alexander Korotkov
Discussion: https://postgr.es/m/CAH2-Wzk0ke2J42KrNYBKu0Xovjy-sU5ub7PWjgpbsKdAQcL4OA@mail.gmail.com
Backpatch: 11-, where downlink/heapallindexed verification were added.

contrib/amcheck/verify_nbtree.c

index 9126c18d0e175b5c6ab032ffea46bd55a452b15b..55a3a4bbe0c2a0b967243bfd8038eb91d6d89a62 100644 (file)
@@ -377,11 +377,20 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
 
        if (state->heapallindexed)
        {
+               int64           total_pages;
                int64           total_elems;
                uint64          seed;
 
-               /* Size Bloom filter based on estimated number of tuples in index */
-               total_elems = (int64) state->rel->rd_rel->reltuples;
+               /*
+                * Size Bloom filter based on estimated number of tuples in index,
+                * while conservatively assuming that each block must contain at least
+                * MaxIndexTuplesPerPage / 5 non-pivot tuples.  (Non-leaf pages cannot
+                * contain non-pivot tuples.  That's okay because they generally make
+                * up no more than about 1% of all pages in the index.)
+                */
+               total_pages = RelationGetNumberOfBlocks(rel);
+               total_elems = Max(total_pages * (MaxIndexTuplesPerPage / 5),
+                                                 (int64) state->rel->rd_rel->reltuples);
                /* Random seed relies on backend srandom() call to avoid repetition */
                seed = random();
                /* Create Bloom filter to fingerprint index */
@@ -425,8 +434,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                }
                else
                {
-                       int64           total_pages;
-
                        /*
                         * Extra readonly downlink check.
                         *
@@ -437,7 +444,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                         * splits and page deletions, though.  This is taken care of in
                         * bt_downlink_missing_check().
                         */
-                       total_pages = (int64) state->rel->rd_rel->relpages;
                        state->downlinkfilter = bloom_create(total_pages, work_mem, seed);
                }
        }