granicus.if.org Git - postgresql/commitdiff
hash: Increase the number of possible overflow bitmaps by 8x.
author Robert Haas <rhaas@postgresql.org>
Fri, 4 Aug 2017 19:29:26 +0000 (15:29 -0400)
committer Robert Haas <rhaas@postgresql.org>
Fri, 4 Aug 2017 20:30:32 +0000 (16:30 -0400)
Per a report from AP, it's not that hard to exhaust the supply of
bitmap pages if you create a table with a hash index and then insert a
few billion rows - and then you start getting errors when you try to
insert additional rows.  In the particular case reported by AP,
there's another fix that we can make to improve recycling of overflow
pages, which is another way to avoid the error, but there may be other
cases where this problem happens and that fix won't help.  So let's
buy ourselves as much headroom as we can without rearchitecting
anything.

The comments claim that the old limit was 64GB, but it was really
only 32GB, because we didn't use all the bits in the page for bitmap
bits - only the largest power of 2 that could fit after deducting
space for the page header and so forth.  Thus, we have 4kB per page
for bitmap bits, not 8kB.  The new limit is thus actually 8 times the
old *real* limit but only 4 times the old *purported* limit.

Since this breaks on-disk compatibility, bump HASH_VERSION.  We've
already done this earlier in this release cycle, so this doesn't cause
any incremental inconvenience for people using pg_upgrade from
releases prior to v10.  However, users who use pg_upgrade to reach
10beta3 or later from 10beta2 or earlier will need to REINDEX any hash
indexes again.

Amit Kapila and Robert Haas

Discussion: http://postgr.es/m/20170704105728.mwb72jebfmok2nm2@zip.com.au

contrib/pageinspect/expected/hash.out
contrib/pgstattuple/expected/pgstattuple.out
doc/src/sgml/pageinspect.sgml
doc/src/sgml/pgstattuple.sgml
src/include/access/hash.h

index 39374158b189fb8f9ef1b0f83d7988857b0b2d88..75d7bcfad5f74bae8dbad2fe6e50cf10bf1fd9e5 100644 (file)
@@ -43,9 +43,9 @@ ERROR:  invalid overflow block number 5
 SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
 lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
 hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
--[ RECORD 1 ]----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+-[ RECORD 1 ]------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------
 magic     | 105121344
-version   | 3
+version   | 4
 ntuples   | 1
 bsize     | 8152
 bmsize    | 4096
@@ -58,7 +58,7 @@ firstfree | 0
 nmaps     | 1
 procid    | 450
 spares    | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
-mapp      | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+mapp      | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
 
 SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
 lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
index c7c17328276c542a820d4d442e80b61ef3ce49ba..20b5585d03a2db6e6b8bd10ce42a979a3d038901 100644 (file)
@@ -134,7 +134,7 @@ create index test_hashidx on test using hash (b);
 select * from pgstathashindex('test_hashidx');
  version | bucket_pages | overflow_pages | bitmap_pages | unused_pages | live_items | dead_items | free_percent 
 ---------+--------------+----------------+--------------+--------------+------------+------------+--------------
-       3 |            4 |              0 |            1 |            0 |          0 |          0 |          100
+       4 |            4 |              0 |            1 |            0 |          0 |          0 |          100
 (1 row)
 
 -- these should error with the wrong type
@@ -235,7 +235,7 @@ select pgstatindex('test_partition_idx');
 select pgstathashindex('test_partition_hash_idx');
    pgstathashindex   
 ---------------------
- (3,8,0,1,0,0,0,100)
+ (4,8,0,1,0,0,0,100)
 (1 row)
 
 drop table test_partitioned;
index ccdaf3e0aca3b699f0af0e452134a90e52e47f52..e46f5ca6bc66d25840a39e0350659d6b4987c583 100644 (file)
@@ -687,8 +687,13 @@ test=# SELECT * FROM hash_bitmap_info('con_hash_index', 2052);
       <function>hash_metapage_info</function> returns information stored
       in meta page of a <acronym>HASH</acronym> index.  For example:
 <screen>
-test=# SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0));
--[ RECORD 1 ]-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+test=# SELECT magic, version, ntuples, ffactor, bsize, bmsize, bmshift,
+test-#     maxbucket, highmask, lowmask, ovflpoint, firstfree, nmaps, procid,
+test-#     regexp_replace(spares::text, '(,0)*}', '}') as spares,
+test-#     regexp_replace(mapp::text, '(,0)*}', '}') as mapp
+test-# FROM hash_metapage_info(get_raw_page('con_hash_index', 0));
+-[ RECORD 1 ]-------------------------------------------------------------------------------
+spares    | {0,0,0,0,0,0,1,1,1,1,1,1,1,1,3,4,4,4,45,55,58,59,508,567,628,704,1193,1202,1204}
 magic     | 105121344
 version   | 3
 ntuples   | 500500
@@ -703,8 +708,8 @@ ovflpoint | 28
 firstfree | 1204
 nmaps     | 1
 procid    | 450
-spares    | {0,0,0,0,0,0,1,1,1,1,1,1,1,1,3,4,4,4,45,55,58,59,508,567,628,704,1193,1202,1204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
-mapp      | {65,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+spares    | {0,0,0,0,0,0,1,1,1,1,1,1,1,1,3,4,4,4,45,55,58,59,508,567,628,704,1193,1202,1204}
+mapp      | {65}
 </screen>
      </para>
     </listitem>
index e98e04fa2fd2edb848caf727b7f39aea3c24cf31..a7c67ae64526c5a093e4e2c9b00e6c784d42d26e 100644 (file)
@@ -368,7 +368,7 @@ pending_tuples | 0
 <programlisting>
 test=&gt; select * from pgstathashindex('con_hash_index');
 -[ RECORD 1 ]--+-----------------
-version        | 2
+version        | 4
 bucket_pages   | 33081
 overflow_pages | 0
 bitmap_pages   | 1
index 7fa868b556b3d7c53b30de95cdc4dde6c98185fb..72fce3038c02381c1addfe59ccfb51a193ca70e1 100644 (file)
@@ -158,8 +158,7 @@ typedef HashScanOpaqueData *HashScanOpaque;
 #define HASH_METAPAGE  0               /* metapage is always block 0 */
 
 #define HASH_MAGIC             0x6440640
-#define HASH_VERSION   3               /* 3 signifies multi-phased bucket allocation
-                                                                * to reduce doubling */
+#define HASH_VERSION   4
 
 /*
  * spares[] holds the number of overflow pages currently allocated at or
@@ -182,10 +181,10 @@ typedef HashScanOpaqueData *HashScanOpaque;
  * after HASH_SPLITPOINT_GROUPS_WITH_ONE_PHASE).
  *
  * There is no particular upper limit on the size of mapp[], other than
- * needing to fit into the metapage.  (With 8K block size, 128 bitmaps
- * limit us to 64 GB of overflow space...)
+ * needing to fit into the metapage.  (With 8K block size, 1024 bitmaps
+ * limit us to 256 GB of overflow space...)
  */
-#define HASH_MAX_BITMAPS                       128
+#define HASH_MAX_BITMAPS                       1024
 
 #define HASH_SPLITPOINT_PHASE_BITS     2
 #define HASH_SPLITPOINT_PHASES_PER_GRP (1 << HASH_SPLITPOINT_PHASE_BITS)