granicus.if.org Git - postgresql/blobdiff - src/backend/access/hash/hashpage.c
Flag index metapages as standard-format in xlog.c calls.
[postgresql] / src / backend / access / hash / hashpage.c
index 4544889294a80c37ba8d2758f3d154693092ad1f..4b14f88af944b0097942ebfec5ae6a4619795d77 100644 (file)
@@ -298,20 +298,20 @@ _hash_dropscanbuf(Relation rel, HashScanOpaque so)
 {
        /* release pin we hold on primary bucket page */
        if (BufferIsValid(so->hashso_bucket_buf) &&
-               so->hashso_bucket_buf != so->hashso_curbuf)
+               so->hashso_bucket_buf != so->currPos.buf)
                _hash_dropbuf(rel, so->hashso_bucket_buf);
        so->hashso_bucket_buf = InvalidBuffer;
 
        /* release pin we hold on primary bucket page  of bucket being split */
        if (BufferIsValid(so->hashso_split_bucket_buf) &&
-               so->hashso_split_bucket_buf != so->hashso_curbuf)
+               so->hashso_split_bucket_buf != so->currPos.buf)
                _hash_dropbuf(rel, so->hashso_split_bucket_buf);
        so->hashso_split_bucket_buf = InvalidBuffer;
 
        /* release any pin we still hold */
-       if (BufferIsValid(so->hashso_curbuf))
-               _hash_dropbuf(rel, so->hashso_curbuf);
-       so->hashso_curbuf = InvalidBuffer;
+       if (BufferIsValid(so->currPos.buf))
+               _hash_dropbuf(rel, so->currPos.buf);
+       so->currPos.buf = InvalidBuffer;
 
        /* reset split scan */
        so->hashso_buc_populated = false;
@@ -345,12 +345,20 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        int32           ffactor;
        uint32          num_buckets;
        uint32          i;
+       bool            use_wal;
 
        /* safety check */
        if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
                elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
                         RelationGetRelationName(rel));
 
+       /*
+        * WAL log creation of pages if the relation is persistent, or this is the
+        * init fork.  Init forks for unlogged relations always need to be WAL
+        * logged.
+        */
+       use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
+
        /*
         * Determine the target fill factor (in tuples per bucket) for this index.
         * The idea is to make the fill factor correspond to pages about as full
@@ -365,7 +373,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        if (ffactor < 10)
                ffactor = 10;
 
-       procid = index_getprocid(rel, 1, HASHPROC);
+       procid = index_getprocid(rel, 1, HASHSTANDARD_PROC);
 
        /*
         * We initialize the metapage, the first N bucket pages, and the first
@@ -384,7 +392,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        metap = HashPageGetMeta(pg);
 
        /* XLOG stuff */
-       if (RelationNeedsWAL(rel))
+       if (use_wal)
        {
                xl_hash_init_meta_page xlrec;
                XLogRecPtr      recptr;
@@ -395,7 +403,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
 
                XLogBeginInsert();
                XLogRegisterData((char *) &xlrec, SizeOfHashInitMetaPage);
-               XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD);
 
                recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_META_PAGE);
 
@@ -427,11 +435,12 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
                _hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
                MarkBufferDirty(buf);
 
-               log_newpage(&rel->rd_node,
-                                       forkNum,
-                                       blkno,
-                                       BufferGetPage(buf),
-                                       true);
+               if (use_wal)
+                       log_newpage(&rel->rd_node,
+                                               forkNum,
+                                               blkno,
+                                               BufferGetPage(buf),
+                                               true);
                _hash_relbuf(rel, buf);
        }
 
@@ -459,7 +468,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        MarkBufferDirty(metabuf);
 
        /* XLOG stuff */
-       if (RelationNeedsWAL(rel))
+       if (use_wal)
        {
                xl_hash_init_bitmap_page xlrec;
                XLogRecPtr      recptr;
@@ -583,8 +592,9 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
        metap->hashm_firstfree = 0;
 
        /*
-        * Set pd_lower just past the end of the metadata.  This is to log full
-        * page image of metapage in xloginsert.c.
+        * Set pd_lower just past the end of the metadata.  This is essential,
+        * because without doing so, metadata will be lost if xlog.c compresses
+        * the page.
         */
        ((PageHeader) page)->pd_lower =
                ((char *) metap + sizeof(HashMetaPageData)) - (char *) page;
@@ -923,7 +933,7 @@ restart_expand:
                        XLogRegisterBufData(2, (char *) &metap->hashm_ovflpoint,
                                                                sizeof(uint32));
                        XLogRegisterBufData(2,
-                                          (char *) &metap->hashm_spares[metap->hashm_ovflpoint],
+                                                               (char *) &metap->hashm_spares[metap->hashm_ovflpoint],
                                                                sizeof(uint32));
                }
 
@@ -947,9 +957,9 @@ restart_expand:
                                          buf_oblkno, buf_nblkno, NULL,
                                          maxbucket, highmask, lowmask);
 
-       /* all done, now release the locks and pins on primary buckets. */
-       _hash_relbuf(rel, buf_oblkno);
-       _hash_relbuf(rel, buf_nblkno);
+       /* all done, now release the pins on primary buckets. */
+       _hash_dropbuf(rel, buf_oblkno);
+       _hash_dropbuf(rel, buf_nblkno);
 
        return;
 
@@ -1059,10 +1069,11 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
  * while a split is in progress.
  *
  * In addition, the caller must have created the new bucket's base page,
- * which is passed in buffer nbuf, pinned and write-locked.  That lock and
- * pin are released here.  (The API is set up this way because we must do
- * _hash_getnewbuf() before releasing the metapage write lock.  So instead of
- * passing the new bucket's start block number, we pass an actual buffer.)
+ * which is passed in buffer nbuf, pinned and write-locked.  The lock will be
+ * released here and pin must be released by the caller.  (The API is set up
+ * this way because we must do _hash_getnewbuf() before releasing the metapage
+ * write lock.  So instead of passing the new bucket's start block number, we
+ * pass an actual buffer.)
  */
 static void
 _hash_splitbucket(Relation rel,
@@ -1271,8 +1282,9 @@ _hash_splitbucket(Relation rel,
 
        /*
         * After the split is finished, mark the old bucket to indicate that it
-        * contains deletable tuples.  Vacuum will clear split-cleanup flag after
-        * deleting such tuples.
+        * contains deletable tuples.  We will clear split-cleanup flag after
+        * deleting such tuples either at the end of split or at the next split
+        * from old bucket or at the time of vacuum.
         */
        oopaque->hasho_flag |= LH_BUCKET_NEEDS_SPLIT_CLEANUP;
 
@@ -1305,6 +1317,28 @@ _hash_splitbucket(Relation rel,
        }
 
        END_CRIT_SECTION();
+
+       /*
+        * If possible, clean up the old bucket.  We might not be able to do this
+        * if someone else has a pin on it, but if not then we can go ahead.  This
+        * isn't absolutely necessary, but it reduces bloat; if we don't do it
+        * now, VACUUM will do it eventually, but maybe not until new overflow
+        * pages have been allocated.  Note that there's no need to clean up the
+        * new bucket.
+        */
+       if (IsBufferCleanupOK(bucket_obuf))
+       {
+               LockBuffer(bucket_nbuf, BUFFER_LOCK_UNLOCK);
+               hashbucketcleanup(rel, obucket, bucket_obuf,
+                                                 BufferGetBlockNumber(bucket_obuf), NULL,
+                                                 maxbucket, highmask, lowmask, NULL, NULL, true,
+                                                 NULL, NULL);
+       }
+       else
+       {
+               LockBuffer(bucket_nbuf, BUFFER_LOCK_UNLOCK);
+               LockBuffer(bucket_obuf, BUFFER_LOCK_UNLOCK);
+       }
 }
 
 /*
@@ -1425,8 +1459,7 @@ _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket,
                                          nbucket, obuf, bucket_nbuf, tidhtab,
                                          maxbucket, highmask, lowmask);
 
-       _hash_relbuf(rel, bucket_nbuf);
-       LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
+       _hash_dropbuf(rel, bucket_nbuf);
        hash_destroy(tidhtab);
 }