granicus.if.org Git - postgresql/commitdiff
hash: Fix write-ahead logging bugs related to init forks.
author: Robert Haas <rhaas@postgresql.org>
Mon, 17 Jul 2017 16:03:35 +0000 (12:03 -0400)
committer: Robert Haas <rhaas@postgresql.org>
Mon, 17 Jul 2017 16:03:35 +0000 (12:03 -0400)
One, logging for CREATE INDEX was oblivious to the fact that when
an unlogged table is created, *only* operations on the init fork
should be logged.

Two, init fork buffers need to be flushed after they are written;
otherwise, a filesystem-level copy following recovery may do the
wrong thing.  (There may be a better fix for this issue than the
one used here, but this is transposed from the similar logic already
present in XLogReadBufferForRedoExtended, and a broader refactoring
after beta2 seems inadvisable.)

Amit Kapila, reviewed by Ashutosh Sharma, Kyotaro Horiguchi,
and Michael Paquier

Discussion: http://postgr.es/m/CAA4eK1JpcMsEtOL_J7WODumeEfyrPi7FPYHeVdS7fyyrCrgp4w@mail.gmail.com

src/backend/access/hash/hash_xlog.c
src/backend/access/hash/hashpage.c

index 429af11f4dd648e680a13dd7f8cb8c9c3a21f8d0..67a856c1427c540ffe0221fd66fb3504b0528d4c 100644 (file)
@@ -33,6 +33,7 @@ hash_xlog_init_meta_page(XLogReaderState *record)
        XLogRecPtr      lsn = record->EndRecPtr;
        Page            page;
        Buffer          metabuf;
+       ForkNumber      forknum;
 
        xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
 
@@ -44,6 +45,17 @@ hash_xlog_init_meta_page(XLogReaderState *record)
        page = (Page) BufferGetPage(metabuf);
        PageSetLSN(page, lsn);
        MarkBufferDirty(metabuf);
+
+       /*
+        * Force the on-disk state of init forks to always be in sync with the
+        * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
+        * special handling for init forks as create index operations don't log a
+        * full page image of the metapage.
+        */
+       XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
+       if (forknum == INIT_FORKNUM)
+               FlushOneBuffer(metabuf);
+
        /* all done */
        UnlockReleaseBuffer(metabuf);
 }
@@ -60,6 +72,7 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
        Page            page;
        HashMetaPage metap;
        uint32          num_buckets;
+       ForkNumber      forknum;
 
        xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
 
@@ -70,6 +83,16 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
        _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
        PageSetLSN(BufferGetPage(bitmapbuf), lsn);
        MarkBufferDirty(bitmapbuf);
+
+       /*
+        * Force the on-disk state of init forks to always be in sync with the
+        * state in shared buffers.  See XLogReadBufferForRedoExtended.  We need
+        * special handling for init forks as create index operations don't log a
+        * full page image of the metapage.
+        */
+       XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
+       if (forknum == INIT_FORKNUM)
+               FlushOneBuffer(bitmapbuf);
        UnlockReleaseBuffer(bitmapbuf);
 
        /* add the new bitmap page to the metapage's list of bitmaps */
@@ -90,6 +113,10 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
 
                PageSetLSN(page, lsn);
                MarkBufferDirty(metabuf);
+
+               XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
+               if (forknum == INIT_FORKNUM)
+                       FlushOneBuffer(metabuf);
        }
        if (BufferIsValid(metabuf))
                UnlockReleaseBuffer(metabuf);
index 1cb18a75138606fd93551bf84f3bbdaab719da7c..d5b6502775197211d49b733235f5b44c27e00909 100644 (file)
@@ -345,12 +345,20 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        int32           ffactor;
        uint32          num_buckets;
        uint32          i;
+       bool            use_wal;
 
        /* safety check */
        if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
                elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
                         RelationGetRelationName(rel));
 
+       /*
+        * WAL log creation of pages if the relation is persistent, or this is the
+        * init fork.  Init forks for unlogged relations always need to be WAL
+        * logged.
+        */
+       use_wal = RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM;
+
        /*
         * Determine the target fill factor (in tuples per bucket) for this index.
         * The idea is to make the fill factor correspond to pages about as full
@@ -384,7 +392,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        metap = HashPageGetMeta(pg);
 
        /* XLOG stuff */
-       if (RelationNeedsWAL(rel))
+       if (use_wal)
        {
                xl_hash_init_meta_page xlrec;
                XLogRecPtr      recptr;
@@ -427,11 +435,12 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
                _hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
                MarkBufferDirty(buf);
 
-               log_newpage(&rel->rd_node,
-                                       forkNum,
-                                       blkno,
-                                       BufferGetPage(buf),
-                                       true);
+               if (use_wal)
+                       log_newpage(&rel->rd_node,
+                                               forkNum,
+                                               blkno,
+                                               BufferGetPage(buf),
+                                               true);
                _hash_relbuf(rel, buf);
        }
 
@@ -459,7 +468,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
        MarkBufferDirty(metabuf);
 
        /* XLOG stuff */
-       if (RelationNeedsWAL(rel))
+       if (use_wal)
        {
                xl_hash_init_bitmap_page xlrec;
                XLogRecPtr      recptr;