granicus.if.org Git - postgresql/commitdiff
Generate less WAL during GiST, GIN and SP-GiST index build.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 3 Apr 2019 14:03:15 +0000 (17:03 +0300)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 3 Apr 2019 14:03:15 +0000 (17:03 +0300)
Instead of WAL-logging every modification during the build separately,
first build the index without any WAL-logging, and make a separate pass
through the index at the end, to write all pages to the WAL. This
significantly reduces the amount of WAL generated, and is usually also
faster, despite the extra I/O needed for the extra scan through the index.
WAL generated this way is also faster to replay.

For GiST, the LSN-NSN interlock makes this a little tricky. All pages must
be marked with a valid (i.e. non-zero) LSN, so that the parent-child
LSN-NSN interlock works correctly. We now use magic value 1 for that during
index build. Change the fake LSN counter to begin from 1000, so that 1 is
safely smaller than any real or fake LSN. 2 would've been enough for our
purposes, but let's reserve a bigger range, in case we need more special
values in the future.

Author: Anastasia Lubennikova, Andrey V. Lepikhov
Reviewed-by: Heikki Linnakangas, Dmitry Dolgov
27 files changed:
src/backend/access/gin/ginbtree.c
src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/gininsert.c
src/backend/access/gin/ginutil.c
src/backend/access/gin/ginvacuum.c
src/backend/access/gin/ginxlog.c
src/backend/access/gist/gist.c
src/backend/access/gist/gistbuild.c
src/backend/access/gist/gistutil.c
src/backend/access/gist/gistxlog.c
src/backend/access/rmgrdesc/gindesc.c
src/backend/access/rmgrdesc/gistdesc.c
src/backend/access/rmgrdesc/spgdesc.c
src/backend/access/spgist/spgdoinsert.c
src/backend/access/spgist/spginsert.c
src/backend/access/spgist/spgxlog.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xloginsert.c
src/include/access/gin.h
src/include/access/ginxlog.h
src/include/access/gist.h
src/include/access/gist_private.h
src/include/access/gistxlog.h
src/include/access/spgxlog.h
src/include/access/xlogdefs.h
src/include/access/xloginsert.h

index 533949e46a46a0c481711c382d7689e4d641bec0..9f82eef8c391ca528e2d5d9ce0621458e05311ff 100644 (file)
@@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                /* It will fit, perform the insertion */
                START_CRIT_SECTION();
 
-               if (RelationNeedsWAL(btree->index))
+               if (RelationNeedsWAL(btree->index) && !btree->isBuild)
                {
                        XLogBeginInsert();
                        XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                        MarkBufferDirty(childbuf);
                }
 
-               if (RelationNeedsWAL(btree->index))
+               if (RelationNeedsWAL(btree->index) && !btree->isBuild)
                {
                        XLogRecPtr      recptr;
                        ginxlogInsert xlrec;
@@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                }
 
                /* write WAL record */
-               if (RelationNeedsWAL(btree->index))
+               if (RelationNeedsWAL(btree->index) && !btree->isBuild)
                {
                        XLogRecPtr      recptr;
 
index 3ad8b767102a2fd9f2c14e01e259ccae532b7024..fb085c7dd84a3167d079d88fb30e72c6566a09ce 100644 (file)
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                 * Great, all the items fit on a single page.  If needed, prepare data
                 * for a WAL record describing the changes we'll make.
                 */
-               if (RelationNeedsWAL(btree->index))
+               if (RelationNeedsWAL(btree->index) && !btree->isBuild)
                        computeLeafRecompressWALData(leaf);
 
                /*
@@ -719,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
        dataPlaceToPageLeafRecompress(buf, leaf);
 
        /* If needed, register WAL data built by computeLeafRecompressWALData */
-       if (RelationNeedsWAL(btree->index))
+       if (RelationNeedsWAL(btree->index) && !btree->isBuild)
        {
                XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
        }
@@ -1152,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
        pitem = (PostingItem *) insertdata;
        GinDataPageAddPostingItem(page, pitem, off);
 
-       if (RelationNeedsWAL(btree->index))
+       if (RelationNeedsWAL(btree->index) && !btree->isBuild)
        {
                /*
                 * This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1773,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
        Pointer         ptr;
        int                     nrootitems;
        int                     rootsize;
+       bool            is_build = (buildStats != NULL);
 
        /* Construct the new root page in memory first. */
        tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1827,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
        PageRestoreTempPage(tmppage, page);
        MarkBufferDirty(buffer);
 
-       if (RelationNeedsWAL(index))
+       if (RelationNeedsWAL(index) && !is_build)
        {
                XLogRecPtr      recptr;
                ginxlogCreatePostingTree data;
index 4889de2a4f5cbfb6794eed9f5f23963b4cb59542..1f5ba33d5124c957dd6e06e24d5af83aeee45ea4 100644 (file)
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                elog(ERROR, "failed to add item to index page in \"%s\"",
                         RelationGetRelationName(btree->index));
 
-       if (RelationNeedsWAL(btree->index))
+       if (RelationNeedsWAL(btree->index) && !btree->isBuild)
        {
                /*
                 * This must be static, because it has to survive until XLogInsert,
index edc353a7fe041b0e710ca0082f9c462aaecbfb84..55eab1461733af71111f1841ef372087b6868050 100644 (file)
@@ -195,6 +195,7 @@ ginEntryInsert(GinState *ginstate,
                buildStats->nEntries++;
 
        ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
+       btree.isBuild = (buildStats != NULL);
 
        stack = ginFindLeafPage(&btree, false, false, NULL);
        page = BufferGetPage(stack->buffer);
@@ -347,23 +348,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
        GinInitBuffer(RootBuffer, GIN_LEAF);
        MarkBufferDirty(RootBuffer);
 
-       if (RelationNeedsWAL(index))
-       {
-               XLogRecPtr      recptr;
-               Page            page;
-
-               XLogBeginInsert();
-               XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-               XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
-
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
-
-               page = BufferGetPage(RootBuffer);
-               PageSetLSN(page, recptr);
-
-               page = BufferGetPage(MetaBuffer);
-               PageSetLSN(page, recptr);
-       }
 
        UnlockReleaseBuffer(MetaBuffer);
        UnlockReleaseBuffer(RootBuffer);
@@ -419,7 +403,18 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
         * Update metapage stats
         */
        buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
-       ginUpdateStats(index, &buildstate.buildStats);
+       ginUpdateStats(index, &buildstate.buildStats, true);
+
+       /*
+        * We didn't write WAL records as we built the index, so if WAL-logging is
+        * required, write all pages to the WAL now.
+        */
+       if (RelationNeedsWAL(index))
+       {
+               log_newpage_range(index, MAIN_FORKNUM,
+                                                 0, RelationGetNumberOfBlocks(index),
+                                                 true);
+       }
 
        /*
         * Return statistics
index d2360eeafb0ee357473c7c76a4331dc9d4a2a598..cf9699ad18ed455884c58ff553e044ac3f74dc65 100644 (file)
@@ -662,7 +662,7 @@ ginGetStats(Relation index, GinStatsData *stats)
  * Note: nPendingPages and ginVersion are *not* copied over
  */
 void
-ginUpdateStats(Relation index, const GinStatsData *stats)
+ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
 {
        Buffer          metabuffer;
        Page            metapage;
@@ -692,7 +692,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
 
        MarkBufferDirty(metabuffer);
 
-       if (RelationNeedsWAL(index))
+       if (RelationNeedsWAL(index) && !is_build)
        {
                XLogRecPtr      recptr;
                ginxlogUpdateMeta data;
index dfe885b1017040087cd5c29c4b49a13120932384..b9a28d186333f9a40690b1fe9b4c083c28e8d965 100644 (file)
@@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
        /* Update the metapage with accurate page and entry counts */
        idxStat.nTotalPages = npages;
-       ginUpdateStats(info->index, &idxStat);
+       ginUpdateStats(info->index, &idxStat, false);
 
        /* Finally, vacuum the FSM */
        IndexFreeSpaceMapVacuum(info->index);
index c467ffa346d4afa8c32fd0fbd1a7820ebdd1bbed..b648af1ff650d5d82d0d089f29f6a1d1d4dfcc54 100644 (file)
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
                UnlockReleaseBuffer(buffer);
 }
 
-static void
-ginRedoCreateIndex(XLogReaderState *record)
-{
-       XLogRecPtr      lsn = record->EndRecPtr;
-       Buffer          RootBuffer,
-                               MetaBuffer;
-       Page            page;
-
-       MetaBuffer = XLogInitBufferForRedo(record, 0);
-       Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
-       page = (Page) BufferGetPage(MetaBuffer);
-
-       GinInitMetabuffer(MetaBuffer);
-
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(MetaBuffer);
-
-       RootBuffer = XLogInitBufferForRedo(record, 1);
-       Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
-       page = (Page) BufferGetPage(RootBuffer);
-
-       GinInitBuffer(RootBuffer, GIN_LEAF);
-
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(RootBuffer);
-
-       UnlockReleaseBuffer(RootBuffer);
-       UnlockReleaseBuffer(MetaBuffer);
-}
-
 static void
 ginRedoCreatePTree(XLogReaderState *record)
 {
@@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record)
        oldCtx = MemoryContextSwitchTo(opCtx);
        switch (info)
        {
-               case XLOG_GIN_CREATE_INDEX:
-                       ginRedoCreateIndex(record);
-                       break;
                case XLOG_GIN_CREATE_PTREE:
                        ginRedoCreatePTree(record);
                        break;
index f44c922b5d66b58c6304bda1878e87509b4cf0c5..2db790c840c3fe2cbe786fe3a0d8175e561a6e1d 100644 (file)
@@ -173,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
                                                 values, isnull, true /* size is currently bogus */ );
        itup->t_tid = *ht_ctid;
 
-       gistdoinsert(r, itup, 0, giststate, heapRel);
+       gistdoinsert(r, itup, 0, giststate, heapRel, false);
 
        /* cleanup */
        MemoryContextSwitchTo(oldCxt);
@@ -220,7 +220,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                                Buffer leftchildbuf,
                                List **splitinfo,
                                bool markfollowright,
-                               Relation heapRel)
+                               Relation heapRel,
+                               bool is_build)
 {
        BlockNumber blkno = BufferGetBlockNumber(buffer);
        Page            page = BufferGetPage(buffer);
@@ -459,7 +460,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                 * insertion for that. NB: The number of pages and data segments
                 * specified here must match the calculations in gistXLogSplit()!
                 */
-               if (RelationNeedsWAL(rel))
+               if (!is_build && RelationNeedsWAL(rel))
                        XLogEnsureRecordSpace(npage, 1 + npage * 2);
 
                START_CRIT_SECTION();
@@ -480,18 +481,30 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
                dist->page = BufferGetPage(dist->buffer);
 
-               /* Write the WAL record */
-               if (RelationNeedsWAL(rel))
-                       recptr = gistXLogSplit(is_leaf,
-                                                                  dist, oldrlink, oldnsn, leftchildbuf,
-                                                                  markfollowright);
+               /*
+                * Write the WAL record.
+                *
+                * If we're building a new index, however, we don't WAL-log changes
+                * yet. The LSN-NSN interlock between parent and child requires that
+                * LSNs never move backwards, so set the LSNs to a value that's
+                * smaller than any real or fake unlogged LSN that might be generated
+                * later. (There can't be any concurrent scans during index build, so
+                * we don't need to be able to detect concurrent splits yet.)
+                */
+               if (is_build)
+                       recptr = GistBuildLSN;
                else
-                       recptr = gistGetFakeLSN(rel);
+               {
+                       if (RelationNeedsWAL(rel))
+                               recptr = gistXLogSplit(is_leaf,
+                                                                          dist, oldrlink, oldnsn, leftchildbuf,
+                                                                          markfollowright);
+                       else
+                               recptr = gistGetFakeLSN(rel);
+               }
 
                for (ptr = dist; ptr; ptr = ptr->next)
-               {
                        PageSetLSN(ptr->page, recptr);
-               }
 
                /*
                 * Return the new child buffers to the caller.
@@ -545,28 +558,29 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                if (BufferIsValid(leftchildbuf))
                        MarkBufferDirty(leftchildbuf);
 
-               if (RelationNeedsWAL(rel))
+               if (is_build)
+                       recptr = GistBuildLSN;
+               else
                {
-                       OffsetNumber ndeloffs = 0,
-                                               deloffs[1];
-
-                       if (OffsetNumberIsValid(oldoffnum))
+                       if (RelationNeedsWAL(rel))
                        {
-                               deloffs[0] = oldoffnum;
-                               ndeloffs = 1;
-                       }
+                               OffsetNumber ndeloffs = 0,
+                                                       deloffs[1];
 
-                       recptr = gistXLogUpdate(buffer,
-                                                                       deloffs, ndeloffs, itup, ntup,
-                                                                       leftchildbuf);
+                               if (OffsetNumberIsValid(oldoffnum))
+                               {
+                                       deloffs[0] = oldoffnum;
+                                       ndeloffs = 1;
+                               }
 
-                       PageSetLSN(page, recptr);
-               }
-               else
-               {
-                       recptr = gistGetFakeLSN(rel);
-                       PageSetLSN(page, recptr);
+                               recptr = gistXLogUpdate(buffer,
+                                                                               deloffs, ndeloffs, itup, ntup,
+                                                                               leftchildbuf);
+                       }
+                       else
+                               recptr = gistGetFakeLSN(rel);
                }
+               PageSetLSN(page, recptr);
 
                if (newblkno)
                        *newblkno = blkno;
@@ -607,7 +621,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
  */
 void
 gistdoinsert(Relation r, IndexTuple itup, Size freespace,
-                        GISTSTATE *giststate, Relation heapRel)
+                        GISTSTATE *giststate, Relation heapRel, bool is_build)
 {
        ItemId          iid;
        IndexTuple      idxtuple;
@@ -620,6 +634,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
        state.freespace = freespace;
        state.r = r;
        state.heapRel = heapRel;
+       state.is_build = is_build;
 
        /* Start from the root */
        firststack.blkno = GIST_ROOT_BLKNO;
@@ -1252,7 +1267,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
                                                           leftchild,
                                                           &splitinfo,
                                                           true,
-                                                          state->heapRel);
+                                                          state->heapRel,
+                                                          state->is_build);
 
        /*
         * Before recursing up in case the page was split, release locks on the
index 6024671989e93cf94bcf095f7d5a71a6bd499958..8e81eda5176c4c336f600d47114dae650cc5955f 100644 (file)
@@ -180,19 +180,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
        GISTInitBuffer(buffer, F_LEAF);
 
        MarkBufferDirty(buffer);
-
-       if (RelationNeedsWAL(index))
-       {
-               XLogRecPtr      recptr;
-
-               XLogBeginInsert();
-               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-
-               recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
-               PageSetLSN(page, recptr);
-       }
-       else
-               PageSetLSN(page, gistGetFakeLSN(heap));
+       PageSetLSN(page, GistBuildLSN);
 
        UnlockReleaseBuffer(buffer);
 
@@ -226,6 +214,17 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
        freeGISTstate(buildstate.giststate);
 
+       /*
+        * We didn't write WAL records as we built the index, so if WAL-logging is
+        * required, write all pages to the WAL now.
+        */
+       if (RelationNeedsWAL(index))
+       {
+               log_newpage_range(index, MAIN_FORKNUM,
+                                                 0, RelationGetNumberOfBlocks(index),
+                                                 true);
+       }
+
        /*
         * Return statistics
         */
@@ -488,7 +487,7 @@ gistBuildCallback(Relation index,
                 * locked, we call gistdoinsert directly.
                 */
                gistdoinsert(index, itup, buildstate->freespace,
-                                        buildstate->giststate, buildstate->heaprel);
+                                        buildstate->giststate, buildstate->heaprel, true);
        }
 
        /* Update tuple count and total size. */
@@ -695,7 +694,7 @@ gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
                                                           InvalidBuffer,
                                                           &splitinfo,
                                                           false,
-                                                          buildstate->heaprel);
+                                                          buildstate->heaprel, true);
 
        /*
         * If this is a root split, update the root path item kept in memory. This
index 2163cc482d8d69a93bab311a1bf1888017553cce..94b6ad6a59b657a6394ed1c969e34ac5732d41ed 100644 (file)
@@ -1008,7 +1008,7 @@ gistproperty(Oid index_oid, int attno,
 XLogRecPtr
 gistGetFakeLSN(Relation rel)
 {
-       static XLogRecPtr counter = 1;
+       static XLogRecPtr counter = FirstNormalUnloggedLSN;
 
        if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
        {
index cb80ab00cd7fa6b7aaf5715a8cb0af553d2ef32a..4fb1855e8904c2f7056255203ae6bf7b473a5ff2 100644 (file)
@@ -490,25 +490,6 @@ gistRedoPageSplitRecord(XLogReaderState *record)
        UnlockReleaseBuffer(firstbuffer);
 }
 
-static void
-gistRedoCreateIndex(XLogReaderState *record)
-{
-       XLogRecPtr      lsn = record->EndRecPtr;
-       Buffer          buffer;
-       Page            page;
-
-       buffer = XLogInitBufferForRedo(record, 0);
-       Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-       page = (Page) BufferGetPage(buffer);
-
-       GISTInitBuffer(buffer, F_LEAF);
-
-       PageSetLSN(page, lsn);
-
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
-}
-
 /* redo page deletion */
 static void
 gistRedoPageDelete(XLogReaderState *record)
@@ -594,9 +575,6 @@ gist_redo(XLogReaderState *record)
                case XLOG_GIST_PAGE_SPLIT:
                        gistRedoPageSplitRecord(record);
                        break;
-               case XLOG_GIST_CREATE_INDEX:
-                       gistRedoCreateIndex(record);
-                       break;
                case XLOG_GIST_PAGE_DELETE:
                        gistRedoPageDelete(record);
                        break;
index ef30ce16b0e3606df6ba4e3b5e5c44f163c0c6e7..f3f4e1b2149d68cdbedbd10bb5e672a1abe2f000 100644 (file)
@@ -78,9 +78,6 @@ gin_desc(StringInfo buf, XLogReaderState *record)
 
        switch (info)
        {
-               case XLOG_GIN_CREATE_INDEX:
-                       /* no further information */
-                       break;
                case XLOG_GIN_CREATE_PTREE:
                        /* no further information */
                        break;
@@ -188,9 +185,6 @@ gin_identify(uint8 info)
 
        switch (info & ~XLR_INFO_MASK)
        {
-               case XLOG_GIN_CREATE_INDEX:
-                       id = "CREATE_INDEX";
-                       break;
                case XLOG_GIN_CREATE_PTREE:
                        id = "CREATE_PTREE";
                        break;
index 3ff4f83d38768b038817fc0da5dd439c695ccdd7..eb308c72d6ba7881c941c9358616a99c9168ace5 100644 (file)
@@ -71,8 +71,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
                case XLOG_GIST_PAGE_SPLIT:
                        out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
                        break;
-               case XLOG_GIST_CREATE_INDEX:
-                       break;
                case XLOG_GIST_PAGE_DELETE:
                        out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
                        break;
@@ -98,9 +96,6 @@ gist_identify(uint8 info)
                case XLOG_GIST_PAGE_SPLIT:
                        id = "PAGE_SPLIT";
                        break;
-               case XLOG_GIST_CREATE_INDEX:
-                       id = "CREATE_INDEX";
-                       break;
                case XLOG_GIST_PAGE_DELETE:
                        id = "PAGE_DELETE";
                        break;
index 37af31a764dfec38c89335484db0443530f4597c..40c1c8b3f98e833075232efac94cb3ff1f550a08 100644 (file)
@@ -24,8 +24,6 @@ spg_desc(StringInfo buf, XLogReaderState *record)
 
        switch (info)
        {
-               case XLOG_SPGIST_CREATE_INDEX:
-                       break;
                case XLOG_SPGIST_ADD_LEAF:
                        {
                                spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
@@ -88,9 +86,6 @@ spg_identify(uint8 info)
 
        switch (info & ~XLR_INFO_MASK)
        {
-               case XLOG_SPGIST_CREATE_INDEX:
-                       id = "CREATE_INDEX";
-                       break;
                case XLOG_SPGIST_ADD_LEAF:
                        id = "ADD_LEAF";
                        break;
index 0d07b8b291537850db0f604d93a34e7741b0ffbd..c34c44cd8b565c5964ac84da744a2e4378045f97 100644 (file)
@@ -289,7 +289,7 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
 
        MarkBufferDirty(current->buffer);
 
-       if (RelationNeedsWAL(index))
+       if (RelationNeedsWAL(index) && !state->isBuild)
        {
                XLogRecPtr      recptr;
                int                     flags;
@@ -516,7 +516,7 @@ moveLeafs(Relation index, SpGistState *state,
        MarkBufferDirty(current->buffer);
        MarkBufferDirty(nbuf);
 
-       if (RelationNeedsWAL(index))
+       if (RelationNeedsWAL(index) && !state->isBuild)
        {
                XLogRecPtr      recptr;
 
@@ -1334,7 +1334,7 @@ doPickSplit(Relation index, SpGistState *state,
                saveCurrent.buffer = InvalidBuffer;
        }
 
-       if (RelationNeedsWAL(index))
+       if (RelationNeedsWAL(index) && !state->isBuild)
        {
                XLogRecPtr      recptr;
                int                     flags;
@@ -1531,7 +1531,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
 
                MarkBufferDirty(current->buffer);
 
-               if (RelationNeedsWAL(index))
+               if (RelationNeedsWAL(index) && !state->isBuild)
                {
                        XLogRecPtr      recptr;
 
@@ -1644,7 +1644,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
 
                MarkBufferDirty(saveCurrent.buffer);
 
-               if (RelationNeedsWAL(index))
+               if (RelationNeedsWAL(index) && !state->isBuild)
                {
                        XLogRecPtr      recptr;
                        int                     flags;
@@ -1840,7 +1840,7 @@ spgSplitNodeAction(Relation index, SpGistState *state,
 
        MarkBufferDirty(current->buffer);
 
-       if (RelationNeedsWAL(index))
+       if (RelationNeedsWAL(index) && !state->isBuild)
        {
                XLogRecPtr      recptr;
 
index b06feafdc241be018bc74912ac3ff3a9e51a6b6e..b40bd440cf09e1723e2515711f193dfc70e801a2 100644 (file)
@@ -105,26 +105,6 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
        SpGistInitBuffer(nullbuffer, SPGIST_LEAF | SPGIST_NULLS);
        MarkBufferDirty(nullbuffer);
 
-       if (RelationNeedsWAL(index))
-       {
-               XLogRecPtr      recptr;
-
-               XLogBeginInsert();
-
-               /*
-                * Replay will re-initialize the pages, so don't take full pages
-                * images.  No other data to log.
-                */
-               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-               XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-               XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
-
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
-
-               PageSetLSN(BufferGetPage(metabuffer), recptr);
-               PageSetLSN(BufferGetPage(rootbuffer), recptr);
-               PageSetLSN(BufferGetPage(nullbuffer), recptr);
-       }
 
        END_CRIT_SECTION();
 
@@ -151,6 +131,17 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 
        SpGistUpdateMetaPage(index);
 
+       /*
+        * We didn't write WAL records as we built the index, so if WAL-logging is
+        * required, write all pages to the WAL now.
+        */
+       if (RelationNeedsWAL(index))
+       {
+               log_newpage_range(index, MAIN_FORKNUM,
+                                                 0, RelationGetNumberOfBlocks(index),
+                                                 true);
+       }
+
        result = (IndexBuildResult *) palloc0(sizeof(IndexBuildResult));
        result->heap_tuples = reltuples;
        result->index_tuples = buildstate.indtuples;
index 71836ee8a5baaae041ba43d2faa7f87606dfa0ea..ebe6ae8715b92ac4b63a2f3e449ace400853d65c 100644 (file)
@@ -72,38 +72,6 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
                         size);
 }
 
-static void
-spgRedoCreateIndex(XLogReaderState *record)
-{
-       XLogRecPtr      lsn = record->EndRecPtr;
-       Buffer          buffer;
-       Page            page;
-
-       buffer = XLogInitBufferForRedo(record, 0);
-       Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
-       page = (Page) BufferGetPage(buffer);
-       SpGistInitMetapage(page);
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
-
-       buffer = XLogInitBufferForRedo(record, 1);
-       Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
-       SpGistInitBuffer(buffer, SPGIST_LEAF);
-       page = (Page) BufferGetPage(buffer);
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
-
-       buffer = XLogInitBufferForRedo(record, 2);
-       Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
-       SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
-       page = (Page) BufferGetPage(buffer);
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
-}
-
 static void
 spgRedoAddLeaf(XLogReaderState *record)
 {
@@ -976,9 +944,6 @@ spg_redo(XLogReaderState *record)
        oldCxt = MemoryContextSwitchTo(opCtx);
        switch (info)
        {
-               case XLOG_SPGIST_CREATE_INDEX:
-                       spgRedoCreateIndex(record);
-                       break;
                case XLOG_SPGIST_ADD_LEAF:
                        spgRedoAddLeaf(record);
                        break;
index c6ca96079c1f9704a330871743a53a3dfa8348c0..e3a3110716d6aedcf32eb9636dbcd4d854711bdc 100644 (file)
@@ -5242,7 +5242,7 @@ BootStrapXLOG(void)
        ControlFile->time = checkPoint.time;
        ControlFile->checkPoint = checkPoint.redo;
        ControlFile->checkPointCopy = checkPoint;
-       ControlFile->unloggedLSN = 1;
+       ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
 
        /* Set important parameter values for use when replaying WAL */
        ControlFile->MaxConnections = MaxConnections;
@@ -9781,12 +9781,11 @@ xlog_redo(XLogReaderState *record)
        }
        else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
        {
-               Buffer          buffer;
-
                /*
                 * Full-page image (FPI) records contain nothing else but a backup
-                * block. The block reference must include a full-page image -
-                * otherwise there would be no point in this record.
+                * block (or multiple backup blocks). Every block reference must
+                * include a full-page image - otherwise there would be no point in
+                * this record.
                 *
                 * No recovery conflicts are generated by these generic records - if a
                 * resource manager needs to generate conflicts, it has to define a
@@ -9798,9 +9797,14 @@ xlog_redo(XLogReaderState *record)
                 * XLOG_FPI and XLOG_FPI_FOR_HINT records, they use a different info
                 * code just to distinguish them for statistics purposes.
                 */
-               if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
-                       elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
-               UnlockReleaseBuffer(buffer);
+               for (uint8 block_id = 0; block_id <= record->max_block_id; block_id++)
+               {
+                       Buffer          buffer;
+
+                       if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
+                               elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
+                       UnlockReleaseBuffer(buffer);
+               }
        }
        else if (info == XLOG_BACKUP_END)
        {
index 62df247ab23649839e853b8cdfeb91fd9a529091..1c76dcfa0dcfcbbb36eb00e37b83f63c58423962 100644 (file)
@@ -1021,6 +1021,88 @@ log_newpage_buffer(Buffer buffer, bool page_std)
        return log_newpage(&rnode, forkNum, blkno, page, page_std);
 }
 
+/*
+ * WAL-log a range of blocks in a relation.
+ *
+ * An image of all pages with block numbers 'startblk' <= X < 'endblk' in
+ * fork 'forkNum' of 'rel' is written to the WAL. If the range is large, this
+ * is done in multiple WAL records. Completely empty (new) pages are skipped.
+ *
+ * If all pages follow the standard page layout, with a PageHeader and unused
+ * space between pd_lower and pd_upper, set 'page_std' to true. That allows
+ * the unused space to be left out from the WAL records, making them smaller.
+ * If 'page_std' is false, the pages are logged in full.
+ *
+ * NOTE: This function acquires exclusive-locks on the pages. Typically, this
+ * is used on a newly-built relation, and the caller is holding an
+ * AccessExclusiveLock on it, so no other backend can be accessing it at the
+ * same time. If that's not the case, you must ensure that this does not
+ * cause a deadlock through some other means.
+ */
+void
+log_newpage_range(Relation rel, ForkNumber forkNum,
+                                 BlockNumber startblk, BlockNumber endblk,
+                                 bool page_std)
+{
+       int                     flags;
+       BlockNumber blkno;
+
+       /*
+        * Always force a full-page image, but only declare the standard page
+        * layout when the caller vouches for it; otherwise the space between
+        * pd_lower and pd_upper must not be left out of the image.
+        */
+       flags = REGBUF_FORCE_IMAGE;
+       if (page_std)
+               flags |= REGBUF_STANDARD;
+
+       /*
+        * Iterate over all the pages in the range. They are collected into
+        * batches of XLR_MAX_BLOCK_ID pages, and a single WAL-record is written
+        * for each batch.
+        */
+       XLogEnsureRecordSpace(XLR_MAX_BLOCK_ID - 1, 0);
+
+       blkno = startblk;
+       while (blkno < endblk)
+       {
+               Buffer          bufpack[XLR_MAX_BLOCK_ID];
+               XLogRecPtr      recptr;
+               int                     nbufs;
+               int                     i;
+
+               CHECK_FOR_INTERRUPTS();
+
+               /* Collect a batch of blocks. */
+               nbufs = 0;
+               while (nbufs < XLR_MAX_BLOCK_ID && blkno < endblk)
+               {
+                       /* Read from the fork the caller asked for, not just the main one. */
+                       Buffer          buf = ReadBufferExtended(rel, forkNum, blkno,
+                                                                RBM_NORMAL, NULL);
+
+                       LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+                       /*
+                        * Completely empty pages are not WAL-logged. Writing a WAL record
+                        * would change the LSN, and we don't want that. We want the page
+                        * to stay empty.
+                        */
+                       if (!PageIsNew(BufferGetPage(buf)))
+                               bufpack[nbufs++] = buf;
+                       else
+                               UnlockReleaseBuffer(buf);
+                       blkno++;
+               }
+
+               /*
+                * Nothing to log if every remaining block was empty. The collection
+                * loop above stops short of a full batch only once the range is
+                * exhausted, so an empty batch means we are done. Don't emit a
+                * record that carries no page image.
+                */
+               if (nbufs == 0)
+                       break;
+
+               /* Write WAL record for this batch. */
+               XLogBeginInsert();
+
+               START_CRIT_SECTION();
+               for (i = 0; i < nbufs; i++)
+               {
+                       XLogRegisterBuffer(i, bufpack[i], flags);
+                       MarkBufferDirty(bufpack[i]);
+               }
+
+               recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
+
+               /* Stamp every page in the batch with the record's LSN, then unlock. */
+               for (i = 0; i < nbufs; i++)
+               {
+                       PageSetLSN(BufferGetPage(bufpack[i]), recptr);
+                       UnlockReleaseBuffer(bufpack[i]);
+               }
+               END_CRIT_SECTION();
+       }
+}
+
 /*
  * Allocate working buffers needed for WAL record construction.
  */
index 61fa6970395505dfa325068a70421ea848cde370..4f0fa0378271920cde93fd293fb18e69b05ea21c 100644 (file)
@@ -71,6 +71,7 @@ extern int    gin_pending_list_limit;
 
 /* ginutil.c */
 extern void ginGetStats(Relation index, GinStatsData *stats);
-extern void ginUpdateStats(Relation index, const GinStatsData *stats);
+extern void ginUpdateStats(Relation index, const GinStatsData *stats,
+                          bool is_build);
 
 #endif                                                 /* GIN_H */
index 9bd4e0b9baa6167a3f5bfb04537732c310be5281..2c5d743caceb7b0ff25e25f6857931f3f66f0dfb 100644 (file)
@@ -16,8 +16,6 @@
 #include "lib/stringinfo.h"
 #include "storage/off.h"
 
-#define XLOG_GIN_CREATE_INDEX  0x00
-
 #define XLOG_GIN_CREATE_PTREE  0x10
 
 typedef struct ginxlogCreatePostingTree
index ce8bfd83ea44bbd1f57c8ab215e99bfb8f73a055..6902f4115b7f074e7570792b5b64bdfd3fe7407a 100644 (file)
 
 typedef XLogRecPtr GistNSN;
 
+/*
+ * A bogus LSN / NSN value used during index build. Must be smaller than any
+ * real or fake unlogged LSN, so that after an index build finishes, all the
+ * splits are considered completed.
+ *
+ * The value is also deliberately non-zero: pages must carry a valid
+ * (i.e. non-zero) LSN for the parent-child LSN-NSN interlock to work
+ * correctly.
+ */
+#define GistBuildLSN   ((XLogRecPtr) 1)
+
 /*
  * For on-disk compatibility with pre-9.3 servers, NSN is stored as two
  * 32-bit fields on disk, same as LSNs.
index 02dc285a78a084fb63f214d1ef95746e714ed396..78e2e3fb31286ebd69221fba0bc2b56c0bee519f 100644 (file)
@@ -244,6 +244,7 @@ typedef struct
        Relation        r;
        Relation        heapRel;
        Size            freespace;              /* free space to be left */
+       bool            is_build;
 
        GISTInsertStack *stack;
 } GISTInsertState;
@@ -393,7 +394,8 @@ extern void gistdoinsert(Relation r,
                         IndexTuple itup,
                         Size freespace,
                         GISTSTATE *GISTstate,
-                        Relation heapRel);
+                        Relation heapRel,
+                        bool is_build);
 
 /* A List of these is returned from gistplacetopage() in *splitinfo */
 typedef struct
@@ -409,7 +411,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                                Buffer leftchildbuf,
                                List **splitinfo,
                                bool markleftchild,
-                               Relation heapRel);
+                               Relation heapRel,
+                               bool is_build);
 
 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
                  int len, GISTSTATE *giststate);
index 2f87b67a53a8983ced0aaf368a1ad0bccb9df07c..9990d97cbd38b405fd91e37957efbd385454d0c1 100644 (file)
@@ -23,7 +23,7 @@
                                                                                  * FSM */
 #define XLOG_GIST_PAGE_SPLIT           0x30
  /* #define XLOG_GIST_INSERT_COMPLETE   0x40 */        /* not used anymore */
-#define XLOG_GIST_CREATE_INDEX         0x50
+ /* #define XLOG_GIST_CREATE_INDEX              0x50 */        /* not used anymore */
 #define XLOG_GIST_PAGE_DELETE          0x60
 
 /*
index 6527fc9eb143de1ba371ec534abe477c2dbad714..ee8fc6fd6b2dd4444ac387be2de311d0534efa99 100644 (file)
@@ -18,7 +18,7 @@
 #include "storage/off.h"
 
 /* XLOG record types for SPGiST */
-#define XLOG_SPGIST_CREATE_INDEX       0x00
+/* #define XLOG_SPGIST_CREATE_INDEX       0x00 */      /* not used anymore */
 #define XLOG_SPGIST_ADD_LEAF           0x10
 #define XLOG_SPGIST_MOVE_LEAFS         0x20
 #define XLOG_SPGIST_ADD_NODE           0x30
index 383968c4e3f4a209306926620d9d9bb70fb9ff4a..cadecab721cf367b3c88d981a0f93d7d4f31727b 100644 (file)
@@ -28,6 +28,13 @@ typedef uint64 XLogRecPtr;
 #define InvalidXLogRecPtr      0
 #define XLogRecPtrIsInvalid(r) ((r) == InvalidXLogRecPtr)
 
+/*
+ * First LSN to use for "fake" LSNs.
+ *
+ * Values smaller than this can be used for special per-AM purposes. For
+ * example, GiST stamps pages with GistBuildLSN (1) while building an index.
+ * Only a couple of values are needed today; the wider range is reserved in
+ * case more special values are needed in the future.
+ */
+#define FirstNormalUnloggedLSN ((XLogRecPtr) 1000)
+
 /*
  * XLogSegNo - physical log file sequence number.
  */
index e16257228accb776cde2573fe6f62c2fdc6a75da..30c4ff7bea1c0cb921618e89dde9c74d4ce29331 100644 (file)
@@ -16,6 +16,7 @@
 #include "storage/block.h"
 #include "storage/buf.h"
 #include "storage/relfilenode.h"
+#include "utils/relcache.h"
 
 /*
  * The minimum size of the WAL construction working area. If you need to
@@ -54,6 +55,8 @@ extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
 extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
                        BlockNumber blk, char *page, bool page_std);
 extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
+extern void log_newpage_range(Relation rel, ForkNumber forkNum,
+                                 BlockNumber startblk, BlockNumber endblk, bool page_std);
 extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
 
 extern void InitXLogInsert(void);