]> granicus.if.org Git - postgresql/commitdiff
Revamp the WAL record format.
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 20 Nov 2014 15:56:26 +0000 (17:56 +0200)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 20 Nov 2014 16:46:41 +0000 (18:46 +0200)
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.

There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.

This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.

For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.

The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.

Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.

93 files changed:
contrib/pg_xlogdump/pg_xlogdump.c
contrib/pg_xlogdump/rmgrdesc.h
src/backend/access/brin/brin.c
src/backend/access/brin/brin_pageops.c
src/backend/access/brin/brin_revmap.c
src/backend/access/brin/brin_xlog.c
src/backend/access/gin/ginbtree.c
src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/ginfast.c
src/backend/access/gin/gininsert.c
src/backend/access/gin/ginutil.c
src/backend/access/gin/ginvacuum.c
src/backend/access/gin/ginxlog.c
src/backend/access/gist/gist.c
src/backend/access/gist/gistbuild.c
src/backend/access/gist/gistxlog.c
src/backend/access/hash/hash.c
src/backend/access/heap/heapam.c
src/backend/access/heap/rewriteheap.c
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtxlog.c
src/backend/access/rmgrdesc/brindesc.c
src/backend/access/rmgrdesc/clogdesc.c
src/backend/access/rmgrdesc/dbasedesc.c
src/backend/access/rmgrdesc/gindesc.c
src/backend/access/rmgrdesc/gistdesc.c
src/backend/access/rmgrdesc/hashdesc.c
src/backend/access/rmgrdesc/heapdesc.c
src/backend/access/rmgrdesc/mxactdesc.c
src/backend/access/rmgrdesc/nbtdesc.c
src/backend/access/rmgrdesc/relmapdesc.c
src/backend/access/rmgrdesc/seqdesc.c
src/backend/access/rmgrdesc/smgrdesc.c
src/backend/access/rmgrdesc/spgdesc.c
src/backend/access/rmgrdesc/standbydesc.c
src/backend/access/rmgrdesc/tblspcdesc.c
src/backend/access/rmgrdesc/xactdesc.c
src/backend/access/rmgrdesc/xlogdesc.c
src/backend/access/spgist/spgdoinsert.c
src/backend/access/spgist/spginsert.c
src/backend/access/spgist/spgvacuum.c
src/backend/access/spgist/spgxlog.c
src/backend/access/transam/README
src/backend/access/transam/clog.c
src/backend/access/transam/multixact.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xloginsert.c
src/backend/access/transam/xlogreader.c
src/backend/access/transam/xlogutils.c
src/backend/catalog/storage.c
src/backend/commands/dbcommands.c
src/backend/commands/sequence.c
src/backend/commands/tablespace.c
src/backend/replication/logical/decode.c
src/backend/replication/logical/logical.c
src/backend/replication/logical/logicalfuncs.c
src/backend/replication/logical/reorderbuffer.c
src/backend/replication/logical/snapbuild.c
src/backend/replication/walsender.c
src/backend/storage/ipc/standby.c
src/backend/utils/cache/relmapper.c
src/bin/pg_resetxlog/pg_resetxlog.c
src/include/access/brin_xlog.h
src/include/access/clog.h
src/include/access/gin.h
src/include/access/gin_private.h
src/include/access/gist_private.h
src/include/access/hash.h
src/include/access/heapam_xlog.h
src/include/access/htup_details.h
src/include/access/itup.h
src/include/access/multixact.h
src/include/access/nbtree.h
src/include/access/spgist.h
src/include/access/spgist_private.h
src/include/access/xact.h
src/include/access/xlog.h
src/include/access/xlog_internal.h
src/include/access/xloginsert.h
src/include/access/xlogreader.h
src/include/access/xlogrecord.h
src/include/access/xlogutils.h
src/include/catalog/storage_xlog.h
src/include/commands/dbcommands.h
src/include/commands/sequence.h
src/include/commands/tablespace.h
src/include/replication/decode.h
src/include/storage/standby.h
src/include/utils/relmapper.h

index 7f151f961c87b799ac1aa63c7518d5e191ef5ac4..26556dc82deaa66f3ca323973b867b6c52593009 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "access/xlogreader.h"
 #include "access/xlogrecord.h"
+#include "access/xlog_internal.h"
 #include "access/transam.h"
 #include "common/fe_memutils.h"
 #include "getopt_long.h"
@@ -343,90 +344,117 @@ XLogDumpReadPage(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen,
  * Store per-rmgr and per-record statistics for a given record.
  */
 static void
-XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats, XLogRecPtr ReadRecPtr, XLogRecord *record)
+XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
+                                       XLogReaderState *record)
 {
        RmgrId          rmid;
        uint8           recid;
+       uint32          rec_len;
+       uint32          fpi_len;
 
        stats->count++;
 
        /* Update per-rmgr statistics */
 
-       rmid = record->xl_rmid;
+       rmid = XLogRecGetRmid(record);
+       rec_len = XLogRecGetDataLen(record) + SizeOfXLogRecord;
+       fpi_len = record->decoded_record->xl_tot_len - rec_len;
 
        stats->rmgr_stats[rmid].count++;
-       stats->rmgr_stats[rmid].rec_len +=
-               record->xl_len + SizeOfXLogRecord;
-       stats->rmgr_stats[rmid].fpi_len +=
-               record->xl_tot_len - (record->xl_len + SizeOfXLogRecord);
+       stats->rmgr_stats[rmid].rec_len += rec_len;
+       stats->rmgr_stats[rmid].fpi_len += fpi_len;
 
        /*
         * Update per-record statistics, where the record is identified by a
-        * combination of the RmgrId and the four bits of the xl_info field
-        * that are the rmgr's domain (resulting in sixteen possible entries
-        * per RmgrId).
+        * combination of the RmgrId and the four bits of the xl_info field that
+        * are the rmgr's domain (resulting in sixteen possible entries per
+        * RmgrId).
         */
 
-       recid = record->xl_info >> 4;
+       recid = XLogRecGetInfo(record) >> 4;
 
        stats->record_stats[rmid][recid].count++;
-       stats->record_stats[rmid][recid].rec_len +=
-               record->xl_len + SizeOfXLogRecord;
-       stats->record_stats[rmid][recid].fpi_len +=
-               record->xl_tot_len - (record->xl_len + SizeOfXLogRecord);
+       stats->record_stats[rmid][recid].rec_len += rec_len;
+       stats->record_stats[rmid][recid].fpi_len += fpi_len;
 }
 
 /*
  * Print a record to stdout
  */
 static void
-XLogDumpDisplayRecord(XLogDumpConfig *config, XLogRecPtr ReadRecPtr, XLogRecord *record)
+XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
 {
-       const char         *id;
-       const RmgrDescData *desc = &RmgrDescTable[record->xl_rmid];
-
-       id = desc->rm_identify(record->xl_info);
+       const char *id;
+       const RmgrDescData *desc = &RmgrDescTable[XLogRecGetRmid(record)];
+       RelFileNode rnode;
+       ForkNumber      forknum;
+       BlockNumber blk;
+       int                     block_id;
+       uint8           info = XLogRecGetInfo(record);
+       XLogRecPtr      xl_prev = XLogRecGetPrev(record);
+
+       id = desc->rm_identify(info);
        if (id == NULL)
-               id = psprintf("UNKNOWN (%x)", record->xl_info & ~XLR_INFO_MASK);
+               id = psprintf("UNKNOWN (%x)", info & ~XLR_INFO_MASK);
 
-       printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, bkp: %u%u%u%u, desc: %s ",
+       printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
                   desc->rm_name,
-                  record->xl_len, record->xl_tot_len,
-                  record->xl_xid,
-                  (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
-                  (uint32) (record->xl_prev >> 32), (uint32) record->xl_prev,
-                  !!(XLR_BKP_BLOCK(0) & record->xl_info),
-                  !!(XLR_BKP_BLOCK(1) & record->xl_info),
-                  !!(XLR_BKP_BLOCK(2) & record->xl_info),
-                  !!(XLR_BKP_BLOCK(3) & record->xl_info),
-                  id);
+                  XLogRecGetDataLen(record), XLogRecGetTotalLen(record),
+                  XLogRecGetXid(record),
+                  (uint32) (record->ReadRecPtr >> 32), (uint32) record->ReadRecPtr,
+                  (uint32) (xl_prev >> 32), (uint32) xl_prev);
+       printf("desc: %s ", id);
 
        /* the desc routine will printf the description directly to stdout */
        desc->rm_desc(NULL, record);
 
-       putchar('\n');
-
-       if (config->bkp_details)
+       if (!config->bkp_details)
        {
-               int                     bkpnum;
-               char       *blk = (char *) XLogRecGetData(record) + record->xl_len;
-
-               for (bkpnum = 0; bkpnum < XLR_MAX_BKP_BLOCKS; bkpnum++)
+               /* print block references (short format) */
+               for (block_id = 0; block_id <= record->max_block_id; block_id++)
                {
-                       BkpBlock        bkpb;
-
-                       if (!(XLR_BKP_BLOCK(bkpnum) & record->xl_info))
+                       if (!XLogRecHasBlockRef(record, block_id))
                                continue;
 
-                       memcpy(&bkpb, blk, sizeof(BkpBlock));
-                       blk += sizeof(BkpBlock);
-                       blk += BLCKSZ - bkpb.hole_length;
+                       XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+                       if (forknum != MAIN_FORKNUM)
+                               printf(", blkref #%u: rel %u/%u/%u fork %s blk %u",
+                                          block_id,
+                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                          forkNames[forknum],
+                                          blk);
+                       else
+                               printf(", blkref #%u: rel %u/%u/%u blk %u",
+                                          block_id,
+                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                          blk);
+                       if (XLogRecHasBlockImage(record, block_id))
+                               printf(" FPW");
+               }
+               putchar('\n');
+       }
+       else
+       {
+               /* print block references (detailed format) */
+               putchar('\n');
+               for (block_id = 0; block_id <= record->max_block_id; block_id++)
+               {
+                       if (!XLogRecHasBlockRef(record, block_id))
+                               continue;
 
-                       printf("\tbackup bkp #%u; rel %u/%u/%u; fork: %s; block: %u; hole: offset: %u, length: %u\n",
-                                  bkpnum,
-                                  bkpb.node.spcNode, bkpb.node.dbNode, bkpb.node.relNode,
-                                  forkNames[bkpb.fork],
-                                  bkpb.block, bkpb.hole_offset, bkpb.hole_length);
+                       XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+                       printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
+                                  block_id,
+                                  rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                  forkNames[forknum],
+                                  blk);
+                       if (XLogRecHasBlockImage(record, block_id))
+                       {
+                               printf(" (FPW); hole: offset: %u, length: %u\n",
+                                          record->blocks[block_id].hole_offset,
+                                          record->blocks[block_id].hole_length);
+                       }
+                       putchar('\n');
                }
        }
 }
@@ -924,9 +952,9 @@ main(int argc, char **argv)
 
                /* process the record */
                if (config.stats == true)
-                       XLogDumpCountRecord(&config, &stats, xlogreader_state->ReadRecPtr, record);
+                       XLogDumpCountRecord(&config, &stats, xlogreader_state);
                else
-                       XLogDumpDisplayRecord(&config, xlogreader_state->ReadRecPtr, record);
+                       XLogDumpDisplayRecord(&config, xlogreader_state);
 
                /* check whether we printed enough */
                config.already_displayed_records++;
index da805c53ca14e10e2b1d5d2651346833edb7b23f..aec4418303725a0d26fb1664ab7f3344f67172d0 100644 (file)
@@ -13,7 +13,7 @@
 typedef struct RmgrDescData
 {
        const char *rm_name;
-       void            (*rm_desc) (StringInfo buf, XLogRecord *record);
+       void            (*rm_desc) (StringInfo buf, XLogReaderState *record);
        const char *(*rm_identify) (uint8 info);
 } RmgrDescData;
 
index bd35cf6696acac2f207e6fb77d3076865d387d80..cb645e3d4596a77038b587233ca7aab2071421de 100644 (file)
@@ -666,19 +666,16 @@ brinbuild(PG_FUNCTION_ARGS)
        {
                xl_brin_createidx xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata;
                Page            page;
 
-               xlrec.node = index->rd_node;
                xlrec.version = BRIN_CURRENT_VERSION;
                xlrec.pagesPerRange = BrinGetPagesPerRange(index);
 
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) &xlrec;
-               rdata.len = SizeOfBrinCreateIdx;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
+               XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata);
+               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
 
                page = BufferGetPage(meta);
                PageSetLSN(page, recptr);
index 50f1dec1631a376461fc7f4d4d62a2c5b56f450b..0b6fbeb603cd49117ee58832709dc0ff4c614a72 100644 (file)
@@ -140,27 +140,19 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
                /* XLOG stuff */
                if (RelationNeedsWAL(idxrel))
                {
-                       BlockNumber blk = BufferGetBlockNumber(oldbuf);
                        xl_brin_samepage_update xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[2];
                        uint8           info = XLOG_BRIN_SAMEPAGE_UPDATE;
 
-                       xlrec.node = idxrel->rd_node;
-                       ItemPointerSetBlockNumber(&xlrec.tid, blk);
-                       ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfBrinSamepageUpdate;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &(rdata[1]);
+                       xlrec.offnum = oldoff;
 
-                       rdata[1].data = (char *) newtup;
-                       rdata[1].len = newsz;
-                       rdata[1].buffer = oldbuf;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
 
-                       recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+                       XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
+                       XLogRegisterBufData(0, (char *) newtup, newsz);
+
+                       recptr = XLogInsert(RM_BRIN_ID, info);
 
                        PageSetLSN(oldpage, recptr);
                }
@@ -211,43 +203,30 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
                {
                        xl_brin_update xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[4];
                        uint8           info;
 
                        info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
 
-                       xlrec.insert.node = idxrel->rd_node;
-                       ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
+                       xlrec.insert.offnum = newoff;
                        xlrec.insert.heapBlk = heapBlk;
-                       xlrec.insert.tuplen = newsz;
-                       xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
                        xlrec.insert.pagesPerRange = pagesPerRange;
-                       ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
+                       xlrec.oldOffnum = oldoff;
+
+                       XLogBeginInsert();
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfBrinUpdate;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &(rdata[1]);
+                       /* new page */
+                       XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
 
-                       rdata[1].data = (char *) newtup;
-                       rdata[1].len = newsz;
-                       rdata[1].buffer = extended ? InvalidBuffer : newbuf;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = &(rdata[2]);
+                       XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+                       XLogRegisterBufData(0, (char *) newtup, newsz);
 
-                       rdata[2].data = (char *) NULL;
-                       rdata[2].len = 0;
-                       rdata[2].buffer = revmapbuf;
-                       rdata[2].buffer_std = true;
-                       rdata[2].next = &(rdata[3]);
+                       /* revmap page */
+                       XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
 
-                       rdata[3].data = (char *) NULL;
-                       rdata[3].len = 0;
-                       rdata[3].buffer = oldbuf;
-                       rdata[3].buffer_std = true;
-                       rdata[3].next = NULL;
+                       /* old page */
+                       XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
 
-                       recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+                       recptr = XLogInsert(RM_BRIN_ID, info);
 
                        PageSetLSN(oldpage, recptr);
                        PageSetLSN(newpage, recptr);
@@ -354,36 +333,22 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
        {
                xl_brin_insert xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[3];
                uint8           info;
 
                info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
-               xlrec.node = idxrel->rd_node;
                xlrec.heapBlk = heapBlk;
                xlrec.pagesPerRange = pagesPerRange;
-               xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
-               xlrec.tuplen = itemsz;
-               ItemPointerSet(&xlrec.tid, blk, off);
-
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBrinInsert;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].buffer_std = false;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = (char *) tup;
-               rdata[1].len = itemsz;
-               rdata[1].buffer = extended ? InvalidBuffer : *buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = &(rdata[2]);
-
-               rdata[2].data = (char *) NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = revmapbuf;
-               rdata[2].buffer_std = false;
-               rdata[2].next = NULL;
-
-               recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+               xlrec.offnum = off;
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
+
+               XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+               XLogRegisterBufData(0, (char *) tup, itemsz);
+
+               XLogRegisterBuffer(1, revmapbuf, 0);
+
+               recptr = XLogInsert(RM_BRIN_ID, info);
 
                PageSetLSN(page, recptr);
                PageSetLSN(BufferGetPage(revmapbuf), recptr);
index 272c74e6b6e10cc885a2d684c58503411e32bd44..adc7d0b8473bc2b8f129ef152c4e94229ba6508a 100644 (file)
@@ -477,23 +477,16 @@ revmap_physical_extend(BrinRevmap *revmap)
        {
                xl_brin_revmap_extend xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.node = revmap->rm_irel->rd_node;
                xlrec.targetBlk = mapBlk;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBrinRevmapExtend;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].buffer_std = false;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = (char *) NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = revmap->rm_metaBuf;
-               rdata[1].buffer_std = false;
-               rdata[1].next = NULL;
-
-               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata);
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
+               XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
+
+               XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
+
+               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
                PageSetLSN(metapage, recptr);
                PageSetLSN(page, recptr);
        }
index 29370689a704b2ea5ec1cf82bd93d0a92ee6f33b..e6a1750975327c4dd6f313841aa1e4f35822b62c 100644 (file)
  * xlog replay routines
  */
 static void
-brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_createidx(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record);
        Buffer          buf;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
        /* create the index' metapage */
-       buf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, true);
+       buf = XLogInitBufferForRedo(record, 0);
        Assert(BufferIsValid(buf));
        page = (Page) BufferGetPage(buf);
        brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
@@ -44,51 +42,47 @@ brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
  * revmap.
  */
 static void
-brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
-                                               xl_brin_insert *xlrec, BrinTuple *tuple)
+brin_xlog_insert_update(XLogReaderState *record,
+                                               xl_brin_insert *xlrec)
 {
-       BlockNumber blkno;
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buffer;
        Page            page;
        XLogRedoAction action;
 
-       blkno = ItemPointerGetBlockNumber(&xlrec->tid);
-
        /*
         * If we inserted the first and only tuple on the page, re-initialize the
         * page from scratch.
         */
-       if (record->xl_info & XLOG_BRIN_INIT_PAGE)
+       if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE)
        {
-               /*
-                * No full-page image here.  Don't try to read it, because there
-                * might be one for the revmap buffer, below.
-                */
-               buffer = XLogReadBuffer(xlrec->node, blkno, true);
+               buffer = XLogInitBufferForRedo(record, 0);
                page = BufferGetPage(buffer);
                brin_page_init(page, BRIN_PAGETYPE_REGULAR);
                action = BLK_NEEDS_REDO;
        }
        else
        {
-               action = XLogReadBufferForRedo(lsn, record, 0,
-                                                                          xlrec->node, blkno, &buffer);
+               action = XLogReadBufferForRedo(record, 0, &buffer);
        }
 
        /* insert the index item into the page */
        if (action == BLK_NEEDS_REDO)
        {
                OffsetNumber offnum;
+               BrinTuple  *tuple;
+               Size            tuplen;
+
+               tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
 
                Assert(tuple->bt_blkno == xlrec->heapBlk);
 
                page = (Page) BufferGetPage(buffer);
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
 
-               offnum = PageAddItem(page, (Item) tuple, xlrec->tuplen, offnum, true,
-                                                        false);
+               offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
                if (offnum == InvalidOffsetNumber)
                        elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
 
@@ -99,16 +93,17 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
                UnlockReleaseBuffer(buffer);
 
        /* update the revmap */
-       action = XLogReadBufferForRedo(lsn, record,
-                                                                  record->xl_info & XLOG_BRIN_INIT_PAGE ? 0 : 1,
-                                                                  xlrec->node,
-                                                                  xlrec->revmapBlk, &buffer);
+       action = XLogReadBufferForRedo(record, 1, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
+               ItemPointerData tid;
+               BlockNumber blkno = BufferGetBlockNumber(buffer);
+
+               ItemPointerSet(&tid, blkno, xlrec->offnum);
                page = (Page) BufferGetPage(buffer);
 
                brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
-                                                               xlrec->tid);
+                                                               tid);
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
@@ -122,34 +117,26 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
  * replay a BRIN index insertion
  */
 static void
-brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_insert(XLogReaderState *record)
 {
        xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record);
-       BrinTuple  *newtup;
 
-       newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert);
-
-       brin_xlog_insert_update(lsn, record, xlrec, newtup);
+       brin_xlog_insert_update(record, xlrec);
 }
 
 /*
  * replay a BRIN index update
  */
 static void
-brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_update(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
-       BlockNumber blkno;
        Buffer          buffer;
-       BrinTuple  *newtup;
        XLogRedoAction action;
 
-       newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate);
-
        /* First remove the old tuple */
-       blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid));
-       action = XLogReadBufferForRedo(lsn, record, 2, xlrec->insert.node,
-                                                                  blkno, &buffer);
+       action = XLogReadBufferForRedo(record, 2, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
                Page            page;
@@ -157,7 +144,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
 
                page = (Page) BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid));
+               offnum = xlrec->oldOffnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "brin_xlog_update: invalid max offset number");
 
@@ -168,7 +155,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* Then insert the new tuple and update revmap, like in an insertion. */
-       brin_xlog_insert_update(lsn, record, &xlrec->insert, newtup);
+       brin_xlog_insert_update(record, &xlrec->insert);
 
        if (BufferIsValid(buffer))
                UnlockReleaseBuffer(buffer);
@@ -178,30 +165,27 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
  * Update a tuple on a single page.
  */
 static void
-brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_samepage_update(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_brin_samepage_update *xlrec;
-       BlockNumber blkno;
        Buffer          buffer;
        XLogRedoAction action;
 
        xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
-       blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
-       action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno,
-                                                                  &buffer);
+       action = XLogReadBufferForRedo(record, 0, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
-               int                     tuplen;
+               Size            tuplen;
                BrinTuple  *mmtuple;
                Page            page;
                OffsetNumber offnum;
 
-               tuplen = record->xl_len - SizeOfBrinSamepageUpdate;
-               mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate);
+               mmtuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
 
                page = (Page) BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
 
@@ -223,18 +207,23 @@ brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
  * Replay a revmap page extension
  */
 static void
-brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
+brin_xlog_revmap_extend(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_brin_revmap_extend *xlrec;
        Buffer          metabuf;
        Buffer          buf;
        Page            page;
+       BlockNumber targetBlk;
        XLogRedoAction action;
 
        xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
+
+       XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
+       Assert(xlrec->targetBlk == targetBlk);
+
        /* Update the metapage */
-       action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node,
-                                                                  BRIN_METAPAGE_BLKNO, &metabuf);
+       action = XLogReadBufferForRedo(record, 0, &metabuf);
        if (action == BLK_NEEDS_REDO)
        {
                Page            metapg;
@@ -255,7 +244,7 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
         * image here.
         */
 
-       buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
+       buf = XLogInitBufferForRedo(record, 1);
        page = (Page) BufferGetPage(buf);
        brin_page_init(page, BRIN_PAGETYPE_REVMAP);
 
@@ -268,26 +257,26 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
 }
 
 void
-brin_redo(XLogRecPtr lsn, XLogRecord *record)
+brin_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info & XLOG_BRIN_OPMASK)
        {
                case XLOG_BRIN_CREATE_INDEX:
-                       brin_xlog_createidx(lsn, record);
+                       brin_xlog_createidx(record);
                        break;
                case XLOG_BRIN_INSERT:
-                       brin_xlog_insert(lsn, record);
+                       brin_xlog_insert(record);
                        break;
                case XLOG_BRIN_UPDATE:
-                       brin_xlog_update(lsn, record);
+                       brin_xlog_update(record);
                        break;
                case XLOG_BRIN_SAMEPAGE_UPDATE:
-                       brin_xlog_samepage_update(lsn, record);
+                       brin_xlog_samepage_update(record);
                        break;
                case XLOG_BRIN_REVMAP_EXTEND:
-                       brin_xlog_revmap_extend(lsn, record);
+                       brin_xlog_revmap_extend(record);
                        break;
                default:
                        elog(PANIC, "brin_redo: unknown op code %u", info);
index 5365477000adc378d865b31f1095957369d0c09f..99f40a871f0f4656eaf809f69ca076d975121d0b 100644 (file)
@@ -326,7 +326,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                           Buffer childbuf, GinStatsData *buildStats)
 {
        Page            page = BufferGetPage(stack->buffer);
-       XLogRecData *payloadrdata;
        GinPlaceToPageRC rc;
        uint16          xlflags = 0;
        Page            childpage = NULL;
@@ -351,12 +350,36 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
        /*
         * Try to put the incoming tuple on the page. placeToPage will decide if
         * the page needs to be split.
+        *
+        * WAL-logging this operation is a bit funny:
+        *
+        * We're responsible for calling XLogBeginInsert() and XLogInsert().
+        * XLogBeginInsert() must be called before placeToPage, because
+        * placeToPage can register some data to the WAL record.
+        *
+        * If placeToPage returns INSERTED, placeToPage has already called
+        * START_CRIT_SECTION(), and we're responsible for calling
+        * END_CRIT_SECTION. When it returns INSERTED, it is also responsible for
+        * registering any data required to replay the operation with
+        * XLogRegisterBufData(0, ...). It may only add data to block index 0; the
+        * main data of the WAL record is reserved for this function.
+        *
+        * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
+        * Splits happen infrequently, so we just make a full-page image of all
+        * the pages involved.
         */
+
+       if (RelationNeedsWAL(btree->index))
+               XLogBeginInsert();
+
        rc = btree->placeToPage(btree, stack->buffer, stack,
                                                        insertdata, updateblkno,
-                                                       &payloadrdata, &newlpage, &newrpage);
+                                                       &newlpage, &newrpage);
        if (rc == UNMODIFIED)
+       {
+               XLogResetInsertion();
                return true;
+       }
        else if (rc == INSERTED)
        {
                /* placeToPage did START_CRIT_SECTION() */
@@ -372,17 +395,18 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                if (RelationNeedsWAL(btree->index))
                {
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[3];
                        ginxlogInsert xlrec;
                        BlockIdData childblknos[2];
 
-                       xlrec.node = btree->index->rd_node;
-                       xlrec.blkno = BufferGetBlockNumber(stack->buffer);
+                       /*
+                        * placeToPage already registered stack->buffer as block 0.
+                        */
                        xlrec.flags = xlflags;
 
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = sizeof(ginxlogInsert);
+                       if (childbuf != InvalidBuffer)
+                               XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
+
+                       XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
 
                        /*
                         * Log information about child if this was an insertion of a
@@ -390,26 +414,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                         */
                        if (childbuf != InvalidBuffer)
                        {
-                               rdata[0].next = &rdata[1];
-
                                BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
                                BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
-
-                               rdata[1].buffer = InvalidBuffer;
-                               rdata[1].data = (char *) childblknos;
-                               rdata[1].len = sizeof(BlockIdData) * 2;
-                               rdata[1].next = &rdata[2];
-
-                               rdata[2].buffer = childbuf;
-                               rdata[2].buffer_std = false;
-                               rdata[2].data = NULL;
-                               rdata[2].len = 0;
-                               rdata[2].next = payloadrdata;
+                               XLogRegisterData((char *) childblknos,
+                                                                sizeof(BlockIdData) * 2);
                        }
-                       else
-                               rdata[0].next = payloadrdata;
 
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
                        PageSetLSN(page, recptr);
                        if (childbuf != InvalidBuffer)
                                PageSetLSN(childpage, recptr);
@@ -421,10 +432,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
        }
        else if (rc == SPLIT)
        {
-               /* Didn't fit, have to split */
+               /* Didn't fit, had to split */
                Buffer          rbuffer;
                BlockNumber savedRightLink;
-               XLogRecData rdata[2];
                ginxlogSplit data;
                Buffer          lbuffer = InvalidBuffer;
                Page            newrootpg = NULL;
@@ -448,7 +458,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                 */
 
                data.node = btree->index->rd_node;
-               data.rblkno = BufferGetBlockNumber(rbuffer);
                data.flags = xlflags;
                if (childbuf != InvalidBuffer)
                {
@@ -462,23 +471,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                else
                        data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
 
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogSplit);
-
-               if (childbuf != InvalidBuffer)
-               {
-                       rdata[0].next = &rdata[1];
-
-                       rdata[1].buffer = childbuf;
-                       rdata[1].buffer_std = false;
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-                       rdata[1].next = payloadrdata;
-               }
-               else
-                       rdata[0].next = payloadrdata;
-
                if (stack->parent == NULL)
                {
                        /*
@@ -496,12 +488,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                                        buildStats->nEntryPages++;
                        }
 
-                       /*
-                        * root never has a right-link, so we borrow the rrlink field to
-                        * store the root block number.
-                        */
-                       data.rrlink = BufferGetBlockNumber(stack->buffer);
-                       data.lblkno = BufferGetBlockNumber(lbuffer);
+                       data.rrlink = InvalidBlockNumber;
                        data.flags |= GIN_SPLIT_ROOT;
 
                        GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
@@ -524,7 +511,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                {
                        /* split non-root page */
                        data.rrlink = savedRightLink;
-                       data.lblkno = BufferGetBlockNumber(stack->buffer);
 
                        GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
                        GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
@@ -572,7 +558,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                {
                        XLogRecPtr      recptr;
 
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
+                       /*
+                        * We just take full page images of all the split pages. Splits
+                        * are uncommon enough that it's not worth complicating the code
+                        * to be more efficient.
+                        */
+                       if (stack->parent == NULL)
+                       {
+                               XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                               XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                               XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                       }
+                       else
+                       {
+                               XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                               XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                       }
+                       if (BufferIsValid(childbuf))
+                               XLogRegisterBuffer(3, childbuf, 0);
+
+                       XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
+
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
                        PageSetLSN(BufferGetPage(stack->buffer), recptr);
                        PageSetLSN(BufferGetPage(rbuffer), recptr);
                        if (stack->parent == NULL)
index 97cd706c08e58b91c9481c52f30b8af4a1045b6f..012225eaa35b90032cab677c42d4da7bd18e665b 100644 (file)
@@ -98,20 +98,19 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
 static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
                                          GinBtreeStack *stack,
                                          void *insertdata, BlockNumber updateblkno,
-                                         XLogRecData **prdata, Page *newlpage, Page *newrpage);
+                                         Page *newlpage, Page *newrpage);
 
 static disassembledLeaf *disassembleLeaf(Page page);
 static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
 static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
                           int nNewItems);
 
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
-                                                          disassembledLeaf *leaf);
+static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
 static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
 static void dataPlaceToPageLeafSplit(Buffer buf,
                                                 disassembledLeaf *leaf,
                                                 ItemPointerData lbound, ItemPointerData rbound,
-                                                XLogRecData **prdata, Page lpage, Page rpage);
+                                                Page lpage, Page rpage);
 
 /*
  * Read TIDs from leaf data page to single uncompressed array. The TIDs are
@@ -428,8 +427,7 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
  */
 static GinPlaceToPageRC
 dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
-                                       void *insertdata, XLogRecData **prdata,
-                                       Page *newlpage, Page *newrpage)
+                                       void *insertdata, Page *newlpage, Page *newrpage)
 {
        GinBtreeDataLeafInsertData *items = insertdata;
        ItemPointer newItems = &items->items[items->curitem];
@@ -602,9 +600,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                 */
                MemoryContextSwitchTo(oldCxt);
                if (RelationNeedsWAL(btree->index))
-                       *prdata = constructLeafRecompressWALData(buf, leaf);
-               else
-                       *prdata = NULL;
+                       registerLeafRecompressWALData(buf, leaf);
                START_CRIT_SECTION();
                dataPlaceToPageLeafRecompress(buf, leaf);
 
@@ -685,7 +681,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                *newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
 
                dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
-                                                                prdata, *newlpage, *newrpage);
+                                                                *newlpage, *newrpage);
 
                Assert(GinPageRightMost(page) ||
                           ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
@@ -791,7 +787,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
         */
        if (removedsomething)
        {
-               XLogRecData *payloadrdata = NULL;
                bool            modified;
 
                /*
@@ -818,7 +813,10 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
                }
 
                if (RelationNeedsWAL(indexrel))
-                       payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+               {
+                       XLogBeginInsert();
+                       registerLeafRecompressWALData(buffer, leaf);
+               }
                START_CRIT_SECTION();
                dataPlaceToPageLeafRecompress(buffer, leaf);
 
@@ -827,18 +825,8 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
                if (RelationNeedsWAL(indexrel))
                {
                        XLogRecPtr      recptr;
-                       XLogRecData rdata;
-                       ginxlogVacuumDataLeafPage xlrec;
 
-                       xlrec.node = indexrel->rd_node;
-                       xlrec.blkno = BufferGetBlockNumber(buffer);
-
-                       rdata.buffer = InvalidBuffer;
-                       rdata.data = (char *) &xlrec;
-                       rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
-                       rdata.next = payloadrdata;
-
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
                        PageSetLSN(page, recptr);
                }
 
@@ -850,13 +838,12 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
  * Construct a ginxlogRecompressDataLeaf record representing the changes
  * in *leaf.
  */
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
 {
        int                     nmodified = 0;
        char       *walbufbegin;
        char       *walbufend;
-       XLogRecData *rdata;
        dlist_iter      iter;
        int                     segno;
        ginxlogRecompressDataLeaf *recompress_xlog;
@@ -871,12 +858,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
                        nmodified++;
        }
 
-       walbufbegin = palloc(
-                                                sizeof(ginxlogRecompressDataLeaf) +
-                                                BLCKSZ +               /* max size needed to hold the segment
-                                                                                * data */
-                                                nmodified * 2 +                /* (segno + action) per action */
-                                                sizeof(XLogRecData));
+       walbufbegin =
+               palloc(sizeof(ginxlogRecompressDataLeaf) +
+                          BLCKSZ +                     /* max size needed to hold the segment data */
+                          nmodified * 2        /* (segno + action) per action */
+               );
        walbufend = walbufbegin;
 
        recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@@ -944,14 +930,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
                        segno++;
        }
 
-       rdata = (XLogRecData *) MAXALIGN(walbufend);
-       rdata->buffer = buf;
-       rdata->buffer_std = TRUE;
-       rdata->data = walbufbegin;
-       rdata->len = walbufend - walbufbegin;
-       rdata->next = NULL;
 
-       return rdata;
+       XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+       XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
+
 }
 
 /*
@@ -1024,7 +1006,7 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
 static void
 dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
                                                 ItemPointerData lbound, ItemPointerData rbound,
-                                                XLogRecData **prdata, Page lpage, Page rpage)
+                                                Page lpage, Page rpage)
 {
        char       *ptr;
        int                     segsize;
@@ -1034,10 +1016,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
        dlist_node *firstright;
        leafSegmentInfo *seginfo;
 
-       /* these must be static so they can be returned to caller */
-       static ginxlogSplitDataLeaf split_xlog;
-       static XLogRecData rdata[3];
-
        /* Initialize temporary pages to hold the new left and right pages */
        GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
        GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
@@ -1092,29 +1070,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
        Assert(rsize == leaf->rsize);
        GinDataPageSetDataSize(rpage, rsize);
        *GinDataPageGetRightBound(rpage) = rbound;
-
-       /* Create WAL record */
-       split_xlog.lsize = lsize;
-       split_xlog.rsize = rsize;
-       split_xlog.lrightbound = lbound;
-       split_xlog.rrightbound = rbound;
-
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &split_xlog;
-       rdata[0].len = sizeof(ginxlogSplitDataLeaf);
-       rdata[0].next = &rdata[1];
-
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
-       rdata[1].len = lsize;
-       rdata[1].next = &rdata[2];
-
-       rdata[2].buffer = InvalidBuffer;
-       rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
-       rdata[2].len = rsize;
-       rdata[2].next = NULL;
-
-       *prdata = rdata;
 }
 
 /*
@@ -1124,29 +1079,30 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
  *
  * In addition to inserting the given item, the downlink of the existing item
  * at 'off' is updated to point to 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
  */
 static GinPlaceToPageRC
 dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                                                void *insertdata, BlockNumber updateblkno,
-                                               XLogRecData **prdata, Page *newlpage, Page *newrpage)
+                                               Page *newlpage, Page *newrpage)
 {
        Page            page = BufferGetPage(buf);
        OffsetNumber off = stack->off;
        PostingItem *pitem;
 
-       /* these must be static so they can be returned to caller */
-       static XLogRecData rdata;
+       /* this must be static so it stays valid until the caller's XLogInsert() */
        static ginxlogInsertDataInternal data;
 
        /* split if we have to */
        if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
        {
                dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
-                                                         prdata, newlpage, newrpage);
+                                                         newlpage, newrpage);
                return SPLIT;
        }
 
-       *prdata = &rdata;
        Assert(GinPageIsData(page));
 
        START_CRIT_SECTION();
@@ -1159,14 +1115,15 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
        pitem = (PostingItem *) insertdata;
        GinDataPageAddPostingItem(page, pitem, off);
 
-       data.offset = off;
-       data.newitem = *pitem;
+       if (RelationNeedsWAL(btree->index))
+       {
+               data.offset = off;
+               data.newitem = *pitem;
 
-       rdata.buffer = buf;
-       rdata.buffer_std = TRUE;
-       rdata.data = (char *) &data;
-       rdata.len = sizeof(ginxlogInsertDataInternal);
-       rdata.next = NULL;
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) &data,
+                                                       sizeof(ginxlogInsertDataInternal));
+       }
 
        return INSERTED;
 }
@@ -1178,7 +1135,6 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 static GinPlaceToPageRC
 dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                                void *insertdata, BlockNumber updateblkno,
-                               XLogRecData **prdata,
                                Page *newlpage, Page *newrpage)
 {
        Page            page = BufferGetPage(buf);
@@ -1187,11 +1143,11 @@ dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 
        if (GinPageIsLeaf(page))
                return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
-                                                                  prdata, newlpage, newrpage);
+                                                                  newlpage, newrpage);
        else
                return dataPlaceToPageInternal(btree, buf, stack,
                                                                           insertdata, updateblkno,
-                                                                          prdata, newlpage, newrpage);
+                                                                          newlpage, newrpage);
 }
 
 /*
@@ -1202,7 +1158,7 @@ static void
 dataSplitPageInternal(GinBtree btree, Buffer origbuf,
                                          GinBtreeStack *stack,
                                          void *insertdata, BlockNumber updateblkno,
-                                         XLogRecData **prdata, Page *newlpage, Page *newrpage)
+                                         Page *newlpage, Page *newrpage)
 {
        Page            oldpage = BufferGetPage(origbuf);
        OffsetNumber off = stack->off;
@@ -1215,19 +1171,13 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
        Page            lpage;
        Page            rpage;
        OffsetNumber separator;
-
-       /* these must be static so they can be returned to caller */
-       static ginxlogSplitDataInternal data;
-       static XLogRecData rdata[4];
-       static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
+       PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
 
        lpage = PageGetTempPage(oldpage);
        rpage = PageGetTempPage(oldpage);
        GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
        GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
 
-       *prdata = rdata;
-
        /*
         * First construct a new list of PostingItems, which includes all the old
         * items, and the new item.
@@ -1277,20 +1227,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
        /* set up right bound for right page */
        *GinDataPageGetRightBound(rpage) = oldbound;
 
-       data.separator = separator;
-       data.nitem = nitems;
-       data.rightbound = oldbound;
-
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &data;
-       rdata[0].len = sizeof(ginxlogSplitDataInternal);
-       rdata[0].next = &rdata[1];
-
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].data = (char *) allitems;
-       rdata[1].len = nitems * sizeof(PostingItem);
-       rdata[1].next = NULL;
-
        *newlpage = lpage;
        *newrpage = rpage;
 }
@@ -1797,24 +1733,18 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                ginxlogCreatePostingTree data;
 
-               data.node = index->rd_node;
-               data.blkno = blkno;
                data.size = rootsize;
 
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogCreatePostingTree);
-               rdata[0].next = &rdata[1];
+               XLogBeginInsert();
+               XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
 
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].data = (char *) GinDataLeafPageGetPostingList(page);
-               rdata[1].len = rootsize;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
+                                                rootsize);
+               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
                PageSetLSN(page, recptr);
        }
 
index 84dc1e228c1436e85b3b6c8589e594c7ce11616c..2dae7b95499f3eb544d6b1dcbdf720f99a645278 100644 (file)
@@ -22,7 +22,7 @@
 static void entrySplitPage(GinBtree btree, Buffer origbuf,
                           GinBtreeStack *stack,
                           void *insertPayload,
-                          BlockNumber updateblkno, XLogRecData **prdata,
+                          BlockNumber updateblkno,
                           Page *newlpage, Page *newrpage);
 
 /*
@@ -515,33 +515,33 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
  * On insertion to an internal node, in addition to inserting the given item,
  * the downlink of the existing item at 'off' is updated to point to
  * 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
  */
 static GinPlaceToPageRC
 entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                                 void *insertPayload, BlockNumber updateblkno,
-                                XLogRecData **prdata, Page *newlpage, Page *newrpage)
+                                Page *newlpage, Page *newrpage)
 {
        GinBtreeEntryInsertData *insertData = insertPayload;
        Page            page = BufferGetPage(buf);
        OffsetNumber off = stack->off;
        OffsetNumber placed;
-       int                     cnt = 0;
 
-       /* these must be static so they can be returned to caller */
-       static XLogRecData rdata[3];
+       /* this must be static so it stays valid until the caller's XLogInsert(). */
        static ginxlogInsertEntry data;
 
        /* quick exit if it doesn't fit */
        if (!entryIsEnoughSpace(btree, buf, off, insertData))
        {
                entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
-                                          prdata, newlpage, newrpage);
+                                          newlpage, newrpage);
                return SPLIT;
        }
 
        START_CRIT_SECTION();
 
-       *prdata = rdata;
        entryPreparePage(btree, page, off, insertData, updateblkno);
 
        placed = PageAddItem(page,
@@ -552,21 +552,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                elog(ERROR, "failed to add item to index page in \"%s\"",
                         RelationGetRelationName(btree->index));
 
-       data.isDelete = insertData->isDelete;
-       data.offset = off;
-
-       rdata[cnt].buffer = buf;
-       rdata[cnt].buffer_std = true;
-       rdata[cnt].data = (char *) &data;
-       rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
-       rdata[cnt].next = &rdata[cnt + 1];
-       cnt++;
-
-       rdata[cnt].buffer = buf;
-       rdata[cnt].buffer_std = true;
-       rdata[cnt].data = (char *) insertData->entry;
-       rdata[cnt].len = IndexTupleSize(insertData->entry);
-       rdata[cnt].next = NULL;
+       if (RelationNeedsWAL(btree->index))
+       {
+               data.isDelete = insertData->isDelete;
+               data.offset = off;
+
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) &data,
+                                                       offsetof(ginxlogInsertEntry, tuple));
+               XLogRegisterBufData(0, (char *) insertData->entry,
+                                                       IndexTupleSize(insertData->entry));
+       }
 
        return INSERTED;
 }
@@ -581,7 +577,7 @@ static void
 entrySplitPage(GinBtree btree, Buffer origbuf,
                           GinBtreeStack *stack,
                           void *insertPayload,
-                          BlockNumber updateblkno, XLogRecData **prdata,
+                          BlockNumber updateblkno,
                           Page *newlpage, Page *newrpage)
 {
        GinBtreeEntryInsertData *insertData = insertPayload;
@@ -590,7 +586,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
                                maxoff,
                                separator = InvalidOffsetNumber;
        Size            totalsize = 0;
-       Size            tupstoresize;
        Size            lsize = 0,
                                size;
        char       *ptr;
@@ -599,13 +594,8 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
        Page            lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
        Page            rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
        Size            pageSize = PageGetPageSize(lpage);
+       char            tupstore[2 * BLCKSZ];
 
-       /* these must be static so they can be returned to caller */
-       static XLogRecData rdata[2];
-       static ginxlogSplitEntry data;
-       static char tupstore[2 * BLCKSZ];
-
-       *prdata = rdata;
        entryPreparePage(btree, lpage, off, insertData, updateblkno);
 
        /*
@@ -638,7 +628,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
                ptr += size;
                totalsize += size + sizeof(ItemIdData);
        }
-       tupstoresize = ptr - tupstore;
 
        /*
         * Initialize the left and right pages, and copy all the tuples back to
@@ -673,19 +662,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
                ptr += MAXALIGN(IndexTupleSize(itup));
        }
 
-       data.separator = separator;
-       data.nitem = maxoff;
-
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &data;
-       rdata[0].len = sizeof(ginxlogSplitEntry);
-       rdata[0].next = &rdata[1];
-
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].data = tupstore;
-       rdata[1].len = tupstoresize;
-       rdata[1].next = NULL;
-
        *newlpage = lpage;
        *newrpage = rpage;
 }
index 25746995b5ef6ade36dc6525588de55d067e50df..fd81d675570f3f7d8a0fa7055b8f084a560eb2eb 100644 (file)
@@ -108,26 +108,19 @@ writeListPage(Relation index, Buffer buffer,
 
        if (RelationNeedsWAL(index))
        {
-               XLogRecData rdata[2];
                ginxlogInsertListPage data;
                XLogRecPtr      recptr;
 
-               data.node = index->rd_node;
-               data.blkno = BufferGetBlockNumber(buffer);
                data.rightlink = rightlink;
                data.ntuples = ntuples;
 
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogInsertListPage);
-               rdata[0].next = rdata + 1;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
 
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].data = workspace;
-               rdata[1].len = size;
-               rdata[1].next = NULL;
+               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
+               XLogRegisterBufData(0, workspace, size);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
                PageSetLSN(page, recptr);
        }
 
@@ -224,26 +217,23 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
        Buffer          metabuffer;
        Page            metapage;
        GinMetaPageData *metadata = NULL;
-       XLogRecData rdata[2];
        Buffer          buffer = InvalidBuffer;
        Page            page = NULL;
        ginxlogUpdateMeta data;
        bool            separateList = false;
        bool            needCleanup = false;
        int                     cleanupSize;
+       bool            needWal;
 
        if (collector->ntuples == 0)
                return;
 
+       needWal = RelationNeedsWAL(index);
+
        data.node = index->rd_node;
        data.ntuples = 0;
        data.newRightlink = data.prevTail = InvalidBlockNumber;
 
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &data;
-       rdata[0].len = sizeof(ginxlogUpdateMeta);
-       rdata[0].next = NULL;
-
        metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
@@ -283,6 +273,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                memset(&sublist, 0, sizeof(GinMetaPageData));
                makeSublist(index, collector->tuples, collector->ntuples, &sublist);
 
+               if (needWal)
+                       XLogBeginInsert();
+
                /*
                 * metapage was unlocked, see above
                 */
@@ -315,14 +308,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                        LockBuffer(buffer, GIN_EXCLUSIVE);
                        page = BufferGetPage(buffer);
 
-                       rdata[0].next = rdata + 1;
-
-                       rdata[1].buffer = buffer;
-                       rdata[1].buffer_std = true;
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-                       rdata[1].next = NULL;
-
                        Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
 
                        START_CRIT_SECTION();
@@ -336,6 +321,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
 
                        metadata->nPendingPages += sublist.nPendingPages;
                        metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+                       if (needWal)
+                               XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
                }
        }
        else
@@ -348,6 +336,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                int                     i,
                                        tupsize;
                char       *ptr;
+               char       *collectordata;
 
                buffer = ReadBuffer(index, metadata->tail);
                LockBuffer(buffer, GIN_EXCLUSIVE);
@@ -356,16 +345,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                off = (PageIsEmpty(page)) ? FirstOffsetNumber :
                        OffsetNumberNext(PageGetMaxOffsetNumber(page));
 
-               rdata[0].next = rdata + 1;
-
-               rdata[1].buffer = buffer;
-               rdata[1].buffer_std = true;
-               ptr = rdata[1].data = (char *) palloc(collector->sumsize);
-               rdata[1].len = collector->sumsize;
-               rdata[1].next = NULL;
+               collectordata = ptr = (char *) palloc(collector->sumsize);
 
                data.ntuples = collector->ntuples;
 
+               if (needWal)
+                       XLogBeginInsert();
+
                START_CRIT_SECTION();
 
                /*
@@ -390,7 +376,12 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                        off++;
                }
 
-               Assert((ptr - rdata[1].data) <= collector->sumsize);
+               Assert((ptr - collectordata) <= collector->sumsize);
+               if (needWal)
+               {
+                       XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
+                       XLogRegisterBufData(1, collectordata, collector->sumsize);
+               }
 
                metadata->tailFreeSize = PageGetExactFreeSpace(page);
 
@@ -402,13 +393,16 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
         */
        MarkBufferDirty(metabuffer);
 
-       if (RelationNeedsWAL(index))
+       if (needWal)
        {
                XLogRecPtr      recptr;
 
                memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+               XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
                PageSetLSN(metapage, recptr);
 
                if (buffer != InvalidBuffer)
@@ -526,20 +520,11 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
                int                     i;
                int64           nDeletedHeapTuples = 0;
                ginxlogDeleteListPages data;
-               XLogRecData rdata[1];
                Buffer          buffers[GIN_NDELETE_AT_ONCE];
 
-               data.node = index->rd_node;
-
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogDeleteListPages);
-               rdata[0].next = NULL;
-
                data.ndeleted = 0;
                while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
                {
-                       data.toDelete[data.ndeleted] = blknoToDelete;
                        buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
                        LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
                        page = BufferGetPage(buffers[data.ndeleted]);
@@ -562,6 +547,13 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
                if (stats)
                        stats->pages_deleted += data.ndeleted;
 
+               /*
+                * This operation touches an unusually large number of pages, so
+                * prepare the XLogInsert machinery for that before entering the
+                * critical section.
+                */
+               XLogEnsureRecordSpace(data.ndeleted, 0);
+
                START_CRIT_SECTION();
 
                metadata->head = blknoToDelete;
@@ -592,9 +584,17 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
                {
                        XLogRecPtr      recptr;
 
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+                       for (i = 0; i < data.ndeleted; i++)
+                               XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
+
                        memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
 
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+                       XLogRegisterData((char *) &data,
+                                                        sizeof(ginxlogDeleteListPages));
+
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
                        PageSetLSN(metapage, recptr);
 
                        for (i = 0; i < data.ndeleted; i++)
index 370884ed17fee19a8bfda824ce27e5b18f405137..c1ad0fd8c4db24b244d71679c9418d22a3c01f83 100644 (file)
@@ -347,15 +347,13 @@ ginbuild(PG_FUNCTION_ARGS)
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata;
                Page            page;
 
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) &(index->rd_node);
-               rdata.len = sizeof(RelFileNode);
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
 
                page = BufferGetPage(RootBuffer);
                PageSetLSN(page, recptr);
index d0458cfd0cfa939b841e8ccb6f9eb43732d1881e..f593a7224f2d871ec0c17ea08d6c47a5c6da908d 100644 (file)
@@ -605,19 +605,17 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
        {
                XLogRecPtr      recptr;
                ginxlogUpdateMeta data;
-               XLogRecData rdata;
 
                data.node = index->rd_node;
                data.ntuples = 0;
                data.newRightlink = data.prevTail = InvalidBlockNumber;
                memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
 
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) &data;
-               rdata.len = sizeof(ginxlogUpdateMeta);
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
                PageSetLSN(metapage, recptr);
        }
 
index 3a61321a835d984a7cc9365bfadc63d9a884918a..6f32600ed7967a6057f71acd902773aa002179b0 100644 (file)
@@ -89,10 +89,6 @@ xlogVacuumPage(Relation index, Buffer buffer)
 {
        Page            page = BufferGetPage(buffer);
        XLogRecPtr      recptr;
-       XLogRecData rdata[3];
-       ginxlogVacuumPage xlrec;
-       uint16          lower;
-       uint16          upper;
 
        /* This is only used for entry tree leaf pages. */
        Assert(!GinPageIsData(page));
@@ -101,57 +97,14 @@ xlogVacuumPage(Relation index, Buffer buffer)
        if (!RelationNeedsWAL(index))
                return;
 
-       xlrec.node = index->rd_node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
-
-       /* Assume we can omit data between pd_lower and pd_upper */
-       lower = ((PageHeader) page)->pd_lower;
-       upper = ((PageHeader) page)->pd_upper;
-
-       Assert(lower < BLCKSZ);
-       Assert(upper < BLCKSZ);
-
-       if (lower >= SizeOfPageHeaderData &&
-               upper > lower &&
-               upper <= BLCKSZ)
-       {
-               xlrec.hole_offset = lower;
-               xlrec.hole_length = upper - lower;
-       }
-       else
-       {
-               /* No "hole" to compress out */
-               xlrec.hole_offset = 0;
-               xlrec.hole_length = 0;
-       }
-
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = sizeof(ginxlogVacuumPage);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &rdata[1];
-
-       if (xlrec.hole_length == 0)
-       {
-               rdata[1].data = (char *) page;
-               rdata[1].len = BLCKSZ;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
-       }
-       else
-       {
-               /* must skip the hole */
-               rdata[1].data = (char *) page;
-               rdata[1].len = xlrec.hole_offset;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &rdata[2];
-
-               rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
-               rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
-               rdata[2].buffer = InvalidBuffer;
-               rdata[2].next = NULL;
-       }
+       /*
+        * Always create a full image, we don't track the changes on the page at
+        * any more fine-grained level. This could obviously be improved...
+        */
+       XLogBeginInsert();
+       XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
 
-       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
+       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
        PageSetLSN(page, recptr);
 }
 
@@ -292,48 +245,27 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
        if (RelationNeedsWAL(gvs->index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
                ginxlogDeletePage data;
 
-               data.node = gvs->index->rd_node;
-               data.blkno = deleteBlkno;
-               data.parentBlkno = parentBlkno;
+               /*
+                * We can't pass REGBUF_STANDARD for the deleted page, because we
+                * didn't set pd_lower on pre-9.4 versions. The page might've been
+                * binary-upgraded from an older version, and hence not have pd_lower
+                * set correctly. Ditto for the left page, but removing the item from
+                * the parent updated its pd_lower, so we know that's OK at this
+                * point.
+                */
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, dBuffer, 0);
+               XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
+               XLogRegisterBuffer(2, lBuffer, 0);
+
                data.parentOffset = myoff;
-               data.leftBlkno = leftBlkno;
                data.rightLink = GinPageGetOpaque(page)->rightlink;
 
-               /*
-                * We can't pass buffer_std = TRUE, because we didn't set pd_lower on
-                * pre-9.4 versions. The page might've been binary-upgraded from an
-                * older version, and hence not have pd_lower set correctly. Ditto for
-                * the left page, but removing the item from the parent updated its
-                * pd_lower, so we know that's OK at this point.
-                */
-               rdata[0].buffer = dBuffer;
-               rdata[0].buffer_std = FALSE;
-               rdata[0].data = NULL;
-               rdata[0].len = 0;
-               rdata[0].next = rdata + 1;
-
-               rdata[1].buffer = pBuffer;
-               rdata[1].buffer_std = TRUE;
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].next = rdata + 2;
-
-               rdata[2].buffer = lBuffer;
-               rdata[2].buffer_std = FALSE;
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].next = rdata + 3;
-
-               rdata[3].buffer = InvalidBuffer;
-               rdata[3].buffer_std = FALSE;
-               rdata[3].len = sizeof(ginxlogDeletePage);
-               rdata[3].data = (char *) &data;
-               rdata[3].next = NULL;
-
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
+               XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
                PageSetLSN(page, recptr);
                PageSetLSN(parentPage, recptr);
                PageSetLSN(BufferGetPage(lBuffer), recptr);
index d0553bb8f729753bf39099a3841105a107274559..6c0042bd7956587c46600dac04d748eb23800238 100644 (file)
 static MemoryContext opCtx;            /* working memory for operations */
 
 static void
-ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
-                                                       int block_index,
-                                                       RelFileNode node, BlockNumber blkno)
+ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buffer;
        Page            page;
 
-       if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
-
                GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
 
                PageSetLSN(page, lsn);
@@ -42,18 +39,15 @@ ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
 }
 
 static void
-ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
+ginRedoCreateIndex(XLogReaderState *record)
 {
-       RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          RootBuffer,
                                MetaBuffer;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
-       Assert(BufferIsValid(MetaBuffer));
+       MetaBuffer = XLogInitBufferForRedo(record, 0);
+       Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
        page = (Page) BufferGetPage(MetaBuffer);
 
        GinInitMetabuffer(MetaBuffer);
@@ -61,8 +55,8 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
        PageSetLSN(page, lsn);
        MarkBufferDirty(MetaBuffer);
 
-       RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
-       Assert(BufferIsValid(RootBuffer));
+       RootBuffer = XLogInitBufferForRedo(record, 1);
+       Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
        page = (Page) BufferGetPage(RootBuffer);
 
        GinInitBuffer(RootBuffer, GIN_LEAF);
@@ -75,18 +69,15 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
+ginRedoCreatePTree(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
        char       *ptr;
        Buffer          buffer;
        Page            page;
 
-       /* Backup blocks are not used in create_ptree records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       buffer = XLogReadBuffer(data->node, data->blkno, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
        page = (Page) BufferGetPage(buffer);
 
        GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
@@ -328,35 +319,40 @@ ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdat
 }
 
 static void
-ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
+ginRedoInsert(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
        Buffer          buffer;
-       char       *payload;
+#ifdef NOT_USED
        BlockNumber leftChildBlkno = InvalidBlockNumber;
+#endif
        BlockNumber rightChildBlkno = InvalidBlockNumber;
        bool            isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
 
-       payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
-
        /*
         * First clear incomplete-split flag on child page if this finishes a
         * split.
         */
        if (!isLeaf)
        {
+               char       *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
+
+#ifdef NOT_USED
                leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
+#endif
                payload += sizeof(BlockIdData);
                rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
                payload += sizeof(BlockIdData);
 
-               ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno);
+               ginRedoClearIncompleteSplit(record, 1);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node,
-                                                         data->blkno, &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                Page            page = BufferGetPage(buffer);
+               Size            len;
+               char       *payload = XLogRecGetBlockData(record, 0, &len);
 
                /* How to insert the payload is tree-type specific */
                if (data->flags & GIN_INSERT_ISDATA)
@@ -378,161 +374,33 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoSplitEntry(Page lpage, Page rpage, void *rdata)
-{
-       ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
-       IndexTuple      itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
-       OffsetNumber i;
-
-       for (i = 0; i < data->separator; i++)
-       {
-               if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
-                       elog(ERROR, "failed to add item to gin index page");
-               itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
-       }
-
-       for (i = data->separator; i < data->nitem; i++)
-       {
-               if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
-                       elog(ERROR, "failed to add item to gin index page");
-               itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
-       }
-}
-
-static void
-ginRedoSplitData(Page lpage, Page rpage, void *rdata)
-{
-       bool            isleaf = GinPageIsLeaf(lpage);
-
-       if (isleaf)
-       {
-               ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata;
-               Pointer         lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
-               Pointer         rptr = lptr + data->lsize;
-
-               Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
-               Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
-
-               memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
-               memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
-
-               GinDataPageSetDataSize(lpage, data->lsize);
-               GinDataPageSetDataSize(rpage, data->rsize);
-               *GinDataPageGetRightBound(lpage) = data->lrightbound;
-               *GinDataPageGetRightBound(rpage) = data->rrightbound;
-       }
-       else
-       {
-               ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata;
-               PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal));
-               OffsetNumber i;
-               OffsetNumber maxoff;
-
-               for (i = 0; i < data->separator; i++)
-                       GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
-               for (i = data->separator; i < data->nitem; i++)
-                       GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
-
-               /* set up right key */
-               maxoff = GinPageGetOpaque(lpage)->maxoff;
-               *GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key;
-               *GinDataPageGetRightBound(rpage) = data->rightbound;
-       }
-}
-
-static void
-ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
+ginRedoSplit(XLogReaderState *record)
 {
        ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
        Buffer          lbuffer,
-                               rbuffer;
-       Page            lpage,
-                               rpage;
-       uint32          flags;
-       uint32          lflags,
-                               rflags;
-       char       *payload;
+                               rbuffer,
+                               rootbuf;
        bool            isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
-       bool            isData = (data->flags & GIN_INSERT_ISDATA) != 0;
        bool            isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
 
-       payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
-
        /*
         * First clear incomplete-split flag on child page if this finishes a
         * split
         */
        if (!isLeaf)
-               ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno);
-
-       flags = 0;
-       if (isLeaf)
-               flags |= GIN_LEAF;
-       if (isData)
-               flags |= GIN_DATA;
-       if (isLeaf && isData)
-               flags |= GIN_COMPRESSED;
-
-       lflags = rflags = flags;
-       if (!isRoot)
-               lflags |= GIN_INCOMPLETE_SPLIT;
-
-       lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
-       Assert(BufferIsValid(lbuffer));
-       lpage = (Page) BufferGetPage(lbuffer);
-       GinInitBuffer(lbuffer, lflags);
-
-       rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
-       Assert(BufferIsValid(rbuffer));
-       rpage = (Page) BufferGetPage(rbuffer);
-       GinInitBuffer(rbuffer, rflags);
-
-       GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
-       GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
-
-       /* Do the tree-type specific portion to restore the page contents */
-       if (isData)
-               ginRedoSplitData(lpage, rpage, payload);
-       else
-               ginRedoSplitEntry(lpage, rpage, payload);
+               ginRedoClearIncompleteSplit(record, 3);
 
-       PageSetLSN(rpage, lsn);
-       MarkBufferDirty(rbuffer);
+       if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
+               elog(ERROR, "GIN split record did not contain a full-page image of left page");
 
-       PageSetLSN(lpage, lsn);
-       MarkBufferDirty(lbuffer);
+       if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED)
+               elog(ERROR, "GIN split record did not contain a full-page image of right page");
 
        if (isRoot)
        {
-               BlockNumber rootBlkno = data->rrlink;
-               Buffer          rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
-               Page            rootPage = BufferGetPage(rootBuf);
-
-               GinInitBuffer(rootBuf, flags & ~GIN_LEAF & ~GIN_COMPRESSED);
-
-               if (isData)
-               {
-                       Assert(rootBlkno != GIN_ROOT_BLKNO);
-                       ginDataFillRoot(NULL, BufferGetPage(rootBuf),
-                                                       BufferGetBlockNumber(lbuffer),
-                                                       BufferGetPage(lbuffer),
-                                                       BufferGetBlockNumber(rbuffer),
-                                                       BufferGetPage(rbuffer));
-               }
-               else
-               {
-                       Assert(rootBlkno == GIN_ROOT_BLKNO);
-                       ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
-                                                        BufferGetBlockNumber(lbuffer),
-                                                        BufferGetPage(lbuffer),
-                                                        BufferGetBlockNumber(rbuffer),
-                                                        BufferGetPage(rbuffer));
-               }
-
-               PageSetLSN(rootPage, lsn);
-
-               MarkBufferDirty(rootBuf);
-               UnlockReleaseBuffer(rootBuf);
+               if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED)
+                       elog(ERROR, "GIN split record did not contain a full-page image of root page");
+               UnlockReleaseBuffer(rootbuf);
        }
 
        UnlockReleaseBuffer(rbuffer);
@@ -544,54 +412,30 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
  * a XLOG_FPI record.
  */
 static void
-ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoVacuumPage(XLogReaderState *record)
 {
-       ginxlogVacuumPage *xlrec = (ginxlogVacuumPage *) XLogRecGetData(record);
-       char       *blk = ((char *) xlrec) + sizeof(ginxlogVacuumPage);
        Buffer          buffer;
-       Page            page;
-
-       Assert(xlrec->hole_offset < BLCKSZ);
-       Assert(xlrec->hole_length < BLCKSZ);
-
-       /* Backup blocks are not used, we'll re-initialize the page always. */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
 
-       buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
-       if (!BufferIsValid(buffer))
-               return;
-       page = (Page) BufferGetPage(buffer);
-
-       if (xlrec->hole_length == 0)
+       if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
        {
-               memcpy((char *) page, blk, BLCKSZ);
+               elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
        }
-       else
-       {
-               memcpy((char *) page, blk, xlrec->hole_offset);
-               /* must zero-fill the hole */
-               MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
-               memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
-                          blk + xlrec->hole_offset,
-                          BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
-       }
-
-       PageSetLSN(page, lsn);
-
-       MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 }
 
 static void
-ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoVacuumDataLeafPage(XLogReaderState *record)
 {
-       ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buffer;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                Page            page = BufferGetPage(buffer);
+               Size            len;
+               ginxlogVacuumDataLeafPage *xlrec;
+
+               xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
 
                Assert(GinPageIsLeaf(page));
                Assert(GinPageIsData(page));
@@ -605,30 +449,27 @@ ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoDeletePage(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
        Buffer          dbuffer;
        Buffer          pbuffer;
        Buffer          lbuffer;
        Page            page;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(dbuffer);
-
                Assert(GinPageIsData(page));
                GinPageGetOpaque(page)->flags = GIN_DELETED;
                PageSetLSN(page, lsn);
                MarkBufferDirty(dbuffer);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno,
-                                                         &pbuffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(pbuffer);
-
                Assert(GinPageIsData(page));
                Assert(!GinPageIsLeaf(page));
                GinPageDeletePostingItem(page, data->parentOffset);
@@ -636,11 +477,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
                MarkBufferDirty(pbuffer);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno,
-                                                         &lbuffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(lbuffer);
-
                Assert(GinPageIsData(page));
                GinPageGetOpaque(page)->rightlink = data->rightLink;
                PageSetLSN(page, lsn);
@@ -656,8 +495,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoUpdateMetapage(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
        Buffer          metabuffer;
        Page            metapage;
@@ -668,9 +508,8 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
         * image, so restore the metapage unconditionally without looking at the
         * LSN, to avoid torn page hazards.
         */
-       metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
-       if (!BufferIsValid(metabuffer))
-               return;                                 /* assume index was deleted, nothing to do */
+       metabuffer = XLogInitBufferForRedo(record, 0);
+       Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
        memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
@@ -682,17 +521,18 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
                /*
                 * insert into tail page
                 */
-               if (XLogReadBufferForRedo(lsn, record, 0, data->node,
-                                                                 data->metadata.tail, &buffer)
-                       == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        Page            page = BufferGetPage(buffer);
                        OffsetNumber off;
                        int                     i;
                        Size            tupsize;
+                       char       *payload;
                        IndexTuple      tuples;
+                       Size            totaltupsize;
 
-                       tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+                       payload = XLogRecGetBlockData(record, 1, &totaltupsize);
+                       tuples = (IndexTuple) payload;
 
                        if (PageIsEmpty(page))
                                off = FirstOffsetNumber;
@@ -711,6 +551,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
 
                                off++;
                        }
+                       Assert(payload + totaltupsize == (char *) tuples);
 
                        /*
                         * Increase counter of heap tuples
@@ -728,8 +569,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
                /*
                 * New tail
                 */
-               if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        Page            page = BufferGetPage(buffer);
 
@@ -746,8 +586,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+ginRedoInsertListPage(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
@@ -755,15 +596,12 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
                                off = FirstOffsetNumber;
        int                     i,
                                tupsize;
-       IndexTuple      tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
-
-       /*
-        * Backup blocks are not used, we always re-initialize the page.
-        */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       char       *payload;
+       IndexTuple      tuples;
+       Size            totaltupsize;
 
-       buffer = XLogReadBuffer(data->node, data->blkno, true);
-       Assert(BufferIsValid(buffer));
+       /* We always re-initialize the page. */
+       buffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(buffer);
 
        GinInitBuffer(buffer, GIN_LIST);
@@ -779,6 +617,9 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
                GinPageGetOpaque(page)->maxoff = 0;
        }
 
+       payload = XLogRecGetBlockData(record, 0, &totaltupsize);
+
+       tuples = (IndexTuple) payload;
        for (i = 0; i < data->ntuples; i++)
        {
                tupsize = IndexTupleSize(tuples);
@@ -791,6 +632,7 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
                tuples = (IndexTuple) (((char *) tuples) + tupsize);
                off++;
        }
+       Assert((char *) tuples == payload + totaltupsize);
 
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
@@ -799,21 +641,20 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+ginRedoDeleteListPages(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
        Buffer          metabuffer;
        Page            metapage;
        int                     i;
 
-       /* Backup blocks are not used in delete_listpage records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
-       if (!BufferIsValid(metabuffer))
-               return;                                 /* assume index was deleted, nothing to do */
+       metabuffer = XLogInitBufferForRedo(record, 0);
+       Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
+       GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
+
        memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
        PageSetLSN(metapage, lsn);
        MarkBufferDirty(metabuffer);
@@ -838,7 +679,7 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
                Buffer          buffer;
                Page            page;
 
-               buffer = XLogReadBuffer(data->node, data->toDelete[i], true);
+               buffer = XLogInitBufferForRedo(record, i + 1);
                page = BufferGetPage(buffer);
                GinInitBuffer(buffer, GIN_DELETED);
 
@@ -851,9 +692,9 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
 }
 
 void
-gin_redo(XLogRecPtr lsn, XLogRecord *record)
+gin_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
        MemoryContext oldCtx;
 
        /*
@@ -866,34 +707,34 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
        switch (info)
        {
                case XLOG_GIN_CREATE_INDEX:
-                       ginRedoCreateIndex(lsn, record);
+                       ginRedoCreateIndex(record);
                        break;
                case XLOG_GIN_CREATE_PTREE:
-                       ginRedoCreatePTree(lsn, record);
+                       ginRedoCreatePTree(record);
                        break;
                case XLOG_GIN_INSERT:
-                       ginRedoInsert(lsn, record);
+                       ginRedoInsert(record);
                        break;
                case XLOG_GIN_SPLIT:
-                       ginRedoSplit(lsn, record);
+                       ginRedoSplit(record);
                        break;
                case XLOG_GIN_VACUUM_PAGE:
-                       ginRedoVacuumPage(lsn, record);
+                       ginRedoVacuumPage(record);
                        break;
                case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
-                       ginRedoVacuumDataLeafPage(lsn, record);
+                       ginRedoVacuumDataLeafPage(record);
                        break;
                case XLOG_GIN_DELETE_PAGE:
-                       ginRedoDeletePage(lsn, record);
+                       ginRedoDeletePage(record);
                        break;
                case XLOG_GIN_UPDATE_META_PAGE:
-                       ginRedoUpdateMetapage(lsn, record);
+                       ginRedoUpdateMetapage(record);
                        break;
                case XLOG_GIN_INSERT_LISTPAGE:
-                       ginRedoInsertListPage(lsn, record);
+                       ginRedoInsertListPage(record);
                        break;
                case XLOG_GIN_DELETE_LISTPAGE:
-                       ginRedoDeleteListPages(lsn, record);
+                       ginRedoDeleteListPages(record);
                        break;
                default:
                        elog(PANIC, "gin_redo: unknown op code %u", info);
index 644b882b7d4ba44abaeeafa5744b79fb4cf0ec5e..2141045f994be5ff8089071aad1c254fa21adfa6 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/genam.h"
 #include "access/gist_private.h"
+#include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "catalog/pg_collation.h"
 #include "miscadmin.h"
@@ -394,6 +395,14 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                        GistPageSetNSN(ptr->page, oldnsn);
                }
 
+               /*
+                * gistXLogSplit() needs to WAL-log a lot of pages; prepare WAL
+                * insertion for that. NB: The number of pages and data segments
+                * specified here must match the calculations in gistXLogSplit()!
+                */
+               if (RelationNeedsWAL(rel))
+                       XLogEnsureRecordSpace(npage, 1 + npage * 2);
+
                START_CRIT_SECTION();
 
                /*
index 2143096c66b50d16e604e7aff8c80dc20e06bbb2..5acc986585a8e2d6aeac800f867fdf8899f4c5a9 100644 (file)
@@ -183,14 +183,11 @@ gistbuild(PG_FUNCTION_ARGS)
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata;
 
-               rdata.data = (char *) &(index->rd_node);
-               rdata.len = sizeof(RelFileNode);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
+               recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
                PageSetLSN(page, recptr);
        }
        else
index 2999d211916861cbbaeff9f3928e1403ecd1ae1a..0a4f04810f281fbd47006d74d9ed5f77958456b1 100644 (file)
 #include "access/xlogutils.h"
 #include "utils/memutils.h"
 
-typedef struct
-{
-       gistxlogPage *header;
-       IndexTuple *itup;
-} NewPage;
-
-typedef struct
-{
-       gistxlogPageSplit *data;
-       NewPage    *page;
-} PageSplitRecord;
-
 static MemoryContext opCtx;            /* working memory for operations */
 
 /*
@@ -44,9 +32,9 @@ static MemoryContext opCtx;           /* working memory for operations */
  * action.)
  */
 static void
-gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
-                                                RelFileNode node, BlockNumber childblkno)
+gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buffer;
        Page            page;
        XLogRedoAction action;
@@ -55,8 +43,7 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
         * Note that we still update the page even if it was restored from a full
         * page image, because the updated NSN is not included in the image.
         */
-       action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno,
-                                                                  &buffer);
+       action = XLogReadBufferForRedo(record, block_id, &buffer);
        if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
        {
                page = BufferGetPage(buffer);
@@ -75,20 +62,23 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
  * redo any page update (except page split)
  */
 static void
-gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
+gistRedoPageUpdateRecord(XLogReaderState *record)
 {
-       char       *begin = XLogRecGetData(record);
-       gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
+       XLogRecPtr      lsn = record->EndRecPtr;
+       gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       char       *data;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(buffer);
+               char       *begin;
+               char       *data;
+               Size            datalen;
+               int                     ninserted = 0;
 
-               data = begin + sizeof(gistxlogPageUpdate);
+               data = begin = XLogRecGetBlockData(record, 0, &datalen);
+
+               page = (Page) BufferGetPage(buffer);
 
                /* Delete old tuples */
                if (xldata->ntodelete > 0)
@@ -105,12 +95,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
                }
 
                /* add tuples */
-               if (data - begin < record->xl_len)
+               if (data - begin < datalen)
                {
                        OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
                        OffsetNumberNext(PageGetMaxOffsetNumber(page));
 
-                       while (data - begin < record->xl_len)
+                       while (data - begin < datalen)
                        {
                                IndexTuple      itup = (IndexTuple) data;
                                Size            sz = IndexTupleSize(itup);
@@ -123,9 +113,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
                                        elog(ERROR, "failed to add item to GiST index page, size %d bytes",
                                                 (int) sz);
                                off++;
+                               ninserted++;
                        }
                }
 
+               Assert(ninserted == xldata->ntoinsert);
+
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
@@ -137,58 +130,51 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
         * that even if the target page no longer exists, we still attempt to
         * replay the change on the child page.
         */
-       if (BlockNumberIsValid(xldata->leftchild))
-               gistRedoClearFollowRight(lsn, record, 1,
-                                                                xldata->node, xldata->leftchild);
+       if (XLogRecHasBlockRef(record, 1))
+               gistRedoClearFollowRight(record, 1);
 
        if (BufferIsValid(buffer))
                UnlockReleaseBuffer(buffer);
 }
 
-static void
-decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
+/*
+ * Returns a palloc'd array of pointers to the tuples; sets *n to the count.
+ */
+static IndexTuple *
+decodePageSplitRecord(char *begin, int len, int *n)
 {
-       char       *begin = XLogRecGetData(record),
-                          *ptr;
-       int                     j,
-                               i = 0;
+       char       *ptr;
+       int                     i = 0;
+       IndexTuple *tuples;
+
+       /* extract the number of tuples */
+       memcpy(n, begin, sizeof(int));
+       ptr = begin + sizeof(int);
 
-       decoded->data = (gistxlogPageSplit *) begin;
-       decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
+       tuples = palloc(*n * sizeof(IndexTuple));
 
-       ptr = begin + sizeof(gistxlogPageSplit);
-       for (i = 0; i < decoded->data->npage; i++)
+       for (i = 0; i < *n; i++)
        {
-               Assert(ptr - begin < record->xl_len);
-               decoded->page[i].header = (gistxlogPage *) ptr;
-               ptr += sizeof(gistxlogPage);
-
-               decoded->page[i].itup = (IndexTuple *)
-                       palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
-               j = 0;
-               while (j < decoded->page[i].header->num)
-               {
-                       Assert(ptr - begin < record->xl_len);
-                       decoded->page[i].itup[j] = (IndexTuple) ptr;
-                       ptr += IndexTupleSize((IndexTuple) ptr);
-                       j++;
-               }
+               Assert(ptr - begin < len);
+               tuples[i] = (IndexTuple) ptr;
+               ptr += IndexTupleSize((IndexTuple) ptr);
        }
+       Assert(ptr - begin == len);
+
+       return tuples;
 }
 
 static void
-gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
+gistRedoPageSplitRecord(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
-       PageSplitRecord xlrec;
        Buffer          firstbuffer = InvalidBuffer;
        Buffer          buffer;
        Page            page;
        int                     i;
        bool            isrootsplit = false;
 
-       decodePageSplitRecord(&xlrec, record);
-
        /*
         * We must hold lock on the first-listed page throughout the action,
         * including while updating the left child page (if any).  We can unlock
@@ -198,32 +184,39 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* loop around all pages */
-       for (i = 0; i < xlrec.data->npage; i++)
+       for (i = 0; i < xldata->npage; i++)
        {
-               NewPage    *newpage = xlrec.page + i;
                int                     flags;
-
-               if (newpage->header->blkno == GIST_ROOT_BLKNO)
+               char       *data;
+               Size            datalen;
+               int                     num;
+               BlockNumber blkno;
+               IndexTuple *tuples;
+
+               XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
+               if (blkno == GIST_ROOT_BLKNO)
                {
                        Assert(i == 0);
                        isrootsplit = true;
                }
 
-               buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
-               Assert(BufferIsValid(buffer));
+               buffer = XLogInitBufferForRedo(record, i + 1);
                page = (Page) BufferGetPage(buffer);
+               data = XLogRecGetBlockData(record, i + 1, &datalen);
+
+               tuples = decodePageSplitRecord(data, datalen, &num);
 
                /* ok, clear buffer */
-               if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
+               if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
                        flags = F_LEAF;
                else
                        flags = 0;
                GISTInitBuffer(buffer, flags);
 
                /* and fill it */
-               gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);
+               gistfillbuffer(page, tuples, num, FirstOffsetNumber);
 
-               if (newpage->header->blkno == GIST_ROOT_BLKNO)
+               if (blkno == GIST_ROOT_BLKNO)
                {
                        GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
                        GistPageSetNSN(page, xldata->orignsn);
@@ -231,12 +224,17 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
                }
                else
                {
-                       if (i < xlrec.data->npage - 1)
-                               GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
+                       if (i < xldata->npage - 1)
+                       {
+                               BlockNumber nextblkno;
+
+                               XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
+                               GistPageGetOpaque(page)->rightlink = nextblkno;
+                       }
                        else
                                GistPageGetOpaque(page)->rightlink = xldata->origrlink;
                        GistPageSetNSN(page, xldata->orignsn);
-                       if (i < xlrec.data->npage - 1 && !isrootsplit &&
+                       if (i < xldata->npage - 1 && !isrootsplit &&
                                xldata->markfollowright)
                                GistMarkFollowRight(page);
                        else
@@ -253,26 +251,22 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* Fix follow-right data on left child page, if any */
-       if (BlockNumberIsValid(xldata->leftchild))
-               gistRedoClearFollowRight(lsn, record, 0,
-                                                                xldata->node, xldata->leftchild);
+       if (XLogRecHasBlockRef(record, 0))
+               gistRedoClearFollowRight(record, 0);
 
        /* Finally, release lock on the first page */
        UnlockReleaseBuffer(firstbuffer);
 }
 
 static void
-gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
+gistRedoCreateIndex(XLogReaderState *record)
 {
-       RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buffer;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
+       Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
        page = (Page) BufferGetPage(buffer);
 
        GISTInitBuffer(buffer, F_LEAF);
@@ -284,9 +278,9 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 }
 
 void
-gist_redo(XLogRecPtr lsn, XLogRecord *record)
+gist_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
        MemoryContext oldCxt;
 
        /*
@@ -299,13 +293,13 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record)
        switch (info)
        {
                case XLOG_GIST_PAGE_UPDATE:
-                       gistRedoPageUpdateRecord(lsn, record);
+                       gistRedoPageUpdateRecord(record);
                        break;
                case XLOG_GIST_PAGE_SPLIT:
-                       gistRedoPageSplitRecord(lsn, record);
+                       gistRedoPageSplitRecord(record);
                        break;
                case XLOG_GIST_CREATE_INDEX:
-                       gistRedoCreateIndex(lsn, record);
+                       gistRedoCreateIndex(record);
                        break;
                default:
                        elog(PANIC, "gist_redo: unknown op code %u", info);
@@ -336,70 +330,49 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
                          BlockNumber origrlink, GistNSN orignsn,
                          Buffer leftchildbuf, bool markfollowright)
 {
-       XLogRecData rdata[GIST_MAX_SPLIT_PAGES * 2 + 2];
        gistxlogPageSplit xlrec;
        SplitedPageLayout *ptr;
-       int                     npage = 0,
-                               cur;
+       int                     npage = 0;
        XLogRecPtr      recptr;
+       int                     i;
 
        for (ptr = dist; ptr; ptr = ptr->next)
                npage++;
 
-       /*
-        * the caller should've checked this already, but doesn't hurt to check
-        * again.
-        */
-       if (npage > GIST_MAX_SPLIT_PAGES)
-               elog(ERROR, "GiST page split into too many halves");
-
-       xlrec.node = node;
-       xlrec.origblkno = blkno;
        xlrec.origrlink = origrlink;
        xlrec.orignsn = orignsn;
        xlrec.origleaf = page_is_leaf;
        xlrec.npage = (uint16) npage;
-       xlrec.leftchild =
-               BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
        xlrec.markfollowright = markfollowright;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = sizeof(gistxlogPageSplit);
-       rdata[0].buffer = InvalidBuffer;
-
-       cur = 1;
+       XLogBeginInsert();
 
        /*
         * Include a full page image of the child buf. (only necessary if a
         * checkpoint happened since the child page was split)
         */
        if (BufferIsValid(leftchildbuf))
-       {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].data = NULL;
-               rdata[cur].len = 0;
-               rdata[cur].buffer = leftchildbuf;
-               rdata[cur].buffer_std = true;
-               cur++;
-       }
+               XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
 
+       /*
+        * NOTE: We register a lot of data. The caller must've called
+        * XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
+        * because we're already in a critical section. If you change the number
+        * of buffer or data registrations here, make sure you modify the
+        * XLogEnsureRecordSpace() calls accordingly!
+        */
+       XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
+
+       i = 1;
        for (ptr = dist; ptr; ptr = ptr->next)
        {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].buffer = InvalidBuffer;
-               rdata[cur].data = (char *) &(ptr->block);
-               rdata[cur].len = sizeof(gistxlogPage);
-               cur++;
-
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].buffer = InvalidBuffer;
-               rdata[cur].data = (char *) (ptr->list);
-               rdata[cur].len = ptr->lenlist;
-               cur++;
+               XLogRegisterBuffer(i, ptr->buffer, REGBUF_WILL_INIT);
+               XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
+               XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
+               i++;
        }
-       rdata[cur - 1].next = NULL;
 
-       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
+       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
 
        return recptr;
 }
@@ -413,9 +386,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
  *
  * Note that both the todelete array and the tuples are marked as belonging
  * to the target buffer; they need not be stored in XLOG if XLogInsert decides
- * to log the whole buffer contents instead.  Also, we take care that there's
- * at least one rdata item referencing the buffer, even when ntodelete and
- * ituplen are both zero; this ensures that XLogInsert knows about the buffer.
+ * to log the whole buffer contents instead.
  */
 XLogRecPtr
 gistXLogUpdate(RelFileNode node, Buffer buffer,
@@ -423,57 +394,31 @@ gistXLogUpdate(RelFileNode node, Buffer buffer,
                           IndexTuple *itup, int ituplen,
                           Buffer leftchildbuf)
 {
-       XLogRecData rdata[MaxIndexTuplesPerPage + 3];
        gistxlogPageUpdate xlrec;
-       int                     cur,
-                               i;
+       int                     i;
        XLogRecPtr      recptr;
 
-       xlrec.node = node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
        xlrec.ntodelete = ntodelete;
-       xlrec.leftchild =
-               BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
-
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = sizeof(gistxlogPageUpdate);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       xlrec.ntoinsert = ituplen;
 
-       rdata[1].data = (char *) todelete;
-       rdata[1].len = sizeof(OffsetNumber) * ntodelete;
-       rdata[1].buffer = buffer;
-       rdata[1].buffer_std = true;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
 
-       cur = 2;
+       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+       XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
 
        /* new tuples */
        for (i = 0; i < ituplen; i++)
-       {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].data = (char *) (itup[i]);
-               rdata[cur].len = IndexTupleSize(itup[i]);
-               rdata[cur].buffer = buffer;
-               rdata[cur].buffer_std = true;
-               cur++;
-       }
+               XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
 
        /*
         * Include a full page image of the child buf. (only necessary if a
         * checkpoint happened since the child page was split)
         */
        if (BufferIsValid(leftchildbuf))
-       {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].data = NULL;
-               rdata[cur].len = 0;
-               rdata[cur].buffer = leftchildbuf;
-               rdata[cur].buffer_std = true;
-               cur++;
-       }
-       rdata[cur - 1].next = NULL;
+               XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
 
-       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
+       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
 
        return recptr;
 }
index 925a58f4f64544895eb7882ffd0e44b861f8bbde..673459fd6c14441360f49f6176bfdb565017e47d 100644 (file)
@@ -700,7 +700,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS)
 
 
 void
-hash_redo(XLogRecPtr lsn, XLogRecord *record)
+hash_redo(XLogReaderState *record)
 {
        elog(PANIC, "hash_redo: unimplemented");
 }
index 1763b70631d4d15fd43d1a5f66faa8d38457a4f3..c6e1eb79b2c912c66842d532f16136f9b30c8fda 100644 (file)
@@ -2132,84 +2132,64 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
                xl_heap_insert xlrec;
                xl_heap_header xlhdr;
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
                Page            page = BufferGetPage(buffer);
                uint8           info = XLOG_HEAP_INSERT;
-               bool            need_tuple_data;
+               int                     bufflags = 0;
 
                /*
-                * For logical decoding, we need the tuple even if we're doing a full
-                * page write, so make sure to log it separately. (XXX We could
-                * alternatively store a pointer into the FPW).
-                *
-                * Also, if this is a catalog, we need to transmit combocids to
-                * properly decode, so log that as well.
+                * If this is a catalog, we need to transmit combocids to properly
+                * decode, so log that as well.
                 */
-               need_tuple_data = RelationIsLogicallyLogged(relation);
                if (RelationIsAccessibleInLogicalDecoding(relation))
                        log_heap_new_cid(relation, heaptup);
 
-               xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = heaptup->t_self;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapInsert;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
-               xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
-               xlhdr.t_infomask = heaptup->t_data->t_infomask;
-               xlhdr.t_hoff = heaptup->t_data->t_hoff;
-
                /*
-                * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
-                * to write the whole page to the xlog, we don't need to store
-                * xl_heap_header in the xlog.
+                * If this is the single and first tuple on page, we can reinit the
+                * page instead of restoring the whole thing.  Set flag, and remember to
+                * register the buffer with REGBUF_WILL_INIT.
                 */
-               rdata[1].data = (char *) &xlhdr;
-               rdata[1].len = SizeOfHeapHeader;
-               rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = &(rdata[2]);
+               if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
+                       PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
+               {
+                       info |= XLOG_HEAP_INIT_PAGE;
+                       bufflags |= REGBUF_WILL_INIT;
+               }
 
-               /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
-               rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
-               rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
-               rdata[2].buffer = need_tuple_data ? InvalidBuffer : buffer;
-               rdata[2].buffer_std = true;
-               rdata[2].next = NULL;
+               xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
+               xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+               Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
 
                /*
-                * Make a separate rdata entry for the tuple's buffer if we're doing
-                * logical decoding, so that an eventual FPW doesn't remove the
-                * tuple's data.
+                * For logical decoding, we need the tuple even if we're doing a full
+                * page write, so make sure it's included even if we take a full-page
+                * image. (XXX We could alternatively store a pointer into the FPW).
                 */
-               if (need_tuple_data)
+               if (RelationIsLogicallyLogged(relation))
                {
-                       rdata[2].next = &(rdata[3]);
-
-                       rdata[3].data = NULL;
-                       rdata[3].len = 0;
-                       rdata[3].buffer = buffer;
-                       rdata[3].buffer_std = true;
-                       rdata[3].next = NULL;
-
                        xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+                       bufflags |= REGBUF_KEEP_DATA;
                }
 
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
+
+               xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
+               xlhdr.t_infomask = heaptup->t_data->t_infomask;
+               xlhdr.t_hoff = heaptup->t_data->t_hoff;
+
                /*
-                * If this is the single and first tuple on page, we can reinit the
-                * page instead of restoring the whole thing.  Set flag, and hide
-                * buffer references from XLogInsert.
+                * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
+                * write the whole page to the xlog, we don't need to store
+                * xl_heap_header in the xlog.
                 */
-               if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
-                       PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
-               {
-                       info |= XLOG_HEAP_INIT_PAGE;
-                       rdata[1].buffer = rdata[2].buffer = rdata[3].buffer = InvalidBuffer;
-               }
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+               XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
+               /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+               XLogRegisterBufData(0,
+                       (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+                                        heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits));
 
-               recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, info);
 
                PageSetLSN(page, recptr);
        }
@@ -2397,6 +2377,13 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                                break;
 
                        RelationPutHeapTuple(relation, buffer, heaptup);
+
+                       /*
+                        * We don't use heap_multi_insert for catalog tuples yet, but
+                        * better be prepared...
+                        */
+                       if (needwal && need_cids)
+                               log_heap_new_cid(relation, heaptup);
                }
 
                if (PageIsAllVisible(page))
@@ -2419,12 +2406,12 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                {
                        XLogRecPtr      recptr;
                        xl_heap_multi_insert *xlrec;
-                       XLogRecData rdata[3];
                        uint8           info = XLOG_HEAP2_MULTI_INSERT;
                        char       *tupledata;
                        int                     totaldatalen;
                        char       *scratchptr = scratch;
                        bool            init;
+                       int                     bufflags = 0;
 
                        /*
                         * If the page was previously empty, we can reinit the page
@@ -2450,8 +2437,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                        tupledata = scratchptr;
 
                        xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
-                       xlrec->node = relation->rd_node;
-                       xlrec->blkno = BufferGetBlockNumber(buffer);
                        xlrec->ntuples = nthispage;
 
                        /*
@@ -2481,64 +2466,40 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                                           datalen);
                                tuphdr->datalen = datalen;
                                scratchptr += datalen;
-
-                               /*
-                                * We don't use heap_multi_insert for catalog tuples yet, but
-                                * better be prepared...
-                                */
-                               if (need_cids)
-                                       log_heap_new_cid(relation, heaptup);
                        }
                        totaldatalen = scratchptr - tupledata;
                        Assert((scratchptr - scratch) < BLCKSZ);
 
-                       rdata[0].data = (char *) xlrec;
-                       rdata[0].len = tupledata - scratch;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &rdata[1];
-
-                       rdata[1].data = tupledata;
-                       rdata[1].len = totaldatalen;
-                       rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = NULL;
-
-                       /*
-                        * Make a separate rdata entry for the tuple's buffer if we're
-                        * doing logical decoding, so that an eventual FPW doesn't remove
-                        * the tuple's data.
-                        */
                        if (need_tuple_data)
-                       {
-                               rdata[1].next = &(rdata[2]);
-
-                               rdata[2].data = NULL;
-                               rdata[2].len = 0;
-                               rdata[2].buffer = buffer;
-                               rdata[2].buffer_std = true;
-                               rdata[2].next = NULL;
                                xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
-                       }
 
                        /*
-                        * If we're going to reinitialize the whole page using the WAL
-                        * record, hide buffer reference from XLogInsert.
+                        * Signal that this is the last xl_heap_multi_insert record
+                        * emitted by this call to heap_multi_insert(). Needed for logical
+                        * decoding so it knows when to clean up temporary data.
                         */
+                       if (ndone + nthispage == ntuples)
+                               xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+
                        if (init)
                        {
-                               rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
                                info |= XLOG_HEAP_INIT_PAGE;
+                               bufflags |= REGBUF_WILL_INIT;
                        }
 
                        /*
-                        * Signal that this is the last xl_heap_multi_insert record
-                        * emitted by this call to heap_multi_insert(). Needed for logical
-                        * decoding so it knows when to cleanup temporary data.
+                        * If we're doing logical decoding, include the new tuple data
+                        * even if we take a full-page image of the page.
                         */
-                       if (ndone + nthispage == ntuples)
-                               xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+                       if (need_tuple_data)
+                               bufflags |= REGBUF_KEEP_DATA;
+
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) xlrec, tupledata - scratch);
+                       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
 
-                       recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+                       XLogRegisterBufData(0, tupledata, totaldatalen);
+                       recptr = XLogInsert(RM_HEAP2_ID, info);
 
                        PageSetLSN(page, recptr);
                }
@@ -2909,7 +2870,6 @@ l1:
        {
                xl_heap_delete xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
 
                /* For logical decode we need combocids to properly decode the catalog */
                if (RelationIsAccessibleInLogicalDecoding(relation))
@@ -2918,19 +2878,21 @@ l1:
                xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
                xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
                                                                                          tp.t_data->t_infomask2);
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = tp.t_self;
+               xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
                xlrec.xmax = new_xmax;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapDelete;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               if (old_key_tuple != NULL)
+               {
+                       if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+                       else
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+               }
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
                /*
                 * Log replica identity of the deleted tuple if there is one
@@ -2943,27 +2905,14 @@ l1:
                        xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
                        xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
 
-                       rdata[1].next = &(rdata[2]);
-                       rdata[2].data = (char *) &xlhdr;
-                       rdata[2].len = SizeOfHeapHeader;
-                       rdata[2].buffer = InvalidBuffer;
-                       rdata[2].next = NULL;
-
-                       rdata[2].next = &(rdata[3]);
-                       rdata[3].data = (char *) old_key_tuple->t_data
-                               + offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[3].len = old_key_tuple->t_len
-                               - offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[3].buffer = InvalidBuffer;
-                       rdata[3].next = NULL;
-
-                       if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
-                       else
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+                       XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
+                       XLogRegisterData((char *) old_key_tuple->t_data
+                                                        + offsetof(HeapTupleHeaderData, t_bits),
+                                                        old_key_tuple->t_len
+                                                        - offsetof(HeapTupleHeaderData, t_bits));
                }
 
-               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
 
                PageSetLSN(page, recptr);
        }
@@ -4735,25 +4684,17 @@ failed:
        {
                xl_heap_lock xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = tuple->t_self;
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+
+               xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
                xlrec.locking_xid = xid;
                xlrec.infobits_set = compute_infobits(new_infomask,
                                                                                          tuple->t_data->t_infomask2);
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapLock;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = *buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
 
-               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
 
                PageSetLSN(page, recptr);
        }
@@ -5342,26 +5283,18 @@ l4:
                {
                        xl_heap_lock_updated xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[2];
                        Page            page = BufferGetPage(buf);
 
-                       xlrec.target.node = rel->rd_node;
-                       xlrec.target.tid = mytup.t_self;
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+
+                       xlrec.offnum = ItemPointerGetOffsetNumber(&mytup.t_self);
                        xlrec.xmax = new_xmax;
                        xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2);
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfHeapLockUpdated;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &(rdata[1]);
+                       XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated);
 
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-                       rdata[1].buffer = buf;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = NULL;
-
-                       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, rdata);
+                       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED);
 
                        PageSetLSN(page, recptr);
                }
@@ -5489,23 +5422,16 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
        {
                xl_heap_inplace xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = tuple->t_self;
+               xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapInplace;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
 
-               rdata[1].data = (char *) htup + htup->t_hoff;
-               rdata[1].len = newlen;
-               rdata[1].buffer = buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
 
-               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
                PageSetLSN(page, recptr);
        }
@@ -6507,17 +6433,14 @@ log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
 {
        xl_heap_cleanup_info xlrec;
        XLogRecPtr      recptr;
-       XLogRecData rdata;
 
        xlrec.node = rnode;
        xlrec.latestRemovedXid = latestRemovedXid;
 
-       rdata.data = (char *) &xlrec;
-       rdata.len = SizeOfHeapCleanupInfo;
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo);
 
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, &rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO);
 
        return recptr;
 }
@@ -6542,23 +6465,19 @@ log_heap_clean(Relation reln, Buffer buffer,
                           TransactionId latestRemovedXid)
 {
        xl_heap_clean xlrec;
-       uint8           info;
        XLogRecPtr      recptr;
-       XLogRecData rdata[4];
 
        /* Caller should not call me on a non-WAL-logged relation */
        Assert(RelationNeedsWAL(reln));
 
-       xlrec.node = reln->rd_node;
-       xlrec.block = BufferGetBlockNumber(buffer);
        xlrec.latestRemovedXid = latestRemovedXid;
        xlrec.nredirected = nredirected;
        xlrec.ndead = ndead;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapClean;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapClean);
+
+       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
        /*
         * The OffsetNumber arrays are not actually in the buffer, but we pretend
@@ -6569,49 +6488,18 @@ log_heap_clean(Relation reln, Buffer buffer,
         * even if no item pointers changed state.
         */
        if (nredirected > 0)
-       {
-               rdata[1].data = (char *) redirected;
-               rdata[1].len = nredirected * sizeof(OffsetNumber) * 2;
-       }
-       else
-       {
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-       }
-       rdata[1].buffer = buffer;
-       rdata[1].buffer_std = true;
-       rdata[1].next = &(rdata[2]);
+               XLogRegisterBufData(0, (char *) redirected,
+                                                       nredirected * sizeof(OffsetNumber) * 2);
 
        if (ndead > 0)
-       {
-               rdata[2].data = (char *) nowdead;
-               rdata[2].len = ndead * sizeof(OffsetNumber);
-       }
-       else
-       {
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-       }
-       rdata[2].buffer = buffer;
-       rdata[2].buffer_std = true;
-       rdata[2].next = &(rdata[3]);
+               XLogRegisterBufData(0, (char *) nowdead,
+                                                       ndead * sizeof(OffsetNumber));
 
        if (nunused > 0)
-       {
-               rdata[3].data = (char *) nowunused;
-               rdata[3].len = nunused * sizeof(OffsetNumber);
-       }
-       else
-       {
-               rdata[3].data = NULL;
-               rdata[3].len = 0;
-       }
-       rdata[3].buffer = buffer;
-       rdata[3].buffer_std = true;
-       rdata[3].next = NULL;
+               XLogRegisterBufData(0, (char *) nowunused,
+                                                       nunused * sizeof(OffsetNumber));
 
-       info = XLOG_HEAP2_CLEAN;
-       recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN);
 
        return recptr;
 }
@@ -6626,35 +6514,28 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
 {
        xl_heap_freeze_page xlrec;
        XLogRecPtr      recptr;
-       XLogRecData rdata[2];
 
        /* Caller should not call me on a non-WAL-logged relation */
        Assert(RelationNeedsWAL(reln));
        /* nor when there are no tuples to freeze */
        Assert(ntuples > 0);
 
-       xlrec.node = reln->rd_node;
-       xlrec.block = BufferGetBlockNumber(buffer);
        xlrec.cutoff_xid = cutoff_xid;
        xlrec.ntuples = ntuples;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapFreezePage;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
 
        /*
         * The freeze plan array is not actually in the buffer, but pretend that
         * it is.  When XLogInsert stores the whole buffer, the freeze plan need
         * not be stored too.
         */
-       rdata[1].data = (char *) tuples;
-       rdata[1].len = ntuples * sizeof(xl_heap_freeze_tuple);
-       rdata[1].buffer = buffer;
-       rdata[1].buffer_std = true;
-       rdata[1].next = NULL;
+       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+       XLogRegisterBufData(0, (char *) tuples,
+                                               ntuples * sizeof(xl_heap_freeze_tuple));
 
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE);
 
        return recptr;
 }
@@ -6665,8 +6546,8 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
  * corresponding visibility map block.  Both should have already been modified
  * and dirtied.
  *
- * If checksums are enabled, we also add the heap_buffer to the chain to
- * protect it from being torn.
+ * If checksums are enabled, we also generate a full-page image of
+ * heap_buffer, if necessary.
  */
 XLogRecPtr
 log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
@@ -6674,38 +6555,23 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
 {
        xl_heap_visible xlrec;
        XLogRecPtr      recptr;
-       XLogRecData rdata[3];
+       uint8           flags;
 
        Assert(BufferIsValid(heap_buffer));
        Assert(BufferIsValid(vm_buffer));
 
-       xlrec.node = rnode;
-       xlrec.block = BufferGetBlockNumber(heap_buffer);
        xlrec.cutoff_xid = cutoff_xid;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapVisible;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogRegisterBuffer(0, vm_buffer, 0);
 
-       rdata[1].data = NULL;
-       rdata[1].len = 0;
-       rdata[1].buffer = vm_buffer;
-       rdata[1].buffer_std = false;
-       rdata[1].next = NULL;
+       flags = REGBUF_STANDARD;
+       if (!XLogHintBitIsNeeded())
+               flags |= REGBUF_NO_IMAGE;
+       XLogRegisterBuffer(1, heap_buffer, flags);
 
-       if (XLogHintBitIsNeeded())
-       {
-               rdata[1].next = &(rdata[2]);
-
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = heap_buffer;
-               rdata[2].buffer_std = true;
-               rdata[2].next = NULL;
-       }
-
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE);
 
        return recptr;
 }
@@ -6721,22 +6587,23 @@ log_heap_update(Relation reln, Buffer oldbuf,
                                bool all_visible_cleared, bool new_all_visible_cleared)
 {
        xl_heap_update xlrec;
-       xl_heap_header_len xlhdr;
-       xl_heap_header_len xlhdr_idx;
+       xl_heap_header xlhdr;
+       xl_heap_header xlhdr_idx;
        uint8           info;
        uint16          prefix_suffix[2];
        uint16          prefixlen = 0,
                                suffixlen = 0;
        XLogRecPtr      recptr;
-       XLogRecData rdata[9];
        Page            page = BufferGetPage(newbuf);
        bool            need_tuple_data = RelationIsLogicallyLogged(reln);
-       int                     nr;
-       Buffer          newbufref;
+       bool            init;
+       int                     bufflags;
 
        /* Caller should not call me on a non-WAL-logged relation */
        Assert(RelationNeedsWAL(reln));
 
+       XLogBeginInsert();
+
        if (HeapTupleIsHeapOnly(newtup))
                info = XLOG_HEAP_HOT_UPDATE;
        else
@@ -6794,103 +6661,97 @@ log_heap_update(Relation reln, Buffer oldbuf,
                        suffixlen = 0;
        }
 
-       xlrec.target.node = reln->rd_node;
-       xlrec.target.tid = oldtup->t_self;
-       xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
-       xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
-                                                                                         oldtup->t_data->t_infomask2);
-       xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+       /* Prepare main WAL data chain */
        xlrec.flags = 0;
        if (all_visible_cleared)
                xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
-       xlrec.newtid = newtup->t_self;
        if (new_all_visible_cleared)
                xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED;
        if (prefixlen > 0)
                xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD;
        if (suffixlen > 0)
                xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD;
+       if (need_tuple_data)
+       {
+               xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+               if (old_key_tuple)
+               {
+                       if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+                       else
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+               }
+       }
 
        /* If new tuple is the single and first tuple on page... */
        if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
                PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
        {
                info |= XLOG_HEAP_INIT_PAGE;
-               newbufref = InvalidBuffer;
+               init = true;
        }
        else
-               newbufref = newbuf;
+               init = false;
 
-       rdata[0].data = NULL;
-       rdata[0].len = 0;
-       rdata[0].buffer = oldbuf;
-       rdata[0].buffer_std = true;
-       rdata[0].next = &(rdata[1]);
+       /* Prepare WAL data for the old page */
+       xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
+       xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+       xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+                                                                                         oldtup->t_data->t_infomask2);
+
+       /* Prepare WAL data for the new page */
+       xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
+       xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+
+       bufflags = REGBUF_STANDARD;
+       if (init)
+               bufflags |= REGBUF_WILL_INIT;
+       if (need_tuple_data)
+               bufflags |= REGBUF_KEEP_DATA;
 
-       rdata[1].data = (char *) &xlrec;
-       rdata[1].len = SizeOfHeapUpdate;
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].next = &(rdata[2]);
+       XLogRegisterBuffer(0, newbuf, bufflags);
+       if (oldbuf != newbuf)
+               XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD);
 
-       /* prefix and/or suffix length fields */
+       XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate);
+
+       /*
+        * Prepare WAL data for the new tuple.
+        */
        if (prefixlen > 0 || suffixlen > 0)
        {
                if (prefixlen > 0 && suffixlen > 0)
                {
                        prefix_suffix[0] = prefixlen;
                        prefix_suffix[1] = suffixlen;
-                       rdata[2].data = (char *) &prefix_suffix;
-                       rdata[2].len = 2 * sizeof(uint16);
+                       XLogRegisterBufData(0, (char *) &prefix_suffix, sizeof(uint16) * 2);
                }
                else if (prefixlen > 0)
                {
-                       rdata[2].data = (char *) &prefixlen;
-                       rdata[2].len = sizeof(uint16);
+                       XLogRegisterBufData(0, (char *) &prefixlen, sizeof(uint16));
                }
                else
                {
-                       rdata[2].data = (char *) &suffixlen;
-                       rdata[2].len = sizeof(uint16);
+                       XLogRegisterBufData(0, (char *) &suffixlen, sizeof(uint16));
                }
-               rdata[2].buffer = newbufref;
-               rdata[2].buffer_std = true;
-               rdata[2].next = &(rdata[3]);
-               nr = 3;
        }
-       else
-               nr = 2;
-
-       xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
-       xlhdr.header.t_infomask = newtup->t_data->t_infomask;
-       xlhdr.header.t_hoff = newtup->t_data->t_hoff;
-       Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
-       xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
 
-       /*
-        * As with insert records, we need not store this rdata segment if we
-        * decide to store the whole buffer instead, unless we're doing logical
-        * decoding.
-        */
-       rdata[nr].data = (char *) &xlhdr;
-       rdata[nr].len = SizeOfHeapHeaderLen;
-       rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-       rdata[nr].buffer_std = true;
-       rdata[nr].next = &(rdata[nr + 1]);
-       nr++;
+       xlhdr.t_infomask2 = newtup->t_data->t_infomask2;
+       xlhdr.t_infomask = newtup->t_data->t_infomask;
+       xlhdr.t_hoff = newtup->t_data->t_hoff;
+       Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
 
        /*
         * PG73FORMAT: write bitmap [+ padding] [+ oid] + data
         *
         * The 'data' doesn't include the common prefix or suffix.
         */
+       XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
        if (prefixlen == 0)
        {
-               rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
-               rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
-               rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-               rdata[nr].buffer_std = true;
-               rdata[nr].next = NULL;
-               nr++;
+               XLogRegisterBufData(0,
+                  ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+                  newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen);
        }
        else
        {
@@ -6901,75 +6762,33 @@ log_heap_update(Relation reln, Buffer oldbuf,
                /* bitmap [+ padding] [+ oid] */
                if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
                {
-                       rdata[nr - 1].next = &(rdata[nr]);
-                       rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].len = newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-                       rdata[nr].buffer_std = true;
-                       rdata[nr].next = NULL;
-                       nr++;
+                       XLogRegisterBufData(0,
+                       ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+                        newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits));
                }
 
                /* data after common prefix */
-               rdata[nr - 1].next = &(rdata[nr]);
-               rdata[nr].data = ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen;
-               rdata[nr].len = newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen;
-               rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-               rdata[nr].buffer_std = true;
-               rdata[nr].next = NULL;
-               nr++;
+               XLogRegisterBufData(0,
+                         ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen,
+                        newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen);
        }
 
-       /*
-        * Separate storage for the FPW buffer reference of the new page in the
-        * wal_level >= logical case.
-        */
-       if (need_tuple_data)
+       /* We need to log a tuple identity */
+       if (need_tuple_data && old_key_tuple)
        {
-               rdata[nr - 1].next = &(rdata[nr]);
-
-               rdata[nr].data = NULL,
-                       rdata[nr].len = 0;
-               rdata[nr].buffer = newbufref;
-               rdata[nr].buffer_std = true;
-               rdata[nr].next = NULL;
-               nr++;
-
-               xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+               /* don't really need this, but it's more comfy to decode */
+               xlhdr_idx.t_infomask2 = old_key_tuple->t_data->t_infomask2;
+               xlhdr_idx.t_infomask = old_key_tuple->t_data->t_infomask;
+               xlhdr_idx.t_hoff = old_key_tuple->t_data->t_hoff;
 
-               /* We need to log a tuple identity */
-               if (old_key_tuple)
-               {
-                       /* don't really need this, but its more comfy to decode */
-                       xlhdr_idx.header.t_infomask2 = old_key_tuple->t_data->t_infomask2;
-                       xlhdr_idx.header.t_infomask = old_key_tuple->t_data->t_infomask;
-                       xlhdr_idx.header.t_hoff = old_key_tuple->t_data->t_hoff;
-                       xlhdr_idx.t_len = old_key_tuple->t_len;
-
-                       rdata[nr - 1].next = &(rdata[nr]);
-                       rdata[nr].data = (char *) &xlhdr_idx;
-                       rdata[nr].len = SizeOfHeapHeaderLen;
-                       rdata[nr].buffer = InvalidBuffer;
-                       rdata[nr].next = &(rdata[nr + 1]);
-                       nr++;
-
-                       /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
-                       rdata[nr].data = (char *) old_key_tuple->t_data
-                               + offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].len = old_key_tuple->t_len
-                               - offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].buffer = InvalidBuffer;
-                       rdata[nr].next = NULL;
-                       nr++;
+               XLogRegisterData((char *) &xlhdr_idx, SizeOfHeapHeader);
 
-                       if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
-                       else
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
-               }
+               /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+               XLogRegisterData((char *) old_key_tuple->t_data + offsetof(HeapTupleHeaderData, t_bits),
+                          old_key_tuple->t_len - offsetof(HeapTupleHeaderData, t_bits));
        }
 
-       recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+       recptr = XLogInsert(RM_HEAP_ID, info);
 
        return recptr;
 }
@@ -6986,15 +6805,14 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
        xl_heap_new_cid xlrec;
 
        XLogRecPtr      recptr;
-       XLogRecData rdata[1];
        HeapTupleHeader hdr = tup->t_data;
 
        Assert(ItemPointerIsValid(&tup->t_self));
        Assert(tup->t_tableOid != InvalidOid);
 
        xlrec.top_xid = GetTopTransactionId();
-       xlrec.target.node = relation->rd_node;
-       xlrec.target.tid = tup->t_self;
+       xlrec.target_node = relation->rd_node;
+       xlrec.target_tid = tup->t_self;
 
        /*
         * If the tuple got inserted & deleted in the same TX we definitely have a
@@ -7035,12 +6853,15 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
                xlrec.combocid = InvalidCommandId;
        }
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapNewCid;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = NULL;
+       /*
+        * Note that we don't need to register the buffer here, because this
+        * operation does not modify the page. The insert/update/delete that
+        * called us certainly did, but that's WAL-logged separately.
+        */
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapNewCid);
 
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID);
 
        return recptr;
 }
@@ -7165,7 +6986,7 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, bool *
  * Handles CLEANUP_INFO
  */
 static void
-heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_cleanup_info(XLogReaderState *record)
 {
        xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) XLogRecGetData(record);
 
@@ -7179,15 +7000,16 @@ heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* Backup blocks are not used in cleanup_info records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 }
 
 /*
  * Handles HEAP2_CLEAN record type
  */
 static void
-heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_clean(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);
        Buffer          buffer;
        Size            freespace = 0;
@@ -7195,8 +7017,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
        BlockNumber blkno;
        XLogRedoAction action;
 
-       rnode = xlrec->node;
-       blkno = xlrec->block;
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
 
        /*
         * We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -7213,9 +7034,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
         * If we have a full-page image, restore it (using a cleanup lock) and
         * we're done.
         */
-       action = XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                                  rnode, MAIN_FORKNUM, blkno,
-                                                                                  RBM_NORMAL, true, &buffer);
+       action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true,
+                                                                                  &buffer);
        if (action == BLK_NEEDS_REDO)
        {
                Page            page = (Page) BufferGetPage(buffer);
@@ -7226,11 +7046,13 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
                int                     nredirected;
                int                     ndead;
                int                     nunused;
+               Size            datalen;
+
+               redirected = (OffsetNumber *) XLogRecGetBlockData(record, 0, &datalen);
 
                nredirected = xlrec->nredirected;
                ndead = xlrec->ndead;
-               end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
-               redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
+               end = (OffsetNumber *) ((char *) redirected + datalen);
                nowdead = redirected + (nredirected * 2);
                nowunused = nowdead + ndead;
                nunused = (end - nowunused);
@@ -7263,7 +7085,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
         * totally accurate anyway.
         */
        if (action == BLK_NEEDS_REDO)
-               XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
+               XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
 }
 
 /*
@@ -7275,17 +7097,18 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
  * page modification would fail to clear the visibility map bit.
  */
 static void
-heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_visible(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
+       Buffer          vmbuffer = InvalidBuffer;
        Buffer          buffer;
        Page            page;
        RelFileNode rnode;
        BlockNumber blkno;
        XLogRedoAction action;
 
-       rnode = xlrec->node;
-       blkno = xlrec->block;
+       XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
 
        /*
         * If there are any Hot Standby transactions running that have an xmin
@@ -7304,7 +7127,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
         * truncated later in recovery, we don't need to update the page, but we'd
         * better still update the visibility map.
         */
-       action = XLogReadBufferForRedo(lsn, record, 1, rnode, blkno, &buffer);
+       action = XLogReadBufferForRedo(record, 1, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
                /*
@@ -7341,12 +7164,21 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
         * the visibility map bit does so before checking the page LSN, so any
         * bits that need to be cleared will still be cleared.
         */
-       if (record->xl_info & XLR_BKP_BLOCK(0))
-               (void) RestoreBackupBlock(lsn, record, 0, false, false);
-       else
+       if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
+                                                                         &vmbuffer) == BLK_NEEDS_REDO)
        {
+               Page            vmpage = BufferGetPage(vmbuffer);
                Relation        reln;
-               Buffer          vmbuffer = InvalidBuffer;
+
+               /* initialize the page if it was read as zeros */
+               if (PageIsNew(vmpage))
+                       PageInit(vmpage, BLCKSZ, 0);
+
+               /*
+                * XLogReadBufferForRedoExtended locked the buffer. But
+                * visibilitymap_set will handle locking itself.
+                */
+               LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
                reln = CreateFakeRelcacheEntry(rnode);
                visibilitymap_pin(reln, blkno, &vmbuffer);
@@ -7362,25 +7194,27 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
                 * we did for the heap page.  If this results in a dropped bit, no
                 * real harm is done; and the next VACUUM will fix it.
                 */
-               if (lsn > PageGetLSN(BufferGetPage(vmbuffer)))
+               if (lsn > PageGetLSN(vmpage))
                        visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
                                                          xlrec->cutoff_xid);
 
                ReleaseBuffer(vmbuffer);
                FreeFakeRelcacheEntry(reln);
        }
+       else if (BufferIsValid(vmbuffer))
+               UnlockReleaseBuffer(vmbuffer);
 }
 
 /*
  * Replay XLOG_HEAP2_FREEZE_PAGE records
  */
 static void
-heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_freeze_page(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) XLogRecGetData(record);
        TransactionId cutoff_xid = xlrec->cutoff_xid;
        Buffer          buffer;
-       Page            page;
        int                     ntup;
 
        /*
@@ -7388,12 +7222,19 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
         * consider the frozen xids as running.
         */
        if (InHotStandby)
-               ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
+       {
+               RelFileNode rnode;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
-                                                         &buffer) == BLK_NEEDS_REDO)
+               XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+               ResolveRecoveryConflictWithSnapshot(cutoff_xid, rnode);
+       }
+
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               page = BufferGetPage(buffer);
+               Page            page = BufferGetPage(buffer);
+               xl_heap_freeze_tuple *tuples;
+
+               tuples = (xl_heap_freeze_tuple *) XLogRecGetBlockData(record, 0, NULL);
 
                /* now execute freeze plan for each frozen tuple */
                for (ntup = 0; ntup < xlrec->ntuples; ntup++)
@@ -7402,7 +7243,7 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
                        ItemId          lp;
                        HeapTupleHeader tuple;
 
-                       xlrec_tp = &xlrec->tuples[ntup];
+                       xlrec_tp = &tuples[ntup];
                        lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
                        tuple = (HeapTupleHeader) PageGetItem(page, lp);
 
@@ -7444,19 +7285,21 @@ fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
 }
 
 static void
-heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_delete(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       OffsetNumber offnum;
        ItemId          lp = NULL;
        HeapTupleHeader htup;
        BlockNumber blkno;
        RelFileNode target_node;
+       ItemPointerData target_tid;
 
-       blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
-       target_node = xlrec->target.node;
+       XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+       ItemPointerSetBlockNumber(&target_tid, blkno);
+       ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7473,16 +7316,14 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
                FreeFakeRelcacheEntry(reln);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 0, target_node, blkno, &buffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(buffer);
+               page = BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
-               if (PageGetMaxOffsetNumber(page) >= offnum)
-                       lp = PageGetItemId(page, offnum);
+               if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
+                       lp = PageGetItemId(page, xlrec->offnum);
 
-               if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+               if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
                        elog(PANIC, "heap_delete_redo: invalid lp");
 
                htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -7496,13 +7337,13 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
                HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
 
                /* Mark the page as a candidate for pruning */
-               PageSetPrunable(page, record->xl_xid);
+               PageSetPrunable(page, XLogRecGetXid(record));
 
                if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
                        PageClearAllVisible(page);
 
                /* Make sure there is no forward chain link in t_ctid */
-               htup->t_ctid = xlrec->target.tid;
+               htup->t_ctid = target_tid;
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
@@ -7511,12 +7352,12 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_insert(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       OffsetNumber offnum;
        struct
        {
                HeapTupleHeaderData hdr;
@@ -7528,10 +7369,12 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        Size            freespace = 0;
        RelFileNode target_node;
        BlockNumber blkno;
+       ItemPointerData target_tid;
        XLogRedoAction action;
 
-       target_node = xlrec->target.node;
-       blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+       XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+       ItemPointerSetBlockNumber(&target_tid, blkno);
+       ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7549,51 +7392,51 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /*
-        * If we inserted the first and only tuple on the page, re-initialize
-        * the page from scratch.
+        * If we inserted the first and only tuple on the page, re-initialize the
+        * page from scratch.
         */
-       if (record->xl_info & XLOG_HEAP_INIT_PAGE)
+       if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         target_node, MAIN_FORKNUM, blkno,
-                                                                         RBM_ZERO_AND_LOCK, false, &buffer);
+               buffer = XLogInitBufferForRedo(record, 0);
                page = BufferGetPage(buffer);
                PageInit(page, BufferGetPageSize(buffer), 0);
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 0, target_node, blkno,
-                                                                          &buffer);
-
+               action = XLogReadBufferForRedo(record, 0, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
+               Size            datalen;
+               char       *data;
+
                page = BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
-               if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+               if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
                        elog(PANIC, "heap_insert_redo: invalid max offset number");
 
-               newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
-               Assert(newlen <= MaxHeapTupleSize);
-               memcpy((char *) &xlhdr,
-                          (char *) xlrec + SizeOfHeapInsert,
-                          SizeOfHeapHeader);
+               data = XLogRecGetBlockData(record, 0, &datalen);
+
+               newlen = datalen - SizeOfHeapHeader;
+               Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
+               memcpy((char *) &xlhdr, data, SizeOfHeapHeader);
+               data += SizeOfHeapHeader;
+
                htup = &tbuf.hdr;
                MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
                /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
                memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-                          (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
+                          data,
                           newlen);
                newlen += offsetof(HeapTupleHeaderData, t_bits);
                htup->t_infomask2 = xlhdr.t_infomask2;
                htup->t_infomask = xlhdr.t_infomask;
                htup->t_hoff = xlhdr.t_hoff;
-               HeapTupleHeaderSetXmin(htup, record->xl_xid);
+               HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
                HeapTupleHeaderSetCmin(htup, FirstCommandId);
-               htup->t_ctid = xlrec->target.tid;
+               htup->t_ctid = target_tid;
 
-               offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
-               if (offnum == InvalidOffsetNumber)
+               if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
+                                               true, true) == InvalidOffsetNumber)
                        elog(PANIC, "heap_insert_redo: failed to add tuple");
 
                freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
@@ -7618,16 +7461,16 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
         * totally accurate anyway.
         */
        if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
-               XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
+               XLogRecordPageWithFreeSpace(target_node, blkno, freespace);
 }
 
 /*
  * Handles MULTI_INSERT record type.
  */
 static void
-heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_multi_insert(XLogReaderState *record)
 {
-       char       *recdata = XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_multi_insert *xlrec;
        RelFileNode rnode;
        BlockNumber blkno;
@@ -7642,27 +7485,16 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
        uint32          newlen;
        Size            freespace = 0;
        int                     i;
-       bool            isinit = (record->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+       bool            isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
        XLogRedoAction action;
 
        /*
         * Insertion doesn't overwrite MVCC data, so no conflict processing is
         * required.
         */
+       xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
 
-       xlrec = (xl_heap_multi_insert *) recdata;
-       recdata += SizeOfHeapMultiInsert;
-
-       rnode = xlrec->node;
-       blkno = xlrec->blkno;
-
-       /*
-        * If we're reinitializing the page, the tuples are stored in order from
-        * FirstOffsetNumber. Otherwise there's an array of offsets in the WAL
-        * record.
-        */
-       if (!isinit)
-               recdata += sizeof(OffsetNumber) * xlrec->ntuples;
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7681,24 +7513,35 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
 
        if (isinit)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         rnode, MAIN_FORKNUM, blkno,
-                                                                         RBM_ZERO_AND_LOCK, false, &buffer);
+               buffer = XLogInitBufferForRedo(record, 0);
                page = BufferGetPage(buffer);
                PageInit(page, BufferGetPageSize(buffer), 0);
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 0, rnode, blkno, &buffer);
-
+               action = XLogReadBufferForRedo(record, 0, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
-               page = BufferGetPage(buffer);
+               char       *tupdata;
+               char       *endptr;
+               Size            len;
+
+               /* Tuples are stored as block data */
+               tupdata = XLogRecGetBlockData(record, 0, &len);
+               endptr = tupdata + len;
+
+               page = (Page) BufferGetPage(buffer);
+
                for (i = 0; i < xlrec->ntuples; i++)
                {
                        OffsetNumber offnum;
                        xl_multi_insert_tuple *xlhdr;
 
+                       /*
+                        * If we're reinitializing the page, the tuples are stored in
+                        * order from FirstOffsetNumber. Otherwise there's an array of
+                        * offsets in the WAL record, and the tuples come after that.
+                        */
                        if (isinit)
                                offnum = FirstOffsetNumber + i;
                        else
@@ -7706,8 +7549,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
                        if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                                elog(PANIC, "heap_multi_insert_redo: invalid max offset number");
 
-                       xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(recdata);
-                       recdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
+                       xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
+                       tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
 
                        newlen = xlhdr->datalen;
                        Assert(newlen <= MaxHeapTupleSize);
@@ -7715,15 +7558,15 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
                        MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
                        /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
                        memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-                                  (char *) recdata,
+                                  (char *) tupdata,
                                   newlen);
-                       recdata += newlen;
+                       tupdata += newlen;
 
                        newlen += offsetof(HeapTupleHeaderData, t_bits);
                        htup->t_infomask2 = xlhdr->t_infomask2;
                        htup->t_infomask = xlhdr->t_infomask;
                        htup->t_hoff = xlhdr->t_hoff;
-                       HeapTupleHeaderSetXmin(htup, record->xl_xid);
+                       HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
                        HeapTupleHeaderSetCmin(htup, FirstCommandId);
                        ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
                        ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
@@ -7732,6 +7575,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
                        if (offnum == InvalidOffsetNumber)
                                elog(PANIC, "heap_multi_insert_redo: failed to add tuple");
                }
+               if (tupdata != endptr)
+                       elog(PANIC, "heap_multi_insert_redo: total tuple length mismatch");
 
                freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
 
@@ -7755,19 +7600,21 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
         * totally accurate anyway.
         */
        if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
-               XLogRecordPageWithFreeSpace(xlrec->node, blkno, freespace);
+               XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
 }
 
 /*
  * Handles UPDATE and HOT_UPDATE
  */
 static void
-heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
+heap_xlog_update(XLogReaderState *record, bool hot_update)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
        RelFileNode rnode;
        BlockNumber oldblk;
        BlockNumber newblk;
+       ItemPointerData newtid;
        Buffer          obuffer,
                                nbuffer;
        Page            page;
@@ -7775,7 +7622,6 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        ItemId          lp = NULL;
        HeapTupleData oldtup;
        HeapTupleHeader htup;
-       char       *recdata;
        uint16          prefixlen = 0,
                                suffixlen = 0;
        char       *newp;
@@ -7784,7 +7630,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                HeapTupleHeaderData hdr;
                char            data[MaxHeapTupleSize];
        }                       tbuf;
-       xl_heap_header_len xlhdr;
+       xl_heap_header xlhdr;
        uint32          newlen;
        Size            freespace = 0;
        XLogRedoAction oldaction;
@@ -7794,9 +7640,16 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        oldtup.t_data = NULL;
        oldtup.t_len = 0;
 
-       rnode = xlrec->target.node;
-       newblk = ItemPointerGetBlockNumber(&xlrec->newtid);
-       oldblk = ItemPointerGetBlockNumber(&xlrec->target.tid);
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &newblk);
+       if (XLogRecGetBlockTag(record, 1, NULL, NULL, &oldblk))
+       {
+               /* HOT updates are never done across pages */
+               Assert(!hot_update);
+       }
+       else
+               oldblk = newblk;
+
+       ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7824,12 +7677,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
         */
 
        /* Deal with old tuple version */
-       oldaction = XLogReadBufferForRedo(lsn, record, 0, rnode, oldblk, &obuffer);
+       oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
+                                                                         &obuffer);
        if (oldaction == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(obuffer);
-
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               page = BufferGetPage(obuffer);
+               offnum = xlrec->old_offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -7852,10 +7705,10 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
                HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
                /* Set forward chain link in t_ctid */
-               htup->t_ctid = xlrec->newtid;
+               htup->t_ctid = newtid;
 
                /* Mark the page as a candidate for pruning */
-               PageSetPrunable(page, record->xl_xid);
+               PageSetPrunable(page, XLogRecGetXid(record));
 
                if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
                        PageClearAllVisible(page);
@@ -7872,18 +7725,15 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                nbuffer = obuffer;
                newaction = oldaction;
        }
-       else if (record->xl_info & XLOG_HEAP_INIT_PAGE)
+       else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 1,
-                                                                         rnode, MAIN_FORKNUM, newblk,
-                                                                         RBM_ZERO_AND_LOCK, false, &nbuffer);
+               nbuffer = XLogInitBufferForRedo(record, 0);
                page = (Page) BufferGetPage(nbuffer);
                PageInit(page, BufferGetPageSize(nbuffer), 0);
                newaction = BLK_NEEDS_REDO;
        }
        else
-               newaction = XLogReadBufferForRedo(lsn, record, 1, rnode, newblk,
-                                                                                 &nbuffer);
+               newaction = XLogReadBufferForRedo(record, 0, &nbuffer);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7891,7 +7741,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
         */
        if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
        {
-               Relation        reln = CreateFakeRelcacheEntry(xlrec->target.node);
+               Relation        reln = CreateFakeRelcacheEntry(rnode);
                Buffer          vmbuffer = InvalidBuffer;
 
                visibilitymap_pin(reln, newblk, &vmbuffer);
@@ -7903,14 +7753,20 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        /* Deal with new tuple */
        if (newaction == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(nbuffer);
+               char       *recdata;
+               char       *recdata_end;
+               Size            datalen;
+               Size            tuplen;
+
+               recdata = XLogRecGetBlockData(record, 0, &datalen);
+               recdata_end = recdata + datalen;
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
+               page = BufferGetPage(nbuffer);
+
+               offnum = xlrec->new_offnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "heap_update_redo: invalid max offset number");
 
-               recdata = (char *) xlrec + SizeOfHeapUpdate;
-
                if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD)
                {
                        Assert(newblk == oldblk);
@@ -7924,10 +7780,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                        recdata += sizeof(uint16);
                }
 
-               memcpy((char *) &xlhdr, recdata, SizeOfHeapHeaderLen);
-               recdata += SizeOfHeapHeaderLen;
+               memcpy((char *) &xlhdr, recdata, SizeOfHeapHeader);
+               recdata += SizeOfHeapHeader;
+
+               tuplen = recdata_end - recdata;
+               Assert(tuplen <= MaxHeapTupleSize);
 
-               Assert(xlhdr.t_len + prefixlen + suffixlen <= MaxHeapTupleSize);
                htup = &tbuf.hdr;
                MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
 
@@ -7941,7 +7799,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                        int                     len;
 
                        /* copy bitmap [+ padding] [+ oid] from WAL record */
-                       len = xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
+                       len = xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
                        memcpy(newp, recdata, len);
                        recdata += len;
                        newp += len;
@@ -7951,7 +7809,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                        newp += prefixlen;
 
                        /* copy new tuple data from WAL record */
-                       len = xlhdr.t_len - (xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
+                       len = tuplen - (xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
                        memcpy(newp, recdata, len);
                        recdata += len;
                        newp += len;
@@ -7962,24 +7820,26 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                         * copy bitmap [+ padding] [+ oid] + data from record, all in one
                         * go
                         */
-                       memcpy(newp, recdata, xlhdr.t_len);
-                       recdata += xlhdr.t_len;
-                       newp += xlhdr.t_len;
+                       memcpy(newp, recdata, tuplen);
+                       recdata += tuplen;
+                       newp += tuplen;
                }
+               Assert(recdata == recdata_end);
+
                /* copy suffix from old tuple */
                if (suffixlen > 0)
                        memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
 
-               newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
-               htup->t_infomask2 = xlhdr.header.t_infomask2;
-               htup->t_infomask = xlhdr.header.t_infomask;
-               htup->t_hoff = xlhdr.header.t_hoff;
+               newlen = offsetof(HeapTupleHeaderData, t_bits) + tuplen + prefixlen + suffixlen;
+               htup->t_infomask2 = xlhdr.t_infomask2;
+               htup->t_infomask = xlhdr.t_infomask;
+               htup->t_hoff = xlhdr.t_hoff;
 
-               HeapTupleHeaderSetXmin(htup, record->xl_xid);
+               HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
                HeapTupleHeaderSetCmin(htup, FirstCommandId);
                HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
                /* Make sure there is no forward chain link in t_ctid */
-               htup->t_ctid = xlrec->newtid;
+               htup->t_ctid = newtid;
 
                offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
                if (offnum == InvalidOffsetNumber)
@@ -7993,6 +7853,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                PageSetLSN(page, lsn);
                MarkBufferDirty(nbuffer);
        }
+
        if (BufferIsValid(nbuffer) && nbuffer != obuffer)
                UnlockReleaseBuffer(nbuffer);
        if (BufferIsValid(obuffer))
@@ -8014,14 +7875,13 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
         * totally accurate anyway.
         */
        if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
-               XLogRecordPageWithFreeSpace(xlrec->target.node,
-                                                                ItemPointerGetBlockNumber(&(xlrec->newtid)),
-                                                                       freespace);
+               XLogRecordPageWithFreeSpace(rnode, newblk, freespace);
 }
 
 static void
-heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_lock(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
@@ -8029,13 +7889,11 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
        ItemId          lp = NULL;
        HeapTupleHeader htup;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&xlrec->target.tid),
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -8055,7 +7913,9 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
                {
                        HeapTupleHeaderClearHotUpdated(htup);
                        /* Make sure there is no forward chain link in t_ctid */
-                       htup->t_ctid = xlrec->target.tid;
+                       ItemPointerSet(&htup->t_ctid,
+                                                  BufferGetBlockNumber(buffer),
+                                                  offnum);
                }
                HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
                HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
@@ -8067,22 +7927,23 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_lock_updated(XLogReaderState *record)
 {
-       xl_heap_lock_updated *xlrec =
-       (xl_heap_lock_updated *) XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
+       xl_heap_lock_updated *xlrec;
        Buffer          buffer;
        Page            page;
        OffsetNumber offnum;
        ItemId          lp = NULL;
        HeapTupleHeader htup;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                         &buffer) == BLK_NEEDS_REDO)
+       xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
+
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -8103,8 +7964,9 @@ heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
+heap_xlog_inplace(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
@@ -8112,15 +7974,15 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
        ItemId          lp = NULL;
        HeapTupleHeader htup;
        uint32          oldlen;
-       uint32          newlen;
+       Size            newlen;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
+               char       *newtup = XLogRecGetBlockData(record, 0, &newlen);
+
                page = BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -8130,13 +7992,10 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
                htup = (HeapTupleHeader) PageGetItem(page, lp);
 
                oldlen = ItemIdGetLength(lp) - htup->t_hoff;
-               newlen = record->xl_len - SizeOfHeapInplace;
                if (oldlen != newlen)
                        elog(PANIC, "heap_inplace_redo: wrong tuple length");
 
-               memcpy((char *) htup + htup->t_hoff,
-                          (char *) xlrec + SizeOfHeapInplace,
-                          newlen);
+               memcpy((char *) htup + htup->t_hoff, newtup, newlen);
 
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
@@ -8146,9 +8005,9 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
 }
 
 void
-heap_redo(XLogRecPtr lsn, XLogRecord *record)
+heap_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /*
         * These operations don't overwrite MVCC data so no conflict processing is
@@ -8158,22 +8017,22 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record)
        switch (info & XLOG_HEAP_OPMASK)
        {
                case XLOG_HEAP_INSERT:
-                       heap_xlog_insert(lsn, record);
+                       heap_xlog_insert(record);
                        break;
                case XLOG_HEAP_DELETE:
-                       heap_xlog_delete(lsn, record);
+                       heap_xlog_delete(record);
                        break;
                case XLOG_HEAP_UPDATE:
-                       heap_xlog_update(lsn, record, false);
+                       heap_xlog_update(record, false);
                        break;
                case XLOG_HEAP_HOT_UPDATE:
-                       heap_xlog_update(lsn, record, true);
+                       heap_xlog_update(record, true);
                        break;
                case XLOG_HEAP_LOCK:
-                       heap_xlog_lock(lsn, record);
+                       heap_xlog_lock(record);
                        break;
                case XLOG_HEAP_INPLACE:
-                       heap_xlog_inplace(lsn, record);
+                       heap_xlog_inplace(record);
                        break;
                default:
                        elog(PANIC, "heap_redo: unknown op code %u", info);
@@ -8181,29 +8040,29 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record)
 }
 
 void
-heap2_redo(XLogRecPtr lsn, XLogRecord *record)
+heap2_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info & XLOG_HEAP_OPMASK)
        {
                case XLOG_HEAP2_CLEAN:
-                       heap_xlog_clean(lsn, record);
+                       heap_xlog_clean(record);
                        break;
                case XLOG_HEAP2_FREEZE_PAGE:
-                       heap_xlog_freeze_page(lsn, record);
+                       heap_xlog_freeze_page(record);
                        break;
                case XLOG_HEAP2_CLEANUP_INFO:
-                       heap_xlog_cleanup_info(lsn, record);
+                       heap_xlog_cleanup_info(record);
                        break;
                case XLOG_HEAP2_VISIBLE:
-                       heap_xlog_visible(lsn, record);
+                       heap_xlog_visible(record);
                        break;
                case XLOG_HEAP2_MULTI_INSERT:
-                       heap_xlog_multi_insert(lsn, record);
+                       heap_xlog_multi_insert(record);
                        break;
                case XLOG_HEAP2_LOCK_UPDATED:
-                       heap_xlog_lock_updated(lsn, record);
+                       heap_xlog_lock_updated(record);
                        break;
                case XLOG_HEAP2_NEW_CID:
 
@@ -8213,7 +8072,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record)
                         */
                        break;
                case XLOG_HEAP2_REWRITE:
-                       heap_xlog_logical_rewrite(lsn, record);
+                       heap_xlog_logical_rewrite(record);
                        break;
                default:
                        elog(PANIC, "heap2_redo: unknown op code %u", info);
index bea52460a086cde3f3743df42aebedf7d88b5197..4b132b7d016891bc4faf74eb9203366bbbe1812a 100644 (file)
@@ -865,7 +865,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
        hash_seq_init(&seq_status, state->rs_logical_mappings);
        while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
        {
-               XLogRecData rdata[2];
                char       *waldata;
                char       *waldata_start;
                xl_heap_rewrite_mapping xlrec;
@@ -889,11 +888,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
                xlrec.offset = src->off;
                xlrec.start_lsn = state->rs_begin_lsn;
 
-               rdata[0].data = (char *) (&xlrec);
-               rdata[0].len = sizeof(xlrec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
                /* write all mappings consecutively */
                len = src->num_mappings * sizeof(LogicalRewriteMappingData);
                waldata_start = waldata = palloc(len);
@@ -934,13 +928,12 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
                                                        written, len)));
                src->off += len;
 
-               rdata[1].data = waldata_start;
-               rdata[1].len = len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
+               XLogRegisterData(waldata_start, len);
 
                /* write xlog record */
-               XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);
+               XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);
 
                pfree(waldata_start);
        }
@@ -1123,7 +1116,7 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
  * Replay XLOG_HEAP2_REWRITE records
  */
 void
-heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
+heap_xlog_logical_rewrite(XLogReaderState *r)
 {
        char            path[MAXPGPATH];
        int                     fd;
@@ -1138,7 +1131,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
                         xlrec->mapped_db, xlrec->mapped_rel,
                         (uint32) (xlrec->start_lsn >> 32),
                         (uint32) xlrec->start_lsn,
-                        xlrec->mapped_xid, r->xl_xid);
+                        xlrec->mapped_xid, XLogRecGetXid(r));
 
        fd = OpenTransientFile(path,
                                                   O_CREAT | O_WRONLY | PG_BINARY,
index bcaba7e5e8400f1b144131043d5818b2ffa50c33..2c4f9904e1a5ac60779a39b803219a2cb1dcd38a 100644 (file)
@@ -837,37 +837,25 @@ _bt_insertonpg(Relation rel,
                if (RelationNeedsWAL(rel))
                {
                        xl_btree_insert xlrec;
-                       BlockNumber xlleftchild;
                        xl_btree_metadata xlmeta;
                        uint8           xlinfo;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[4];
-                       XLogRecData *nextrdata;
                        IndexTupleData trunctuple;
 
-                       xlrec.target.node = rel->rd_node;
-                       ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
+                       xlrec.offnum = itup_off;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfBtreeInsert;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = nextrdata = &(rdata[1]);
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
 
                        if (P_ISLEAF(lpageop))
                                xlinfo = XLOG_BTREE_INSERT_LEAF;
                        else
                        {
                                /*
-                                * Include the block number of the left child, whose
-                                * INCOMPLETE_SPLIT flag was cleared.
+                                * Register the left child whose INCOMPLETE_SPLIT flag was
+                                * cleared.
                                 */
-                               xlleftchild = BufferGetBlockNumber(cbuf);
-                               nextrdata->data = (char *) &xlleftchild;
-                               nextrdata->len = sizeof(BlockNumber);
-                               nextrdata->buffer = cbuf;
-                               nextrdata->buffer_std = true;
-                               nextrdata->next = nextrdata + 1;
-                               nextrdata++;
+                               XLogRegisterBuffer(1, cbuf, REGBUF_STANDARD);
 
                                xlinfo = XLOG_BTREE_INSERT_UPPER;
                        }
@@ -879,33 +867,25 @@ _bt_insertonpg(Relation rel,
                                xlmeta.fastroot = metad->btm_fastroot;
                                xlmeta.fastlevel = metad->btm_fastlevel;
 
-                               nextrdata->data = (char *) &xlmeta;
-                               nextrdata->len = sizeof(xl_btree_metadata);
-                               nextrdata->buffer = InvalidBuffer;
-                               nextrdata->next = nextrdata + 1;
-                               nextrdata++;
+                               XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+                               XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
 
                                xlinfo = XLOG_BTREE_INSERT_META;
                        }
 
                        /* Read comments in _bt_pgaddtup */
+                       XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
                        if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
                        {
                                trunctuple = *itup;
                                trunctuple.t_info = sizeof(IndexTupleData);
-                               nextrdata->data = (char *) &trunctuple;
-                               nextrdata->len = sizeof(IndexTupleData);
+                               XLogRegisterBufData(0, (char *) &trunctuple,
+                                                                       sizeof(IndexTupleData));
                        }
                        else
-                       {
-                               nextrdata->data = (char *) itup;
-                               nextrdata->len = IndexTupleDSize(*itup);
-                       }
-                       nextrdata->buffer = buf;
-                       nextrdata->buffer_std = true;
-                       nextrdata->next = NULL;
+                               XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
 
-                       recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+                       recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
                        if (BufferIsValid(metabuf))
                        {
@@ -1260,56 +1240,37 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                xl_btree_split xlrec;
                uint8           xlinfo;
                XLogRecPtr      recptr;
-               XLogRecData rdata[7];
-               XLogRecData *lastrdata;
-               BlockNumber cblkno;
-
-               xlrec.node = rel->rd_node;
-               xlrec.leftsib = origpagenumber;
-               xlrec.rightsib = rightpagenumber;
-               xlrec.rnext = ropaque->btpo_next;
+
                xlrec.level = ropaque->btpo.level;
                xlrec.firstright = firstright;
+               xlrec.newitemoff = newitemoff;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeSplit;
-               rdata[0].buffer = InvalidBuffer;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit);
 
-               lastrdata = &rdata[0];
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterBuffer(1, rbuf, REGBUF_WILL_INIT);
+               /* Log the right sibling, because we've changed its prev-pointer. */
+               if (!P_RIGHTMOST(ropaque))
+                       XLogRegisterBuffer(2, sbuf, REGBUF_STANDARD);
+               if (BufferIsValid(cbuf))
+                       XLogRegisterBuffer(3, cbuf, REGBUF_STANDARD);
 
                /*
-                * Log the new item and its offset, if it was inserted on the left
-                * page. (If it was put on the right page, we don't need to explicitly
-                * WAL log it because it's included with all the other items on the
-                * right page.) Show the new item as belonging to the left page
-                * buffer, so that it is not stored if XLogInsert decides it needs a
-                * full-page image of the left page.  We store the offset anyway,
-                * though, to support archive compression of these records.
+                * Log the new item, if it was inserted on the left page. (If it was
+                * put on the right page, we don't need to explicitly WAL log it
+                * because it's included with all the other items on the right page.)
+                * Show the new item as belonging to the left page buffer, so that it
+                * is not stored if XLogInsert decides it needs a full-page image of
+                * the left page.  We store the offset anyway, though, to support
+                * archive compression of these records.
                 */
                if (newitemonleft)
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       lastrdata->data = (char *) &newitemoff;
-                       lastrdata->len = sizeof(OffsetNumber);
-                       lastrdata->buffer = InvalidBuffer;
-
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       lastrdata->data = (char *) newitem;
-                       lastrdata->len = MAXALIGN(newitemsz);
-                       lastrdata->buffer = buf;        /* backup block 0 */
-                       lastrdata->buffer_std = true;
-               }
+                       XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz));
 
                /* Log left page */
                if (!isleaf)
                {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
                        /*
                         * We must also log the left page's high key, because the right
                         * page's leftmost key is suppressed on non-leaf levels.  Show it
@@ -1319,43 +1280,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                         */
                        itemid = PageGetItemId(origpage, P_HIKEY);
                        item = (IndexTuple) PageGetItem(origpage, itemid);
-                       lastrdata->data = (char *) item;
-                       lastrdata->len = MAXALIGN(IndexTupleSize(item));
-                       lastrdata->buffer = buf;        /* backup block 0 */
-                       lastrdata->buffer_std = true;
-               }
-
-               if (isleaf && !newitemonleft)
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       /*
-                        * Although we don't need to WAL-log anything on the left page, we
-                        * still need XLogInsert to consider storing a full-page image of
-                        * the left page, so make an empty entry referencing that buffer.
-                        * This also ensures that the left page is always backup block 0.
-                        */
-                       lastrdata->data = NULL;
-                       lastrdata->len = 0;
-                       lastrdata->buffer = buf;        /* backup block 0 */
-                       lastrdata->buffer_std = true;
-               }
-
-               /*
-                * Log block number of left child, whose INCOMPLETE_SPLIT flag this
-                * insertion clears.
-                */
-               if (!isleaf)
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       cblkno = BufferGetBlockNumber(cbuf);
-                       lastrdata->data = (char *) &cblkno;
-                       lastrdata->len = sizeof(BlockNumber);
-                       lastrdata->buffer = cbuf;       /* backup block 1 */
-                       lastrdata->buffer_std = true;
+                       XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item)));
                }
 
                /*
@@ -1370,35 +1295,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                 * and so the item pointers can be reconstructed.  See comments for
                 * _bt_restore_page().
                 */
-               lastrdata->next = lastrdata + 1;
-               lastrdata++;
-
-               lastrdata->data = (char *) rightpage +
-                       ((PageHeader) rightpage)->pd_upper;
-               lastrdata->len = ((PageHeader) rightpage)->pd_special -
-                       ((PageHeader) rightpage)->pd_upper;
-               lastrdata->buffer = InvalidBuffer;
-
-               /* Log the right sibling, because we've changed its' prev-pointer. */
-               if (!P_RIGHTMOST(ropaque))
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       lastrdata->data = NULL;
-                       lastrdata->len = 0;
-                       lastrdata->buffer = sbuf;       /* bkp block 1 (leaf) or 2 (non-leaf) */
-                       lastrdata->buffer_std = true;
-               }
-
-               lastrdata->next = NULL;
+               XLogRegisterBufData(1,
+                                        (char *) rightpage + ((PageHeader) rightpage)->pd_upper,
+                                                       ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper);
 
                if (isroot)
                        xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
                else
                        xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
 
-               recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
                PageSetLSN(origpage, recptr);
                PageSetLSN(rightpage, recptr);
@@ -2090,34 +1996,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
        {
                xl_btree_newroot xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[3];
+               xl_btree_metadata md;
 
-               xlrec.node = rel->rd_node;
                xlrec.rootblk = rootblknum;
                xlrec.level = metad->btm_level;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeNewroot;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
+
+               XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+               XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+               md.root = rootblknum;
+               md.level = metad->btm_level;
+               md.fastroot = rootblknum;
+               md.fastlevel = metad->btm_level;
+
+               XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
 
                /*
                 * Direct access to page is not good but faster - we should implement
                 * some new func in page API.
                 */
-               rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
-               rdata[1].len = ((PageHeader) rootpage)->pd_special -
-                       ((PageHeader) rootpage)->pd_upper;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &(rdata[2]);
-
-               /* Make a full-page image of the left child if needed */
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = lbuf;
-               rdata[2].next = NULL;
-
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
+               XLogRegisterBufData(0,
+                                          (char *) rootpage + ((PageHeader) rootpage)->pd_upper,
+                                                       ((PageHeader) rootpage)->pd_special -
+                                                       ((PageHeader) rootpage)->pd_upper);
+
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
 
                PageSetLSN(lpage, recptr);
                PageSetLSN(rootpage, recptr);
index ea95ce6e1eca8428186142c0b46003ad3f11ec9f..a25dafeb400bfae53d215705352c1d9df33b65b2 100644 (file)
@@ -236,18 +236,25 @@ _bt_getroot(Relation rel, int access)
                {
                        xl_btree_newroot xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata;
+                       xl_btree_metadata md;
+
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+                       XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+                       md.root = rootblkno;
+                       md.level = 0;
+                       md.fastroot = rootblkno;
+                       md.fastlevel = 0;
+
+                       XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
 
-                       xlrec.node = rel->rd_node;
                        xlrec.rootblk = rootblkno;
                        xlrec.level = 0;
 
-                       rdata.data = (char *) &xlrec;
-                       rdata.len = SizeOfBtreeNewroot;
-                       rdata.buffer = InvalidBuffer;
-                       rdata.next = NULL;
+                       XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
 
-                       recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
+                       recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
 
                        PageSetLSN(rootpage, recptr);
                        PageSetLSN(metapg, recptr);
@@ -528,39 +535,23 @@ _bt_checkpage(Relation rel, Buffer buf)
 static void
 _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
 {
-       if (!RelationNeedsWAL(rel))
-               return;
-
-       /* No ereport(ERROR) until changes are logged */
-       START_CRIT_SECTION();
+       xl_btree_reuse_page xlrec_reuse;
 
        /*
-        * We don't do MarkBufferDirty here because we're about to initialise the
-        * page, and nobody else can see it yet.
+        * Note that we don't register the buffer with the record, because this
+        * operation doesn't modify the page. This record only exists to provide a
+        * conflict point for Hot Standby.
         */
 
        /* XLOG stuff */
-       {
-               XLogRecData rdata[1];
-               xl_btree_reuse_page xlrec_reuse;
+       xlrec_reuse.node = rel->rd_node;
+       xlrec_reuse.block = blkno;
+       xlrec_reuse.latestRemovedXid = latestRemovedXid;
 
-               xlrec_reuse.node = rel->rd_node;
-               xlrec_reuse.block = blkno;
-               xlrec_reuse.latestRemovedXid = latestRemovedXid;
-               rdata[0].data = (char *) &xlrec_reuse;
-               rdata[0].len = SizeOfBtreeReusePage;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
 
-               XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
-
-               /*
-                * We don't do PageSetLSN here because we're about to initialise the
-                * page, so no need.
-                */
-       }
-
-       END_CRIT_SECTION();
+       XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
 }
 
 /*
@@ -633,7 +624,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
                                         * WAL record that will allow us to conflict with queries
                                         * running on standby.
                                         */
-                                       if (XLogStandbyInfoActive())
+                                       if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
                                        {
                                                BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 
@@ -830,17 +821,13 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
        if (RelationNeedsWAL(rel))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                xl_btree_vacuum xlrec_vacuum;
 
-               xlrec_vacuum.node = rel->rd_node;
-               xlrec_vacuum.block = BufferGetBlockNumber(buf);
-
                xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
-               rdata[0].data = (char *) &xlrec_vacuum;
-               rdata[0].len = SizeOfBtreeVacuum;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
 
                /*
                 * The target-offsets array is not in the buffer, but pretend that it
@@ -848,20 +835,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
                 * need not be stored too.
                 */
                if (nitems > 0)
-               {
-                       rdata[1].data = (char *) itemnos;
-                       rdata[1].len = nitems * sizeof(OffsetNumber);
-               }
-               else
-               {
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-               }
-               rdata[1].buffer = buf;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+                       XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
 
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
 
                PageSetLSN(page, recptr);
        }
@@ -919,36 +895,23 @@ _bt_delitems_delete(Relation rel, Buffer buf,
        if (RelationNeedsWAL(rel))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[3];
                xl_btree_delete xlrec_delete;
 
-               xlrec_delete.node = rel->rd_node;
                xlrec_delete.hnode = heapRel->rd_node;
-               xlrec_delete.block = BufferGetBlockNumber(buf);
                xlrec_delete.nitems = nitems;
 
-               rdata[0].data = (char *) &xlrec_delete;
-               rdata[0].len = SizeOfBtreeDelete;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
 
                /*
                 * We need the target-offsets array whether or not we store the whole
                 * buffer, to allow us to find the latestRemovedXid on a standby
                 * server.
                 */
-               rdata[1].data = (char *) itemnos;
-               rdata[1].len = nitems * sizeof(OffsetNumber);
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &(rdata[2]);
-
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = buf;
-               rdata[2].buffer_std = true;
-               rdata[2].next = NULL;
+               XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
 
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
 
                PageSetLSN(page, recptr);
        }
@@ -1493,33 +1456,26 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
        {
                xl_btree_mark_page_halfdead xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.target.node = rel->rd_node;
-               ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
+               xlrec.poffset = topoff;
                xlrec.leafblk = leafblkno;
                if (target != leafblkno)
                        xlrec.topparent = target;
                else
                        xlrec.topparent = InvalidBlockNumber;
 
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, topparent, REGBUF_STANDARD);
+
                page = BufferGetPage(leafbuf);
                opaque = (BTPageOpaque) PageGetSpecialPointer(page);
                xlrec.leftblk = opaque->btpo_prev;
                xlrec.rightblk = opaque->btpo_next;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeMarkPageHalfDead;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = topparent;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead);
 
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
 
                page = BufferGetPage(topparent);
                PageSetLSN(page, recptr);
@@ -1826,63 +1782,44 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
                xl_btree_metadata xlmeta;
                uint8           xlinfo;
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
-               XLogRecData *nextrdata;
 
-               xlrec.node = rel->rd_node;
+               XLogBeginInsert();
+
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+               if (BufferIsValid(lbuf))
+                       XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+               XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
+               if (target != leafblkno)
+                       XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
 
                /* information on the unlinked block */
-               xlrec.deadblk = target;
                xlrec.leftsib = leftsib;
                xlrec.rightsib = rightsib;
                xlrec.btpo_xact = opaque->btpo.xact;
 
                /* information needed to recreate the leaf block (if not the target) */
-               xlrec.leafblk = leafblkno;
                xlrec.leafleftsib = leafleftsib;
                xlrec.leafrightsib = leafrightsib;
                xlrec.topparent = nextchild;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeUnlinkPage;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = nextrdata = &(rdata[1]);
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
 
                if (BufferIsValid(metabuf))
                {
+                       XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT);
+
                        xlmeta.root = metad->btm_root;
                        xlmeta.level = metad->btm_level;
                        xlmeta.fastroot = metad->btm_fastroot;
                        xlmeta.fastlevel = metad->btm_fastlevel;
 
-                       nextrdata->data = (char *) &xlmeta;
-                       nextrdata->len = sizeof(xl_btree_metadata);
-                       nextrdata->buffer = InvalidBuffer;
-                       nextrdata->next = nextrdata + 1;
-                       nextrdata++;
+                       XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
                        xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
                }
                else
                        xlinfo = XLOG_BTREE_UNLINK_PAGE;
 
-               nextrdata->data = NULL;
-               nextrdata->len = 0;
-               nextrdata->buffer = rbuf;
-               nextrdata->buffer_std = true;
-               nextrdata->next = NULL;
-
-               if (BufferIsValid(lbuf))
-               {
-                       nextrdata->next = nextrdata + 1;
-                       nextrdata++;
-                       nextrdata->data = NULL;
-                       nextrdata->len = 0;
-                       nextrdata->buffer = lbuf;
-                       nextrdata->buffer_std = true;
-                       nextrdata->next = NULL;
-               }
-
-               recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
                if (BufferIsValid(metabuf))
                {
index 13951be62af2e376ee1f669ba81782ec1e93055b..52aef9b9836b41fce3bb0063539fe90babf33076 100644 (file)
@@ -72,17 +72,23 @@ _bt_restore_page(Page page, char *from, int len)
 }
 
 static void
-_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
-                                BlockNumber root, uint32 level,
-                                BlockNumber fastroot, uint32 fastlevel)
+_bt_restore_meta(XLogReaderState *record, uint8 block_id)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          metabuf;
        Page            metapg;
        BTMetaPageData *md;
        BTPageOpaque pageop;
+       xl_btree_metadata *xlrec;
+       char       *ptr;
+       Size            len;
 
-       metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
-       Assert(BufferIsValid(metabuf));
+       metabuf = XLogInitBufferForRedo(record, block_id);
+       ptr = XLogRecGetBlockData(record, block_id, &len);
+
+       Assert(len == sizeof(xl_btree_metadata));
+       Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+       xlrec = (xl_btree_metadata *) ptr;
        metapg = BufferGetPage(metabuf);
 
        _bt_pageinit(metapg, BufferGetPageSize(metabuf));
@@ -90,10 +96,10 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
        md = BTPageGetMeta(metapg);
        md->btm_magic = BTREE_MAGIC;
        md->btm_version = BTREE_VERSION;
-       md->btm_root = root;
-       md->btm_level = level;
-       md->btm_fastroot = fastroot;
-       md->btm_fastlevel = fastlevel;
+       md->btm_root = xlrec->root;
+       md->btm_level = xlrec->level;
+       md->btm_fastroot = xlrec->fastroot;
+       md->btm_fastlevel = xlrec->fastlevel;
 
        pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
        pageop->btpo_flags = BTP_META;
@@ -117,14 +123,12 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
  * types that can insert a downlink: insert, split, and newroot.
  */
 static void
-_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
-                                                  int block_index,
-                                                  RelFileNode rnode, BlockNumber cblock)
+_bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buf;
 
-       if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
        {
                Page            page = (Page) BufferGetPage(buf);
                BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -140,38 +144,12 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
 }
 
 static void
-btree_xlog_insert(bool isleaf, bool ismeta,
-                                 XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       char       *datapos;
-       int                     datalen;
-       xl_btree_metadata md;
-       BlockNumber cblkno = 0;
-       int                     main_blk_index;
-
-       datapos = (char *) xlrec + SizeOfBtreeInsert;
-       datalen = record->xl_len - SizeOfBtreeInsert;
-
-       /*
-        * if this insert finishes a split at lower level, extract the block
-        * number of the (left) child.
-        */
-       if (!isleaf && (record->xl_info & XLR_BKP_BLOCK(0)) == 0)
-       {
-               memcpy(&cblkno, datapos, sizeof(BlockNumber));
-               Assert(cblkno != 0);
-               datapos += sizeof(BlockNumber);
-               datalen -= sizeof(BlockNumber);
-       }
-       if (ismeta)
-       {
-               memcpy(&md, datapos, sizeof(xl_btree_metadata));
-               datapos += sizeof(xl_btree_metadata);
-               datalen -= sizeof(xl_btree_metadata);
-       }
 
        /*
         * Insertion to an internal page finishes an incomplete split at the child
@@ -183,21 +161,15 @@ btree_xlog_insert(bool isleaf, bool ismeta,
         * cannot be updates happening.
         */
        if (!isleaf)
+               _bt_clear_incomplete_split(record, 1);
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               _bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno);
-               main_blk_index = 1;
-       }
-       else
-               main_blk_index = 0;
+               Size            datalen;
+               char       *datapos = XLogRecGetBlockData(record, 0, &datalen);
 
-       if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                         &buffer) == BLK_NEEDS_REDO)
-       {
                page = BufferGetPage(buffer);
 
-               if (PageAddItem(page, (Item) datapos, datalen,
-                                               ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
+               if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
                                                false, false) == InvalidOffsetNumber)
                        elog(PANIC, "btree_insert_redo: failed to add item");
 
@@ -215,15 +187,13 @@ btree_xlog_insert(bool isleaf, bool ismeta,
         * obsolete link from the metapage.
         */
        if (ismeta)
-               _bt_restore_meta(xlrec->target.node, lsn,
-                                                md.root, md.level,
-                                                md.fastroot, md.fastlevel);
+               _bt_restore_meta(record, 2);
 }
 
 static void
-btree_xlog_split(bool onleft, bool isroot,
-                                XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
        bool            isleaf = (xlrec->level == 0);
        Buffer          lbuf;
@@ -231,56 +201,17 @@ btree_xlog_split(bool onleft, bool isroot,
        Page            rpage;
        BTPageOpaque ropaque;
        char       *datapos;
-       int                     datalen;
-       OffsetNumber newitemoff = 0;
-       Item            newitem = NULL;
-       Size            newitemsz = 0;
+       Size            datalen;
        Item            left_hikey = NULL;
        Size            left_hikeysz = 0;
-       BlockNumber cblkno = InvalidBlockNumber;
-
-       datapos = (char *) xlrec + SizeOfBtreeSplit;
-       datalen = record->xl_len - SizeOfBtreeSplit;
-
-       /* Extract newitemoff and newitem, if present */
-       if (onleft)
-       {
-               memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
-               datapos += sizeof(OffsetNumber);
-               datalen -= sizeof(OffsetNumber);
-       }
-       if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
-       {
-               /*
-                * We assume that 16-bit alignment is enough to apply IndexTupleSize
-                * (since it's fetching from a uint16 field) and also enough for
-                * PageAddItem to insert the tuple.
-                */
-               newitem = (Item) datapos;
-               newitemsz = MAXALIGN(IndexTupleSize(newitem));
-               datapos += newitemsz;
-               datalen -= newitemsz;
-       }
-
-       /* Extract left hikey and its size (still assuming 16-bit alignment) */
-       if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(0)))
-       {
-               left_hikey = (Item) datapos;
-               left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
-               datapos += left_hikeysz;
-               datalen -= left_hikeysz;
-       }
+       BlockNumber leftsib;
+       BlockNumber rightsib;
+       BlockNumber rnext;
 
-       /*
-        * If this insertion finishes an incomplete split, get the block number of
-        * the child.
-        */
-       if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
-       {
-               memcpy(&cblkno, datapos, sizeof(BlockNumber));
-               datapos += sizeof(BlockNumber);
-               datalen -= sizeof(BlockNumber);
-       }
+       XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
+       XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
+       if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
+               rnext = P_NONE;
 
        /*
         * Clear the incomplete split flag on the left sibling of the child page
@@ -288,18 +219,18 @@ btree_xlog_split(bool onleft, bool isroot,
         * before locking the other pages)
         */
        if (!isleaf)
-               _bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno);
+               _bt_clear_incomplete_split(record, 3);
 
        /* Reconstruct right (new) sibling page from scratch */
-       rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
-       Assert(BufferIsValid(rbuf));
+       rbuf = XLogInitBufferForRedo(record, 1);
+       datapos = XLogRecGetBlockData(record, 1, &datalen);
        rpage = (Page) BufferGetPage(rbuf);
 
        _bt_pageinit(rpage, BufferGetPageSize(rbuf));
        ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
 
-       ropaque->btpo_prev = xlrec->leftsib;
-       ropaque->btpo_next = xlrec->rnext;
+       ropaque->btpo_prev = leftsib;
+       ropaque->btpo_next = rnext;
        ropaque->btpo.level = xlrec->level;
        ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
        ropaque->btpo_cycleid = 0;
@@ -324,8 +255,7 @@ btree_xlog_split(bool onleft, bool isroot,
        /* don't release the buffer yet; we touch right page's first item below */
 
        /* Now reconstruct left (original) sibling page */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib,
-                                                         &lbuf) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO)
        {
                /*
                 * To retain the same physical order of the tuples that they had, we
@@ -339,9 +269,31 @@ btree_xlog_split(bool onleft, bool isroot,
                Page            lpage = (Page) BufferGetPage(lbuf);
                BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
                OffsetNumber off;
+               Item            newitem = NULL; /* set below only if onleft */
+               Size            newitemsz = 0;
                Page            newlpage;
                OffsetNumber leftoff;
 
+               datapos = XLogRecGetBlockData(record, 0, &datalen);
+
+               if (onleft)
+               {
+                       newitem = (Item) datapos;
+                       newitemsz = MAXALIGN(IndexTupleSize(newitem));
+                       datapos += newitemsz;
+                       datalen -= newitemsz;
+               }
+
+               /* Extract left hikey and its size (assuming 16-bit alignment) */
+               if (!isleaf)
+               {
+                       left_hikey = (Item) datapos;
+                       left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
+                       datapos += left_hikeysz;
+                       datalen -= left_hikeysz;
+               }
+               Assert(datalen == 0);
+
                newlpage = PageGetTempPageCopySpecial(lpage);
 
                /* Set high key */
@@ -358,7 +310,7 @@ btree_xlog_split(bool onleft, bool isroot,
                        Item            item;
 
                        /* add the new item if it was inserted on left page */
-                       if (onleft && off == newitemoff)
+                       if (onleft && off == xlrec->newitemoff)
                        {
                                if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
                                                                false, false) == InvalidOffsetNumber)
@@ -376,7 +328,7 @@ btree_xlog_split(bool onleft, bool isroot,
                }
 
                /* cope with possibility that newitem goes at the end */
-               if (onleft && off == newitemoff)
+               if (onleft && off == xlrec->newitemoff)
                {
                        if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
                                                        false, false) == InvalidOffsetNumber)
@@ -390,7 +342,7 @@ btree_xlog_split(bool onleft, bool isroot,
                lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
                if (isleaf)
                        lopaque->btpo_flags |= BTP_LEAF;
-               lopaque->btpo_next = xlrec->rightsib;
+               lopaque->btpo_next = rightsib;
                lopaque->btpo_cycleid = 0;
 
                PageSetLSN(lpage, lsn);
@@ -410,22 +362,16 @@ btree_xlog_split(bool onleft, bool isroot,
         * replay, because no other index update can be in progress, and readers
         * will cope properly when following an obsolete left-link.
         */
-       if (xlrec->rnext != P_NONE)
+       if (rnext != P_NONE)
        {
-               /*
-                * the backup block containing right sibling is 1 or 2, depending
-                * whether this was a leaf or internal page.
-                */
-               int                     rnext_index = isleaf ? 1 : 2;
                Buffer          buffer;
 
-               if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node,
-                                                                 xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
                {
                        Page            page = (Page) BufferGetPage(buffer);
                        BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
-                       pageop->btpo_prev = xlrec->rightsib;
+                       pageop->btpo_prev = rightsib;
 
                        PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
@@ -436,8 +382,9 @@ btree_xlog_split(bool onleft, bool isroot,
 }
 
 static void
-btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_vacuum(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
@@ -466,9 +413,13 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
         */
        if (HotStandbyActiveInReplay())
        {
+               RelFileNode thisrnode;
+               BlockNumber thisblkno;
                BlockNumber blkno;
 
-               for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++)
+               XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
+
+               for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
                {
                        /*
                         * We use RBM_NORMAL_NO_LOG mode because it's not an error
@@ -483,7 +434,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
                         * buffer manager we could optimise this so that if the block is
                         * not in shared_buffers we confirm it as unpinned.
                         */
-                       buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno,
+                       buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
                                                                                        RBM_NORMAL_NO_LOG);
                        if (BufferIsValid(buffer))
                        {
@@ -497,20 +448,23 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
         * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
         * page. See nbtree/README for details.
         */
-       if (XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         xlrec->node, MAIN_FORKNUM, xlrec->block,
-                                                                         RBM_NORMAL, true, &buffer)
+       if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
                == BLK_NEEDS_REDO)
        {
+               char       *ptr;
+               Size            len;
+
+               ptr = XLogRecGetBlockData(record, 0, &len);
+
                page = (Page) BufferGetPage(buffer);
 
-               if (record->xl_len > SizeOfBtreeVacuum)
+               if (len > 0)
                {
                        OffsetNumber *unused;
                        OffsetNumber *unend;
 
-                       unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
-                       unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
+                       unused = (OffsetNumber *) ptr;
+                       unend = (OffsetNumber *) ((char *) ptr + len);
 
                        if ((unend - unused) > 0)
                                PageIndexMultiDelete(page, unused, unend - unused);
@@ -542,13 +496,16 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
  * XXX optimise later with something like XLogPrefetchBuffer()
  */
 static TransactionId
-btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
+btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
 {
+       xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
        OffsetNumber *unused;
        Buffer          ibuffer,
                                hbuffer;
        Page            ipage,
                                hpage;
+       RelFileNode rnode;
+       BlockNumber blkno;
        ItemId          iitemid,
                                hitemid;
        IndexTuple      itup;
@@ -588,9 +545,11 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
         * InvalidTransactionId to cancel all HS transactions.  That's probably
         * overkill, but it's safe, and certainly better than panicking here.
         */
-       ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+       ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
        if (!BufferIsValid(ibuffer))
                return InvalidTransactionId;
+       LockBuffer(ibuffer, BT_READ);
        ipage = (Page) BufferGetPage(ibuffer);
 
        /*
@@ -611,12 +570,13 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
                 * Locate the heap page that the index tuple points at
                 */
                hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
-               hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
+               hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
                if (!BufferIsValid(hbuffer))
                {
                        UnlockReleaseBuffer(ibuffer);
                        return InvalidTransactionId;
                }
+               LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
                hpage = (Page) BufferGetPage(hbuffer);
 
                /*
@@ -678,8 +638,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
 }
 
 static void
-btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_delete(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
@@ -698,21 +659,23 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
         */
        if (InHotStandby)
        {
-               TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec);
+               TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
+               RelFileNode rnode;
 
-               ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
+               XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+
+               ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
        }
 
        /*
         * We don't need to take a cleanup lock to apply these changes. See
         * nbtree/README for details.
         */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
 
-               if (record->xl_len > SizeOfBtreeDelete)
+               if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
                {
                        OffsetNumber *unused;
 
@@ -736,17 +699,15 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
-       BlockNumber parent;
        Buffer          buffer;
        Page            page;
        BTPageOpaque pageop;
        IndexTupleData trunctuple;
 
-       parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
-
        /*
         * In normal operation, we would lock all the pages this WAL record
         * touches before changing any of them.  In WAL replay, it should be okay
@@ -756,8 +717,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* parent page */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
        {
                OffsetNumber poffset;
                ItemId          itemid;
@@ -768,7 +728,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
                page = (Page) BufferGetPage(buffer);
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
-               poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               poffset = xlrec->poffset;
 
                nextoffset = OffsetNumberNext(poffset);
                itemid = PageGetItemId(page, nextoffset);
@@ -788,8 +748,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* Rewrite the leaf page as a halfdead page */
-       buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
        page = (Page) BufferGetPage(buffer);
 
        _bt_pageinit(page, BufferGetPageSize(buffer));
@@ -822,17 +781,16 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
 
 
 static void
-btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
-       BlockNumber target;
        BlockNumber leftsib;
        BlockNumber rightsib;
        Buffer          buffer;
        Page            page;
        BTPageOpaque pageop;
 
-       target = xlrec->deadblk;
        leftsib = xlrec->leftsib;
        rightsib = xlrec->rightsib;
 
@@ -845,8 +803,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* Fix left-link of right sibling */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -861,8 +818,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
        /* Fix right-link of left sibling, if any */
        if (leftsib != P_NONE)
        {
-               if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer)
-                       == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        page = (Page) BufferGetPage(buffer);
                        pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -876,8 +832,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* Rewrite target page as empty deleted page */
-       buffer = XLogReadBuffer(xlrec->node, target, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
        page = (Page) BufferGetPage(buffer);
 
        _bt_pageinit(page, BufferGetPageSize(buffer));
@@ -898,7 +853,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
         * itself, update the leaf to point to the next remaining child in the
         * branch.
         */
-       if (target != xlrec->leafblk)
+       if (XLogRecHasBlockRef(record, 3))
        {
                /*
                 * There is no real data on the page, so we just re-create it from
@@ -906,8 +861,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
                 */
                IndexTupleData trunctuple;
 
-               buffer = XLogReadBuffer(xlrec->node, xlrec->leafblk, true);
-               Assert(BufferIsValid(buffer));
+               buffer = XLogInitBufferForRedo(record, 3);
                page = (Page) BufferGetPage(buffer);
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
@@ -936,27 +890,21 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
 
        /* Update metapage if needed */
        if (info == XLOG_BTREE_UNLINK_PAGE_META)
-       {
-               xl_btree_metadata md;
-
-               memcpy(&md, (char *) xlrec + SizeOfBtreeUnlinkPage,
-                          sizeof(xl_btree_metadata));
-               _bt_restore_meta(xlrec->node, lsn,
-                                                md.root, md.level,
-                                                md.fastroot, md.fastlevel);
-       }
+               _bt_restore_meta(record, 4);
 }
 
 static void
-btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_newroot(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
        BTPageOpaque pageop;
+       char       *ptr;
+       Size            len;
 
-       buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
        page = (Page) BufferGetPage(buffer);
 
        _bt_pageinit(page, BufferGetPageSize(buffer));
@@ -969,34 +917,24 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
                pageop->btpo_flags |= BTP_LEAF;
        pageop->btpo_cycleid = 0;
 
-       if (record->xl_len > SizeOfBtreeNewroot)
+       if (xlrec->level > 0)
        {
-               IndexTuple      itup;
-               BlockNumber cblkno;
-
-               _bt_restore_page(page,
-                                                (char *) xlrec + SizeOfBtreeNewroot,
-                                                record->xl_len - SizeOfBtreeNewroot);
-               /* extract block number of the left-hand split page */
-               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY));
-               cblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
-               Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
+               ptr = XLogRecGetBlockData(record, 0, &len);
+               _bt_restore_page(page, ptr, len);
 
                /* Clear the incomplete-split flag in left child */
-               _bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno);
+               _bt_clear_incomplete_split(record, 1);
        }
 
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 
-       _bt_restore_meta(xlrec->node, lsn,
-                                        xlrec->rootblk, xlrec->level,
-                                        xlrec->rootblk, xlrec->level);
+       _bt_restore_meta(record, 2);
 }
 
 static void
-btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
+btree_xlog_reuse_page(XLogReaderState *record)
 {
        xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
 
@@ -1015,58 +953,55 @@ btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
                ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
                                                                                        xlrec->node);
        }
-
-       /* Backup blocks are not used in reuse_page records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
 }
 
 
 void
-btree_redo(XLogRecPtr lsn, XLogRecord *record)
+btree_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info)
        {
                case XLOG_BTREE_INSERT_LEAF:
-                       btree_xlog_insert(true, false, lsn, record);
+                       btree_xlog_insert(true, false, record);
                        break;
                case XLOG_BTREE_INSERT_UPPER:
-                       btree_xlog_insert(false, false, lsn, record);
+                       btree_xlog_insert(false, false, record);
                        break;
                case XLOG_BTREE_INSERT_META:
-                       btree_xlog_insert(false, true, lsn, record);
+                       btree_xlog_insert(false, true, record);
                        break;
                case XLOG_BTREE_SPLIT_L:
-                       btree_xlog_split(true, false, lsn, record);
+                       btree_xlog_split(true, false, record);
                        break;
                case XLOG_BTREE_SPLIT_R:
-                       btree_xlog_split(false, false, lsn, record);
+                       btree_xlog_split(false, false, record);
                        break;
                case XLOG_BTREE_SPLIT_L_ROOT:
-                       btree_xlog_split(true, true, lsn, record);
+                       btree_xlog_split(true, true, record);
                        break;
                case XLOG_BTREE_SPLIT_R_ROOT:
-                       btree_xlog_split(false, true, lsn, record);
+                       btree_xlog_split(false, true, record);
                        break;
                case XLOG_BTREE_VACUUM:
-                       btree_xlog_vacuum(lsn, record);
+                       btree_xlog_vacuum(record);
                        break;
                case XLOG_BTREE_DELETE:
-                       btree_xlog_delete(lsn, record);
+                       btree_xlog_delete(record);
                        break;
                case XLOG_BTREE_MARK_PAGE_HALFDEAD:
-                       btree_xlog_mark_page_halfdead(info, lsn, record);
+                       btree_xlog_mark_page_halfdead(info, record);
                        break;
                case XLOG_BTREE_UNLINK_PAGE:
                case XLOG_BTREE_UNLINK_PAGE_META:
-                       btree_xlog_unlink_page(info, lsn, record);
+                       btree_xlog_unlink_page(info, record);
                        break;
                case XLOG_BTREE_NEWROOT:
-                       btree_xlog_newroot(lsn, record);
+                       btree_xlog_newroot(record);
                        break;
                case XLOG_BTREE_REUSE_PAGE:
-                       btree_xlog_reuse_page(lsn, record);
+                       btree_xlog_reuse_page(record);
                        break;
                default:
                        elog(PANIC, "btree_redo: unknown op code %u", info);
index 97dc3c0fa9115290db63ae412fd047d2601c6596..6cda6f8ffd968c6dfeb9b9f31205bd5c582905d7 100644 (file)
 #include "access/brin_xlog.h"
 
 void
-brin_desc(StringInfo buf, XLogRecord *record)
+brin_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        info &= XLOG_BRIN_OPMASK;
        if (info == XLOG_BRIN_CREATE_INDEX)
        {
                xl_brin_createidx *xlrec = (xl_brin_createidx *) rec;
 
-               appendStringInfo(buf, "v%d pagesPerRange %u rel %u/%u/%u",
-                                                xlrec->version, xlrec->pagesPerRange,
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode);
+               appendStringInfo(buf, "v%d pagesPerRange %u",
+                                                xlrec->version, xlrec->pagesPerRange);
        }
        else if (info == XLOG_BRIN_INSERT)
        {
                xl_brin_insert *xlrec = (xl_brin_insert *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode,
-                                                xlrec->heapBlk, xlrec->revmapBlk,
+               appendStringInfo(buf, "heapBlk %u pagesPerRange %u offnum %u",
+                                                xlrec->heapBlk,
                                                 xlrec->pagesPerRange,
-                                                ItemPointerGetBlockNumber(&xlrec->tid),
-                                                ItemPointerGetOffsetNumber(&xlrec->tid));
+                                                xlrec->offnum);
        }
        else if (info == XLOG_BRIN_UPDATE)
        {
                xl_brin_update *xlrec = (xl_brin_update *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
-                                                xlrec->insert.node.spcNode, xlrec->insert.node.dbNode,
-                                                xlrec->insert.node.relNode,
-                                                xlrec->insert.heapBlk, xlrec->insert.revmapBlk,
+               appendStringInfo(buf, "heapBlk %u pagesPerRange %u old offnum %u, new offnum %u",
+                                                xlrec->insert.heapBlk,
                                                 xlrec->insert.pagesPerRange,
-                                                ItemPointerGetBlockNumber(&xlrec->oldtid),
-                                                ItemPointerGetOffsetNumber(&xlrec->oldtid),
-                                                ItemPointerGetBlockNumber(&xlrec->insert.tid),
-                                                ItemPointerGetOffsetNumber(&xlrec->insert.tid));
+                                                xlrec->oldOffnum,
+                                                xlrec->insert.offnum);
        }
        else if (info == XLOG_BRIN_SAMEPAGE_UPDATE)
        {
                xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u TID (%u,%u)",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode,
-                                                ItemPointerGetBlockNumber(&xlrec->tid),
-                                                ItemPointerGetOffsetNumber(&xlrec->tid));
+               appendStringInfo(buf, "offnum %u", xlrec->offnum);
        }
        else if (info == XLOG_BRIN_REVMAP_EXTEND)
        {
                xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u targetBlk %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->targetBlk);
+               appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
        }
 }
 
index 4a12e286e4aafc9918fc9d7800ae90d13161f1e3..8de72963e6f66dd01289c64b42a7d854281766a9 100644 (file)
 
 
 void
-clog_desc(StringInfo buf, XLogRecord *record)
+clog_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE)
        {
index 446e5f97f41438e170488fc487cfda895ef38802..ee1d83baa4cdf3a3f6b0120f3afaa8d060df8780 100644 (file)
 
 
 void
-dbase_desc(StringInfo buf, XLogRecord *record)
+dbase_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_DBASE_CREATE)
        {
index 2f783cee2bbd77477f274d91103d9d1a0dd418af..8754214f644288216b8a598704b5340eebcf3d95 100644 (file)
 #include "postgres.h"
 
 #include "access/gin_private.h"
+#include "access/xlogutils.h"
 #include "lib/stringinfo.h"
 #include "storage/relfilenode.h"
 
-static void
-desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
-{
-       appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
-                                        node.spcNode, node.dbNode, node.relNode, blkno);
-}
-
 static void
 desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
 {
@@ -77,26 +71,25 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
 }
 
 void
-gin_desc(StringInfo buf, XLogRecord *record)
+gin_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info)
        {
                case XLOG_GIN_CREATE_INDEX:
-                       desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
+                       /* no further information */
                        break;
                case XLOG_GIN_CREATE_PTREE:
-                       desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_INSERT:
                        {
                                ginxlogInsert *xlrec = (ginxlogInsert *) rec;
                                char       *payload = rec + sizeof(ginxlogInsert);
 
-                               desc_node(buf, xlrec->node, xlrec->blkno);
-                               appendStringInfo(buf, " isdata: %c isleaf: %c",
+                               appendStringInfo(buf, "isdata: %c isleaf: %c",
                                                          (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
                                                         (xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
                                if (!(xlrec->flags & GIN_INSERT_ISLEAF))
@@ -119,7 +112,7 @@ gin_desc(StringInfo buf, XLogRecord *record)
                                        ginxlogRecompressDataLeaf *insertData =
                                        (ginxlogRecompressDataLeaf *) payload;
 
-                                       if (record->xl_info & XLR_BKP_BLOCK(0))
+                                       if (XLogRecHasBlockImage(record, 0))
                                                appendStringInfo(buf, " (full page image)");
                                        else
                                                desc_recompress_leaf(buf, insertData);
@@ -139,39 +132,38 @@ gin_desc(StringInfo buf, XLogRecord *record)
                        {
                                ginxlogSplit *xlrec = (ginxlogSplit *) rec;
 
-                               desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
-                               appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
+                               appendStringInfo(buf, "isrootsplit: %c",
+                               (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
                                appendStringInfo(buf, " isdata: %c isleaf: %c",
                                                          (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
                                                         (xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
                        }
                        break;
                case XLOG_GIN_VACUUM_PAGE:
-                       desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
                        {
                                ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
 
-                               desc_node(buf, xlrec->node, xlrec->blkno);
-                               if (record->xl_info & XLR_BKP_BLOCK(0))
+                               if (XLogRecHasBlockImage(record, 0))
                                        appendStringInfo(buf, " (full page image)");
                                else
                                        desc_recompress_leaf(buf, &xlrec->data);
                        }
                        break;
                case XLOG_GIN_DELETE_PAGE:
-                       desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_UPDATE_META_PAGE:
-                       desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, GIN_METAPAGE_BLKNO);
+                       /* no further information */
                        break;
                case XLOG_GIN_INSERT_LISTPAGE:
-                       desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_DELETE_LISTPAGE:
-                       appendStringInfo(buf, "%d pages, ", ((ginxlogDeleteListPages *) rec)->ndeleted);
-                       desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, GIN_METAPAGE_BLKNO);
+                       appendStringInfo(buf, "ndeleted: %d",
+                                                        ((ginxlogDeleteListPages *) rec)->ndeleted);
                        break;
        }
 }
index db3ba13ccdd0bcf0f37ee29a83e597ae126d4720..576c644c2ac758325a287df50c4bb19849e94c81 100644 (file)
 #include "lib/stringinfo.h"
 #include "storage/relfilenode.h"
 
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
-       appendStringInfo(buf, "rel %u/%u/%u",
-                                        node.spcNode, node.dbNode, node.relNode);
-}
-
 static void
 out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
 {
-       out_target(buf, xlrec->node);
-       appendStringInfo(buf, "; block number %u", xlrec->blkno);
 }
 
 static void
 out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
 {
-       appendStringInfoString(buf, "page_split: ");
-       out_target(buf, xlrec->node);
-       appendStringInfo(buf, "; block number %u splits to %d pages",
-                                        xlrec->origblkno, xlrec->npage);
+       appendStringInfo(buf, "page_split: splits to %d pages",
+                                        xlrec->npage);
 }
 
 void
-gist_desc(StringInfo buf, XLogRecord *record)
+gist_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info)
        {
@@ -56,10 +45,6 @@ gist_desc(StringInfo buf, XLogRecord *record)
                        out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
                        break;
                case XLOG_GIST_CREATE_INDEX:
-                       appendStringInfo(buf, "rel %u/%u/%u",
-                                                        ((RelFileNode *) rec)->spcNode,
-                                                        ((RelFileNode *) rec)->dbNode,
-                                                        ((RelFileNode *) rec)->relNode);
                        break;
        }
 }
index c58461c6ffc1a1593daeb710d5dd1431d22b68cf..71afaa9cbd6ff2e8a6af994aa8a06db2847d15a9 100644 (file)
@@ -17,7 +17,7 @@
 #include "access/hash.h"
 
 void
-hash_desc(StringInfo buf, XLogRecord *record)
+hash_desc(StringInfo buf, XLogReaderState *record)
 {
 }
 
index ee2c073f71f73564f113f7da8166063cba2db48d..958b0b0e85cd6443bf143d07f416df5a02a52cd6 100644 (file)
 
 #include "access/heapam_xlog.h"
 
-static void
-out_target(StringInfo buf, xl_heaptid *target)
-{
-       appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
-                        target->node.spcNode, target->node.dbNode, target->node.relNode,
-                                        ItemPointerGetBlockNumber(&(target->tid)),
-                                        ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
 static void
 out_infobits(StringInfo buf, uint8 infobits)
 {
@@ -41,23 +32,23 @@ out_infobits(StringInfo buf, uint8 infobits)
 }
 
 void
-heap_desc(StringInfo buf, XLogRecord *record)
+heap_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        info &= XLOG_HEAP_OPMASK;
        if (info == XLOG_HEAP_INSERT)
        {
                xl_heap_insert *xlrec = (xl_heap_insert *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
        }
        else if (info == XLOG_HEAP_DELETE)
        {
                xl_heap_delete *xlrec = (xl_heap_delete *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
                appendStringInfoChar(buf, ' ');
                out_infobits(buf, xlrec->infobits_set);
        }
@@ -65,24 +56,24 @@ heap_desc(StringInfo buf, XLogRecord *record)
        {
                xl_heap_update *xlrec = (xl_heap_update *) rec;
 
-               out_target(buf, &(xlrec->target));
-               appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+               appendStringInfo(buf, "off %u xmax %u",
+                                                xlrec->old_offnum,
+                                                xlrec->old_xmax);
                out_infobits(buf, xlrec->old_infobits_set);
-               appendStringInfo(buf, "; new tid %u/%u xmax %u",
-                                                ItemPointerGetBlockNumber(&(xlrec->newtid)),
-                                                ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+               appendStringInfo(buf, "; new off %u xmax %u",
+                                                xlrec->new_offnum,
                                                 xlrec->new_xmax);
        }
        else if (info == XLOG_HEAP_HOT_UPDATE)
        {
                xl_heap_update *xlrec = (xl_heap_update *) rec;
 
-               out_target(buf, &(xlrec->target));
-               appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+               appendStringInfo(buf, "off %u xmax %u",
+                                                xlrec->old_offnum,
+                                                xlrec->old_xmax);
                out_infobits(buf, xlrec->old_infobits_set);
-               appendStringInfo(buf, "; new tid %u/%u xmax %u",
-                                                ItemPointerGetBlockNumber(&(xlrec->newtid)),
-                                                ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+               appendStringInfo(buf, "; new off %u xmax %u",
+                                                xlrec->new_offnum,
                                                 xlrec->new_xmax);
        }
        else if (info == XLOG_HEAP_LOCK)
@@ -90,40 +81,34 @@ heap_desc(StringInfo buf, XLogRecord *record)
                xl_heap_lock *xlrec = (xl_heap_lock *) rec;
 
                appendStringInfo(buf, "xid %u: ", xlrec->locking_xid);
-               out_target(buf, &(xlrec->target));
-               appendStringInfoChar(buf, ' ');
+               appendStringInfo(buf, "off %u ", xlrec->offnum);
                out_infobits(buf, xlrec->infobits_set);
        }
        else if (info == XLOG_HEAP_INPLACE)
        {
                xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
        }
 }
 void
-heap2_desc(StringInfo buf, XLogRecord *record)
+heap2_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        info &= XLOG_HEAP_OPMASK;
        if (info == XLOG_HEAP2_CLEAN)
        {
                xl_heap_clean *xlrec = (xl_heap_clean *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->block,
-                                                xlrec->latestRemovedXid);
+               appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid);
        }
        else if (info == XLOG_HEAP2_FREEZE_PAGE)
        {
                xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->block,
+               appendStringInfo(buf, "cutoff xid %u ntuples %u",
                                                 xlrec->cutoff_xid, xlrec->ntuples);
        }
        else if (info == XLOG_HEAP2_CLEANUP_INFO)
@@ -136,17 +121,13 @@ heap2_desc(StringInfo buf, XLogRecord *record)
        {
                xl_heap_visible *xlrec = (xl_heap_visible *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->block);
+               appendStringInfo(buf, "cutoff xid %u", xlrec->cutoff_xid);
        }
        else if (info == XLOG_HEAP2_MULTI_INSERT)
        {
                xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples",
-                               xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
-                                                xlrec->blkno, xlrec->ntuples);
+               appendStringInfo(buf, "%d tuples", xlrec->ntuples);
        }
        else if (info == XLOG_HEAP2_LOCK_UPDATED)
        {
@@ -154,13 +135,18 @@ heap2_desc(StringInfo buf, XLogRecord *record)
 
                appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax,
                                                 xlrec->infobits_set);
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
        }
        else if (info == XLOG_HEAP2_NEW_CID)
        {
                xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
+                                                xlrec->target_node.spcNode,
+                                                xlrec->target_node.dbNode,
+                                                xlrec->target_node.relNode,
+                                                ItemPointerGetBlockNumber(&(xlrec->target_tid)),
+                                                ItemPointerGetOffsetNumber(&(xlrec->target_tid)));
                appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u",
                                                 xlrec->cmin, xlrec->cmax, xlrec->combocid);
        }
index afc5aca1972e5a84e4269e8ad2110d1bf58d633e..0902cb73c6a2102a656155c7a9cb88f7d735a413 100644 (file)
@@ -47,10 +47,10 @@ out_member(StringInfo buf, MultiXactMember *member)
 }
 
 void
-multixact_desc(StringInfo buf, XLogRecord *record)
+multixact_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE ||
                info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
index 8b63f2b6ba9fbc2570456dfc860648fb1acb8d79..85795f6409de68095adc05d1fedcc5daa4599230 100644 (file)
 
 #include "access/nbtree.h"
 
-static void
-out_target(StringInfo buf, xl_btreetid *target)
-{
-       appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
-                        target->node.spcNode, target->node.dbNode, target->node.relNode,
-                                        ItemPointerGetBlockNumber(&(target->tid)),
-                                        ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
 void
-btree_desc(StringInfo buf, XLogRecord *record)
+btree_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info)
        {
@@ -39,7 +30,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_insert *xlrec = (xl_btree_insert *) rec;
 
-                               out_target(buf, &(xlrec->target));
+                               appendStringInfo(buf, "off %u", xlrec->offnum);
                                break;
                        }
                case XLOG_BTREE_SPLIT_L:
@@ -49,11 +40,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_split *xlrec = (xl_btree_split *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u ",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode);
-                               appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
-                                                                xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
+                               appendStringInfo(buf, "level %u, firstright %d",
                                                                 xlrec->level, xlrec->firstright);
                                break;
                        }
@@ -61,9 +48,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u; blk %u, lastBlockVacuumed %u",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode, xlrec->block,
+                               appendStringInfo(buf, "lastBlockVacuumed %u",
                                                                 xlrec->lastBlockVacuumed);
                                break;
                        }
@@ -71,18 +56,14 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_delete *xlrec = (xl_btree_delete *) rec;
 
-                               appendStringInfo(buf, "index %u/%u/%u; iblk %u, heap %u/%u/%u;",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
-                                                                xlrec->block,
-                                                                xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
+                               appendStringInfo(buf, "%d items", xlrec->nitems);
                                break;
                        }
                case XLOG_BTREE_MARK_PAGE_HALFDEAD:
                        {
                                xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec;
 
-                               out_target(buf, &(xlrec->target));
-                               appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
+                               appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u",
                                                                 xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
                                break;
                        }
@@ -91,22 +72,19 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u; ",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
-                               appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
-                                                                xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
-                               appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",
-                                                                xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
+                               appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ",
+                                                                xlrec->leftsib, xlrec->rightsib,
+                                                                xlrec->btpo_xact);
+                               appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u",
+                                                                xlrec->leafleftsib, xlrec->leafrightsib,
+                                                                xlrec->topparent);
                                break;
                        }
                case XLOG_BTREE_NEWROOT:
                        {
                                xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u; root %u lev %u",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode,
-                                                                xlrec->rootblk, xlrec->level);
+                               appendStringInfo(buf, "lev %u", xlrec->level);
                                break;
                        }
                case XLOG_BTREE_REUSE_PAGE:
@@ -115,7 +93,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
 
                                appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u",
                                                                 xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode, xlrec->latestRemovedXid);
+                                                          xlrec->node.relNode, xlrec->latestRemovedXid);
                                break;
                        }
        }
index ef7c533fe5f05a56c13ace5339476115078f6920..5bda1da25c408778cd2824f03aa27f2b4b508bbf 100644 (file)
 #include "utils/relmapper.h"
 
 void
-relmap_desc(StringInfo buf, XLogRecord *record)
+relmap_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_RELMAP_UPDATE)
        {
index 73de3969df47cd03bd38ec43953fc3d9876f2797..b8da96310cb99cd44386fd7a496bda6a78b720b0 100644 (file)
 
 
 void
-seq_desc(StringInfo buf, XLogRecord *record)
+seq_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
        xl_seq_rec *xlrec = (xl_seq_rec *) rec;
 
        if (info == XLOG_SEQ_LOG)
index 109e3eaf04df81afb2de23f3ae49458464473c45..4e8c06f5b90820ab2f469dbdb5810a3fbe525119 100644 (file)
 
 
 void
-smgr_desc(StringInfo buf, XLogRecord *record)
+smgr_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_SMGR_CREATE)
        {
index 3ee0427dcb6a798a237ae411ac1230a59fa0ce69..319c5f9d709be3a0c411ee6d63a40dcb7cdd0349 100644 (file)
 
 #include "access/spgist_private.h"
 
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
-       appendStringInfo(buf, "rel %u/%u/%u ",
-                                        node.spcNode, node.dbNode, node.relNode);
-}
-
 void
-spg_desc(StringInfo buf, XLogRecord *record)
+spg_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        switch (info)
        {
                case XLOG_SPGIST_CREATE_INDEX:
-                       appendStringInfo(buf, "rel %u/%u/%u",
-                                                        ((RelFileNode *) rec)->spcNode,
-                                                        ((RelFileNode *) rec)->dbNode,
-                                                        ((RelFileNode *) rec)->relNode);
                        break;
                case XLOG_SPGIST_ADD_LEAF:
-                       out_target(buf, ((spgxlogAddLeaf *) rec)->node);
-                       appendStringInfo(buf, "%u",
-                                                        ((spgxlogAddLeaf *) rec)->blknoLeaf);
+                       {
+                               spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
+
+                               appendStringInfo(buf, "add leaf to page");
+                               appendStringInfo(buf, "; off %u; headoff %u; parentoff %u",
+                                                                xlrec->offnumLeaf, xlrec->offnumHeadLeaf,
+                                                                xlrec->offnumParent);
+                               if (xlrec->newPage)
+                                       appendStringInfo(buf, " (newpage)");
+                               if (xlrec->storesNulls)
+                                       appendStringInfo(buf, " (nulls)");
+                       }
                        break;
                case XLOG_SPGIST_MOVE_LEAFS:
-                       out_target(buf, ((spgxlogMoveLeafs *) rec)->node);
-                       appendStringInfo(buf, "%u leafs from page %u to page %u",
-                                                        ((spgxlogMoveLeafs *) rec)->nMoves,
-                                                        ((spgxlogMoveLeafs *) rec)->blknoSrc,
-                                                        ((spgxlogMoveLeafs *) rec)->blknoDst);
+                       appendStringInfo(buf, "%u leafs",
+                                                        ((spgxlogMoveLeafs *) rec)->nMoves);
                        break;
                case XLOG_SPGIST_ADD_NODE:
-                       out_target(buf, ((spgxlogAddNode *) rec)->node);
-                       appendStringInfo(buf, "%u:%u",
-                                                        ((spgxlogAddNode *) rec)->blkno,
+                       appendStringInfo(buf, "off %u",
                                                         ((spgxlogAddNode *) rec)->offnum);
                        break;
                case XLOG_SPGIST_SPLIT_TUPLE:
-                       out_target(buf, ((spgxlogSplitTuple *) rec)->node);
-                       appendStringInfo(buf, "%u:%u to %u:%u",
-                                                        ((spgxlogSplitTuple *) rec)->blknoPrefix,
+                       appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)",
                                                         ((spgxlogSplitTuple *) rec)->offnumPrefix,
-                                                        ((spgxlogSplitTuple *) rec)->blknoPostfix,
-                                                        ((spgxlogSplitTuple *) rec)->offnumPostfix);
+                                                        ((spgxlogSplitTuple *) rec)->offnumPostfix,
+                                                        ((spgxlogSplitTuple *) rec)->postfixBlkSame,
+                                                        ((spgxlogSplitTuple *) rec)->newPage
+                               );
                        break;
                case XLOG_SPGIST_PICKSPLIT:
-                       out_target(buf, ((spgxlogPickSplit *) rec)->node);
+                       {
+                               spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec;
+
+                               appendStringInfo(buf, "ndel %u; nins %u",
+                                                                xlrec->nDelete, xlrec->nInsert);
+                               if (xlrec->innerIsParent)
+                                       appendStringInfo(buf, " (innerIsParent)");
+                               if (xlrec->isRootSplit)
+                                       appendStringInfo(buf, " (isRootSplit)");
+                       }
                        break;
                case XLOG_SPGIST_VACUUM_LEAF:
-                       out_target(buf, ((spgxlogVacuumLeaf *) rec)->node);
-                       appendStringInfo(buf, "page %u",
-                                                        ((spgxlogVacuumLeaf *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_SPGIST_VACUUM_ROOT:
-                       out_target(buf, ((spgxlogVacuumRoot *) rec)->node);
-                       appendStringInfo(buf, "page %u",
-                                                        ((spgxlogVacuumRoot *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_SPGIST_VACUUM_REDIRECT:
-                       out_target(buf, ((spgxlogVacuumRedirect *) rec)->node);
-                       appendStringInfo(buf, "page %u, newest XID %u",
-                                                        ((spgxlogVacuumRedirect *) rec)->blkno,
+                       appendStringInfo(buf, "newest XID %u",
                                                 ((spgxlogVacuumRedirect *) rec)->newestRedirectXid);
                        break;
        }
index d09041f8dfc5d5969095b599cf594eb15daaf008..0ce1aa325c47d316c079a1f4fa1bb61b46aefdd8 100644 (file)
@@ -37,10 +37,10 @@ standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec)
 }
 
 void
-standby_desc(StringInfo buf, XLogRecord *record)
+standby_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_STANDBY_LOCK)
        {
index b6b0e6394df88f769fcb7af7a8e71212a7e878a3..8b2ebb4d926eb31f18efe1a97d136212885e34aa 100644 (file)
 
 
 void
-tblspc_desc(StringInfo buf, XLogRecord *record)
+tblspc_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_TBLSPC_CREATE)
        {
index 22a22efc7313c1db831b8020aec6920179881bc6..f5450a9b25074a3a81de1c4f115d4254dc348e9e 100644 (file)
@@ -137,10 +137,10 @@ xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
 }
 
 void
-xact_desc(StringInfo buf, XLogRecord *record)
+xact_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_XACT_COMMIT_COMPACT)
        {
index e0957ff3a8ce433c091772e313dffffe51191c94..4088ba99b7ffcb8a58245338ac31e8d3ac1bdab5 100644 (file)
@@ -32,10 +32,10 @@ const struct config_enum_entry wal_level_options[] = {
 };
 
 void
-xlog_desc(StringInfo buf, XLogRecord *record)
+xlog_desc(StringInfo buf, XLogReaderState *record)
 {
        char       *rec = XLogRecGetData(record);
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (info == XLOG_CHECKPOINT_SHUTDOWN ||
                info == XLOG_CHECKPOINT_ONLINE)
@@ -76,11 +76,7 @@ xlog_desc(StringInfo buf, XLogRecord *record)
        }
        else if (info == XLOG_FPI)
        {
-               BkpBlock   *bkp = (BkpBlock *) rec;
-
-               appendStringInfo(buf, "%s block %u",
-                                                relpathperm(bkp->node, bkp->fork),
-                                                bkp->block);
+               /* no further information to print */
        }
        else if (info == XLOG_BACKUP_END)
        {
index 21a071ab19932179758fb6b0e19009aa625043d6..1a17cc467ed17a4c329ee1671e23db0496bc4f53 100644 (file)
@@ -16,8 +16,8 @@
 #include "postgres.h"
 
 #include "access/genam.h"
-#include "access/xloginsert.h"
 #include "access/spgist_private.h"
+#include "access/xloginsert.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "utils/rel.h"
@@ -202,25 +202,17 @@ static void
 addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
                   SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
 {
-       XLogRecData rdata[4];
        spgxlogAddLeaf xlrec;
 
-       xlrec.node = index->rd_node;
-       xlrec.blknoLeaf = current->blkno;
        xlrec.newPage = isNew;
        xlrec.storesNulls = isNulls;
 
        /* these will be filled below as needed */
        xlrec.offnumLeaf = InvalidOffsetNumber;
        xlrec.offnumHeadLeaf = InvalidOffsetNumber;
-       xlrec.blknoParent = InvalidBlockNumber;
        xlrec.offnumParent = InvalidOffsetNumber;
        xlrec.nodeI = 0;
 
-       ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
-       ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1);
-       ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
        START_CRIT_SECTION();
 
        if (current->offnum == InvalidOffsetNumber ||
@@ -237,13 +229,10 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
                /* Must update parent's downlink if any */
                if (parent->buffer != InvalidBuffer)
                {
-                       xlrec.blknoParent = parent->blkno;
                        xlrec.offnumParent = parent->offnum;
                        xlrec.nodeI = parent->node;
 
                        saveNodeLink(index, parent, current->blkno, current->offnum);
-
-                       ACCEPT_RDATA_BUFFER(parent->buffer, 3);
                }
        }
        else
@@ -303,12 +292,20 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+               XLogRegisterData((char *) leafTuple, leafTuple->size);
+
+               XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+               if (xlrec.offnumParent != InvalidOffsetNumber)
+                       XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
 
                PageSetLSN(current->page, recptr);
 
                /* update parent only if we actually changed it */
-               if (xlrec.blknoParent != InvalidBlockNumber)
+               if (xlrec.offnumParent != InvalidOffsetNumber)
                {
                        PageSetLSN(parent->page, recptr);
                }
@@ -399,7 +396,6 @@ moveLeafs(Relation index, SpGistState *state,
        OffsetNumber *toDelete;
        OffsetNumber *toInsert;
        BlockNumber nblkno;
-       XLogRecData rdata[7];
        spgxlogMoveLeafs xlrec;
        char       *leafdata,
                           *leafptr;
@@ -455,20 +451,6 @@ moveLeafs(Relation index, SpGistState *state,
        nblkno = BufferGetBlockNumber(nbuf);
        Assert(nblkno != current->blkno);
 
-       /* prepare WAL info */
-       xlrec.node = index->rd_node;
-       STORE_STATE(state, xlrec.stateSrc);
-
-       xlrec.blknoSrc = current->blkno;
-       xlrec.blknoDst = nblkno;
-       xlrec.nMoves = nDelete;
-       xlrec.replaceDead = replaceDead;
-       xlrec.storesNulls = isNulls;
-
-       xlrec.blknoParent = parent->blkno;
-       xlrec.offnumParent = parent->offnum;
-       xlrec.nodeI = parent->node;
-
        leafdata = leafptr = palloc(size);
 
        START_CRIT_SECTION();
@@ -533,15 +515,29 @@ moveLeafs(Relation index, SpGistState *state,
        {
                XLogRecPtr      recptr;
 
-               ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0);
-               ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1);
-               ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
-               ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
-               ACCEPT_RDATA_BUFFER(current->buffer, 4);
-               ACCEPT_RDATA_BUFFER(nbuf, 5);
-               ACCEPT_RDATA_BUFFER(parent->buffer, 6);
+               /* prepare WAL info */
+               STORE_STATE(state, xlrec.stateSrc);
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata);
+               xlrec.nMoves = nDelete;
+               xlrec.replaceDead = replaceDead;
+               xlrec.storesNulls = isNulls;
+
+               xlrec.offnumParent = parent->offnum;
+               xlrec.nodeI = parent->node;
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
+               XLogRegisterData((char *) toDelete,
+                                                sizeof(OffsetNumber) * nDelete);
+               XLogRegisterData((char *) toInsert,
+                                                sizeof(OffsetNumber) * nInsert);
+               XLogRegisterData((char *) leafdata, leafptr - leafdata);
+
+               XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+               XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
+               XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
 
                PageSetLSN(current->page, recptr);
                PageSetLSN(npage, recptr);
@@ -701,8 +697,6 @@ doPickSplit(Relation index, SpGistState *state,
        int                     currentFreeSpace;
        int                     totalLeafSizes;
        bool            allTheSame;
-       XLogRecData rdata[10];
-       int                     nRdata;
        spgxlogPickSplit xlrec;
        char       *leafdata,
                           *leafptr;
@@ -725,7 +719,6 @@ doPickSplit(Relation index, SpGistState *state,
        newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
        leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
 
-       xlrec.node = index->rd_node;
        STORE_STATE(state, xlrec.stateSrc);
 
        /*
@@ -971,10 +964,6 @@ doPickSplit(Relation index, SpGistState *state,
        }
 
        /*
-        * Because a WAL record can't involve more than four buffers, we can only
-        * afford to deal with two leaf pages in each picksplit action, ie the
-        * current page and at most one other.
-        *
         * The new leaf tuples converted from the existing ones should require the
         * same or less space, and therefore should all fit onto one page
         * (although that's not necessarily the current page, since we can't
@@ -1108,17 +1097,13 @@ doPickSplit(Relation index, SpGistState *state,
        }
 
        /* Start preparing WAL record */
-       xlrec.blknoSrc = current->blkno;
-       xlrec.blknoDest = InvalidBlockNumber;
        xlrec.nDelete = 0;
        xlrec.initSrc = isNew;
        xlrec.storesNulls = isNulls;
+       xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
 
        leafdata = leafptr = (char *) palloc(totalLeafSizes);
 
-       ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
-       nRdata = 1;
-
        /* Here we begin making the changes to the target pages */
        START_CRIT_SECTION();
 
@@ -1150,12 +1135,6 @@ doPickSplit(Relation index, SpGistState *state,
                else
                {
                        xlrec.nDelete = nToDelete;
-                       ACCEPT_RDATA_DATA(toDelete,
-                                                         sizeof(OffsetNumber) * nToDelete,
-                                                         nRdata);
-                       nRdata++;
-                       ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
-                       nRdata++;
 
                        if (!state->isBuild)
                        {
@@ -1240,25 +1219,8 @@ doPickSplit(Relation index, SpGistState *state,
        if (newLeafBuffer != InvalidBuffer)
        {
                MarkBufferDirty(newLeafBuffer);
-               /* also save block number for WAL */
-               xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer);
-               if (!xlrec.initDest)
-               {
-                       ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata);
-                       nRdata++;
-               }
        }
 
-       xlrec.nInsert = nToInsert;
-       ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
-       nRdata++;
-       ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
-       nRdata++;
-       ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, nRdata);
-       nRdata++;
-       ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
-       nRdata++;
-
        /* Remember current buffer, since we're about to change "current" */
        saveCurrent = *current;
 
@@ -1276,7 +1238,6 @@ doPickSplit(Relation index, SpGistState *state,
                current->blkno = parent->blkno;
                current->buffer = parent->buffer;
                current->page = parent->page;
-               xlrec.blknoInner = current->blkno;
                xlrec.offnumInner = current->offnum =
                        SpGistPageAddNewItem(state, current->page,
                                                                 (Item) innerTuple, innerTuple->size,
@@ -1285,14 +1246,11 @@ doPickSplit(Relation index, SpGistState *state,
                /*
                 * Update parent node link and mark parent page dirty
                 */
-               xlrec.blknoParent = parent->blkno;
+               xlrec.innerIsParent = true;
                xlrec.offnumParent = parent->offnum;
                xlrec.nodeI = parent->node;
                saveNodeLink(index, parent, current->blkno, current->offnum);
 
-               ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
-               nRdata++;
-
                /*
                 * Update redirection link (in old current buffer)
                 */
@@ -1314,7 +1272,6 @@ doPickSplit(Relation index, SpGistState *state,
                current->buffer = newInnerBuffer;
                current->blkno = BufferGetBlockNumber(current->buffer);
                current->page = BufferGetPage(current->buffer);
-               xlrec.blknoInner = current->blkno;
                xlrec.offnumInner = current->offnum =
                        SpGistPageAddNewItem(state, current->page,
                                                                 (Item) innerTuple, innerTuple->size,
@@ -1326,16 +1283,11 @@ doPickSplit(Relation index, SpGistState *state,
                /*
                 * Update parent node link and mark parent page dirty
                 */
-               xlrec.blknoParent = parent->blkno;
+               xlrec.innerIsParent = (parent->buffer == current->buffer);
                xlrec.offnumParent = parent->offnum;
                xlrec.nodeI = parent->node;
                saveNodeLink(index, parent, current->blkno, current->offnum);
 
-               ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
-               nRdata++;
-               ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
-               nRdata++;
-
                /*
                 * Update redirection link (in old current buffer)
                 */
@@ -1357,8 +1309,8 @@ doPickSplit(Relation index, SpGistState *state,
 
                SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
                xlrec.initInner = true;
+               xlrec.innerIsParent = false;
 
-               xlrec.blknoInner = current->blkno;
                xlrec.offnumInner = current->offnum =
                        PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
                                                InvalidOffsetNumber, false, false);
@@ -1367,7 +1319,6 @@ doPickSplit(Relation index, SpGistState *state,
                                 innerTuple->size);
 
                /* No parent link to update, nor redirection to do */
-               xlrec.blknoParent = InvalidBlockNumber;
                xlrec.offnumParent = InvalidOffsetNumber;
                xlrec.nodeI = 0;
 
@@ -1381,9 +1332,46 @@ doPickSplit(Relation index, SpGistState *state,
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
+               int                     flags;
+
+               XLogBeginInsert();
+
+               xlrec.nInsert = nToInsert;
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
+
+               XLogRegisterData((char *) toDelete,
+                                                sizeof(OffsetNumber) * xlrec.nDelete);
+               XLogRegisterData((char *) toInsert,
+                                                sizeof(OffsetNumber) * xlrec.nInsert);
+               XLogRegisterData((char *) leafPageSelect,
+                                                sizeof(uint8) * xlrec.nInsert);
+               XLogRegisterData((char *) innerTuple, innerTuple->size);
+               XLogRegisterData(leafdata, leafptr - leafdata);
+
+               flags = REGBUF_STANDARD;
+               if (xlrec.initSrc)
+                       flags |= REGBUF_WILL_INIT;
+               if (BufferIsValid(saveCurrent.buffer))
+                       XLogRegisterBuffer(0, saveCurrent.buffer, flags);
+
+               if (BufferIsValid(newLeafBuffer))
+               {
+                       flags = REGBUF_STANDARD;
+                       if (xlrec.initDest)
+                               flags |= REGBUF_WILL_INIT;
+                       XLogRegisterBuffer(1, newLeafBuffer, flags);
+               }
+               XLogRegisterBuffer(2, current->buffer, REGBUF_STANDARD);
+               if (parent->buffer != InvalidBuffer)
+               {
+                       if (parent->buffer != current->buffer)
+                               XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
+                       else
+                               Assert(xlrec.innerIsParent);
+               }
 
                /* Issue the WAL record */
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata);
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
 
                /* Update page LSNs on all affected pages */
                if (newLeafBuffer != InvalidBuffer)
@@ -1489,7 +1477,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
                                 int nodeN, Datum nodeLabel)
 {
        SpGistInnerTuple newInnerTuple;
-       XLogRecData rdata[5];
        spgxlogAddNode xlrec;
 
        /* Should not be applied to nulls */
@@ -1499,25 +1486,18 @@ spgAddNodeAction(Relation index, SpGistState *state,
        newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
 
        /* Prepare WAL record */
-       xlrec.node = index->rd_node;
        STORE_STATE(state, xlrec.stateSrc);
-       xlrec.blkno = current->blkno;
        xlrec.offnum = current->offnum;
 
        /* we don't fill these unless we need to change the parent downlink */
-       xlrec.blknoParent = InvalidBlockNumber;
+       xlrec.parentBlk = -1;
        xlrec.offnumParent = InvalidOffsetNumber;
        xlrec.nodeI = 0;
 
        /* we don't fill these unless tuple has to be moved */
-       xlrec.blknoNew = InvalidBlockNumber;
        xlrec.offnumNew = InvalidOffsetNumber;
        xlrec.newPage = false;
 
-       ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
-       ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1);
-       ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
        if (PageGetExactFreeSpace(current->page) >=
                newInnerTuple->size - innerTuple->size)
        {
@@ -1539,7 +1519,13 @@ spgAddNodeAction(Relation index, SpGistState *state,
                {
                        XLogRecPtr      recptr;
 
-                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+                       XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+                       XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+
+                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
 
                        PageSetLSN(current->page, recptr);
                }
@@ -1565,7 +1551,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
 
                saveCurrent = *current;
 
-               xlrec.blknoParent = parent->blkno;
                xlrec.offnumParent = parent->offnum;
                xlrec.nodeI = parent->node;
 
@@ -1580,8 +1565,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
                current->blkno = BufferGetBlockNumber(current->buffer);
                current->page = BufferGetPage(current->buffer);
 
-               xlrec.blknoNew = current->blkno;
-
                /*
                 * Let's just make real sure new current isn't same as old.  Right now
                 * that's impossible, but if SpGistGetBuffer ever got smart enough to
@@ -1590,17 +1573,19 @@ spgAddNodeAction(Relation index, SpGistState *state,
                 * replay would be subtly wrong, so I think a mere assert isn't enough
                 * here.
                 */
-               if (xlrec.blknoNew == xlrec.blkno)
+               if (current->blkno == saveCurrent.blkno)
                        elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
 
                /*
                 * New current and parent buffer will both be modified; but note that
                 * parent buffer could be same as either new or old current.
                 */
-               ACCEPT_RDATA_BUFFER(current->buffer, 3);
-               if (parent->buffer != current->buffer &&
-                       parent->buffer != saveCurrent.buffer)
-                       ACCEPT_RDATA_BUFFER(parent->buffer, 4);
+               if (parent->buffer == saveCurrent.buffer)
+                       xlrec.parentBlk = 0;
+               else if (parent->buffer == current->buffer)
+                       xlrec.parentBlk = 1;
+               else
+                       xlrec.parentBlk = 2;
 
                START_CRIT_SECTION();
 
@@ -1647,7 +1632,20 @@ spgAddNodeAction(Relation index, SpGistState *state,
                {
                        XLogRecPtr      recptr;
 
-                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+                       XLogBeginInsert();
+
+                       /* orig page */
+                       XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
+                       /* new page */
+                       XLogRegisterBuffer(1, current->buffer, REGBUF_STANDARD);
+                       /* parent page (if different from orig and new) */
+                       if (xlrec.parentBlk == 2)
+                               XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+                       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+                       XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
 
                        /* we don't bother to check if any of these are redundant */
                        PageSetLSN(current->page, recptr);
@@ -1682,7 +1680,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        BlockNumber postfixBlkno;
        OffsetNumber postfixOffset;
        int                     i;
-       XLogRecData rdata[5];
        spgxlogSplitTuple xlrec;
        Buffer          newBuffer = InvalidBuffer;
 
@@ -1725,14 +1722,8 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        postfixTuple->allTheSame = innerTuple->allTheSame;
 
        /* prep data for WAL record */
-       xlrec.node = index->rd_node;
        xlrec.newPage = false;
 
-       ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
-       ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1);
-       ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2);
-       ACCEPT_RDATA_BUFFER(current->buffer, 3);
-
        /*
         * If we can't fit both tuples on the current page, get a new page for the
         * postfix tuple.  In particular, can't split to the root page.
@@ -1752,7 +1743,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
                                                                        GBUF_INNER_PARITY(current->blkno + 1),
                                                                        postfixTuple->size + sizeof(ItemIdData),
                                                                        &xlrec.newPage);
-               ACCEPT_RDATA_BUFFER(newBuffer, 4);
        }
 
        START_CRIT_SECTION();
@@ -1767,27 +1757,28 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        if (xlrec.offnumPrefix != current->offnum)
                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                         prefixTuple->size);
-       xlrec.blknoPrefix = current->blkno;
 
        /*
         * put postfix tuple into appropriate page
         */
        if (newBuffer == InvalidBuffer)
        {
-               xlrec.blknoPostfix = postfixBlkno = current->blkno;
+               postfixBlkno = current->blkno;
                xlrec.offnumPostfix = postfixOffset =
                        SpGistPageAddNewItem(state, current->page,
                                                                 (Item) postfixTuple, postfixTuple->size,
                                                                 NULL, false);
+               xlrec.postfixBlkSame = true;
        }
        else
        {
-               xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer);
+               postfixBlkno = BufferGetBlockNumber(newBuffer);
                xlrec.offnumPostfix = postfixOffset =
                        SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
                                                                 (Item) postfixTuple, postfixTuple->size,
                                                                 NULL, false);
                MarkBufferDirty(newBuffer);
+               xlrec.postfixBlkSame = false;
        }
 
        /*
@@ -1808,7 +1799,23 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+               XLogRegisterData((char *) prefixTuple, prefixTuple->size);
+               XLogRegisterData((char *) postfixTuple, postfixTuple->size);
+
+               XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+               if (newBuffer != InvalidBuffer)
+               {
+                       int                     flags;
+
+                       flags = REGBUF_STANDARD;
+                       if (xlrec.newPage)
+                               flags |= REGBUF_WILL_INIT;
+                       XLogRegisterBuffer(1, newBuffer, flags);
+               }
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
 
                PageSetLSN(current->page, recptr);
 
index e1dfc8e358076086109c4f79c8fa3ff78bc41f7f..f168ac5c5cf7e3cd7d05ffae88897c2608c97a7a 100644 (file)
@@ -105,15 +105,18 @@ spgbuild(PG_FUNCTION_ARGS)
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata;
 
-               /* WAL data is just the relfilenode */
-               rdata.data = (char *) &(index->rd_node);
-               rdata.len = sizeof(RelFileNode);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata);
+               /*
+                * Replay will re-initialize the pages, so don't take full-page
+                * images.  No other data to log.
+                */
+               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+               XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
 
                PageSetLSN(BufferGetPage(metabuffer), recptr);
                PageSetLSN(BufferGetPage(rootbuffer), recptr);
index 2e05d22b74967ffa4b48c3c84c8d1780a48f076f..c95b80b5c7c2802883422c9e1470ab0bfe24a4ec 100644 (file)
@@ -127,7 +127,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
 {
        Page            page = BufferGetPage(buffer);
        spgxlogVacuumLeaf xlrec;
-       XLogRecData rdata[8];
        OffsetNumber toDead[MaxIndexTuplesPerPage];
        OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
        OffsetNumber moveSrc[MaxIndexTuplesPerPage];
@@ -323,20 +322,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
        if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
                elog(ERROR, "inconsistent counts of deletable tuples");
 
-       /* Prepare WAL record */
-       xlrec.node = index->rd_node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
-       STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
-       ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0);
-       ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
-       ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
-       ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
-       ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
-       ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
-       ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
-       ACCEPT_RDATA_BUFFER(buffer, 7);
-
        /* Do the updates */
        START_CRIT_SECTION();
 
@@ -389,7 +374,22 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata);
+               XLogBeginInsert();
+
+               STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
+               /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+               XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
+               XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
+               XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
+               XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
+               XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
+               XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
 
                PageSetLSN(page, recptr);
        }
@@ -407,12 +407,10 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
 {
        Page            page = BufferGetPage(buffer);
        spgxlogVacuumRoot xlrec;
-       XLogRecData rdata[3];
        OffsetNumber toDelete[MaxIndexTuplesPerPage];
        OffsetNumber i,
                                max = PageGetMaxOffsetNumber(page);
 
-       xlrec.blkno = BufferGetBlockNumber(buffer);
        xlrec.nDelete = 0;
 
        /* Scan page, identify tuples to delete, accumulate stats */
@@ -448,15 +446,6 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
        if (xlrec.nDelete == 0)
                return;                                 /* nothing more to do */
 
-       /* Prepare WAL record */
-       xlrec.node = index->rd_node;
-       STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
-       ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRoot, 0);
-       /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
-       ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
-       ACCEPT_RDATA_BUFFER(buffer, 2);
-
        /* Do the update */
        START_CRIT_SECTION();
 
@@ -469,7 +458,19 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata);
+               XLogBeginInsert();
+
+               /* Prepare WAL record */
+               STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
+               /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+               XLogRegisterData((char *) toDelete,
+                                                sizeof(OffsetNumber) * xlrec.nDelete);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
 
                PageSetLSN(page, recptr);
        }
@@ -499,10 +500,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
        OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
        OffsetNumber itemnos[MaxIndexTuplesPerPage];
        spgxlogVacuumRedirect xlrec;
-       XLogRecData rdata[3];
 
-       xlrec.node = index->rd_node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
        xlrec.nToPlaceholder = 0;
        xlrec.newestRedirectXid = InvalidTransactionId;
 
@@ -585,11 +583,15 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
        {
                XLogRecPtr      recptr;
 
-               ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0);
-               ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
-               ACCEPT_RDATA_BUFFER(buffer, 2);
+               XLogBeginInsert();
+
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
+               XLogRegisterData((char *) itemToPlaceholder,
+                                                sizeof(OffsetNumber) * xlrec.nToPlaceholder);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata);
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
 
                PageSetLSN(page, recptr);
        }
index 920739436ac37c2a75b06150b00e816119ea7cd1..ac6d4bd369aafb9397a7a4b3396de16150d6061e 100644 (file)
@@ -71,33 +71,30 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
 }
 
 static void
-spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
+spgRedoCreateIndex(XLogReaderState *record)
 {
-       RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
        Buffer          buffer;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
+       Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
        page = (Page) BufferGetPage(buffer);
        SpGistInitMetapage(page);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 
-       buffer = XLogReadBuffer(*node, SPGIST_ROOT_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 1);
+       Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
        SpGistInitBuffer(buffer, SPGIST_LEAF);
        page = (Page) BufferGetPage(buffer);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 
-       buffer = XLogReadBuffer(*node, SPGIST_NULL_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 2);
+       Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
        SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
        page = (Page) BufferGetPage(buffer);
        PageSetLSN(page, lsn);
@@ -106,8 +103,9 @@ spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
+spgRedoAddLeaf(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr;
        char       *leafTuple;
@@ -128,15 +126,13 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
         */
        if (xldata->newPage)
        {
-               buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
+               buffer = XLogInitBufferForRedo(record, 0);
                SpGistInitBuffer(buffer,
                                         SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 0,
-                                                                          xldata->node, xldata->blknoLeaf,
-                                                                          &buffer);
+               action = XLogReadBufferForRedo(record, 0, &buffer);
 
        if (action == BLK_NEEDS_REDO)
        {
@@ -164,7 +160,8 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
                {
                        /* replacing a DEAD tuple */
                        PageIndexTupleDelete(page, xldata->offnumLeaf);
-                       if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size,
+                       if (PageAddItem(page,
+                                                       (Item) leafTuple, leafTupleHdr.size,
                                         xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
                                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                         leafTupleHdr.size);
@@ -177,13 +174,14 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* update parent downlink if necessary */
-       if (xldata->blknoParent != InvalidBlockNumber)
+       if (xldata->offnumParent != InvalidOffsetNumber)
        {
-               if (XLogReadBufferForRedo(lsn, record, 1,
-                                                                 xldata->node, xldata->blknoParent,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        SpGistInnerTuple tuple;
+                       BlockNumber blknoLeaf;
+
+                       XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);
 
                        page = BufferGetPage(buffer);
 
@@ -191,7 +189,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
                                                                  PageGetItemId(page, xldata->offnumParent));
 
                        spgUpdateNodeLink(tuple, xldata->nodeI,
-                                                         xldata->blknoLeaf, xldata->offnumLeaf);
+                                                         blknoLeaf, xldata->offnumLeaf);
 
                        PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
@@ -202,8 +200,9 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
+spgRedoMoveLeafs(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr;
        SpGistState state;
@@ -213,6 +212,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
        Buffer          buffer;
        Page            page;
        XLogRedoAction action;
+       BlockNumber blknoDst;
+
+       XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
 
        fillFakeState(&state, xldata->stateSrc);
 
@@ -235,15 +237,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
        /* Insert tuples on the dest page (do first, so redirect is valid) */
        if (xldata->newPage)
        {
-               buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
+               buffer = XLogInitBufferForRedo(record, 1);
                SpGistInitBuffer(buffer,
                                         SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 1,
-                                                                          xldata->node, xldata->blknoDst,
-                                                                          &buffer);
+               action = XLogReadBufferForRedo(record, 1, &buffer);
+
        if (action == BLK_NEEDS_REDO)
        {
                int                     i;
@@ -260,7 +261,8 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                         * field.
                         */
                        leafTuple = ptr;
-                       memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
+                       memcpy(&leafTupleHdr, leafTuple,
+                                  sizeof(SpGistLeafTupleData));
 
                        addOrReplaceTuple(page, (Item) leafTuple,
                                                          leafTupleHdr.size, toInsert[i]);
@@ -274,14 +276,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* Delete tuples from the source page, inserting a redirection pointer */
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
+
                spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
                                                state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
                                                                SPGIST_PLACEHOLDER,
-                                                               xldata->blknoDst,
+                                                               blknoDst,
                                                                toInsert[nInsert - 1]);
 
                PageSetLSN(page, lsn);
@@ -291,8 +293,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* And update the parent downlink */
-       if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
        {
                SpGistInnerTuple tuple;
 
@@ -302,7 +303,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                                                                  PageGetItemId(page, xldata->offnumParent));
 
                spgUpdateNodeLink(tuple, xldata->nodeI,
-                                                 xldata->blknoDst, toInsert[nInsert - 1]);
+                                                 blknoDst, toInsert[nInsert - 1]);
 
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
@@ -312,8 +313,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
+spgRedoAddNode(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
        char       *innerTuple;
@@ -321,7 +323,6 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
        SpGistState state;
        Buffer          buffer;
        Page            page;
-       int                     bbi;
        XLogRedoAction action;
 
        ptr += sizeof(spgxlogAddNode);
@@ -331,17 +332,18 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
 
        fillFakeState(&state, xldata->stateSrc);
 
-       if (xldata->blknoNew == InvalidBlockNumber)
+       if (!XLogRecHasBlockRef(record, 1))
        {
                /* update in place */
-               Assert(xldata->blknoParent == InvalidBlockNumber);
-               if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               Assert(xldata->parentBlk == -1);
+               if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
                {
                        page = BufferGetPage(buffer);
+
                        PageIndexTupleDelete(page, xldata->offnum);
                        if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
-                                                       xldata->offnum, false, false) != xldata->offnum)
+                                                       xldata->offnum,
+                                                       false, false) != xldata->offnum)
                                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                         innerTupleHdr.size);
 
@@ -353,30 +355,30 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
        }
        else
        {
+               BlockNumber blkno;
+               BlockNumber blknoNew;
+
+               XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
+               XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
+
                /*
                 * In normal operation we would have all three pages (source, dest,
                 * and parent) locked simultaneously; but in WAL replay it should be
                 * safe to update them one at a time, as long as we do it in the right
-                * order.
-                *
-                * The logic here depends on the assumption that blkno != blknoNew,
-                * else we can't tell which BKP bit goes with which page, and the LSN
-                * checks could go wrong too.
+                * order. We must insert the new tuple before replacing the old tuple
+                * with the redirect tuple.
                 */
-               Assert(xldata->blkno != xldata->blknoNew);
 
                /* Install new tuple first so redirect is valid */
                if (xldata->newPage)
                {
-                       buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
                        /* AddNode is not used for nulls pages */
+                       buffer = XLogInitBufferForRedo(record, 1);
                        SpGistInitBuffer(buffer, 0);
                        action = BLK_NEEDS_REDO;
                }
                else
-                       action = XLogReadBufferForRedo(lsn, record, 1,
-                                                                                  xldata->node, xldata->blknoNew,
-                                                                                  &buffer);
+                       action = XLogReadBufferForRedo(record, 1, &buffer);
                if (action == BLK_NEEDS_REDO)
                {
                        page = BufferGetPage(buffer);
@@ -385,22 +387,26 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
                                                          innerTupleHdr.size, xldata->offnumNew);
 
                        /*
-                        * If parent is in this same page, don't advance LSN; doing so
-                        * would fool us into not applying the parent downlink update
-                        * below.  We'll update the LSN when we fix the parent downlink.
+                        * If parent is in this same page, update it now.
                         */
-                       if (xldata->blknoParent != xldata->blknoNew)
+                       if (xldata->parentBlk == 1)
                        {
-                               PageSetLSN(page, lsn);
+                               SpGistInnerTuple parentTuple;
+
+                               parentTuple = (SpGistInnerTuple) PageGetItem(page,
+                                                                 PageGetItemId(page, xldata->offnumParent));
+
+                               spgUpdateNodeLink(parentTuple, xldata->nodeI,
+                                                                 blknoNew, xldata->offnumNew);
                        }
+                       PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
                }
                if (BufferIsValid(buffer))
                        UnlockReleaseBuffer(buffer);
 
                /* Delete old tuple, replacing it with redirect or placeholder tuple */
-               if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
                {
                        SpGistDeadTuple dt;
 
@@ -412,11 +418,12 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
                                                                          InvalidOffsetNumber);
                        else
                                dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
-                                                                         xldata->blknoNew,
+                                                                         blknoNew,
                                                                          xldata->offnumNew);
 
                        PageIndexTupleDelete(page, xldata->offnum);
-                       if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum,
+                       if (PageAddItem(page, (Item) dt, dt->size,
+                                                       xldata->offnum,
                                                        false, false) != xldata->offnum)
                                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                         dt->size);
@@ -427,67 +434,55 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
                                SpGistPageGetOpaque(page)->nRedirection++;
 
                        /*
-                        * If parent is in this same page, don't advance LSN; doing so
-                        * would fool us into not applying the parent downlink update
-                        * below.  We'll update the LSN when we fix the parent downlink.
+                        * If parent is in this same page, update it now.
                         */
-                       if (xldata->blknoParent != xldata->blkno)
+                       if (xldata->parentBlk == 0)
                        {
-                               PageSetLSN(page, lsn);
+                               SpGistInnerTuple parentTuple;
+
+                               parentTuple = (SpGistInnerTuple) PageGetItem(page,
+                                                                 PageGetItemId(page, xldata->offnumParent));
+
+                               spgUpdateNodeLink(parentTuple, xldata->nodeI,
+                                                                 blknoNew, xldata->offnumNew);
                        }
+                       PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
                }
                if (BufferIsValid(buffer))
                        UnlockReleaseBuffer(buffer);
 
                /*
-                * Update parent downlink.  Since parent could be in either of the
-                * previous two buffers, it's a bit tricky to determine which BKP bit
-                * applies.
+                * Update parent downlink (if we didn't do it as part of the source or
+                * destination page update already).
                 */
-               if (xldata->blknoParent == xldata->blkno)
-                       bbi = 0;
-               else if (xldata->blknoParent == xldata->blknoNew)
-                       bbi = 1;
-               else
-                       bbi = 2;
-
-               if (record->xl_info & XLR_BKP_BLOCK(bbi))
+               if (xldata->parentBlk == 2)
                {
-                       if (bbi == 2)           /* else we already did it */
-                               (void) RestoreBackupBlock(lsn, record, bbi, false, false);
-                       action = BLK_RESTORED;
-                       buffer = InvalidBuffer;
-               }
-               else
-               {
-                       action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
-                                                                                  xldata->blknoParent, &buffer);
-                       Assert(action != BLK_RESTORED);
-               }
-               if (action == BLK_NEEDS_REDO)
-               {
-                       SpGistInnerTuple innerTuple;
+                       if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
+                       {
+                               SpGistInnerTuple parentTuple;
 
-                       page = BufferGetPage(buffer);
+                               page = BufferGetPage(buffer);
 
-                       innerTuple = (SpGistInnerTuple) PageGetItem(page,
+                               parentTuple = (SpGistInnerTuple) PageGetItem(page,
                                                                  PageGetItemId(page, xldata->offnumParent));
 
-                       spgUpdateNodeLink(innerTuple, xldata->nodeI,
-                                                         xldata->blknoNew, xldata->offnumNew);
+                               spgUpdateNodeLink(parentTuple, xldata->nodeI,
+                                                                 blknoNew, xldata->offnumNew);
 
-                       PageSetLSN(page, lsn);
-                       MarkBufferDirty(buffer);
+                               PageSetLSN(page, lsn);
+                               MarkBufferDirty(buffer);
+                       }
+                       if (BufferIsValid(buffer))
+                               UnlockReleaseBuffer(buffer);
                }
-               if (BufferIsValid(buffer))
-                       UnlockReleaseBuffer(buffer);
        }
 }
 
 static void
-spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
+spgRedoSplitTuple(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr;
        char       *prefixTuple;
@@ -496,6 +491,7 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
        SpGistInnerTupleData postfixTupleHdr;
        Buffer          buffer;
        Page            page;
+       XLogRedoAction action;
 
        ptr += sizeof(spgxlogSplitTuple);
        prefixTuple = ptr;
@@ -513,22 +509,17 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* insert postfix tuple first to avoid dangling link */
-       if (xldata->blknoPostfix != xldata->blknoPrefix)
+       if (!xldata->postfixBlkSame)
        {
-               XLogRedoAction action;
-
                if (xldata->newPage)
                {
-                       buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
+                       buffer = XLogInitBufferForRedo(record, 1);
                        /* SplitTuple is not used for nulls pages */
                        SpGistInitBuffer(buffer, 0);
                        action = BLK_NEEDS_REDO;
                }
                else
-                       action = XLogReadBufferForRedo(lsn, record, 1,
-                                                                                  xldata->node, xldata->blknoPostfix,
-                                                                                  &buffer);
-
+                       action = XLogReadBufferForRedo(record, 1, &buffer);
                if (action == BLK_NEEDS_REDO)
                {
                        page = BufferGetPage(buffer);
@@ -544,18 +535,19 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* now handle the original page */
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
+
                PageIndexTupleDelete(page, xldata->offnumPrefix);
                if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
                                 xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
                        elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                 prefixTupleHdr.size);
 
-               if (xldata->blknoPostfix == xldata->blknoPrefix)
-                       addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size,
+               if (xldata->postfixBlkSame)
+                       addOrReplaceTuple(page, (Item) postfixTuple,
+                                                         postfixTupleHdr.size,
                                                          xldata->offnumPostfix);
 
                PageSetLSN(page, lsn);
@@ -566,8 +558,9 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
+spgRedoPickSplit(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
        char       *innerTuple;
@@ -578,14 +571,16 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        uint8      *leafPageSelect;
        Buffer          srcBuffer;
        Buffer          destBuffer;
+       Buffer          innerBuffer;
        Page            srcPage;
        Page            destPage;
-       Buffer          innerBuffer;
        Page            page;
-       int                     bbi;
        int                     i;
+       BlockNumber blknoInner;
        XLogRedoAction action;
 
+       XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
+
        fillFakeState(&state, xldata->stateSrc);
 
        ptr += SizeOfSpgxlogPickSplit;
@@ -603,13 +598,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 
        /* now ptr points to the list of leaf tuples */
 
-       /*
-        * It's a bit tricky to identify which pages have been handled as
-        * full-page images, so we explicitly count each referenced buffer.
-        */
-       bbi = 0;
-
-       if (SpGistBlockIsRoot(xldata->blknoSrc))
+       if (xldata->isRootSplit)
        {
                /* when splitting root, we touch it only in the guise of new inner */
                srcBuffer = InvalidBuffer;
@@ -618,8 +607,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        else if (xldata->initSrc)
        {
                /* just re-init the source page */
-               srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
-               Assert(BufferIsValid(srcBuffer));
+               srcBuffer = XLogInitBufferForRedo(record, 0);
                srcPage = (Page) BufferGetPage(srcBuffer);
 
                SpGistInitBuffer(srcBuffer,
@@ -634,9 +622,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                 * inserting leaf tuples and the new inner tuple, else the added
                 * redirect tuple will be a dangling link.)
                 */
-               if (XLogReadBufferForRedo(lsn, record, bbi,
-                                                                 xldata->node, xldata->blknoSrc,
-                                                                 &srcBuffer) == BLK_NEEDS_REDO)
+               srcPage = NULL;
+               if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO)
                {
                        srcPage = BufferGetPage(srcBuffer);
 
@@ -650,7 +637,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                                                                                toDelete, xldata->nDelete,
                                                                                SPGIST_REDIRECT,
                                                                                SPGIST_PLACEHOLDER,
-                                                                               xldata->blknoInner,
+                                                                               blknoInner,
                                                                                xldata->offnumInner);
                        else
                                spgPageIndexMultiDelete(&state, srcPage,
@@ -662,15 +649,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 
                        /* don't update LSN etc till we're done with it */
                }
-               else
-               {
-                       srcPage = NULL;         /* don't do any page updates */
-               }
-               bbi++;
        }
 
        /* try to access dest page if any */
-       if (xldata->blknoDest == InvalidBlockNumber)
+       if (!XLogRecHasBlockRef(record, 1))
        {
                destBuffer = InvalidBuffer;
                destPage = NULL;
@@ -678,8 +660,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        else if (xldata->initDest)
        {
                /* just re-init the dest page */
-               destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
-               Assert(BufferIsValid(destBuffer));
+               destBuffer = XLogInitBufferForRedo(record, 1);
                destPage = (Page) BufferGetPage(destBuffer);
 
                SpGistInitBuffer(destBuffer,
@@ -692,17 +673,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                 * We could probably release the page lock immediately in the
                 * full-page-image case, but for safety let's hold it till later.
                 */
-               if (XLogReadBufferForRedo(lsn, record, bbi,
-                                                                 xldata->node, xldata->blknoDest,
-                                                                 &destBuffer) == BLK_NEEDS_REDO)
-               {
+               if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO)
                        destPage = (Page) BufferGetPage(destBuffer);
-               }
                else
-               {
                        destPage = NULL;        /* don't do any page updates */
-               }
-               bbi++;
        }
 
        /* restore leaf tuples to src and/or dest page */
@@ -739,14 +713,12 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        /* restore new inner tuple */
        if (xldata->initInner)
        {
-               innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
-               SpGistInitBuffer(innerBuffer,
-                                                (xldata->storesNulls ? SPGIST_NULLS : 0));
+               innerBuffer = XLogInitBufferForRedo(record, 2);
+               SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
-                                                                          xldata->blknoInner, &innerBuffer);
+               action = XLogReadBufferForRedo(record, 2, &innerBuffer);
 
        if (action == BLK_NEEDS_REDO)
        {
@@ -756,14 +728,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                                                  xldata->offnumInner);
 
                /* if inner is also parent, update link while we're here */
-               if (xldata->blknoInner == xldata->blknoParent)
+               if (xldata->innerIsParent)
                {
                        SpGistInnerTuple parent;
 
                        parent = (SpGistInnerTuple) PageGetItem(page,
                                                                  PageGetItemId(page, xldata->offnumParent));
                        spgUpdateNodeLink(parent, xldata->nodeI,
-                                                         xldata->blknoInner, xldata->offnumInner);
+                                                         blknoInner, xldata->offnumInner);
                }
 
                PageSetLSN(page, lsn);
@@ -771,7 +743,6 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        }
        if (BufferIsValid(innerBuffer))
                UnlockReleaseBuffer(innerBuffer);
-       bbi++;
 
        /*
         * Now we can release the leaf-page locks.  It's okay to do this before
@@ -783,18 +754,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(destBuffer);
 
        /* update parent downlink, unless we did it above */
-       if (xldata->blknoParent == InvalidBlockNumber)
-       {
-               /* no parent cause we split the root */
-               Assert(SpGistBlockIsRoot(xldata->blknoInner));
-       }
-       else if (xldata->blknoInner != xldata->blknoParent)
+       if (XLogRecHasBlockRef(record, 3))
        {
                Buffer          parentBuffer;
 
-               if (XLogReadBufferForRedo(lsn, record, bbi,
-                                                                 xldata->node, xldata->blknoParent,
-                                                                 &parentBuffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO)
                {
                        SpGistInnerTuple parent;
 
@@ -803,7 +767,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                        parent = (SpGistInnerTuple) PageGetItem(page,
                                                                  PageGetItemId(page, xldata->offnumParent));
                        spgUpdateNodeLink(parent, xldata->nodeI,
-                                                         xldata->blknoInner, xldata->offnumInner);
+                                                         blknoInner, xldata->offnumInner);
 
                        PageSetLSN(page, lsn);
                        MarkBufferDirty(parentBuffer);
@@ -811,11 +775,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                if (BufferIsValid(parentBuffer))
                        UnlockReleaseBuffer(parentBuffer);
        }
+       else
+               Assert(xldata->innerIsParent || xldata->isRootSplit);
 }
 
 static void
-spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
+spgRedoVacuumLeaf(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr;
        OffsetNumber *toDead;
@@ -844,8 +811,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
        ptr += sizeof(OffsetNumber) * xldata->nChain;
        chainDest = (OffsetNumber *) ptr;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
 
@@ -897,8 +863,9 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
+spgRedoVacuumRoot(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr;
        OffsetNumber *toDelete;
@@ -907,8 +874,7 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
 
        toDelete = xldata->offsets;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
 
@@ -923,8 +889,9 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
 }
 
 static void
-spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
+spgRedoVacuumRedirect(XLogReaderState *record)
 {
+       XLogRecPtr      lsn = record->EndRecPtr;
        char       *ptr = XLogRecGetData(record);
        spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
        OffsetNumber *itemToPlaceholder;
@@ -939,12 +906,16 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
        if (InHotStandby)
        {
                if (TransactionIdIsValid(xldata->newestRedirectXid))
+               {
+                       RelFileNode node;
+
+                       XLogRecGetBlockTag(record, 0, &node, NULL, NULL);
                        ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid,
-                                                                                               xldata->node);
+                                                                                               node);
+               }
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                Page            page = BufferGetPage(buffer);
                SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
@@ -995,40 +966,40 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
 }
 
 void
-spg_redo(XLogRecPtr lsn, XLogRecord *record)
+spg_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
        MemoryContext oldCxt;
 
        oldCxt = MemoryContextSwitchTo(opCtx);
        switch (info)
        {
                case XLOG_SPGIST_CREATE_INDEX:
-                       spgRedoCreateIndex(lsn, record);
+                       spgRedoCreateIndex(record);
                        break;
                case XLOG_SPGIST_ADD_LEAF:
-                       spgRedoAddLeaf(lsn, record);
+                       spgRedoAddLeaf(record);
                        break;
                case XLOG_SPGIST_MOVE_LEAFS:
-                       spgRedoMoveLeafs(lsn, record);
+                       spgRedoMoveLeafs(record);
                        break;
                case XLOG_SPGIST_ADD_NODE:
-                       spgRedoAddNode(lsn, record);
+                       spgRedoAddNode(record);
                        break;
                case XLOG_SPGIST_SPLIT_TUPLE:
-                       spgRedoSplitTuple(lsn, record);
+                       spgRedoSplitTuple(record);
                        break;
                case XLOG_SPGIST_PICKSPLIT:
-                       spgRedoPickSplit(lsn, record);
+                       spgRedoPickSplit(record);
                        break;
                case XLOG_SPGIST_VACUUM_LEAF:
-                       spgRedoVacuumLeaf(lsn, record);
+                       spgRedoVacuumLeaf(record);
                        break;
                case XLOG_SPGIST_VACUUM_ROOT:
-                       spgRedoVacuumRoot(lsn, record);
+                       spgRedoVacuumRoot(record);
                        break;
                case XLOG_SPGIST_VACUUM_REDIRECT:
-                       spgRedoVacuumRedirect(lsn, record);
+                       spgRedoVacuumRedirect(record);
                        break;
                default:
                        elog(PANIC, "spg_redo: unknown op code %u", info);
index 92b12fbb6c2af4b2f86f28fe8fb76f8372ab1549..ba6ae05d6536516d241ec2726e55cf5eb2029644 100644 (file)
@@ -440,96 +440,164 @@ happen before the WAL record is inserted; see notes in SyncOneBuffer().)
 Note that marking a buffer dirty with MarkBufferDirty() should only
 happen iff you write a WAL record; see Writing Hints below.
 
-5. If the relation requires WAL-logging, build a WAL log record and pass it
-to XLogInsert(); then update the page's LSN using the returned XLOG
-location.  For instance,
+5. If the relation requires WAL-logging, build a WAL record using
+XLogBeginInsert and XLogRegister* functions, and insert it.  (See
+"Constructing a WAL record" below.)  Then update the page's LSN using the
+returned XLOG location.  For instance,
 
-               recptr = XLogInsert(rmgr_id, info, rdata);
+               XLogBeginInsert();
+               XLogRegisterBuffer(...);
+               XLogRegisterData(...);
+               recptr = XLogInsert(rmgr_id, info);
 
                PageSetLSN(dp, recptr);
-               // Note that we no longer do PageSetTLI() from 9.3 onwards
-               // since that field on a page has now changed its meaning.
 
 6. END_CRIT_SECTION()
 
 7. Unlock and unpin the buffer(s).
 
-XLogInsert's "rdata" argument is an array of pointer/size items identifying
-chunks of data to be written in the XLOG record, plus optional shared-buffer
-IDs for chunks that are in shared buffers rather than temporary variables.
-The "rdata" array must mention (at least once) each of the shared buffers
-being modified, unless the action is such that the WAL replay routine can
-reconstruct the entire page contents.  XLogInsert includes the logic that
-tests to see whether a shared buffer has been modified since the last
-checkpoint.  If not, the entire page contents are logged rather than just the
-portion(s) pointed to by "rdata".
-
-Because XLogInsert drops the rdata components associated with buffers it
-chooses to log in full, the WAL replay routines normally need to test to see
-which buffers were handled that way --- otherwise they may be misled about
-what the XLOG record actually contains.  XLOG records that describe multi-page
-changes therefore require some care to design: you must be certain that you
-know what data is indicated by each "BKP" bit.  An example of the trickiness
-is that in a HEAP_UPDATE record, BKP(0) normally is associated with the source
-page and BKP(1) is associated with the destination page --- but if these are
-the same page, only BKP(0) would have been set.
-
-For this reason as well as the risk of deadlocking on buffer locks, it's best
-to design WAL records so that they reflect small atomic actions involving just
-one or a few pages.  The current XLOG infrastructure cannot handle WAL records
-involving references to more than four shared buffers, anyway.
-
-In the case where the WAL record contains enough information to re-generate
-the entire contents of a page, do *not* show that page's buffer ID in the
-rdata array, even if some of the rdata items point into the buffer.  This is
-because you don't want XLogInsert to log the whole page contents.  The
-standard replay-routine pattern for this case is
-
-       buffer = XLogReadBuffer(rnode, blkno, true);
-       Assert(BufferIsValid(buffer));
-       page = (Page) BufferGetPage(buffer);
-
-       ... initialize the page ...
-
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
-
-In the case where the WAL record provides only enough information to
-incrementally update the page, the rdata array *must* mention the buffer
-ID at least once; otherwise there is no defense against torn-page problems.
-The standard replay-routine pattern for this case is
-
-       if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO)
-       {
-               page = (Page) BufferGetPage(buffer);
-
-               ... apply the change ...
-
-               PageSetLSN(page, lsn);
-               MarkBufferDirty(buffer);
-       }
-       if (BufferIsValid(buffer))
-               UnlockReleaseBuffer(buffer);
-
-XLogReadBufferForRedo reads the page from disk, and checks what action needs to
-be taken to the page.  If the XLR_BKP_BLOCK(N) flag is set, it restores the
-full page image and returns BLK_RESTORED.  If there is no full page image, but
-page cannot be found or if the change has already been replayed (i.e. the
-page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
-respectively.  Usually, the redo routine only needs to pay attention to the
-BLK_NEEDS_REDO return code, which means that the routine should apply the
-incremental change.  In any case, the caller is responsible for unlocking and
-releasing the buffer.  Note that XLogReadBufferForRedo returns the buffer
-locked even if no redo is required, unless the page does not exist.
-
-As noted above, for a multi-page update you need to be able to determine
-which XLR_BKP_BLOCK(N) flag applies to each page.  If a WAL record reflects
-a combination of fully-rewritable and incremental updates, then the rewritable
-pages don't count for the XLR_BKP_BLOCK(N) numbering.  (XLR_BKP_BLOCK(N) is
-associated with the N'th distinct buffer ID seen in the "rdata" array, and
-per the above discussion, fully-rewritable buffers shouldn't be mentioned in
-"rdata".)
+Complex changes (such as a multilevel index insertion) normally need to be
+described by a series of atomic-action WAL records.  The intermediate states
+must be self-consistent, so that if the replay is interrupted between any
+two actions, the system is fully functional.  In btree indexes, for example,
+a page split requires a new page to be allocated, and an insertion of a new
+key in the parent btree level, but for locking reasons this has to be
+reflected by two separate WAL records.  Replaying the first record, to
+allocate the new page and move tuples to it, sets a flag on the page to
+indicate that the key has not been inserted to the parent yet.  Replaying the
+second record clears the flag.  This intermediate state is never seen by
+other backends during normal operation, because the lock on the child page
+is held across the two actions, but will be seen if the operation is
+interrupted before writing the second WAL record.  The search algorithm works
+with the intermediate state as normal, but if an insertion encounters a page
+with the incomplete-split flag set, it will finish the interrupted split by
+inserting the key to the parent, before proceeding.
+
+
+Constructing a WAL record
+-------------------------
+
+A WAL record consists of a header common to all WAL record types,
+record-specific data, and information about the data blocks modified.  Each
+modified data block is identified by an ID number, and can optionally have
+more record-specific data associated with the block.  If XLogInsert decides
+that a full-page image of a block needs to be taken, the data associated
+with that block is not included.
+
+The API for constructing a WAL record consists of five functions:
+XLogBeginInsert, XLogRegisterBuffer, XLogRegisterData, XLogRegisterBufData,
+and XLogInsert.  First, call XLogBeginInsert().  Then register all the buffers
+modified, and data needed to replay the changes, using XLogRegister*
+functions.  Finally, insert the constructed record to the WAL by calling
+XLogInsert().
+
+       XLogBeginInsert();
+
+       /* register buffers modified as part of this WAL-logged action */
+       XLogRegisterBuffer(0, lbuffer, REGBUF_STANDARD);
+       XLogRegisterBuffer(1, rbuffer, REGBUF_STANDARD);
+
+       /* register data that is always included in the WAL record */
+       XLogRegisterData((char *) &xlrec, SizeOfFictionalAction);
+
+       /*
+        * register data associated with a buffer. This will not be included
+        * in the record if a full-page image is taken.
+        */
+       XLogRegisterBufData(0, tuple->data, tuple->len);
+
+       /* more data associated with the buffer */
+       XLogRegisterBufData(0, data2, len2);
+
+       /*
+        * Ok, all the data and buffers to include in the WAL record have
+        * been registered. Insert the record.
+        */
+       recptr = XLogInsert(RM_FOO_ID, XLOG_FOOBAR_DO_STUFF);
+
+Details of the API functions:
+
+void XLogBeginInsert(void)
+
+    Must be called before any of the XLogRegister* functions.
+
+void XLogResetInsertion(void)
+
+    Clear any currently registered data and buffers from the WAL record
+    construction workspace.  This is only needed if you have already called
+    XLogBeginInsert(), but decide to not insert the record after all.
+
+void XLogEnsureRecordSpace(int max_block_id, int nrdatas)
+
+    Normally, the WAL record construction buffers have the following limits:
+
+    * highest block ID that can be used is 4 (allowing five block references)
+    * Max 20 chunks of registered data
+
+    These default limits are enough for most record types that change some
+    on-disk structures.  For the odd case that requires more data, or needs to
+    modify more buffers, these limits can be raised by calling
+    XLogEnsureRecordSpace().  XLogEnsureRecordSpace() must be called before
+    XLogBeginInsert(), and outside a critical section.
+
+void XLogRegisterBuffer(uint8 block_id, Buffer buf, uint8 flags);
+
+    XLogRegisterBuffer adds information about a data block to the WAL record.
+    block_id is an arbitrary number used to identify this page reference in
+    the redo routine.  The information needed to re-find the page at redo -
+    relfilenode, fork, and block number - is included in the WAL record.
+
+    XLogInsert will automatically include a full copy of the page contents, if
+    this is the first modification of the buffer since the last checkpoint.
+    It is important to register every buffer modified by the action with
+    XLogRegisterBuffer, to avoid torn-page hazards.
+
+    The flags control when and how the buffer contents are included in the
+    WAL record.  Normally, a full-page image is taken only if the page has not
+    been modified since the last checkpoint, and only if full_page_writes=on
+    or an online backup is in progress.  The REGBUF_FORCE_IMAGE flag can be
+    used to force a full-page image to always be included; that is useful
+    e.g. for an operation that rewrites most of the page, so that tracking the
+    details is not worth it.  For the rare case where it is not necessary to
+    protect from torn pages, the REGBUF_NO_IMAGE flag can be used to prevent a
+    full page image from being taken.  REGBUF_WILL_INIT also suppresses a full
+    page image, but the redo routine must re-generate the page from scratch,
+    without looking at the old page contents.  Re-initializing the page
+    protects from torn page hazards like a full page image does.
+
+    The REGBUF_STANDARD flag can be specified together with the other flags to
+    indicate that the page follows the standard page layout.  It causes the
+    area between pd_lower and pd_upper to be left out from the image, reducing
+    WAL volume.
+
+    If the REGBUF_KEEP_DATA flag is given, any per-buffer data registered with
+    XLogRegisterBufData() is included in the WAL record even if a full-page
+    image is taken.
+
+void XLogRegisterData(char *data, int len);
+
+    XLogRegisterData is used to include arbitrary data in the WAL record.  If
+    XLogRegisterData() is called multiple times, the data are appended, and
+    will be made available to the redo routine as one contiguous chunk.
+
+void XLogRegisterBufData(uint8 block_id, char *data, int len);
+
+    XLogRegisterBufData is used to include data associated with a particular
+    buffer that was registered earlier with XLogRegisterBuffer().  If
+    XLogRegisterBufData() is called multiple times with the same block ID, the
+    data are appended, and will be made available to the redo routine as one
+    contiguous chunk.
+
+    If a full-page image of the buffer is taken at insertion, the data is not
+    included in the WAL record, unless the REGBUF_KEEP_DATA flag is used.
+
+
+Writing a REDO routine
+----------------------
+
+A REDO routine uses the data and page references included in the WAL record
+to reconstruct the new state of the page.  The record decoding functions
+and macros in xlogreader.c/h can be used to extract the data from the record.
 
 When replaying a WAL record that describes changes on multiple pages, you
 must be careful to lock the pages properly to prevent concurrent Hot Standby
@@ -545,23 +613,6 @@ either an exclusive buffer lock or a shared lock plus buffer header lock,
 or be writing the data block directly rather than through shared buffers
 while holding AccessExclusiveLock on the relation.
 
-Due to all these constraints, complex changes (such as a multilevel index
-insertion) normally need to be described by a series of atomic-action WAL
-records. The intermediate states must be self-consistent, so that if the
-replay is interrupted between any two actions, the system is fully
-functional. In btree indexes, for example, a page split requires a new page
-to be allocated, and an insertion of a new key in the parent btree level,
-but for locking reasons this has to be reflected by two separate WAL
-records. Replaying the first record, to allocate the new page and move
-tuples to it, sets a flag on the page to indicate that the key has not been
-inserted to the parent yet. Replaying the second record clears the flag.
-This intermediate state is never seen by other backends during normal
-operation, because the lock on the child page is held across the two
-actions, but will be seen if the operation is interrupted before writing
-the second WAL record. The search algorithm works with the intermediate
-state as normal, but if an insertion encounters a page with the
-incomplete-split flag set, it will finish the interrupted split by
-inserting the key to the parent, before proceeding.
 
 Writing Hints
 -------------
index 5ee070bd0a980aa15716e452ae48a6f03c0b8730..313bd0424049a073038edf105bff837db0b1c472 100644 (file)
@@ -699,13 +699,9 @@ CLOGPagePrecedes(int page1, int page2)
 static void
 WriteZeroPageXlogRec(int pageno)
 {
-       XLogRecData rdata;
-
-       rdata.data = (char *) (&pageno);
-       rdata.len = sizeof(int);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&pageno), sizeof(int));
+       (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
 }
 
 /*
@@ -717,14 +713,11 @@ WriteZeroPageXlogRec(int pageno)
 static void
 WriteTruncateXlogRec(int pageno)
 {
-       XLogRecData rdata;
        XLogRecPtr      recptr;
 
-       rdata.data = (char *) (&pageno);
-       rdata.len = sizeof(int);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&pageno), sizeof(int));
+       recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
        XLogFlush(recptr);
 }
 
@@ -732,12 +725,12 @@ WriteTruncateXlogRec(int pageno)
  * CLOG resource manager's routines
  */
 void
-clog_redo(XLogRecPtr lsn, XLogRecord *record)
+clog_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in clog records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == CLOG_ZEROPAGE)
        {
index 3c20bb37e4c590c31b8e05a3f8efd297e44856ce..fff9f837330c2f62ca12c6eb1960e39c776ffc6f 100644 (file)
@@ -720,7 +720,6 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 {
        MultiXactId multi;
        MultiXactOffset offset;
-       XLogRecData rdata[2];
        xl_multixact_create xlrec;
 
        debug_elog3(DEBUG2, "Create: %s",
@@ -796,17 +795,11 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
         * the status flags in one XLogRecData, then all the xids in another one?
         * Not clear that it's worth the trouble though.
         */
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = SizeOfMultiXactCreate;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
+       XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
 
-       rdata[1].data = (char *) members;
-       rdata[1].len = nmembers * sizeof(MultiXactMember);
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].next = NULL;
-
-       (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
+       (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
 
        /* Now enter the information into the OFFSETs and MEMBERs logs */
        RecordNewMultiXact(multi, offset, nmembers, members);
@@ -2705,25 +2698,21 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 static void
 WriteMZeroPageXlogRec(int pageno, uint8 info)
 {
-       XLogRecData rdata;
-
-       rdata.data = (char *) (&pageno);
-       rdata.len = sizeof(int);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&pageno), sizeof(int));
+       (void) XLogInsert(RM_MULTIXACT_ID, info);
 }
 
 /*
  * MULTIXACT resource manager's routines
  */
 void
-multixact_redo(XLogRecPtr lsn, XLogRecord *record)
+multixact_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in multixact records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
        {
@@ -2775,7 +2764,7 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
                 * should be unnecessary, since any XID found here ought to have other
                 * evidence in the XLOG, but let's be safe.
                 */
-               max_xid = record->xl_xid;
+               max_xid = XLogRecGetXid(record);
                for (i = 0; i < xlrec->nmembers; i++)
                {
                        if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
index d23c292edcdc3a398cd43a5fcac5fbd68a3edaf2..40de84e934ed28a49ba2dcb4e15f5ca893b56c60 100644 (file)
@@ -889,14 +889,21 @@ typedef struct TwoPhaseRecordOnDisk
 
 /*
  * During prepare, the state file is assembled in memory before writing it
- * to WAL and the actual state file.  We use a chain of XLogRecData blocks
- * so that we will be able to pass the state file contents directly to
- * XLogInsert.
+ * to WAL and the actual state file.  We use a chain of StateFileChunk blocks
+ * for that.
  */
+typedef struct StateFileChunk
+{
+       char       *data;
+       uint32          len;
+       struct StateFileChunk *next;
+} StateFileChunk;
+
 static struct xllist
 {
-       XLogRecData *head;                      /* first data block in the chain */
-       XLogRecData *tail;                      /* last block in chain */
+       StateFileChunk *head;           /* first data block in the chain */
+       StateFileChunk *tail;           /* last block in chain */
+       uint32          num_chunks;
        uint32          bytes_free;             /* free bytes left in tail block */
        uint32          total_len;              /* total data bytes in chain */
 }      records;
@@ -917,11 +924,11 @@ save_state_data(const void *data, uint32 len)
 
        if (padlen > records.bytes_free)
        {
-               records.tail->next = palloc0(sizeof(XLogRecData));
+               records.tail->next = palloc0(sizeof(StateFileChunk));
                records.tail = records.tail->next;
-               records.tail->buffer = InvalidBuffer;
                records.tail->len = 0;
                records.tail->next = NULL;
+               records.num_chunks++;
 
                records.bytes_free = Max(padlen, 512);
                records.tail->data = palloc(records.bytes_free);
@@ -951,8 +958,7 @@ StartPrepare(GlobalTransaction gxact)
        SharedInvalidationMessage *invalmsgs;
 
        /* Initialize linked list */
-       records.head = palloc0(sizeof(XLogRecData));
-       records.head->buffer = InvalidBuffer;
+       records.head = palloc0(sizeof(StateFileChunk));
        records.head->len = 0;
        records.head->next = NULL;
 
@@ -960,6 +966,7 @@ StartPrepare(GlobalTransaction gxact)
        records.head->data = palloc(records.bytes_free);
 
        records.tail = records.head;
+       records.num_chunks = 1;
 
        records.total_len = 0;
 
@@ -1019,7 +1026,7 @@ EndPrepare(GlobalTransaction gxact)
        TransactionId xid = pgxact->xid;
        TwoPhaseFileHeader *hdr;
        char            path[MAXPGPATH];
-       XLogRecData *record;
+       StateFileChunk *record;
        pg_crc32        statefile_crc;
        pg_crc32        bogus_crc;
        int                     fd;
@@ -1117,12 +1124,16 @@ EndPrepare(GlobalTransaction gxact)
         * We save the PREPARE record's location in the gxact for later use by
         * CheckPointTwoPhase.
         */
+       XLogEnsureRecordSpace(0, records.num_chunks);
+
        START_CRIT_SECTION();
 
        MyPgXact->delayChkpt = true;
 
-       gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
-                                                                       records.head);
+       XLogBeginInsert();
+       for (record = records.head; record != NULL; record = record->next)
+               XLogRegisterData(record->data, record->len);
+       gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
        XLogFlush(gxact->prepare_lsn);
 
        /* If we crash now, we have prepared: WAL replay will fix things */
@@ -1180,6 +1191,7 @@ EndPrepare(GlobalTransaction gxact)
        SyncRepWaitForLSN(gxact->prepare_lsn);
 
        records.tail = records.head = NULL;
+       records.num_chunks = 0;
 }
 
 /*
@@ -2071,8 +2083,6 @@ RecordTransactionCommitPrepared(TransactionId xid,
                                                                SharedInvalidationMessage *invalmsgs,
                                                                bool initfileinval)
 {
-       XLogRecData rdata[4];
-       int                     lastrdata = 0;
        xl_xact_commit_prepared xlrec;
        XLogRecPtr      recptr;
 
@@ -2094,39 +2104,24 @@ RecordTransactionCommitPrepared(TransactionId xid,
        xlrec.crec.nsubxacts = nchildren;
        xlrec.crec.nmsgs = ninvalmsgs;
 
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactCommitPrepared;
-       rdata[0].buffer = InvalidBuffer;
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitPrepared);
+
        /* dump rels to delete */
        if (nrels > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) rels;
-               rdata[1].len = nrels * sizeof(RelFileNode);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
        /* dump committed child Xids */
        if (nchildren > 0)
-       {
-               rdata[lastrdata].next = &(rdata[2]);
-               rdata[2].data = (char *) children;
-               rdata[2].len = nchildren * sizeof(TransactionId);
-               rdata[2].buffer = InvalidBuffer;
-               lastrdata = 2;
-       }
+               XLogRegisterData((char *) children,
+                                                nchildren * sizeof(TransactionId));
+
        /* dump cache invalidation messages */
        if (ninvalmsgs > 0)
-       {
-               rdata[lastrdata].next = &(rdata[3]);
-               rdata[3].data = (char *) invalmsgs;
-               rdata[3].len = ninvalmsgs * sizeof(SharedInvalidationMessage);
-               rdata[3].buffer = InvalidBuffer;
-               lastrdata = 3;
-       }
-       rdata[lastrdata].next = NULL;
+               XLogRegisterData((char *) invalmsgs,
+                                                ninvalmsgs * sizeof(SharedInvalidationMessage));
 
-       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
+       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED);
 
        /*
         * We don't currently try to sleep before flush here ... nor is there any
@@ -2169,8 +2164,6 @@ RecordTransactionAbortPrepared(TransactionId xid,
                                                           int nrels,
                                                           RelFileNode *rels)
 {
-       XLogRecData rdata[3];
-       int                     lastrdata = 0;
        xl_xact_abort_prepared xlrec;
        XLogRecPtr      recptr;
 
@@ -2189,30 +2182,20 @@ RecordTransactionAbortPrepared(TransactionId xid,
        xlrec.arec.xact_time = GetCurrentTimestamp();
        xlrec.arec.nrels = nrels;
        xlrec.arec.nsubxacts = nchildren;
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactAbortPrepared;
-       rdata[0].buffer = InvalidBuffer;
+
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbortPrepared);
+
        /* dump rels to delete */
        if (nrels > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) rels;
-               rdata[1].len = nrels * sizeof(RelFileNode);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
        /* dump committed child Xids */
        if (nchildren > 0)
-       {
-               rdata[lastrdata].next = &(rdata[2]);
-               rdata[2].data = (char *) children;
-               rdata[2].len = nchildren * sizeof(TransactionId);
-               rdata[2].buffer = InvalidBuffer;
-               lastrdata = 2;
-       }
-       rdata[lastrdata].next = NULL;
+               XLogRegisterData((char *) children,
+                                                nchildren * sizeof(TransactionId));
 
-       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
+       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED);
 
        /* Always flush, since we're about to remove the 2PC state file */
        XLogFlush(recptr);
index 6f92bad07ca2698568946f0f5fc998794db7fd6e..763e9deb6f52bd32e6f80e5a22c3ef08f7eb9611 100644 (file)
@@ -571,7 +571,6 @@ AssignTransactionId(TransactionState s)
                if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
                        log_unknown_top)
                {
-                       XLogRecData rdata[2];
                        xl_xact_assignment xlrec;
 
                        /*
@@ -582,17 +581,12 @@ AssignTransactionId(TransactionState s)
                        Assert(TransactionIdIsValid(xlrec.xtop));
                        xlrec.nsubxacts = nUnreportedXids;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = MinSizeOfXactAssignment;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &rdata[1];
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, MinSizeOfXactAssignment);
+                       XLogRegisterData((char *) unreportedXids,
+                                                        nUnreportedXids * sizeof(TransactionId));
 
-                       rdata[1].data = (char *) unreportedXids;
-                       rdata[1].len = nUnreportedXids * sizeof(TransactionId);
-                       rdata[1].buffer = InvalidBuffer;
-                       rdata[1].next = NULL;
-
-                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT);
 
                        nUnreportedXids = 0;
                        /* mark top, not current xact as having been logged */
@@ -1087,8 +1081,6 @@ RecordTransactionCommit(void)
                if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
                        XLogLogicalInfoActive())
                {
-                       XLogRecData rdata[4];
-                       int                     lastrdata = 0;
                        xl_xact_commit xlrec;
 
                        /*
@@ -1107,63 +1099,38 @@ RecordTransactionCommit(void)
                        xlrec.nrels = nrels;
                        xlrec.nsubxacts = nchildren;
                        xlrec.nmsgs = nmsgs;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactCommit;
-                       rdata[0].buffer = InvalidBuffer;
+
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommit);
                        /* dump rels to delete */
                        if (nrels > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) rels;
-                               rdata[1].len = nrels * sizeof(RelFileNode);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
+                               XLogRegisterData((char *) rels,
+                                                                nrels * sizeof(RelFileNode));
                        /* dump committed child Xids */
                        if (nchildren > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].data = (char *) children;
-                               rdata[2].len = nchildren * sizeof(TransactionId);
-                               rdata[2].buffer = InvalidBuffer;
-                               lastrdata = 2;
-                       }
+                               XLogRegisterData((char *) children,
+                                                                nchildren * sizeof(TransactionId));
                        /* dump shared cache invalidation messages */
                        if (nmsgs > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[3]);
-                               rdata[3].data = (char *) invalMessages;
-                               rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
-                               rdata[3].buffer = InvalidBuffer;
-                               lastrdata = 3;
-                       }
-                       rdata[lastrdata].next = NULL;
-
-                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+                               XLogRegisterData((char *) invalMessages,
+                                                                nmsgs * sizeof(SharedInvalidationMessage));
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT);
                }
                else
                {
-                       XLogRecData rdata[2];
-                       int                     lastrdata = 0;
                        xl_xact_commit_compact xlrec;
 
                        xlrec.xact_time = xactStopTimestamp;
                        xlrec.nsubxacts = nchildren;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactCommitCompact;
-                       rdata[0].buffer = InvalidBuffer;
+
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitCompact);
                        /* dump committed child Xids */
                        if (nchildren > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) children;
-                               rdata[1].len = nchildren * sizeof(TransactionId);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
-                       rdata[lastrdata].next = NULL;
+                               XLogRegisterData((char *) children,
+                                                                nchildren * sizeof(TransactionId));
 
-                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT);
                }
        }
 
@@ -1436,8 +1403,6 @@ RecordTransactionAbort(bool isSubXact)
        RelFileNode *rels;
        int                     nchildren;
        TransactionId *children;
-       XLogRecData rdata[3];
-       int                     lastrdata = 0;
        xl_xact_abort xlrec;
 
        /*
@@ -1486,30 +1451,20 @@ RecordTransactionAbort(bool isSubXact)
        }
        xlrec.nrels = nrels;
        xlrec.nsubxacts = nchildren;
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactAbort;
-       rdata[0].buffer = InvalidBuffer;
+
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbort);
+
        /* dump rels to delete */
        if (nrels > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) rels;
-               rdata[1].len = nrels * sizeof(RelFileNode);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
        /* dump committed child Xids */
        if (nchildren > 0)
-       {
-               rdata[lastrdata].next = &(rdata[2]);
-               rdata[2].data = (char *) children;
-               rdata[2].len = nchildren * sizeof(TransactionId);
-               rdata[2].buffer = InvalidBuffer;
-               lastrdata = 2;
-       }
-       rdata[lastrdata].next = NULL;
+               XLogRegisterData((char *) children,
+                                                nchildren * sizeof(TransactionId));
 
-       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
+       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT);
 
        /*
         * Report the latest async abort LSN, so that the WAL writer knows to
@@ -2351,6 +2306,9 @@ AbortTransaction(void)
        AbortBufferIO();
        UnlockBuffers();
 
+       /* Reset WAL record construction state */
+       XLogResetInsertion();
+
        /*
         * Also clean up any open wait for lock, since the lock manager will choke
         * if we try to wait for another lock before doing this.
@@ -4299,6 +4257,9 @@ AbortSubTransaction(void)
        AbortBufferIO();
        UnlockBuffers();
 
+       /* Reset WAL record construction state */
+       XLogResetInsertion();
+
        /*
         * Also clean up any open wait for lock, since the lock manager will choke
         * if we try to wait for another lock before doing this.
@@ -4938,42 +4899,42 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
 }
 
 void
-xact_redo(XLogRecPtr lsn, XLogRecord *record)
+xact_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in xact records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_XACT_COMMIT_COMPACT)
        {
                xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
 
-               xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
+               xact_redo_commit_compact(xlrec, XLogRecGetXid(record), record->EndRecPtr);
        }
        else if (info == XLOG_XACT_COMMIT)
        {
                xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
 
-               xact_redo_commit(xlrec, record->xl_xid, lsn);
+               xact_redo_commit(xlrec, XLogRecGetXid(record), record->EndRecPtr);
        }
        else if (info == XLOG_XACT_ABORT)
        {
                xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
 
-               xact_redo_abort(xlrec, record->xl_xid);
+               xact_redo_abort(xlrec, XLogRecGetXid(record));
        }
        else if (info == XLOG_XACT_PREPARE)
        {
                /* the record contents are exactly the 2PC file */
-               RecreateTwoPhaseFile(record->xl_xid,
-                                                        XLogRecGetData(record), record->xl_len);
+               RecreateTwoPhaseFile(XLogRecGetXid(record),
+                                                 XLogRecGetData(record), XLogRecGetDataLen(record));
        }
        else if (info == XLOG_XACT_COMMIT_PREPARED)
        {
                xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
 
-               xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
+               xact_redo_commit(&xlrec->crec, xlrec->xid, record->EndRecPtr);
                RemoveTwoPhaseFile(xlrec->xid, false);
        }
        else if (info == XLOG_XACT_ABORT_PREPARED)
index 60531277dc67b88cc0fdf4cfe6f6bd728d9cd3dd..2059bbeda4aad2c70d3574157845f7216c2fc2c9 100644 (file)
@@ -757,10 +757,10 @@ static MemoryContext walDebugCxt = NULL;
 
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
-static bool recoveryStopsBefore(XLogRecord *record);
-static bool recoveryStopsAfter(XLogRecord *record);
+static bool recoveryStopsBefore(XLogReaderState *record);
+static bool recoveryStopsAfter(XLogReaderState *record);
 static void recoveryPausesHere(void);
-static bool recoveryApplyDelay(XLogRecord *record);
+static bool recoveryApplyDelay(XLogReaderState *record);
 static void SetLatestXTime(TimestampTz xtime);
 static void SetCurrentChunkStartTime(TimestampTz xtime);
 static void CheckRequiredParameterValues(void);
@@ -807,9 +807,9 @@ static char *str_time(pg_time_t tnow);
 static bool CheckForStandbyTrigger(void);
 
 #ifdef WAL_DEBUG
-static void xlog_outrec(StringInfo buf, XLogRecord *record);
+static void xlog_outrec(StringInfo buf, XLogReaderState *record);
 #endif
-static void xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record);
+static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
 static void pg_start_backup_callback(int code, Datum arg);
 static bool read_backup_label(XLogRecPtr *checkPointLoc,
                                  bool *backupEndRequired, bool *backupFromStandby);
@@ -861,7 +861,6 @@ XLogRecPtr
 XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
 {
        XLogCtlInsert *Insert = &XLogCtl->Insert;
-       XLogRecData *rdt;
        pg_crc32        rdata_crc;
        bool            inserted;
        XLogRecord *rechdr = (XLogRecord *) rdata->data;
@@ -870,28 +869,13 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
        XLogRecPtr      StartPos;
        XLogRecPtr      EndPos;
 
+       /* we assume that all of the record header is in the first chunk */
+       Assert(rdata->len >= SizeOfXLogRecord);
+
        /* cross-check on whether we should be here or not */
        if (!XLogInsertAllowed())
                elog(ERROR, "cannot make new WAL entries during recovery");
 
-       /*
-        * Calculate CRC of the data, including all the backup blocks
-        *
-        * Note that the record header isn't added into the CRC initially since we
-        * don't know the prev-link yet.  Thus, the CRC will represent the CRC of
-        * the whole record in the order: rdata, then backup blocks, then record
-        * header.
-        */
-       INIT_CRC32C(rdata_crc);
-       for (rdt = rdata->next; rdt != NULL; rdt = rdt->next)
-               COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
-
-       /*
-        * Calculate CRC of the header, except for prev-link, because we don't
-        * know it yet.  It will be added later.
-        */
-       COMP_CRC32C(rdata_crc, ((char *) rechdr), offsetof(XLogRecord, xl_prev));
-
        /*----------
         *
         * We have now done all the preparatory work we can without holding a
@@ -976,10 +960,11 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
        if (inserted)
        {
                /*
-                * Now that xl_prev has been filled in, finish CRC calculation of the
-                * record header.
+                * Now that xl_prev has been filled in, calculate CRC of the record
+                * header.
                 */
-               COMP_CRC32C(rdata_crc, ((char *) &rechdr->xl_prev), sizeof(XLogRecPtr));
+               rdata_crc = rechdr->xl_crc;
+               COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
                FIN_CRC32C(rdata_crc);
                rechdr->xl_crc = rdata_crc;
 
@@ -1053,34 +1038,47 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
 #ifdef WAL_DEBUG
        if (XLOG_DEBUG)
        {
+               static XLogReaderState *debug_reader = NULL;
                StringInfoData buf;
-               MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
+               StringInfoData recordBuf;
+               char       *errormsg = NULL;
+               MemoryContext oldCxt;
+
+               oldCxt = MemoryContextSwitchTo(walDebugCxt);
 
                initStringInfo(&buf);
                appendStringInfo(&buf, "INSERT @ %X/%X: ",
                                                 (uint32) (EndPos >> 32), (uint32) EndPos);
-               xlog_outrec(&buf, rechdr);
-               if (rdata->data != NULL)
-               {
-                       StringInfoData recordbuf;
 
-                       /*
-                        * We have to piece together the WAL record data from the
-                        * XLogRecData entries, so that we can pass it to the rm_desc
-                        * function as one contiguous chunk.
-                        */
-                       initStringInfo(&recordbuf);
-                       appendBinaryStringInfo(&recordbuf, (char *) rechdr, sizeof(XLogRecord));
-                       for (; rdata != NULL; rdata = rdata->next)
-                               appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
+               /*
+                * We have to piece together the WAL record data from the XLogRecData
+                * entries, so that we can pass it to the rm_desc function as one
+                * contiguous chunk.
+                */
+               initStringInfo(&recordBuf);
+               for (; rdata != NULL; rdata = rdata->next)
+                       appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
+
+               if (!debug_reader)
+                       debug_reader = XLogReaderAllocate(NULL, NULL);
 
+               if (!debug_reader ||
+                       !DecodeXLogRecord(debug_reader, (XLogRecord *) recordBuf.data,
+                                                         &errormsg))
+               {
+                       appendStringInfo(&buf, "error decoding record: %s",
+                                                        errormsg ? errormsg : "no error message");
+               }
+               else
+               {
                        appendStringInfoString(&buf, " - ");
-                       xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
+                       xlog_outdesc(&buf, debug_reader);
                }
                elog(LOG, "%s", buf.data);
 
+               pfree(buf.data);
+               pfree(recordBuf.data);
                MemoryContextSwitchTo(oldCxt);
-               MemoryContextReset(walDebugCxt);
        }
 #endif
 
@@ -1170,7 +1168,7 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
        uint64          startbytepos;
        uint64          endbytepos;
        uint64          prevbytepos;
-       uint32          size = SizeOfXLogRecord;
+       uint32          size = MAXALIGN(SizeOfXLogRecord);
        XLogRecPtr      ptr;
        uint32          segleft;
 
@@ -1234,9 +1232,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
        XLogRecPtr      CurrPos;
        XLogPageHeader pagehdr;
 
-       /* The first chunk is the record header */
-       Assert(rdata->len == SizeOfXLogRecord);
-
        /*
         * Get a pointer to the right place in the right WAL buffer to start
         * inserting to.
@@ -1309,9 +1304,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
        }
        Assert(written == write_len);
 
-       /* Align the end position, so that the next record starts aligned */
-       CurrPos = MAXALIGN64(CurrPos);
-
        /*
         * If this was an xlog-switch, it's not enough to write the switch record,
         * we also have to consume all the remaining space in the WAL segment. We
@@ -1341,6 +1333,11 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
                        CurrPos += XLOG_BLCKSZ;
                }
        }
+       else
+       {
+               /* Align the end position, so that the next record starts aligned */
+               CurrPos = MAXALIGN64(CurrPos);
+       }
 
        if (CurrPos != EndPos)
                elog(PANIC, "space reserved for WAL record does not match what was written");
@@ -4470,6 +4467,7 @@ BootStrapXLOG(void)
        XLogPageHeader page;
        XLogLongPageHeader longpage;
        XLogRecord *record;
+       char       *recptr;
        bool            use_existent;
        uint64          sysidentifier;
        struct timeval tv;
@@ -4541,17 +4539,23 @@ BootStrapXLOG(void)
        longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
 
        /* Insert the initial checkpoint record */
-       record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
+       recptr = ((char *) page + SizeOfXLogLongPHD);
+       record = (XLogRecord *) recptr;
        record->xl_prev = 0;
        record->xl_xid = InvalidTransactionId;
-       record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint);
-       record->xl_len = sizeof(checkPoint);
+       record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
        record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
        record->xl_rmid = RM_XLOG_ID;
-       memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
+       recptr += SizeOfXLogRecord;
+       /* fill the XLogRecordDataHeaderShort struct */
+       *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
+       *(recptr++) = sizeof(checkPoint);
+       memcpy(recptr, &checkPoint, sizeof(checkPoint));
+       recptr += sizeof(checkPoint);
+       Assert(recptr - (char *) record == record->xl_tot_len);
 
        INIT_CRC32C(crc);
-       COMP_CRC32C(crc, &checkPoint, sizeof(checkPoint));
+       COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
        COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
        FIN_CRC32C(crc);
        record->xl_crc = crc;
@@ -4984,36 +4988,37 @@ exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
  * timestamps.
  */
 static bool
-getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
+getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
 {
-       uint8           record_info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+       uint8           rmid = XLogRecGetRmid(record);
 
-       if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
+       if (rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
        {
                *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
                return true;
        }
-       if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
+       if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
        {
                *recordXtime = ((xl_xact_commit_compact *) XLogRecGetData(record))->xact_time;
                return true;
        }
-       if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
+       if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
        {
                *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
                return true;
        }
-       if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
+       if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
        {
                *recordXtime = ((xl_xact_commit_prepared *) XLogRecGetData(record))->crec.xact_time;
                return true;
        }
-       if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
+       if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
        {
                *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
                return true;
        }
-       if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
+       if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
        {
                *recordXtime = ((xl_xact_abort_prepared *) XLogRecGetData(record))->arec.xact_time;
                return true;
@@ -5030,7 +5035,7 @@ getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
  * new timeline's history file.
  */
 static bool
-recoveryStopsBefore(XLogRecord *record)
+recoveryStopsBefore(XLogReaderState *record)
 {
        bool            stopsHere = false;
        uint8           record_info;
@@ -5052,14 +5057,14 @@ recoveryStopsBefore(XLogRecord *record)
        }
 
        /* Otherwise we only consider stopping before COMMIT or ABORT records. */
-       if (record->xl_rmid != RM_XACT_ID)
+       if (XLogRecGetRmid(record) != RM_XACT_ID)
                return false;
-       record_info = record->xl_info & ~XLR_INFO_MASK;
+       record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT)
        {
                isCommit = true;
-               recordXid = record->xl_xid;
+               recordXid = XLogRecGetXid(record);
        }
        else if (record_info == XLOG_XACT_COMMIT_PREPARED)
        {
@@ -5069,7 +5074,7 @@ recoveryStopsBefore(XLogRecord *record)
        else if (record_info == XLOG_XACT_ABORT)
        {
                isCommit = false;
-               recordXid = record->xl_xid;
+               recordXid = XLogRecGetXid(record);
        }
        else if (record_info == XLOG_XACT_ABORT_PREPARED)
        {
@@ -5140,19 +5145,21 @@ recoveryStopsBefore(XLogRecord *record)
  * record in XLogCtl->recoveryLastXTime.
  */
 static bool
-recoveryStopsAfter(XLogRecord *record)
+recoveryStopsAfter(XLogReaderState *record)
 {
        uint8           record_info;
+       uint8           rmid;
        TimestampTz recordXtime;
 
-       record_info = record->xl_info & ~XLR_INFO_MASK;
+       record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+       rmid = XLogRecGetRmid(record);
 
        /*
         * There can be many restore points that share the same name; we stop at
         * the first one.
         */
        if (recoveryTarget == RECOVERY_TARGET_NAME &&
-               record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
+               rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
        {
                xl_restore_point *recordRestorePointData;
 
@@ -5173,7 +5180,7 @@ recoveryStopsAfter(XLogRecord *record)
                }
        }
 
-       if (record->xl_rmid == RM_XACT_ID &&
+       if (rmid == RM_XACT_ID &&
                (record_info == XLOG_XACT_COMMIT_COMPACT ||
                 record_info == XLOG_XACT_COMMIT ||
                 record_info == XLOG_XACT_COMMIT_PREPARED ||
@@ -5192,7 +5199,7 @@ recoveryStopsAfter(XLogRecord *record)
                else if (record_info == XLOG_XACT_ABORT_PREPARED)
                        recordXid = ((xl_xact_abort_prepared *) XLogRecGetData(record))->xid;
                else
-                       recordXid = record->xl_xid;
+                       recordXid = XLogRecGetXid(record);
 
                /*
                 * There can be only one transaction end record with this exact
@@ -5307,7 +5314,7 @@ SetRecoveryPause(bool recoveryPause)
  * usability.
  */
 static bool
-recoveryApplyDelay(XLogRecord *record)
+recoveryApplyDelay(XLogReaderState *record)
 {
        uint8           record_info;
        TimestampTz xtime;
@@ -5326,8 +5333,8 @@ recoveryApplyDelay(XLogRecord *record)
         * so there is already opportunity for issues caused by early conflicts on
         * standbys.
         */
-       record_info = record->xl_info & ~XLR_INFO_MASK;
-       if (!(record->xl_rmid == RM_XACT_ID &&
+       record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+       if (!(XLogRecGetRmid(record) == RM_XACT_ID &&
                  (record_info == XLOG_XACT_COMMIT_COMPACT ||
                   record_info == XLOG_XACT_COMMIT ||
                   record_info == XLOG_XACT_COMMIT_PREPARED)))
@@ -5696,7 +5703,7 @@ StartupXLOG(void)
                record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
                if (record != NULL)
                {
-                       memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+                       memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
                        wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
                        ereport(DEBUG1,
                                        (errmsg("checkpoint record is at %X/%X",
@@ -5793,7 +5800,7 @@ StartupXLOG(void)
                                ereport(PANIC,
                                         (errmsg("could not locate a valid checkpoint record")));
                }
-               memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+               memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
                wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
        }
 
@@ -6230,9 +6237,9 @@ StartupXLOG(void)
                                        appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
                                                        (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
                                                         (uint32) (EndRecPtr >> 32), (uint32) EndRecPtr);
-                                       xlog_outrec(&buf, record);
+                                       xlog_outrec(&buf, xlogreader);
                                        appendStringInfoString(&buf, " - ");
-                                       xlog_outdesc(&buf, record->xl_rmid, record);
+                                       xlog_outdesc(&buf, xlogreader);
                                        elog(LOG, "%s", buf.data);
                                        pfree(buf.data);
                                }
@@ -6260,7 +6267,7 @@ StartupXLOG(void)
                                /*
                                 * Have we reached our recovery target?
                                 */
-                               if (recoveryStopsBefore(record))
+                               if (recoveryStopsBefore(xlogreader))
                                {
                                        reachedStopPoint = true;        /* see below */
                                        break;
@@ -6270,7 +6277,7 @@ StartupXLOG(void)
                                 * If we've been asked to lag the master, wait on latch until
                                 * enough time has passed.
                                 */
-                               if (recoveryApplyDelay(record))
+                               if (recoveryApplyDelay(xlogreader))
                                {
                                        /*
                                         * We test for paused recovery again here. If user sets
@@ -6285,7 +6292,7 @@ StartupXLOG(void)
 
                                /* Setup error traceback support for ereport() */
                                errcallback.callback = rm_redo_error_callback;
-                               errcallback.arg = (void *) record;
+                               errcallback.arg = (void *) xlogreader;
                                errcallback.previous = error_context_stack;
                                error_context_stack = &errcallback;
 
@@ -6324,7 +6331,7 @@ StartupXLOG(void)
                                        {
                                                CheckPoint      checkPoint;
 
-                                               memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
+                                               memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
                                                newTLI = checkPoint.ThisTimeLineID;
                                                prevTLI = checkPoint.PrevTimeLineID;
                                        }
@@ -6332,7 +6339,7 @@ StartupXLOG(void)
                                        {
                                                xl_end_of_recovery xlrec;
 
-                                               memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
+                                               memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
                                                newTLI = xlrec.ThisTimeLineID;
                                                prevTLI = xlrec.PrevTimeLineID;
                                        }
@@ -6366,7 +6373,7 @@ StartupXLOG(void)
                                        RecordKnownAssignedTransactionIds(record->xl_xid);
 
                                /* Now apply the WAL record itself */
-                               RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
+                               RmgrTable[record->xl_rmid].rm_redo(xlogreader);
 
                                /* Pop the error context stack */
                                error_context_stack = errcallback.previous;
@@ -6394,7 +6401,7 @@ StartupXLOG(void)
                                        WalSndWakeup();
 
                                /* Exit loop if we reached inclusive recovery target */
-                               if (recoveryStopsAfter(record))
+                               if (recoveryStopsAfter(xlogreader))
                                {
                                        reachedStopPoint = true;
                                        break;
@@ -7148,8 +7155,7 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
                }
                return NULL;
        }
-       if (record->xl_len != sizeof(CheckPoint) ||
-               record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
+       if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
        {
                switch (whichChkpt)
                {
@@ -7194,6 +7200,9 @@ InitXLOGAccess(void)
        (void) GetRedoRecPtr();
        /* Also update our copy of doPageWrites. */
        doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
+
+       /* Also initialize the working areas for constructing WAL records */
+       InitXLogInsert();
 }
 
 /*
@@ -7490,7 +7499,6 @@ CreateCheckPoint(int flags)
        CheckPoint      checkPoint;
        XLogRecPtr      recptr;
        XLogCtlInsert *Insert = &XLogCtl->Insert;
-       XLogRecData rdata;
        uint32          freespace;
        XLogSegNo       _logSegNo;
        XLogRecPtr      curInsert;
@@ -7760,15 +7768,11 @@ CreateCheckPoint(int flags)
        /*
         * Now insert the checkpoint record into XLOG.
         */
-       rdata.data = (char *) (&checkPoint);
-       rdata.len = sizeof(checkPoint);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
        recptr = XLogInsert(RM_XLOG_ID,
                                                shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
-                                               XLOG_CHECKPOINT_ONLINE,
-                                               &rdata);
+                                               XLOG_CHECKPOINT_ONLINE);
 
        XLogFlush(recptr);
 
@@ -7908,7 +7912,6 @@ static void
 CreateEndOfRecoveryRecord(void)
 {
        xl_end_of_recovery xlrec;
-       XLogRecData rdata;
        XLogRecPtr      recptr;
 
        /* sanity check */
@@ -7926,12 +7929,9 @@ CreateEndOfRecoveryRecord(void)
 
        START_CRIT_SECTION();
 
-       rdata.data = (char *) &xlrec;
-       rdata.len = sizeof(xl_end_of_recovery);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-
-       recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
+       recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
 
        XLogFlush(recptr);
 
@@ -8307,13 +8307,9 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
 void
 XLogPutNextOid(Oid nextOid)
 {
-       XLogRecData rdata;
-
-       rdata.data = (char *) (&nextOid);
-       rdata.len = sizeof(Oid);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&nextOid), sizeof(Oid));
+       (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
 
        /*
         * We need not flush the NEXTOID record immediately, because any of the
@@ -8349,15 +8345,10 @@ XLogRecPtr
 RequestXLogSwitch(void)
 {
        XLogRecPtr      RecPtr;
-       XLogRecData rdata;
-
-       /* XLOG SWITCH, alone among xlog record types, has no data */
-       rdata.buffer = InvalidBuffer;
-       rdata.data = NULL;
-       rdata.len = 0;
-       rdata.next = NULL;
 
-       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
+       /* XLOG SWITCH has no data */
+       XLogBeginInsert();
+       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
 
        return RecPtr;
 }
@@ -8369,18 +8360,15 @@ XLogRecPtr
 XLogRestorePoint(const char *rpName)
 {
        XLogRecPtr      RecPtr;
-       XLogRecData rdata;
        xl_restore_point xlrec;
 
        xlrec.rp_time = GetCurrentTimestamp();
        strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
 
-       rdata.buffer = InvalidBuffer;
-       rdata.data = (char *) &xlrec;
-       rdata.len = sizeof(xl_restore_point);
-       rdata.next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
 
-       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata);
+       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
 
        ereport(LOG,
                        (errmsg("restore point \"%s\" created at %X/%X",
@@ -8412,7 +8400,6 @@ XLogReportParameters(void)
                 */
                if (wal_level != ControlFile->wal_level || XLogIsNeeded())
                {
-                       XLogRecData rdata;
                        xl_parameter_change xlrec;
                        XLogRecPtr      recptr;
 
@@ -8423,12 +8410,10 @@ XLogReportParameters(void)
                        xlrec.wal_level = wal_level;
                        xlrec.wal_log_hints = wal_log_hints;
 
-                       rdata.buffer = InvalidBuffer;
-                       rdata.data = (char *) &xlrec;
-                       rdata.len = sizeof(xlrec);
-                       rdata.next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
 
-                       recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata);
+                       recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
                        XLogFlush(recptr);
                }
 
@@ -8486,14 +8471,10 @@ UpdateFullPageWrites(void)
         */
        if (XLogStandbyInfoActive() && !RecoveryInProgress())
        {
-               XLogRecData rdata;
-
-               rdata.data = (char *) (&fullPageWrites);
-               rdata.len = sizeof(bool);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
 
-               XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
+               XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
        }
 
        if (!fullPageWrites)
@@ -8558,12 +8539,13 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
  * not all record types are related to control file updates.
  */
 void
-xlog_redo(XLogRecPtr lsn, XLogRecord *record)
+xlog_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+       XLogRecPtr      lsn = record->EndRecPtr;
 
-       /* Backup blocks are not used by XLOG rmgr */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       /* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */
+       Assert(!XLogRecHasAnyBlockRefs(record) || info == XLOG_FPI);
 
        if (info == XLOG_NEXTOID)
        {
@@ -8750,14 +8732,12 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
        }
        else if (info == XLOG_FPI)
        {
-               char       *data;
-               BkpBlock        bkpb;
+               Buffer          buffer;
 
                /*
-                * Full-page image (FPI) records contain a backup block stored
-                * "inline" in the normal data since the locking when writing hint
-                * records isn't sufficient to use the normal backup block mechanism,
-                * which assumes exclusive lock on the buffer supplied.
+                * Full-page image (FPI) records contain nothing else but a backup
+                * block. The block reference must include a full-page image -
+                * otherwise there would be no point in this record.
                 *
                 * Since the only change in these backup block are hint bits, there
                 * are no recovery conflicts generated.
@@ -8766,11 +8746,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
                 * smgr implementation has no need to implement anything. Which means
                 * nothing is needed in md.c etc
                 */
-               data = XLogRecGetData(record);
-               memcpy(&bkpb, data, sizeof(BkpBlock));
-               data += sizeof(BkpBlock);
-
-               RestoreBackupBlockContents(lsn, bkpb, data, false, false);
+               if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
+                       elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
+               UnlockReleaseBuffer(buffer);
        }
        else if (info == XLOG_BACKUP_END)
        {
@@ -8867,22 +8845,42 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 #ifdef WAL_DEBUG
 
 static void
-xlog_outrec(StringInfo buf, XLogRecord *record)
+xlog_outrec(StringInfo buf, XLogReaderState *record)
 {
-       int                     i;
+       int                     block_id;
 
        appendStringInfo(buf, "prev %X/%X; xid %u",
-                                        (uint32) (record->xl_prev >> 32),
-                                        (uint32) record->xl_prev,
-                                        record->xl_xid);
+                                        (uint32) (XLogRecGetPrev(record) >> 32),
+                                        (uint32) XLogRecGetPrev(record),
+                                        XLogRecGetXid(record));
 
        appendStringInfo(buf, "; len %u",
-                                        record->xl_len);
+                                        XLogRecGetDataLen(record));
 
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+       /* decode block references */
+       for (block_id = 0; block_id <= record->max_block_id; block_id++)
        {
-               if (record->xl_info & XLR_BKP_BLOCK(i))
-                       appendStringInfo(buf, "; bkpb%d", i);
+               RelFileNode rnode;
+               ForkNumber      forknum;
+               BlockNumber blk;
+
+               if (!XLogRecHasBlockRef(record, block_id))
+                       continue;
+
+               XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+               if (forknum != MAIN_FORKNUM)
+                       appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, fork %u, blk %u",
+                                                        block_id,
+                                                        rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                        forknum,
+                                                        blk);
+               else
+                       appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, blk %u",
+                                                        block_id,
+                                                        rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                        blk);
+               if (XLogRecHasBlockImage(record, block_id))
+                       appendStringInfo(buf, " FPW");
        }
 }
 #endif   /* WAL_DEBUG */
@@ -8892,17 +8890,18 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
  * optionally followed by a colon, a space, and a further description.
  */
 static void
-xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record)
+xlog_outdesc(StringInfo buf, XLogReaderState *record)
 {
+       RmgrId          rmid = XLogRecGetRmid(record);
+       uint8           info = XLogRecGetInfo(record);
        const char *id;
 
        appendStringInfoString(buf, RmgrTable[rmid].rm_name);
        appendStringInfoChar(buf, '/');
 
-       id = RmgrTable[rmid].rm_identify(record->xl_info);
+       id = RmgrTable[rmid].rm_identify(info);
        if (id == NULL)
-               appendStringInfo(buf, "UNKNOWN (%X): ",
-                                                record->xl_info & ~XLR_INFO_MASK);
+               appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
        else
                appendStringInfo(buf, "%s: ", id);
 
@@ -9411,7 +9410,6 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
        XLogRecPtr      startpoint;
        XLogRecPtr      stoppoint;
        TimeLineID      stoptli;
-       XLogRecData rdata;
        pg_time_t       stamp_time;
        char            strfbuf[128];
        char            histfilepath[MAXPGPATH];
@@ -9618,11 +9616,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
        /*
         * Write the backup-end xlog record
         */
-       rdata.data = (char *) (&startpoint);
-       rdata.len = sizeof(startpoint);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
+       stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
        stoptli = ThisTimeLineID;
 
        /*
@@ -9930,15 +9926,13 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
 static void
 rm_redo_error_callback(void *arg)
 {
-       XLogRecord *record = (XLogRecord *) arg;
+       XLogReaderState *record = (XLogReaderState *) arg;
        StringInfoData buf;
 
        initStringInfo(&buf);
-       xlog_outdesc(&buf, record->xl_rmid, record);
+       xlog_outdesc(&buf, record);
 
-       /* don't bother emitting empty description */
-       if (buf.len > 0)
-               errcontext("xlog redo %s", buf.data);
+       errcontext("xlog redo %s", buf.data);
 
        pfree(buf.data);
 }
index b83343bf5bdd26f94845de25c69a775e41f66bca..89c407e521bed5f01502364914e1f403b7db5cd1 100644 (file)
@@ -3,6 +3,12 @@
  * xloginsert.c
  *             Functions for constructing WAL records
  *
+ * Constructing a WAL record begins with a call to XLogBeginInsert,
+ * followed by a number of XLogRegister* calls. The registered data is
+ * collected in private working memory, and finally assembled into a chain
+ * of XLogRecData structs by a call to XLogRecordAssemble(). See
+ * access/transam/README for details.
+ *
  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
 #include "utils/memutils.h"
 #include "pg_trace.h"
 
+/*
+ * For each block reference registered with XLogRegisterBuffer, we fill in
+ * a registered_buffer struct.
+ */
+typedef struct
+{
+       bool            in_use;                 /* is this slot in use? */
+       uint8           flags;                  /* REGBUF_* flags */
+       RelFileNode rnode;                      /* identifies the relation and block */
+       ForkNumber      forkno;
+       BlockNumber block;
+       Page            page;                   /* page content */
+       uint32          rdata_len;              /* total length of data in rdata chain */
+       XLogRecData *rdata_head;        /* head of the chain of data registered with
+                                                                * this block */
+       XLogRecData *rdata_tail;        /* last entry in the chain, or &rdata_head if
+                                                                * empty */
+
+       XLogRecData bkp_rdatas[2];      /* temporary rdatas used to hold references to
+                                                                * backup block data in XLogRecordAssemble() */
+}      registered_buffer;
+
+static registered_buffer *registered_buffers;
+static int     max_registered_buffers;         /* allocated size */
+static int     max_registered_block_id = 0;            /* highest block_id + 1
+                                                                                                * currently registered */
+
+/*
+ * A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
+ * with XLogRegisterData(...).
+ */
+static XLogRecData *mainrdata_head;
+static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
+static uint32 mainrdata_len;   /* total # of bytes in chain */
+
+/*
+ * These are used to hold the record header while constructing a record.
+ * 'hdr_scratch' is not a plain variable, but is palloc'd at initialization,
+ * because we want it to be MAXALIGNed and padding bytes zeroed.
+ *
+ * For simplicity, it's allocated large enough to hold the headers for any
+ * WAL record.
+ */
+static XLogRecData hdr_rdt;
+static char *hdr_scratch = NULL;
+
+#define HEADER_SCRATCH_SIZE \
+       (SizeOfXLogRecord + \
+        MaxSizeOfXLogRecordBlockHeader * (XLR_MAX_BLOCK_ID + 1) + \
+        SizeOfXLogRecordDataHeaderLong)
+
+/*
+ * An array of XLogRecData structs, to hold registered data.
+ */
+static XLogRecData *rdatas;
+static int     num_rdatas;                     /* entries currently used */
+static int     max_rdatas;                     /* allocated size */
+
+static bool begininsert_called = false;
+
+/* Memory context to hold the registered buffer and data references. */
+static MemoryContext xloginsert_cxt;
+
 static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
-                                  XLogRecData *rdata,
                                   XLogRecPtr RedoRecPtr, bool doPageWrites,
-                                  XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal);
-static void XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb);
+                                  XLogRecPtr *fpw_lsn);
+
+/*
+ * Begin constructing a WAL record. This must be called before the
+ * XLogRegister* functions and XLogInsert().
+ */
+void
+XLogBeginInsert(void)
+{
+       Assert(max_registered_block_id == 0);
+       Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
+       Assert(mainrdata_len == 0);
+       Assert(!begininsert_called);
+
+       /* cross-check on whether we should be here or not */
+       if (!XLogInsertAllowed())
+               elog(ERROR, "cannot make new WAL entries during recovery");
+
+       begininsert_called = true;
+}
 
 /*
- * Insert an XLOG record having the specified RMID and info bytes,
- * with the body of the record being the data chunk(s) described by
- * the rdata chain (see xloginsert.h for notes about rdata).
+ * Ensure that there are enough buffer and data slots in the working area,
+ * for subsequent XLogRegisterBuffer, XLogRegisterData and XLogRegisterBufData
+ * calls.
+ *
+ * There is always space for a small number of buffers and data chunks, enough
+ * for most record types. This function is for the exceptional cases that need
+ * more.
+ */
+void
+XLogEnsureRecordSpace(int max_block_id, int ndatas)
+{
+       int                     nbuffers;
+
+       /*
+        * This must be called before entering a critical section, because
+        * allocating memory inside a critical section can fail. repalloc() will
+        * check the same, but better to check it here too so that we fail
+        * consistently even if the arrays happen to be large enough already.
+        */
+       Assert(CritSectionCount == 0);
+
+       /* the minimum values can't be decreased */
+       if (max_block_id < XLR_NORMAL_MAX_BLOCK_ID)
+               max_block_id = XLR_NORMAL_MAX_BLOCK_ID;
+       if (ndatas < XLR_NORMAL_RDATAS)
+               ndatas = XLR_NORMAL_RDATAS;
+
+       if (max_block_id > XLR_MAX_BLOCK_ID)
+               elog(ERROR, "maximum number of WAL record block references exceeded");
+       nbuffers = max_block_id + 1;
+
+       if (nbuffers > max_registered_buffers)
+       {
+               registered_buffers = (registered_buffer *)
+                       repalloc(registered_buffers, sizeof(registered_buffer) * nbuffers);
+
+               /*
+                * At least the padding bytes in the structs must be zeroed, because
+                * they are included in WAL data, but initialize it all for tidiness.
+                */
+               MemSet(&registered_buffers[max_registered_buffers], 0,
+                       (nbuffers - max_registered_buffers) * sizeof(registered_buffer));
+               max_registered_buffers = nbuffers;
+       }
+
+       if (ndatas > max_rdatas)
+       {
+               rdatas = (XLogRecData *) repalloc(rdatas, sizeof(XLogRecData) * ndatas);
+               max_rdatas = ndatas;
+       }
+}
+
+/*
+ * Reset WAL record construction buffers.
+ */
+void
+XLogResetInsertion(void)
+{
+       int                     i;
+
+       for (i = 0; i < max_registered_block_id; i++)
+               registered_buffers[i].in_use = false;
+
+       num_rdatas = 0;
+       max_registered_block_id = 0;
+       mainrdata_len = 0;
+       mainrdata_last = (XLogRecData *) &mainrdata_head;
+       begininsert_called = false;
+}
+
+/*
+ * Register a reference to a buffer with the WAL record being constructed.
+ * This must be called for every page that the WAL-logged operation modifies.
+ */
+void
+XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
+{
+       registered_buffer *regbuf;
+
+       /* NO_IMAGE doesn't make sense with FORCE_IMAGE */
+       Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
+       Assert(begininsert_called);
+
+       if (block_id >= max_registered_block_id)
+       {
+               if (block_id >= max_registered_buffers)
+                       elog(ERROR, "too many registered buffers");
+               max_registered_block_id = block_id + 1;
+       }
+
+       regbuf = &registered_buffers[block_id];
+
+       BufferGetTag(buffer, &regbuf->rnode, &regbuf->forkno, &regbuf->block);
+       regbuf->page = BufferGetPage(buffer);
+       regbuf->flags = flags;
+       regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
+       regbuf->rdata_len = 0;
+
+       /*
+        * Check that this page hasn't already been registered with some other
+        * block_id.
+        */
+#ifdef USE_ASSERT_CHECKING
+       {
+               int                     i;
+
+               for (i = 0; i < max_registered_block_id; i++)
+               {
+                       registered_buffer *regbuf_old = &registered_buffers[i];
+
+                       if (i == block_id || !regbuf_old->in_use)
+                               continue;
+
+                       Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
+                                  regbuf_old->forkno != regbuf->forkno ||
+                                  regbuf_old->block != regbuf->block);
+               }
+       }
+#endif
+
+       regbuf->in_use = true;
+}
+
+/*
+ * Like XLogRegisterBuffer, but for registering a block that's not in the
+ * shared buffer pool (i.e. when you don't have a Buffer for it).
+ */
+void
+XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
+                                 BlockNumber blknum, Page page, uint8 flags)
+{
+       registered_buffer *regbuf;
+
+       /* This is currently only used to WAL-log a full-page image of a page */
+       Assert(flags & REGBUF_FORCE_IMAGE);
+       Assert(begininsert_called);
+
+       if (block_id >= max_registered_block_id)
+               max_registered_block_id = block_id + 1;
+
+       if (block_id >= max_registered_buffers)
+               elog(ERROR, "too many registered buffers");
+
+       regbuf = &registered_buffers[block_id];
+
+       regbuf->rnode = *rnode;
+       regbuf->forkno = forknum;
+       regbuf->block = blknum;
+       regbuf->page = page;
+       regbuf->flags = flags;
+       regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
+       regbuf->rdata_len = 0;
+
+       /*
+        * Check that this page hasn't already been registered with some other
+        * block_id.
+        */
+#ifdef USE_ASSERT_CHECKING
+       {
+               int                     i;
+
+               for (i = 0; i < max_registered_block_id; i++)
+               {
+                       registered_buffer *regbuf_old = &registered_buffers[i];
+
+                       if (i == block_id || !regbuf_old->in_use)
+                               continue;
+
+                       Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
+                                  regbuf_old->forkno != regbuf->forkno ||
+                                  regbuf_old->block != regbuf->block);
+               }
+       }
+#endif
+
+       regbuf->in_use = true;
+}
+
+/*
+ * Add data to the WAL record that's being constructed.
+ *
+ * The data is appended to the "main chunk", available at replay with
+ * XLogRecGetData().
+ */
+void
+XLogRegisterData(char *data, int len)
+{
+       XLogRecData *rdata;
+
+       Assert(begininsert_called);
+
+       if (num_rdatas >= max_rdatas)
+               elog(ERROR, "too much WAL data");
+       rdata = &rdatas[num_rdatas++];
+
+       rdata->data = data;
+       rdata->len = len;
+
+       /*
+        * we use the mainrdata_last pointer to track the end of the chain, so no
+        * need to clear 'next' here.
+        */
+
+       mainrdata_last->next = rdata;
+       mainrdata_last = rdata;
+
+       mainrdata_len += len;
+}
+
+/*
+ * Add buffer-specific data to the WAL record that's being constructed.
+ *
+ * Block_id must reference a block previously registered with
+ * XLogRegisterBuffer(). If this is called more than once for the same
+ * block_id, the data is appended.
+ *
+ * The maximum amount of data that can be registered per block is 65535
+ * bytes. That should be plenty; if you need more than BLCKSZ bytes to
+ * reconstruct the changes to the page, you might as well just log a full
+ * copy of it. (the "main data" that's not associated with a block is not
+ * limited)
+ */
+void
+XLogRegisterBufData(uint8 block_id, char *data, int len)
+{
+       registered_buffer *regbuf;
+       XLogRecData *rdata;
+
+       Assert(begininsert_called);
+
+       /* find the registered buffer struct */
+       regbuf = &registered_buffers[block_id];
+       if (!regbuf->in_use)
+               elog(ERROR, "no block with id %d registered with WAL insertion",
+                        block_id);
+
+       if (num_rdatas >= max_rdatas)
+               elog(ERROR, "too much WAL data");
+       rdata = &rdatas[num_rdatas++];
+
+       rdata->data = data;
+       rdata->len = len;
+
+       regbuf->rdata_tail->next = rdata;
+       regbuf->rdata_tail = rdata;
+       regbuf->rdata_len += len;
+}
+
+/*
+ * Insert an XLOG record having the specified RMID and info bytes, with the
+ * body of the record being the data and buffer references registered earlier
+ * with XLogRegister* calls.
  *
  * Returns XLOG pointer to end of record (beginning of next record).
  * This can be used as LSN for data pages affected by the logged action.
  * (LSN is the XLOG point up to which the XLOG must be flushed to disk
  * before the data page can be written out.  This implements the basic
  * WAL rule "write the log before the data".)
- *
- * NB: this routine feels free to scribble on the XLogRecData structs,
- * though not on the data they reference.  This is OK since the XLogRecData
- * structs are always just temporaries in the calling code.
  */
 XLogRecPtr
-XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
+XLogInsert(RmgrId rmid, uint8 info)
 {
-       XLogRecPtr      RedoRecPtr;
-       bool            doPageWrites;
        XLogRecPtr      EndPos;
-       XLogRecPtr      fpw_lsn;
-       XLogRecData *rdt;
-       XLogRecData *rdt_lastnormal;
 
-       /* info's high bits are reserved for use by me */
-       if (info & XLR_INFO_MASK)
+       /* XLogBeginInsert() must have been called. */
+       if (!begininsert_called)
+               elog(ERROR, "XLogBeginInsert was not called");
+
+       /*
+        * The caller can set rmgr bits and XLR_SPECIAL_REL_UPDATE; the rest are
+        * reserved for use by me.
+        */
+       if ((info & ~(XLR_RMGR_INFO_MASK | XLR_SPECIAL_REL_UPDATE)) != 0)
                elog(PANIC, "invalid xlog info mask %02X", info);
 
        TRACE_POSTGRESQL_XLOG_INSERT(rmid, info);
@@ -67,292 +400,282 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
         */
        if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
        {
+               XLogResetInsertion();
                EndPos = SizeOfXLogLongPHD;             /* start of 1st chkpt record */
                return EndPos;
        }
 
-       /*
-        * Get values needed to decide whether to do full-page writes. Since we
-        * don't yet have an insertion lock, these could change under us, but
-        * XLogInsertRecord will recheck them once it has a lock.
-        */
-       GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
-
-       /*
-        * Assemble an XLogRecData chain representing the WAL record, including
-        * any backup blocks needed.
-        *
-        * We may have to loop back to here if a race condition is detected in
-        * XLogInsertRecord.  We could prevent the race by doing all this work
-        * while holding an insertion lock, but it seems better to avoid doing CRC
-        * calculations while holding one.
-        */
-retry:
-       rdt = XLogRecordAssemble(rmid, info, rdata, RedoRecPtr, doPageWrites,
-                                                        &fpw_lsn, &rdt_lastnormal);
-
-       EndPos = XLogInsertRecord(rdt, fpw_lsn);
-
-       if (EndPos == InvalidXLogRecPtr)
+       do
        {
+               XLogRecPtr      RedoRecPtr;
+               bool            doPageWrites;
+               XLogRecPtr      fpw_lsn;
+               XLogRecData *rdt;
+
                /*
-                * Undo the changes we made to the rdata chain, and retry.
-                *
-                * XXX: This doesn't undo *all* the changes; the XLogRecData
-                * entries for buffers that we had already decided to back up have
-                * had their data-pointers cleared. That's OK, as long as we
-                * decide to back them up on the next iteration as well. Hence,
-                * don't allow "doPageWrites" value to go from true to false after
-                * we've modified the rdata chain.
+                * Get values needed to decide whether to do full-page writes. Since
+                * we don't yet have an insertion lock, these could change under us,
+                * but XLogInsertRecord will recheck them once it has a lock.
                 */
-               bool            newDoPageWrites;
+               GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
 
-               GetFullPageWriteInfo(&RedoRecPtr, &newDoPageWrites);
-               doPageWrites = doPageWrites || newDoPageWrites;
-               rdt_lastnormal->next = NULL;
+               rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
+                                                                &fpw_lsn);
 
-               goto retry;
-       }
+               EndPos = XLogInsertRecord(rdt, fpw_lsn);
+       } while (EndPos == InvalidXLogRecPtr);
+
+       XLogResetInsertion();
 
        return EndPos;
 }
 
 /*
- * Assemble a full WAL record, including backup blocks, from an XLogRecData
- * chain, ready for insertion with XLogInsertRecord(). The record header
- * fields are filled in, except for the xl_prev field and CRC.
+ * Assemble a WAL record from the registered data and buffers into an
+ * XLogRecData chain, ready for insertion with XLogInsertRecord().
  *
- * The rdata chain is modified, adding entries for full-page images.
- * *rdt_lastnormal is set to point to the last normal (ie. not added by
- * this function) entry. It can be used to reset the chain to its original
- * state.
+ * The record header fields are filled in, except for the xl_prev field. The
+ * calculated CRC does not include xl_prev either.
  *
- * If the rdata chain contains any buffer references, and a full-page image
- * was not taken of all the buffers, *fpw_lsn is set to the lowest LSN among
- * such pages. This signals that the assembled record is only good for
- * insertion on the assumption that the RedoRecPtr and doPageWrites values
- * were up-to-date.
+ * If a full-page image was omitted for any registered buffer because its
+ * page LSN was newer than RedoRecPtr, *fpw_lsn is set to the lowest such
+ * LSN. This signals that the assembled record is only good for insertion on
+ * the assumption that the RedoRecPtr and doPageWrites values were up-to-date.
  */
 static XLogRecData *
-XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata,
+XLogRecordAssemble(RmgrId rmid, uint8 info,
                                   XLogRecPtr RedoRecPtr, bool doPageWrites,
-                                  XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal)
+                                  XLogRecPtr *fpw_lsn)
 {
-       bool            isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
        XLogRecData *rdt;
-       Buffer          dtbuf[XLR_MAX_BKP_BLOCKS];
-       bool            dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
-       uint32          len,
-                               total_len;
-       unsigned        i;
+       uint32          total_len = 0;
+       int                     block_id;
+       pg_crc32        rdata_crc;
+       registered_buffer *prev_regbuf = NULL;
+       XLogRecData *rdt_datas_last;
+       XLogRecord *rechdr;
+       char       *scratch = hdr_scratch;
 
        /*
-        * These need to be static because they are returned to the caller as part
-        * of the XLogRecData chain.
+        * Note: this function can be called multiple times for the same record.
+        * All the modifications we do to the rdata chains below must handle that.
         */
-       static BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData hdr_rdt;
-       static XLogRecord *rechdr;
-
-       if (rechdr == NULL)
-       {
-               static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
 
-               rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
-               MemSet(rechdr, 0, SizeOfXLogRecord);
-       }
+       /* The record begins with the fixed-size header */
+       rechdr = (XLogRecord *) scratch;
+       scratch += SizeOfXLogRecord;
 
-       /* The record begins with the header */
-       hdr_rdt.data = (char *) rechdr;
-       hdr_rdt.len = SizeOfXLogRecord;
-       hdr_rdt.next = rdata;
-       total_len = SizeOfXLogRecord;
+       hdr_rdt.next = NULL;
+       rdt_datas_last = &hdr_rdt;
+       hdr_rdt.data = hdr_scratch;
 
        /*
-        * Here we scan the rdata chain, to determine which buffers must be backed
-        * up.
-        *
-        * We add entries for backup blocks to the chain, so that they don't need
-        * any special treatment in the critical section where the chunks are
-        * copied into the WAL buffers. Those entries have to be unlinked from the
-        * chain if we have to loop back here.
+        * Make an rdata chain containing all the data portions of all block
+        * references. This includes the data for full-page images. Also append
+        * the headers for the block references in the scratch buffer.
         */
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               dtbuf[i] = InvalidBuffer;
-               dtbuf_bkp[i] = false;
-       }
-
        *fpw_lsn = InvalidXLogRecPtr;
-       len = 0;
-       for (rdt = rdata;;)
+       for (block_id = 0; block_id < max_registered_block_id; block_id++)
        {
-               if (rdt->buffer == InvalidBuffer)
+               registered_buffer *regbuf = &registered_buffers[block_id];
+               bool            needs_backup;
+               bool            needs_data;
+               XLogRecordBlockHeader bkpb;
+               XLogRecordBlockImageHeader bimg;
+               bool            samerel;
+
+               if (!regbuf->in_use)
+                       continue;
+
+               /* Determine if this block needs to be backed up */
+               if (regbuf->flags & REGBUF_FORCE_IMAGE)
+                       needs_backup = true;
+               else if (regbuf->flags & REGBUF_NO_IMAGE)
+                       needs_backup = false;
+               else if (!doPageWrites)
+                       needs_backup = false;
+               else
                {
-                       /* Simple data, just include it */
-                       len += rdt->len;
+                       /*
+                        * We assume page LSN is first data on *every* page that can be
+                        * passed to XLogInsert, whether it has the standard page layout
+                        * or not.
+                        */
+                       XLogRecPtr      page_lsn = PageGetLSN(regbuf->page);
+
+                       needs_backup = (page_lsn <= RedoRecPtr);
+                       if (!needs_backup)
+                       {
+                               if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn)
+                                       *fpw_lsn = page_lsn;
+                       }
                }
+
+               /* Determine if the buffer data needs to be included */
+               if (regbuf->rdata_len == 0)
+                       needs_data = false;
+               else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0)
+                       needs_data = true;
                else
+                       needs_data = !needs_backup;
+
+               bkpb.id = block_id;
+               bkpb.fork_flags = regbuf->forkno;
+               bkpb.data_length = 0;
+
+               if ((regbuf->flags & REGBUF_WILL_INIT) == REGBUF_WILL_INIT)
+                       bkpb.fork_flags |= BKPBLOCK_WILL_INIT;
+
+               if (needs_backup)
                {
-                       /* Find info for buffer */
-                       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+                       Page            page = regbuf->page;
+
+                       /*
+                        * The page needs to be backed up, so set up *bimg
+                        */
+                       if (regbuf->flags & REGBUF_STANDARD)
                        {
-                               if (rdt->buffer == dtbuf[i])
+                               /* Assume we can omit data between pd_lower and pd_upper */
+                               uint16          lower = ((PageHeader) page)->pd_lower;
+                               uint16          upper = ((PageHeader) page)->pd_upper;
+
+                               if (lower >= SizeOfPageHeaderData &&
+                                       upper > lower &&
+                                       upper <= BLCKSZ)
                                {
-                                       /* Buffer already referenced by earlier chain item */
-                                       if (dtbuf_bkp[i])
-                                       {
-                                               rdt->data = NULL;
-                                               rdt->len = 0;
-                                       }
-                                       else if (rdt->data)
-                                               len += rdt->len;
-                                       break;
+                                       bimg.hole_offset = lower;
+                                       bimg.hole_length = upper - lower;
                                }
-                               if (dtbuf[i] == InvalidBuffer)
+                               else
                                {
-                                       /* OK, put it in this slot */
-                                       XLogRecPtr      page_lsn;
-                                       bool            needs_backup;
-
-                                       dtbuf[i] = rdt->buffer;
-
-                                       /*
-                                        * Determine whether the buffer has to be backed up.
-                                        *
-                                        * We assume page LSN is first data on *every* page that
-                                        * can be passed to XLogInsert, whether it has the
-                                        * standard page layout or not. We don't need to take the
-                                        * buffer header lock for PageGetLSN because we hold an
-                                        * exclusive lock on the page and/or the relation.
-                                        */
-                                       page_lsn = PageGetLSN(BufferGetPage(rdt->buffer));
-                                       if (!doPageWrites)
-                                               needs_backup = false;
-                                       else if (page_lsn <= RedoRecPtr)
-                                               needs_backup = true;
-                                       else
-                                               needs_backup = false;
-
-                                       if (needs_backup)
-                                       {
-                                               /*
-                                                * The page needs to be backed up, so set up BkpBlock
-                                                */
-                                               XLogFillBkpBlock(rdt->buffer, rdt->buffer_std,
-                                                                                &(dtbuf_xlg[i]));
-                                               dtbuf_bkp[i] = true;
-                                               rdt->data = NULL;
-                                               rdt->len = 0;
-                                       }
-                                       else
-                                       {
-                                               if (rdt->data)
-                                                       len += rdt->len;
-                                               if (*fpw_lsn == InvalidXLogRecPtr ||
-                                                       page_lsn < *fpw_lsn)
-                                               {
-                                                       *fpw_lsn = page_lsn;
-                                               }
-                                       }
-                                       break;
+                                       /* No "hole" to compress out */
+                                       bimg.hole_offset = 0;
+                                       bimg.hole_length = 0;
                                }
                        }
-                       if (i >= XLR_MAX_BKP_BLOCKS)
-                               elog(PANIC, "can backup at most %d blocks per xlog record",
-                                        XLR_MAX_BKP_BLOCKS);
-               }
-               /* Break out of loop when rdt points to last chain item */
-               if (rdt->next == NULL)
-                       break;
-               rdt = rdt->next;
-       }
-       total_len += len;
+                       else
+                       {
+                               /* Not a standard page header, don't try to eliminate "hole" */
+                               bimg.hole_offset = 0;
+                               bimg.hole_length = 0;
+                       }
 
-       /*
-        * Make additional rdata chain entries for the backup blocks, so that we
-        * don't need to special-case them in the write loop.  This modifies the
-        * original rdata chain, but we keep a pointer to the last regular entry,
-        * rdt_lastnormal, so that we can undo this if we have to start over.
-        *
-        * At the exit of this loop, total_len includes the backup block data.
-        *
-        * Also set the appropriate info bits to show which buffers were backed
-        * up. The XLR_BKP_BLOCK(N) bit corresponds to the N'th distinct buffer
-        * value (ignoring InvalidBuffer) appearing in the rdata chain.
-        */
-       *rdt_lastnormal = rdt;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               BkpBlock   *bkpb;
-               char       *page;
+                       /* Fill in the remaining fields in the XLogRecordBlockHeader struct */
+                       bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
 
-               if (!dtbuf_bkp[i])
-                       continue;
+                       total_len += BLCKSZ - bimg.hole_length;
+
+                       /*
+                        * Construct XLogRecData entries for the page content.
+                        */
+                       rdt_datas_last->next = &regbuf->bkp_rdatas[0];
+                       rdt_datas_last = rdt_datas_last->next;
+                       if (bimg.hole_length == 0)
+                       {
+                               rdt_datas_last->data = page;
+                               rdt_datas_last->len = BLCKSZ;
+                       }
+                       else
+                       {
+                               /* must skip the hole */
+                               rdt_datas_last->data = page;
+                               rdt_datas_last->len = bimg.hole_offset;
 
-               info |= XLR_BKP_BLOCK(i);
+                               rdt_datas_last->next = &regbuf->bkp_rdatas[1];
+                               rdt_datas_last = rdt_datas_last->next;
 
-               bkpb = &(dtbuf_xlg[i]);
-               page = (char *) BufferGetBlock(dtbuf[i]);
+                               rdt_datas_last->data = page + (bimg.hole_offset + bimg.hole_length);
+                               rdt_datas_last->len = BLCKSZ - (bimg.hole_offset + bimg.hole_length);
+                       }
+               }
 
-               rdt->next = &(dtbuf_rdt1[i]);
-               rdt = rdt->next;
+               if (needs_data)
+               {
+                       /*
+                        * Link the caller-supplied rdata chain for this buffer to the
+                        * overall list.
+                        */
+                       bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
+                       bkpb.data_length = regbuf->rdata_len;
+                       total_len += regbuf->rdata_len;
+
+                       rdt_datas_last->next = regbuf->rdata_head;
+                       rdt_datas_last = regbuf->rdata_tail;
+               }
 
-               rdt->data = (char *) bkpb;
-               rdt->len = sizeof(BkpBlock);
-               total_len += sizeof(BkpBlock);
+               if (prev_regbuf && RelFileNodeEquals(regbuf->rnode, prev_regbuf->rnode))
+               {
+                       samerel = true;
+                       bkpb.fork_flags |= BKPBLOCK_SAME_REL;
+                       prev_regbuf = regbuf;
+               }
+               else
+                       samerel = false;
 
-               rdt->next = &(dtbuf_rdt2[i]);
-               rdt = rdt->next;
+               /* Ok, copy the header to the scratch buffer */
+               memcpy(scratch, &bkpb, SizeOfXLogRecordBlockHeader);
+               scratch += SizeOfXLogRecordBlockHeader;
+               if (needs_backup)
+               {
+                       memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader);
+                       scratch += SizeOfXLogRecordBlockImageHeader;
+               }
+               if (!samerel)
+               {
+                       memcpy(scratch, &regbuf->rnode, sizeof(RelFileNode));
+                       scratch += sizeof(RelFileNode);
+               }
+               memcpy(scratch, &regbuf->block, sizeof(BlockNumber));
+               scratch += sizeof(BlockNumber);
+       }
 
-               if (bkpb->hole_length == 0)
+       /* followed by main data, if any */
+       if (mainrdata_len > 0)
+       {
+               if (mainrdata_len > 255)
                {
-                       rdt->data = page;
-                       rdt->len = BLCKSZ;
-                       total_len += BLCKSZ;
-                       rdt->next = NULL;
+                       *(scratch++) = XLR_BLOCK_ID_DATA_LONG;
+                       memcpy(scratch, &mainrdata_len, sizeof(uint32));
+                       scratch += sizeof(uint32);
                }
                else
                {
-                       /* must skip the hole */
-                       rdt->data = page;
-                       rdt->len = bkpb->hole_offset;
-                       total_len += bkpb->hole_offset;
-
-                       rdt->next = &(dtbuf_rdt3[i]);
-                       rdt = rdt->next;
-
-                       rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
-                       rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
-                       total_len += rdt->len;
-                       rdt->next = NULL;
+                       *(scratch++) = XLR_BLOCK_ID_DATA_SHORT;
+                       *(scratch++) = (uint8) mainrdata_len;
                }
+               rdt_datas_last->next = mainrdata_head;
+               rdt_datas_last = mainrdata_last;
+               total_len += mainrdata_len;
        }
+       rdt_datas_last->next = NULL;
+
+       hdr_rdt.len = (scratch - hdr_scratch);
+       total_len += hdr_rdt.len;
 
        /*
-        * We disallow len == 0 because it provides a useful bit of extra error
-        * checking in ReadRecord.  This means that all callers of XLogInsert
-        * must supply at least some not-in-a-buffer data.  However, we make an
-        * exception for XLOG SWITCH records because we don't want them to ever
-        * cross a segment boundary.
+        * Calculate CRC of the data
+        *
+        * Note that the record header isn't added into the CRC initially since we
+        * don't know the prev-link yet.  Thus, the CRC will represent the CRC of
+        * the whole record in the order: block and main-data headers, then the
+        * chained block images/data and main data, then the fixed record header.
         */
-       if (len == 0 && !isLogSwitch)
-               elog(PANIC, "invalid xlog record length %u", rechdr->xl_len);
+       INIT_CRC32C(rdata_crc);
+       COMP_CRC32C(rdata_crc, hdr_scratch + SizeOfXLogRecord, hdr_rdt.len - SizeOfXLogRecord);
+       for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
+               COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
 
        /*
         * Fill in the fields in the record header. Prev-link is filled in later,
-        * once we know where in the WAL the record will be inserted. CRC is also
-        * not calculated yet.
+        * once we know where in the WAL the record will be inserted. The CRC does
+        * not include the record header yet.
         */
        rechdr->xl_xid = GetCurrentTransactionIdIfAny();
        rechdr->xl_tot_len = total_len;
-       rechdr->xl_len = len;           /* doesn't include backup blocks */
        rechdr->xl_info = info;
        rechdr->xl_rmid = rmid;
        rechdr->xl_prev = InvalidXLogRecPtr;
+       rechdr->xl_crc = rdata_crc;
 
        return &hdr_rdt;
 }
@@ -429,45 +752,41 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
 
        if (lsn <= RedoRecPtr)
        {
-               XLogRecData rdata[2];
-               BkpBlock        bkpb;
+               int                     flags;
                char            copied_buffer[BLCKSZ];
                char       *origdata = (char *) BufferGetBlock(buffer);
-
-               /* Make a BkpBlock struct representing the buffer */
-               XLogFillBkpBlock(buffer, buffer_std, &bkpb);
+               RelFileNode rnode;
+               ForkNumber      forkno;
+               BlockNumber blkno;
 
                /*
                 * Copy buffer so we don't have to worry about concurrent hint bit or
                 * lsn updates. We assume pd_lower/upper cannot be changed without an
                 * exclusive lock, so the contents bkp are not racy.
-                *
-                * With buffer_std set to false, XLogFillBkpBlock() sets hole_length
-                * and hole_offset to 0; so the following code is safe for either
-                * case.
                 */
-               memcpy(copied_buffer, origdata, bkpb.hole_offset);
-               memcpy(copied_buffer + bkpb.hole_offset,
-                          origdata + bkpb.hole_offset + bkpb.hole_length,
-                          BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
+               if (buffer_std)
+               {
+                       /* Assume we can omit data between pd_lower and pd_upper */
+                       Page            page = BufferGetPage(buffer);
+                       uint16          lower = ((PageHeader) page)->pd_lower;
+                       uint16          upper = ((PageHeader) page)->pd_upper;
 
-               /*
-                * Header for backup block.
-                */
-               rdata[0].data = (char *) &bkpb;
-               rdata[0].len = sizeof(BkpBlock);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+                       memcpy(copied_buffer, origdata, lower);
+                       memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
+               }
+               else
+                       memcpy(copied_buffer, origdata, BLCKSZ);
 
-               /*
-                * Save copy of the buffer.
-                */
-               rdata[1].data = copied_buffer;
-               rdata[1].len = BLCKSZ - bkpb.hole_length;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
 
-               recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+               flags = REGBUF_FORCE_IMAGE;
+               if (buffer_std)
+                       flags |= REGBUF_STANDARD;
+
+               BufferGetTag(buffer, &rnode, &forkno, &blkno);
+               XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
+
+               recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
        }
 
        return recptr;
@@ -489,71 +808,16 @@ XLogRecPtr
 log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
                        Page page, bool page_std)
 {
-       BkpBlock        bkpb;
+       int                     flags;
        XLogRecPtr      recptr;
-       XLogRecData rdata[3];
-
-       /* NO ELOG(ERROR) from here till newpage op is logged */
-       START_CRIT_SECTION();
-
-       bkpb.node = *rnode;
-       bkpb.fork = forkNum;
-       bkpb.block = blkno;
 
+       flags = REGBUF_FORCE_IMAGE;
        if (page_std)
-       {
-               /* Assume we can omit data between pd_lower and pd_upper */
-               uint16          lower = ((PageHeader) page)->pd_lower;
-               uint16          upper = ((PageHeader) page)->pd_upper;
-
-               if (lower >= SizeOfPageHeaderData &&
-                       upper > lower &&
-                       upper <= BLCKSZ)
-               {
-                       bkpb.hole_offset = lower;
-                       bkpb.hole_length = upper - lower;
-               }
-               else
-               {
-                       /* No "hole" to compress out */
-                       bkpb.hole_offset = 0;
-                       bkpb.hole_length = 0;
-               }
-       }
-       else
-       {
-               /* Not a standard page header, don't try to eliminate "hole" */
-               bkpb.hole_offset = 0;
-               bkpb.hole_length = 0;
-       }
-
-       rdata[0].data = (char *) &bkpb;
-       rdata[0].len = sizeof(BkpBlock);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
-
-       if (bkpb.hole_length == 0)
-       {
-               rdata[1].data = (char *) page;
-               rdata[1].len = BLCKSZ;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
-       }
-       else
-       {
-               /* must skip the hole */
-               rdata[1].data = (char *) page;
-               rdata[1].len = bkpb.hole_offset;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &rdata[2];
-
-               rdata[2].data = (char *) page + (bkpb.hole_offset + bkpb.hole_length);
-               rdata[2].len = BLCKSZ - (bkpb.hole_offset + bkpb.hole_length);
-               rdata[2].buffer = InvalidBuffer;
-               rdata[2].next = NULL;
-       }
+               flags |= REGBUF_STANDARD;
 
-       recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+       XLogBeginInsert();
+       XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags);
+       recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
 
        /*
         * The page may be uninitialized. If so, we can't set the LSN because that
@@ -564,8 +828,6 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
                PageSetLSN(page, recptr);
        }
 
-       END_CRIT_SECTION();
-
        return recptr;
 }
 
@@ -596,38 +858,38 @@ log_newpage_buffer(Buffer buffer, bool page_std)
 }
 
 /*
- * Fill a BkpBlock for a buffer.
+ * Allocate working buffers needed for WAL record construction.
  */
-static void
-XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb)
+void
+InitXLogInsert(void)
 {
-       BufferGetTag(buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
+       /* Initialize the working areas */
+       if (xloginsert_cxt == NULL)
+       {
+               xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
+                                                                                          "WAL record construction",
+                                                                                          ALLOCSET_DEFAULT_MINSIZE,
+                                                                                          ALLOCSET_DEFAULT_INITSIZE,
+                                                                                          ALLOCSET_DEFAULT_MAXSIZE);
+       }
 
-       if (buffer_std)
+       if (registered_buffers == NULL)
        {
-               /* Assume we can omit data between pd_lower and pd_upper */
-               Page            page = BufferGetPage(buffer);
-               uint16          lower = ((PageHeader) page)->pd_lower;
-               uint16          upper = ((PageHeader) page)->pd_upper;
-
-               if (lower >= SizeOfPageHeaderData &&
-                       upper > lower &&
-                       upper <= BLCKSZ)
-               {
-                       bkpb->hole_offset = lower;
-                       bkpb->hole_length = upper - lower;
-               }
-               else
-               {
-                       /* No "hole" to compress out */
-                       bkpb->hole_offset = 0;
-                       bkpb->hole_length = 0;
-               }
+               registered_buffers = (registered_buffer *)
+                       MemoryContextAllocZero(xloginsert_cxt,
+                                 sizeof(registered_buffer) * (XLR_NORMAL_MAX_BLOCK_ID + 1));
+               max_registered_buffers = XLR_NORMAL_MAX_BLOCK_ID + 1;
        }
-       else
+       if (rdatas == NULL)
        {
-               /* Not a standard page header, don't try to eliminate "hole" */
-               bkpb->hole_offset = 0;
-               bkpb->hole_length = 0;
+               rdatas = MemoryContextAlloc(xloginsert_cxt,
+                                                                       sizeof(XLogRecData) * XLR_NORMAL_RDATAS);
+               max_rdatas = XLR_NORMAL_RDATAS;
        }
+
+       /*
+        * Allocate a buffer to hold the header information for a WAL record.
+        */
+       if (hdr_scratch == NULL)
+               hdr_scratch = palloc0(HEADER_SCRATCH_SIZE);
 }
index 7d573cc585d3cc6e13421ed108e328852788d253..67d62234369d2d904f1adb78da22d5f13080e777 100644 (file)
@@ -37,6 +37,8 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
    the supplied arguments. */
 __attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
 
+static void ResetDecoder(XLogReaderState *state);
+
 /* size of the buffer allocated for error message. */
 #define MAX_ERRORMSG_LEN 1000
 
@@ -59,46 +61,33 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
 /*
  * Allocate and initialize a new XLogReader.
  *
- * Returns NULL if the xlogreader couldn't be allocated.
+ * The returned XLogReader is palloc'd. (In FRONTEND code, that means that
+ * running out-of-memory causes an immediate exit(1).)
  */
 XLogReaderState *
 XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
 {
        XLogReaderState *state;
 
-       AssertArg(pagereadfunc != NULL);
+       state = (XLogReaderState *) palloc0(sizeof(XLogReaderState));
 
-       state = (XLogReaderState *) malloc(sizeof(XLogReaderState));
-       if (!state)
-               return NULL;
-       MemSet(state, 0, sizeof(XLogReaderState));
+       state->max_block_id = -1;
 
        /*
         * Permanently allocate readBuf.  We do it this way, rather than just
         * making a static array, for two reasons: (1) no need to waste the
         * storage in most instantiations of the backend; (2) a static char array
-        * isn't guaranteed to have any particular alignment, whereas malloc()
+        * isn't guaranteed to have any particular alignment, whereas palloc()
         * will provide MAXALIGN'd storage.
         */
-       state->readBuf = (char *) malloc(XLOG_BLCKSZ);
-       if (!state->readBuf)
-       {
-               free(state);
-               return NULL;
-       }
+       state->readBuf = (char *) palloc(XLOG_BLCKSZ);
 
        state->read_page = pagereadfunc;
        /* system_identifier initialized to zeroes above */
        state->private_data = private_data;
        /* ReadRecPtr and EndRecPtr initialized to zeroes above */
        /* readSegNo, readOff, readLen, readPageTLI initialized to zeroes above */
-       state->errormsg_buf = malloc(MAX_ERRORMSG_LEN + 1);
-       if (!state->errormsg_buf)
-       {
-               free(state->readBuf);
-               free(state);
-               return NULL;
-       }
+       state->errormsg_buf = palloc(MAX_ERRORMSG_LEN + 1);
        state->errormsg_buf[0] = '\0';
 
        /*
@@ -107,9 +96,9 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
         */
        if (!allocate_recordbuf(state, 0))
        {
-               free(state->errormsg_buf);
-               free(state->readBuf);
-               free(state);
+               pfree(state->errormsg_buf);
+               pfree(state->readBuf);
+               pfree(state);
                return NULL;
        }
 
@@ -119,11 +108,24 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
 void
 XLogReaderFree(XLogReaderState *state)
 {
-       free(state->errormsg_buf);
+       int                     block_id;
+
+       for (block_id = 0; block_id <= state->max_block_id; block_id++)
+       {
+               if (state->blocks[block_id].in_use)
+               {
+                       if (state->blocks[block_id].data)
+                               pfree(state->blocks[block_id].data);
+               }
+       }
+       if (state->main_data)
+               pfree(state->main_data);
+
+       pfree(state->errormsg_buf);
        if (state->readRecordBuf)
-               free(state->readRecordBuf);
-       free(state->readBuf);
-       free(state);
+               pfree(state->readRecordBuf);
+       pfree(state->readBuf);
+       pfree(state);
 }
 
 /*
@@ -146,14 +148,8 @@ allocate_recordbuf(XLogReaderState *state, uint32 reclength)
        newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
 
        if (state->readRecordBuf)
-               free(state->readRecordBuf);
-       state->readRecordBuf = (char *) malloc(newSize);
-       if (!state->readRecordBuf)
-       {
-               state->readRecordBufSize = 0;
-               return false;
-       }
-
+               pfree(state->readRecordBuf);
+       state->readRecordBuf = (char *) palloc(newSize);
        state->readRecordBufSize = newSize;
        return true;
 }
@@ -191,6 +187,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
        *errormsg = NULL;
        state->errormsg_buf[0] = '\0';
 
+       ResetDecoder(state);
+
        if (RecPtr == InvalidXLogRecPtr)
        {
                RecPtr = state->EndRecPtr;
@@ -440,7 +438,10 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
                state->EndRecPtr -= state->EndRecPtr % XLogSegSize;
        }
 
-       return record;
+       if (DecodeXLogRecord(state, record, errormsg))
+               return record;
+       else
+               return NULL;
 
 err:
 
@@ -579,30 +580,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
                                          XLogRecPtr PrevRecPtr, XLogRecord *record,
                                          bool randAccess)
 {
-       /*
-        * xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
-        * required.
-        */
-       if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
-       {
-               if (record->xl_len != 0)
-               {
-                       report_invalid_record(state,
-                                                                 "invalid xlog switch record at %X/%X",
-                                                                 (uint32) (RecPtr >> 32), (uint32) RecPtr);
-                       return false;
-               }
-       }
-       else if (record->xl_len == 0)
-       {
-               report_invalid_record(state,
-                                                         "record with zero length at %X/%X",
-                                                         (uint32) (RecPtr >> 32), (uint32) RecPtr);
-               return false;
-       }
-       if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
-               record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
-               XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+       if (record->xl_tot_len < SizeOfXLogRecord)
        {
                report_invalid_record(state,
                                                          "invalid record length at %X/%X",
@@ -663,79 +641,17 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
  * We assume all of the record (that is, xl_tot_len bytes) has been read
  * into memory at *record.  Also, ValidXLogRecordHeader() has accepted the
  * record's header, which means in particular that xl_tot_len is at least
- * SizeOfXlogRecord, so it is safe to fetch xl_len.
+ * SizeOfXlogRecord.
  */
 static bool
 ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
 {
        pg_crc32        crc;
-       int                     i;
-       uint32          len = record->xl_len;
-       BkpBlock        bkpb;
-       char       *blk;
-       size_t          remaining = record->xl_tot_len;
 
-       /* First the rmgr data */
-       if (remaining < SizeOfXLogRecord + len)
-       {
-               /* ValidXLogRecordHeader() should've caught this already... */
-               report_invalid_record(state, "invalid record length at %X/%X",
-                                                         (uint32) (recptr >> 32), (uint32) recptr);
-               return false;
-       }
-       remaining -= SizeOfXLogRecord + len;
+       /* Calculate the CRC */
        INIT_CRC32C(crc);
-       COMP_CRC32C(crc, XLogRecGetData(record), len);
-
-       /* Add in the backup blocks, if any */
-       blk = (char *) XLogRecGetData(record) + len;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               uint32          blen;
-
-               if (!(record->xl_info & XLR_BKP_BLOCK(i)))
-                       continue;
-
-               if (remaining < sizeof(BkpBlock))
-               {
-                       report_invalid_record(state,
-                                                         "invalid backup block size in record at %X/%X",
-                                                                 (uint32) (recptr >> 32), (uint32) recptr);
-                       return false;
-               }
-               memcpy(&bkpb, blk, sizeof(BkpBlock));
-
-               if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
-               {
-                       report_invalid_record(state,
-                                                                 "incorrect hole size in record at %X/%X",
-                                                                 (uint32) (recptr >> 32), (uint32) recptr);
-                       return false;
-               }
-               blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
-
-               if (remaining < blen)
-               {
-                       report_invalid_record(state,
-                                                         "invalid backup block size in record at %X/%X",
-                                                                 (uint32) (recptr >> 32), (uint32) recptr);
-                       return false;
-               }
-               remaining -= blen;
-               COMP_CRC32C(crc, blk, blen);
-               blk += blen;
-       }
-
-       /* Check that xl_tot_len agrees with our calculation */
-       if (remaining != 0)
-       {
-               report_invalid_record(state,
-                                                         "incorrect total length in record at %X/%X",
-                                                         (uint32) (recptr >> 32), (uint32) recptr);
-               return false;
-       }
-
-       /* Finally include the record header */
+       COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
+       /* include the record header last */
        COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
        FIN_CRC32C(crc);
 
@@ -985,3 +901,321 @@ out:
 }
 
 #endif   /* FRONTEND */
+
+
+/* ----------------------------------------
+ * Functions for decoding the data and block references in a record.
+ * ----------------------------------------
+ */
+
+/* private function to reset the state between records: forgets the decoded
+ * record and main data length and clears the per-block flags; the blocks'
+ * data buffers are not freed here */
+static void
+ResetDecoder(XLogReaderState *state)
+{
+       int                     block_id;
+
+       state->decoded_record = NULL;
+
+       state->main_data_len = 0;
+
+       for (block_id = 0; block_id <= state->max_block_id; block_id++) /* slots used by the previous record */
+       {
+               state->blocks[block_id].in_use = false;
+               state->blocks[block_id].has_image = false;
+               state->blocks[block_id].has_data = false;
+       }
+       state->max_block_id = -1;       /* no block reference seen yet */
+}
+
+/*
+ * Decode the previously read record.
+ *
+ * Parses the fragment headers that follow the fixed-size XLogRecord header,
+ * filling in state->blocks[] and state->main_data_len, then copies each
+ * block's data and the main data into separately allocated buffers.
+ * Full-page images are not copied; bkp_image points into 'record' itself.
+ *
+ * A header that fails a sanity check (invalid or out-of-order block_id,
+ * BKPBLOCK_HAS_DATA inconsistent with the data length, a hole extending
+ * past the page, or a total length mismatch) makes the record invalid.
+ *
+ * On error, a human-readable error message is returned in *errormsg, and
+ * the return value is false.
+ */
+bool
+DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
+{
+       /*
+        * read next _size bytes from record buffer, but check for overrun first.
+        */
+#define COPY_HEADER_FIELD(_dst, _size)                 \
+       do {                                                                            \
+               if (remaining < _size)                                  \
+                       goto shortdata_err;                                     \
+               memcpy(_dst, ptr, _size);                               \
+               ptr += _size;                                                   \
+               remaining -= _size;                                             \
+       } while(0)
+
+       char       *ptr;
+       uint32          remaining;
+       uint32          datatotal;
+       RelFileNode *rnode = NULL;
+       uint8           block_id;
+
+       ResetDecoder(state);
+
+       state->decoded_record = record;
+
+       ptr = (char *) record;
+       ptr += SizeOfXLogRecord;
+       remaining = record->xl_tot_len - SizeOfXLogRecord;
+
+       /*
+        * Decode the headers. 'datatotal' accumulates the payload size promised
+        * by the headers seen so far; header parsing stops once the bytes left
+        * are no more than that.
+        */
+       datatotal = 0;
+       while (remaining > datatotal)
+       {
+               COPY_HEADER_FIELD(&block_id, sizeof(uint8));
+
+               if (block_id == XLR_BLOCK_ID_DATA_SHORT)
+               {
+                       /* XLogRecordDataHeaderShort */
+                       uint8           main_data_len;
+
+                       COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
+
+                       state->main_data_len = main_data_len;
+                       datatotal += main_data_len;
+                       break;                          /* by convention, the main data fragment is
+                                                                * always last */
+               }
+               else if (block_id == XLR_BLOCK_ID_DATA_LONG)
+               {
+                       /* XLogRecordDataHeaderLong */
+                       uint32          main_data_len;
+
+                       COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
+                       state->main_data_len = main_data_len;
+                       datatotal += main_data_len;
+                       break;                          /* by convention, the main data fragment is
+                                                                * always last */
+               }
+               else if (block_id <= XLR_MAX_BLOCK_ID)
+               {
+                       /* XLogRecordBlockHeader */
+                       DecodedBkpBlock *blk;
+                       uint8           fork_flags;
+
+                       /* block references must appear in strictly increasing ID order */
+                       if (block_id <= state->max_block_id)
+                       {
+                               report_invalid_record(state,
+                                                                         "out-of-order block_id %u at %X/%X",
+                                                                         block_id,
+                                                                         (uint32) (state->ReadRecPtr >> 32),
+                                                                         (uint32) state->ReadRecPtr);
+                               goto err;
+                       }
+                       state->max_block_id = block_id;
+
+                       blk = &state->blocks[block_id];
+                       blk->in_use = true;
+
+                       COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
+                       blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
+                       blk->flags = fork_flags;
+                       blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
+                       blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
+
+                       COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
+                       /* cross-check that the HAS_DATA flag is set iff data_len > 0 */
+                       if (blk->has_data && blk->data_len == 0)
+                       {
+                               report_invalid_record(state,
+                                         "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
+                                                                         (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+                               goto err;
+                       }
+                       if (!blk->has_data && blk->data_len != 0)
+                       {
+                               report_invalid_record(state,
+                                "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
+                                                                         (unsigned int) blk->data_len,
+                                                                         (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+                               goto err;
+                       }
+                       datatotal += blk->data_len;
+
+                       if (blk->has_image)
+                       {
+                               COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
+                               COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
+
+                               /*
+                                * The hole must lie within the page. Otherwise the image
+                                * length computed below would wrap around, and
+                                * RestoreBlockImage would copy outside the page.
+                                */
+                               if (blk->hole_offset + blk->hole_length > BLCKSZ)
+                               {
+                                       report_invalid_record(state,
+                                                                                 "incorrect hole size in record at %X/%X",
+                                                                                 (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+                                       goto err;
+                               }
+                               datatotal += BLCKSZ - blk->hole_length;
+                       }
+                       if (!(fork_flags & BKPBLOCK_SAME_REL))
+                       {
+                               COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode));
+                               rnode = &blk->rnode;
+                       }
+                       else
+                       {
+                               /* reuse the relation of the preceding block reference */
+                               if (rnode == NULL)
+                               {
+                                       report_invalid_record(state,
+                                               "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
+                                                                                 (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+                                       goto err;
+                               }
+
+                               blk->rnode = *rnode;
+                       }
+                       COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
+               }
+               else
+               {
+                       report_invalid_record(state,
+                                                                 "invalid block_id %u at %X/%X",
+                                                                 block_id,
+                                                                 (uint32) (state->ReadRecPtr >> 32),
+                                                                 (uint32) state->ReadRecPtr);
+                       goto err;
+               }
+       }
+
+       if (remaining != datatotal)
+               goto shortdata_err;
+
+       /*
+        * Ok, we've parsed the fragment headers, and verified that the total
+        * length of the payload in the fragments is equal to the amount of data
+        * left. Copy the data of each fragment to a separate buffer.
+        *
+        * We could just set up pointers into readRecordBuf, but we want to align
+        * the data for the convenience of the callers. Backup images are not
+        * copied, however; they don't need alignment.
+        */
+
+       /* block data first */
+       for (block_id = 0; block_id <= state->max_block_id; block_id++)
+       {
+               DecodedBkpBlock *blk = &state->blocks[block_id];
+
+               if (!blk->in_use)
+                       continue;
+               if (blk->has_image)
+               {
+                       blk->bkp_image = ptr;
+                       ptr += BLCKSZ - blk->hole_length;
+               }
+               if (blk->has_data)
+               {
+                       /* grow the per-block buffer only when the old one is too small */
+                       if (!blk->data || blk->data_len > blk->data_bufsz)
+                       {
+                               if (blk->data)
+                                       pfree(blk->data);
+                               blk->data_bufsz = blk->data_len;
+                               blk->data = palloc(blk->data_bufsz);
+                       }
+                       memcpy(blk->data, ptr, blk->data_len);
+                       ptr += blk->data_len;
+               }
+       }
+
+       /* and finally, the main data */
+       if (state->main_data_len > 0)
+       {
+               if (!state->main_data || state->main_data_len > state->main_data_bufsz)
+               {
+                       if (state->main_data)
+                               pfree(state->main_data);
+                       state->main_data_bufsz = state->main_data_len;
+                       state->main_data = palloc(state->main_data_bufsz);
+               }
+               memcpy(state->main_data, ptr, state->main_data_len);
+               ptr += state->main_data_len;
+       }
+
+       return true;
+
+shortdata_err:
+       report_invalid_record(state,
+                                                 "record with invalid length at %X/%X",
+                        (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
+err:
+       *errormsg = state->errormsg_buf;
+
+       return false;
+}
+
+/*
+ * Returns information about the block that a block reference refers to.
+ *
+ * If the WAL record contains a block reference with the given ID, *rnode,
+ * *forknum, and *blknum are filled in (if not NULL), and returns TRUE.
+ * Otherwise returns FALSE and leaves the output arguments untouched.
+ * block_id is used directly as an index into record->blocks[]; it is not
+ * range-checked here.
+ */
+bool
+XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
+                               RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
+{
+       DecodedBkpBlock *bkpb;
+
+       if (!record->blocks[block_id].in_use)
+               return false;           /* no block reference with this ID */
+
+       bkpb = &record->blocks[block_id];
+       if (rnode)
+               *rnode = bkpb->rnode;
+       if (forknum)
+               *forknum = bkpb->forknum;
+       if (blknum)
+               *blknum = bkpb->blkno;
+       return true;
+}
+
+/*
+ * Returns the data associated with a block reference, or NULL if there is
+ * no data (e.g. because a full-page image was taken instead) or the record
+ * has no block reference with the given ID. The returned pointer points to
+ * a MAXALIGNed buffer.
+ *
+ * If 'len' is not NULL, *len is set to the length of the returned data, or
+ * to 0 whenever NULL is returned.
+ */
+char *
+XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
+{
+       DecodedBkpBlock *bkpb;
+
+       bkpb = &record->blocks[block_id];
+
+       /* every NULL return reports a zero length, so *len is never left unset */
+       if (!bkpb->in_use || !bkpb->has_data)
+       {
+               if (len)
+                       *len = 0;
+               return NULL;
+       }
+
+       if (len)
+               *len = bkpb->data_len;
+       return bkpb->data;
+}
+
+/*
+ * Restore a full-page image from a backup block attached to an XLOG record.
+ *
+ * Copies the image of block reference 'block_id' into 'page' (BLCKSZ bytes
+ * are written), zero-filling the hole that was left out of the stored image.
+ * Returns true on success, or false if the record has no block reference
+ * with that ID or the reference carries no image.
+ */
+bool
+RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
+{
+       DecodedBkpBlock *bkpb;
+
+       if (!record->blocks[block_id].in_use)
+               return false;
+       if (!record->blocks[block_id].has_image)
+               return false;
+
+       bkpb = &record->blocks[block_id];
+
+       if (bkpb->hole_length == 0)
+       {
+               memcpy(page, bkpb->bkp_image, BLCKSZ);  /* no hole: image is a whole page */
+       }
+       else
+       {
+               memcpy(page, bkpb->bkp_image, bkpb->hole_offset);       /* part before the hole */
+               /* must zero-fill the hole */
+               MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
+               memcpy(page + (bkpb->hole_offset + bkpb->hole_length),  /* part after the hole */
+                          bkpb->bkp_image + bkpb->hole_offset,
+                          BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
+       }
+
+       return true;
+}
index cf04081c19ea635da57174ecded66ef531f215b8..ae323a0db87f6db005928147730346fafdfd2dc7 100644 (file)
@@ -253,9 +253,8 @@ XLogCheckInvalidPages(void)
  *
  * 'lsn' is the LSN of the record being replayed.  It is compared with the
  * page's LSN to determine if the record has already been replayed.
- * 'rnode' and 'blkno' point to the block being replayed (main fork number
- * is implied, use XLogReadBufferForRedoExtended for other forks).
- * 'block_index' identifies the backup block in the record for the page.
+ * 'block_id' is the ID number the block was registered with, when the WAL
+ * record was created.
  *
  * Returns one of the following:
  *
@@ -272,15 +271,36 @@ XLogCheckInvalidPages(void)
  * single-process crash recovery, but some subroutines such as MarkBufferDirty
  * will complain if we don't have the lock.  In hot standby mode it's
  * definitely necessary.)
+ *
+ * Note: when a backup block is available in XLOG, we restore it
+ * unconditionally, even if the page in the database appears newer.  This is
+ * to protect ourselves against database pages that were partially or
+ * incorrectly written during a crash.  We assume that the XLOG data must be
+ * good because it has passed a CRC check, while the database page might not
+ * be.  This will force us to replay all subsequent modifications of the page
+ * that appear in XLOG, rather than possibly ignoring them as already
+ * applied, but that's not a huge drawback.
  */
 XLogRedoAction
-XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
-                                         RelFileNode rnode, BlockNumber blkno,
+XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
                                          Buffer *buf)
 {
-       return XLogReadBufferForRedoExtended(lsn, record, block_index,
-                                                                                rnode, MAIN_FORKNUM, blkno,
-                                                                                RBM_NORMAL, false, buf);
+       return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
+                                                                                false, buf);
+}
+
+/*
+ * Pin and lock a buffer referenced by a WAL record, for the purpose of
+ * re-initializing it. Calls XLogReadBufferForRedoExtended in
+ * RBM_ZERO_AND_LOCK mode without a cleanup lock and returns the buffer.
+ */
+Buffer
+XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
+{
+       Buffer          buf;
+
+       XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
+                                                                 &buf);        /* XLogRedoAction result is deliberately ignored */
+       return buf;
 }
 
 /*
@@ -299,21 +319,54 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
  * using LockBufferForCleanup(), instead of a regular exclusive lock.
  */
 XLogRedoAction
-XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
-                                                         int block_index, RelFileNode rnode,
-                                                         ForkNumber forkno, BlockNumber blkno,
+XLogReadBufferForRedoExtended(XLogReaderState *record,
+                                                         uint8 block_id,
                                                          ReadBufferMode mode, bool get_cleanup_lock,
                                                          Buffer *buf)
 {
-       if (record->xl_info & XLR_BKP_BLOCK(block_index))
+       XLogRecPtr      lsn = record->EndRecPtr;
+       RelFileNode rnode;
+       ForkNumber      forknum;
+       BlockNumber blkno;
+       Page            page;
+
+       if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
+       {
+               /* Caller specified a bogus block_id */
+               elog(PANIC, "failed to locate backup block with ID %d", block_id);
+       }
+
+       /* If it's a full-page image, restore it. */
+       if (XLogRecHasBlockImage(record, block_id))
        {
-               *buf = RestoreBackupBlock(lsn, record, block_index,
-                                                                 get_cleanup_lock, true);
+               *buf = XLogReadBufferExtended(rnode, forknum, blkno,
+                  get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
+               page = BufferGetPage(*buf);
+               if (!RestoreBlockImage(record, block_id, page))
+                       elog(ERROR, "failed to restore block image");
+
+               /*
+                * The page may be uninitialized. If so, we can't set the LSN because
+                * that would corrupt the page.
+                */
+               if (!PageIsNew(page))
+               {
+                       PageSetLSN(page, lsn);
+               }
+
+               MarkBufferDirty(*buf);
+
                return BLK_RESTORED;
        }
        else
        {
-               *buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
+               if ((record->blocks[block_id].flags & BKPBLOCK_WILL_INIT) != 0 &&
+                       mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
+               {
+                       elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
+               }
+
+               *buf = XLogReadBufferExtended(rnode, forknum, blkno, mode);
                if (BufferIsValid(*buf))
                {
                        if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
@@ -333,37 +386,6 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
        }
 }
 
-/*
- * XLogReadBuffer
- *             Read a page during XLOG replay.
- *
- * This is a shorthand of XLogReadBufferExtended() followed by
- * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
- * fork.
- *
- * (Getting the buffer lock is not really necessary during single-process
- * crash recovery, but some subroutines such as MarkBufferDirty will complain
- * if we don't have the lock.  In hot standby mode it's definitely necessary.)
- *
- * The returned buffer is exclusively-locked.
- *
- * For historical reasons, instead of a ReadBufferMode argument, this only
- * supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
- * modes.
- */
-Buffer
-XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
-{
-       Buffer          buf;
-
-       buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
-                                                                init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
-       if (BufferIsValid(buf) && !init)
-               LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-
-       return buf;
-}
-
 /*
  * XLogReadBufferExtended
  *             Read a page during XLOG replay
@@ -383,6 +405,11 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
  * to imply that the page should be dropped or truncated later.
+ *
+ * NB: A redo function should normally not call this directly. To get a page
+ * to modify, use XLogReadBufferForRedo instead. It is important that all pages
+ * modified by a WAL record are registered in the WAL records, or they will be
+ * invisible to tools that need to know which pages are modified.
  */
 Buffer
 XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
@@ -473,124 +500,6 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
        return buffer;
 }
 
-/*
- * Restore a full-page image from a backup block attached to an XLOG record.
- *
- * lsn: LSN of the XLOG record being replayed
- * record: the complete XLOG record
- * block_index: which backup block to restore (0 .. XLR_MAX_BKP_BLOCKS - 1)
- * get_cleanup_lock: TRUE to get a cleanup rather than plain exclusive lock
- * keep_buffer: TRUE to return the buffer still locked and pinned
- *
- * Returns the buffer number containing the page.  Note this is not terribly
- * useful unless keep_buffer is specified as TRUE.
- *
- * Note: when a backup block is available in XLOG, we restore it
- * unconditionally, even if the page in the database appears newer.
- * This is to protect ourselves against database pages that were partially
- * or incorrectly written during a crash.  We assume that the XLOG data
- * must be good because it has passed a CRC check, while the database
- * page might not be.  This will force us to replay all subsequent
- * modifications of the page that appear in XLOG, rather than possibly
- * ignoring them as already applied, but that's not a huge drawback.
- *
- * If 'get_cleanup_lock' is true, a cleanup lock is obtained on the buffer,
- * else a normal exclusive lock is used.  During crash recovery, that's just
- * pro forma because there can't be any regular backends in the system, but
- * in hot standby mode the distinction is important.
- *
- * If 'keep_buffer' is true, return without releasing the buffer lock and pin;
- * then caller is responsible for doing UnlockReleaseBuffer() later.  This
- * is needed in some cases when replaying XLOG records that touch multiple
- * pages, to prevent inconsistent states from being visible to other backends.
- * (Again, that's only important in hot standby mode.)
- */
-Buffer
-RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
-                                  bool get_cleanup_lock, bool keep_buffer)
-{
-       BkpBlock        bkpb;
-       char       *blk;
-       int                     i;
-
-       /* Locate requested BkpBlock in the record */
-       blk = (char *) XLogRecGetData(record) + record->xl_len;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               if (!(record->xl_info & XLR_BKP_BLOCK(i)))
-                       continue;
-
-               memcpy(&bkpb, blk, sizeof(BkpBlock));
-               blk += sizeof(BkpBlock);
-
-               if (i == block_index)
-               {
-                       /* Found it, apply the update */
-                       return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
-                                                                                         keep_buffer);
-               }
-
-               blk += BLCKSZ - bkpb.hole_length;
-       }
-
-       /* Caller specified a bogus block_index */
-       elog(ERROR, "failed to restore block_index %d", block_index);
-       return InvalidBuffer;           /* keep compiler quiet */
-}
-
-/*
- * Workhorse for RestoreBackupBlock usable without an xlog record
- *
- * Restores a full-page image from BkpBlock and a data pointer.
- */
-Buffer
-RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
-                                                  bool get_cleanup_lock, bool keep_buffer)
-{
-       Buffer          buffer;
-       Page            page;
-
-       buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
-                                                                       get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
-       Assert(BufferIsValid(buffer));
-
-       page = (Page) BufferGetPage(buffer);
-
-       if (bkpb.hole_length == 0)
-       {
-               memcpy((char *) page, blk, BLCKSZ);
-       }
-       else
-       {
-               memcpy((char *) page, blk, bkpb.hole_offset);
-               /* must zero-fill the hole */
-               MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
-               memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
-                          blk + bkpb.hole_offset,
-                          BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
-       }
-
-       /*
-        * The checksum value on this page is currently invalid. We don't need to
-        * reset it here since it will be set before being written.
-        */
-
-       /*
-        * The page may be uninitialized. If so, we can't set the LSN because that
-        * would corrupt the page.
-        */
-       if (!PageIsNew(page))
-       {
-               PageSetLSN(page, lsn);
-       }
-       MarkBufferDirty(buffer);
-
-       if (!keep_buffer)
-               UnlockReleaseBuffer(buffer);
-
-       return buffer;
-}
-
 /*
  * Struct actually returned by XLogFakeRelcacheEntry, though the declared
  * return type is Relation.
index 46780e71d69c50378ab217547a89d3ac63a14f52..3f5e1700f06215b0350ba0a9731e55e11c10f856 100644 (file)
@@ -125,7 +125,6 @@ void
 log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
 {
        xl_smgr_create xlrec;
-       XLogRecData rdata;
 
        /*
         * Make an XLOG entry reporting the file creation.
@@ -133,12 +132,9 @@ log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
        xlrec.rnode = *rnode;
        xlrec.forkNum = forkNum;
 
-       rdata.data = (char *) &xlrec;
-       rdata.len = sizeof(xlrec);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-
-       XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+       XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
 }
 
 /*
@@ -268,18 +264,16 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
                 * Make an XLOG entry reporting the file truncation.
                 */
                XLogRecPtr      lsn;
-               XLogRecData rdata;
                xl_smgr_truncate xlrec;
 
                xlrec.blkno = nblocks;
                xlrec.rnode = rel->rd_node;
 
-               rdata.data = (char *) &xlrec;
-               rdata.len = sizeof(xlrec);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xlrec));
 
-               lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);
+               lsn = XLogInsert(RM_SMGR_ID,
+                                                XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
 
                /*
                 * Flush, because otherwise the truncation of the main relation might
@@ -479,12 +473,13 @@ AtSubAbort_smgr(void)
 }
 
 void
-smgr_redo(XLogRecPtr lsn, XLogRecord *record)
+smgr_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       XLogRecPtr      lsn = record->EndRecPtr;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in smgr records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_SMGR_CREATE)
        {
@@ -505,8 +500,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
                /*
                 * Forcibly create relation if it doesn't exist (which suggests that
                 * it was dropped somewhere later in the WAL sequence).  As in
-                * XLogReadBuffer, we prefer to recreate the rel and replay the log as
-                * best we can until the drop is seen.
+                * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
+                * log as best we can until the drop is seen.
                 */
                smgrcreate(reln, MAIN_FORKNUM, true);
 
index 94c82d37410c59fb1aa6b9e791760758416582b6..1a5244cade20b9331c2785007c3f6a407f122964 100644 (file)
@@ -619,19 +619,17 @@ createdb(const CreatedbStmt *stmt)
                        /* Record the filesystem change in XLOG */
                        {
                                xl_dbase_create_rec xlrec;
-                               XLogRecData rdata[1];
 
                                xlrec.db_id = dboid;
                                xlrec.tablespace_id = dsttablespace;
                                xlrec.src_db_id = src_dboid;
                                xlrec.src_tablespace_id = srctablespace;
 
-                               rdata[0].data = (char *) &xlrec;
-                               rdata[0].len = sizeof(xl_dbase_create_rec);
-                               rdata[0].buffer = InvalidBuffer;
-                               rdata[0].next = NULL;
+                               XLogBeginInsert();
+                               XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
 
-                               (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+                               (void) XLogInsert(RM_DBASE_ID,
+                                                                 XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
                        }
                }
                heap_endscan(scan);
@@ -1226,19 +1224,17 @@ movedb(const char *dbname, const char *tblspcname)
                 */
                {
                        xl_dbase_create_rec xlrec;
-                       XLogRecData rdata[1];
 
                        xlrec.db_id = db_id;
                        xlrec.tablespace_id = dst_tblspcoid;
                        xlrec.src_db_id = db_id;
                        xlrec.src_tablespace_id = src_tblspcoid;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = sizeof(xl_dbase_create_rec);
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
 
-                       (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+                       (void) XLogInsert(RM_DBASE_ID,
+                                                         XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
                }
 
                /*
@@ -1330,17 +1326,15 @@ movedb(const char *dbname, const char *tblspcname)
         */
        {
                xl_dbase_drop_rec xlrec;
-               XLogRecData rdata[1];
 
                xlrec.db_id = db_id;
                xlrec.tablespace_id = src_tblspcoid;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_dbase_drop_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
 
-               (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+               (void) XLogInsert(RM_DBASE_ID,
+                                                 XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
        }
 
        /* Now it's safe to release the database lock */
@@ -1870,17 +1864,15 @@ remove_dbtablespaces(Oid db_id)
                /* Record the filesystem change in XLOG */
                {
                        xl_dbase_drop_rec xlrec;
-                       XLogRecData rdata[1];
 
                        xlrec.db_id = db_id;
                        xlrec.tablespace_id = dsttablespace;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = sizeof(xl_dbase_drop_rec);
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
 
-                       (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+                       (void) XLogInsert(RM_DBASE_ID,
+                                                         XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
                }
 
                pfree(dstpath);
@@ -2043,12 +2035,12 @@ get_database_name(Oid dbid)
  * DATABASE resource manager's routines
  */
 void
-dbase_redo(XLogRecPtr lsn, XLogRecord *record)
+dbase_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in dbase records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_DBASE_CREATE)
        {
index cb8b27a93c0ee439d66eafd44e8243fa72156654..ba5b938863cb492632f9c45acb3c29b8d363165a 100644 (file)
@@ -372,20 +372,16 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
+
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
                xlrec.node = rel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = (char *) tuple->t_data;
-               rdata[1].len = tuple->t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+               XLogRegisterData((char *) tuple->t_data, tuple->t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -454,21 +450,17 @@ AlterSequence(AlterSeqStmt *stmt)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                Page            page = BufferGetPage(buf);
 
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
                xlrec.node = seqrel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
 
-               rdata[1].data = (char *) seqtuple.t_data;
-               rdata[1].len = seqtuple.t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -706,7 +698,6 @@ nextval_internal(Oid relid)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
                /*
                 * We don't log the current state of the tuple, but rather the state
@@ -714,6 +705,8 @@ nextval_internal(Oid relid)
                 * that many future WAL records, at the cost that we lose those
                 * sequence values if we crash.
                 */
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
                /* set values that will be saved in xlog */
                seq->last_value = next;
@@ -721,17 +714,11 @@ nextval_internal(Oid relid)
                seq->log_cnt = 0;
 
                xlrec.node = seqrel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = (char *) seqtuple.t_data;
-               rdata[1].len = seqtuple.t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+               XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -894,21 +881,16 @@ do_setval(Oid relid, int64 next, bool iscalled)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                Page            page = BufferGetPage(buf);
 
-               xlrec.node = seqrel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
-               rdata[1].data = (char *) seqtuple.t_data;
-               rdata[1].len = seqtuple.t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               xlrec.node = seqrel->rd_node;
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+               XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -1552,9 +1534,10 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
 
 
 void
-seq_redo(XLogRecPtr lsn, XLogRecord *record)
+seq_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       XLogRecPtr      lsn = record->EndRecPtr;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
        Buffer          buffer;
        Page            page;
        Page            localpage;
@@ -1563,14 +1546,10 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
        xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
        sequence_magic *sm;
 
-       /* Backup blocks are not used in seq records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
        if (info != XLOG_SEQ_LOG)
                elog(PANIC, "seq_redo: unknown op code %u", info);
 
-       buffer = XLogReadBuffer(xlrec->node, 0, true);
-       Assert(BufferIsValid(buffer));
+       buffer = XLogInitBufferForRedo(record, 0);
        page = (Page) BufferGetPage(buffer);
 
        /*
@@ -1589,7 +1568,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
        sm->magic = SEQ_MAGIC;
 
        item = (char *) xlrec + sizeof(xl_seq_rec);
-       itemsz = record->xl_len - sizeof(xl_seq_rec);
+       itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec);
 
        if (PageAddItem(localpage, (Item) item, itemsz,
                                        FirstOffsetNumber, false, false) == InvalidOffsetNumber)
index 616308bc2d37c826c89833d21d22c8bc07b95bc1..3c9af5776a0a0ed31de48b57bbb052b4152cfabc 100644 (file)
@@ -354,20 +354,15 @@ CreateTableSpace(CreateTableSpaceStmt *stmt)
        /* Record the filesystem change in XLOG */
        {
                xl_tblspc_create_rec xlrec;
-               XLogRecData rdata[2];
 
                xlrec.ts_id = tablespaceoid;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = (char *) location;
-               rdata[1].len = strlen(location) + 1;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec,
+                                                offsetof(xl_tblspc_create_rec, ts_path));
+               XLogRegisterData((char *) location, strlen(location) + 1);
 
-               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata);
+               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
        }
 
        /*
@@ -515,15 +510,13 @@ DropTableSpace(DropTableSpaceStmt *stmt)
        /* Record the filesystem change in XLOG */
        {
                xl_tblspc_drop_rec xlrec;
-               XLogRecData rdata[1];
 
                xlrec.ts_id = tablespaceoid;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_tblspc_drop_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = NULL;
 
-               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));
+
+               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
        }
 
        /*
@@ -1408,12 +1401,12 @@ get_tablespace_name(Oid spc_oid)
  * TABLESPACE resource manager's routines
  */
 void
-tblspc_redo(XLogRecPtr lsn, XLogRecord *record)
+tblspc_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in tblspc records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_TBLSPC_CREATE)
        {
index 8e78aafda7cbf01914df86fd03a5657eda93fea3..1c7dac38fc99eb43e7d6cf974f75c466a6019432 100644 (file)
@@ -31,7 +31,9 @@
 #include "access/transam.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
+#include "access/xlogutils.h"
 #include "access/xlogreader.h"
+#include "access/xlogrecord.h"
 
 #include "catalog/pg_control.h"
 
@@ -46,8 +48,7 @@ typedef struct XLogRecordBuffer
 {
        XLogRecPtr      origptr;
        XLogRecPtr      endptr;
-       XLogRecord      record;
-       char       *record_data;
+       XLogReaderState *record;
 } XLogRecordBuffer;
 
 /* RMGR Handlers */
@@ -79,17 +80,16 @@ static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup);
  * context.
  */
 void
-LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
+LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
 {
        XLogRecordBuffer buf;
 
        buf.origptr = ctx->reader->ReadRecPtr;
        buf.endptr = ctx->reader->EndRecPtr;
-       buf.record = *record;
-       buf.record_data = XLogRecGetData(record);
+       buf.record = record;
 
        /* cast so we get a warning when new rmgrs are added */
-       switch ((RmgrIds) buf.record.xl_rmid)
+       switch ((RmgrIds) XLogRecGetRmid(record))
        {
                        /*
                         * Rmgrs we care about for logical decoding. Add new rmgrs in
@@ -135,7 +135,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
                case RM_BRIN_ID:
                        break;
                case RM_NEXT_ID:
-                       elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
+                       elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) XLogRecGetRmid(buf.record));
        }
 }
 
@@ -146,7 +146,7 @@ static void
 DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
        SnapBuild  *builder = ctx->snapshot_builder;
-       uint8           info = buf->record.xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(buf->record) & ~XLR_INFO_MASK;
 
        switch (info)
        {
@@ -185,8 +185,8 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
        SnapBuild  *builder = ctx->snapshot_builder;
        ReorderBuffer *reorder = ctx->reorder;
-       XLogRecord *r = &buf->record;
-       uint8           info = r->xl_info & ~XLR_INFO_MASK;
+       XLogReaderState *r = buf->record;
+       uint8           info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
 
        /* no point in doing anything yet, data could not be decoded anyway */
        if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
@@ -200,12 +200,12 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                TransactionId *subxacts = NULL;
                                SharedInvalidationMessage *invals = NULL;
 
-                               xlrec = (xl_xact_commit *) buf->record_data;
+                               xlrec = (xl_xact_commit *) XLogRecGetData(r);
 
                                subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
                                invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
 
-                               DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId,
+                               DecodeCommit(ctx, buf, XLogRecGetXid(r), xlrec->dbId,
                                                         xlrec->xact_time,
                                                         xlrec->nsubxacts, subxacts,
                                                         xlrec->nmsgs, invals);
@@ -220,7 +220,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                SharedInvalidationMessage *invals = NULL;
 
                                /* Prepared commits contain a normal commit record... */
-                               prec = (xl_xact_commit_prepared *) buf->record_data;
+                               prec = (xl_xact_commit_prepared *) XLogRecGetData(r);
                                xlrec = &prec->crec;
 
                                subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@@ -237,9 +237,9 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                        {
                                xl_xact_commit_compact *xlrec;
 
-                               xlrec = (xl_xact_commit_compact *) buf->record_data;
+                               xlrec = (xl_xact_commit_compact *) XLogRecGetData(r);
 
-                               DecodeCommit(ctx, buf, r->xl_xid, InvalidOid,
+                               DecodeCommit(ctx, buf, XLogRecGetXid(r), InvalidOid,
                                                         xlrec->xact_time,
                                                         xlrec->nsubxacts, xlrec->subxacts,
                                                         0, NULL);
@@ -250,11 +250,11 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                xl_xact_abort *xlrec;
                                TransactionId *sub_xids;
 
-                               xlrec = (xl_xact_abort *) buf->record_data;
+                               xlrec = (xl_xact_abort *) XLogRecGetData(r);
 
                                sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
 
-                               DecodeAbort(ctx, buf->origptr, r->xl_xid,
+                               DecodeAbort(ctx, buf->origptr, XLogRecGetXid(r),
                                                        sub_xids, xlrec->nsubxacts);
                                break;
                        }
@@ -265,7 +265,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                TransactionId *sub_xids;
 
                                /* prepared abort contain a normal commit abort... */
-                               prec = (xl_xact_abort_prepared *) buf->record_data;
+                               prec = (xl_xact_abort_prepared *) XLogRecGetData(r);
                                xlrec = &prec->arec;
 
                                sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@@ -282,7 +282,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                int                     i;
                                TransactionId *sub_xid;
 
-                               xlrec = (xl_xact_assignment *) buf->record_data;
+                               xlrec = (xl_xact_assignment *) XLogRecGetData(r);
 
                                sub_xid = &xlrec->xsub[0];
 
@@ -316,14 +316,14 @@ static void
 DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
        SnapBuild  *builder = ctx->snapshot_builder;
-       XLogRecord *r = &buf->record;
-       uint8           info = r->xl_info & ~XLR_INFO_MASK;
+       XLogReaderState *r = buf->record;
+       uint8           info = XLogRecGetInfo(r) & ~XLR_INFO_MASK;
 
        switch (info)
        {
                case XLOG_RUNNING_XACTS:
                        {
-                               xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
+                               xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
 
                                SnapBuildProcessRunningXacts(builder, buf->origptr, running);
 
@@ -352,8 +352,8 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       uint8           info = buf->record.xl_info & XLOG_HEAP_OPMASK;
-       TransactionId xid = buf->record.xl_xid;
+       uint8           info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
+       TransactionId xid = XLogRecGetXid(buf->record);
        SnapBuild  *builder = ctx->snapshot_builder;
 
        /* no point in doing anything yet */
@@ -370,7 +370,7 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                        {
                                xl_heap_new_cid *xlrec;
 
-                               xlrec = (xl_heap_new_cid *) buf->record_data;
+                               xlrec = (xl_heap_new_cid *) XLogRecGetData(buf->record);
                                SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
 
                                break;
@@ -405,8 +405,8 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       uint8           info = buf->record.xl_info & XLOG_HEAP_OPMASK;
-       TransactionId xid = buf->record.xl_xid;
+       uint8           info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
+       TransactionId xid = XLogRecGetXid(buf->record);
        SnapBuild  *builder = ctx->snapshot_builder;
 
        /* no point in doing anything yet */
@@ -576,34 +576,35 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
 static void
 DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogReaderState *r = buf->record;
        xl_heap_insert *xlrec;
        ReorderBufferChange *change;
+       RelFileNode target_node;
 
-       xlrec = (xl_heap_insert *) buf->record_data;
+       xlrec = (xl_heap_insert *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->target.node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+       if (target_node.dbNode != ctx->slot->data.database)
                return;
 
        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_INSERT;
-       memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+       memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
 
        if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
        {
-               Assert(r->xl_len > (SizeOfHeapInsert + SizeOfHeapHeader));
+               Size            tuplelen;
+               char       *tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
 
                change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
 
-               DecodeXLogTuple((char *) xlrec + SizeOfHeapInsert,
-                                               r->xl_len - SizeOfHeapInsert,
-                                               change->data.tp.newtuple);
+               DecodeXLogTuple(tupledata, tuplelen, change->data.tp.newtuple);
        }
 
        change->data.tp.clear_toast_afterwards = true;
 
-       ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+       ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
 }
 
 /*
@@ -615,62 +616,47 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogReaderState *r = buf->record;
        xl_heap_update *xlrec;
-       xl_heap_header_len xlhdr;
        ReorderBufferChange *change;
        char       *data;
+       Size            datalen;
+       RelFileNode target_node;
 
-       xlrec = (xl_heap_update *) buf->record_data;
+       xlrec = (xl_heap_update *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->target.node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+       if (target_node.dbNode != ctx->slot->data.database)
                return;
 
        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_UPDATE;
-       memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
-
-       /* caution, remaining data in record is not aligned */
-       data = buf->record_data + SizeOfHeapUpdate;
+       memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
 
        if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
        {
-               Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen));
-
-               memcpy(&xlhdr, data, sizeof(xlhdr));
-               data += offsetof(xl_heap_header_len, header);
+               data = XLogRecGetBlockData(r, 0, &datalen);
 
                change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
 
-               DecodeXLogTuple(data,
-                                               xlhdr.t_len + SizeOfHeapHeader,
-                                               change->data.tp.newtuple);
-               /* skip over the rest of the tuple header */
-               data += SizeOfHeapHeader;
-               /* skip over the tuple data */
-               data += xlhdr.t_len;
+               DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
        }
 
        if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
        {
-               memcpy(&xlhdr, data, sizeof(xlhdr));
-               data += offsetof(xl_heap_header_len, header);
+               /* caution, remaining data in record is not aligned */
+               data = XLogRecGetData(r) + SizeOfHeapUpdate;
+               datalen = XLogRecGetDataLen(r) - SizeOfHeapUpdate;
 
                change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
 
-               DecodeXLogTuple(data,
-                                               xlhdr.t_len + SizeOfHeapHeader,
-                                               change->data.tp.oldtuple);
-#ifdef NOT_USED
-               data += SizeOfHeapHeader;
-               data += xlhdr.t_len;
-#endif
+               DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
        }
 
        change->data.tp.clear_toast_afterwards = true;
 
-       ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+       ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
 }
 
 /*
@@ -681,36 +667,38 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogReaderState *r = buf->record;
        xl_heap_delete *xlrec;
        ReorderBufferChange *change;
+       RelFileNode target_node;
 
-       xlrec = (xl_heap_delete *) buf->record_data;
+       xlrec = (xl_heap_delete *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->target.node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+       if (target_node.dbNode != ctx->slot->data.database)
                return;
 
        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_DELETE;
 
-       memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+       memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
 
        /* old primary key stored */
        if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
        {
-               Assert(r->xl_len > (SizeOfHeapDelete + SizeOfHeapHeader));
+               Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader));
 
                change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
 
                DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete,
-                                               r->xl_len - SizeOfHeapDelete,
+                                               XLogRecGetDataLen(r) - SizeOfHeapDelete,
                                                change->data.tp.oldtuple);
        }
 
        change->data.tp.clear_toast_afterwards = true;
 
-       ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+       ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
 }
 
 /*
@@ -721,27 +709,24 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogReaderState *r = buf->record;
        xl_heap_multi_insert *xlrec;
        int                     i;
        char       *data;
-       bool            isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+       char       *tupledata;
+       Size            tuplelen;
+       RelFileNode rnode;
 
-       xlrec = (xl_heap_multi_insert *) buf->record_data;
+       xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL);
+       if (rnode.dbNode != ctx->slot->data.database)
                return;
 
-       data = buf->record_data + SizeOfHeapMultiInsert;
-
-       /*
-        * OffsetNumbers (which are not of interest to us) are stored when
-        * XLOG_HEAP_INIT_PAGE is not set -- skip over them.
-        */
-       if (!isinit)
-               data += sizeof(OffsetNumber) * xlrec->ntuples;
+       tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
 
+       data = tupledata;
        for (i = 0; i < xlrec->ntuples; i++)
        {
                ReorderBufferChange *change;
@@ -751,7 +736,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
                change = ReorderBufferGetChange(ctx->reorder);
                change->action = REORDER_BUFFER_CHANGE_INSERT;
-               memcpy(&change->data.tp.relnode, &xlrec->node, sizeof(RelFileNode));
+               memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode));
 
                /*
                 * CONTAINS_NEW_TUPLE will always be set currently as multi_insert
@@ -806,9 +791,10 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                else
                        change->data.tp.clear_toast_afterwards = false;
 
-               ReorderBufferQueueChange(ctx->reorder, r->xl_xid,
+               ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
                                                                 buf->origptr, change);
        }
+       Assert(data == tupledata + tuplelen);
 }
 
 /*
index 875b89a62886a4f3445170b5baf3814c3c383928..8c318cd4b519c789b5ac456e936d0d3146eb9965 100644 (file)
@@ -34,6 +34,7 @@
 #include "miscadmin.h"
 
 #include "access/xact.h"
+#include "access/xlog_internal.h"
 
 #include "replication/decode.h"
 #include "replication/logical.h"
@@ -455,12 +456,12 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
                record = XLogReadRecord(ctx->reader, startptr, &err);
                if (err)
                        elog(ERROR, "%s", err);
-
-               Assert(record);
+               if (!record)
+                       elog(ERROR, "no record found");         /* shouldn't happen */
 
                startptr = InvalidXLogRecPtr;
 
-               LogicalDecodingProcessRecord(ctx, record);
+               LogicalDecodingProcessRecord(ctx, ctx->reader);
 
                /* only continue till we found a consistent spot */
                if (DecodingContextReady(ctx))
index 3a5ec2f61d931ed1c78cc4f3b7e207867aac01e4..1977f098c798b3cfa310f19ba24eec4ea6022762 100644 (file)
@@ -21,6 +21,8 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 
+#include "access/xlog_internal.h"
+
 #include "catalog/pg_type.h"
 
 #include "nodes/makefuncs.h"
@@ -431,7 +433,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin
                         * store the description into our tuplestore.
                         */
                        if (record != NULL)
-                               LogicalDecodingProcessRecord(ctx, record);
+                               LogicalDecodingProcessRecord(ctx, ctx->reader);
 
                        /* check limits */
                        if (upto_lsn != InvalidXLogRecPtr &&
index 7d8f40738d4f0190b40382ec7be86ae75321280d..6e75398eabe5441164328336df571a95a18131eb 100644 (file)
@@ -54,6 +54,7 @@
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
+#include "access/xlog_internal.h"
 #include "catalog/catalog.h"
 #include "lib/binaryheap.h"
 #include "miscadmin.h"
index 200b54d7c2ac912c2dde3333e6755f32a0af3074..20f9b04adfa71858391435f5e3138bd7356f2220 100644 (file)
@@ -699,7 +699,7 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
        ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
 
        ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
-                                                                xlrec->target.node, xlrec->target.tid,
+                                                                xlrec->target_node, xlrec->target_tid,
                                                                 xlrec->cmin, xlrec->cmax,
                                                                 xlrec->combocid);
 
index 385d18ba1bbba13981e82c876d07e42460318624..addae8f6ce512e6864548d86ec91a1daaae58812 100644 (file)
@@ -2444,7 +2444,7 @@ XLogSendLogical(void)
 
        if (record != NULL)
        {
-               LogicalDecodingProcessRecord(logical_decoding_ctx, record);
+               LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader);
 
                sentPtr = logical_decoding_ctx->reader->EndRecPtr;
        }
index 8c3720bc7370b5e907e0bfe471be8829e66647a7..4269dda66b62b0340778521cceafb4c2895f610b 100644 (file)
@@ -759,12 +759,12 @@ StandbyReleaseOldLocks(int nxids, TransactionId *xids)
  */
 
 void
-standby_redo(XLogRecPtr lsn, XLogRecord *record)
+standby_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in standby records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        /* Do nothing if we're not in hot standby mode */
        if (standbyState == STANDBY_DISABLED)
@@ -928,8 +928,6 @@ static XLogRecPtr
 LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 {
        xl_running_xacts xlrec;
-       XLogRecData rdata[2];
-       int                     lastrdata = 0;
        XLogRecPtr      recptr;
 
        xlrec.xcnt = CurrRunningXacts->xcnt;
@@ -940,23 +938,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
        xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
 
        /* Header */
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactRunningXacts;
-       rdata[0].buffer = InvalidBuffer;
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
 
        /* array of TransactionIds */
        if (xlrec.xcnt > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) CurrRunningXacts->xids;
-               rdata[1].len = (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) CurrRunningXacts->xids,
+                                          (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
 
-       rdata[lastrdata].next = NULL;
-
-       recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, rdata);
+       recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
 
        if (CurrRunningXacts->subxid_overflow)
                elog(trace_recovery(DEBUG2),
@@ -996,22 +986,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 static void
 LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
 {
-       XLogRecData rdata[2];
        xl_standby_locks xlrec;
 
        xlrec.nlocks = nlocks;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = offsetof(xl_standby_locks, locks);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &rdata[1];
-
-       rdata[1].data = (char *) locks;
-       rdata[1].len = nlocks * sizeof(xl_standby_lock);
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
+       XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
 
-       (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, rdata);
+       (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
 }
 
 /*
index d1f64e58c8c9a3fd7c1f63d1ac0d45a2ef9b2df4..b90d6b5c7fd3f873b186f73f3412392ed45a4bb1 100644 (file)
@@ -754,7 +754,6 @@ write_relmap_file(bool shared, RelMapFile *newmap,
        if (write_wal)
        {
                xl_relmap_update xlrec;
-               XLogRecData rdata[2];
                XLogRecPtr      lsn;
 
                /* now errors are fatal ... */
@@ -764,16 +763,11 @@ write_relmap_file(bool shared, RelMapFile *newmap,
                xlrec.tsid = tsid;
                xlrec.nbytes = sizeof(RelMapFile);
 
-               rdata[0].data = (char *) (&xlrec);
-               rdata[0].len = MinSizeOfRelmapUpdate;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) newmap;
-               rdata[1].len = sizeof(RelMapFile);
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
+               XLogRegisterData((char *) newmap, sizeof(RelMapFile));
 
-               lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, rdata);
+               lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
 
                /* As always, WAL must hit the disk before the data update does */
                XLogFlush(lsn);
@@ -907,12 +901,12 @@ perform_relmap_update(bool shared, const RelMapFile *updates)
  * RELMAP resource manager's routines
  */
 void
-relmap_redo(XLogRecPtr lsn, XLogRecord *record)
+relmap_redo(XLogReaderState *record)
 {
-       uint8           info = record->xl_info & ~XLR_INFO_MASK;
+       uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in relmap records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_RELMAP_UPDATE)
        {
index 2ba994698257bdc8d43b94bf3df947f377434cce..666e8dbaa24b2ef73fcbe105248373c39f008947 100644 (file)
@@ -1006,6 +1006,7 @@ WriteEmptyXLOG(void)
        char            path[MAXPGPATH];
        int                     fd;
        int                     nbytes;
+       char       *recptr;
 
        /* Use malloc() to ensure buffer is MAXALIGNED */
        buffer = (char *) pg_malloc(XLOG_BLCKSZ);
@@ -1023,18 +1024,21 @@ WriteEmptyXLOG(void)
        longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
 
        /* Insert the initial checkpoint record */
-       record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
+       recptr = (char *) page + SizeOfXLogLongPHD;
+       record = (XLogRecord *) recptr;
        record->xl_prev = 0;
        record->xl_xid = InvalidTransactionId;
-       record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint);
-       record->xl_len = sizeof(CheckPoint);
+       record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
        record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
        record->xl_rmid = RM_XLOG_ID;
-       memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy,
+       recptr += SizeOfXLogRecord;
+       *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
+       *(recptr++) = sizeof(CheckPoint);
+       memcpy(recptr, &ControlFile.checkPointCopy,
                   sizeof(CheckPoint));
 
        INIT_CRC32C(crc);
-       COMP_CRC32C(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint));
+       COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
        COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
        FIN_CRC32C(crc);
        record->xl_crc = crc;
index d748db4d0c6b0b0fe98ef2979c1e053870c90267..6dc9eb3eca89494e0f8adb15c9988a9dad4ca5c5 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef BRIN_XLOG_H
 #define BRIN_XLOG_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "storage/bufpage.h"
 #include "storage/itemptr.h"
  */
 #define XLOG_BRIN_INIT_PAGE            0x80
 
-/* This is what we need to know about a BRIN index create */
+/*
+ * This is what we need to know about a BRIN index create.
+ *
+ * Backup block 0: metapage
+ */
 typedef struct xl_brin_createidx
 {
        BlockNumber pagesPerRange;
-       RelFileNode node;
        uint16          version;
 } xl_brin_createidx;
 #define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
 
 /*
  * This is what we need to know about a BRIN tuple insert
+ *
+ * Backup block 0: main page, block data is the new BrinTuple.
+ * Backup block 1: revmap page
  */
 typedef struct xl_brin_insert
 {
-       RelFileNode node;
        BlockNumber heapBlk;
 
        /* extra information needed to update the revmap */
-       BlockNumber revmapBlk;
        BlockNumber pagesPerRange;
 
-       uint16          tuplen;
-       ItemPointerData tid;
-       /* tuple data follows at end of struct */
+       /* offset number in the main page to insert the tuple to. */
+       OffsetNumber offnum;
 } xl_brin_insert;
 
-#define SizeOfBrinInsert       (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinInsert       (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
 
 /*
- * A cross-page update is the same as an insert, but also store the old tid.
+ * A cross-page update is the same as an insert, but also stores information
+ * about the old tuple.
+ *
+ * Like in xl_brin_insert:
+ * Backup block 0: new page, block data includes the new BrinTuple.
+ * Backup block 1: revmap page
+ *
+ * And in addition:
+ * Backup block 2: old page
  */
 typedef struct xl_brin_update
 {
-       ItemPointerData oldtid;
+       /* offset number of old tuple on old page */
+       OffsetNumber oldOffnum;
+
        xl_brin_insert insert;
 } xl_brin_update;
 
 #define SizeOfBrinUpdate       (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
 
-/* This is what we need to know about a BRIN tuple samepage update */
+/*
+ * This is what we need to know about a BRIN tuple samepage update
+ *
+ * Backup block 0: updated page, with new BrinTuple as block data
+ */
 typedef struct xl_brin_samepage_update
 {
-       RelFileNode node;
-       ItemPointerData tid;
-       /* tuple data follows at end of struct */
+       OffsetNumber offnum;
 } xl_brin_samepage_update;
 
-#define SizeOfBrinSamepageUpdate               (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinSamepageUpdate               (sizeof(OffsetNumber))
 
-/* This is what we need to know about a revmap extension */
+/*
+ * This is what we need to know about a revmap extension
+ *
+ * Backup block 0: metapage
+ * Backup block 1: new revmap page
+ */
 typedef struct xl_brin_revmap_extend
 {
-       RelFileNode node;
+       /*
+        * XXX: This is actually redundant - the block number is stored as part of
+        * backup block 1.
+        */
        BlockNumber targetBlk;
 } xl_brin_revmap_extend;
 
@@ -102,8 +125,8 @@ typedef struct xl_brin_revmap_extend
                                                                 sizeof(BlockNumber))
 
 
-extern void brin_desc(StringInfo buf, XLogRecord *record);
-extern void brin_redo(XLogRecPtr lsn, XLogRecord *record);
+extern void brin_redo(XLogReaderState *record);
+extern void brin_desc(StringInfo buf, XLogReaderState *record);
 extern const char *brin_identify(uint8 info);
 
 #endif   /* BRIN_XLOG_H */
index 04ac4ba3119bf9de34ab1b47f3c361d66104915e..fe5e4c634d1fdc42e53c7a528f7383a9d697451f 100644 (file)
@@ -11,7 +11,7 @@
 #ifndef CLOG_H
 #define CLOG_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 
 /*
@@ -48,8 +48,8 @@ extern void TruncateCLOG(TransactionId oldestXact);
 #define CLOG_ZEROPAGE          0x00
 #define CLOG_TRUNCATE          0x10
 
-extern void clog_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void clog_desc(StringInfo buf, XLogRecord *record);
+extern void clog_redo(XLogReaderState *record);
+extern void clog_desc(StringInfo buf, XLogReaderState *record);
 extern const char *clog_identify(uint8 info);
 
 #endif   /* CLOG_H */
index 433e56f20dfa23b166fe2e6bf88862ed5d7dc005..fe5f77b1736bd9b335bbb9bad2dc61343a5dbd9f 100644 (file)
@@ -10,7 +10,7 @@
 #ifndef GIN_H
 #define GIN_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "storage/block.h"
 #include "utils/relcache.h"
@@ -74,8 +74,8 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
 extern void ginUpdateStats(Relation index, const GinStatsData *stats);
 
 /* ginxlog.c */
-extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void gin_desc(StringInfo buf, XLogRecord *record);
+extern void gin_redo(XLogReaderState *record);
+extern void gin_desc(StringInfo buf, XLogReaderState *record);
 extern const char *gin_identify(uint8 info);
 extern void gin_xlog_startup(void);
 extern void gin_xlog_cleanup(void);
index 333316d78e2ef70da5acadc65c506a7d89ca8a92..3d46f20bb83d9f157dc1a4904ed997bbda5eca20 100644 (file)
@@ -13,7 +13,6 @@
 #include "access/genam.h"
 #include "access/gin.h"
 #include "access/itup.h"
-#include "access/xloginsert.h"
 #include "fmgr.h"
 #include "storage/bufmgr.h"
 #include "utils/rbtree.h"
@@ -397,22 +396,22 @@ typedef struct
 
 typedef struct ginxlogCreatePostingTree
 {
-       RelFileNode node;
-       BlockNumber blkno;
        uint32          size;
        /* A compressed posting list follows */
 } ginxlogCreatePostingTree;
 
-#define XLOG_GIN_INSERT  0x20
-
 /*
  * The format of the insertion record varies depending on the page type.
  * ginxlogInsert is the common part between all variants.
+ *
+ * Backup Blk 0: target page
+ * Backup Blk 1: left child, if this insertion finishes an incomplete split
  */
+
+#define XLOG_GIN_INSERT  0x20
+
 typedef struct
 {
-       RelFileNode node;
-       BlockNumber blkno;
        uint16          flags;                  /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
 
        /*
@@ -477,14 +476,17 @@ typedef struct
        PostingItem newitem;
 } ginxlogInsertDataInternal;
 
-
+/*
+ * Backup Blk 0: new left page (= original page, if not root split)
+ * Backup Blk 1: new right page
+ * Backup Blk 2: original page / new root page, if root split
+ * Backup Blk 3: left child, if this insertion completes an earlier split
+ */
 #define XLOG_GIN_SPLIT 0x30
 
 typedef struct ginxlogSplit
 {
        RelFileNode node;
-       BlockNumber lblkno;
-       BlockNumber rblkno;
        BlockNumber rrlink;                     /* right link, or root's blocknumber if root
                                                                 * split */
        BlockNumber leftChildBlkno; /* valid on a non-leaf split */
@@ -538,15 +540,6 @@ typedef struct
  */
 #define XLOG_GIN_VACUUM_PAGE   0x40
 
-typedef struct ginxlogVacuumPage
-{
-       RelFileNode node;
-       BlockNumber blkno;
-       uint16          hole_offset;    /* number of bytes before "hole" */
-       uint16          hole_length;    /* number of bytes in "hole" */
-       /* entire page contents (minus the hole) follow at end of record */
-} ginxlogVacuumPage;
-
 /*
  * Vacuuming posting tree leaf page is WAL-logged like recompression caused
  * by insertion.
@@ -555,26 +548,28 @@ typedef struct ginxlogVacuumPage
 
 typedef struct ginxlogVacuumDataLeafPage
 {
-       RelFileNode node;
-       BlockNumber blkno;
-
        ginxlogRecompressDataLeaf data;
 } ginxlogVacuumDataLeafPage;
 
+/*
+ * Backup Blk 0: deleted page
+ * Backup Blk 1: parent
+ * Backup Blk 2: left sibling
+ */
 #define XLOG_GIN_DELETE_PAGE   0x50
 
 typedef struct ginxlogDeletePage
 {
-       RelFileNode node;
-       BlockNumber blkno;
-       BlockNumber parentBlkno;
        OffsetNumber parentOffset;
-       BlockNumber leftBlkno;
        BlockNumber rightLink;
 } ginxlogDeletePage;
 
 #define XLOG_GIN_UPDATE_META_PAGE 0x60
 
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1: tail page
+ */
 typedef struct ginxlogUpdateMeta
 {
        RelFileNode node;
@@ -591,22 +586,29 @@ typedef struct ginxlogUpdateMeta
 
 typedef struct ginxlogInsertListPage
 {
-       RelFileNode node;
-       BlockNumber blkno;
        BlockNumber rightlink;
        int32           ntuples;
        /* array of inserted tuples follows */
 } ginxlogInsertListPage;
 
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1 to ndeleted: deleted pages
+ */
+
 #define XLOG_GIN_DELETE_LISTPAGE  0x80
 
-#define GIN_NDELETE_AT_ONCE 16
+/*
+ * The WAL record for deleting list pages must contain a block reference to
+ * all the deleted pages, so the number of pages that can be deleted in one
+ * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
+ * metapage.)
+ */
+#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
 typedef struct ginxlogDeleteListPages
 {
-       RelFileNode node;
        GinMetaPageData metadata;
        int32           ndeleted;
-       BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
 } ginxlogDeleteListPages;
 
 
@@ -673,7 +675,7 @@ typedef struct GinBtreeData
 
        /* insert methods */
        OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
-       GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
+       GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
        void       *(*prepareDownlink) (GinBtree, Buffer);
        void            (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
 
index 21daf3b2b6adf2588b3d1fa749b8ecd4f434db8b..2cbc918ad1ab0a2a84e1d8bcbb5908cfef881215 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "access/gist.h"
 #include "access/itup.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "fmgr.h"
 #include "storage/bufmgr.h"
 #include "storage/buffile.h"
@@ -185,34 +185,33 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
 #define XLOG_GIST_CREATE_INDEX         0x50
  /* #define XLOG_GIST_PAGE_DELETE               0x60 */        /* not used anymore */
 
+/*
+ * Backup Blk 0: updated page.
+ * Backup Blk 1: If this operation completes a page split, by inserting a
+ *                              downlink for the split page, the left half of the split
+ */
 typedef struct gistxlogPageUpdate
 {
-       RelFileNode node;
-       BlockNumber blkno;
-
-       /*
-        * If this operation completes a page split, by inserting a downlink for
-        * the split page, leftchild points to the left half of the split.
-        */
-       BlockNumber leftchild;
-
        /* number of deleted offsets */
        uint16          ntodelete;
+       uint16          ntoinsert;
 
        /*
-        * follow: 1. todelete OffsetNumbers 2. tuples to insert
+        * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
         */
 } gistxlogPageUpdate;
 
+/*
+ * Backup Blk 0: If this operation completes a page split, by inserting a
+ *                              downlink for the split page, the left half of the split
+ * Backup Blk 1 - npage: split pages (1 is the original page)
+ */
 typedef struct gistxlogPageSplit
 {
-       RelFileNode node;
-       BlockNumber origblkno;          /* splitted page */
        BlockNumber origrlink;          /* rightlink of the page before split */
        GistNSN         orignsn;                /* NSN of the page before split */
        bool            origleaf;               /* was splitted page a leaf page? */
 
-       BlockNumber leftchild;          /* like in gistxlogPageUpdate */
        uint16          npage;                  /* # of pages in the split */
        bool            markfollowright;        /* set F_FOLLOW_RIGHT flags */
 
@@ -451,8 +450,8 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
                  int len, GISTSTATE *giststate);
 
 /* gistxlog.c */
-extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void gist_desc(StringInfo buf, XLogRecord *record);
+extern void gist_redo(XLogReaderState *record);
+extern void gist_desc(StringInfo buf, XLogReaderState *record);
 extern const char *gist_identify(uint8 info);
 extern void gist_xlog_startup(void);
 extern void gist_xlog_cleanup(void);
index c175a5c1822a149a2410062a984b4b48e2e16761..afd06ff7defd73546e538fe7439f0393c66e227a 100644 (file)
@@ -20,7 +20,7 @@
 #include "access/genam.h"
 #include "access/itup.h"
 #include "access/sdir.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "fmgr.h"
 #include "lib/stringinfo.h"
 #include "storage/bufmgr.h"
@@ -356,8 +356,8 @@ extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
 extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
 
 /* hash.c */
-extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void hash_desc(StringInfo buf, XLogRecord *record);
+extern void hash_redo(XLogReaderState *record);
+extern void hash_desc(StringInfo buf, XLogReaderState *record);
 extern const char *hash_identify(uint8 info);
 
 #endif   /* HASH_H */
index 1d64264b010c53338d73802ae81bd03c9202678c..853e2dd491f78c436fbcfd96544807ad8f8adfbb 100644 (file)
@@ -15,7 +15,7 @@
 #define HEAPAM_XLOG_H
 
 #include "access/htup.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "storage/buf.h"
 #include "storage/bufpage.h"
 #define XLOG_HEAP_CONTAINS_OLD                                         \
        (XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
 
-/*
- * All what we need to find changed tuple
- *
- * NB: on most machines, sizeof(xl_heaptid) will include some trailing pad
- * bytes for alignment.  We don't want to store the pad space in the XLOG,
- * so use SizeOfHeapTid for space calculations.  Similar comments apply for
- * the other xl_FOO structs.
- */
-typedef struct xl_heaptid
-{
-       RelFileNode node;
-       ItemPointerData tid;            /* changed tuple id */
-} xl_heaptid;
-
-#define SizeOfHeapTid          (offsetof(xl_heaptid, tid) + SizeOfIptrData)
-
 /* This is what we need to know about delete */
 typedef struct xl_heap_delete
 {
-       xl_heaptid      target;                 /* deleted tuple id */
        TransactionId xmax;                     /* xmax of the deleted tuple */
+       OffsetNumber offnum;            /* deleted tuple's offset */
        uint8           infobits_set;   /* infomask bits */
        uint8           flags;
 } xl_heap_delete;
@@ -122,45 +106,33 @@ typedef struct xl_heap_header
 
 #define SizeOfHeapHeader       (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
 
-/*
- * Variant of xl_heap_header that contains the length of the tuple, which is
- * useful if the length of the tuple cannot be computed using the overall
- * record length. E.g. because there are several tuples inside a single
- * record.
- */
-typedef struct xl_heap_header_len
-{
-       uint16          t_len;
-       xl_heap_header header;
-} xl_heap_header_len;
-
-#define SizeOfHeapHeaderLen (offsetof(xl_heap_header_len, header) + SizeOfHeapHeader)
-
 /* This is what we need to know about insert */
 typedef struct xl_heap_insert
 {
-       xl_heaptid      target;                 /* inserted tuple id */
+       OffsetNumber offnum;            /* inserted tuple's offset */
        uint8           flags;
-       /* xl_heap_header & TUPLE DATA FOLLOWS AT END OF STRUCT */
+
+       /* xl_heap_header & TUPLE DATA in backup block 0 */
 } xl_heap_insert;
 
 #define SizeOfHeapInsert       (offsetof(xl_heap_insert, flags) + sizeof(uint8))
 
 /*
- * This is what we need to know about a multi-insert. The record consists of
- * xl_heap_multi_insert header, followed by a xl_multi_insert_tuple and tuple
- * data for each tuple. 'offsets' array is omitted if the whole page is
- * reinitialized (XLOG_HEAP_INIT_PAGE)
+ * This is what we need to know about a multi-insert.
+ *
+ * The main data of the record consists of this xl_heap_multi_insert header.
+ * 'offsets' array is omitted if the whole page is reinitialized
+ * (XLOG_HEAP_INIT_PAGE).
+ *
+ * In block 0's data portion, there is an xl_multi_insert_tuple struct,
+ * followed by the tuple data for each tuple. There is padding to align
+ * each xl_multi_insert_tuple struct.
  */
 typedef struct xl_heap_multi_insert
 {
-       RelFileNode node;
-       BlockNumber blkno;
        uint8           flags;
        uint16          ntuples;
        OffsetNumber offsets[1];
-
-       /* TUPLE DATA (xl_multi_insert_tuples) FOLLOW AT END OF STRUCT */
 } xl_heap_multi_insert;
 
 #define SizeOfHeapMultiInsert  offsetof(xl_heap_multi_insert, offsets)
@@ -176,34 +148,39 @@ typedef struct xl_multi_insert_tuple
 
 #define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
 
-/* This is what we need to know about update|hot_update */
+/*
+ * This is what we need to know about update|hot_update
+ *
+ * Backup blk 0: new page
+ *
+ * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are set,
+ * the prefix and/or suffix come first, as one or two uint16s.
+ *
+ * After that, xl_heap_header and new tuple data follow.  The new tuple
+ * data doesn't include the prefix and suffix, which are copied from the
+ * old tuple on replay.
+ *
+ * If XLOG_HEAP_CONTAINS_NEW_TUPLE flag is given, the tuple data is
+ * included even if a full-page image was taken.
+ *
+ * Backup blk 1: old page, if different. (no data, just a reference to the blk)
+ */
 typedef struct xl_heap_update
 {
-       xl_heaptid      target;                 /* deleted tuple id */
        TransactionId old_xmax;         /* xmax of the old tuple */
-       TransactionId new_xmax;         /* xmax of the new tuple */
-       ItemPointerData newtid;         /* new inserted tuple id */
+       OffsetNumber old_offnum;        /* old tuple's offset */
        uint8           old_infobits_set;               /* infomask bits to set on old tuple */
        uint8           flags;
+       TransactionId new_xmax;         /* xmax of the new tuple */
+       OffsetNumber new_offnum;        /* new tuple's offset */
 
        /*
-        * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
-        * set, the prefix and/or suffix come next, as one or two uint16s.
-        *
-        * After that, xl_heap_header_len and new tuple data follow.  The new
-        * tuple data and length don't include the prefix and suffix, which are
-        * copied from the old tuple on replay.  The new tuple data is omitted if
-        * a full-page image of the page was taken (unless the
-        * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
-        * anyway).
-        *
         * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
-        * set, another xl_heap_header_len struct and tuple data for the old tuple
-        * follows.
+        * set, a xl_heap_header struct and tuple data for the old tuple follows.
         */
 } xl_heap_update;
 
-#define SizeOfHeapUpdate       (offsetof(xl_heap_update, flags) + sizeof(uint8))
+#define SizeOfHeapUpdate       (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
 
 /*
  * This is what we need to know about vacuum page cleanup/redirect
@@ -218,12 +195,10 @@ typedef struct xl_heap_update
  */
 typedef struct xl_heap_clean
 {
-       RelFileNode node;
-       BlockNumber block;
        TransactionId latestRemovedXid;
        uint16          nredirected;
        uint16          ndead;
-       /* OFFSET NUMBERS FOLLOW */
+       /* OFFSET NUMBERS are in the block reference 0 */
 } xl_heap_clean;
 
 #define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
@@ -251,8 +226,8 @@ typedef struct xl_heap_cleanup_info
 /* This is what we need to know about lock */
 typedef struct xl_heap_lock
 {
-       xl_heaptid      target;                 /* locked tuple id */
        TransactionId locking_xid;      /* might be a MultiXactId not xid */
+       OffsetNumber offnum;            /* locked tuple's offset on page */
        int8            infobits_set;   /* infomask and infomask2 bits to set */
 } xl_heap_lock;
 
@@ -261,8 +236,8 @@ typedef struct xl_heap_lock
 /* This is what we need to know about locking an updated version of a row */
 typedef struct xl_heap_lock_updated
 {
-       xl_heaptid      target;
        TransactionId xmax;
+       OffsetNumber offnum;
        uint8           infobits_set;
 } xl_heap_lock_updated;
 
@@ -271,11 +246,11 @@ typedef struct xl_heap_lock_updated
 /* This is what we need to know about in-place update */
 typedef struct xl_heap_inplace
 {
-       xl_heaptid      target;                 /* updated tuple id */
+       OffsetNumber offnum;            /* updated tuple's offset on page */
        /* TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_inplace;
 
-#define SizeOfHeapInplace      (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
+#define SizeOfHeapInplace      (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))
 
 /*
  * This struct represents a 'freeze plan', which is what we need to know about
@@ -296,23 +271,26 @@ typedef struct xl_heap_freeze_tuple
 
 /*
  * This is what we need to know about a block being frozen during vacuum
+ *
+ * Backup block 0's data contains an array of xl_heap_freeze_tuple structs,
+ * one for each tuple.
  */
 typedef struct xl_heap_freeze_page
 {
-       RelFileNode node;
-       BlockNumber block;
        TransactionId cutoff_xid;
        uint16          ntuples;
-       xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
 } xl_heap_freeze_page;
 
-#define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples)
+#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16))
 
-/* This is what we need to know about setting a visibility map bit */
+/*
+ * This is what we need to know about setting a visibility map bit
+ *
+ * Backup blk 0: visibility map buffer
+ * Backup blk 1: heap buffer
+ */
 typedef struct xl_heap_visible
 {
-       RelFileNode node;
-       BlockNumber block;
        TransactionId cutoff_xid;
 } xl_heap_visible;
 
@@ -338,10 +316,11 @@ typedef struct xl_heap_new_cid
        /*
         * Store the relfilenode/ctid pair to facilitate lookups.
         */
-       xl_heaptid      target;
+       RelFileNode target_node;
+       ItemPointerData target_tid;
 } xl_heap_new_cid;
 
-#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
+#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
 
 /* logical rewrite xlog record header */
 typedef struct xl_heap_rewrite_mapping
@@ -357,13 +336,13 @@ typedef struct xl_heap_rewrite_mapping
 extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
                                                                           TransactionId *latestRemovedXid);
 
-extern void heap_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void heap_desc(StringInfo buf, XLogRecord *record);
+extern void heap_redo(XLogReaderState *record);
+extern void heap_desc(StringInfo buf, XLogReaderState *record);
 extern const char *heap_identify(uint8 info);
-extern void heap2_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void heap2_desc(StringInfo buf, XLogRecord *record);
+extern void heap2_redo(XLogReaderState *record);
+extern void heap2_desc(StringInfo buf, XLogReaderState *record);
 extern const char *heap2_identify(uint8 info);
-extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r);
+extern void heap_xlog_logical_rewrite(XLogReaderState *r);
 
 extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
                                          TransactionId latestRemovedXid);
index 294d21bd18080333d3676eea4c08f53307e56a7c..300c2a52f02ac41eb4202f519cc6c20b3a986348 100644 (file)
@@ -498,6 +498,7 @@ do { \
  * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page.
  */
 #define MaxHeapTupleSize  (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
+#define MinHeapTupleSize  MAXALIGN(offsetof(HeapTupleHeaderData, t_bits))
 
 /*
  * MaxHeapTuplesPerPage is an upper bound on the number of tuples that can
index de17936b106ece6927f6a33dd1c26267949aaf20..e4dc51e872088aaab3a851a3ce3de417c6b72325 100644 (file)
@@ -133,6 +133,7 @@ typedef IndexAttributeBitMapData *IndexAttributeBitMap;
  * IndexTupleData struct.  We arrive at the divisor because each tuple
  * must be maxaligned, and it must have an associated item pointer.
  */
+#define MinIndexTupleSize MAXALIGN(sizeof(IndexTupleData) + 1)
 #define MaxIndexTuplesPerPage  \
        ((int) ((BLCKSZ - SizeOfPageHeaderData) / \
                        (MAXALIGN(sizeof(IndexTupleData) + 1) + sizeof(ItemIdData))))
index 43d737505d2435842a60b5a3b9e878c8ed9e4a70..ac58a3766d57dd29aaeff8ba3e4d14b2d1b4cfb4 100644 (file)
@@ -11,7 +11,7 @@
 #ifndef MULTIXACT_H
 #define MULTIXACT_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 
 
@@ -135,8 +135,8 @@ extern void multixact_twophase_postcommit(TransactionId xid, uint16 info,
 extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
                                                         void *recdata, uint32 len);
 
-extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void multixact_desc(StringInfo buf, XLogRecord *record);
+extern void multixact_redo(XLogReaderState *record);
+extern void multixact_desc(StringInfo buf, XLogReaderState *record);
 extern const char *multixact_identify(uint8 info);
 extern char *mxid_to_string(MultiXactId multi, int nmembers,
                           MultiXactMember *members);
index 6ecd2ced62d62bf8b4014a1769636849a57594aa..d3d258bcc9f3f33dcbd99228e6530db283a4ab06 100644 (file)
@@ -17,7 +17,7 @@
 #include "access/genam.h"
 #include "access/itup.h"
 #include "access/sdir.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "catalog/pg_index.h"
 #include "lib/stringinfo.h"
 #include "storage/bufmgr.h"
@@ -227,15 +227,6 @@ typedef struct BTMetaPageData
 #define XLOG_BTREE_REUSE_PAGE  0xD0    /* old page is about to be reused from
                                                                                 * FSM */
 
-/*
- * All that we need to find changed index tuple
- */
-typedef struct xl_btreetid
-{
-       RelFileNode node;
-       ItemPointerData tid;            /* changed tuple id */
-} xl_btreetid;
-
 /*
  * All that we need to regenerate the meta-data page
  */
@@ -252,16 +243,17 @@ typedef struct xl_btree_metadata
  *
  * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
  * Note that INSERT_META implies it's not a leaf page.
+ *
+ * Backup Blk 0: original page (data contains the inserted tuple)
+ * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
+ * Backup Blk 2: xl_btree_metadata, if INSERT_META
  */
 typedef struct xl_btree_insert
 {
-       xl_btreetid target;                     /* inserted tuple id */
-       /* BlockNumber finishes_split field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
-       /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
-       /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
+       OffsetNumber offnum;
 } xl_btree_insert;
 
-#define SizeOfBtreeInsert      (offsetof(xl_btreetid, tid) + SizeOfIptrData)
+#define SizeOfBtreeInsert      (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
 
 /*
  * On insert with split, we save all the items going into the right sibling
@@ -278,45 +270,41 @@ typedef struct xl_btree_insert
  * the root page, and thus that a newroot record rather than an insert or
  * split record should follow.  Note that a split record never carries a
  * metapage update --- we'll do that in the parent-level update.
+ *
+ * Backup Blk 0: original page / new left page
+ *
+ * The left page's data portion contains the new item, if it's the _L variant.
+ * (In the _R variants, the new item is one of the right page's tuples.)
+ * If level > 0, an IndexTuple representing the HIKEY of the left page
+ * follows.  We don't need this on leaf pages, because it's the same as the
+ * leftmost key in the new right page.
+ *
+ * Backup Blk 1: new right page
+ *
+ * The right page's data portion contains the right page's tuples in the
+ * form used by _bt_restore_page.
+ *
+ * Backup Blk 2: next block (orig page's rightlink), if any
+ * Backup Blk 3: child's left sibling, if non-leaf split
  */
 typedef struct xl_btree_split
 {
-       RelFileNode node;
-       BlockNumber leftsib;            /* orig page / new left page */
-       BlockNumber rightsib;           /* new right page */
-       BlockNumber rnext;                      /* next block (orig page's rightlink) */
        uint32          level;                  /* tree level of page being split */
        OffsetNumber firstright;        /* first item moved to right page */
-
-       /*
-        * In the _L variants, next are OffsetNumber newitemoff and the new item.
-        * (In the _R variants, the new item is one of the right page's tuples.)
-        * The new item, but not newitemoff, is suppressed if XLogInsert chooses
-        * to store the left page's whole page image.
-        *
-        * If level > 0, an IndexTuple representing the HIKEY of the left page
-        * follows.  We don't need this on leaf pages, because it's the same as
-        * the leftmost key in the new right page.  Also, it's suppressed if
-        * XLogInsert chooses to store the left page's whole page image.
-        *
-        * If level > 0, BlockNumber of the page whose incomplete-split flag this
-        * insertion clears. (not aligned)
-        *
-        * Last are the right page's tuples in the form used by _bt_restore_page.
-        */
+       OffsetNumber newitemoff;        /* new item's offset (if placed on left page) */
 } xl_btree_split;
 
-#define SizeOfBtreeSplit       (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber))
+#define SizeOfBtreeSplit       (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
 
 /*
  * This is what we need to know about delete of individual leaf index tuples.
  * The WAL record can represent deletion of any number of index tuples on a
  * single index page when *not* executed by VACUUM.
+ *
+ * Backup Blk 0: index page
  */
 typedef struct xl_btree_delete
 {
-       RelFileNode node;                       /* RelFileNode of the index */
-       BlockNumber block;
        RelFileNode hnode;                      /* RelFileNode of the heap the index currently
                                                                 * points at */
        int                     nitems;
@@ -361,8 +349,6 @@ typedef struct xl_btree_reuse_page
  */
 typedef struct xl_btree_vacuum
 {
-       RelFileNode node;
-       BlockNumber block;
        BlockNumber lastBlockVacuumed;
 
        /* TARGET OFFSET NUMBERS FOLLOW */
@@ -376,10 +362,13 @@ typedef struct xl_btree_vacuum
  * remove this tuple's downlink and the *following* tuple's key).  Note that
  * the leaf page is empty, so we don't need to store its content --- it is
  * just reinitialized during recovery using the rest of the fields.
+ *
+ * Backup Blk 0: leaf block
+ * Backup Blk 1: top parent
  */
 typedef struct xl_btree_mark_page_halfdead
 {
-       xl_btreetid target;                     /* deleted tuple id in parent page */
+       OffsetNumber poffset;           /* offset of deleted tuple in parent page */
 
        /* information needed to recreate the leaf page: */
        BlockNumber leafblk;            /* leaf block ultimately being deleted */
@@ -394,11 +383,15 @@ typedef struct xl_btree_mark_page_halfdead
  * This is what we need to know about deletion of a btree page.  Note we do
  * not store any content for the deleted page --- it is just rewritten as empty
  * during recovery, apart from resetting the btpo.xact.
+ *
+ * Backup Blk 0: target block being deleted
+ * Backup Blk 1: target block's left sibling, if any
+ * Backup Blk 2: target block's right sibling
+ * Backup Blk 3: leaf block (if different from target)
+ * Backup Blk 4: metapage (if rightsib becomes new fast root)
  */
 typedef struct xl_btree_unlink_page
 {
-       RelFileNode node;
-       BlockNumber deadblk;            /* target block being deleted */
        BlockNumber leftsib;            /* target block's left sibling, if any */
        BlockNumber rightsib;           /* target block's right sibling */
 
@@ -406,7 +399,6 @@ typedef struct xl_btree_unlink_page
         * Information needed to recreate the leaf page, when target is an
         * internal page.
         */
-       BlockNumber leafblk;
        BlockNumber leafleftsib;
        BlockNumber leafrightsib;
        BlockNumber topparent;          /* next child down in the branch */
@@ -423,13 +415,15 @@ typedef struct xl_btree_unlink_page
  *
  * Note that although this implies rewriting the metadata page, we don't need
  * an xl_btree_metadata record --- the rootblk and level are sufficient.
+ *
+ * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
+ * Backup Blk 1: left child (if splitting an old root)
+ * Backup Blk 2: metapage
  */
 typedef struct xl_btree_newroot
 {
-       RelFileNode node;
-       BlockNumber rootblk;            /* location of new root */
+       BlockNumber rootblk;            /* location of new root (redundant with blk 0) */
        uint32          level;                  /* its tree level */
-       /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
 } xl_btree_newroot;
 
 #define SizeOfBtreeNewroot     (offsetof(xl_btree_newroot, level) + sizeof(uint32))
@@ -726,8 +720,8 @@ extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
 /*
  * prototypes for functions in nbtxlog.c
  */
-extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void btree_desc(StringInfo buf, XLogRecord *record);
+extern void btree_redo(XLogReaderState *record);
+extern void btree_desc(StringInfo buf, XLogReaderState *record);
 extern const char *btree_identify(uint8 info);
 
 #endif   /* NBTREE_H */
index ccf1ed77869b859419ce9c94d958d6dc9cbef9af..3aa96bde86f1f11ac8d20d1487e6ca836a74ed17 100644 (file)
@@ -15,7 +15,7 @@
 #define SPGIST_H
 
 #include "access/skey.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "fmgr.h"
 #include "lib/stringinfo.h"
 
@@ -197,8 +197,8 @@ extern Datum spgbulkdelete(PG_FUNCTION_ARGS);
 extern Datum spgvacuumcleanup(PG_FUNCTION_ARGS);
 
 /* spgxlog.c */
-extern void spg_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void spg_desc(StringInfo buf, XLogRecord *record);
+extern void spg_redo(XLogReaderState *record);
+extern void spg_desc(StringInfo buf, XLogReaderState *record);
 extern const char *spg_identify(uint8 info);
 extern void spg_xlog_startup(void);
 extern void spg_xlog_cleanup(void);
index 3330644651c316339709e461900f73b9252b09a3..4b6fdee8017134aa1989d6970c02449d27efe3a7 100644 (file)
@@ -18,7 +18,6 @@
 #include "access/spgist.h"
 #include "nodes/tidbitmap.h"
 #include "storage/buf.h"
-#include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
 
@@ -351,35 +350,8 @@ typedef SpGistDeadTupleData *SpGistDeadTuple;
 
 /*
  * XLOG stuff
- *
- * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof
  */
 
-#define ACCEPT_RDATA_DATA(p, s, i)     \
-       do { \
-               Assert((i) < lengthof(rdata)); \
-               rdata[i].data = (char *) (p); \
-               rdata[i].len = (s); \
-               rdata[i].buffer = InvalidBuffer; \
-               rdata[i].buffer_std = true; \
-               rdata[i].next = NULL; \
-               if ((i) > 0) \
-                       rdata[(i) - 1].next = rdata + (i); \
-       } while(0)
-
-#define ACCEPT_RDATA_BUFFER(b, i)  \
-       do { \
-               Assert((i) < lengthof(rdata)); \
-               rdata[i].data = NULL; \
-               rdata[i].len = 0; \
-               rdata[i].buffer = (b); \
-               rdata[i].buffer_std = true; \
-               rdata[i].next = NULL; \
-               if ((i) > 0) \
-                       rdata[(i) - 1].next = rdata + (i); \
-       } while(0)
-
-
 /* XLOG record types for SPGiST */
 #define XLOG_SPGIST_CREATE_INDEX       0x00
 #define XLOG_SPGIST_ADD_LEAF           0x10
@@ -408,36 +380,36 @@ typedef struct spgxlogState
                (d).isBuild = (s)->isBuild; \
        } while(0)
 
-
+/*
+ * Backup Blk 0: destination page for leaf tuple
+ * Backup Blk 1: parent page (if any)
+ */
 typedef struct spgxlogAddLeaf
 {
-       RelFileNode node;
-
-       BlockNumber blknoLeaf;          /* destination page for leaf tuple */
        bool            newPage;                /* init dest page? */
        bool            storesNulls;    /* page is in the nulls tree? */
        OffsetNumber offnumLeaf;        /* offset where leaf tuple gets placed */
        OffsetNumber offnumHeadLeaf;    /* offset of head tuple in chain, if any */
 
-       BlockNumber blknoParent;        /* where the parent downlink is, if any */
-       OffsetNumber offnumParent;
+       OffsetNumber offnumParent;      /* where the parent downlink is, if any */
        uint16          nodeI;
 
        /* new leaf tuple follows (unaligned!) */
 } spgxlogAddLeaf;
 
+/*
+ * Backup Blk 0: source leaf page
+ * Backup Blk 1: destination leaf page
+ * Backup Blk 2: parent page
+ */
 typedef struct spgxlogMoveLeafs
 {
-       RelFileNode node;
-
-       BlockNumber blknoSrc;           /* source leaf page */
-       BlockNumber blknoDst;           /* destination leaf page */
        uint16          nMoves;                 /* number of tuples moved from source page */
        bool            newPage;                /* init dest page? */
        bool            replaceDead;    /* are we replacing a DEAD source tuple? */
        bool            storesNulls;    /* pages are in the nulls tree? */
 
-       BlockNumber blknoParent;        /* where the parent downlink is */
+       /* where the parent downlink is */
        OffsetNumber offnumParent;
        uint16          nodeI;
 
@@ -452,11 +424,6 @@ typedef struct spgxlogMoveLeafs
         * Note: if replaceDead is true then there is only one inserted tuple
         * number and only one leaf tuple in the data, because we are not copying
         * the dead tuple from the source
-        *
-        * Buffer references in the rdata array are:
-        *              Src page
-        *              Dest page
-        *              Parent page
         *----------
         */
        OffsetNumber offsets[1];
@@ -464,21 +431,43 @@ typedef struct spgxlogMoveLeafs
 
 #define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
 
+/*
+ * Backup Blk 0: original page
+ * Backup Blk 1: where new tuple goes, if not same place
+ * Backup Blk 2: where parent downlink is, if updated and different from
+ *                              the old and new
+ */
 typedef struct spgxlogAddNode
 {
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number of original inner tuple */
-       OffsetNumber offnum;            /* offset of original inner tuple */
-
-       BlockNumber blknoParent;        /* where parent downlink is, if updated */
-       OffsetNumber offnumParent;
-       uint16          nodeI;
+       /*
+        * Offset of the original inner tuple, in the original page (on backup
+        * block 0).
+        */
+       OffsetNumber offnum;
 
-       BlockNumber blknoNew;           /* where new tuple goes, if not same place */
+       /*
+        * Offset of the new tuple, on the new page (on backup block 1). Invalid
+        * if we overwrote the old tuple in the original page.
+        */
        OffsetNumber offnumNew;
        bool            newPage;                /* init new page? */
 
+       /*----
+        * Where is the parent downlink? parentBlk indicates which page it's on,
+        * and offnumParent is the offset within the page. The possible values for
+        * parentBlk are:
+        *
+        * 0: parent == original page
+        * 1: parent == new page
+        * 2: parent == different page (blk ref 2)
+        * -1: parent not updated (so this must be a signed type, not plain char)
+        *----
+        */
+       int8            parentBlk;
+       OffsetNumber offnumParent;      /* offset within the parent page */
+
+       uint16          nodeI;
+
        spgxlogState stateSrc;
 
        /*
@@ -486,41 +475,51 @@ typedef struct spgxlogAddNode
         */
 } spgxlogAddNode;
 
+/*
+ * Backup Blk 0: where the prefix tuple goes
+ * Backup Blk 1: where the postfix tuple goes (if different page)
+ */
 typedef struct spgxlogSplitTuple
 {
-       RelFileNode node;
-
-       BlockNumber blknoPrefix;        /* where the prefix tuple goes */
+       /* where the prefix tuple goes */
        OffsetNumber offnumPrefix;
 
-       BlockNumber blknoPostfix;       /* where the postfix tuple goes */
+       /* where the postfix tuple goes */
        OffsetNumber offnumPostfix;
        bool            newPage;                /* need to init that page? */
+       bool            postfixBlkSame; /* was postfix tuple put on same page as
+                                                                * prefix? */
 
        /*
-        * new prefix inner tuple follows, then new postfix inner tuple
-        * (both are unaligned!)
+        * new prefix inner tuple follows, then new postfix inner tuple (both are
+        * unaligned!)
         */
 } spgxlogSplitTuple;
 
+/*
+ * Block references in the WAL record are:
+ * Backup Blk 0: Src page (only if not root)
+ * Backup Blk 1: Dest page (if used)
+ * Backup Blk 2: Inner page
+ * Backup Blk 3: Parent page (if any, and different from Inner)
+ */
 typedef struct spgxlogPickSplit
 {
-       RelFileNode node;
+       bool            isRootSplit;
 
-       BlockNumber blknoSrc;           /* original leaf page */
-       BlockNumber blknoDest;          /* other leaf page, if any */
        uint16          nDelete;                /* n to delete from Src */
        uint16          nInsert;                /* n to insert on Src and/or Dest */
        bool            initSrc;                /* re-init the Src page? */
        bool            initDest;               /* re-init the Dest page? */
 
-       BlockNumber blknoInner;         /* where to put new inner tuple */
+       /* where to put new inner tuple */
        OffsetNumber offnumInner;
        bool            initInner;              /* re-init the Inner page? */
 
        bool            storesNulls;    /* pages are in the nulls tree? */
 
-       BlockNumber blknoParent;        /* where the parent downlink is, if any */
+       /* where the parent downlink is, if any */
+       bool            innerIsParent;  /* is parent the same as inner page? */
        OffsetNumber offnumParent;
        uint16          nodeI;
 
@@ -533,24 +532,15 @@ typedef struct spgxlogPickSplit
         *              array of page selector bytes for inserted tuples, length nInsert
         *              new inner tuple (unaligned!)
         *              list of leaf tuples, length nInsert (unaligned!)
-        *
-        * Buffer references in the rdata array are:
-        *              Src page (only if not root and not being init'd)
-        *              Dest page (if used and not being init'd)
-        *              Inner page (only if not being init'd)
-        *              Parent page (if any; could be same as Inner)
         *----------
         */
-       OffsetNumber    offsets[1];
+       OffsetNumber offsets[1];
 } spgxlogPickSplit;
 
 #define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, offsets)
 
 typedef struct spgxlogVacuumLeaf
 {
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number to clean */
        uint16          nDead;                  /* number of tuples to become DEAD */
        uint16          nPlaceholder;   /* number of tuples to become PLACEHOLDER */
        uint16          nMove;                  /* number of tuples to move */
@@ -576,9 +566,6 @@ typedef struct spgxlogVacuumLeaf
 typedef struct spgxlogVacuumRoot
 {
        /* vacuum a root page when it is also a leaf */
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number to clean */
        uint16          nDelete;                /* number of tuples to delete */
 
        spgxlogState stateSrc;
@@ -591,9 +578,6 @@ typedef struct spgxlogVacuumRoot
 
 typedef struct spgxlogVacuumRedirect
 {
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number to clean */
        uint16          nToPlaceholder; /* number of redirects to make placeholders */
        OffsetNumber firstPlaceholder;          /* first placeholder tuple to remove */
        TransactionId newestRedirectXid;        /* newest XID of removed redirects */
index 11a51b26859ae4caba192d113ad082d960eca655..b018aa4f5d830e72d519d5cef06d26772679b850 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef XACT_H
 #define XACT_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "nodes/pg_list.h"
 #include "storage/relfilenode.h"
@@ -256,8 +256,8 @@ extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg);
 
 extern int     xactGetCommittedChildren(TransactionId **ptr);
 
-extern void xact_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void xact_desc(StringInfo buf, XLogRecord *record);
+extern void xact_redo(XLogReaderState *record);
+extern void xact_desc(StringInfo buf, XLogReaderState *record);
 extern const char *xact_identify(uint8 info);
 
 #endif   /* XACT_H */
index 6f8b5f46e100d2fe8d76ec0c450744c039f43929..d06fbc0ec1efe0141c2bb558128aad237880fb1c 100644 (file)
@@ -14,7 +14,7 @@
 #include "access/rmgr.h"
 #include "access/xlogdefs.h"
 #include "access/xloginsert.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
 
@@ -186,7 +186,9 @@ typedef struct CheckpointStatsData
 
 extern CheckpointStatsData CheckpointStats;
 
-extern XLogRecPtr XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn);
+struct XLogRecData;
+
+extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
@@ -198,8 +200,8 @@ extern XLogSegNo XLogGetLastRemovedSegno(void);
 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
 extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
 
-extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void xlog_desc(StringInfo buf, XLogRecord *record);
+extern void xlog_redo(XLogReaderState *record);
+extern void xlog_desc(StringInfo buf, XLogReaderState *record);
 extern const char *xlog_identify(uint8 info);
 
 extern void issue_xlog_fsync(int fd, XLogSegNo segno);
index 19b2ef8d90d862951c9fb3493c31ffaa63dd3b5b..423ef4d7fa0d63414cd8122b009677ca504cf5da 100644 (file)
@@ -20,7 +20,7 @@
 #define XLOG_INTERNAL_H
 
 #include "access/xlogdefs.h"
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
 #include "pgtime.h"
@@ -31,7 +31,7 @@
 /*
  * Each page of XLOG file has a header like this:
  */
-#define XLOG_PAGE_MAGIC 0xD080 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD081 /* can be used as WAL version indicator */
 
 typedef struct XLogPageHeaderData
 {
@@ -203,6 +203,17 @@ typedef struct xl_end_of_recovery
        TimeLineID      PrevTimeLineID; /* previous TLI we forked off from */
 } xl_end_of_recovery;
 
+/*
+ * The functions in xloginsert.c construct a chain of XLogRecData structs
+ * to represent the final WAL record.
+ */
+typedef struct XLogRecData
+{
+       struct XLogRecData *next;       /* next struct in chain, or NULL */
+       char       *data;                       /* start of rmgr data to include */
+       uint32          len;                    /* length of rmgr data to include */
+} XLogRecData;
+
 /*
  * Method table for resource managers.
  *
@@ -219,8 +230,8 @@ typedef struct xl_end_of_recovery
 typedef struct RmgrData
 {
        const char *rm_name;
-       void            (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
-       void            (*rm_desc) (StringInfo buf, XLogRecord *rptr);
+       void            (*rm_redo) (XLogReaderState *record);
+       void            (*rm_desc) (StringInfo buf, XLogReaderState *record);
        const char *(*rm_identify) (uint8 info);
        void            (*rm_startup) (void);
        void            (*rm_cleanup) (void);
index 30c2e84cbc9a076d980660ce13dbf61fa1378742..e5ab71e230563f18ca60598b8cb262bfb9344a90 100644 (file)
 #include "storage/relfilenode.h"
 
 /*
- * The rmgr data to be written by XLogInsert() is defined by a chain of
- * one or more XLogRecData structs.  (Multiple structs would be used when
- * parts of the source data aren't physically adjacent in memory, or when
- * multiple associated buffers need to be specified.)
- *
- * If buffer is valid then XLOG will check if buffer must be backed up
- * (ie, whether this is first change of that page since last checkpoint).
- * If so, the whole page contents are attached to the XLOG record, and XLOG
- * sets XLR_BKP_BLOCK(N) bit in xl_info.  Note that the buffer must be pinned
- * and exclusive-locked by the caller, so that it won't change under us.
- * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
- * this XLogRecData struct into the XLOG record, since we assume it's present
- * in the buffer.  Therefore, rmgr redo routines MUST pay attention to
- * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
- * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- *
- * When buffer is valid, caller must set buffer_std to indicate whether the
- * page uses standard pd_lower/pd_upper header fields.  If this is true, then
- * XLOG is allowed to omit the free space between pd_lower and pd_upper from
- * the backed-up page image.  Note that even when buffer_std is false, the
- * page MUST have an LSN field as its first eight bytes!
- *
- * Note: data can be NULL to indicate no rmgr data associated with this chain
- * entry.  This can be sensible (ie, not a wasted entry) if buffer is valid.
- * The implication is that the buffer has been changed by the operation being
- * logged, and so may need to be backed up, but the change can be redone using
- * only information already present elsewhere in the XLOG entry.
+ * The minimum size of the WAL construction working area. If you need to
+ * register more than XLR_NORMAL_MAX_BLOCK_ID block references or have more
+ * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call
+ * XLogEnsureRecordSpace() first to allocate more working memory.
  */
-typedef struct XLogRecData
-{
-       char       *data;                       /* start of rmgr data to include */
-       uint32          len;                    /* length of rmgr data to include */
-       Buffer          buffer;                 /* buffer associated with data, if any */
-       bool            buffer_std;             /* buffer has standard pd_lower/pd_upper */
-       struct XLogRecData *next;       /* next struct in chain, or NULL */
-} XLogRecData;
+#define XLR_NORMAL_MAX_BLOCK_ID                4
+#define XLR_NORMAL_RDATAS                      20
+
+/* flags for XLogRegisterBuffer */
+#define REGBUF_FORCE_IMAGE     0x01    /* force a full-page image */
+#define REGBUF_NO_IMAGE                0x02    /* don't take a full-page image */
+#define REGBUF_WILL_INIT       (0x04 | 0x02)   /* page will be re-initialized at
+                                                                        * replay (implies NO_IMAGE) */
+#define REGBUF_STANDARD                0x08    /* page follows "standard" page layout,
+                                                                        * (data between pd_lower and pd_upper
+                                                                        * will be skipped) */
+#define REGBUF_KEEP_DATA       0x10    /* include data even if a full-page image
+                                                                        * is taken */
+
+/* prototypes for public functions in xloginsert.c: */
+extern void XLogBeginInsert(void);
+extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
+extern void XLogRegisterData(char *data, int len);
+extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
+extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
+                                 ForkNumber forknum, BlockNumber blknum, char *page,
+                                 uint8 flags);
+extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
+extern void XLogResetInsertion(void);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
 
-extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
                        BlockNumber blk, char *page, bool page_std);
 extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
 extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
-extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
+
+extern void InitXLogInsert(void);
 
 #endif   /* XLOGINSERT_H */
index ea873a2d9c76d68b36639ff4e3827cd61989f8ca..eb6cc8996a5f2a11877b78891db89ecc8b2ceeff 100644 (file)
  *
  *             The basic idea is to allocate an XLogReaderState via
  *             XLogReaderAllocate(), and call XLogReadRecord() until it returns NULL.
+ *
+ *             After reading a record with XLogReadRecord(), it's decomposed into
+ *             the per-block and main data parts, and the parts can be accessed
+ *             with the XLogRec* macros and functions. You can also decode a
+ *             record that's already constructed in memory, without reading from
+ *             disk, by calling the DecodeXLogRecord() function.
  *-------------------------------------------------------------------------
  */
 #ifndef XLOGREADER_H
 #define XLOGREADER_H
 
-#include "access/xlog_internal.h"
+#include "access/xlogrecord.h"
 
 typedef struct XLogReaderState XLogReaderState;
 
@@ -31,6 +37,32 @@ typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
                                                                                   char *readBuf,
                                                                                   TimeLineID *pageTLI);
 
+typedef struct
+{
+       /* Is this block ref in use? */
+       bool            in_use;
+
+       /* Identify the block this refers to */
+       RelFileNode rnode;
+       ForkNumber      forknum;
+       BlockNumber blkno;
+
+       /* copy of the fork_flags field from the XLogRecordBlockHeader */
+       uint8           flags;
+
+       /* Information on full-page image, if any */
+       bool            has_image;
+       char       *bkp_image;
+       uint16          hole_offset;
+       uint16          hole_length;
+
+       /* Buffer holding the rmgr-specific data associated with this block */
+       bool            has_data;
+       char       *data;
+       uint16          data_len;
+       uint16          data_bufsz;
+} DecodedBkpBlock;
+
 struct XLogReaderState
 {
        /* ----------------------------------------
@@ -79,6 +111,25 @@ struct XLogReaderState
        XLogRecPtr      ReadRecPtr;             /* start of last record read */
        XLogRecPtr      EndRecPtr;              /* end+1 of last record read */
 
+
+       /* ----------------------------------------
+        * Decoded representation of current record
+        *
+        * Use XLogRecGet* functions to investigate the record; these fields
+        * should not be accessed directly.
+        * ----------------------------------------
+        */
+       XLogRecord *decoded_record; /* currently decoded record */
+
+       char       *main_data;          /* record's main data portion */
+       uint32          main_data_len;  /* main data portion's length */
+       uint32          main_data_bufsz;        /* allocated size of the buffer */
+
+       /* information about blocks referenced by the record. */
+       DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID + 1];
+
+       int                     max_block_id;   /* highest block_id in use (-1 if none) */
+
        /* ----------------------------------------
         * private/internal state
         * ----------------------------------------
@@ -123,4 +174,28 @@ extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
 extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
 #endif   /* FRONTEND */
 
+/* Functions for decoding an XLogRecord */
+
+extern bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record,
+                                char **errmsg);
+
+#define XLogRecGetTotalLen(decoder) ((decoder)->decoded_record->xl_tot_len)
+#define XLogRecGetPrev(decoder) ((decoder)->decoded_record->xl_prev)
+#define XLogRecGetInfo(decoder) ((decoder)->decoded_record->xl_info)
+#define XLogRecGetRmid(decoder) ((decoder)->decoded_record->xl_rmid)
+#define XLogRecGetXid(decoder) ((decoder)->decoded_record->xl_xid)
+#define XLogRecGetData(decoder) ((decoder)->main_data)
+#define XLogRecGetDataLen(decoder) ((decoder)->main_data_len)
+#define XLogRecHasAnyBlockRefs(decoder) ((decoder)->max_block_id >= 0)
+#define XLogRecHasBlockRef(decoder, block_id) \
+       ((decoder)->blocks[block_id].in_use)
+#define XLogRecHasBlockImage(decoder, block_id) \
+       ((decoder)->blocks[block_id].has_image)
+
+extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *dst);
+extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
+extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
+                                  RelFileNode *rnode, ForkNumber *forknum,
+                                  BlockNumber *blknum);
+
 #endif   /* XLOGREADER_H */
index ab0fb1c50046e77a92e90ef5d4c99d9056cb1db2..11ddfac9c7ff503670ffb2406ffee65375a6b5c0 100644 (file)
 /*
  * The overall layout of an XLOG record is:
  *             Fixed-size header (XLogRecord struct)
- *             rmgr-specific data
- *             BkpBlock
- *             backup block data
- *             BkpBlock
- *             backup block data
+ *             XLogRecordBlockHeader struct
+ *             XLogRecordBlockHeader struct
  *             ...
+ *             XLogRecordDataHeader[Short|Long] struct
+ *             block data
+ *             block data
+ *             ...
+ *             main data
  *
- * where there can be zero to four backup blocks (as signaled by xl_info flag
- * bits).  XLogRecord structs always start on MAXALIGN boundaries in the WAL
- * files, and we round up SizeOfXLogRecord so that the rmgr data is also
- * guaranteed to begin on a MAXALIGN boundary.  However, no padding is added
- * to align BkpBlock structs or backup block data.
+ * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
+ * rmgr-specific data not associated with a block.  XLogRecord structs
+ * always start on MAXALIGN boundaries in the WAL files, but the rest of
+ * the fields are not aligned.
  *
- * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks.  xl_tot_len counts everything.  Neither
- * length field is rounded up to an alignment boundary.
+ * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
+ * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
+ * used to distinguish between block references, and the main data structs.
  */
 typedef struct XLogRecord
 {
        uint32          xl_tot_len;             /* total len of entire record */
        TransactionId xl_xid;           /* xact id */
-       uint32          xl_len;                 /* total len of rmgr data */
+       XLogRecPtr      xl_prev;                /* ptr to previous record in log */
        uint8           xl_info;                /* flag bits, see below */
        RmgrId          xl_rmid;                /* resource manager for this record */
        /* 2 bytes of padding here, initialize to zero */
-       XLogRecPtr      xl_prev;                /* ptr to previous record in log */
        pg_crc32        xl_crc;                 /* CRC for this record */
 
-       /* If MAXALIGN==8, there are 4 wasted bytes here */
-
-       /* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
+       /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
 
 } XLogRecord;
 
-#define SizeOfXLogRecord       MAXALIGN(sizeof(XLogRecord))
-
-#define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
+#define SizeOfXLogRecord       (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32))
 
 /*
- * XLOG uses only low 4 bits of xl_info.  High 4 bits may be used by rmgr.
+ * The high 4 bits in xl_info may be used freely by rmgr. The
+ * XLR_SPECIAL_REL_UPDATE bit can be passed by XLogInsert caller. The rest
+ * are set internally by XLogInsert.
  */
 #define XLR_INFO_MASK                  0x0F
+#define XLR_RMGR_INFO_MASK             0xF0
 
 /*
- * If we backed up any disk blocks with the XLOG record, we use flag bits in
- * xl_info to signal it.  We support backup of up to 4 disk blocks per XLOG
- * record.
+ * If a WAL record modifies any relation files, in ways not covered by the
+ * usual block references, this flag is set. This is not used for anything
+ * by PostgreSQL itself, but it allows external tools that read WAL and keep
+ * track of modified blocks to recognize such special record types.
+ */
+#define XLR_SPECIAL_REL_UPDATE 0x01
+
+/*
+ * Header info for block data appended to an XLOG record.
+ *
+ * Note that we don't attempt to align the XLogRecordBlockHeader struct!
+ * So, the struct must be copied to aligned local storage before use.
+ * 'data_length' is the length of the rmgr-specific payload data associated
+ * with this block. It does not include the possible full-page image, nor
+ * the XLogRecordBlockHeader struct itself.
  */
-#define XLR_BKP_BLOCK_MASK             0x0F    /* all info bits used for bkp blocks */
-#define XLR_MAX_BKP_BLOCKS             4
-#define XLR_BKP_BLOCK(iblk)            (0x08 >> (iblk))                /* iblk in 0..3 */
+typedef struct XLogRecordBlockHeader
+{
+       uint8           id;                             /* block reference ID */
+       uint8           fork_flags;             /* fork within the relation, and flags */
+       uint16          data_length;    /* number of payload bytes (not including page
+                                                                * image) */
+
+       /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
+       /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
+       /* BlockNumber follows */
+} XLogRecordBlockHeader;
+
+#define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
 
 /*
- * Header info for a backup block appended to an XLOG record.
+ * Additional header information when a full-page image is included
+ * (i.e. when BKPBLOCK_HAS_IMAGE is set).
  *
  * As a trivial form of data compression, the XLOG code is aware that
  * PG data pages usually contain an unused "hole" in the middle, which
  * contains only zero bytes.  If hole_length > 0 then we have removed
  * such a "hole" from the stored data (and it's not counted in the
  * XLOG record's CRC, either).  Hence, the amount of block data actually
- * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
- *
- * Note that we don't attempt to align either the BkpBlock struct or the
- * block's data.  So, the struct must be copied to aligned local storage
- * before use.
+ * present is BLCKSZ - hole_length bytes.
  */
-typedef struct BkpBlock
+typedef struct XLogRecordBlockImageHeader
 {
-       RelFileNode node;                       /* relation containing block */
-       ForkNumber      fork;                   /* fork within the relation */
-       BlockNumber block;                      /* block number */
        uint16          hole_offset;    /* number of bytes before "hole" */
        uint16          hole_length;    /* number of bytes in "hole" */
+} XLogRecordBlockImageHeader;
+
+#define SizeOfXLogRecordBlockImageHeader sizeof(XLogRecordBlockImageHeader)
+
+/*
+ * Maximum size of the header for a block reference. This is used to size a
+ * temporary buffer for constructing the header.
+ */
+#define MaxSizeOfXLogRecordBlockHeader \
+       (SizeOfXLogRecordBlockHeader + \
+        SizeOfXLogRecordBlockImageHeader + \
+        sizeof(RelFileNode) + \
+        sizeof(BlockNumber))
+
+/*
+ * The fork number fits in the lower 4 bits in the fork_flags field. The upper
+ * bits are used for flags.
+ */
+#define BKPBLOCK_FORK_MASK     0x0F
+#define BKPBLOCK_FLAG_MASK     0xF0
+#define BKPBLOCK_HAS_IMAGE     0x10    /* full-page image included; an XLogRecordBlockImageHeader follows */
+#define BKPBLOCK_HAS_DATA      0x20    /* NOTE(review): presumably rmgr-specific block data is included -- confirm */
+#define BKPBLOCK_WILL_INIT     0x40    /* redo will re-init the page */
+#define BKPBLOCK_SAME_REL      0x80    /* RelFileNode omitted, same as previous */
+
+/*
+ * XLogRecordDataHeaderShort/Long are used for the "main data" portion of
+ * the record. If the length of the data is less than 256 bytes, the short
+ * form is used, with a single byte to hold the length. Otherwise the long
+ * form is used.
+ *
+ * (These structs are currently not used in the code, they are here just for
+ * documentation purposes).
+ */
+typedef struct XLogRecordDataHeaderShort
+{
+       uint8           id;                             /* XLR_BLOCK_ID_DATA_SHORT */
+       uint8           data_length;    /* number of payload bytes */
+} XLogRecordDataHeaderShort;
+
+#define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
+
+typedef struct XLogRecordDataHeaderLong
+{
+       uint8           id;                             /* XLR_BLOCK_ID_DATA_LONG */
+       /* followed by uint32 data_length, unaligned */
+} XLogRecordDataHeaderLong;
+
+#define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
+
+/*
+ * Block IDs used to distinguish different kinds of record fragments. Block
+ * references are numbered from 0 to XLR_MAX_BLOCK_ID. A rmgr is free to use
+ * any ID number in that range (although you should stick to small numbers,
+ * because the WAL machinery is optimized for that case). A couple of ID
+ * numbers are reserved to denote the "main" data portion of the record.
+ *
+ * The maximum is currently set at 32, quite arbitrarily. Most records only
+ * need a handful of block references, but there are a few exceptions that
+ * need more.
+ */
+#define XLR_MAX_BLOCK_ID                       32
+
+#define XLR_BLOCK_ID_DATA_SHORT                255     /* 'id' of an XLogRecordDataHeaderShort */
+#define XLR_BLOCK_ID_DATA_LONG         254     /* 'id' of an XLogRecordDataHeaderLong */
 
-       /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
-} BkpBlock;
 
 #endif   /* XLOGRECORD_H */
index 8d906967232c8a27412fcdb0156965e57aa46bed..68f72cfac6dd5ec8c567563977c1a4d527e551b2 100644 (file)
@@ -11,7 +11,7 @@
 #ifndef XLOG_UTILS_H
 #define XLOG_UTILS_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "storage/bufmgr.h"
 
 
@@ -33,26 +33,17 @@ typedef enum
                                                 * replayed) */
 } XLogRedoAction;
 
-extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record,
-                                         int block_index, RelFileNode rnode, BlockNumber blkno,
-                                         Buffer *buf);
-extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn,
-                                                         XLogRecord *record, int block_index,
-                                                         RelFileNode rnode, ForkNumber forkno,
-                                                         BlockNumber blkno,
+extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record,
+                                         uint8 buffer_id, Buffer *buf);
+extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id);
+extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record,
+                                                         uint8 buffer_id,
                                                          ReadBufferMode mode, bool get_cleanup_lock,
                                                          Buffer *buf);
 
-extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
 extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
                                           BlockNumber blkno, ReadBufferMode mode);
 
-extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
-                                  int block_index,
-                                  bool get_cleanup_lock, bool keep_buffer);
-extern Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
-                                                char *blk, bool get_cleanup_lock, bool keep_buffer);
-
 extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
 extern void FreeFakeRelcacheEntry(Relation fakerel);
 
index 6c687e3a8273f35225fdac44de4bd0b977bbe99a..31a51c42f60bb8a3bf333a0a74452327078a2629 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef STORAGE_XLOG_H
 #define STORAGE_XLOG_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "storage/block.h"
 #include "storage/relfilenode.h"
@@ -44,8 +44,8 @@ typedef struct xl_smgr_truncate
 
 extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
 
-extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void smgr_desc(StringInfo buf, XLogRecord *record);
+extern void smgr_redo(XLogReaderState *record);
+extern void smgr_desc(StringInfo buf, XLogReaderState *record);
 extern const char *smgr_identify(uint8 info);
 
 #endif   /* STORAGE_XLOG_H */
index b79d9fc8648be3ffe87dbf48a294f9f5b5dd9b99..bcf0e48cbb2e42c50e0c50d9215e524ab82e2dde 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef DBCOMMANDS_H
 #define DBCOMMANDS_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "nodes/parsenodes.h"
 
@@ -63,8 +63,8 @@ extern Oid    AlterDatabaseOwner(const char *dbname, Oid newOwnerId);
 extern Oid     get_database_oid(const char *dbname, bool missingok);
 extern char *get_database_name(Oid dbid);
 
-extern void dbase_redo(XLogRecPtr lsn, XLogRecord *rptr);
-extern void dbase_desc(StringInfo buf, XLogRecord *rptr);
+extern void dbase_redo(XLogReaderState *rptr);
+extern void dbase_desc(StringInfo buf, XLogReaderState *rptr);
 extern const char *dbase_identify(uint8 info);
 
 extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype);
index 7cbe6f9a8195975f77473662c6915cc420b53606..386f1e677cf6ad6c5b34e4830d07e83e5bba36ec 100644 (file)
@@ -13,7 +13,7 @@
 #ifndef SEQUENCE_H
 #define SEQUENCE_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "fmgr.h"
 #include "lib/stringinfo.h"
 #include "nodes/parsenodes.h"
@@ -77,8 +77,8 @@ extern Oid    AlterSequence(AlterSeqStmt *stmt);
 extern void ResetSequence(Oid seq_relid);
 extern void ResetSequenceCaches(void);
 
-extern void seq_redo(XLogRecPtr lsn, XLogRecord *rptr);
-extern void seq_desc(StringInfo buf, XLogRecord *rptr);
+extern void seq_redo(XLogReaderState *rptr);
+extern void seq_desc(StringInfo buf, XLogReaderState *rptr);
 extern const char *seq_identify(uint8 info);
 
 #endif   /* SEQUENCE_H */
index afd9e05cb780dffa3fa8d0afece80fbb2bf5c5f4..80e021e2d4ca5a3afe388dae72f6cdc15eb285f0 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef TABLESPACE_H
 #define TABLESPACE_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "nodes/parsenodes.h"
 
@@ -56,8 +56,8 @@ extern char *get_tablespace_name(Oid spc_oid);
 
 extern bool directory_is_empty(const char *path);
 
-extern void tblspc_redo(XLogRecPtr lsn, XLogRecord *rptr);
-extern void tblspc_desc(StringInfo buf, XLogRecord *rptr);
+extern void tblspc_redo(XLogReaderState *rptr);
+extern void tblspc_desc(StringInfo buf, XLogReaderState *rptr);
 extern const char *tblspc_identify(uint8 info);
 
 #endif   /* TABLESPACE_H */
index e4185287a1cd680b08299d357574a262d30fff3f..385c4a7b50840cddab40a94f1dce3c0bd43c3d56 100644 (file)
@@ -15,6 +15,6 @@
 #include "replication/logical.h"
 
 void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx,
-                                                        XLogRecord *record);
+                                                        XLogReaderState *record);
 
 #endif
index c89989fd201197e7f577c12aec68ddd3d3cc25d9..d2599be0cfba6db4bec7e66e7048ec02589c577e 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef STANDBY_H
 #define STANDBY_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 #include "storage/lock.h"
 #include "storage/procsignal.h"
@@ -82,8 +82,8 @@ typedef struct xl_running_xacts
 
 
 /* Recovery handlers for the Standby Rmgr (RM_STANDBY_ID) */
-extern void standby_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void standby_desc(StringInfo buf, XLogRecord *record);
+extern void standby_redo(XLogReaderState *record);
+extern void standby_desc(StringInfo buf, XLogReaderState *record);
 extern const char *standby_identify(uint8 info);
 
 /*
index bd5836b0d982346df5f7c699aba111c2f8cec0f7..1f2c960ebe646aa1471a35acf5fdcea82394bb6d 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef RELMAPPER_H
 #define RELMAPPER_H
 
-#include "access/xlogrecord.h"
+#include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 
 /* ----------------
@@ -59,8 +59,8 @@ extern void RelationMapInitialize(void);
 extern void RelationMapInitializePhase2(void);
 extern void RelationMapInitializePhase3(void);
 
-extern void relmap_redo(XLogRecPtr lsn, XLogRecord *record);
-extern void relmap_desc(StringInfo buf, XLogRecord *record);
+extern void relmap_redo(XLogReaderState *record);
+extern void relmap_desc(StringInfo buf, XLogReaderState *record);
 extern const char *relmap_identify(uint8 info);
 
 #endif   /* RELMAPPER_H */