granicus.if.org Git - postgresql/commitdiff
Modify XLogInsert API to make callers specify whether pages to be backed [...]
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 6 Jun 2005 20:22:58 +0000 (20:22 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 6 Jun 2005 20:22:58 +0000 (20:22 +0000)

Modify XLogInsert API to make callers specify whether pages to be backed
up have the standard layout with unused space between pd_lower and pd_upper.
When this is set, XLogInsert will omit the unused space without bothering
to scan it to see if it's zero.  That saves time in XLogInsert, and also
allows reversion of my earlier patch to make PageRepairFragmentation et al
explicitly re-zero freed space.  Per suggestion by Heikki Linnakangas.

14 files changed:
src/backend/access/heap/heapam.c
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtsort.c
src/backend/access/transam/clog.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/commands/dbcommands.c
src/backend/commands/sequence.c
src/backend/commands/tablecmds.c
src/backend/commands/tablespace.c
src/backend/storage/page/bufpage.c
src/backend/storage/smgr/smgr.c
src/include/access/xlog.h

index fa9f1a207635f5d0aacfe6dc6305d3d13fa9dba7..433a4b4538cd40cebc39c94725111bb9ef494b93 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.192 2005/06/06 17:01:22 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.193 2005/06/06 20:22:56 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1107,9 +1107,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
 
                xlrec.target.node = relation->rd_node;
                xlrec.target.tid = tup->t_self;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfHeapInsert;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
                xlhdr.t_natts = tup->t_data->t_natts;
@@ -1121,15 +1121,17 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
                 * decides to write the whole page to the xlog, we don't need to
                 * store xl_heap_header in the xlog.
                 */
-               rdata[1].buffer = buffer;
                rdata[1].data = (char *) &xlhdr;
                rdata[1].len = SizeOfHeapHeader;
+               rdata[1].buffer = buffer;
+               rdata[1].buffer_std = true;
                rdata[1].next = &(rdata[2]);
 
-               rdata[2].buffer = buffer;
                /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
                rdata[2].data = (char *) tup->t_data + offsetof(HeapTupleHeaderData, t_bits);
                rdata[2].len = tup->t_len - offsetof(HeapTupleHeaderData, t_bits);
+               rdata[2].buffer = buffer;
+               rdata[2].buffer_std = true;
                rdata[2].next = NULL;
 
                /*
@@ -1378,14 +1380,15 @@ l1:
 
                xlrec.target.node = relation->rd_node;
                xlrec.target.tid = tp.t_self;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfHeapDelete;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = buffer;
                rdata[1].data = NULL;
                rdata[1].len = 0;
+               rdata[1].buffer = buffer;
+               rdata[1].buffer_std = true;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
@@ -2226,14 +2229,15 @@ l3:
                xlrec.target.node = relation->rd_node;
                xlrec.target.tid = tuple->t_self;
                xlrec.shared_lock = (mode == LockTupleShared);
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfHeapLock;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = *buffer;
                rdata[1].data = NULL;
                rdata[1].len = 0;
+               rdata[1].buffer = *buffer;
+               rdata[1].buffer_std = true;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
@@ -2330,9 +2334,9 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
        xlrec.node = reln->rd_node;
        xlrec.block = BufferGetBlockNumber(buffer);
 
-       rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfHeapClean;
+       rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);
 
        /*
@@ -2340,7 +2344,6 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
         * that it is.  When XLogInsert stores the whole buffer, the offsets
         * array need not be stored too.
         */
-       rdata[1].buffer = buffer;
        if (uncnt > 0)
        {
                rdata[1].data = (char *) unused;
@@ -2351,6 +2354,8 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
                rdata[1].data = NULL;
                rdata[1].len = 0;
        }
+       rdata[1].buffer = buffer;
+       rdata[1].buffer_std = true;
        rdata[1].next = NULL;
 
        recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CLEAN, rdata);
@@ -2388,14 +2393,15 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
        xlrec.target.node = reln->rd_node;
        xlrec.target.tid = from;
        xlrec.newtid = newtup->t_self;
-       rdata[0].buffer = InvalidBuffer;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfHeapUpdate;
+       rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);
 
-       rdata[1].buffer = oldbuf;
        rdata[1].data = NULL;
        rdata[1].len = 0;
+       rdata[1].buffer = oldbuf;
+       rdata[1].buffer_std = true;
        rdata[1].next = &(rdata[2]);
 
        xlhdr.hdr.t_natts = newtup->t_data->t_natts;
@@ -2420,15 +2426,17 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
         * As with insert records, we need not store the rdata[2] segment if
         * we decide to store the whole buffer instead.
         */
-       rdata[2].buffer = newbuf;
        rdata[2].data = (char *) &xlhdr;
        rdata[2].len = hsize;
+       rdata[2].buffer = newbuf;
+       rdata[2].buffer_std = true;
        rdata[2].next = &(rdata[3]);
 
-       rdata[3].buffer = newbuf;
        /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
        rdata[3].data = (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits);
        rdata[3].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
+       rdata[3].buffer = newbuf;
+       rdata[3].buffer_std = true;
        rdata[3].next = NULL;
 
        /* If new tuple is the single and first tuple on page... */
index 868a91ab3a5efa16c1294d7389693f7c31b26916..9b9fa44e6ed15aabc07165ec8b22e0096812c5ec 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.120 2005/03/21 01:23:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.121 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -564,9 +564,9 @@ _bt_insertonpg(Relation rel,
                        xlrec.target.node = rel->rd_node;
                        ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
 
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) &xlrec;
                        rdata[0].len = SizeOfBtreeInsert;
+                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].next = nextrdata = &(rdata[1]);
 
                        if (BufferIsValid(metabuf))
@@ -576,9 +576,9 @@ _bt_insertonpg(Relation rel,
                                xlmeta.fastroot = metad->btm_fastroot;
                                xlmeta.fastlevel = metad->btm_fastlevel;
 
-                               nextrdata->buffer = InvalidBuffer;
                                nextrdata->data = (char *) &xlmeta;
                                nextrdata->len = sizeof(xl_btree_metadata);
+                               nextrdata->buffer = InvalidBuffer;
                                nextrdata->next = nextrdata + 1;
                                nextrdata++;
                                xlinfo = XLOG_BTREE_INSERT_META;
@@ -603,6 +603,7 @@ _bt_insertonpg(Relation rel,
                                        (sizeof(BTItemData) - sizeof(IndexTupleData));
                        }
                        nextrdata->buffer = buf;
+                       nextrdata->buffer_std = true;
                        nextrdata->next = NULL;
 
                        recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
@@ -853,28 +854,29 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
                xlrec.leftlen = ((PageHeader) leftpage)->pd_special -
                        ((PageHeader) leftpage)->pd_upper;
 
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfBtreeSplit;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) leftpage + ((PageHeader) leftpage)->pd_upper;
                rdata[1].len = xlrec.leftlen;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = &(rdata[2]);
 
-               rdata[2].buffer = InvalidBuffer;
                rdata[2].data = (char *) rightpage + ((PageHeader) rightpage)->pd_upper;
                rdata[2].len = ((PageHeader) rightpage)->pd_special -
                        ((PageHeader) rightpage)->pd_upper;
+               rdata[2].buffer = InvalidBuffer;
                rdata[2].next = NULL;
 
                if (!P_RIGHTMOST(ropaque))
                {
                        rdata[2].next = &(rdata[3]);
-                       rdata[3].buffer = sbuf;
                        rdata[3].data = NULL;
                        rdata[3].len = 0;
+                       rdata[3].buffer = sbuf;
+                       rdata[3].buffer_std = true;
                        rdata[3].next = NULL;
                }
 
@@ -1464,19 +1466,19 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
                xlrec.rootblk = rootblknum;
                xlrec.level = metad->btm_level;
 
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfBtreeNewroot;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
                /*
                 * Direct access to page is not good but faster - we should
                 * implement some new func in page API.
                 */
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
                rdata[1].len = ((PageHeader) rootpage)->pd_special -
                        ((PageHeader) rootpage)->pd_upper;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
index b9d42bad6d2aba1b97a0e9ae901277d4089568bc..f3ce5bd64a9c59373b6a01cb0ac740c8f1ae8245 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.85 2005/06/02 05:55:28 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.86 2005/06/06 20:22:57 tgl Exp $
  *
  *     NOTES
  *        Postgres btree pages look like ordinary relation pages.      The opaque
@@ -74,9 +74,9 @@ _bt_metapinit(Relation rel)
                xlrec.meta.fastroot = metad->btm_fastroot;
                xlrec.meta.fastlevel = metad->btm_fastlevel;
 
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfBtreeNewmeta;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = NULL;
 
                recptr = XLogInsert(RM_BTREE_ID,
@@ -248,9 +248,9 @@ _bt_getroot(Relation rel, int access)
                        xlrec.rootblk = rootblkno;
                        xlrec.level = 0;
 
-                       rdata.buffer = InvalidBuffer;
                        rdata.data = (char *) &xlrec;
                        rdata.len = SizeOfBtreeNewroot;
+                       rdata.buffer = InvalidBuffer;
                        rdata.next = NULL;
 
                        recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
@@ -666,9 +666,9 @@ _bt_delitems(Relation rel, Buffer buf,
                xlrec.node = rel->rd_node;
                xlrec.block = BufferGetBlockNumber(buf);
 
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfBtreeDelete;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
                /*
@@ -676,7 +676,6 @@ _bt_delitems(Relation rel, Buffer buf,
                 * it is.  When XLogInsert stores the whole buffer, the offsets
                 * array need not be stored too.
                 */
-               rdata[1].buffer = buf;
                if (nitems > 0)
                {
                        rdata[1].data = (char *) itemnos;
@@ -687,6 +686,8 @@ _bt_delitems(Relation rel, Buffer buf,
                        rdata[1].data = NULL;
                        rdata[1].len = 0;
                }
+               rdata[1].buffer = buf;
+               rdata[1].buffer_std = true;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
@@ -1038,9 +1039,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
                xlrec.leftblk = leftsib;
                xlrec.rightblk = rightsib;
 
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfBtreeDeletePage;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = nextrdata = &(rdata[1]);
 
                if (BufferIsValid(metabuf))
@@ -1050,9 +1051,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
                        xlmeta.fastroot = metad->btm_fastroot;
                        xlmeta.fastlevel = metad->btm_fastlevel;
 
-                       nextrdata->buffer = InvalidBuffer;
                        nextrdata->data = (char *) &xlmeta;
                        nextrdata->len = sizeof(xl_btree_metadata);
+                       nextrdata->buffer = InvalidBuffer;
                        nextrdata->next = nextrdata + 1;
                        nextrdata++;
                        xlinfo = XLOG_BTREE_DELETE_PAGE_META;
@@ -1060,24 +1061,27 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
                else
                        xlinfo = XLOG_BTREE_DELETE_PAGE;
 
-               nextrdata->buffer = pbuf;
                nextrdata->data = NULL;
                nextrdata->len = 0;
                nextrdata->next = nextrdata + 1;
+               nextrdata->buffer = pbuf;
+               nextrdata->buffer_std = true;
                nextrdata++;
 
-               nextrdata->buffer = rbuf;
                nextrdata->data = NULL;
                nextrdata->len = 0;
+               nextrdata->buffer = rbuf;
+               nextrdata->buffer_std = true;
                nextrdata->next = NULL;
 
                if (BufferIsValid(lbuf))
                {
                        nextrdata->next = nextrdata + 1;
                        nextrdata++;
-                       nextrdata->buffer = lbuf;
                        nextrdata->data = NULL;
                        nextrdata->len = 0;
+                       nextrdata->buffer = lbuf;
+                       nextrdata->buffer_std = true;
                        nextrdata->next = NULL;
                }
 
index 011040151183263dac2eb9527fed95ea1169428e..115708b78094a94a759656e922296708e26f195a 100644 (file)
@@ -56,7 +56,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.90 2004/12/31 21:59:22 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.91 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -287,14 +287,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
                xlrec.node = wstate->index->rd_node;
                xlrec.blkno = blkno;
 
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = SizeOfHeapNewpage;
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) page;
                rdata[1].len = BLCKSZ;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
index 73362330c104447e79588c33cca6c62d53805860..2da835dbbe5adb178710515d576fb7d11d5203a8 100644 (file)
@@ -24,7 +24,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.29 2005/06/06 17:01:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.30 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -379,9 +379,9 @@ WriteZeroPageXlogRec(int pageno)
 {
        XLogRecData rdata;
 
-       rdata.buffer = InvalidBuffer;
        rdata.data = (char *) (&pageno);
        rdata.len = sizeof(int);
+       rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
        (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata);
 }
index 3eb58a96a8d5e73f86efdd360391e49190f5cbf5..040a4ab0b79068faa24e961237e30a436560f5b0 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.203 2005/06/06 17:01:22 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.204 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -719,25 +719,25 @@ RecordTransactionCommit(void)
                        xlrec.xtime = time(NULL);
                        xlrec.nrels = nrels;
                        xlrec.nsubxacts = nchildren;
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) (&xlrec);
                        rdata[0].len = MinSizeOfXactCommit;
+                       rdata[0].buffer = InvalidBuffer;
                        /* dump rels to delete */
                        if (nrels > 0)
                        {
                                rdata[0].next = &(rdata[1]);
-                               rdata[1].buffer = InvalidBuffer;
                                rdata[1].data = (char *) rptr;
                                rdata[1].len = nrels * sizeof(RelFileNode);
+                               rdata[1].buffer = InvalidBuffer;
                                lastrdata = 1;
                        }
                        /* dump committed child Xids */
                        if (nchildren > 0)
                        {
                                rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].buffer = InvalidBuffer;
                                rdata[2].data = (char *) children;
                                rdata[2].len = nchildren * sizeof(TransactionId);
+                               rdata[2].buffer = InvalidBuffer;
                                lastrdata = 2;
                        }
                        rdata[lastrdata].next = NULL;
@@ -1019,25 +1019,25 @@ RecordTransactionAbort(void)
                        xlrec.xtime = time(NULL);
                        xlrec.nrels = nrels;
                        xlrec.nsubxacts = nchildren;
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) (&xlrec);
                        rdata[0].len = MinSizeOfXactAbort;
+                       rdata[0].buffer = InvalidBuffer;
                        /* dump rels to delete */
                        if (nrels > 0)
                        {
                                rdata[0].next = &(rdata[1]);
-                               rdata[1].buffer = InvalidBuffer;
                                rdata[1].data = (char *) rptr;
                                rdata[1].len = nrels * sizeof(RelFileNode);
+                               rdata[1].buffer = InvalidBuffer;
                                lastrdata = 1;
                        }
                        /* dump committed child Xids */
                        if (nchildren > 0)
                        {
                                rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].buffer = InvalidBuffer;
                                rdata[2].data = (char *) children;
                                rdata[2].len = nchildren * sizeof(TransactionId);
+                               rdata[2].buffer = InvalidBuffer;
                                lastrdata = 2;
                        }
                        rdata[lastrdata].next = NULL;
@@ -1205,25 +1205,25 @@ RecordSubTransactionAbort(void)
                        xlrec.xtime = time(NULL);
                        xlrec.nrels = nrels;
                        xlrec.nsubxacts = nchildren;
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) (&xlrec);
                        rdata[0].len = MinSizeOfXactAbort;
+                       rdata[0].buffer = InvalidBuffer;
                        /* dump rels to delete */
                        if (nrels > 0)
                        {
                                rdata[0].next = &(rdata[1]);
-                               rdata[1].buffer = InvalidBuffer;
                                rdata[1].data = (char *) rptr;
                                rdata[1].len = nrels * sizeof(RelFileNode);
+                               rdata[1].buffer = InvalidBuffer;
                                lastrdata = 1;
                        }
                        /* dump committed child Xids */
                        if (nchildren > 0)
                        {
                                rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].buffer = InvalidBuffer;
                                rdata[2].data = (char *) children;
                                rdata[2].len = nchildren * sizeof(TransactionId);
+                               rdata[2].buffer = InvalidBuffer;
                                lastrdata = 2;
                        }
                        rdata[lastrdata].next = NULL;
index c1609356d39c3c66b8765d396de16471e5de120a..aa37244162ad110eb236eb4a6f11b9d7a14d8d61 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.196 2005/06/06 17:01:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.197 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -434,7 +434,8 @@ static void exitArchiveRecovery(TimeLineID endTLI,
                                        uint32 endLogId, uint32 endLogSeg);
 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
 
-static void SetBkpBlock(BkpBlock *bkpb, Buffer buffer);
+static bool XLogCheckBuffer(XLogRecData *rdata,
+                                                       XLogRecPtr *lsn, BkpBlock *bkpb);
 static bool AdvanceXLInsertBuffer(void);
 static void XLogWrite(XLogwrtRqst WriteRqst);
 static int XLogFileInit(uint32 log, uint32 seg,
@@ -473,7 +474,7 @@ static void remove_backup_label(void);
 /*
  * Insert an XLOG record having the specified RMID and info bytes,
  * with the body of the record being the data chunk(s) described by
- * the rdata list (see xlog.h for notes about rdata).
+ * the rdata chain (see xlog.h for notes about rdata).
  *
  * Returns XLOG pointer to end of record (beginning of next record).
  * This can be used as LSN for data pages affected by the logged action.
@@ -532,7 +533,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        }
 
        /*
-        * Here we scan the rdata list, determine which buffers must be backed
+        * Here we scan the rdata chain, determine which buffers must be backed
         * up, and compute the CRC values for the data.  Note that the record
         * header isn't added into the CRC initially since we don't know the
         * final length or info bits quite yet.  Thus, the CRC will represent
@@ -543,13 +544,13 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
         * below. We could prevent the race by doing all this work while
         * holding the insert lock, but it seems better to avoid doing CRC
         * calculations while holding the lock.  This means we have to be
-        * careful about modifying the rdata list until we know we aren't
+        * careful about modifying the rdata chain until we know we aren't
         * going to loop back again.  The only change we allow ourselves to
-        * make earlier is to set rdt->data = NULL in list items we have
+        * make earlier is to set rdt->data = NULL in chain items we have
         * decided we will have to back up the whole buffer for.  This is OK
         * because we will certainly decide the same thing again for those
         * items if we do it over; doing it here saves an extra pass over the
-        * list later.
+        * chain later.
         */
 begin:;
        for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
@@ -575,7 +576,7 @@ begin:;
                        {
                                if (rdt->buffer == dtbuf[i])
                                {
-                                       /* Buffer already referenced by earlier list item */
+                                       /* Buffer already referenced by earlier chain item */
                                        if (dtbuf_bkp[i])
                                                rdt->data = NULL;
                                        else if (rdt->data)
@@ -589,15 +590,9 @@ begin:;
                                {
                                        /* OK, put it in this slot */
                                        dtbuf[i] = rdt->buffer;
-
-                                       /*
-                                        * XXX We assume page LSN is first data on page
-                                        */
-                                       dtbuf_lsn[i] = *((XLogRecPtr *) BufferGetBlock(rdt->buffer));
-                                       if (XLByteLE(dtbuf_lsn[i], RedoRecPtr))
+                                       if (XLogCheckBuffer(rdt, &(dtbuf_lsn[i]), &(dtbuf_xlg[i])))
                                        {
                                                dtbuf_bkp[i] = true;
-                                               SetBkpBlock(&(dtbuf_xlg[i]), rdt->buffer);
                                                rdt->data = NULL;
                                        }
                                        else if (rdt->data)
@@ -612,7 +607,7 @@ begin:;
                                elog(PANIC, "can backup at most %d blocks per xlog record",
                                         XLR_MAX_BKP_BLOCKS);
                }
-               /* Break out of loop when rdt points to last list item */
+               /* Break out of loop when rdt points to last chain item */
                if (rdt->next == NULL)
                        break;
                rdt = rdt->next;
@@ -726,15 +721,15 @@ begin:;
        }
 
        /*
-        * Make additional rdata list entries for the backup blocks, so that
+        * Make additional rdata chain entries for the backup blocks, so that
         * we don't need to special-case them in the write loop.  Note that we
-        * have now irrevocably changed the input rdata list.  At the exit of
+        * have now irrevocably changed the input rdata chain.  At the exit of
         * this loop, write_len includes the backup block data.
         *
         * Also set the appropriate info bits to show which buffers were backed
         * up.  The i'th XLR_SET_BKP_BLOCK bit corresponds to the i'th
         * distinct buffer value (ignoring InvalidBuffer) appearing in the
-        * rdata list.
+        * rdata chain.
         */
        write_len = len;
        for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
@@ -742,7 +737,7 @@ begin:;
                BkpBlock   *bkpb;
                char       *page;
 
-               if (dtbuf[i] == InvalidBuffer || !(dtbuf_bkp[i]))
+               if (!dtbuf_bkp[i])
                        continue;
 
                info |= XLR_SET_BKP_BLOCK(i);
@@ -938,43 +933,64 @@ begin:;
 }
 
 /*
- * Fill a BkpBlock struct given a buffer containing the page to be saved
- *
- * This is nontrivial only because it has to decide whether to apply "hole
- * compression".
+ * Determine whether the buffer referenced by an XLogRecData item has to
+ * be backed up, and if so fill a BkpBlock struct for it.  In any case
+ * save the buffer's LSN at *lsn.
  */
-static void
-SetBkpBlock(BkpBlock *bkpb, Buffer buffer)
+static bool
+XLogCheckBuffer(XLogRecData *rdata,
+                               XLogRecPtr *lsn, BkpBlock *bkpb)
 {
        PageHeader      page;
-       uint16          offset;
-       uint16          length;
 
-       /* Save page identity info */
-       bkpb->node = BufferGetFileNode(buffer);
-       bkpb->block = BufferGetBlockNumber(buffer);
+       page = (PageHeader) BufferGetBlock(rdata->buffer);
+
+       /*
+        * XXX We assume page LSN is first data on *every* page that can be
+        * passed to XLogInsert, whether it otherwise has the standard page
+        * layout or not.
+        */
+       *lsn = page->pd_lsn;
 
-       /* Test whether there is a "hole" containing zeroes in the page */
-       page = (PageHeader) BufferGetBlock(buffer);
-       offset = page->pd_lower;
-       /* Check if pd_lower appears sane at all */
-       if (offset >= SizeOfPageHeaderData && offset < BLCKSZ)
+       if (XLByteLE(page->pd_lsn, RedoRecPtr))
        {
-               char   *spd = (char *) page + offset;
-               char   *epd = (char *) page + BLCKSZ;
-               char   *pd = spd;
+               /*
+                * The page needs to be backed up, so set up *bkpb
+                */
+               bkpb->node = BufferGetFileNode(rdata->buffer);
+               bkpb->block = BufferGetBlockNumber(rdata->buffer);
+
+               if (rdata->buffer_std)
+               {
+                       /* Assume we can omit data between pd_lower and pd_upper */
+                       uint16          lower = page->pd_lower;
+                       uint16          upper = page->pd_upper;
 
-               while (pd < epd && *pd == '\0')
-                       pd++;
+                       if (lower >= SizeOfPageHeaderData &&
+                               upper > lower &&
+                               upper <= BLCKSZ)
+                       {
+                               bkpb->hole_offset = lower;
+                               bkpb->hole_length = upper - lower;
+                       }
+                       else
+                       {
+                               /* No "hole" to compress out */
+                               bkpb->hole_offset = 0;
+                               bkpb->hole_length = 0;
+                       }
+               }
+               else
+               {
+                       /* Not a standard page header, don't try to eliminate "hole" */
+                       bkpb->hole_offset = 0;
+                       bkpb->hole_length = 0;
+               }
 
-               length = pd - spd;
-               if (length == 0)
-                       offset = 0;
+               return true;                    /* buffer requires backup */
        }
-       else
-               offset = length = 0;
-       bkpb->hole_offset = offset;
-       bkpb->hole_length = length;
+
+       return false;                           /* buffer does not need to be backed up */
 }
 
 /*
@@ -5093,9 +5109,9 @@ CreateCheckPoint(bool shutdown, bool force)
        /*
         * Now insert the checkpoint record into XLOG.
         */
-       rdata.buffer = InvalidBuffer;
        rdata.data = (char *) (&checkPoint);
        rdata.len = sizeof(checkPoint);
+       rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
 
        recptr = XLogInsert(RM_XLOG_ID,
@@ -5197,9 +5213,9 @@ XLogPutNextOid(Oid nextOid)
 {
        XLogRecData rdata;
 
-       rdata.buffer = InvalidBuffer;
        rdata.data = (char *) (&nextOid);
        rdata.len = sizeof(Oid);
+       rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
        (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
        /*
@@ -5220,9 +5236,9 @@ XLogPutNextMultiXactId(MultiXactId nextMulti)
 {
        XLogRecData rdata;
 
-       rdata.buffer = InvalidBuffer;
        rdata.data = (char *) (&nextMulti);
        rdata.len = sizeof(MultiXactId);
+       rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
        (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTMULTI, &rdata);
        /*
index 370977c6586716cdd36719d97e4840547ec63a1d..40cfb3e6586dea0723b5cd2b4910e5d752e14d55 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.158 2005/06/06 17:01:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.159 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -444,9 +444,9 @@ createdb(const CreatedbStmt *stmt)
                        xlrec.src_db_id = src_dboid;
                        xlrec.src_tablespace_id = srctablespace;
 
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) &xlrec;
                        rdata[0].len = sizeof(xl_dbase_create_rec);
+                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].next = NULL;
 
                        (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
@@ -1074,9 +1074,9 @@ remove_dbtablespaces(Oid db_id)
                        xlrec.db_id = db_id;
                        xlrec.tablespace_id = dsttablespace;
 
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) &xlrec;
                        rdata[0].len = sizeof(xl_dbase_drop_rec);
+                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].next = NULL;
 
                        (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
index 3278be1239f74cbabbb67ccf8147d9935a66bae2..78b9225b83cb7dfc6ea81f96752806046ae3eee7 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.121 2005/06/06 17:01:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.122 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -264,14 +264,14 @@ DefineSequence(CreateSeqStmt *seq)
                newseq->log_cnt = 0;
 
                xlrec.node = rel->rd_node;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = sizeof(xl_seq_rec);
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) tuple->t_data;
                rdata[1].len = tuple->t_len;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
@@ -338,15 +338,15 @@ AlterSequence(AlterSeqStmt *stmt)
                XLogRecData rdata[2];
 
                xlrec.node = seqrel->rd_node;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = sizeof(xl_seq_rec);
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper;
                rdata[1].len = ((PageHeader) page)->pd_special -
                        ((PageHeader) page)->pd_upper;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
@@ -531,9 +531,9 @@ nextval(PG_FUNCTION_ARGS)
                XLogRecData rdata[2];
 
                xlrec.node = seqrel->rd_node;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = sizeof(xl_seq_rec);
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
                /* set values that will be saved in xlog */
@@ -541,10 +541,10 @@ nextval(PG_FUNCTION_ARGS)
                seq->is_called = true;
                seq->log_cnt = 0;
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper;
                rdata[1].len = ((PageHeader) page)->pd_special -
                        ((PageHeader) page)->pd_upper;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
@@ -666,9 +666,9 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
                Page            page = BufferGetPage(buf);
 
                xlrec.node = seqrel->rd_node;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = sizeof(xl_seq_rec);
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
                /* set values that will be saved in xlog */
@@ -676,10 +676,10 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
                seq->is_called = true;
                seq->log_cnt = 0;
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper;
                rdata[1].len = ((PageHeader) page)->pd_special -
                        ((PageHeader) page)->pd_upper;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
index c7669dfc58fe6afe43dc771bd14d2ba176222ca7..9981129c0ebf579b7d3441c77aab25e39b30ca4a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.160 2005/06/05 00:38:08 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.161 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -5667,14 +5667,14 @@ copy_relation_data(Relation rel, SMgrRelation dst)
                        xlrec.node = dst->smgr_rnode;
                        xlrec.blkno = blkno;
 
-                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].data = (char *) &xlrec;
                        rdata[0].len = SizeOfHeapNewpage;
+                       rdata[0].buffer = InvalidBuffer;
                        rdata[0].next = &(rdata[1]);
 
-                       rdata[1].buffer = InvalidBuffer;
                        rdata[1].data = (char *) page;
                        rdata[1].len = BLCKSZ;
+                       rdata[1].buffer = InvalidBuffer;
                        rdata[1].next = NULL;
 
                        recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
index f9041a0a6439d41fed872552e3fc6dbf93aedede..fac20708c0d36149c36a56f4f0aeaf0b40869ddf 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.20 2005/06/06 17:01:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.21 2005/06/06 20:22:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -356,14 +356,14 @@ CreateTableSpace(CreateTableSpaceStmt *stmt)
                XLogRecData rdata[2];
 
                xlrec.ts_id = tablespaceoid;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = &(rdata[1]);
 
-               rdata[1].buffer = InvalidBuffer;
                rdata[1].data = (char *) location;
                rdata[1].len = strlen(location) + 1;
+               rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
                (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata);
@@ -461,9 +461,9 @@ DropTableSpace(DropTableSpaceStmt *stmt)
                XLogRecData rdata[1];
 
                xlrec.ts_id = tablespaceoid;
-               rdata[0].buffer = InvalidBuffer;
                rdata[0].data = (char *) &xlrec;
                rdata[0].len = sizeof(xl_tblspc_drop_rec);
+               rdata[0].buffer = InvalidBuffer;
                rdata[0].next = NULL;
 
                (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
index 8f8ba9e0d2b364f3059fc2f7cc21070fdebdfa53..5b3f7bee95f108149e068e1e6a087eaf7226005a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.64 2005/06/02 05:55:28 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.65 2005/06/06 20:22:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -357,7 +357,7 @@ PageRepairFragmentation(Page page, OffsetNumber *unused)
                        lp = PageGetItemId(page, i + 1);
                        lp->lp_len = 0;         /* indicate unused & deallocated */
                }
-               ((PageHeader) page)->pd_upper = pd_upper = pd_special;
+               ((PageHeader) page)->pd_upper = pd_special;
        }
        else
        {                                                       /* nused != 0 */
@@ -411,17 +411,11 @@ PageRepairFragmentation(Page page, OffsetNumber *unused)
                        lp->lp_off = upper;
                }
 
-               ((PageHeader) page)->pd_upper = pd_upper = upper;
+               ((PageHeader) page)->pd_upper = upper;
 
                pfree(itemidbase);
        }
 
-       /*
-        * Zero out the now-free space.  This is not essential, but it allows
-        * xlog.c to compress WAL data better.
-        */
-       MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower);
-
        return (nline - nused);
 }
 
@@ -531,13 +525,6 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
        phdr->pd_upper += size;
        phdr->pd_lower -= sizeof(ItemIdData);
 
-       /*
-        * Zero out the just-freed space.  This is not essential, but it allows
-        * xlog.c to compress WAL data better.
-        */
-       MemSet((char *) page + phdr->pd_lower, 0, sizeof(ItemIdData));
-       MemSet(addr, 0, size);
-
        /*
         * Finally, we need to adjust the linp entries that remain.
         *
@@ -685,14 +672,8 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
                lp->lp_off = upper;
        }
 
-       phdr->pd_lower = pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
-       phdr->pd_upper = pd_upper = upper;
-
-       /*
-        * Zero out the now-free space.  This is not essential, but it allows
-        * xlog.c to compress WAL data better.
-        */
-       MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower);
+       phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
+       phdr->pd_upper = upper;
 
        pfree(itemidbase);
 }
index ca171a3b1a2702995a37e9603e210f87c44c9040..13ad72a3755da61645e118f4a8d7b6d4fdc4f96f 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.88 2005/06/06 17:01:24 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.89 2005/06/06 20:22:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -370,9 +370,9 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
         */
        xlrec.rnode = reln->smgr_rnode;
 
-       rdata.buffer = InvalidBuffer;
        rdata.data = (char *) &xlrec;
        rdata.len = sizeof(xlrec);
+       rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
 
        lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata);
@@ -635,9 +635,9 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
                xlrec.blkno = newblks;
                xlrec.rnode = reln->smgr_rnode;
 
-               rdata.buffer = InvalidBuffer;
                rdata.data = (char *) &xlrec;
                rdata.len = sizeof(xlrec);
+               rdata.buffer = InvalidBuffer;
                rdata.next = NULL;
 
                lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN,
index 554c3c3aec7434362b377a032db564e164f9bca8..7547d7f5b9f8c1b9f34c1a8bdeff3cbc88e6575e 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.63 2005/06/06 17:01:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.64 2005/06/06 20:22:58 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -91,24 +91,42 @@ typedef struct XLogRecord
 extern int     sync_method;
 
 /*
- * List of these structs is used to pass data to XLogInsert().
+ * The rmgr data to be written by XLogInsert() is defined by a chain of
+ * one or more XLogRecData structs.  (Multiple structs would be used when
+ * parts of the source data aren't physically adjacent in memory, or when
+ * multiple associated buffers need to be specified.)
  *
  * If buffer is valid then XLOG will check if buffer must be backed up
  * (ie, whether this is first change of that page since last checkpoint).
  * If so, the whole page contents are attached to the XLOG record, and XLOG
  * sets XLR_BKP_BLOCK_X bit in xl_info.  Note that the buffer must be pinned
- * and locked while this is going on, so that it won't change under us.
- * NB: when this happens, we do not bother to insert the associated data into
- * the XLOG record, since we assume it's present in the buffer.  Therefore,
- * rmgr redo routines MUST pay attention to XLR_BKP_BLOCK_X to know what
- * is actually stored in the XLOG record.
+ * and exclusive-locked by the caller, so that it won't change under us.
+ * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
+ * this XLogRecData struct into the XLOG record, since we assume it's present
+ * in the buffer.  Therefore, rmgr redo routines MUST pay attention to
+ * XLR_BKP_BLOCK_X to know what is actually stored in the XLOG record.
+ * The i'th XLR_BKP_BLOCK bit corresponds to the i'th distinct buffer
+ * value (ignoring InvalidBuffer) appearing in the rdata chain.
+ *
+ * When buffer is valid, caller must set buffer_std to indicate whether the
+ * page uses standard pd_lower/pd_upper header fields.  If this is true, then
+ * XLOG is allowed to omit the free space between pd_lower and pd_upper from
+ * the backed-up page image.  Note that even when buffer_std is false, the
+ * page MUST have an LSN field as its first eight bytes!
+ *
+ * Note: data can be NULL to indicate no rmgr data associated with this chain
+ * entry.  This can be sensible (ie, not a wasted entry) if buffer is valid.
+ * The implication is that the buffer has been changed by the operation being
+ * logged, and so may need to be backed up, but the change can be redone using
+ * only information already present elsewhere in the XLOG entry.
  */
 typedef struct XLogRecData
 {
-       Buffer          buffer;                 /* buffer associated with this data */
-       char       *data;
-       uint32          len;
-       struct XLogRecData *next;
+       char       *data;                       /* start of rmgr data to include */
+       uint32          len;                    /* length of rmgr data to include */
+       Buffer          buffer;                 /* buffer associated with data, if any */
+       bool            buffer_std;             /* buffer has standard pd_lower/pd_upper */
+       struct XLogRecData *next;       /* next struct in chain, or NULL */
 } XLogRecData;
 
 extern TimeLineID ThisTimeLineID;              /* current TLI */