From: Tom Lane Date: Mon, 6 Jun 2005 20:22:58 +0000 (+0000) Subject: Modify XLogInsert API to make callers specify whether pages to be backed X-Git-Tag: REL8_1_0BETA1~630 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ee7ac7b11e9e6623871833729958662a4598fbc5;p=postgresql Modify XLogInsert API to make callers specify whether pages to be backed up have the standard layout with unused space between pd_lower and pd_upper. When this is set, XLogInsert will omit the unused space without bothering to scan it to see if it's zero. That saves time in XLogInsert, and also allows reversion of my earlier patch to make PageRepairFragmentation et al explicitly re-zero freed space. Per suggestion by Heikki Linnakangas. --- diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index fa9f1a2076..433a4b4538 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.192 2005/06/06 17:01:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.193 2005/06/06 20:22:56 tgl Exp $ * * * INTERFACE ROUTINES @@ -1107,9 +1107,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) xlrec.target.node = relation->rd_node; xlrec.target.tid = tup->t_self; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapInsert; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); xlhdr.t_natts = tup->t_data->t_natts; @@ -1121,15 +1121,17 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) * decides to write the whole page to the xlog, we don't need to * store xl_heap_header in the xlog. */ - rdata[1].buffer = buffer; rdata[1].data = (char *) &xlhdr; rdata[1].len = SizeOfHeapHeader; + rdata[1].buffer = buffer; + rdata[1].buffer_std = true; rdata[1].next = &(rdata[2]); - rdata[2].buffer = buffer; /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */ rdata[2].data = (char *) tup->t_data + offsetof(HeapTupleHeaderData, t_bits); rdata[2].len = tup->t_len - offsetof(HeapTupleHeaderData, t_bits); + rdata[2].buffer = buffer; + rdata[2].buffer_std = true; rdata[2].next = NULL; /* @@ -1378,14 +1380,15 @@ l1: xlrec.target.node = relation->rd_node; xlrec.target.tid = tp.t_self; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapDelete; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = buffer; rdata[1].data = NULL; rdata[1].len = 0; + rdata[1].buffer = buffer; + rdata[1].buffer_std = true; rdata[1].next = NULL; recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata); @@ -2226,14 +2229,15 @@ l3: xlrec.target.node = relation->rd_node; xlrec.target.tid = tuple->t_self; xlrec.shared_lock = (mode == LockTupleShared); - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapLock; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = *buffer; rdata[1].data = NULL; rdata[1].len = 0; + rdata[1].buffer = *buffer; + rdata[1].buffer_std = true; rdata[1].next = NULL; recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata); @@ -2330,9 +2334,9 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt) xlrec.node = reln->rd_node; xlrec.block = BufferGetBlockNumber(buffer); - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapClean; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* @@ -2340,7 +2344,6 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt) * that it is. When XLogInsert stores the whole buffer, the offsets * array need not be stored too. */ - rdata[1].buffer = buffer; if (uncnt > 0) { rdata[1].data = (char *) unused; @@ -2351,6 +2354,8 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt) rdata[1].data = NULL; rdata[1].len = 0; } + rdata[1].buffer = buffer; + rdata[1].buffer_std = true; rdata[1].next = NULL; recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CLEAN, rdata); @@ -2388,14 +2393,15 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, xlrec.target.node = reln->rd_node; xlrec.target.tid = from; xlrec.newtid = newtup->t_self; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapUpdate; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = oldbuf; rdata[1].data = NULL; rdata[1].len = 0; + rdata[1].buffer = oldbuf; + rdata[1].buffer_std = true; rdata[1].next = &(rdata[2]); xlhdr.hdr.t_natts = newtup->t_data->t_natts; @@ -2420,15 +2426,17 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, * As with insert records, we need not store the rdata[2] segment if * we decide to store the whole buffer instead. */ - rdata[2].buffer = newbuf; rdata[2].data = (char *) &xlhdr; rdata[2].len = hsize; + rdata[2].buffer = newbuf; + rdata[2].buffer_std = true; rdata[2].next = &(rdata[3]); - rdata[3].buffer = newbuf; /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */ rdata[3].data = (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits); rdata[3].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits); + rdata[3].buffer = newbuf; + rdata[3].buffer_std = true; rdata[3].next = NULL; /* If new tuple is the single and first tuple on page... */ diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 868a91ab3a..9b9fa44e6e 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.120 2005/03/21 01:23:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.121 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -564,9 +564,9 @@ _bt_insertonpg(Relation rel, xlrec.target.node = rel->rd_node; ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off); - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeInsert; + rdata[0].buffer = InvalidBuffer; rdata[0].next = nextrdata = &(rdata[1]); if (BufferIsValid(metabuf)) @@ -576,9 +576,9 @@ _bt_insertonpg(Relation rel, xlmeta.fastroot = metad->btm_fastroot; xlmeta.fastlevel = metad->btm_fastlevel; - nextrdata->buffer = InvalidBuffer; nextrdata->data = (char *) &xlmeta; nextrdata->len = sizeof(xl_btree_metadata); + nextrdata->buffer = InvalidBuffer; nextrdata->next = nextrdata + 1; nextrdata++; xlinfo = XLOG_BTREE_INSERT_META; @@ -603,6 +603,7 @@ _bt_insertonpg(Relation rel, (sizeof(BTItemData) - sizeof(IndexTupleData)); } nextrdata->buffer = buf; + nextrdata->buffer_std = true; nextrdata->next = NULL; recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata); @@ -853,28 +854,29 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, xlrec.leftlen = ((PageHeader) leftpage)->pd_special - ((PageHeader) leftpage)->pd_upper; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeSplit; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) leftpage + ((PageHeader) leftpage)->pd_upper; rdata[1].len = xlrec.leftlen; + rdata[1].buffer = InvalidBuffer; rdata[1].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; rdata[2].data = (char *) rightpage + ((PageHeader) rightpage)->pd_upper; rdata[2].len = ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper; + rdata[2].buffer = InvalidBuffer; rdata[2].next = NULL; if (!P_RIGHTMOST(ropaque)) { rdata[2].next = &(rdata[3]); - rdata[3].buffer = sbuf; rdata[3].data = NULL; rdata[3].len = 0; + rdata[3].buffer = sbuf; + rdata[3].buffer_std = true; rdata[3].next = NULL; } @@ -1464,19 +1466,19 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) xlrec.rootblk = rootblknum; xlrec.level = metad->btm_level; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeNewroot; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* * Direct access to page is not good but faster - we should * implement some new func in page API. */ - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper; rdata[1].len = ((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->pd_upper; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index b9d42bad6d..f3ce5bd64a 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.85 2005/06/02 05:55:28 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.86 2005/06/06 20:22:57 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -74,9 +74,9 @@ _bt_metapinit(Relation rel) xlrec.meta.fastroot = metad->btm_fastroot; xlrec.meta.fastlevel = metad->btm_fastlevel; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeNewmeta; + rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; recptr = XLogInsert(RM_BTREE_ID, @@ -248,9 +248,9 @@ _bt_getroot(Relation rel, int access) xlrec.rootblk = rootblkno; xlrec.level = 0; - rdata.buffer = InvalidBuffer; rdata.data = (char *) &xlrec; rdata.len = SizeOfBtreeNewroot; + rdata.buffer = InvalidBuffer; rdata.next = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata); @@ -666,9 +666,9 @@ _bt_delitems(Relation rel, Buffer buf, xlrec.node = rel->rd_node; xlrec.block = BufferGetBlockNumber(buf); - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeDelete; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* @@ -676,7 +676,6 @@ _bt_delitems(Relation rel, Buffer buf, * it is. When XLogInsert stores the whole buffer, the offsets * array need not be stored too. */ - rdata[1].buffer = buf; if (nitems > 0) { rdata[1].data = (char *) itemnos; @@ -687,6 +686,8 @@ _bt_delitems(Relation rel, Buffer buf, rdata[1].data = NULL; rdata[1].len = 0; } + rdata[1].buffer = buf; + rdata[1].buffer_std = true; rdata[1].next = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata); @@ -1038,9 +1039,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) xlrec.leftblk = leftsib; xlrec.rightblk = rightsib; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeDeletePage; + rdata[0].buffer = InvalidBuffer; rdata[0].next = nextrdata = &(rdata[1]); if (BufferIsValid(metabuf)) @@ -1050,9 +1051,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) xlmeta.fastroot = metad->btm_fastroot; xlmeta.fastlevel = metad->btm_fastlevel; - nextrdata->buffer = InvalidBuffer; nextrdata->data = (char *) &xlmeta; nextrdata->len = sizeof(xl_btree_metadata); + nextrdata->buffer = InvalidBuffer; nextrdata->next = nextrdata + 1; nextrdata++; xlinfo = XLOG_BTREE_DELETE_PAGE_META; @@ -1060,24 +1061,27 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) else xlinfo = XLOG_BTREE_DELETE_PAGE; - nextrdata->buffer = pbuf; nextrdata->data = NULL; nextrdata->len = 0; nextrdata->next = nextrdata + 1; + nextrdata->buffer = pbuf; + nextrdata->buffer_std = true; nextrdata++; - nextrdata->buffer = rbuf; nextrdata->data = NULL; nextrdata->len = 0; + nextrdata->buffer = rbuf; + nextrdata->buffer_std = true; nextrdata->next = NULL; if (BufferIsValid(lbuf)) { nextrdata->next = nextrdata + 1; nextrdata++; - nextrdata->buffer = lbuf; nextrdata->data = NULL; nextrdata->len = 0; + nextrdata->buffer = lbuf; + nextrdata->buffer_std = true; nextrdata->next = NULL; } diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 0110401511..115708b780 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -56,7 +56,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.90 2004/12/31 21:59:22 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.91 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -287,14 +287,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) xlrec.node = wstate->index->rd_node; xlrec.blkno = blkno; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapNewpage; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page; rdata[1].len = BLCKSZ; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 73362330c1..2da835dbbe 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -24,7 +24,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.29 2005/06/06 17:01:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.30 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -379,9 +379,9 @@ WriteZeroPageXlogRec(int pageno) { XLogRecData rdata; - rdata.buffer = InvalidBuffer; rdata.data = (char *) (&pageno); rdata.len = sizeof(int); + rdata.buffer = InvalidBuffer; rdata.next = NULL; (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata); } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 3eb58a96a8..040a4ab0b7 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.203 2005/06/06 17:01:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.204 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -719,25 +719,25 @@ RecordTransactionCommit(void) xlrec.xtime = time(NULL); xlrec.nrels = nrels; xlrec.nsubxacts = nchildren; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) (&xlrec); rdata[0].len = MinSizeOfXactCommit; + rdata[0].buffer = InvalidBuffer; /* dump rels to delete */ if (nrels > 0) { rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) rptr; rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; lastrdata = 1; } /* dump committed child Xids */ if (nchildren > 0) { rdata[lastrdata].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; rdata[2].data = (char *) children; rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; lastrdata = 2; } rdata[lastrdata].next = NULL; @@ -1019,25 +1019,25 @@ RecordTransactionAbort(void) xlrec.xtime = time(NULL); xlrec.nrels = nrels; xlrec.nsubxacts = nchildren; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) (&xlrec); rdata[0].len = MinSizeOfXactAbort; + rdata[0].buffer = InvalidBuffer; /* dump rels to delete */ if (nrels > 0) { rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) rptr; rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; lastrdata = 1; } /* dump committed child Xids */ if (nchildren > 0) { rdata[lastrdata].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; rdata[2].data = (char *) children; rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; lastrdata = 2; } rdata[lastrdata].next = NULL; @@ -1205,25 +1205,25 @@ RecordSubTransactionAbort(void) xlrec.xtime = time(NULL); xlrec.nrels = nrels; xlrec.nsubxacts = nchildren; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) (&xlrec); rdata[0].len = MinSizeOfXactAbort; + rdata[0].buffer = InvalidBuffer; /* dump rels to delete */ if (nrels > 0) { rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) rptr; rdata[1].len = nrels * sizeof(RelFileNode); + rdata[1].buffer = InvalidBuffer; lastrdata = 1; } /* dump committed child Xids */ if (nchildren > 0) { rdata[lastrdata].next = &(rdata[2]); - rdata[2].buffer = InvalidBuffer; rdata[2].data = (char *) children; rdata[2].len = nchildren * sizeof(TransactionId); + rdata[2].buffer = InvalidBuffer; lastrdata = 2; } rdata[lastrdata].next = NULL; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c1609356d3..aa37244162 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.196 2005/06/06 17:01:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.197 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -434,7 +434,8 @@ static void exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); -static void SetBkpBlock(BkpBlock *bkpb, Buffer buffer); +static bool XLogCheckBuffer(XLogRecData *rdata, + XLogRecPtr *lsn, BkpBlock *bkpb); static bool AdvanceXLInsertBuffer(void); static void XLogWrite(XLogwrtRqst WriteRqst); static int XLogFileInit(uint32 log, uint32 seg, @@ -473,7 +474,7 @@ static void remove_backup_label(void); /* * Insert an XLOG record having the specified RMID and info bytes, * with the body of the record being the data chunk(s) described by - * the rdata list (see xlog.h for notes about rdata). + * the rdata chain (see xlog.h for notes about rdata). * * Returns XLOG pointer to end of record (beginning of next record). * This can be used as LSN for data pages affected by the logged action. @@ -532,7 +533,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) } /* - * Here we scan the rdata list, determine which buffers must be backed + * Here we scan the rdata chain, determine which buffers must be backed * up, and compute the CRC values for the data. Note that the record * header isn't added into the CRC initially since we don't know the * final length or info bits quite yet. Thus, the CRC will represent @@ -543,13 +544,13 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) * below. We could prevent the race by doing all this work while * holding the insert lock, but it seems better to avoid doing CRC * calculations while holding the lock. This means we have to be - * careful about modifying the rdata list until we know we aren't + * careful about modifying the rdata chain until we know we aren't * going to loop back again. The only change we allow ourselves to - * make earlier is to set rdt->data = NULL in list items we have + * make earlier is to set rdt->data = NULL in chain items we have * decided we will have to back up the whole buffer for. This is OK * because we will certainly decide the same thing again for those * items if we do it over; doing it here saves an extra pass over the - * list later. + * chain later. */ begin:; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) @@ -575,7 +576,7 @@ begin:; { if (rdt->buffer == dtbuf[i]) { - /* Buffer already referenced by earlier list item */ + /* Buffer already referenced by earlier chain item */ if (dtbuf_bkp[i]) rdt->data = NULL; else if (rdt->data) @@ -589,15 +590,9 @@ begin:; { /* OK, put it in this slot */ dtbuf[i] = rdt->buffer; - - /* - * XXX We assume page LSN is first data on page - */ - dtbuf_lsn[i] = *((XLogRecPtr *) BufferGetBlock(rdt->buffer)); - if (XLByteLE(dtbuf_lsn[i], RedoRecPtr)) + if (XLogCheckBuffer(rdt, &(dtbuf_lsn[i]), &(dtbuf_xlg[i]))) { dtbuf_bkp[i] = true; - SetBkpBlock(&(dtbuf_xlg[i]), rdt->buffer); rdt->data = NULL; } else if (rdt->data) @@ -612,7 +607,7 @@ begin:; elog(PANIC, "can backup at most %d blocks per xlog record", XLR_MAX_BKP_BLOCKS); } - /* Break out of loop when rdt points to last list item */ + /* Break out of loop when rdt points to last chain item */ if (rdt->next == NULL) break; rdt = rdt->next; @@ -726,15 +721,15 @@ begin:; } /* - * Make additional rdata list entries for the backup blocks, so that + * Make additional rdata chain entries for the backup blocks, so that * we don't need to special-case them in the write loop. Note that we - * have now irrevocably changed the input rdata list. At the exit of + * have now irrevocably changed the input rdata chain. At the exit of * this loop, write_len includes the backup block data. * * Also set the appropriate info bits to show which buffers were backed * up. The i'th XLR_SET_BKP_BLOCK bit corresponds to the i'th * distinct buffer value (ignoring InvalidBuffer) appearing in the - * rdata list. + * rdata chain. */ write_len = len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) @@ -742,7 +737,7 @@ begin:; BkpBlock *bkpb; char *page; - if (dtbuf[i] == InvalidBuffer || !(dtbuf_bkp[i])) + if (!dtbuf_bkp[i]) continue; info |= XLR_SET_BKP_BLOCK(i); @@ -938,43 +933,64 @@ begin:; } /* - * Fill a BkpBlock struct given a buffer containing the page to be saved - * - * This is nontrivial only because it has to decide whether to apply "hole - * compression". + * Determine whether the buffer referenced by an XLogRecData item has to + * be backed up, and if so fill a BkpBlock struct for it. In any case + * save the buffer's LSN at *lsn. */ -static void -SetBkpBlock(BkpBlock *bkpb, Buffer buffer) +static bool +XLogCheckBuffer(XLogRecData *rdata, + XLogRecPtr *lsn, BkpBlock *bkpb) { PageHeader page; - uint16 offset; - uint16 length; - /* Save page identity info */ - bkpb->node = BufferGetFileNode(buffer); - bkpb->block = BufferGetBlockNumber(buffer); + page = (PageHeader) BufferGetBlock(rdata->buffer); + + /* + * XXX We assume page LSN is first data on *every* page that can be + * passed to XLogInsert, whether it otherwise has the standard page + * layout or not. + */ + *lsn = page->pd_lsn; - /* Test whether there is a "hole" containing zeroes in the page */ - page = (PageHeader) BufferGetBlock(buffer); - offset = page->pd_lower; - /* Check if pd_lower appears sane at all */ - if (offset >= SizeOfPageHeaderData && offset < BLCKSZ) + if (XLByteLE(page->pd_lsn, RedoRecPtr)) { - char *spd = (char *) page + offset; - char *epd = (char *) page + BLCKSZ; - char *pd = spd; + /* + * The page needs to be backed up, so set up *bkpb + */ + bkpb->node = BufferGetFileNode(rdata->buffer); + bkpb->block = BufferGetBlockNumber(rdata->buffer); + + if (rdata->buffer_std) + { + /* Assume we can omit data between pd_lower and pd_upper */ + uint16 lower = page->pd_lower; + uint16 upper = page->pd_upper; - while (pd < epd && *pd == '\0') - pd++; + if (lower >= SizeOfPageHeaderData && + upper > lower && + upper <= BLCKSZ) + { + bkpb->hole_offset = lower; + bkpb->hole_length = upper - lower; + } + else + { + /* No "hole" to compress out */ + bkpb->hole_offset = 0; + bkpb->hole_length = 0; + } + } + else + { + /* Not a standard page header, don't try to eliminate "hole" */ + bkpb->hole_offset = 0; + bkpb->hole_length = 0; + } - length = pd - spd; - if (length == 0) - offset = 0; + return true; /* buffer requires backup */ } - else - offset = length = 0; - bkpb->hole_offset = offset; - bkpb->hole_length = length; + + return false; /* buffer does not need to be backed up */ } /* @@ -5093,9 +5109,9 @@ CreateCheckPoint(bool shutdown, bool force) /* * Now insert the checkpoint record into XLOG. */ - rdata.buffer = InvalidBuffer; rdata.data = (char *) (&checkPoint); rdata.len = sizeof(checkPoint); + rdata.buffer = InvalidBuffer; rdata.next = NULL; recptr = XLogInsert(RM_XLOG_ID, @@ -5197,9 +5213,9 @@ XLogPutNextOid(Oid nextOid) { XLogRecData rdata; - rdata.buffer = InvalidBuffer; rdata.data = (char *) (&nextOid); rdata.len = sizeof(Oid); + rdata.buffer = InvalidBuffer; rdata.next = NULL; (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata); /* @@ -5220,9 +5236,9 @@ XLogPutNextMultiXactId(MultiXactId nextMulti) { XLogRecData rdata; - rdata.buffer = InvalidBuffer; rdata.data = (char *) (&nextMulti); rdata.len = sizeof(MultiXactId); + rdata.buffer = InvalidBuffer; rdata.next = NULL; (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTMULTI, &rdata); /* diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 370977c658..40cfb3e658 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.158 2005/06/06 17:01:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.159 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -444,9 +444,9 @@ createdb(const CreatedbStmt *stmt) xlrec.src_db_id = src_dboid; xlrec.src_tablespace_id = srctablespace; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_dbase_create_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata); @@ -1074,9 +1074,9 @@ remove_dbtablespaces(Oid db_id) xlrec.db_id = db_id; xlrec.tablespace_id = dsttablespace; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_dbase_drop_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata); diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 3278be1239..78b9225b83 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.121 2005/06/06 17:01:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.122 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -264,14 +264,14 @@ DefineSequence(CreateSeqStmt *seq) newseq->log_cnt = 0; xlrec.node = rel->rd_node; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_seq_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) tuple->t_data; rdata[1].len = tuple->t_len; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); @@ -338,15 +338,15 @@ AlterSequence(AlterSeqStmt *stmt) XLogRecData rdata[2]; xlrec.node = seqrel->rd_node; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_seq_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - ((PageHeader) page)->pd_upper; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); @@ -531,9 +531,9 @@ nextval(PG_FUNCTION_ARGS) XLogRecData rdata[2]; xlrec.node = seqrel->rd_node; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_seq_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* set values that will be saved in xlog */ @@ -541,10 +541,10 @@ nextval(PG_FUNCTION_ARGS) seq->is_called = true; seq->log_cnt = 0; - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - ((PageHeader) page)->pd_upper; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); @@ -666,9 +666,9 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) Page page = BufferGetPage(buf); xlrec.node = seqrel->rd_node; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_seq_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* set values that will be saved in xlog */ @@ -676,10 +676,10 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) seq->is_called = true; seq->log_cnt = 0; - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page + ((PageHeader) page)->pd_upper; rdata[1].len = ((PageHeader) page)->pd_special - ((PageHeader) page)->pd_upper; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index c7669dfc58..9981129c0e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.160 2005/06/05 00:38:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.161 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -5667,14 +5667,14 @@ copy_relation_data(Relation rel, SMgrRelation dst) xlrec.node = dst->smgr_rnode; xlrec.blkno = blkno; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapNewpage; + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) page; rdata[1].len = BLCKSZ; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index f9041a0a64..fac20708c0 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.20 2005/06/06 17:01:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.21 2005/06/06 20:22:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -356,14 +356,14 @@ CreateTableSpace(CreateTableSpaceStmt *stmt) XLogRecData rdata[2]; xlrec.ts_id = tablespaceoid; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path); + rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].buffer = InvalidBuffer; rdata[1].data = (char *) location; rdata[1].len = strlen(location) + 1; + rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata); @@ -461,9 +461,9 @@ DropTableSpace(DropTableSpaceStmt *stmt) XLogRecData rdata[1]; xlrec.ts_id = tablespaceoid; - rdata[0].buffer = InvalidBuffer; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_tblspc_drop_rec); + rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata); diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 8f8ba9e0d2..5b3f7bee95 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.64 2005/06/02 05:55:28 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.65 2005/06/06 20:22:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -357,7 +357,7 @@ PageRepairFragmentation(Page page, OffsetNumber *unused) lp = PageGetItemId(page, i + 1); lp->lp_len = 0; /* indicate unused & deallocated */ } - ((PageHeader) page)->pd_upper = pd_upper = pd_special; + ((PageHeader) page)->pd_upper = pd_special; } else { /* nused != 0 */ @@ -411,17 +411,11 @@ PageRepairFragmentation(Page page, OffsetNumber *unused) lp->lp_off = upper; } - ((PageHeader) page)->pd_upper = pd_upper = upper; + ((PageHeader) page)->pd_upper = upper; pfree(itemidbase); } - /* - * Zero out the now-free space. This is not essential, but it allows - * xlog.c to compress WAL data better. - */ - MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower); - return (nline - nused); } @@ -531,13 +525,6 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum) phdr->pd_upper += size; phdr->pd_lower -= sizeof(ItemIdData); - /* - * Zero out the just-freed space. This is not essential, but it allows - * xlog.c to compress WAL data better. - */ - MemSet((char *) page + phdr->pd_lower, 0, sizeof(ItemIdData)); - MemSet(addr, 0, size); - /* * Finally, we need to adjust the linp entries that remain. * @@ -685,14 +672,8 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) lp->lp_off = upper; } - phdr->pd_lower = pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); - phdr->pd_upper = pd_upper = upper; - - /* - * Zero out the now-free space. This is not essential, but it allows - * xlog.c to compress WAL data better. - */ - MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower); + phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); + phdr->pd_upper = upper; pfree(itemidbase); } diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index ca171a3b1a..13ad72a375 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.88 2005/06/06 17:01:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.89 2005/06/06 20:22:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -370,9 +370,9 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) */ xlrec.rnode = reln->smgr_rnode; - rdata.buffer = InvalidBuffer; rdata.data = (char *) &xlrec; rdata.len = sizeof(xlrec); + rdata.buffer = InvalidBuffer; rdata.next = NULL; lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata); @@ -635,9 +635,9 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) xlrec.blkno = newblks; xlrec.rnode = reln->smgr_rnode; - rdata.buffer = InvalidBuffer; rdata.data = (char *) &xlrec; rdata.len = sizeof(xlrec); + rdata.buffer = InvalidBuffer; rdata.next = NULL; lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN, diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 554c3c3aec..7547d7f5b9 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.63 2005/06/06 17:01:24 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.64 2005/06/06 20:22:58 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -91,24 +91,42 @@ typedef struct XLogRecord extern int sync_method; /* - * List of these structs is used to pass data to XLogInsert(). + * The rmgr data to be written by XLogInsert() is defined by a chain of + * one or more XLogRecData structs. (Multiple structs would be used when + * parts of the source data aren't physically adjacent in memory, or when + * multiple associated buffers need to be specified.) * * If buffer is valid then XLOG will check if buffer must be backed up * (ie, whether this is first change of that page since last checkpoint). * If so, the whole page contents are attached to the XLOG record, and XLOG * sets XLR_BKP_BLOCK_X bit in xl_info. Note that the buffer must be pinned - * and locked while this is going on, so that it won't change under us. - * NB: when this happens, we do not bother to insert the associated data into - * the XLOG record, since we assume it's present in the buffer. Therefore, - * rmgr redo routines MUST pay attention to XLR_BKP_BLOCK_X to know what - * is actually stored in the XLOG record. + * and exclusive-locked by the caller, so that it won't change under us. + * NB: when the buffer is backed up, we DO NOT insert the data pointed to by + * this XLogRecData struct into the XLOG record, since we assume it's present + * in the buffer. Therefore, rmgr redo routines MUST pay attention to + * XLR_BKP_BLOCK_X to know what is actually stored in the XLOG record. + * The i'th XLR_BKP_BLOCK bit corresponds to the i'th distinct buffer + * value (ignoring InvalidBuffer) appearing in the rdata chain. + * + * When buffer is valid, caller must set buffer_std to indicate whether the + * page uses standard pd_lower/pd_upper header fields. If this is true, then + * XLOG is allowed to omit the free space between pd_lower and pd_upper from + * the backed-up page image. Note that even when buffer_std is false, the + * page MUST have an LSN field as its first eight bytes! + * + * Note: data can be NULL to indicate no rmgr data associated with this chain + * entry. This can be sensible (ie, not a wasted entry) if buffer is valid. + * The implication is that the buffer has been changed by the operation being + * logged, and so may need to be backed up, but the change can be redone using + * only information already present elsewhere in the XLOG entry. */ typedef struct XLogRecData { - Buffer buffer; /* buffer associated with this data */ - char *data; - uint32 len; - struct XLogRecData *next; + char *data; /* start of rmgr data to include */ + uint32 len; /* length of rmgr data to include */ + Buffer buffer; /* buffer associated with data, if any */ + bool buffer_std; /* buffer has standard pd_lower/pd_upper */ + struct XLogRecData *next; /* next struct in chain, or NULL */ } XLogRecData; extern TimeLineID ThisTimeLineID; /* current TLI */