From 9e857436ef7dff8fb8a0bf43cfe62650e2be6be9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 4 Dec 2013 00:10:47 +0200 Subject: [PATCH] Don't include unused space in LOG_NEWPAGE records. This is the same trick we use when taking a full page image of a buffer passed to XLogInsert. --- src/backend/access/gin/gininsert.c | 4 +- src/backend/access/gist/gist.c | 2 +- src/backend/access/heap/heapam.c | 123 +++++++++++++++++--------- src/backend/access/heap/rewriteheap.c | 6 +- src/backend/access/nbtree/nbtree.c | 2 +- src/backend/access/nbtree/nbtsort.c | 2 +- src/backend/access/spgist/spginsert.c | 6 +- src/backend/commands/tablecmds.c | 8 +- src/backend/commands/vacuumlazy.c | 6 +- src/include/access/heapam_xlog.h | 10 ++- src/include/access/xlog_internal.h | 2 +- 11 files changed, 109 insertions(+), 62 deletions(-) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 556e31854e..bd6ccd0e09 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -435,10 +435,10 @@ ginbuildempty(PG_FUNCTION_ARGS) START_CRIT_SECTION(); GinInitMetabuffer(MetaBuffer); MarkBufferDirty(MetaBuffer); - log_newpage_buffer(MetaBuffer); + log_newpage_buffer(MetaBuffer, false); GinInitBuffer(RootBuffer, GIN_LEAF); MarkBufferDirty(RootBuffer); - log_newpage_buffer(RootBuffer); + log_newpage_buffer(RootBuffer, false); END_CRIT_SECTION(); /* Unlock and release the buffers. */ diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 99cbcaa58e..815ad5ceee 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -83,7 +83,7 @@ gistbuildempty(PG_FUNCTION_ARGS) START_CRIT_SECTION(); GISTInitBuffer(buffer, F_LEAF); MarkBufferDirty(buffer); - log_newpage_buffer(buffer); + log_newpage_buffer(buffer, true); END_CRIT_SECTION(); /* Unlock and release the buffer */ diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index c13f87c4ce..8d596202ba 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6207,16 +6207,22 @@ log_heap_update(Relation reln, Buffer oldbuf, * memory and writing them directly to smgr. If you're using buffers, call * log_newpage_buffer instead. * - * Note: the NEWPAGE log record is used for both heaps and indexes, so do - * not do anything that assumes we are touching a heap. + * If the page follows the standard page layout, with a PageHeader and unused + * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows + * the unused space to be left out from the WAL record, making it smaller. */ XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, - Page page) + Page page, bool page_std) { xl_heap_newpage xlrec; XLogRecPtr recptr; - XLogRecData rdata[2]; + XLogRecData rdata[3]; + + /* + * Note: the NEWPAGE log record is used for both heaps and indexes, so do + * not do anything that assumes we are touching a heap. + */ /* NO ELOG(ERROR) from here till newpage op is logged */ START_CRIT_SECTION(); @@ -6225,15 +6231,58 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, xlrec.forknum = forkNum; xlrec.blkno = blkno; + if (page_std) + { + /* Assume we can omit data between pd_lower and pd_upper */ + uint16 lower = ((PageHeader) page)->pd_lower; + uint16 upper = ((PageHeader) page)->pd_upper; + + if (lower >= SizeOfPageHeaderData && + upper > lower && + upper <= BLCKSZ) + { + xlrec.hole_offset = lower; + xlrec.hole_length = upper - lower; + } + else + { + /* No "hole" to compress out */ + xlrec.hole_offset = 0; + xlrec.hole_length = 0; + } + } + else + { + /* Not a standard page header, don't try to eliminate "hole" */ + xlrec.hole_offset = 0; + xlrec.hole_length = 0; + } + rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfHeapNewpage; rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); - rdata[1].data = (char *) page; - rdata[1].len = BLCKSZ; - rdata[1].buffer = InvalidBuffer; - rdata[1].next = NULL; + if (xlrec.hole_length == 0) + { + rdata[1].data = (char *) page; + rdata[1].len = BLCKSZ; + rdata[1].buffer = InvalidBuffer; + rdata[1].next = NULL; + } + else + { + /* must skip the hole */ + rdata[1].data = (char *) page; + rdata[1].len = xlrec.hole_offset; + rdata[1].buffer = InvalidBuffer; + rdata[1].next = &rdata[2]; + + rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length); + rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length); + rdata[2].buffer = InvalidBuffer; + rdata[2].next = NULL; + } recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); @@ -6257,44 +6306,24 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, * Caller should initialize the buffer and mark it dirty before calling this * function. This function will set the page LSN and TLI. * - * Note: the NEWPAGE log record is used for both heaps and indexes, so do - * not do anything that assumes we are touching a heap. + * If the page follows the standard page layout, with a PageHeader and unused + * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows + * the unused space to be left out from the WAL record, making it smaller. */ XLogRecPtr -log_newpage_buffer(Buffer buffer) +log_newpage_buffer(Buffer buffer, bool page_std) { - xl_heap_newpage xlrec; - XLogRecPtr recptr; - XLogRecData rdata[2]; Page page = BufferGetPage(buffer); + RelFileNode rnode; + ForkNumber forkNum; + BlockNumber blkno; - /* We should be in a critical section. */ + /* Shared buffers should be modified in a critical section. */ Assert(CritSectionCount > 0); - BufferGetTag(buffer, &xlrec.node, &xlrec.forknum, &xlrec.blkno); - - rdata[0].data = (char *) &xlrec; - rdata[0].len = SizeOfHeapNewpage; - rdata[0].buffer = InvalidBuffer; - rdata[0].next = &(rdata[1]); - - rdata[1].data = page; - rdata[1].len = BLCKSZ; - rdata[1].buffer = InvalidBuffer; - rdata[1].next = NULL; - - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); - - /* - * The page may be uninitialized. If so, we can't set the LSN and TLI - * because that would corrupt the page. - */ - if (!PageIsNew(page)) - { - PageSetLSN(page, recptr); - } + BufferGetTag(buffer, &rnode, &forkNum, &blkno); - return recptr; + return log_newpage(&rnode, forkNum, blkno, page, page_std); } /* @@ -6582,12 +6611,15 @@ static void heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record) { xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record); + char *blk = ((char *) xlrec) + sizeof(xl_heap_newpage); Buffer buffer; Page page; /* Backup blocks are not used in newpage records */ Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK)); + Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ - xlrec->hole_length); + /* * Note: the NEWPAGE log record is used for both heaps and indexes, so do * not do anything that assumes we are touching a heap. @@ -6598,8 +6630,19 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record) LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = (Page) BufferGetPage(buffer); - Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ); - memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ); + if (xlrec->hole_length == 0) + { + memcpy((char *) page, blk, BLCKSZ); + } + else + { + memcpy((char *) page, blk, xlrec->hole_offset); + /* must zero-fill the hole */ + MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length); + memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length), + blk + xlrec->hole_offset, + BLCKSZ - (xlrec->hole_offset + xlrec->hole_length)); + } /* * The page may be uninitialized. If so, we can't set the LSN because that diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 951894ce5a..deec77ddff 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -277,7 +277,8 @@ end_heap_rewrite(RewriteState state) log_newpage(&state->rs_new_rel->rd_node, MAIN_FORKNUM, state->rs_blockno, - state->rs_buffer); + state->rs_buffer, + true); RelationOpenSmgr(state->rs_new_rel); PageSetChecksumInplace(state->rs_buffer, state->rs_blockno); @@ -622,7 +623,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) log_newpage(&state->rs_new_rel->rd_node, MAIN_FORKNUM, state->rs_blockno, - page); + page, + true); /* * Now write the page. We say isTemp = true even if it's not a diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 073190ffd5..207a4edbc5 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -222,7 +222,7 @@ btbuildempty(PG_FUNCTION_ARGS) (char *) metapage, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - BTREE_METAPAGE, metapage); + BTREE_METAPAGE, metapage, false); /* * An immediate sync is require even if we xlog'd the page, because the diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 52c5a2676e..366f39522d 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -274,7 +274,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) if (wstate->btws_use_wal) { /* We use the heap NEWPAGE record type for this */ - log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page); + log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true); } /* diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 2a50d87c74..de8fc4de6d 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -169,7 +169,7 @@ spgbuildempty(PG_FUNCTION_ARGS) (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - SPGIST_METAPAGE_BLKNO, page); + SPGIST_METAPAGE_BLKNO, page, false); /* Likewise for the root page. */ SpGistInitPage(page, SPGIST_LEAF); @@ -179,7 +179,7 @@ spgbuildempty(PG_FUNCTION_ARGS) (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - SPGIST_ROOT_BLKNO, page); + SPGIST_ROOT_BLKNO, page, true); /* Likewise for the null-tuples root page. */ SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS); @@ -189,7 +189,7 @@ spgbuildempty(PG_FUNCTION_ARGS) (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, - SPGIST_NULL_BLKNO, page); + SPGIST_NULL_BLKNO, page, true); /* * An immediate sync is required even if we xlog'd the pages, because the diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 1aa1ad9127..1d9f29a7b7 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -9130,9 +9130,13 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, src->smgr_rnode.backend, forkNum)))); - /* XLOG stuff */ + /* + * WAL-log the copied page. Unfortunately we don't know what kind of + * a page this is, so we have to log the full page including any + * unused space. + */ if (use_wal) - log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page); + log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false); PageSetChecksumInplace(page, blkno); diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index fe2d9e78fa..d346772600 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -699,14 +699,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, * which will cause a PANIC. To prevent that, check whether * the page has been previously WAL-logged, and if not, do that * now. - * - * XXX: It would be nice to use a logging method supporting - * standard buffers here since log_newpage_buffer() will write - * the full block instead of omitting the hole. */ if (RelationNeedsWAL(onerel) && PageGetLSN(page) == InvalidXLogRecPtr) - log_newpage_buffer(buf); + log_newpage_buffer(buf, true); PageSetAllVisible(page); visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 4381778308..63b73d0329 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -198,10 +198,12 @@ typedef struct xl_heap_newpage RelFileNode node; ForkNumber forknum; BlockNumber blkno; /* location of new page */ - /* entire page contents follow at end of record */ + uint16 hole_offset; /* number of bytes before "hole" */ + uint16 hole_length; /* number of bytes in "hole" */ + /* entire page contents (minus the hole) follow at end of record */ } xl_heap_newpage; -#define SizeOfHeapNewpage (offsetof(xl_heap_newpage, blkno) + sizeof(BlockNumber)) +#define SizeOfHeapNewpage (offsetof(xl_heap_newpage, hole_length) + sizeof(uint16)) /* flags for infobits_set */ #define XLHL_XMAX_IS_MULTI 0x01 @@ -282,7 +284,7 @@ extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, TransactionId cutoff_xid); extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, - BlockNumber blk, Page page); -extern XLogRecPtr log_newpage_buffer(Buffer buffer); + BlockNumber blk, Page page, bool page_std); +extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std); #endif /* HEAPAM_XLOG_H */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index b6320eee3c..9fba8c3db8 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -55,7 +55,7 @@ typedef struct BkpBlock /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD077 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD078 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { -- 2.40.0