/*-------------------------------------------------------------------------
 *
 * xloginsert.c
 *		Functions for constructing WAL records
 *
 * Constructing a WAL record begins with a call to XLogBeginInsert,
 * followed by a number of XLogRegister* calls. The registered data is
 * collected in private working memory, and finally assembled into a chain
 * of XLogRecData structs by a call to XLogRecordAssemble(). See
 * access/transam/README for details.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/access/transam/xloginsert.c
 *
 *-------------------------------------------------------------------------
 */
22 #include "access/xact.h"
23 #include "access/xlog.h"
24 #include "access/xlog_internal.h"
25 #include "access/xloginsert.h"
26 #include "catalog/pg_control.h"
27 #include "common/pg_lzcompress.h"
28 #include "miscadmin.h"
29 #include "replication/origin.h"
30 #include "storage/bufmgr.h"
31 #include "storage/proc.h"
32 #include "utils/memutils.h"
35 /* Buffer size required to store a compressed version of backup block image */
36 #define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ)
39 * For each block reference registered with XLogRegisterBuffer, we fill in
40 * a registered_buffer struct.
44 bool in_use; /* is this slot in use? */
45 uint8 flags; /* REGBUF_* flags */
46 RelFileNode rnode; /* identifies the relation and block */
49 Page page; /* page content */
50 uint32 rdata_len; /* total length of data in rdata chain */
51 XLogRecData *rdata_head; /* head of the chain of data registered with
53 XLogRecData *rdata_tail; /* last entry in the chain, or &rdata_head if
56 XLogRecData bkp_rdatas[2]; /* temporary rdatas used to hold references to
57 * backup block data in XLogRecordAssemble() */
59 /* buffer to store a compressed version of backup block image */
60 char compressed_page[PGLZ_MAX_BLCKSZ];
63 static registered_buffer *registered_buffers;
64 static int max_registered_buffers; /* allocated size */
65 static int max_registered_block_id = 0; /* highest block_id + 1
66 * currently registered */
69 * A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
70 * with XLogRegisterData(...).
72 static XLogRecData *mainrdata_head;
73 static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
74 static uint32 mainrdata_len; /* total # of bytes in chain */
76 /* flags for the in-progress insertion */
77 static uint8 curinsert_flags = 0;
80 * These are used to hold the record header while constructing a record.
81 * 'hdr_scratch' is not a plain variable, but is palloc'd at initialization,
82 * because we want it to be MAXALIGNed and padding bytes zeroed.
84 * For simplicity, it's allocated large enough to hold the headers for any
87 static XLogRecData hdr_rdt;
88 static char *hdr_scratch = NULL;
90 #define SizeOfXlogOrigin (sizeof(RepOriginId) + sizeof(char))
92 #define HEADER_SCRATCH_SIZE \
94 MaxSizeOfXLogRecordBlockHeader * (XLR_MAX_BLOCK_ID + 1) + \
95 SizeOfXLogRecordDataHeaderLong + SizeOfXlogOrigin)
98 * An array of XLogRecData structs, to hold registered data.
100 static XLogRecData *rdatas;
101 static int num_rdatas; /* entries currently used */
102 static int max_rdatas; /* allocated size */
104 static bool begininsert_called = false;
106 /* Memory context to hold the registered buffer and data references. */
107 static MemoryContext xloginsert_cxt;
109 static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
110 XLogRecPtr RedoRecPtr, bool doPageWrites,
111 XLogRecPtr *fpw_lsn);
112 static bool XLogCompressBackupBlock(char *page, uint16 hole_offset,
113 uint16 hole_length, char *dest, uint16 *dlen);
116 * Begin constructing a WAL record. This must be called before the
117 * XLogRegister* functions and XLogInsert().
120 XLogBeginInsert(void)
122 Assert(max_registered_block_id == 0);
123 Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
124 Assert(mainrdata_len == 0);
126 /* cross-check on whether we should be here or not */
127 if (!XLogInsertAllowed())
128 elog(ERROR, "cannot make new WAL entries during recovery");
130 if (begininsert_called)
131 elog(ERROR, "XLogBeginInsert was already called");
133 begininsert_called = true;
137 * Ensure that there are enough buffer and data slots in the working area,
138 * for subsequent XLogRegisterBuffer, XLogRegisterData and XLogRegisterBufData
141 * There is always space for a small number of buffers and data chunks, enough
142 * for most record types. This function is for the exceptional cases that need
146 XLogEnsureRecordSpace(int max_block_id, int ndatas)
151 * This must be called before entering a critical section, because
152 * allocating memory inside a critical section can fail. repalloc() will
153 * check the same, but better to check it here too so that we fail
154 * consistently even if the arrays happen to be large enough already.
156 Assert(CritSectionCount == 0);
158 /* the minimum values can't be decreased */
159 if (max_block_id < XLR_NORMAL_MAX_BLOCK_ID)
160 max_block_id = XLR_NORMAL_MAX_BLOCK_ID;
161 if (ndatas < XLR_NORMAL_RDATAS)
162 ndatas = XLR_NORMAL_RDATAS;
164 if (max_block_id > XLR_MAX_BLOCK_ID)
165 elog(ERROR, "maximum number of WAL record block references exceeded");
166 nbuffers = max_block_id + 1;
168 if (nbuffers > max_registered_buffers)
170 registered_buffers = (registered_buffer *)
171 repalloc(registered_buffers, sizeof(registered_buffer) * nbuffers);
174 * At least the padding bytes in the structs must be zeroed, because
175 * they are included in WAL data, but initialize it all for tidiness.
177 MemSet(®istered_buffers[max_registered_buffers], 0,
178 (nbuffers - max_registered_buffers) * sizeof(registered_buffer));
179 max_registered_buffers = nbuffers;
182 if (ndatas > max_rdatas)
184 rdatas = (XLogRecData *) repalloc(rdatas, sizeof(XLogRecData) * ndatas);
190 * Reset WAL record construction buffers.
193 XLogResetInsertion(void)
197 for (i = 0; i < max_registered_block_id; i++)
198 registered_buffers[i].in_use = false;
201 max_registered_block_id = 0;
203 mainrdata_last = (XLogRecData *) &mainrdata_head;
205 begininsert_called = false;
209 * Register a reference to a buffer with the WAL record being constructed.
210 * This must be called for every page that the WAL-logged operation modifies.
213 XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
215 registered_buffer *regbuf;
217 /* NO_IMAGE doesn't make sense with FORCE_IMAGE */
218 Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
219 Assert(begininsert_called);
221 if (block_id >= max_registered_block_id)
223 if (block_id >= max_registered_buffers)
224 elog(ERROR, "too many registered buffers");
225 max_registered_block_id = block_id + 1;
228 regbuf = ®istered_buffers[block_id];
230 BufferGetTag(buffer, ®buf->rnode, ®buf->forkno, ®buf->block);
231 regbuf->page = BufferGetPage(buffer);
232 regbuf->flags = flags;
233 regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head;
234 regbuf->rdata_len = 0;
237 * Check that this page hasn't already been registered with some other
240 #ifdef USE_ASSERT_CHECKING
244 for (i = 0; i < max_registered_block_id; i++)
246 registered_buffer *regbuf_old = ®istered_buffers[i];
248 if (i == block_id || !regbuf_old->in_use)
251 Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
252 regbuf_old->forkno != regbuf->forkno ||
253 regbuf_old->block != regbuf->block);
258 regbuf->in_use = true;
262 * Like XLogRegisterBuffer, but for registering a block that's not in the
263 * shared buffer pool (i.e. when you don't have a Buffer for it).
266 XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
267 BlockNumber blknum, Page page, uint8 flags)
269 registered_buffer *regbuf;
271 /* This is currently only used to WAL-log a full-page image of a page */
272 Assert(flags & REGBUF_FORCE_IMAGE);
273 Assert(begininsert_called);
275 if (block_id >= max_registered_block_id)
276 max_registered_block_id = block_id + 1;
278 if (block_id >= max_registered_buffers)
279 elog(ERROR, "too many registered buffers");
281 regbuf = ®istered_buffers[block_id];
283 regbuf->rnode = *rnode;
284 regbuf->forkno = forknum;
285 regbuf->block = blknum;
287 regbuf->flags = flags;
288 regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head;
289 regbuf->rdata_len = 0;
292 * Check that this page hasn't already been registered with some other
295 #ifdef USE_ASSERT_CHECKING
299 for (i = 0; i < max_registered_block_id; i++)
301 registered_buffer *regbuf_old = ®istered_buffers[i];
303 if (i == block_id || !regbuf_old->in_use)
306 Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
307 regbuf_old->forkno != regbuf->forkno ||
308 regbuf_old->block != regbuf->block);
313 regbuf->in_use = true;
317 * Add data to the WAL record that's being constructed.
319 * The data is appended to the "main chunk", available at replay with
323 XLogRegisterData(char *data, int len)
327 Assert(begininsert_called);
329 if (num_rdatas >= max_rdatas)
330 elog(ERROR, "too much WAL data");
331 rdata = &rdatas[num_rdatas++];
337 * we use the mainrdata_last pointer to track the end of the chain, so no
338 * need to clear 'next' here.
341 mainrdata_last->next = rdata;
342 mainrdata_last = rdata;
344 mainrdata_len += len;
348 * Add buffer-specific data to the WAL record that's being constructed.
350 * Block_id must reference a block previously registered with
351 * XLogRegisterBuffer(). If this is called more than once for the same
352 * block_id, the data is appended.
354 * The maximum amount of data that can be registered per block is 65535
355 * bytes. That should be plenty; if you need more than BLCKSZ bytes to
356 * reconstruct the changes to the page, you might as well just log a full
357 * copy of it. (the "main data" that's not associated with a block is not
361 XLogRegisterBufData(uint8 block_id, char *data, int len)
363 registered_buffer *regbuf;
366 Assert(begininsert_called);
368 /* find the registered buffer struct */
369 regbuf = ®istered_buffers[block_id];
371 elog(ERROR, "no block with id %d registered with WAL insertion",
374 if (num_rdatas >= max_rdatas)
375 elog(ERROR, "too much WAL data");
376 rdata = &rdatas[num_rdatas++];
381 regbuf->rdata_tail->next = rdata;
382 regbuf->rdata_tail = rdata;
383 regbuf->rdata_len += len;
387 * Set insert status flags for the upcoming WAL record.
389 * The flags that can be used here are:
390 * - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
391 * included in the record.
392 * - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
393 * durability, which allows to avoid triggering WAL archiving and other
394 * background activity.
397 XLogSetRecordFlags(uint8 flags)
399 Assert(begininsert_called);
400 curinsert_flags = flags;
404 * Insert an XLOG record having the specified RMID and info bytes, with the
405 * body of the record being the data and buffer references registered earlier
406 * with XLogRegister* calls.
408 * Returns XLOG pointer to end of record (beginning of next record).
409 * This can be used as LSN for data pages affected by the logged action.
410 * (LSN is the XLOG point up to which the XLOG must be flushed to disk
411 * before the data page can be written out. This implements the basic
412 * WAL rule "write the log before the data".)
415 XLogInsert(RmgrId rmid, uint8 info)
419 /* XLogBeginInsert() must have been called. */
420 if (!begininsert_called)
421 elog(ERROR, "XLogBeginInsert was not called");
424 * The caller can set rmgr bits and XLR_SPECIAL_REL_UPDATE; the rest are
425 * reserved for use by me.
427 if ((info & ~(XLR_RMGR_INFO_MASK | XLR_SPECIAL_REL_UPDATE)) != 0)
428 elog(PANIC, "invalid xlog info mask %02X", info);
430 TRACE_POSTGRESQL_XLOG_INSERT(rmid, info);
433 * In bootstrap mode, we don't actually log anything but XLOG resources;
434 * return a phony record pointer.
436 if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
438 XLogResetInsertion();
439 EndPos = SizeOfXLogLongPHD; /* start of 1st chkpt record */
445 XLogRecPtr RedoRecPtr;
451 * Get values needed to decide whether to do full-page writes. Since
452 * we don't yet have an insertion lock, these could change under us,
453 * but XLogInsertRecord will recheck them once it has a lock.
455 GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
457 rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
460 EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
461 } while (EndPos == InvalidXLogRecPtr);
463 XLogResetInsertion();
469 * Assemble a WAL record from the registered data and buffers into an
470 * XLogRecData chain, ready for insertion with XLogInsertRecord().
472 * The record header fields are filled in, except for the xl_prev field. The
473 * calculated CRC does not include the record header yet.
475 * If there are any registered buffers, and a full-page image was not taken
476 * of all of them, *fpw_lsn is set to the lowest LSN among such pages. This
477 * signals that the assembled record is only good for insertion on the
478 * assumption that the RedoRecPtr and doPageWrites values were up-to-date.
481 XLogRecordAssemble(RmgrId rmid, uint8 info,
482 XLogRecPtr RedoRecPtr, bool doPageWrites,
486 uint32 total_len = 0;
489 registered_buffer *prev_regbuf = NULL;
490 XLogRecData *rdt_datas_last;
492 char *scratch = hdr_scratch;
495 * Note: this function can be called multiple times for the same record.
496 * All the modifications we do to the rdata chains below must handle that.
499 /* The record begins with the fixed-size header */
500 rechdr = (XLogRecord *) scratch;
501 scratch += SizeOfXLogRecord;
504 rdt_datas_last = &hdr_rdt;
505 hdr_rdt.data = hdr_scratch;
508 * Make an rdata chain containing all the data portions of all block
509 * references. This includes the data for full-page images. Also append
510 * the headers for the block references in the scratch buffer.
512 *fpw_lsn = InvalidXLogRecPtr;
513 for (block_id = 0; block_id < max_registered_block_id; block_id++)
515 registered_buffer *regbuf = ®istered_buffers[block_id];
518 XLogRecordBlockHeader bkpb;
519 XLogRecordBlockImageHeader bimg;
520 XLogRecordBlockCompressHeader cbimg = {0};
522 bool is_compressed = false;
527 /* Determine if this block needs to be backed up */
528 if (regbuf->flags & REGBUF_FORCE_IMAGE)
530 else if (regbuf->flags & REGBUF_NO_IMAGE)
531 needs_backup = false;
532 else if (!doPageWrites)
533 needs_backup = false;
537 * We assume page LSN is first data on *every* page that can be
538 * passed to XLogInsert, whether it has the standard page layout
541 XLogRecPtr page_lsn = PageGetLSN(regbuf->page);
543 needs_backup = (page_lsn <= RedoRecPtr);
546 if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn)
551 /* Determine if the buffer data needs to included */
552 if (regbuf->rdata_len == 0)
554 else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0)
557 needs_data = !needs_backup;
560 bkpb.fork_flags = regbuf->forkno;
561 bkpb.data_length = 0;
563 if ((regbuf->flags & REGBUF_WILL_INIT) == REGBUF_WILL_INIT)
564 bkpb.fork_flags |= BKPBLOCK_WILL_INIT;
568 Page page = regbuf->page;
569 uint16 compressed_len;
572 * The page needs to be backed up, so calculate its hole length
575 if (regbuf->flags & REGBUF_STANDARD)
577 /* Assume we can omit data between pd_lower and pd_upper */
578 uint16 lower = ((PageHeader) page)->pd_lower;
579 uint16 upper = ((PageHeader) page)->pd_upper;
581 if (lower >= SizeOfPageHeaderData &&
585 bimg.hole_offset = lower;
586 cbimg.hole_length = upper - lower;
590 /* No "hole" to compress out */
591 bimg.hole_offset = 0;
592 cbimg.hole_length = 0;
597 /* Not a standard page header, don't try to eliminate "hole" */
598 bimg.hole_offset = 0;
599 cbimg.hole_length = 0;
603 * Try to compress a block image if wal_compression is enabled
608 XLogCompressBackupBlock(page, bimg.hole_offset,
610 regbuf->compressed_page,
615 * Fill in the remaining fields in the XLogRecordBlockHeader
618 bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
621 * Construct XLogRecData entries for the page content.
623 rdt_datas_last->next = ®buf->bkp_rdatas[0];
624 rdt_datas_last = rdt_datas_last->next;
626 bimg.bimg_info = (cbimg.hole_length == 0) ? 0 : BKPIMAGE_HAS_HOLE;
630 bimg.length = compressed_len;
631 bimg.bimg_info |= BKPIMAGE_IS_COMPRESSED;
633 rdt_datas_last->data = regbuf->compressed_page;
634 rdt_datas_last->len = compressed_len;
638 bimg.length = BLCKSZ - cbimg.hole_length;
640 if (cbimg.hole_length == 0)
642 rdt_datas_last->data = page;
643 rdt_datas_last->len = BLCKSZ;
647 /* must skip the hole */
648 rdt_datas_last->data = page;
649 rdt_datas_last->len = bimg.hole_offset;
651 rdt_datas_last->next = ®buf->bkp_rdatas[1];
652 rdt_datas_last = rdt_datas_last->next;
654 rdt_datas_last->data =
655 page + (bimg.hole_offset + cbimg.hole_length);
656 rdt_datas_last->len =
657 BLCKSZ - (bimg.hole_offset + cbimg.hole_length);
661 total_len += bimg.length;
667 * Link the caller-supplied rdata chain for this buffer to the
670 bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
671 bkpb.data_length = regbuf->rdata_len;
672 total_len += regbuf->rdata_len;
674 rdt_datas_last->next = regbuf->rdata_head;
675 rdt_datas_last = regbuf->rdata_tail;
678 if (prev_regbuf && RelFileNodeEquals(regbuf->rnode, prev_regbuf->rnode))
681 bkpb.fork_flags |= BKPBLOCK_SAME_REL;
685 prev_regbuf = regbuf;
687 /* Ok, copy the header to the scratch buffer */
688 memcpy(scratch, &bkpb, SizeOfXLogRecordBlockHeader);
689 scratch += SizeOfXLogRecordBlockHeader;
692 memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader);
693 scratch += SizeOfXLogRecordBlockImageHeader;
694 if (cbimg.hole_length != 0 && is_compressed)
696 memcpy(scratch, &cbimg,
697 SizeOfXLogRecordBlockCompressHeader);
698 scratch += SizeOfXLogRecordBlockCompressHeader;
703 memcpy(scratch, ®buf->rnode, sizeof(RelFileNode));
704 scratch += sizeof(RelFileNode);
706 memcpy(scratch, ®buf->block, sizeof(BlockNumber));
707 scratch += sizeof(BlockNumber);
710 /* followed by the record's origin, if any */
711 if ((curinsert_flags & XLOG_INCLUDE_ORIGIN) &&
712 replorigin_session_origin != InvalidRepOriginId)
714 *(scratch++) = XLR_BLOCK_ID_ORIGIN;
715 memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin));
716 scratch += sizeof(replorigin_session_origin);
719 /* followed by main data, if any */
720 if (mainrdata_len > 0)
722 if (mainrdata_len > 255)
724 *(scratch++) = XLR_BLOCK_ID_DATA_LONG;
725 memcpy(scratch, &mainrdata_len, sizeof(uint32));
726 scratch += sizeof(uint32);
730 *(scratch++) = XLR_BLOCK_ID_DATA_SHORT;
731 *(scratch++) = (uint8) mainrdata_len;
733 rdt_datas_last->next = mainrdata_head;
734 rdt_datas_last = mainrdata_last;
735 total_len += mainrdata_len;
737 rdt_datas_last->next = NULL;
739 hdr_rdt.len = (scratch - hdr_scratch);
740 total_len += hdr_rdt.len;
743 * Calculate CRC of the data
745 * Note that the record header isn't added into the CRC initially since we
746 * don't know the prev-link yet. Thus, the CRC will represent the CRC of
747 * the whole record in the order: rdata, then backup blocks, then record
750 INIT_CRC32C(rdata_crc);
751 COMP_CRC32C(rdata_crc, hdr_scratch + SizeOfXLogRecord, hdr_rdt.len - SizeOfXLogRecord);
752 for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
753 COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
756 * Fill in the fields in the record header. Prev-link is filled in later,
757 * once we know where in the WAL the record will be inserted. The CRC does
758 * not include the record header yet.
760 rechdr->xl_xid = GetCurrentTransactionIdIfAny();
761 rechdr->xl_tot_len = total_len;
762 rechdr->xl_info = info;
763 rechdr->xl_rmid = rmid;
764 rechdr->xl_prev = InvalidXLogRecPtr;
765 rechdr->xl_crc = rdata_crc;
771 * Create a compressed version of a backup block image.
773 * Returns FALSE if compression fails (i.e., compressed result is actually
774 * bigger than original). Otherwise, returns TRUE and sets 'dlen' to
775 * the length of compressed block image.
778 XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
779 char *dest, uint16 *dlen)
781 int32 orig_len = BLCKSZ - hole_length;
783 int32 extra_bytes = 0;
787 if (hole_length != 0)
789 /* must skip the hole */
791 memcpy(source, page, hole_offset);
792 memcpy(source + hole_offset,
793 page + (hole_offset + hole_length),
794 BLCKSZ - (hole_length + hole_offset));
797 * Extra data needs to be stored in WAL record for the compressed
798 * version of block image if the hole exists.
800 extra_bytes = SizeOfXLogRecordBlockCompressHeader;
806 * We recheck the actual size even if pglz_compress() reports success and
807 * see if the number of bytes saved by compression is larger than the
808 * length of extra data needed for the compressed version of block image.
810 len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default);
812 len + extra_bytes < orig_len)
814 *dlen = (uint16) len; /* successful compression */
821 * Determine whether the buffer referenced has to be backed up.
823 * Since we don't yet have the insert lock, fullPageWrites and forcePageWrites
824 * could change later, so the result should be used for optimization purposes
828 XLogCheckBufferNeedsBackup(Buffer buffer)
830 XLogRecPtr RedoRecPtr;
834 GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
836 page = BufferGetPage(buffer);
838 if (doPageWrites && PageGetLSN(page) <= RedoRecPtr)
839 return true; /* buffer requires backup */
841 return false; /* buffer does not need to be backed up */
845 * Write a backup block if needed when we are setting a hint. Note that
846 * this may be called for a variety of page types, not just heaps.
848 * Callable while holding just share lock on the buffer content.
850 * We can't use the plain backup block mechanism since that relies on the
851 * Buffer being exclusively locked. Since some modifications (setting LSN, hint
852 * bits) are allowed in a sharelocked buffer that can lead to wal checksum
853 * failures. So instead we copy the page and insert the copied data as normal
856 * We only need to do something if page has not yet been full page written in
857 * this checkpoint round. The LSN of the inserted wal record is returned if we
858 * had to write, InvalidXLogRecPtr otherwise.
860 * It is possible that multiple concurrent backends could attempt to write WAL
861 * records. In that case, multiple copies of the same block would be recorded
862 * in separate WAL records by different backends, though that is still OK from
863 * a correctness perspective.
866 XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
868 XLogRecPtr recptr = InvalidXLogRecPtr;
870 XLogRecPtr RedoRecPtr;
873 * Ensure no checkpoint can change our view of RedoRecPtr.
875 Assert(MyPgXact->delayChkpt);
878 * Update RedoRecPtr so that we can make the right decision
880 RedoRecPtr = GetRedoRecPtr();
883 * We assume page LSN is first data on *every* page that can be passed to
884 * XLogInsert, whether it has the standard page layout or not. Since we're
885 * only holding a share-lock on the page, we must take the buffer header
886 * lock when we look at the LSN.
888 lsn = BufferGetLSNAtomic(buffer);
890 if (lsn <= RedoRecPtr)
893 char copied_buffer[BLCKSZ];
894 char *origdata = (char *) BufferGetBlock(buffer);
900 * Copy buffer so we don't have to worry about concurrent hint bit or
901 * lsn updates. We assume pd_lower/upper cannot be changed without an
902 * exclusive lock, so the contents bkp are not racy.
906 /* Assume we can omit data between pd_lower and pd_upper */
907 Page page = BufferGetPage(buffer);
908 uint16 lower = ((PageHeader) page)->pd_lower;
909 uint16 upper = ((PageHeader) page)->pd_upper;
911 memcpy(copied_buffer, origdata, lower);
912 memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
915 memcpy(copied_buffer, origdata, BLCKSZ);
919 flags = REGBUF_FORCE_IMAGE;
921 flags |= REGBUF_STANDARD;
923 BufferGetTag(buffer, &rnode, &forkno, &blkno);
924 XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
926 recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT);
933 * Write a WAL record containing a full image of a page. Caller is responsible
934 * for writing the page to disk after calling this routine.
936 * Note: If you're using this function, you should be building pages in private
937 * memory and writing them directly to smgr. If you're using buffers, call
938 * log_newpage_buffer instead.
940 * If the page follows the standard page layout, with a PageHeader and unused
941 * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
942 * the unused space to be left out from the WAL record, making it smaller.
945 log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
946 Page page, bool page_std)
951 flags = REGBUF_FORCE_IMAGE;
953 flags |= REGBUF_STANDARD;
956 XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags);
957 recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
960 * The page may be uninitialized. If so, we can't set the LSN because that
961 * would corrupt the page.
963 if (!PageIsNew(page))
965 PageSetLSN(page, recptr);
972 * Write a WAL record containing a full image of a page.
974 * Caller should initialize the buffer and mark it dirty before calling this
975 * function. This function will set the page LSN.
977 * If the page follows the standard page layout, with a PageHeader and unused
978 * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
979 * the unused space to be left out from the WAL record, making it smaller.
982 log_newpage_buffer(Buffer buffer, bool page_std)
984 Page page = BufferGetPage(buffer);
989 /* Shared buffers should be modified in a critical section. */
990 Assert(CritSectionCount > 0);
992 BufferGetTag(buffer, &rnode, &forkNum, &blkno);
994 return log_newpage(&rnode, forkNum, blkno, page, page_std);
998 * Allocate working buffers needed for WAL record construction.
1001 InitXLogInsert(void)
1003 /* Initialize the working areas */
1004 if (xloginsert_cxt == NULL)
1006 xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
1007 "WAL record construction",
1008 ALLOCSET_DEFAULT_SIZES);
1011 if (registered_buffers == NULL)
1013 registered_buffers = (registered_buffer *)
1014 MemoryContextAllocZero(xloginsert_cxt,
1015 sizeof(registered_buffer) * (XLR_NORMAL_MAX_BLOCK_ID + 1));
1016 max_registered_buffers = XLR_NORMAL_MAX_BLOCK_ID + 1;
1020 rdatas = MemoryContextAlloc(xloginsert_cxt,
1021 sizeof(XLogRecData) * XLR_NORMAL_RDATAS);
1022 max_rdatas = XLR_NORMAL_RDATAS;
1026 * Allocate a buffer to hold the header information for a WAL record.
1028 if (hdr_scratch == NULL)
1029 hdr_scratch = MemoryContextAllocZero(xloginsert_cxt,
1030 HEADER_SCRATCH_SIZE);