1 /*-------------------------------------------------------------------------
4 * Standard POSTGRES buffer page definitions.
7 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/storage/bufpage.h
12 *-------------------------------------------------------------------------
17 #include "access/xlogdefs.h"
18 #include "storage/block.h"
19 #include "storage/item.h"
20 #include "storage/off.h"
/*
 * A postgres disk page is an abstraction layered on top of a postgres
 * disk block (which is simply a unit of i/o, see block.h).
 *
 * specifically, while a disk block can be unformatted, a postgres
 * disk page is always a slotted page of the form:
 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp1 linp2 linp3 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |                                      |
 * +-----------+--------------------------------------+
 * |           ^ pd_lower                             |
 * |                                                  |
 * |             v pd_upper                           |
 * +-------------+------------------------------------+
 * |             | tupleN ...                         |
 * +-------------+------------------+-----------------+
 * |       ... tuple3 tuple2 tuple1 | "special space" |
 * +--------------------------------+-----------------+
 *                                  ^ pd_special
 *
 * a page is full when nothing can be added between pd_lower and
 * pd_upper.
 *
 * all blocks written out by an access method must be disk pages.
 *
 * EXCEPTIONS:
 *
 * obviously, a page is not formatted before it is initialized by
 * a call to PageInit.
 *
 * NOTES:
 *
 * linp1..N form an ItemId array.  ItemPointers point into this array
 * rather than pointing directly to a tuple.  Note that OffsetNumbers
 * conventionally start at 1, not 0.
 *
 * tuple1..N are added "backwards" on the page.  because a tuple's
 * ItemPointer points to its ItemId entry rather than its actual
 * byte-offset position, tuples can be physically shuffled on a page
 * whenever the need arises.
 *
 * AM-generic per-page information is kept in PageHeaderData.
 *
 * AM-specific per-page data (if any) is kept in the area marked "special
 * space"; each AM has an "opaque" structure defined somewhere that is
 * stored as the page trailer.  an access method should always
 * initialize its pages with PageInit and then set its own opaque
 * fields.
 */
/*
 * location (byte offset) within a page.
 *
 * note that this is actually limited to 2^15 because we have limited
 * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
 */
typedef uint16 LocationIndex;
87 * For historical reasons, the 64-bit LSN value is stored as two 32-bit
92 uint32 xlogid; /* high bits */
93 uint32 xrecoff; /* low bits */
/*
 * PageXLogRecPtrGet
 *		Combine the two 32-bit halves of a split LSN into one 64-bit value:
 *		xlogid supplies the high 32 bits, xrecoff the low 32 bits.
 *
 * Beware: "val" is evaluated twice.
 */
#define PageXLogRecPtrGet(val) \
	((((uint64) (val).xlogid) << 32) | (val).xrecoff)
/*
 * PageXLogRecPtrSet
 *		Store a 64-bit LSN into a split LSN: the upper 32 bits go into
 *		xlogid and the lower 32 bits into xrecoff.
 *
 * Beware: both "ptr" and "lsn" are evaluated twice.
 */
#define PageXLogRecPtrSet(ptr, lsn) \
( \
	(ptr).xlogid = (uint32) ((lsn) >> 32), \
	(ptr).xrecoff = (uint32) (lsn) \
)
102 * disk page organization
104 * space management information generic to any page
106 * pd_lsn - identifies xlog record for last change to this page.
107 * pd_checksum - page checksum, if set.
108 * pd_flags - flag bits.
109 * pd_lower - offset to start of free space.
110 * pd_upper - offset to end of free space.
111 * pd_special - offset to start of special space.
112 * pd_pagesize_version - size in bytes and page layout version number.
113 * pd_prune_xid - oldest XID among potentially prunable tuples on page.
115 * The LSN is used by the buffer manager to enforce the basic rule of WAL:
116 * "thou shalt write xlog before data". A dirty buffer cannot be dumped
117 * to disk until xlog has been flushed at least as far as the page's LSN.
119 * pd_checksum stores the page checksum, if it has been set for this page;
120 * zero is a valid value for a checksum. If a checksum is not in use then
121 * we leave the field unset. This will typically mean the field is zero
122 * though non-zero values may also be present if databases have been
123 * pg_upgraded from releases prior to 9.3, when the same byte offset was
124 * used to store the current timelineid when the page was last updated.
125 * Note that there is no indication on a page as to whether the checksum
126 * is valid or not, a deliberate design choice which avoids the problem
127 * of relying on the page contents to decide whether to verify it. Hence
128 * there are no flag bits relating to checksums.
130 * pd_prune_xid is a hint field that helps determine whether pruning will be
131 * useful. It is currently unused in index pages.
133 * The page version number and page size are packed together into a single
134 * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
135 * there was no concept of a page version number, and doing it this way
136 * lets us pretend that pre-7.3 databases have page version number zero.
137 * We constrain page sizes to be multiples of 256, leaving the low eight
138 * bits available for a version number.
140 * Minimum possible page size is perhaps 64B to fit page header, opaque space
141 * and a minimal tuple; of course, in reality you want it much bigger, so
142 * the constraint on pagesize mod 256 is not an important restriction.
143 * On the high end, we can only support pages up to 32KB because lp_off/lp_len
147 typedef struct PageHeaderData
149 /* XXX LSN is member of *any* block, not only page-organized ones */
150 PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
151 * record for last change to this page */
152 uint16 pd_checksum; /* checksum */
153 uint16 pd_flags; /* flag bits, see below */
154 LocationIndex pd_lower; /* offset to start of free space */
155 LocationIndex pd_upper; /* offset to end of free space */
156 LocationIndex pd_special; /* offset to start of special space */
157 uint16 pd_pagesize_version;
158 TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
159 ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
162 typedef PageHeaderData *PageHeader;
/*
 * pd_flags contains the following flag bits.  Undefined bits are initialized
 * to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 */
#define PD_HAS_FREE_LINES	0x0001		/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002		/* not enough free space for new
										 * tuple? */
#define PD_ALL_VISIBLE		0x0004		/* all tuples on page are visible to
										 * everyone */

#define PD_VALID_FLAG_BITS	0x0007		/* OR of all valid pd_flags bits */
/*
 * Page layout version number 0 is for pre-7.3 Postgres releases.
 * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
 * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
 * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
 * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
 *		added the pd_flags field (by stealing some bits from pd_tli),
 *		as well as adding the pd_prune_xid field (which enlarges the header).
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION 4
#define PG_DATA_CHECKSUM_VERSION 1
/* ----------------------------------------------------------------
 *						page support macros
 * ----------------------------------------------------------------
 */
/*
 * PageIsValid
 *		True iff page is valid.
 *
 * This is a plain alias for PointerIsValid applied to the page pointer;
 * it does not inspect the page contents.
 */
#define PageIsValid(page) PointerIsValid(page)
/*
 * SizeOfPageHeaderData
 *		Size of the fixed part of the page header.
 *
 * line pointer(s) do not count as part of header, so this is the offset
 * of the pd_linp array rather than sizeof(PageHeaderData).
 */
#define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
/*
 * PageIsEmpty
 *		returns true iff no itemid has been allocated on the page
 *
 * A page counts as empty when pd_lower has not advanced past the fixed
 * header, i.e. the line pointer array holds no entries.
 */
#define PageIsEmpty(page) \
	(SizeOfPageHeaderData >= ((PageHeader) (page))->pd_lower)
/*
 * PageIsNew
 *		returns true iff page has not been initialized (by PageInit)
 *
 * The test is simply whether pd_upper is still zero.
 */
#define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0)
/*
 * PageGetItemId
 *		Returns an item identifier of a page.
 *
 * offsetNumber is 1-based, so entry N lives at pd_linp[N - 1].  No range
 * checking is done here.
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (((PageHeader) (page))->pd_linp + ((offsetNumber) - 1)))
/*
 * PageGetContents
 *		To be used in case the page does not contain item pointers.
 *
 * Yields a char pointer to the first byte after the MAXALIGN'd page header.
 *
 * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
 * Now it is.  Beware of old code that might think the offset to the contents
 * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
 */
#define PageGetContents(page) \
	(MAXALIGN(SizeOfPageHeaderData) + (char *) (page))
/* ----------------
 *		macros to access page size info
 * ----------------
 */

/*
 * PageSizeIsValid
 *		True iff the page size is valid.
 *
 * The only size accepted is exactly BLCKSZ.
 */
#define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ)
/*
 * PageGetPageSize
 *		Returns the page size of a page.
 *
 * The size is the high byte of pd_pagesize_version; the low byte (the
 * layout version) is masked off.
 *
 * this can only be called on a formatted page (unlike
 * BufferGetPageSize, which can be called on an unformatted page).
 * however, it can be called on a page that is not stored in a buffer.
 */
#define PageGetPageSize(page) \
	((Size) ((uint16) 0xFF00 & ((PageHeader) (page))->pd_pagesize_version))
/*
 * PageGetPageLayoutVersion
 *		Returns the page layout version of a page.
 *
 * The version is the low byte of pd_pagesize_version; the page size held
 * in the high byte is masked off.
 */
#define PageGetPageLayoutVersion(page) \
	(0x00FF & ((PageHeader) (page))->pd_pagesize_version)
/*
 * PageSetPageSizeAndVersion
 *		Sets the page size and page layout version number of a page.
 *
 * Both values are packed into pd_pagesize_version: "size" must fit entirely
 * within the 0xFF00 mask and "version" within the 0x00FF mask, which the
 * two AssertMacro checks enforce in assert-enabled builds.
 *
 * We could support setting these two values separately, but there's
 * no real need for it at the moment.
 *
 * The body is a single parenthesized comma expression, so the macro can be
 * used anywhere an expression is allowed.  Beware multiple evaluation of
 * "size" and "version".
 */
#define PageSetPageSizeAndVersion(page, size, version) \
( \
	AssertMacro(((size) & 0xFF00) == (size)), \
	AssertMacro(((version) & 0x00FF) == (version)), \
	((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
)
/* ----------------
 *		page special data macros
 * ----------------
 */

/*
 * PageGetSpecialSize
 *		Returns size of special space on a page.
 *
 * Computed as the page size (via PageGetPageSize) minus pd_special, so
 * "page" is evaluated twice.
 */
#define PageGetSpecialSize(page) \
	((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))
/*
 * PageGetSpecialPointer
 *		Returns pointer to special space on a page.
 *
 * The result is the page address plus the pd_special offset, as a char *.
 * The body is a single parenthesized comma expression; "page" is evaluated
 * more than once.
 */
#define PageGetSpecialPointer(page) \
( \
	AssertMacro(PageIsValid(page)), \
	(char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
)
/*
 * PageGetItem
 *		Retrieves an item on the given page.
 *
 * The item address is the page address plus the offset recorded in the
 * item identifier (ItemIdGetOffset).  The identifier must have storage.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 *
 * The body is a single parenthesized comma expression; "page" and "itemId"
 * are each evaluated more than once.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
)
/*
 * PageGetMaxOffsetNumber
 *		Returns the maximum offset number used by the given page.
 *		Since offset numbers are 1-based, this is also the number
 *		of items on the page.
 *
 * The count is the number of ItemIdData entries that fit between the end
 * of the fixed header and pd_lower.
 *
 * NOTE: if the page is not initialized (pd_lower == 0), we must
 * return zero to ensure sane behavior.  Accept double evaluation
 * of the argument so that we can ensure this.
 */
#define PageGetMaxOffsetNumber(page) \
	(((PageHeader) (page))->pd_lower > SizeOfPageHeaderData ? \
	 ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
	  / sizeof(ItemIdData)) : 0)
/*
 * Additional macros for access to page headers.  (Beware multiple evaluation
 * of the arguments!)
 */

/*
 * PageGetLSN / PageSetLSN read and write the page's pd_lsn field by way of
 * PageXLogRecPtrGet / PageXLogRecPtrSet, converting between the split
 * two-word representation stored on the page and a single 64-bit LSN.
 */
#define PageGetLSN(page) \
	PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
	PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)
/*
 * Test, set and clear the PD_HAS_FREE_LINES bit in pd_flags.
 *
 * PageHasFreeLinePointers yields the masked bit itself (nonzero when set),
 * not a normalized 0/1 value.
 */
#define PageHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
#define PageClearHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)
/*
 * Test, set and clear the PD_PAGE_FULL bit in pd_flags.
 *
 * PageIsFull yields the masked bit itself (nonzero when set), not a
 * normalized 0/1 value.
 */
#define PageIsFull(page) \
	(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
#define PageSetFull(page) \
	(((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
#define PageClearFull(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)
/*
 * Test, set and clear the PD_ALL_VISIBLE bit in pd_flags.
 *
 * PageIsAllVisible yields the masked bit itself (nonzero when set), not a
 * normalized 0/1 value.
 */
#define PageIsAllVisible(page) \
	(((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
#define PageSetAllVisible(page) \
	(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
/*
 * PageIsPrunable
 *		True iff the page's pd_prune_xid is a valid XID that precedes
 *		oldestxmin.
 *
 * oldestxmin must be a normal XID (checked by AssertMacro in assert-enabled
 * builds).  The body is a single parenthesized comma expression, so the
 * macro is usable as a condition; "page" is evaluated twice.
 */
#define PageIsPrunable(page, oldestxmin) \
( \
	AssertMacro(TransactionIdIsNormal(oldestxmin)), \
	TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
	TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
)
/*
 * PageSetPrunable
 *		Record xid in pd_prune_xid if the field is currently invalid or
 *		xid precedes the value already stored there; otherwise leave the
 *		field alone.
 *
 * xid must be a normal XID (checked by Assert in assert-enabled builds).
 * This is a statement macro wrapped in do { ... } while (0) so that it
 * behaves as one statement, including inside an unbraced if/else.  Both
 * arguments are evaluated more than once.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid)); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)
/*
 * PageClearPrunable
 *		Reset pd_prune_xid to InvalidTransactionId.
 */
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
387 /* ----------------------------------------------------------------
388 * extern declarations
389 * ----------------------------------------------------------------
392 extern void PageInit(Page page, Size pageSize, Size specialSize);
393 extern bool PageIsVerified(Page page, BlockNumber blkno);
394 extern OffsetNumber PageAddItem(Page page, Item item, Size size,
395 OffsetNumber offsetNumber, bool overwrite, bool is_heap);
396 extern Page PageGetTempPage(Page page);
397 extern Page PageGetTempPageCopy(Page page);
398 extern Page PageGetTempPageCopySpecial(Page page);
399 extern void PageRestoreTempPage(Page tempPage, Page oldPage);
400 extern void PageRepairFragmentation(Page page);
401 extern Size PageGetFreeSpace(Page page);
402 extern Size PageGetExactFreeSpace(Page page);
403 extern Size PageGetHeapFreeSpace(Page page);
404 extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
405 extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
406 extern void PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos,
408 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
409 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
411 #endif /* BUFPAGE_H */