1 /*-------------------------------------------------------------------------
4 * POSTGRES standard buffer page code.
6 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/storage/page/bufpage.c
13 *-------------------------------------------------------------------------
17 #include "access/htup_details.h"
18 #include "access/itup.h"
19 #include "access/xlog.h"
20 #include "storage/checksum.h"
21 #include "utils/memdebug.h"
22 #include "utils/memutils.h"
/*
 * Switch that is false by default.  In the visible code it is read by
 * PageIsVerified, which tests it together with header_sane in the code
 * that follows the checksum-failure report.
 */
26 bool ignore_checksum_failure = false;
29 /* ----------------------------------------------------------------
30 * Page support functions
31 * ----------------------------------------------------------------
36 * Initializes the contents of a page.
37 * Note that we don't calculate an initial checksum here; that's not done
38 * until it's time to write.
/*
 * PageInit: set up the header of a freshly zeroed page.
 *
 * page        - buffer to initialize; all pageSize bytes are zeroed first.
 * pageSize    - size of the page; asserted to equal BLCKSZ.
 * specialSize - requested size of the special space; it is rounded up with
 *               MAXALIGN and asserted to leave room for the page header.
 *
 * On exit pd_lower is SizeOfPageHeaderData, and pd_upper and pd_special are
 * both pageSize - specialSize.  The page size and PG_PAGE_LAYOUT_VERSION are
 * recorded through PageSetPageSizeAndVersion.
 *
 * NOTE(review): the return type line is not visible in this excerpt.
 */
41 PageInit(Page page, Size pageSize, Size specialSize)
43 PageHeader p = (PageHeader) page;
/* round up before the assertions so they test the aligned size */
45 specialSize = MAXALIGN(specialSize);
47 Assert(pageSize == BLCKSZ);
48 Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 /* Make sure all fields of page are zero, as well as unused space */
51 MemSet(p, 0, pageSize);
54 p->pd_lower = SizeOfPageHeaderData;
55 p->pd_upper = pageSize - specialSize;
56 p->pd_special = pageSize - specialSize;
57 PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
58 /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
64 * Check that the page header and checksum (if any) appear valid.
66 * This is called when a page has just been read in from disk. The idea is
67 * to cheaply detect trashed pages before we go nuts following bogus item
68 * pointers, testing invalid transaction identifiers, etc.
70 * It turns out to be necessary to allow zeroed pages here too. Even though
71 * this routine is *not* called when deliberately adding a page to a relation,
72 * there are scenarios in which a zeroed page might be found in a table.
73 * (Example: a backend extends a relation, then crashes before it can write
74 * any WAL entry about the new page. The kernel will already have the
75 * zeroed page in the file, and it will stay that way after restart.) So we
76 * allow zeroed pages here, and are careful that the page access macros
77 * treat such a page as empty and without free space. Eventually, VACUUM
78 * will clean up such a page and make it usable.
/*
 * PageIsVerified: decide whether a page image looks usable.
 *
 * page  - the page image to examine.
 * blkno - block number, handed to pg_checksum_page when the checksum is
 *         recomputed.
 *
 * Three findings are tracked in local flags:
 *   checksum_failure - set when DataChecksumsEnabled() and the recomputed
 *                      checksum differs from pd_checksum;
 *   header_sane      - tied to the test that pd_flags has no bits outside
 *                      PD_VALID_FLAG_BITS, that
 *                      pd_lower <= pd_upper <= pd_special <= BLCKSZ, and that
 *                      pd_special is MAXALIGNed;
 *   all_zeroes       - tied to the scan of all BLCKSZ bytes for a nonzero
 *                      byte.
 * A checksum failure is reported only after the all-zeroes scan, and
 * ignore_checksum_failure is tested together with header_sane after that
 * report.
 *
 * The checksum report wraps the SQLSTATE in errcode(), like every other
 * report in this file.  A bare ERRCODE_DATA_CORRUPTED would merely be the
 * left operand of a comma expression and would never reach the report.
 *
 * NOTE(review): the return statements and several declarations are not
 * visible in this excerpt; confirm the result for each flag combination
 * against the full file.
 */
81 PageIsVerified(Page page, BlockNumber blkno)
83 PageHeader p = (PageHeader) page;
86 bool checksum_failure = false;
87 bool header_sane = false;
88 bool all_zeroes = false;
92 * Don't verify page data unless the page passes basic non-zero test
96 if (DataChecksumsEnabled())
98 checksum = pg_checksum_page((char *) page, blkno);
100 if (checksum != p->pd_checksum)
101 checksum_failure = true;
105 * The following checks don't prove the header is correct, only that
106 * it looks sane enough to allow into the buffer pool. Later usage of
107 * the block can still reveal problems, which is why we offer the
110 if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
111 p->pd_lower <= p->pd_upper &&
112 p->pd_upper <= p->pd_special &&
113 p->pd_special <= BLCKSZ &&
114 p->pd_special == MAXALIGN(p->pd_special))
117 if (header_sane && !checksum_failure)
121 /* Check all-zeroes case */
123 pagebytes = (char *) page;
124 for (i = 0; i < BLCKSZ; i++)
126 if (pagebytes[i] != 0)
137 * Throw a WARNING if the checksum fails, but only after we've checked for
138 * the all-zeroes case.
140 if (checksum_failure)
/* errcode() attaches the SQLSTATE to the report */
143 (errcode(ERRCODE_DATA_CORRUPTED),
144 errmsg("page verification failed, calculated checksum %u but expected %u",
145 checksum, p->pd_checksum)));
147 if (header_sane && ignore_checksum_failure)
158 * Add an item to a page. Return value is offset at which it was
159 * inserted, or InvalidOffsetNumber if there's not room to insert.
161 * If overwrite is true, we just store the item at the specified
162 * offsetNumber (which must be either a currently-unused item pointer,
163 * or one past the last existing item). Otherwise,
164 * if offsetNumber is valid and <= current max offset in the page,
165 * insert item into the array at that position by shuffling the ItemIds at and after that position up one slot to make room.
167 * If offsetNumber is not valid, then assign one by finding the first
168 * one that is both unused and deallocated.
170 * If is_heap is true, we enforce that there can't be more than
171 * MaxHeapTuplesPerPage line pointers on the page.
173 * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
/*
 * PageAddItem: copy one item onto a page and create its line pointer.
 *
 * The visible code proceeds in these steps:
 *   1. Check pd_lower, pd_upper and pd_special against each other and
 *      against BLCKSZ, reporting ERRCODE_DATA_CORRUPTED on a mismatch.
 *   2. Choose the target slot.  limit is one past the current maximum
 *      offset number.  A valid caller-supplied offsetNumber is used as
 *      given; an occupied slot draws a WARNING and InvalidOffsetNumber, and
 *      needshuffle is set when existing line pointers must move.  Otherwise
 *      the line pointer array is searched for an unused slot without
 *      storage when the free-line-pointer hint is set, and the hint is
 *      cleared when the search reaches limit.
 *   3. Reject an offset beyond limit, and for is_heap pages an offset beyond
 *      MaxHeapTuplesPerPage, each with a WARNING and InvalidOffsetNumber.
 *   4. Compute the new lower and upper bounds in signed arithmetic, using
 *      the MAXALIGNed item size.
 *   5. Shift later line pointers up one slot with memmove, fill in the line
 *      pointer with ItemIdSetNormal, copy the item bytes to page + upper,
 *      and store the new pd_lower and pd_upper.
 *
 * Failures after step 1 are reported with elog(WARNING) and a return of
 * InvalidOffsetNumber, never with an ERROR.
 *
 * NOTE(review): parts of the parameter list (item, size, overwrite,
 * is_heap), the local declarations, the fit test before the return at
 * source line 288 and the final return are not visible in this excerpt.
 */
176 PageAddItem(Page page,
179 OffsetNumber offsetNumber,
183 PageHeader phdr = (PageHeader) page;
189 bool needshuffle = false;
192 * Be wary about corrupted page pointers
194 if (phdr->pd_lower < SizeOfPageHeaderData ||
195 phdr->pd_lower > phdr->pd_upper ||
196 phdr->pd_upper > phdr->pd_special ||
197 phdr->pd_special > BLCKSZ)
199 (errcode(ERRCODE_DATA_CORRUPTED),
200 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
201 phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
204 * Select offsetNumber to place the new item at
206 limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
208 /* was offsetNumber passed in? */
209 if (OffsetNumberIsValid(offsetNumber))
214 if (offsetNumber < limit)
216 itemId = PageGetItemId(phdr, offsetNumber);
217 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
219 elog(WARNING, "will not overwrite a used ItemId");
220 return InvalidOffsetNumber;
226 if (offsetNumber < limit)
227 needshuffle = true; /* need to move existing linp's */
232 /* offsetNumber was not passed in, so find a free slot */
233 /* if no free slot, we'll put it at limit (1st open slot) */
234 if (PageHasFreeLinePointers(phdr))
237 * Look for "recyclable" (unused) ItemId. We check for no storage
238 * as well, just to be paranoid --- unused items should never have
241 for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
243 itemId = PageGetItemId(phdr, offsetNumber);
244 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
247 if (offsetNumber >= limit)
249 /* the hint is wrong, so reset it */
250 PageClearHasFreeLinePointers(phdr);
255 /* don't bother searching if hint says there's no free slot */
256 offsetNumber = limit;
260 if (offsetNumber > limit)
262 elog(WARNING, "specified item offset is too large");
263 return InvalidOffsetNumber;
266 if (is_heap && offsetNumber > MaxHeapTuplesPerPage)
268 elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
269 return InvalidOffsetNumber;
273 * Compute new lower and upper pointers for page, see if it'll fit.
275 * Note: do arithmetic as signed ints, to avoid mistakes if, say,
276 * alignedSize > pd_upper.
278 if (offsetNumber == limit || needshuffle)
279 lower = phdr->pd_lower + sizeof(ItemIdData);
281 lower = phdr->pd_lower;
283 alignedSize = MAXALIGN(size);
285 upper = (int) phdr->pd_upper - (int) alignedSize;
288 return InvalidOffsetNumber;
291 * OK to insert the item. First, shuffle the existing pointers if needed.
293 itemId = PageGetItemId(phdr, offsetNumber);
296 memmove(itemId + 1, itemId,
297 (limit - offsetNumber) * sizeof(ItemIdData));
299 /* set the item pointer */
300 ItemIdSetNormal(itemId, upper, size);
303 * Items normally contain no uninitialized bytes. Core bufpage consumers
304 * conform, but this is not a necessary coding rule; a new index AM could
305 * opt to depart from it. However, data type input functions and other
306 * C-language functions that synthesize datums should initialize all
307 * bytes; datumIsEqual() relies on this. Testing here, along with the
308 * similar check in printtup(), helps to catch such mistakes.
310 * Values of the "name" type retrieved via index-only scans may contain
311 * uninitialized bytes; see comment in btrescan(). Valgrind will report
312 * this as an error, but it is safe to ignore.
314 VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
316 /* copy the item's data onto the page */
317 memcpy((char *) page + upper, item, size);
319 /* adjust page header */
320 phdr->pd_lower = (LocationIndex) lower;
321 phdr->pd_upper = (LocationIndex) upper;
328 * Get a temporary page in local memory for special processing.
329 * The returned page is not initialized at all; caller must do that.
/*
 * PageGetTempPage: palloc a scratch buffer as large as the page size
 * recorded in the given page.  Nothing is copied into the new buffer here.
 *
 * NOTE(review): the local declarations and the return statement are not
 * visible in this excerpt.
 */
332 PageGetTempPage(Page page)
337 pageSize = PageGetPageSize(page);
338 temp = (Page) palloc(pageSize);
344 * PageGetTempPageCopy
345 * Get a temporary page in local memory for special processing.
346 * The page is initialized by copying the contents of the given page.
/*
 * PageGetTempPageCopy: palloc a buffer as large as the page size recorded in
 * the given page, then memcpy the whole page into it.
 *
 * NOTE(review): the local declarations and the return statement are not
 * visible in this excerpt.
 */
349 PageGetTempPageCopy(Page page)
354 pageSize = PageGetPageSize(page);
355 temp = (Page) palloc(pageSize);
/* byte-for-byte duplicate of the source page, header included */
357 memcpy(temp, page, pageSize);
363 * PageGetTempPageCopySpecial
364 * Get a temporary page in local memory for special processing.
365 * The page is PageInit'd with the same special-space size as the
366 * given page, and the special space is copied from the given page.
/*
 * PageGetTempPageCopySpecial: palloc a buffer of the same page size, run
 * PageInit on it with the special-space size of the given page, and copy
 * only the special space across.
 *
 * NOTE(review): the local declarations and the return statement are not
 * visible in this excerpt.
 */
369 PageGetTempPageCopySpecial(Page page)
374 pageSize = PageGetPageSize(page);
375 temp = (Page) palloc(pageSize);
377 PageInit(temp, pageSize, PageGetSpecialSize(page));
/* the copy length is the special-space size of the source page */
378 memcpy(PageGetSpecialPointer(temp),
379 PageGetSpecialPointer(page),
380 PageGetSpecialSize(page));
386 * PageRestoreTempPage
387 * Copy temporary page back to permanent page after special processing
388 * and release the temporary page.
/*
 * PageRestoreTempPage: overwrite oldPage with the contents of tempPage.
 *
 * The number of bytes copied is the page size recorded in tempPage, not the
 * one recorded in oldPage.
 *
 * NOTE(review): the preceding header comment says the temporary page is
 * released, but that statement is not visible in this excerpt.
 */
391 PageRestoreTempPage(Page tempPage, Page oldPage)
395 pageSize = PageGetPageSize(tempPage);
396 memcpy((char *) oldPage, (char *) tempPage, pageSize);
402 * sorting support for PageRepairFragmentation, PageIndexMultiDelete,
403 * PageIndexDeleteNoCompact
/*
 * Work record for one line pointer while item data on a page is being
 * compacted.  Arrays of these records are ordered by itemoffcompare, which
 * looks only at the itemoff field.  itemIdSort is a pointer to one record.
 */
405 typedef struct itemIdSortData
407 int offsetindex; /* linp array index */
408 int itemoff; /* page offset of item data */
409 Size alignedlen; /* MAXALIGN(item data len) */
410 ItemIdData olditemid; /* used only in PageIndexMultiDelete */
412 typedef itemIdSortData *itemIdSort;
/*
 * itemoffcompare: comparison callback over itemIdSortData records.
 *
 * Returns the itemoff of the second record minus the itemoff of the first.
 * The result is positive when the second record has the larger offset, so
 * records with larger offsets sort first.
 *
 * NOTE(review): the return type line is not visible in this excerpt.
 */
415 itemoffcompare(const void *itemidp1, const void *itemidp2)
417 /* Sort in decreasing itemoff order */
418 return ((itemIdSort) itemidp2)->itemoff -
419 ((itemIdSort) itemidp1)->itemoff;
423 * PageRepairFragmentation
425 * Frees fragmented space on a page.
426 * It doesn't remove unused line pointers! Please don't change this.
428 * This routine is usable for heap pages only, but see PageIndexMultiDelete.
430 * As a side effect, the page's PD_HAS_FREE_LINES hint bit is updated.
/*
 * PageRepairFragmentation: pack the item data of a page toward pd_special.
 *
 * The visible code proceeds in these steps:
 *   1. Snapshot pd_lower, pd_upper and pd_special and validate them,
 *      including MAXALIGNment of pd_special; a mismatch is reported with
 *      ERRCODE_DATA_CORRUPTED.
 *   2. Walk the line pointers from FirstOffsetNumber to the maximum offset
 *      number, classifying them by ItemIdIsUsed and ItemIdHasStorage.
 *      nunused and nstorage start at zero for this walk.
 *   3. On the empty-page path, set pd_upper to pd_special.
 *   4. Otherwise record offsetindex, itemoff and the MAXALIGNed length of
 *      every pointer with storage in itemidbase.  Each itemoff is checked to
 *      lie in [pd_upper, pd_special), and the summed lengths are checked
 *      against pd_special - pd_lower.
 *   5. Sort the records by decreasing itemoff, then memmove each item to its
 *      new place, working down from the top, and store the final upper bound
 *      in pd_upper.
 *   6. Set or clear the free-line-pointer hint.
 *
 * NOTE(review): itemoff is declared int but is printed with %u in the
 * "corrupted item pointer" message.
 * NOTE(review): itemidbase holds MaxHeapTuplesPerPage records; what bounds
 * the line pointer count before the fill loop is not visible in this
 * excerpt.  The conditions selecting steps 3, 4 and 6 and the updates of
 * each line pointer offset are likewise not visible.
 */
433 PageRepairFragmentation(Page page)
435 Offset pd_lower = ((PageHeader) page)->pd_lower;
436 Offset pd_upper = ((PageHeader) page)->pd_upper;
437 Offset pd_special = ((PageHeader) page)->pd_special;
447 * It's worth the trouble to be more paranoid here than in most places,
448 * because we are about to reshuffle data in (what is usually) a shared
449 * disk buffer. If we aren't careful then corrupted pointers, lengths,
450 * etc could cause us to clobber adjacent disk buffers, spreading the data
451 * loss further. So, check everything.
453 if (pd_lower < SizeOfPageHeaderData ||
454 pd_lower > pd_upper ||
455 pd_upper > pd_special ||
456 pd_special > BLCKSZ ||
457 pd_special != MAXALIGN(pd_special))
459 (errcode(ERRCODE_DATA_CORRUPTED),
460 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
461 pd_lower, pd_upper, pd_special)));
463 nline = PageGetMaxOffsetNumber(page);
464 nunused = nstorage = 0;
465 for (i = FirstOffsetNumber; i <= nline; i++)
467 lp = PageGetItemId(page, i);
468 if (ItemIdIsUsed(lp))
470 if (ItemIdHasStorage(lp))
475 /* Unused entries should have lp_len = 0, but make sure */
483 /* Page is completely empty, so just reset it quickly */
484 ((PageHeader) page)->pd_upper = pd_special;
488 /* Need to compact the page the hard way */
489 itemIdSortData itemidbase[MaxHeapTuplesPerPage];
490 itemIdSort itemidptr = itemidbase;
493 for (i = 0; i < nline; i++)
495 lp = PageGetItemId(page, i + 1);
496 if (ItemIdHasStorage(lp))
498 itemidptr->offsetindex = i;
499 itemidptr->itemoff = ItemIdGetOffset(lp);
500 if (itemidptr->itemoff < (int) pd_upper ||
501 itemidptr->itemoff >= (int) pd_special)
503 (errcode(ERRCODE_DATA_CORRUPTED),
504 errmsg("corrupted item pointer: %u",
505 itemidptr->itemoff)));
506 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
507 totallen += itemidptr->alignedlen;
512 if (totallen > (Size) (pd_special - pd_lower))
514 (errcode(ERRCODE_DATA_CORRUPTED),
515 errmsg("corrupted item lengths: total %u, available space %u",
516 (unsigned int) totallen, pd_special - pd_lower)));
518 /* sort itemIdSortData array into decreasing itemoff order */
519 qsort((char *) itemidbase, nstorage, sizeof(itemIdSortData),
522 /* compactify page */
525 for (i = 0, itemidptr = itemidbase; i < nstorage; i++, itemidptr++)
527 lp = PageGetItemId(page, itemidptr->offsetindex + 1);
528 upper -= itemidptr->alignedlen;
529 memmove((char *) page + upper,
530 (char *) page + itemidptr->itemoff,
531 itemidptr->alignedlen);
535 ((PageHeader) page)->pd_upper = upper;
538 /* Set hint bit for PageAddItem */
540 PageSetHasFreeLinePointers(page);
542 PageClearHasFreeLinePointers(page);
547 * Returns the size of the free (allocatable) space on a page,
548 * reduced by the space needed for a new line pointer.
550 * Note: this should usually only be used on index pages. Use
551 * PageGetHeapFreeSpace on heap pages.
/*
 * PageGetFreeSpace: free bytes between pd_lower and pd_upper, less the size
 * of one ItemIdData.
 *
 * The difference is taken after casting both header fields to int, and it
 * is compared against sizeof(ItemIdData) before that size is subtracted.
 *
 * NOTE(review): the statement taken when the space is smaller than one
 * ItemIdData, and the final return, are not visible in this excerpt.
 */
554 PageGetFreeSpace(Page page)
559 * Use signed arithmetic here so that we behave sensibly if pd_lower >
562 space = (int) ((PageHeader) page)->pd_upper -
563 (int) ((PageHeader) page)->pd_lower;
565 if (space < (int) sizeof(ItemIdData))
567 space -= sizeof(ItemIdData);
573 * PageGetExactFreeSpace
574 * Returns the size of the free (allocatable) space on a page,
575 * without any consideration for adding/removing line pointers.
/*
 * PageGetExactFreeSpace: pd_upper minus pd_lower, computed after casting
 * both header fields to int.  No line pointer allowance is subtracted here.
 *
 * NOTE(review): the handling of a negative difference and the return
 * statement are not visible in this excerpt.
 */
578 PageGetExactFreeSpace(Page page)
583 * Use signed arithmetic here so that we behave sensibly if pd_lower >
586 space = (int) ((PageHeader) page)->pd_upper -
587 (int) ((PageHeader) page)->pd_lower;
597 * PageGetHeapFreeSpace
598 * Returns the size of the free (allocatable) space on a page,
599 * reduced by the space needed for a new line pointer.
601 * The difference between this and PageGetFreeSpace is that this will return
602 * zero if there are already MaxHeapTuplesPerPage line pointers in the page
603 * and none are free. We use this to enforce that no more than
604 * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
605 * no more tuples than that could fit anyway, in the presence of redirected
606 * or dead line pointers it'd be possible to have too many line pointers.
607 * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
608 * on the number of line pointers, we make this extra check.)
/*
 * PageGetHeapFreeSpace: PageGetFreeSpace plus a line pointer count check.
 *
 * Starts from the value of PageGetFreeSpace.  When the page already has at
 * least MaxHeapTuplesPerPage line pointers and the free-line-pointer hint is
 * set, the line pointers are scanned from FirstOffsetNumber for one that is
 * not in use.  This function never clears the hint.
 *
 * NOTE(review): the assignments that zero the result, the loop exit and the
 * return statement are not visible in this excerpt.
 */
611 PageGetHeapFreeSpace(Page page)
615 space = PageGetFreeSpace(page);
622 * Are there already MaxHeapTuplesPerPage line pointers in the page?
624 nline = PageGetMaxOffsetNumber(page);
625 if (nline >= MaxHeapTuplesPerPage)
627 if (PageHasFreeLinePointers((PageHeader) page))
630 * Since this is just a hint, we must confirm that there is
631 * indeed a free line pointer
633 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
635 ItemId lp = PageGetItemId(page, offnum);
637 if (!ItemIdIsUsed(lp))
644 * The hint is wrong, but we can't clear it here since we
645 * don't have the ability to mark the page dirty.
653 * Although the hint might be wrong, PageAddItem will believe
654 * it anyway, so we must believe it too.
665 * PageIndexTupleDelete
667 * This routine does the work of removing a tuple from an index page.
669 * Unlike heap pages, we compact out the line pointer for the removed tuple.
/*
 * PageIndexTupleDelete: remove one tuple and its line pointer from a page.
 *
 * page   - page to modify in place.
 * offnum - offset number of the tuple; it must lie between 1 and the
 *          current maximum offset number, else elog(ERROR) is raised.
 *
 * The visible code proceeds in these steps:
 *   1. Validate pd_lower, pd_upper and pd_special, reporting
 *      ERRCODE_DATA_CORRUPTED on a mismatch.
 *   2. Fetch the length and offset of the tuple and check that the tuple
 *      lies within [pd_upper, pd_special] and that both values are
 *      MAXALIGNed.
 *   3. Close the gap in the line pointer array by moving the later entries
 *      down one slot with memmove.
 *   4. Move the item data lying between pd_upper and the deleted tuple up by
 *      the size of the deleted tuple, then add that size to pd_upper and
 *      take sizeof(ItemIdData) off pd_lower.
 *   5. Unless the page is now empty, walk the remaining line pointers and
 *      test which ones have an offset at or below that of the deleted tuple.
 *
 * NOTE(review): the local declarations, the assignment of offidx, the guard
 * around the line pointer memmove and the adjustment made in step 5 are not
 * visible in this excerpt.
 */
672 PageIndexTupleDelete(Page page, OffsetNumber offnum)
674 PageHeader phdr = (PageHeader) page;
684 * As with PageRepairFragmentation, paranoia seems justified.
686 if (phdr->pd_lower < SizeOfPageHeaderData ||
687 phdr->pd_lower > phdr->pd_upper ||
688 phdr->pd_upper > phdr->pd_special ||
689 phdr->pd_special > BLCKSZ)
691 (errcode(ERRCODE_DATA_CORRUPTED),
692 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
693 phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
695 nline = PageGetMaxOffsetNumber(page);
696 if ((int) offnum <= 0 || (int) offnum > nline)
697 elog(ERROR, "invalid index offnum: %u", offnum);
699 /* change offset number to offset index */
702 tup = PageGetItemId(page, offnum);
703 Assert(ItemIdHasStorage(tup));
704 size = ItemIdGetLength(tup);
705 offset = ItemIdGetOffset(tup);
707 if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
708 offset != MAXALIGN(offset) || size != MAXALIGN(size))
710 (errcode(ERRCODE_DATA_CORRUPTED),
711 errmsg("corrupted item pointer: offset = %u, size = %u",
712 offset, (unsigned int) size)));
715 * First, we want to get rid of the pd_linp entry for the index tuple. We
716 * copy all subsequent linp's back one slot in the array. We don't use
717 * PageGetItemId, because we are manipulating the _array_, not individual
720 nbytes = phdr->pd_lower -
721 ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
724 memmove((char *) &(phdr->pd_linp[offidx]),
725 (char *) &(phdr->pd_linp[offidx + 1]),
729 * Now move everything between the old upper bound (beginning of tuple
730 * space) and the beginning of the deleted tuple forward, so that space in
731 * the middle of the page is left free. If we've just deleted the tuple
732 * at the beginning of tuple space, then there's no need to do the copy
733 * (and bcopy on some architectures SEGV's if asked to move zero bytes).
736 /* beginning of tuple space */
737 addr = (char *) page + phdr->pd_upper;
739 if (offset > phdr->pd_upper)
740 memmove(addr + size, addr, (int) (offset - phdr->pd_upper));
742 /* adjust free space boundary pointers */
743 phdr->pd_upper += size;
744 phdr->pd_lower -= sizeof(ItemIdData);
747 * Finally, we need to adjust the linp entries that remain.
749 * Anything that used to be before the deleted tuple's data was moved
750 * forward by the size of the deleted tuple.
752 if (!PageIsEmpty(page))
756 nline--; /* there's one less than when we started */
757 for (i = 1; i <= nline; i++)
759 ItemId ii = PageGetItemId(phdr, i);
761 Assert(ItemIdHasStorage(ii));
762 if (ItemIdGetOffset(ii) <= offset)
770 * PageIndexMultiDelete
772 * This routine handles the case of deleting multiple tuples from an
773 * index page at once. It is considerably faster than a loop around
774 * PageIndexTupleDelete ... however, the caller *must* supply the array
775 * of item numbers to be deleted in item number order!
/*
 * PageIndexMultiDelete: remove several tuples from an index page at once.
 *
 * page    - page to modify in place.
 * itemnos - offset numbers of the tuples to delete; the scan below matches
 *           them in order, and an array it cannot consume completely raises
 *           "incorrect index offsets supplied".
 * nitems  - number of entries in itemnos.
 *
 * The visible code proceeds in these steps:
 *   1. A retail path hands entries of itemnos to PageIndexTupleDelete, last
 *      entry first.
 *   2. Otherwise validate pd_lower, pd_upper and pd_special, then scan every
 *      line pointer.  The offset and length of each are validated.  Pointers
 *      named in itemnos are skipped; all others are recorded in itemidbase
 *      with their new slot, data offset, original ItemIdData and MAXALIGNed
 *      length.
 *   3. Check the summed lengths against pd_special - pd_lower, sort the
 *      records by decreasing itemoff, and memmove each surviving item to its
 *      new place while installing its saved line pointer in its new slot.
 *   4. Store the new pd_lower for nused line pointers and the new pd_upper.
 *
 * The assertion accepts nitems equal to MaxIndexTuplesPerPage.  The work
 * array has that many entries, so deleting every item of a full page is a
 * legitimate request and must not trip the assertion.
 *
 * NOTE(review): the local declarations, the threshold that selects the
 * retail path and the update of each line pointer offset are not visible in
 * this excerpt.
 */
778 PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
780 PageHeader phdr = (PageHeader) page;
781 Offset pd_lower = phdr->pd_lower;
782 Offset pd_upper = phdr->pd_upper;
783 Offset pd_special = phdr->pd_special;
784 itemIdSortData itemidbase[MaxIndexTuplesPerPage];
785 itemIdSort itemidptr;
797 Assert(nitems <= MaxIndexTuplesPerPage);
800 * If there aren't very many items to delete, then retail
801 * PageIndexTupleDelete is the best way. Delete the items in reverse
802 * order so we don't have to think about adjusting item numbers for
803 * previous deletions.
805 * TODO: tune the magic number here
809 while (--nitems >= 0)
810 PageIndexTupleDelete(page, itemnos[nitems]);
815 * As with PageRepairFragmentation, paranoia seems justified.
817 if (pd_lower < SizeOfPageHeaderData ||
818 pd_lower > pd_upper ||
819 pd_upper > pd_special ||
820 pd_special > BLCKSZ ||
821 pd_special != MAXALIGN(pd_special))
823 (errcode(ERRCODE_DATA_CORRUPTED),
824 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
825 pd_lower, pd_upper, pd_special)));
828 * Scan the item pointer array and build a list of just the ones we are
829 * going to keep. Notice we do not modify the page yet, since we are
830 * still validity-checking.
832 nline = PageGetMaxOffsetNumber(page);
833 itemidptr = itemidbase;
837 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
839 lp = PageGetItemId(page, offnum);
840 Assert(ItemIdHasStorage(lp));
841 size = ItemIdGetLength(lp);
842 offset = ItemIdGetOffset(lp);
843 if (offset < pd_upper ||
844 (offset + size) > pd_special ||
845 offset != MAXALIGN(offset))
847 (errcode(ERRCODE_DATA_CORRUPTED),
848 errmsg("corrupted item pointer: offset = %u, size = %u",
849 offset, (unsigned int) size)));
851 if (nextitm < nitems && offnum == itemnos[nextitm])
853 /* skip item to be deleted */
858 itemidptr->offsetindex = nused; /* where it will go */
859 itemidptr->itemoff = offset;
860 itemidptr->olditemid = *lp;
861 itemidptr->alignedlen = MAXALIGN(size);
862 totallen += itemidptr->alignedlen;
868 /* this will catch invalid or out-of-order itemnos[] */
869 if (nextitm != nitems)
870 elog(ERROR, "incorrect index offsets supplied");
872 if (totallen > (Size) (pd_special - pd_lower))
874 (errcode(ERRCODE_DATA_CORRUPTED),
875 errmsg("corrupted item lengths: total %u, available space %u",
876 (unsigned int) totallen, pd_special - pd_lower)));
878 /* sort itemIdSortData array into decreasing itemoff order */
879 qsort((char *) itemidbase, nused, sizeof(itemIdSortData),
882 /* compactify page and install new itemids */
885 for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
887 lp = PageGetItemId(page, itemidptr->offsetindex + 1);
888 upper -= itemidptr->alignedlen;
889 memmove((char *) page + upper,
890 (char *) page + itemidptr->itemoff,
891 itemidptr->alignedlen);
892 *lp = itemidptr->olditemid;
896 phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
897 phdr->pd_upper = upper;
901 * PageIndexDeleteNoCompact
902 * Delete the given items for an index page, and defragment the resulting
903 * free space, but do not compact the item pointers array.
905 * itemnos is the array of tuples to delete; nitems is its size. (The function
906 * takes no maxIdxTuples argument; its work array is sized by MaxOffsetNumber.)
908 * Unused items at the end of the array are removed.
910 * This is used for index AMs that require that existing TIDs of live tuples remain unchanged.
/*
 * PageIndexDeleteNoCompact: delete items from an index page and defragment
 * the item data while keeping every surviving item at its offset number.
 *
 * page    - page to modify in place.
 * itemnos - offset numbers of the tuples to delete; the scan below matches
 *           them in order, and an array it cannot consume completely raises
 *           "incorrect index offsets supplied".
 * nitems  - number of entries in itemnos.
 *
 * The visible code proceeds in these steps:
 *   1. Validate pd_lower, pd_upper and pd_special, reporting
 *      ERRCODE_DATA_CORRUPTED on a mismatch.
 *   2. Scan every line pointer.  Used pointers have their offset and length
 *      validated.  Each pointer is then classified as on the delete list, as
 *      live with storage, or as neither.
 *   3. On the empty-page path, reset pd_lower to SizeOfPageHeaderData and
 *      pd_upper to pd_special.
 *   4. Otherwise record one itemIdSortData per line pointer.  Pointers
 *      without storage get itemoff 0 and alignedlen 0.  The summed lengths
 *      are checked against pd_special - pd_lower and the records are sorted
 *      by decreasing itemoff.
 *   5. Clear the free-line-pointer hint, then walk the sorted records.  A
 *      record of length zero sets the hint again and marks its line pointer
 *      unused; any other record has its data moved with memmove.
 *   6. Store the new pd_upper, and a pd_lower that covers i line pointers,
 *      where i is the loop counter after the walk.
 *
 * NOTE(review): the local declarations, the bodies of the three branches in
 * step 2, any trimming of trailing unused pointers and the update of each
 * line pointer offset are not visible in this excerpt.
 */
914 PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos, int nitems)
916 PageHeader phdr = (PageHeader) page;
917 LocationIndex pd_lower = phdr->pd_lower;
918 LocationIndex pd_upper = phdr->pd_upper;
919 LocationIndex pd_special = phdr->pd_special;
926 * As with PageRepairFragmentation, paranoia seems justified.
928 if (pd_lower < SizeOfPageHeaderData ||
929 pd_lower > pd_upper ||
930 pd_upper > pd_special ||
931 pd_special > BLCKSZ ||
932 pd_special != MAXALIGN(pd_special))
934 (errcode(ERRCODE_DATA_CORRUPTED),
935 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
936 pd_lower, pd_upper, pd_special)));
939 * Scan the existing item pointer array and mark as unused those that are
940 * in our kill-list; make sure any non-interesting ones are marked unused
943 nline = PageGetMaxOffsetNumber(page);
946 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
952 lp = PageGetItemId(page, offnum);
954 itemlen = ItemIdGetLength(lp);
955 offset = ItemIdGetOffset(lp);
957 if (ItemIdIsUsed(lp))
959 if (offset < pd_upper ||
960 (offset + itemlen) > pd_special ||
961 offset != MAXALIGN(offset))
963 (errcode(ERRCODE_DATA_CORRUPTED),
964 errmsg("corrupted item pointer: offset = %u, length = %u",
965 offset, (unsigned int) itemlen)));
967 if (nextitm < nitems && offnum == itemnos[nextitm])
969 /* this one is on our list to delete, so mark it unused */
973 else if (ItemIdHasStorage(lp))
975 /* This one's live -- must do the compaction dance */
980 /* get rid of this one too */
986 /* this will catch invalid or out-of-order itemnos[] */
987 if (nextitm != nitems)
988 elog(ERROR, "incorrect index offsets supplied");
992 /* Page is completely empty, so just reset it quickly */
993 phdr->pd_lower = SizeOfPageHeaderData;
994 phdr->pd_upper = pd_special;
998 /* There are live items: need to compact the page the hard way */
999 itemIdSortData itemidbase[MaxOffsetNumber];
1000 itemIdSort itemidptr;
1006 * Scan the page taking note of each item that we need to preserve.
1007 * This includes both live items (those that contain data) and
1008 * interspersed unused ones. It's critical to preserve these unused
1009 * items, because otherwise the offset numbers for later live items
1010 * would change, which is not acceptable. Unused items might get used
1011 * again later; that is fine.
1013 itemidptr = itemidbase;
1015 for (i = 0; i < nline; i++, itemidptr++)
1019 itemidptr->offsetindex = i;
1021 lp = PageGetItemId(page, i + 1);
1022 if (ItemIdHasStorage(lp))
1024 itemidptr->itemoff = ItemIdGetOffset(lp);
1025 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
1026 totallen += itemidptr->alignedlen;
1030 itemidptr->itemoff = 0;
1031 itemidptr->alignedlen = 0;
1034 /* By here, there are exactly nline elements in itemidbase array */
1036 if (totallen > (Size) (pd_special - pd_lower))
1038 (errcode(ERRCODE_DATA_CORRUPTED),
1039 errmsg("corrupted item lengths: total %u, available space %u",
1040 (unsigned int) totallen, pd_special - pd_lower)));
1042 /* sort itemIdSortData array into decreasing itemoff order */
1043 qsort((char *) itemidbase, nline, sizeof(itemIdSortData),
1047 * Defragment the data areas of each tuple, being careful to preserve
1048 * each item's position in the linp array.
1051 PageClearHasFreeLinePointers(page);
1052 for (i = 0, itemidptr = itemidbase; i < nline; i++, itemidptr++)
1056 lp = PageGetItemId(page, itemidptr->offsetindex + 1);
1057 if (itemidptr->alignedlen == 0)
1059 PageSetHasFreeLinePointers(page);
1060 ItemIdSetUnused(lp);
1063 upper -= itemidptr->alignedlen;
1064 memmove((char *) page + upper,
1065 (char *) page + itemidptr->itemoff,
1066 itemidptr->alignedlen);
1068 /* lp_flags and lp_len remain the same as originally */
1071 /* Set the new page limits */
1072 phdr->pd_upper = upper;
1073 phdr->pd_lower = SizeOfPageHeaderData + i * sizeof(ItemIdData);
1078 * Set checksum for a page in shared buffers.
1080 * If checksums are disabled, or if the page is not initialized, just return
1081 * the input. Otherwise, we must make a copy of the page before calculating
1082 * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1083 * from making the final checksum invalid. It doesn't matter if we include or
1084 * exclude hints during the copy, as long as we write a valid page and
1085 * associated checksum.
1087 * Returns a pointer to the block-sized data that needs to be written. Uses
1088 * statically-allocated memory, so the caller must immediately write the
1089 * returned page and not refer to it again.
/*
 * PageSetChecksumCopy: checksum a private copy of a page.
 *
 * page  - the page to checksum; the visible code never writes to it.
 * blkno - block number, handed to pg_checksum_page.
 *
 * Returns the input pointer unchanged when the page is new or checksums are
 * disabled.  Otherwise the page is copied into pageCopy and the checksum of
 * that copy is stored in its pd_checksum field.  pageCopy is a static
 * BLCKSZ buffer, allocated from TopMemoryContext on first use and reused by
 * every later call.
 *
 * NOTE(review): the return type line and the final return statement are not
 * visible in this excerpt.
 */
1092 PageSetChecksumCopy(Page page, BlockNumber blkno)
1094 static char *pageCopy = NULL;
1096 /* If we don't need a checksum, just return the passed-in data */
1097 if (PageIsNew(page) || !DataChecksumsEnabled())
1098 return (char *) page;
1101 * We allocate the copy space once and use it over on each subsequent
1102 * call. The point of palloc'ing here, rather than having a static char
1103 * array, is first to ensure adequate alignment for the checksumming code
1104 * and second to avoid wasting space in processes that never call this.
1106 if (pageCopy == NULL)
1107 pageCopy = MemoryContextAlloc(TopMemoryContext, BLCKSZ);
1109 memcpy(pageCopy, (char *) page, BLCKSZ);
1110 ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1115 * Set checksum for a page in private memory.
1117 * This must only be used when we know that no other process can be modifying the page.
/*
 * PageSetChecksumInplace: store the checksum directly in the given page.
 *
 * page  - page whose pd_checksum field is overwritten.
 * blkno - block number, handed to pg_checksum_page.
 *
 * The page is tested with PageIsNew and DataChecksumsEnabled before the
 * checksum is computed; unlike PageSetChecksumCopy, no copy is made.
 *
 * NOTE(review): the return type line and the early return statement are not
 * visible in this excerpt.
 */
1121 PageSetChecksumInplace(Page page, BlockNumber blkno)
1123 /* If we don't need a checksum, just return */
1124 if (PageIsNew(page) || !DataChecksumsEnabled())
1127 ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);