1 /*-------------------------------------------------------------------------
4 * WAL replay logic for inverted index.
7 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginxlog.c
12 *-------------------------------------------------------------------------
16 #include "access/gin_private.h"
17 #include "access/xlogutils.h"
18 #include "utils/memutils.h"
20 static MemoryContext opCtx; /* working memory for operations */
/*
 * opCtx is created in gin_xlog_startup(), made the current memory context
 * for the duration of each gin_redo() call and reset at the end of that
 * call, and deleted in gin_xlog_cleanup().
 */
/*
 * Clear the GIN_INCOMPLETE_SPLIT flag on block 'blkno' of relation 'node'
 * while replaying the record at 'lsn'.
 *
 * Returns without doing anything when the block cannot be read.  The flag
 * is cleared, the page LSN set to 'lsn' and the buffer marked dirty under
 * the "lsn > PageGetLSN(page)" test, i.e. when the page does not already
 * carry this record's LSN or a later one.  The buffer is unlocked and
 * released before returning.
 *
 * NOTE(review): the return type, braces and local declarations are not
 * present in this listing; confirm the exact extent of the "if" body
 * against the complete file.
 */
23 ginRedoClearIncompleteSplit(XLogRecPtr lsn, RelFileNode node, BlockNumber blkno)
28 buffer = XLogReadBuffer(node, blkno, false);
29 if (!BufferIsValid(buffer))
30 return; /* page was deleted, nothing to do */
31 page = (Page) BufferGetPage(buffer);
/* apply the change only if the page is older than this record */
33 if (lsn > PageGetLSN(page))
35 GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
37 PageSetLSN(page, lsn);
38 MarkBufferDirty(buffer);
41 UnlockReleaseBuffer(buffer);
/*
 * Replay an index-creation record.
 *
 * The record payload is read as a RelFileNode.  Both the metapage
 * (GIN_METAPAGE_BLKNO) and the root page (GIN_ROOT_BLKNO) are read with the
 * last XLogReadBuffer() argument set to true, re-initialized, stamped with
 * 'lsn' and marked dirty; the root is initialized as a GIN_LEAF page.  No
 * page-LSN comparison is visible here: both pages are rebuilt
 * unconditionally.
 */
45 ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
47 RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
52 /* Backup blocks are not used in create_index records */
53 Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
/* rebuild the metapage */
55 MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
56 Assert(BufferIsValid(MetaBuffer));
57 page = (Page) BufferGetPage(MetaBuffer);
59 GinInitMetabuffer(MetaBuffer);
61 PageSetLSN(page, lsn);
62 MarkBufferDirty(MetaBuffer);
/* rebuild the root page as an empty leaf */
64 RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
65 Assert(BufferIsValid(RootBuffer));
66 page = (Page) BufferGetPage(RootBuffer);
68 GinInitBuffer(RootBuffer, GIN_LEAF);
70 PageSetLSN(page, lsn);
71 MarkBufferDirty(RootBuffer);
/* both buffers are held until this point; root is released first */
73 UnlockReleaseBuffer(RootBuffer);
74 UnlockReleaseBuffer(MetaBuffer);
/*
 * Replay creation of a posting tree.
 *
 * The record payload is a ginxlogCreatePostingTree header immediately
 * followed by an array of ItemPointerData.  The target block (data->blkno of
 * data->node) is initialized as a GIN_DATA | GIN_LEAF page, data->nitem
 * item pointers are copied into its data area and maxoff is set to
 * data->nitem.  The page is then stamped with 'lsn', marked dirty and
 * released.
 */
78 ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
80 ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
/* the item pointers start right after the fixed-size header */
81 ItemPointerData *items = (ItemPointerData *) (XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree));
85 /* Backup blocks are not used in create_ptree records */
86 Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
88 buffer = XLogReadBuffer(data->node, data->blkno, true);
89 Assert(BufferIsValid(buffer));
90 page = (Page) BufferGetPage(buffer);
92 GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
93 memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * data->nitem);
94 GinPageGetOpaque(page)->maxoff = data->nitem;
96 PageSetLSN(page, lsn);
98 MarkBufferDirty(buffer);
99 UnlockReleaseBuffer(buffer);
/*
 * Apply an entry-tree insertion to the page held in 'buffer'.
 *
 * 'offset' is the target offset on the page.  When 'rightblkno' is not
 * InvalidBlockNumber it is written as the downlink of the tuple currently
 * at 'offset' (the page is asserted to be non-leaf in that case).  The
 * payload 'rdata' is interpreted as a ginxlogInsertEntry.  A failure of
 * PageAddItem() is reported with elog(ERROR), naming the relation obtained
 * from the buffer tag.
 *
 * NOTE(review): the rest of the parameter list, the condition guarding the
 * PageIndexTupleDelete() step and the assignment of 'itup' before
 * PageAddItem() are not visible in this listing -- confirm against the
 * complete file.
 */
103 ginRedoInsertEntry(Buffer buffer, OffsetNumber offset, BlockNumber rightblkno,
106 Page page = BufferGetPage(buffer);
107 ginxlogInsertEntry *data = (ginxlogInsertEntry *) rdata;
110 if (rightblkno != InvalidBlockNumber)
112 /* update link to right page after split */
113 Assert(!GinPageIsLeaf(page));
114 Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
115 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset));
116 GinSetDownlink(itup, rightblkno);
/* removal of the existing tuple at 'offset'; only asserted valid on a leaf page */
121 Assert(GinPageIsLeaf(page));
122 Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
123 PageIndexTupleDelete(page, offset);
/* place the tuple at 'offset'; InvalidOffsetNumber signals failure */
128 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber)
/* look up the relation identity only for the error message */
134 BufferGetTag(buffer, &node, &forknum, &blknum);
135 elog(ERROR, "failed to add item to index page in %u/%u/%u",
136 node.spcNode, node.dbNode, node.relNode);
/*
 * Apply a posting-tree insertion to the page held in 'buffer'.
 *
 * On a leaf page the payload is a ginxlogInsertDataLeaf and its data->nitem
 * item pointers are added at consecutive offsets starting at 'offset'.
 * Otherwise the payload is a single PostingItem: the posting item already
 * at 'offset' gets 'rightblkno' as its block number, and the new posting
 * item is then added at 'offset'.
 *
 * NOTE(review): the rest of the parameter list and the "else" separating
 * the two cases are not visible in this listing -- confirm against the
 * complete file.
 */
141 ginRedoInsertData(Buffer buffer, OffsetNumber offset, BlockNumber rightblkno,
144 Page page = BufferGetPage(buffer);
146 if (GinPageIsLeaf(page))
148 ginxlogInsertDataLeaf *data = (ginxlogInsertDataLeaf *) rdata;
149 ItemPointerData *items = data->items;
/* i-th item pointer goes to offset + i */
152 for (i = 0; i < data->nitem; i++)
153 GinDataPageAddItemPointer(page, &items[i], offset + i);
/* internal-page case (presumably the "else" branch of the leaf test) */
157 PostingItem *pitem = (PostingItem *) rdata;
158 PostingItem *oldpitem;
160 /* update link to right page after split */
161 oldpitem = GinDataPageGetPostingItem(page, offset);
162 PostingItemSetBlockNumber(oldpitem, rightblkno);
/* the new item is added at the same offset the old one was just read from */
164 GinDataPageAddPostingItem(page, pitem, offset);
/*
 * Replay an insertion into a GIN page (entry tree or posting tree).
 *
 * The payload follows the ginxlogInsert header.  In the child-block section
 * below, the payload begins with two BlockIdData values (left child, then
 * right child); the left child either has backup block 0 restored or has
 * its incomplete-split flag cleared.  The target page uses backup block 0
 * for a leaf insert and backup block 1 otherwise; if that backup block is
 * present it is restored.  Otherwise the page is read and, under the
 * "lsn > PageGetLSN(page)" test, the tree-type specific routine is called,
 * the page LSN is set and the buffer marked dirty.
 *
 * NOTE(review): the condition guarding the child-block section (presumably
 * a test on isLeaf), the "else" keywords and the "return" after the
 * full-page restore are not visible in this listing -- confirm against the
 * complete file.
 */
169 ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
171 ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
175 BlockNumber leftChildBlkno = InvalidBlockNumber;
176 BlockNumber rightChildBlkno = InvalidBlockNumber;
177 bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
179 payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
182 * First clear incomplete-split flag on child page if this finishes
/* consume the two child block numbers from the front of the payload */
187 leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
188 payload += sizeof(BlockIdData);
189 rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
190 payload += sizeof(BlockIdData);
/* left child: full-page image if present, otherwise just clear the flag */
192 if (record->xl_info & XLR_BKP_BLOCK(0))
193 (void) RestoreBackupBlock(lsn, record, 0, false, false);
195 ginRedoClearIncompleteSplit(lsn, data->node, leftChildBlkno);
198 /* If we have a full-page image, restore it and we're done */
199 if (record->xl_info & XLR_BKP_BLOCK(isLeaf ? 0 : 1))
201 (void) RestoreBackupBlock(lsn, record, isLeaf ? 0 : 1, false, false);
205 buffer = XLogReadBuffer(data->node, data->blkno, false);
206 if (!BufferIsValid(buffer))
207 return; /* page was deleted, nothing to do */
208 page = (Page) BufferGetPage(buffer);
210 if (lsn > PageGetLSN(page))
212 /* How to insert the payload is tree-type specific */
213 if (data->flags & GIN_INSERT_ISDATA)
215 Assert(GinPageIsData(page));
216 ginRedoInsertData(buffer, data->offset, rightChildBlkno, payload);
/* entry-tree case */
220 Assert(!GinPageIsData(page));
221 ginRedoInsertEntry(buffer, data->offset, rightChildBlkno, payload);
224 PageSetLSN(page, lsn);
225 MarkBufferDirty(buffer);
228 UnlockReleaseBuffer(buffer);
/*
 * Distribute the index tuples of an entry-tree split over the two pages.
 *
 * 'rdata' is a ginxlogSplitEntry header followed by the tuples.  The first
 * data->separator tuples are appended to 'lpage', the tuples from
 * data->separator up to data->nitem are appended to 'rpage'.  Each tuple is
 * found by advancing MAXALIGN(IndexTupleSize()) bytes past the previous
 * one.  A failed PageAddItem() raises elog(ERROR).
 */
232 ginRedoSplitEntry(Page lpage, Page rpage, void *rdata)
234 ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
235 IndexTuple itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
/* left half */
238 for (i = 0; i < data->separator; i++)
240 if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
241 elog(ERROR, "failed to add item to gin index page");
242 itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
/* right half; 'itup' continues from where the left half stopped */
245 for (i = data->separator; i < data->nitem; i++)
247 if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
248 elog(ERROR, "failed to add item to gin index page");
249 itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
/*
 * Distribute the items of a posting-tree split over the two pages and set
 * their right bounds.
 *
 * 'rdata' is a ginxlogSplitData header followed by an array of items:
 * ItemPointerData in one case, PostingItem in the other.  Items before
 * data->separator go to 'lpage', items from data->separator up to
 * data->nitem go to 'rpage'.  The right bound of 'lpage' is taken from its
 * last item (at offset maxoff); the right bound of 'rpage' is
 * data->rightbound.
 *
 * NOTE(review): the tests that select between the two item types
 * (presumably on 'isleaf') and the matching "else" keywords are not visible
 * in this listing -- confirm against the complete file.
 */
254 ginRedoSplitData(Page lpage, Page rpage, void *rdata)
256 ginxlogSplitData *data = (ginxlogSplitData *) rdata;
257 bool isleaf = GinPageIsLeaf(lpage);
258 char *ptr = (char *) rdata + sizeof(ginxlogSplitData);
/* payload viewed as item pointers */
264 ItemPointer items = (ItemPointer) ptr;
265 for (i = 0; i < data->separator; i++)
266 GinDataPageAddItemPointer(lpage, &items[i], InvalidOffsetNumber);
267 for (i = data->separator; i < data->nitem; i++)
268 GinDataPageAddItemPointer(rpage, &items[i], InvalidOffsetNumber);
/* payload viewed as posting items */
272 PostingItem *items = (PostingItem *) ptr;
273 for (i = 0; i < data->separator; i++)
274 GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
275 for (i = data->separator; i < data->nitem; i++)
276 GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
279 /* set up right key */
280 bound = GinDataPageGetRightBound(lpage);
/* left bound = last item on the left page (item pointer form, then posting-item form) */
282 *bound = *GinDataPageGetItemPointer(lpage, GinPageGetOpaque(lpage)->maxoff);
284 *bound = GinDataPageGetPostingItem(lpage, GinPageGetOpaque(lpage)->maxoff)->key;
286 bound = GinDataPageGetRightBound(rpage);
287 *bound = data->rightbound;
/*
 * Replay a page split.
 *
 * Both halves (data->lblkno and data->rblkno) are read with the last
 * XLogReadBuffer() argument set to true and re-initialized with 'flags';
 * their contents are rebuilt from the payload that follows the ginxlogSplit
 * header by ginRedoSplitData() or ginRedoSplitEntry().  The left page's
 * rightlink is set to the right page's block number; the right page's
 * rightlink is InvalidBlockNumber for a root split and data->rrlink
 * otherwise.
 *
 * In the root-split section, data->rrlink is used as the block number of
 * the root page, which is re-initialized with GIN_LEAF masked out of
 * 'flags' and filled by ginDataFillRoot() or ginEntryFillRoot().
 *
 * NOTE(review): the computation of 'flags' and the conditions guarding the
 * child-flag section, the data/entry choices and the root-split section
 * (presumably isLeaf, isData and isRoot respectively) are not visible in
 * this listing -- confirm against the complete file.
 */
291 ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
293 ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
300 bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
301 bool isData = (data->flags & GIN_INSERT_ISDATA) != 0;
302 bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
304 payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
307 * First clear incomplete-split flag on child page if this finishes
/* child page: full-page image if present, otherwise just clear the flag */
312 if (record->xl_info & XLR_BKP_BLOCK(0))
313 (void) RestoreBackupBlock(lsn, record, 0, false, false);
315 ginRedoClearIncompleteSplit(lsn, data->node, data->leftChildBlkno);
/* rebuild the left half */
324 lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
325 Assert(BufferIsValid(lbuffer));
326 lpage = (Page) BufferGetPage(lbuffer);
327 GinInitBuffer(lbuffer, flags);
/* rebuild the right half */
329 rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
330 Assert(BufferIsValid(rbuffer));
331 rpage = (Page) BufferGetPage(rbuffer);
332 GinInitBuffer(rbuffer, flags);
334 GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
335 GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
337 /* Do the tree-type specific portion to restore the page contents */
339 ginRedoSplitData(lpage, rpage, payload);
341 ginRedoSplitEntry(lpage, rpage, payload);
343 PageSetLSN(rpage, lsn);
344 MarkBufferDirty(rbuffer);
346 PageSetLSN(lpage, lsn);
347 MarkBufferDirty(lbuffer);
/* root split: rrlink carries the root block number here */
351 BlockNumber rootBlkno = data->rrlink;
352 Buffer rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
353 Page rootPage = BufferGetPage(rootBuf);
/* the new root is never a leaf */
355 GinInitBuffer(rootBuf, flags & ~GIN_LEAF);
/* posting-tree root: asserted not to be the index's GIN_ROOT_BLKNO */
359 Assert(rootBlkno != GIN_ROOT_BLKNO);
360 ginDataFillRoot(NULL, BufferGetPage(rootBuf),
361 BufferGetBlockNumber(lbuffer),
362 BufferGetPage(lbuffer),
363 BufferGetBlockNumber(rbuffer),
364 BufferGetPage(rbuffer));
/* entry-tree root: asserted to be GIN_ROOT_BLKNO */
368 Assert(rootBlkno == GIN_ROOT_BLKNO);
369 ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
370 BufferGetBlockNumber(lbuffer),
371 BufferGetPage(lbuffer),
372 BufferGetBlockNumber(rbuffer),
373 BufferGetPage(rbuffer));
376 PageSetLSN(rootPage, lsn);
378 MarkBufferDirty(rootBuf);
379 UnlockReleaseBuffer(rootBuf);
/* both halves stay locked until the root (if any) has been handled */
382 UnlockReleaseBuffer(rbuffer);
383 UnlockReleaseBuffer(lbuffer);
/*
 * Replay the vacuuming of a single page.
 *
 * If backup block 0 is present it is restored.  Otherwise the page
 * (data->blkno of data->node) is read and, under the
 * "lsn > PageGetLSN(page)" test, its contents are replaced by the
 * data->nitem items that follow the ginxlogVacuumPage header:
 *  - on a data page the items are copied over the data area and maxoff is
 *    set to data->nitem;
 *  - otherwise PageIndexMultiDelete() is called with a count equal to the
 *    current max offset, and the tuples from the record are appended one by
 *    one, stepping by MAXALIGN(IndexTupleSize()).
 *
 * NOTE(review): the statement executed when the buffer is invalid
 * (presumably "return"), the body of the loop that fills 'tod', and the
 * "else"/"return" keywords are not visible in this listing -- confirm
 * against the complete file.
 */
387 ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
389 ginxlogVacuumPage *data = (ginxlogVacuumPage *) XLogRecGetData(record);
393 /* If we have a full-page image, restore it and we're done */
394 if (record->xl_info & XLR_BKP_BLOCK(0))
396 (void) RestoreBackupBlock(lsn, record, 0, false, false);
400 buffer = XLogReadBuffer(data->node, data->blkno, false);
401 if (!BufferIsValid(buffer))
403 page = (Page) BufferGetPage(buffer);
405 if (lsn > PageGetLSN(page))
407 if (GinPageIsData(page))
/* overwrite the data area with the items stored in the record */
409 memcpy(GinDataPageGetData(page),
410 XLogRecGetData(record) + sizeof(ginxlogVacuumPage),
411 data->nitem * GinSizeOfDataPageItem(page));
412 GinPageGetOpaque(page)->maxoff = data->nitem;
/* entry page: tuples start right after the fixed-size header */
418 IndexTuple itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogVacuumPage));
/* 'tod' has one slot per tuple currently on the page */
420 tod = (OffsetNumber *) palloc(sizeof(OffsetNumber) * PageGetMaxOffsetNumber(page));
421 for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page); i++)
424 PageIndexMultiDelete(page, tod, PageGetMaxOffsetNumber(page));
/* re-add the surviving tuples from the record */
426 for (i = 0; i < data->nitem; i++)
428 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
429 elog(ERROR, "failed to add item to index page in %u/%u/%u",
430 data->node.spcNode, data->node.dbNode, data->node.relNode);
431 itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
435 PageSetLSN(page, lsn);
436 MarkBufferDirty(buffer);
439 UnlockReleaseBuffer(buffer);
/*
 * Replay the deletion of a posting-tree page.
 *
 * Three pages are touched, each either from its backup block or by a
 * direct update under the "lsn > PageGetLSN(page)" test:
 *  - backup block 0 / data->blkno: the deleted page, whose flags are set to
 *    GIN_DELETED;
 *  - backup block 1 / data->parentBlkno: the parent, from which the posting
 *    item at data->parentOffset is removed;
 *  - backup block 2 / data->leftBlkno: the left sibling (skipped when
 *    leftBlkno is InvalidBlockNumber), whose rightlink is set to
 *    data->rightLink.
 * The buffers for the deleted page and the parent are kept until the left
 * sibling has been handled and are released at the end, parent first.
 *
 * NOTE(review): the "else" keywords pairing each RestoreBackupBlock() call
 * with the corresponding XLogReadBuffer() path are not visible in this
 * listing -- confirm against the complete file.
 */
443 ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
445 ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
/* the page being deleted */
451 if (record->xl_info & XLR_BKP_BLOCK(0))
452 dbuffer = RestoreBackupBlock(lsn, record, 0, false, true);
455 dbuffer = XLogReadBuffer(data->node, data->blkno, false);
456 if (BufferIsValid(dbuffer))
458 page = BufferGetPage(dbuffer);
459 if (lsn > PageGetLSN(page))
461 Assert(GinPageIsData(page));
/* plain assignment: every other flag bit is dropped */
462 GinPageGetOpaque(page)->flags = GIN_DELETED;
463 PageSetLSN(page, lsn);
464 MarkBufferDirty(dbuffer);
/* the parent page */
469 if (record->xl_info & XLR_BKP_BLOCK(1))
470 pbuffer = RestoreBackupBlock(lsn, record, 1, false, true);
473 pbuffer = XLogReadBuffer(data->node, data->parentBlkno, false);
474 if (BufferIsValid(pbuffer))
476 page = BufferGetPage(pbuffer);
477 if (lsn > PageGetLSN(page))
479 Assert(GinPageIsData(page));
480 Assert(!GinPageIsLeaf(page));
481 GinPageDeletePostingItem(page, data->parentOffset);
482 PageSetLSN(page, lsn);
483 MarkBufferDirty(pbuffer);
/* the left sibling; its buffer is not kept past this section */
488 if (record->xl_info & XLR_BKP_BLOCK(2))
489 (void) RestoreBackupBlock(lsn, record, 2, false, false);
490 else if (data->leftBlkno != InvalidBlockNumber)
492 lbuffer = XLogReadBuffer(data->node, data->leftBlkno, false);
493 if (BufferIsValid(lbuffer))
495 page = BufferGetPage(lbuffer);
496 if (lsn > PageGetLSN(page))
498 Assert(GinPageIsData(page));
499 GinPageGetOpaque(page)->rightlink = data->rightLink;
500 PageSetLSN(page, lsn);
501 MarkBufferDirty(lbuffer);
503 UnlockReleaseBuffer(lbuffer);
/* release the two buffers that were held across the whole operation */
507 if (BufferIsValid(pbuffer))
508 UnlockReleaseBuffer(pbuffer);
509 if (BufferIsValid(dbuffer))
510 UnlockReleaseBuffer(dbuffer);
/*
 * Replay an update of the metapage, optionally together with a change to a
 * pending-list page.
 *
 * The metapage is read first and kept until the end of the function; under
 * the "lsn > PageGetLSN(metapage)" test its metadata is overwritten with
 * data->metadata.  Then:
 *  - if data->ntuples > 0, the tuples following the ginxlogUpdateMeta header
 *    are appended to the page data->metadata.tail (or backup block 0 is
 *    restored instead), and that page's maxoff is incremented;
 *  - else if data->prevTail is valid, the rightlink of page data->prevTail
 *    is set to data->newRightlink (or backup block 0 is restored instead).
 *
 * NOTE(review): the "else" keywords pairing RestoreBackupBlock() with the
 * direct-update paths, and any statement advancing 'off' inside the tuple
 * loop, are not visible in this listing -- confirm against the complete
 * file.
 */
514 ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
516 ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
521 metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
522 if (!BufferIsValid(metabuffer))
523 return; /* assume index was deleted, nothing to do */
524 metapage = BufferGetPage(metabuffer);
526 if (lsn > PageGetLSN(metapage))
528 memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
529 PageSetLSN(metapage, lsn);
530 MarkBufferDirty(metabuffer);
533 if (data->ntuples > 0)
536 * insert into tail page
538 if (record->xl_info & XLR_BKP_BLOCK(0))
539 (void) RestoreBackupBlock(lsn, record, 0, false, false);
542 buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
543 if (BufferIsValid(buffer))
545 Page page = BufferGetPage(buffer);
547 if (lsn > PageGetLSN(page))
/* first free offset: start of page if empty, otherwise one past the last tuple */
550 off = (PageIsEmpty(page)) ? FirstOffsetNumber :
551 OffsetNumberNext(PageGetMaxOffsetNumber(page));
554 IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
556 for (i = 0; i < data->ntuples; i++)
558 tupsize = IndexTupleSize(tuples);
560 l = PageAddItem(page, (Item) tuples, tupsize, off, false, false);
562 if (l == InvalidOffsetNumber)
563 elog(ERROR, "failed to add item to index page");
/* the next tuple starts tupsize bytes further on (no MAXALIGN applied here) */
565 tuples = (IndexTuple) (((char *) tuples) + tupsize);
571 * Increase counter of heap tuples
573 GinPageGetOpaque(page)->maxoff++;
575 PageSetLSN(page, lsn);
576 MarkBufferDirty(buffer);
578 UnlockReleaseBuffer(buffer);
582 else if (data->prevTail != InvalidBlockNumber)
587 if (record->xl_info & XLR_BKP_BLOCK(0))
588 (void) RestoreBackupBlock(lsn, record, 0, false, false);
591 buffer = XLogReadBuffer(data->node, data->prevTail, false);
592 if (BufferIsValid(buffer))
594 Page page = BufferGetPage(buffer);
596 if (lsn > PageGetLSN(page))
/* link the previous tail to the new page */
598 GinPageGetOpaque(page)->rightlink = data->newRightlink;
600 PageSetLSN(page, lsn);
601 MarkBufferDirty(buffer);
603 UnlockReleaseBuffer(buffer);
/* the metapage was held across the tail-page update */
608 UnlockReleaseBuffer(metabuffer);
/*
 * Replay the creation of a pending-list page.
 *
 * If backup block 0 is present it is restored.  Otherwise the page
 * (data->blkno of data->node) is read with the last XLogReadBuffer()
 * argument set to true, initialized as a GIN_LIST page with rightlink
 * data->rightlink, and filled with the data->ntuples tuples that follow the
 * ginxlogInsertListPage header.  When rightlink is InvalidBlockNumber the
 * page is additionally marked with GinPageSetFullRow() and maxoff is set
 * to 1; maxoff is set to 0 in the other case.
 *
 * NOTE(review): the "return" after the full-page restore, the "else" before
 * "maxoff = 0" and any statement advancing 'off' inside the tuple loop are
 * not visible in this listing -- confirm against the complete file.
 */
612 ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
614 ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
618 off = FirstOffsetNumber;
621 IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
623 /* If we have a full-page image, restore it and we're done */
624 if (record->xl_info & XLR_BKP_BLOCK(0))
626 (void) RestoreBackupBlock(lsn, record, 0, false, false);
630 buffer = XLogReadBuffer(data->node, data->blkno, true);
631 Assert(BufferIsValid(buffer));
632 page = BufferGetPage(buffer);
634 GinInitBuffer(buffer, GIN_LIST);
635 GinPageGetOpaque(page)->rightlink = data->rightlink;
636 if (data->rightlink == InvalidBlockNumber)
638 /* tail of sublist */
639 GinPageSetFullRow(page);
640 GinPageGetOpaque(page)->maxoff = 1;
/* page has a right neighbour */
644 GinPageGetOpaque(page)->maxoff = 0;
647 for (i = 0; i < data->ntuples; i++)
649 tupsize = IndexTupleSize(tuples);
651 l = PageAddItem(page, (Item) tuples, tupsize, off, false, false);
653 if (l == InvalidOffsetNumber)
654 elog(ERROR, "failed to add item to index page");
/* the next tuple starts tupsize bytes further on (no MAXALIGN applied here) */
656 tuples = (IndexTuple) (((char *) tuples) + tupsize);
659 PageSetLSN(page, lsn);
660 MarkBufferDirty(buffer);
662 UnlockReleaseBuffer(buffer);
/*
 * Replay the deletion of pages from the pending list.
 *
 * The metapage is read first and kept until the end of the function; under
 * the "lsn > PageGetLSN(metapage)" test its metadata is overwritten with
 * data->metadata.  Then each of the data->ndeleted blocks listed in
 * data->toDelete[] is read in turn and, under the same kind of LSN test,
 * has its flags set to GIN_DELETED, its LSN set and its buffer marked
 * dirty.  Each page buffer is released before the next one is read; the
 * metapage is released last.
 */
666 ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
668 ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
673 /* Backup blocks are not used in delete_listpage records */
674 Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
676 metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
677 if (!BufferIsValid(metabuffer))
678 return; /* assume index was deleted, nothing to do */
679 metapage = BufferGetPage(metabuffer);
681 if (lsn > PageGetLSN(metapage))
683 memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
684 PageSetLSN(metapage, lsn);
685 MarkBufferDirty(metabuffer);
689 * In normal operation, shiftList() takes exclusive lock on all the
690 * pages-to-be-deleted simultaneously. During replay, however, it should
691 * be all right to lock them one at a time. This is dependent on the fact
692 * that we are deleting pages from the head of the list, and that readers
693 * share-lock the next page before releasing the one they are on. So we
694 * cannot get past a reader that is on, or due to visit, any page we are
695 * going to delete. New incoming readers will block behind our metapage
696 * lock and then see a fully updated page list.
698 for (i = 0; i < data->ndeleted; i++)
700 Buffer buffer = XLogReadBuffer(data->node, data->toDelete[i], false);
/* pages that cannot be read are skipped */
702 if (BufferIsValid(buffer))
704 Page page = BufferGetPage(buffer);
706 if (lsn > PageGetLSN(page))
/* plain assignment: every other flag bit is dropped */
708 GinPageGetOpaque(page)->flags = GIN_DELETED;
710 PageSetLSN(page, lsn);
711 MarkBufferDirty(buffer);
714 UnlockReleaseBuffer(buffer);
717 UnlockReleaseBuffer(metabuffer);
/*
 * Entry point for replaying one GIN WAL record.
 *
 * The record type is taken from xl_info with the XLR_INFO_MASK bits
 * removed, and the matching ginRedo*() routine is called with 'lsn' and
 * 'record'.  An unrecognized type is reported with elog(PANIC).  The
 * dispatch runs with opCtx as the current memory context; afterwards the
 * previous context is restored and opCtx is reset.
 *
 * NOTE(review): the "switch" statement, the case label in front of the
 * ginRedoSplit() call, the "default" label and the "break" statements are
 * not visible in this listing -- confirm against the complete file.
 */
721 gin_redo(XLogRecPtr lsn, XLogRecord *record)
723 uint8 info = record->xl_info & ~XLR_INFO_MASK;
724 MemoryContext oldCtx;
727 * GIN indexes do not require any conflict processing. NB: If we ever
728 * implement a similar optimization as we have in b-tree, and remove
729 * killed tuples outside VACUUM, we'll need to handle that here.
/* allocations made while replaying land in opCtx */
732 oldCtx = MemoryContextSwitchTo(opCtx);
735 case XLOG_GIN_CREATE_INDEX:
736 ginRedoCreateIndex(lsn, record);
738 case XLOG_GIN_CREATE_PTREE:
739 ginRedoCreatePTree(lsn, record);
741 case XLOG_GIN_INSERT:
742 ginRedoInsert(lsn, record);
745 ginRedoSplit(lsn, record);
747 case XLOG_GIN_VACUUM_PAGE:
748 ginRedoVacuumPage(lsn, record);
750 case XLOG_GIN_DELETE_PAGE:
751 ginRedoDeletePage(lsn, record);
753 case XLOG_GIN_UPDATE_META_PAGE:
754 ginRedoUpdateMetapage(lsn, record);
756 case XLOG_GIN_INSERT_LISTPAGE:
757 ginRedoInsertListPage(lsn, record);
759 case XLOG_GIN_DELETE_LISTPAGE:
760 ginRedoDeleteListPages(lsn, record);
763 elog(PANIC, "gin_redo: unknown op code %u", info);
/* restore the caller's context, then discard everything allocated above */
765 MemoryContextSwitchTo(oldCtx);
766 MemoryContextReset(opCtx);
/*
 * Create opCtx, the memory context used by gin_redo(), as a child of the
 * context that is current at the time of the call, using the default
 * allocation-set sizing parameters.
 */
770 gin_xlog_startup(void)
772 opCtx = AllocSetContextCreate(CurrentMemoryContext,
773 "GIN recovery temporary context",
774 ALLOCSET_DEFAULT_MINSIZE,
775 ALLOCSET_DEFAULT_INITSIZE,
776 ALLOCSET_DEFAULT_MAXSIZE);
/*
 * Delete opCtx, the memory context created by gin_xlog_startup().
 */
780 gin_xlog_cleanup(void)
782 MemoryContextDelete(opCtx);