]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/ginxlog.c
Reduce pinning and buffer content locking for btree scans.
[postgresql] / src / backend / access / gin / ginxlog.c
1 /*-------------------------------------------------------------------------
2  *
3  * ginxlog.c
4  *        WAL replay logic for inverted index.
5  *
6  *
7  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                       src/backend/access/gin/ginxlog.c
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include "access/gin_private.h"
17 #include "access/xlogutils.h"
18 #include "utils/memutils.h"
19
/*
 * Scratch memory context for WAL replay.  It is created by
 * gin_xlog_startup(), made current by gin_redo() while a record is being
 * replayed and reset after each record, and deleted by gin_xlog_cleanup().
 */
static MemoryContext opCtx;
21
22 static void
23 ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
24 {
25         XLogRecPtr      lsn = record->EndRecPtr;
26         Buffer          buffer;
27         Page            page;
28
29         if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
30         {
31                 page = (Page) BufferGetPage(buffer);
32                 GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
33
34                 PageSetLSN(page, lsn);
35                 MarkBufferDirty(buffer);
36         }
37         if (BufferIsValid(buffer))
38                 UnlockReleaseBuffer(buffer);
39 }
40
41 static void
42 ginRedoCreateIndex(XLogReaderState *record)
43 {
44         XLogRecPtr      lsn = record->EndRecPtr;
45         Buffer          RootBuffer,
46                                 MetaBuffer;
47         Page            page;
48
49         MetaBuffer = XLogInitBufferForRedo(record, 0);
50         Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
51         page = (Page) BufferGetPage(MetaBuffer);
52
53         GinInitMetabuffer(MetaBuffer);
54
55         PageSetLSN(page, lsn);
56         MarkBufferDirty(MetaBuffer);
57
58         RootBuffer = XLogInitBufferForRedo(record, 1);
59         Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
60         page = (Page) BufferGetPage(RootBuffer);
61
62         GinInitBuffer(RootBuffer, GIN_LEAF);
63
64         PageSetLSN(page, lsn);
65         MarkBufferDirty(RootBuffer);
66
67         UnlockReleaseBuffer(RootBuffer);
68         UnlockReleaseBuffer(MetaBuffer);
69 }
70
71 static void
72 ginRedoCreatePTree(XLogReaderState *record)
73 {
74         XLogRecPtr      lsn = record->EndRecPtr;
75         ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
76         char       *ptr;
77         Buffer          buffer;
78         Page            page;
79
80         buffer = XLogInitBufferForRedo(record, 0);
81         page = (Page) BufferGetPage(buffer);
82
83         GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
84
85         ptr = XLogRecGetData(record) + sizeof(ginxlogCreatePostingTree);
86
87         /* Place page data */
88         memcpy(GinDataLeafPageGetPostingList(page), ptr, data->size);
89
90         GinDataPageSetDataSize(page, data->size);
91
92         PageSetLSN(page, lsn);
93
94         MarkBufferDirty(buffer);
95         UnlockReleaseBuffer(buffer);
96 }
97
98 static void
99 ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata)
100 {
101         Page            page = BufferGetPage(buffer);
102         ginxlogInsertEntry *data = (ginxlogInsertEntry *) rdata;
103         OffsetNumber offset = data->offset;
104         IndexTuple      itup;
105
106         if (rightblkno != InvalidBlockNumber)
107         {
108                 /* update link to right page after split */
109                 Assert(!GinPageIsLeaf(page));
110                 Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
111                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offset));
112                 GinSetDownlink(itup, rightblkno);
113         }
114
115         if (data->isDelete)
116         {
117                 Assert(GinPageIsLeaf(page));
118                 Assert(offset >= FirstOffsetNumber && offset <= PageGetMaxOffsetNumber(page));
119                 PageIndexTupleDelete(page, offset);
120         }
121
122         itup = &data->tuple;
123
124         if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), offset, false, false) == InvalidOffsetNumber)
125         {
126                 RelFileNode node;
127                 ForkNumber      forknum;
128                 BlockNumber blknum;
129
130                 BufferGetTag(buffer, &node, &forknum, &blknum);
131                 elog(ERROR, "failed to add item to index page in %u/%u/%u",
132                          node.spcNode, node.dbNode, node.relNode);
133         }
134 }
135
136 static void
137 ginRedoRecompress(Page page, ginxlogRecompressDataLeaf *data)
138 {
139         int                     actionno;
140         int                     segno;
141         GinPostingList *oldseg;
142         Pointer         segmentend;
143         char       *walbuf;
144         int                     totalsize;
145
146         /*
147          * If the page is in pre-9.4 format, convert to new format first.
148          */
149         if (!GinPageIsCompressed(page))
150         {
151                 ItemPointer uncompressed = (ItemPointer) GinDataPageGetData(page);
152                 int                     nuncompressed = GinPageGetOpaque(page)->maxoff;
153                 int                     npacked;
154                 GinPostingList *plist;
155
156                 plist = ginCompressPostingList(uncompressed, nuncompressed,
157                                                                            BLCKSZ, &npacked);
158                 Assert(npacked == nuncompressed);
159
160                 totalsize = SizeOfGinPostingList(plist);
161
162                 memcpy(GinDataLeafPageGetPostingList(page), plist, totalsize);
163                 GinDataPageSetDataSize(page, totalsize);
164                 GinPageSetCompressed(page);
165                 GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
166         }
167
168         oldseg = GinDataLeafPageGetPostingList(page);
169         segmentend = (Pointer) oldseg + GinDataLeafPageGetPostingListSize(page);
170         segno = 0;
171
172         walbuf = ((char *) data) + sizeof(ginxlogRecompressDataLeaf);
173         for (actionno = 0; actionno < data->nactions; actionno++)
174         {
175                 uint8           a_segno = *((uint8 *) (walbuf++));
176                 uint8           a_action = *((uint8 *) (walbuf++));
177                 GinPostingList *newseg = NULL;
178                 int                     newsegsize = 0;
179                 ItemPointerData *items = NULL;
180                 uint16          nitems = 0;
181                 ItemPointerData *olditems;
182                 int                     nolditems;
183                 ItemPointerData *newitems;
184                 int                     nnewitems;
185                 int                     segsize;
186                 Pointer         segptr;
187                 int                     szleft;
188
189                 /* Extract all the information we need from the WAL record */
190                 if (a_action == GIN_SEGMENT_INSERT ||
191                         a_action == GIN_SEGMENT_REPLACE)
192                 {
193                         newseg = (GinPostingList *) walbuf;
194                         newsegsize = SizeOfGinPostingList(newseg);
195                         walbuf += SHORTALIGN(newsegsize);
196                 }
197
198                 if (a_action == GIN_SEGMENT_ADDITEMS)
199                 {
200                         memcpy(&nitems, walbuf, sizeof(uint16));
201                         walbuf += sizeof(uint16);
202                         items = (ItemPointerData *) walbuf;
203                         walbuf += nitems * sizeof(ItemPointerData);
204                 }
205
206                 /* Skip to the segment that this action concerns */
207                 Assert(segno <= a_segno);
208                 while (segno < a_segno)
209                 {
210                         oldseg = GinNextPostingListSegment(oldseg);
211                         segno++;
212                 }
213
214                 /*
215                  * ADDITEMS action is handled like REPLACE, but the new segment to
216                  * replace the old one is reconstructed using the old segment from
217                  * disk and the new items from the WAL record.
218                  */
219                 if (a_action == GIN_SEGMENT_ADDITEMS)
220                 {
221                         int                     npacked;
222
223                         olditems = ginPostingListDecode(oldseg, &nolditems);
224
225                         newitems = ginMergeItemPointers(items, nitems,
226                                                                                         olditems, nolditems,
227                                                                                         &nnewitems);
228                         Assert(nnewitems == nolditems + nitems);
229
230                         newseg = ginCompressPostingList(newitems, nnewitems,
231                                                                                         BLCKSZ, &npacked);
232                         Assert(npacked == nnewitems);
233
234                         newsegsize = SizeOfGinPostingList(newseg);
235                         a_action = GIN_SEGMENT_REPLACE;
236                 }
237
238                 segptr = (Pointer) oldseg;
239                 if (segptr != segmentend)
240                         segsize = SizeOfGinPostingList(oldseg);
241                 else
242                 {
243                         /*
244                          * Positioned after the last existing segment. Only INSERTs
245                          * expected here.
246                          */
247                         Assert(a_action == GIN_SEGMENT_INSERT);
248                         segsize = 0;
249                 }
250                 szleft = segmentend - segptr;
251
252                 switch (a_action)
253                 {
254                         case GIN_SEGMENT_DELETE:
255                                 memmove(segptr, segptr + segsize, szleft - segsize);
256                                 segmentend -= segsize;
257
258                                 segno++;
259                                 break;
260
261                         case GIN_SEGMENT_INSERT:
262                                 /* make room for the new segment */
263                                 memmove(segptr + newsegsize, segptr, szleft);
264                                 /* copy the new segment in place */
265                                 memcpy(segptr, newseg, newsegsize);
266                                 segmentend += newsegsize;
267                                 segptr += newsegsize;
268                                 break;
269
270                         case GIN_SEGMENT_REPLACE:
271                                 /* shift the segments that follow */
272                                 memmove(segptr + newsegsize,
273                                                 segptr + segsize,
274                                                 szleft - segsize);
275                                 /* copy the replacement segment in place */
276                                 memcpy(segptr, newseg, newsegsize);
277                                 segmentend -= segsize;
278                                 segmentend += newsegsize;
279                                 segptr += newsegsize;
280                                 segno++;
281                                 break;
282
283                         default:
284                                 elog(ERROR, "unexpected GIN leaf action: %u", a_action);
285                 }
286                 oldseg = (GinPostingList *) segptr;
287         }
288
289         totalsize = segmentend - (Pointer) GinDataLeafPageGetPostingList(page);
290         GinDataPageSetDataSize(page, totalsize);
291 }
292
293 static void
294 ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdata)
295 {
296         Page            page = BufferGetPage(buffer);
297
298         if (isLeaf)
299         {
300                 ginxlogRecompressDataLeaf *data = (ginxlogRecompressDataLeaf *) rdata;
301
302                 Assert(GinPageIsLeaf(page));
303
304                 ginRedoRecompress(page, data);
305         }
306         else
307         {
308                 ginxlogInsertDataInternal *data = (ginxlogInsertDataInternal *) rdata;
309                 PostingItem *oldpitem;
310
311                 Assert(!GinPageIsLeaf(page));
312
313                 /* update link to right page after split */
314                 oldpitem = GinDataPageGetPostingItem(page, data->offset);
315                 PostingItemSetBlockNumber(oldpitem, rightblkno);
316
317                 GinDataPageAddPostingItem(page, &data->newitem, data->offset);
318         }
319 }
320
321 static void
322 ginRedoInsert(XLogReaderState *record)
323 {
324         XLogRecPtr      lsn = record->EndRecPtr;
325         ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
326         Buffer          buffer;
327 #ifdef NOT_USED
328         BlockNumber leftChildBlkno = InvalidBlockNumber;
329 #endif
330         BlockNumber rightChildBlkno = InvalidBlockNumber;
331         bool            isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
332
333         /*
334          * First clear incomplete-split flag on child page if this finishes a
335          * split.
336          */
337         if (!isLeaf)
338         {
339                 char       *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
340
341 #ifdef NOT_USED
342                 leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
343 #endif
344                 payload += sizeof(BlockIdData);
345                 rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
346                 payload += sizeof(BlockIdData);
347
348                 ginRedoClearIncompleteSplit(record, 1);
349         }
350
351         if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
352         {
353                 Page            page = BufferGetPage(buffer);
354                 Size            len;
355                 char       *payload = XLogRecGetBlockData(record, 0, &len);
356
357                 /* How to insert the payload is tree-type specific */
358                 if (data->flags & GIN_INSERT_ISDATA)
359                 {
360                         Assert(GinPageIsData(page));
361                         ginRedoInsertData(buffer, isLeaf, rightChildBlkno, payload);
362                 }
363                 else
364                 {
365                         Assert(!GinPageIsData(page));
366                         ginRedoInsertEntry(buffer, isLeaf, rightChildBlkno, payload);
367                 }
368
369                 PageSetLSN(page, lsn);
370                 MarkBufferDirty(buffer);
371         }
372         if (BufferIsValid(buffer))
373                 UnlockReleaseBuffer(buffer);
374 }
375
376 static void
377 ginRedoSplit(XLogReaderState *record)
378 {
379         ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
380         Buffer          lbuffer,
381                                 rbuffer,
382                                 rootbuf;
383         bool            isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
384         bool            isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
385
386         /*
387          * First clear incomplete-split flag on child page if this finishes a
388          * split
389          */
390         if (!isLeaf)
391                 ginRedoClearIncompleteSplit(record, 3);
392
393         if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
394                 elog(ERROR, "GIN split record did not contain a full-page image of left page");
395
396         if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED)
397                 elog(ERROR, "GIN split record did not contain a full-page image of right page");
398
399         if (isRoot)
400         {
401                 if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED)
402                         elog(ERROR, "GIN split record did not contain a full-page image of root page");
403                 UnlockReleaseBuffer(rootbuf);
404         }
405
406         UnlockReleaseBuffer(rbuffer);
407         UnlockReleaseBuffer(lbuffer);
408 }
409
410 /*
411  * VACUUM_PAGE record contains simply a full image of the page, similar to
412  * a XLOG_FPI record.
413  */
414 static void
415 ginRedoVacuumPage(XLogReaderState *record)
416 {
417         Buffer          buffer;
418
419         if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
420         {
421                 elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
422         }
423         UnlockReleaseBuffer(buffer);
424 }
425
426 static void
427 ginRedoVacuumDataLeafPage(XLogReaderState *record)
428 {
429         XLogRecPtr      lsn = record->EndRecPtr;
430         Buffer          buffer;
431
432         if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
433         {
434                 Page            page = BufferGetPage(buffer);
435                 Size            len;
436                 ginxlogVacuumDataLeafPage *xlrec;
437
438                 xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
439
440                 Assert(GinPageIsLeaf(page));
441                 Assert(GinPageIsData(page));
442
443                 ginRedoRecompress(page, &xlrec->data);
444                 PageSetLSN(page, lsn);
445                 MarkBufferDirty(buffer);
446         }
447         if (BufferIsValid(buffer))
448                 UnlockReleaseBuffer(buffer);
449 }
450
451 static void
452 ginRedoDeletePage(XLogReaderState *record)
453 {
454         XLogRecPtr      lsn = record->EndRecPtr;
455         ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
456         Buffer          dbuffer;
457         Buffer          pbuffer;
458         Buffer          lbuffer;
459         Page            page;
460
461         if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
462         {
463                 page = BufferGetPage(dbuffer);
464                 Assert(GinPageIsData(page));
465                 GinPageGetOpaque(page)->flags = GIN_DELETED;
466                 PageSetLSN(page, lsn);
467                 MarkBufferDirty(dbuffer);
468         }
469
470         if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO)
471         {
472                 page = BufferGetPage(pbuffer);
473                 Assert(GinPageIsData(page));
474                 Assert(!GinPageIsLeaf(page));
475                 GinPageDeletePostingItem(page, data->parentOffset);
476                 PageSetLSN(page, lsn);
477                 MarkBufferDirty(pbuffer);
478         }
479
480         if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO)
481         {
482                 page = BufferGetPage(lbuffer);
483                 Assert(GinPageIsData(page));
484                 GinPageGetOpaque(page)->rightlink = data->rightLink;
485                 PageSetLSN(page, lsn);
486                 MarkBufferDirty(lbuffer);
487         }
488
489         if (BufferIsValid(lbuffer))
490                 UnlockReleaseBuffer(lbuffer);
491         if (BufferIsValid(pbuffer))
492                 UnlockReleaseBuffer(pbuffer);
493         if (BufferIsValid(dbuffer))
494                 UnlockReleaseBuffer(dbuffer);
495 }
496
497 static void
498 ginRedoUpdateMetapage(XLogReaderState *record)
499 {
500         XLogRecPtr      lsn = record->EndRecPtr;
501         ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
502         Buffer          metabuffer;
503         Page            metapage;
504         Buffer          buffer;
505
506         /*
507          * Restore the metapage. This is essentially the same as a full-page
508          * image, so restore the metapage unconditionally without looking at the
509          * LSN, to avoid torn page hazards.
510          */
511         metabuffer = XLogInitBufferForRedo(record, 0);
512         Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
513         metapage = BufferGetPage(metabuffer);
514
515         memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
516         PageSetLSN(metapage, lsn);
517         MarkBufferDirty(metabuffer);
518
519         if (data->ntuples > 0)
520         {
521                 /*
522                  * insert into tail page
523                  */
524                 if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
525                 {
526                         Page            page = BufferGetPage(buffer);
527                         OffsetNumber off;
528                         int                     i;
529                         Size            tupsize;
530                         char       *payload;
531                         IndexTuple      tuples;
532                         Size            totaltupsize;
533
534                         payload = XLogRecGetBlockData(record, 1, &totaltupsize);
535                         tuples = (IndexTuple) payload;
536
537                         if (PageIsEmpty(page))
538                                 off = FirstOffsetNumber;
539                         else
540                                 off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
541
542                         for (i = 0; i < data->ntuples; i++)
543                         {
544                                 tupsize = IndexTupleSize(tuples);
545
546                                 if (PageAddItem(page, (Item) tuples, tupsize, off,
547                                                                 false, false) == InvalidOffsetNumber)
548                                         elog(ERROR, "failed to add item to index page");
549
550                                 tuples = (IndexTuple) (((char *) tuples) + tupsize);
551
552                                 off++;
553                         }
554                         Assert(payload + totaltupsize == (char *) tuples);
555
556                         /*
557                          * Increase counter of heap tuples
558                          */
559                         GinPageGetOpaque(page)->maxoff++;
560
561                         PageSetLSN(page, lsn);
562                         MarkBufferDirty(buffer);
563                 }
564                 if (BufferIsValid(buffer))
565                         UnlockReleaseBuffer(buffer);
566         }
567         else if (data->prevTail != InvalidBlockNumber)
568         {
569                 /*
570                  * New tail
571                  */
572                 if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
573                 {
574                         Page            page = BufferGetPage(buffer);
575
576                         GinPageGetOpaque(page)->rightlink = data->newRightlink;
577
578                         PageSetLSN(page, lsn);
579                         MarkBufferDirty(buffer);
580                 }
581                 if (BufferIsValid(buffer))
582                         UnlockReleaseBuffer(buffer);
583         }
584
585         UnlockReleaseBuffer(metabuffer);
586 }
587
588 static void
589 ginRedoInsertListPage(XLogReaderState *record)
590 {
591         XLogRecPtr      lsn = record->EndRecPtr;
592         ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
593         Buffer          buffer;
594         Page            page;
595         OffsetNumber l,
596                                 off = FirstOffsetNumber;
597         int                     i,
598                                 tupsize;
599         char       *payload;
600         IndexTuple      tuples;
601         Size            totaltupsize;
602
603         /* We always re-initialize the page. */
604         buffer = XLogInitBufferForRedo(record, 0);
605         page = BufferGetPage(buffer);
606
607         GinInitBuffer(buffer, GIN_LIST);
608         GinPageGetOpaque(page)->rightlink = data->rightlink;
609         if (data->rightlink == InvalidBlockNumber)
610         {
611                 /* tail of sublist */
612                 GinPageSetFullRow(page);
613                 GinPageGetOpaque(page)->maxoff = 1;
614         }
615         else
616         {
617                 GinPageGetOpaque(page)->maxoff = 0;
618         }
619
620         payload = XLogRecGetBlockData(record, 0, &totaltupsize);
621
622         tuples = (IndexTuple) payload;
623         for (i = 0; i < data->ntuples; i++)
624         {
625                 tupsize = IndexTupleSize(tuples);
626
627                 l = PageAddItem(page, (Item) tuples, tupsize, off, false, false);
628
629                 if (l == InvalidOffsetNumber)
630                         elog(ERROR, "failed to add item to index page");
631
632                 tuples = (IndexTuple) (((char *) tuples) + tupsize);
633                 off++;
634         }
635         Assert((char *) tuples == payload + totaltupsize);
636
637         PageSetLSN(page, lsn);
638         MarkBufferDirty(buffer);
639
640         UnlockReleaseBuffer(buffer);
641 }
642
643 static void
644 ginRedoDeleteListPages(XLogReaderState *record)
645 {
646         XLogRecPtr      lsn = record->EndRecPtr;
647         ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
648         Buffer          metabuffer;
649         Page            metapage;
650         int                     i;
651
652         metabuffer = XLogInitBufferForRedo(record, 0);
653         Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
654         metapage = BufferGetPage(metabuffer);
655
656         GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
657
658         memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
659         PageSetLSN(metapage, lsn);
660         MarkBufferDirty(metabuffer);
661
662         /*
663          * In normal operation, shiftList() takes exclusive lock on all the
664          * pages-to-be-deleted simultaneously.  During replay, however, it should
665          * be all right to lock them one at a time.  This is dependent on the fact
666          * that we are deleting pages from the head of the list, and that readers
667          * share-lock the next page before releasing the one they are on. So we
668          * cannot get past a reader that is on, or due to visit, any page we are
669          * going to delete.  New incoming readers will block behind our metapage
670          * lock and then see a fully updated page list.
671          *
672          * No full-page images are taken of the deleted pages. Instead, they are
673          * re-initialized as empty, deleted pages. Their right-links don't need to
674          * be preserved, because no new readers can see the pages, as explained
675          * above.
676          */
677         for (i = 0; i < data->ndeleted; i++)
678         {
679                 Buffer          buffer;
680                 Page            page;
681
682                 buffer = XLogInitBufferForRedo(record, i + 1);
683                 page = BufferGetPage(buffer);
684                 GinInitBuffer(buffer, GIN_DELETED);
685
686                 PageSetLSN(page, lsn);
687                 MarkBufferDirty(buffer);
688
689                 UnlockReleaseBuffer(buffer);
690         }
691         UnlockReleaseBuffer(metabuffer);
692 }
693
694 void
695 gin_redo(XLogReaderState *record)
696 {
697         uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
698         MemoryContext oldCtx;
699
700         /*
701          * GIN indexes do not require any conflict processing. NB: If we ever
702          * implement a similar optimization as we have in b-tree, and remove
703          * killed tuples outside VACUUM, we'll need to handle that here.
704          */
705
706         oldCtx = MemoryContextSwitchTo(opCtx);
707         switch (info)
708         {
709                 case XLOG_GIN_CREATE_INDEX:
710                         ginRedoCreateIndex(record);
711                         break;
712                 case XLOG_GIN_CREATE_PTREE:
713                         ginRedoCreatePTree(record);
714                         break;
715                 case XLOG_GIN_INSERT:
716                         ginRedoInsert(record);
717                         break;
718                 case XLOG_GIN_SPLIT:
719                         ginRedoSplit(record);
720                         break;
721                 case XLOG_GIN_VACUUM_PAGE:
722                         ginRedoVacuumPage(record);
723                         break;
724                 case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
725                         ginRedoVacuumDataLeafPage(record);
726                         break;
727                 case XLOG_GIN_DELETE_PAGE:
728                         ginRedoDeletePage(record);
729                         break;
730                 case XLOG_GIN_UPDATE_META_PAGE:
731                         ginRedoUpdateMetapage(record);
732                         break;
733                 case XLOG_GIN_INSERT_LISTPAGE:
734                         ginRedoInsertListPage(record);
735                         break;
736                 case XLOG_GIN_DELETE_LISTPAGE:
737                         ginRedoDeleteListPages(record);
738                         break;
739                 default:
740                         elog(PANIC, "gin_redo: unknown op code %u", info);
741         }
742         MemoryContextSwitchTo(oldCtx);
743         MemoryContextReset(opCtx);
744 }
745
746 void
747 gin_xlog_startup(void)
748 {
749         opCtx = AllocSetContextCreate(CurrentMemoryContext,
750                                                                   "GIN recovery temporary context",
751                                                                   ALLOCSET_DEFAULT_MINSIZE,
752                                                                   ALLOCSET_DEFAULT_INITSIZE,
753                                                                   ALLOCSET_DEFAULT_MAXSIZE);
754 }
755
756 void
757 gin_xlog_cleanup(void)
758 {
759         MemoryContextDelete(opCtx);
760 }