1 /*-------------------------------------------------------------------------
4 * delete & vacuum routines for the postgres GIN
7 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginvacuum.c
12 *-------------------------------------------------------------------------
17 #include "access/gin_private.h"
18 #include "access/xloginsert.h"
19 #include "commands/vacuum.h"
20 #include "miscadmin.h"
21 #include "postmaster/autovacuum.h"
22 #include "storage/indexfsm.h"
23 #include "storage/lmgr.h"
24 #include "utils/memutils.h"
/* Working state for a GIN bulk-delete pass (only these fields are visible here). */
29 IndexBulkDeleteResult *result;	/* statistics accumulator returned to the caller */
30 IndexBulkDeleteCallback callback;	/* returns true for heap TIDs that must be removed */
33 BufferAccessStrategy strategy;	/* buffer ring strategy passed to ReadBufferExtended */
38 * Vacuums an uncompressed posting list.  The length of the list must be
39 * specified in number of items (nitems).
41 * If none of the items need to be removed, returns NULL. Otherwise returns
42 * a new palloc'd array with the remaining items. The number of remaining
43 * items is returned in *nremaining.
46 ginVacuumItemPointers(GinVacuumState *gvs, ItemPointerData *items,
47 int nitem, int *nremaining)
/*
 * tmpitems stays NULL until the first item is removed; a NULL result tells
 * the caller that nothing needed to be removed.
 */
51 ItemPointer tmpitems = NULL;
54 * Iterate over TIDs array
56 for (i = 0; i < nitem; i++)
/* The bulk-delete callback decides whether this heap TID is to be removed. */
58 if (gvs->callback(items + i, gvs->callback_state))
60 gvs->result->tuples_removed += 1;
64 * First TID to be deleted: allocate memory to hold the
/* Lazily allocate the result array and carry over the i items kept so far. */
67 tmpitems = palloc(sizeof(ItemPointerData) * nitem);
68 memcpy(tmpitems, items, sizeof(ItemPointerData) * i);
/* Surviving item: count it, and append it if we are building a copy. */
73 gvs->result->num_index_tuples += 1;
75 tmpitems[remaining] = items[i];
/* Report how many items remain after vacuuming. */
80 *nremaining = remaining;
85 * Create a WAL record for vacuuming entry tree leaf page.
88 xlogVacuumPage(Relation index, Buffer buffer)
90 Page page = BufferGetPage(buffer);
93 /* This is only used for entry tree leaf pages. */
94 Assert(!GinPageIsData(page));
95 Assert(GinPageIsLeaf(page));
/* Unlogged/temporary relations need no WAL record. */
97 if (!RelationNeedsWAL(index))
101 * Always create a full image, we don't track the changes on the page at
102 * any more fine-grained level. This could obviously be improved...
105 XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
107 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
/* Stamp the page with the record's LSN so it isn't flushed ahead of its WAL. */
108 PageSetLSN(page, recptr);
/*
 * Descend the posting tree starting at blkno and vacuum its leaf pages.
 * Returns TRUE if some page in the subtree became completely empty.  For the
 * root call (isRoot), when empty pages were found, the still-locked root
 * buffer is handed back via *rootBuffer for the later deletion pass.
 */
112 ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
116 bool hasVoidPage = FALSE;
117 MemoryContext oldCxt;
119 buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
120 RBM_NORMAL, gvs->strategy);
121 page = BufferGetPage(buffer);
124 * Make sure we aren't running concurrently with inserters: an inserter
125 * never gives up its interest in the root page until it is done (though it
126 * may unlock and relock it).  New scans can't start, but previously
 * started ones may still be in progress.
130 LockBufferForCleanup(buffer);
132 LockBuffer(buffer, GIN_EXCLUSIVE);
134 Assert(GinPageIsData(page));
136 if (GinPageIsLeaf(page))
/* Vacuum the leaf in the short-lived context, then reset that context. */
138 oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
139 ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
140 MemoryContextSwitchTo(oldCxt);
141 MemoryContextReset(gvs->tmpCxt);
143 /* If the root itself is a leaf page, no further processing is wanted. */
144 if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page))
150 bool isChildHasVoid = FALSE;
/* Internal page: recurse into every child referenced by a posting item. */
152 for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
154 PostingItem *pitem = GinDataPageGetPostingItem(page, i);
156 if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
157 isChildHasVoid = TRUE;
165 * If this is the root and the tree contains empty pages, keep the root
166 * lock while we go on to delete them, guaranteeing the tree stays unused.
168 if (!(isRoot && hasVoidPage))
170 UnlockReleaseBuffer(buffer);
175 *rootBuffer = buffer;
182 * Delete a posting tree page.
185 ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno,
186 BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot)
193 BlockNumber rightlink;
196 * Lock the pages in the same order as an insertion would, to avoid
197 * deadlocks: left, then right, then parent.
199 lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
200 RBM_NORMAL, gvs->strategy);
201 dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno,
202 RBM_NORMAL, gvs->strategy);
203 pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno,
204 RBM_NORMAL, gvs->strategy);
206 LockBuffer(lBuffer, GIN_EXCLUSIVE);
207 LockBuffer(dBuffer, GIN_EXCLUSIVE);
208 if (!isParentRoot) /* parent is already locked by
209 * LockBufferForCleanup() */
210 LockBuffer(pBuffer, GIN_EXCLUSIVE);
/* All three page changes below must appear atomic. */
212 START_CRIT_SECTION();
214 /* Unlink the page by changing left sibling's rightlink */
215 page = BufferGetPage(dBuffer);
216 rightlink = GinPageGetOpaque(page)->rightlink;
218 page = BufferGetPage(lBuffer);
219 GinPageGetOpaque(page)->rightlink = rightlink;
221 /* Delete downlink from parent */
222 parentPage = BufferGetPage(pBuffer);
223 #ifdef USE_ASSERT_CHECKING
/* The downlink at myoff really must point at the page we're deleting. */
226 PostingItem *tod = GinDataPageGetPostingItem(parentPage, myoff);
228 Assert(PostingItemGetBlockNumber(tod) == deleteBlkno);
231 GinPageDeletePostingItem(parentPage, myoff);
233 page = BufferGetPage(dBuffer);
236 * Mark the page deleted, but keep its rightlink intact so that scans
 * already positioned on this page can still step right.
239 GinPageGetOpaque(page)->flags = GIN_DELETED;
241 MarkBufferDirty(pBuffer);
242 MarkBufferDirty(lBuffer);
243 MarkBufferDirty(dBuffer);
245 if (RelationNeedsWAL(gvs->index))
248 ginxlogDeletePage data;
251 * We can't pass REGBUF_STANDARD for the deleted page, because we
252 * didn't set pd_lower on pre-9.4 versions. The page might've been
253 * binary-upgraded from an older version, and hence not have pd_lower
254 * set correctly. Ditto for the left page, but removing the item from
255 * the parent updated its pd_lower, so we know that's OK at this
259 XLogRegisterBuffer(0, dBuffer, 0);
260 XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
261 XLogRegisterBuffer(2, lBuffer, 0);
263 data.parentOffset = myoff;
264 data.rightLink = GinPageGetOpaque(page)->rightlink;
266 XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
268 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
/* Stamp all three modified pages with the record's LSN. */
269 PageSetLSN(page, recptr);
270 PageSetLSN(parentPage, recptr);
271 PageSetLSN(BufferGetPage(lBuffer), recptr);
/*
 * NOTE(review): unlocking pBuffer is presumably conditional on !isParentRoot
 * (mirroring the lock above); the condition is not visible in this excerpt.
 */
275 LockBuffer(pBuffer, GIN_UNLOCK);
276 ReleaseBuffer(pBuffer);
277 UnlockReleaseBuffer(lBuffer);
278 UnlockReleaseBuffer(dBuffer);
/* Count the deletion in the stats handed back to the caller. */
282 gvs->result->pages_deleted++;
/*
 * One level of the stack maintained by ginScanToDelete() while it walks the
 * posting tree looking for empty pages to delete.
 */
285 typedef struct DataPageDeleteStack
287 struct DataPageDeleteStack *child;	/* level below this one, if any */
288 struct DataPageDeleteStack *parent;	/* level above this one */
290 BlockNumber blkno; /* current block number */
291 BlockNumber leftBlkno; /* rightmost non-deleted page to the left */
293 } DataPageDeleteStack;
296 * scans posting tree and deletes empty pages
299 ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
300 DataPageDeleteStack *parent, OffsetNumber myoff)
302 DataPageDeleteStack *me;
305 bool meDelete = FALSE;
/* Allocate a fresh, zeroed stack level for this page. */
316 me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack));
319 me->leftBlkno = InvalidBlockNumber;
325 buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
326 RBM_NORMAL, gvs->strategy);
327 page = BufferGetPage(buffer);
329 Assert(GinPageIsData(page));
/* Internal page: first give every child a chance to be deleted. */
331 if (!GinPageIsLeaf(page))
336 for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
338 PostingItem *pitem = GinDataPageGetPostingItem(page, i);
340 if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i))
/*
 * A leaf is empty when it holds no items; an internal page is empty when no
 * downlinks are left.
 */
345 if (GinPageIsLeaf(page))
346 isempty = GinDataLeafPageIsEmpty(page);
348 isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber;
352 /* we never delete the left- or rightmost branch */
353 if (me->leftBlkno != InvalidBlockNumber && !GinPageRightMost(page))
/* Unlink this page, using the remembered left sibling and our parent's info. */
356 ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot);
361 ReleaseBuffer(buffer);
/* Page survives: it becomes the left sibling for the next page we visit. */
364 me->leftBlkno = blkno;
/*
 * Vacuum one whole posting tree: first vacuum the leaves; then, if any page
 * became empty, scan the tree again to delete the empty pages.
 */
370 ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
372 Buffer rootBuffer = InvalidBuffer;
373 DataPageDeleteStack root,
/* FALSE means no page became empty, so there is nothing to delete. */
377 if (ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer) == FALSE)
379 Assert(rootBuffer == InvalidBuffer);
/* Set up the bottom stack level, representing the root. */
383 memset(&root, 0, sizeof(DataPageDeleteStack));
384 root.leftBlkno = InvalidBlockNumber;
387 vacuum_delay_point();
389 ginScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber);
/* Deletion pass done; release the root the leaf pass kept locked for us. */
399 UnlockReleaseBuffer(rootBuffer);
403 * returns modified page or NULL if page isn't modified.
404 * The function works on the original page until the first change occurs,
405 * at which point the page is copied into a temporary one.
408 ginVacuumEntryPage(GinVacuumState *gvs, Buffer buffer, BlockNumber *roots, uint32 *nroot)
410 Page origpage = BufferGetPage(buffer),
413 maxoff = PageGetMaxOffsetNumber(origpage);
/* Walk every entry tuple on the page. */
419 for (i = FirstOffsetNumber; i <= maxoff; i++)
421 IndexTuple itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
423 if (GinIsPostingTree(itup))
426 * store posting tree's roots for further processing, we can't
427 * vacuum it just now due to risk of deadlocks with scans/inserts
429 roots[*nroot] = GinGetDownlink(itup);
432 else if (GinGetNPosting(itup) > 0)
435 ItemPointer items_orig;
436 bool free_items_orig;
439 /* Get list of item pointers from the tuple. */
440 if (GinItupIsCompressed(itup))
/* Compressed list: decode into a palloc'd array, which we must free. */
442 items_orig = ginPostingListDecode((GinPostingList *) GinGetPosting(itup), &nitems);
443 free_items_orig = true;
/* Uncompressed list: point straight into the tuple, nothing to free. */
447 items_orig = (ItemPointer) GinGetPosting(itup);
448 nitems = GinGetNPosting(itup);
449 free_items_orig = false;
452 /* Remove any items from the list that need to be vacuumed. */
453 items = ginVacuumItemPointers(gvs, items_orig, nitems, &nitems);
458 /* If any item pointers were removed, recreate the tuple. */
463 GinNullCategory category;
464 GinPostingList *plist;
/* Re-compress the surviving items into a new posting list. */
469 plist = ginCompressPostingList(items, nitems, GinMaxItemSize, NULL);
470 plistsize = SizeOfGinPostingList(plist);
479 * if we already created a temporary page, make changes in
482 if (tmppage == origpage)
485 * On first difference, create a temporary copy of the
486 * page and copy the tuple's posting list to it.
488 tmppage = PageGetTempPageCopy(origpage);
490 /* set itup pointer to new page */
491 itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i));
/* Rebuild the entry tuple around the compressed, vacuumed posting list. */
494 attnum = gintuple_get_attrnum(&gvs->ginstate, itup);
495 key = gintuple_get_key(&gvs->ginstate, itup, &category);
496 itup = GinFormTuple(&gvs->ginstate, attnum, key, category,
497 (char *) plist, plistsize,
/* Replace the old tuple with the rebuilt one at the same offset. */
501 PageIndexTupleDelete(tmppage, i);
503 if (PageAddItem(tmppage, (Item) itup, IndexTupleSize(itup), i, false, false) != i)
504 elog(ERROR, "failed to add item to index page in \"%s\"",
505 RelationGetRelationName(gvs->index));
/* NULL means the page was never modified (no temp copy was made). */
513 return (tmppage == origpage) ? NULL : tmppage;
/*
 * Bulk-delete handler: walks the entry tree's leaf level left to right,
 * vacuuming each entry page, and vacuums posting trees found along the way.
 */
517 ginbulkdelete(PG_FUNCTION_ARGS)
519 IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
520 IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
521 IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
522 void *callback_state = (void *) PG_GETARG_POINTER(3);
523 Relation index = info->index;
524 BlockNumber blkno = GIN_ROOT_BLKNO;
/* Room for as many posting-tree roots as can possibly fit on one page. */
527 BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
/* Short-lived context, reset per leaf by the posting-tree code. */
530 gvs.tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
531 "Gin vacuum temporary context",
532 ALLOCSET_DEFAULT_MINSIZE,
533 ALLOCSET_DEFAULT_INITSIZE,
534 ALLOCSET_DEFAULT_MAXSIZE);
536 gvs.callback = callback;
537 gvs.callback_state = callback_state;
538 gvs.strategy = info->strategy;
539 initGinState(&gvs.ginstate, index);
541 /* first time through? */
544 /* Yes, so initialize stats to zeroes */
545 stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
546 /* and cleanup any pending inserts */
547 ginInsertCleanup(&gvs.ginstate, true, stats);
550 /* we'll re-count the tuples each time */
551 stats->num_index_tuples = 0;
/* Descend from the root toward the leftmost leaf of the entry B-tree. */
554 buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
555 RBM_NORMAL, info->strategy);
560 Page page = BufferGetPage(buffer);
563 LockBuffer(buffer, GIN_SHARE);
565 Assert(!GinPageIsData(page));
/*
 * Found a leaf: trade the share lock for an exclusive one.  If the root
 * grew into an internal page in the meantime, retry from the top.
 */
567 if (GinPageIsLeaf(page))
569 LockBuffer(buffer, GIN_UNLOCK);
570 LockBuffer(buffer, GIN_EXCLUSIVE);
572 if (blkno == GIN_ROOT_BLKNO && !GinPageIsLeaf(page))
574 LockBuffer(buffer, GIN_UNLOCK);
575 continue; /* check it one more */
/* Still internal: follow the leftmost downlink one level down. */
580 Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
582 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
583 blkno = GinGetDownlink(itup);
584 Assert(blkno != InvalidBlockNumber);
586 UnlockReleaseBuffer(buffer);
587 buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
588 RBM_NORMAL, info->strategy);
591 /* right now we found leftmost page in entry's BTree */
595 Page page = BufferGetPage(buffer);
599 Assert(!GinPageIsData(page));
/* Vacuum this entry page; a non-NULL result is a modified temp copy. */
601 resPage = ginVacuumEntryPage(&gvs, buffer, rootOfPostingTree, &nRoot);
/* Remember the right sibling before we give up the buffer. */
603 blkno = GinPageGetOpaque(page)->rightlink;
/* Install the modified copy, dirty the buffer and WAL-log it atomically. */
607 START_CRIT_SECTION();
608 PageRestoreTempPage(resPage, page);
609 MarkBufferDirty(buffer);
610 xlogVacuumPage(gvs.index, buffer);
611 UnlockReleaseBuffer(buffer);
616 UnlockReleaseBuffer(buffer);
619 vacuum_delay_point();
/* Vacuum the posting trees collected from this page before moving right. */
621 for (i = 0; i < nRoot; i++)
623 ginVacuumPostingTree(&gvs, rootOfPostingTree[i]);
624 vacuum_delay_point();
627 if (blkno == InvalidBlockNumber) /* rightmost page */
630 buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
631 RBM_NORMAL, info->strategy);
632 LockBuffer(buffer, GIN_EXCLUSIVE);
/* Done with the temporary context; return accumulated stats. */
635 MemoryContextDelete(gvs.tmpCxt);
637 PG_RETURN_POINTER(gvs.result);
/*
 * Post-vacuum cleanup handler: handles ANALYZE-only calls, counts pages and
 * entries for the metapage stats, and records free pages in the FSM.
 */
641 ginvacuumcleanup(PG_FUNCTION_ARGS)
643 IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
644 IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
645 Relation index = info->index;
649 BlockNumber totFreePages;
651 GinStatsData idxStat;
654 * In an autovacuum analyze, we want to clean up pending insertions.
655 * Otherwise, an ANALYZE-only call is a no-op.
657 if (info->analyze_only)
659 if (IsAutoVacuumWorkerProcess())
661 initGinState(&ginstate, index);
662 ginInsertCleanup(&ginstate, true, stats);
664 PG_RETURN_POINTER(stats);
668 * Set up all-zero stats and cleanup pending inserts if ginbulkdelete
673 stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
674 initGinState(&ginstate, index);
675 ginInsertCleanup(&ginstate, true, stats);
678 memset(&idxStat, 0, sizeof(idxStat));
681 * XXX we always report the heap tuple count as the number of index
682 * entries. This is bogus if the index is partial, but it's real hard to
683 * tell how many distinct heap entries are referenced by a GIN index.
685 stats->num_index_tuples = info->num_heap_tuples;
686 stats->estimated_count = info->estimated_count;
689 * Need lock unless it's local to this backend.
691 needLock = !RELATION_IS_LOCAL(index);
/* Guard against a concurrent relation extension while reading the size. */
694 LockRelationForExtension(index, ExclusiveLock);
695 npages = RelationGetNumberOfBlocks(index);
697 UnlockRelationForExtension(index, ExclusiveLock);
/* Scan every page: classify it for the stats and report deleted pages free. */
701 for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
706 vacuum_delay_point();
708 buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
709 RBM_NORMAL, info->strategy);
710 LockBuffer(buffer, GIN_SHARE);
711 page = (Page) BufferGetPage(buffer);
713 if (GinPageIsDeleted(page))
715 Assert(blkno != GIN_ROOT_BLKNO);
716 RecordFreeIndexPage(index, blkno);
719 else if (GinPageIsData(page))
721 idxStat.nDataPages++;
723 else if (!GinPageIsList(page))
725 idxStat.nEntryPages++;
/* Entry-leaf tuples are counted as index entries. */
727 if (GinPageIsLeaf(page))
728 idxStat.nEntries += PageGetMaxOffsetNumber(page);
731 UnlockReleaseBuffer(buffer);
734 /* Update the metapage with accurate page and entry counts */
735 idxStat.nTotalPages = npages;
736 ginUpdateStats(info->index, &idxStat);
738 /* Finally, vacuum the FSM */
739 IndexFreeSpaceMapVacuum(info->index);
/* NOTE(review): totFreePages is presumably accumulated in the scan above;
 * the increment is not visible in this excerpt. */
741 stats->pages_free = totFreePages;
/* Re-read the relation size under the extension lock for the final stats. */
744 LockRelationForExtension(index, ExclusiveLock);
745 stats->num_pages = RelationGetNumberOfBlocks(index);
747 UnlockRelationForExtension(index, ExclusiveLock);
749 PG_RETURN_POINTER(stats);