/*-------------------------------------------------------------------------
 *
 * vacuumlazy.c
 *	  Concurrent ("lazy") vacuuming.
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size,
 * with an upper limit that depends on table size (this limit ensures we don't
 * allocate a huge area uselessly for vacuuming small tables).  If the array
 * threatens to overflow, we suspend the heap scan phase and perform a pass of
 * index cleanup and page compaction, then resume the heap scan with an empty
 * TID array.
 *
 * If we're processing a table with no indexes, we can just vacuum each page
 * as we go; there's no need to save up multiple tuples to minimize the number
 * of index scans performed.  So we don't use maintenance_work_mem memory for
 * the TID array, just enough to hold as many heap tuples as fit on one page.
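 *
 * For illustration only (figures are an assumption, not from the original
 * header): with maintenance_work_mem set to 64MB and the 6-byte
 * ItemPointerData, the dead-tuple array can hold about
 * 64 * 1024 * 1024 / 6, i.e. roughly 11 million TIDs, before a round of
 * index cleanup is forced.  See lazy_space_alloc() for the exact rules.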
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.133 2010/04/21 17:20:56 sriggs Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tqual.h"

/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM	1000
#define REL_TRUNCATE_FRACTION	16
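
/*
 * Worked example (illustrative, not in the original source): for a
 * 64000-page relation the trigger point is
 * Min(REL_TRUNCATE_MINIMUM, 64000 / REL_TRUNCATE_FRACTION) =
 * Min(1000, 4000) = 1000, so lazy_truncate_heap() is attempted only when
 * at least 1000 tail pages appear to be freeable.
 */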

/*
 * Guesstimation of number of dead tuples per page.  This is used to
 * provide an upper limit to memory allocated when vacuuming small
 * tables.
 */
#define LAZY_ALLOC_TUPLES		MaxHeapTuplesPerPage

/*
 * Before we consider skipping a page that's marked as clean in
 * visibility map, we must've seen at least this many clean pages.
 */
#define SKIP_PAGES_THRESHOLD	32
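
/*
 * Scale check (illustrative, not in the original source): with the standard
 * 8kB block size, a streak of SKIP_PAGES_THRESHOLD all-visible pages is
 * 32 * 8kB = 256kB of contiguous heap, long enough that skipping it can pay
 * off despite forgoing the benefit of OS readahead on a sequential scan.
 */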

typedef struct LVRelStats
{
	/* hasindex = true means two-pass strategy; false means one-pass */
	bool		hasindex;
	bool		scanned_all;	/* have we scanned all pages (this far)? */
	/* Overall statistics about rel */
	BlockNumber rel_pages;
	double		old_rel_tuples; /* previous value of pg_class.reltuples */
	double		rel_tuples;		/* counts only tuples on scanned pages */
	BlockNumber pages_removed;
	double		tuples_deleted;
	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
	/* List of TIDs of tuples we intend to delete */
	/* NB: this list is ordered by TID address */
	int			num_dead_tuples;	/* current # of entries */
	int			max_dead_tuples;	/* # slots allocated in array */
	ItemPointer dead_tuples;	/* array of ItemPointerData */
	int			num_index_scans;
	TransactionId latestRemovedXid;
} LVRelStats;

/* A few variables that don't seem worth passing around as parameters */
static int	elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;

static BufferAccessStrategy vac_strategy;

/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes, bool scan_all);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
						 LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int	vac_cmp_itemptr(const void *left, const void *right);

/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
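 *
 *		A hypothetical call site, for illustration only (variable names are
 *		assumptions, not copied from the actual caller in vacuum.c):
 *
 *			bool		scanned_all;
 *
 *			lazy_vacuum_rel(onerel, vacstmt, bstrategy, &scanned_all);
 *
 *		On return, scanned_all reports whether every page was visited; when
 *		false, reltuples/relpages were deliberately left unchanged.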
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy, bool *scanned_all)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;
	bool		scan_all;
	TransactionId freezeTableLimit;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->options & VACOPT_VERBOSE)
		elevel = INFO;
	else
		elevel = DEBUG2;

	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
						  onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit, &freezeTableLimit);
	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
											 freezeTableLimit);

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	vacrelstats->scanned_all = true;	/* will be cleared if we skip a page */
	vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
		lazy_truncate_heap(onerel, vacrelstats);

	/* Vacuum the Free Space Map */
	FreeSpaceMapVacuum(onerel);

	/*
	 * Update statistics in pg_class.  But only if we didn't skip any pages;
	 * the tuple count only includes tuples from the pages we've visited, and
	 * we haven't frozen tuples in unvisited pages either.  The page count is
	 * accurate in any case, but because we use the reltuples / relpages ratio
	 * in the planner, it's better to not update relpages either if we can't
	 * update reltuples.
	 */
	if (vacrelstats->scanned_all)
		vac_update_relstats(onerel,
							vacrelstats->rel_pages, vacrelstats->rel_tuples,
							vacrelstats->hasindex,
							FreezeLimit);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel),
						 onerel->rd_rel->relisshared,
						 vacrelstats->scanned_all,
						 vacrelstats->rel_tuples);

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
							vacrelstats->pages_removed, vacrelstats->rel_pages,
							vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}

	*scanned_all = vacrelstats->scanned_all;
}

/*
 * For Hot Standby we need to know the highest transaction id that will
 * be removed by any change.  VACUUM proceeds in a number of passes so
 * we need to consider how each pass operates.  The first phase runs
 * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
 * progresses - these will have a latestRemovedXid on each record.
 * In some cases this removes all of the tuples to be removed, though
 * often we have dead tuples with index pointers so we must remember them
 * for removal in phase 3.  Index records for those rows are removed
 * in phase 2 and index blocks do not have MVCC information attached.
 * So before we can allow removal of any index tuples we need to issue
 * a WAL record containing the latestRemovedXid of rows that will be
 * removed in phase three.  This allows recovery queries to block at the
 * correct place, i.e. before phase two, rather than during phase three
 * which would be after the rows have become inaccessible.
 */
static void
vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
{
	/*
	 * No need to log changes for temp tables, they do not contain data
	 * visible on the standby server.
	 */
	if (rel->rd_istemp || !XLogIsNeeded())
		return;

	Assert(TransactionIdIsValid(vacrelstats->latestRemovedXid));

	(void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
}

/*
 *	lazy_scan_heap() -- scan an open heap relation
 *
 *		This routine sets commit status bits, builds lists of dead tuples
 *		and pages with free space, and calculates statistics on the number
 *		of live tuples in the heap.  When done, or when we run low on space
 *		for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *		If there are no indexes then we just vacuum each dirty page as we
 *		process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes, bool scan_all)
{
	BlockNumber nblocks,
				blkno;
	HeapTupleData tuple;
	char	   *relname;
	BlockNumber empty_pages,
				vacuumed_pages,
				scanned_pages;
	double		num_tuples,
				tups_vacuumed,
				nkeep,
				nunused;
	IndexBulkDeleteResult **indstats;
	int			i;
	PGRUsage	ru0;
	Buffer		vmbuffer = InvalidBuffer;
	BlockNumber all_visible_streak;

	pg_rusage_init(&ru0);

	relname = RelationGetRelationName(onerel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(onerel)),
					relname)));

	empty_pages = vacuumed_pages = scanned_pages = 0;
	num_tuples = tups_vacuumed = nkeep = nunused = 0;

	indstats = (IndexBulkDeleteResult **)
		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

	nblocks = RelationGetNumberOfBlocks(onerel);
	vacrelstats->rel_pages = nblocks;
	vacrelstats->nonempty_pages = 0;
	vacrelstats->latestRemovedXid = InvalidTransactionId;

	lazy_space_alloc(vacrelstats, nblocks);

	all_visible_streak = 0;
	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		tupgone,
					hastup;
		int			prev_dead_count;
		OffsetNumber frozen[MaxOffsetNumber];
		int			nfrozen;
		Size		freespace;
		bool		all_visible_according_to_vm = false;
		bool		all_visible;

		/*
		 * Skip pages that don't require vacuuming according to the
		 * visibility map.  But only if we've seen a streak of at least
		 * SKIP_PAGES_THRESHOLD pages marked as clean.  Since we're reading
		 * sequentially, the OS should be doing readahead for us and there's
		 * no gain in skipping a page now and then.  You need a longer run of
		 * consecutive skipped pages before it's worthwhile.  Also, skipping
		 * even a single page means that we can't update relfrozenxid or
		 * reltuples, so we only want to do it if there's a good chance to
		 * skip a goodly number of pages.
		 */
		if (!scan_all)
		{
			all_visible_according_to_vm =
				visibilitymap_test(onerel, blkno, &vmbuffer);
			if (all_visible_according_to_vm)
			{
				all_visible_streak++;
				if (all_visible_streak >= SKIP_PAGES_THRESHOLD)
				{
					vacrelstats->scanned_all = false;
					continue;
				}
			}
			else
				all_visible_streak = 0;
		}

		vacuum_delay_point();

		scanned_pages++;

		/*
		 * If we are close to overrunning the available space for dead-tuple
		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
		 */
		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Log cleanup info before we touch indexes */
			vacuum_log_cleanup_info(onerel, vacrelstats);

			/* Remove index entries */
			for (i = 0; i < nindexes; i++)
				lazy_vacuum_index(Irel[i],
								  &indstats[i],
								  vacrelstats);
			/* Remove tuples from heap */
			lazy_vacuum_heap(onerel, vacrelstats);

			/*
			 * Forget the now-vacuumed tuples, and press on, but be careful
			 * not to reset latestRemovedXid since we want that value to be
			 * valid.
			 */
			vacrelstats->num_dead_tuples = 0;
			vacrelstats->num_index_scans++;
		}

		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
								 RBM_NORMAL, vac_strategy);

		/* We need buffer cleanup lock so that we can prune HOT chains. */
		LockBufferForCleanup(buf);

		page = BufferGetPage(buf);

		if (PageIsNew(page))
		{
			/*
			 * An all-zeroes page could be left over if a backend extends the
			 * relation but crashes before initializing the page.  Reclaim
			 * such pages for use.
			 *
			 * We have to be careful here because we could be looking at a
			 * page that someone has just added to the relation and not yet
			 * been able to initialize (see RelationGetBufferForTuple).  To
			 * protect against that, release the buffer lock, grab the
			 * relation extension lock momentarily, and re-lock the buffer.
			 * If the page is still uninitialized by then, it must be left
			 * over from a crashed backend, and we can initialize it.
			 *
			 * We don't really need the relation lock when this is a new or
			 * temp relation, but it's probably not worth the code space to
			 * check that, since this surely isn't a critical path.
			 *
			 * Note: the comparable code in vacuum.c need not worry because
			 * it's got exclusive lock on the whole relation.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockRelationForExtension(onerel, ExclusiveLock);
			UnlockRelationForExtension(onerel, ExclusiveLock);
			LockBufferForCleanup(buf);
			if (PageIsNew(page))
			{
				ereport(WARNING,
				(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
						relname, blkno)));
				PageInit(page, BufferGetPageSize(buf), 0);
				empty_pages++;
			}
			freespace = PageGetHeapFreeSpace(page);
			MarkBufferDirty(buf);
			UnlockReleaseBuffer(buf);

			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		if (PageIsEmpty(page))
		{
			empty_pages++;
			freespace = PageGetHeapFreeSpace(page);

			if (!PageIsAllVisible(page))
			{
				PageSetAllVisible(page);
				SetBufferCommitInfoNeedsSave(buf);
			}

			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/* Update the visibility map */
			if (!all_visible_according_to_vm)
			{
				visibilitymap_pin(onerel, blkno, &vmbuffer);
				LockBuffer(buf, BUFFER_LOCK_SHARE);
				if (PageIsAllVisible(page))
					visibilitymap_set(onerel, blkno, PageGetLSN(page),
									  &vmbuffer);
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			}

			ReleaseBuffer(buf);
			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		/*
		 * Prune all HOT-update chains in this page.
		 *
		 * We count tuples removed by the pruning step as removed by VACUUM.
		 */
		tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
										 &vacrelstats->latestRemovedXid);

		/*
		 * Now scan the page to collect vacuumable items and check for tuples
		 * requiring freezing.
		 */
		all_visible = true;
		nfrozen = 0;
		hastup = false;
		prev_dead_count = vacrelstats->num_dead_tuples;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/* Unused items require no processing, but we count 'em */
			if (!ItemIdIsUsed(itemid))
			{
				nunused += 1;
				continue;
			}

			/* Redirect items mustn't be touched */
			if (ItemIdIsRedirected(itemid))
			{
				hastup = true;	/* this page won't be truncatable */
				continue;
			}

			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			/*
			 * DEAD item pointers are to be vacuumed normally; but we don't
			 * count them in tups_vacuumed, else we'd be double-counting (at
			 * least in the common case where heap_page_prune() just freed up
			 * a non-HOT tuple).
			 */
			if (ItemIdIsDead(itemid))
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				all_visible = false;
				continue;
			}

			Assert(ItemIdIsNormal(itemid));

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);

			tupgone = false;

			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
			{
				case HEAPTUPLE_DEAD:

					/*
					 * Ordinarily, DEAD tuples would have been removed by
					 * heap_page_prune(), but it's possible that the tuple
					 * state changed since heap_page_prune() looked.  In
					 * particular an INSERT_IN_PROGRESS tuple could have
					 * changed to DEAD if the inserter aborted.  So this
					 * cannot be considered an error condition.
					 *
					 * If the tuple is HOT-updated then it must only be
					 * removed by a prune operation; so we keep it just as if
					 * it were RECENTLY_DEAD.  Also, if it's a heap-only
					 * tuple, we choose to keep it, because it'll be a lot
					 * cheaper to get rid of it in the next pruning pass than
					 * to treat it like an indexed tuple.
					 */
					if (HeapTupleIsHotUpdated(&tuple) ||
						HeapTupleIsHeapOnly(&tuple))
						nkeep += 1;
					else
						tupgone = true; /* we can delete the tuple */
					all_visible = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Tuple is good --- but let's do some validity checks */
					if (onerel->rd_rel->relhasoids &&
						!OidIsValid(HeapTupleGetOid(&tuple)))
						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
							 relname, blkno, offnum);

					/*
					 * Is the tuple definitely visible to all transactions?
					 *
					 * NB: Like with per-tuple hint bits, we can't set the
					 * PD_ALL_VISIBLE flag if the inserter committed
					 * asynchronously.  See SetHintBits for more info.  Check
					 * that the HEAP_XMIN_COMMITTED hint bit is set because
					 * of that.
					 */
					if (all_visible)
					{
						TransactionId xmin;

						if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
						{
							all_visible = false;
							break;
						}

						/*
						 * The inserter definitely committed.  But is it old
						 * enough that everyone sees it as committed?
						 */
						xmin = HeapTupleHeaderGetXmin(tuple.t_data);
						if (!TransactionIdPrecedes(xmin, OldestXmin))
						{
							all_visible = false;
							break;
						}
					}
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must not remove it
					 * from relation.
					 */
					nkeep += 1;
					all_visible = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					all_visible = false;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					all_visible = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}

			if (tupgone)
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
											 &vacrelstats->latestRemovedXid);
				tups_vacuumed += 1;
			}
			else
			{
				num_tuples += 1;
				hastup = true;

				/*
				 * Each non-removable tuple must be checked to see if it needs
				 * freezing.  Note we already have exclusive buffer lock.
				 */
				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
									  InvalidBuffer))
					frozen[nfrozen++] = offnum;
			}
		}						/* scan along page */

		/*
		 * If we froze any tuples, mark the buffer dirty, and write a WAL
		 * record recording the changes.  We must log the changes to be
		 * crash-safe against future truncation of CLOG.
		 */
		if (nfrozen > 0)
		{
			MarkBufferDirty(buf);
			/* no XLOG for temp tables, though */
			if (!onerel->rd_istemp)
			{
				XLogRecPtr	recptr;

				recptr = log_heap_freeze(onerel, buf, FreezeLimit,
										 frozen, nfrozen);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}
		}

		/*
		 * If there are no indexes then we can vacuum the page right now
		 * instead of doing a second scan.
		 */
		if (nindexes == 0 &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove tuples from heap */
			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);

			/*
			 * Forget the now-vacuumed tuples, and press on, but be careful
			 * not to reset latestRemovedXid since we want that value to be
			 * valid.
			 */
			Assert(TransactionIdIsValid(vacrelstats->latestRemovedXid));
			vacrelstats->num_dead_tuples = 0;
			vacuumed_pages++;
		}

		freespace = PageGetHeapFreeSpace(page);

		/* Update the all-visible flag on the page */
		if (!PageIsAllVisible(page) && all_visible)
		{
			PageSetAllVisible(page);
			SetBufferCommitInfoNeedsSave(buf);
		}
		else if (PageIsAllVisible(page) && !all_visible)
		{
			elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u",
				 relname, blkno);
			PageClearAllVisible(page);
			SetBufferCommitInfoNeedsSave(buf);

			/*
			 * Normally, we would drop the lock on the heap page before
			 * updating the visibility map, but since this case shouldn't
			 * happen anyway, don't worry about that.
			 */
			visibilitymap_clear(onerel, blkno);
		}

		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		/* Update the visibility map */
		if (!all_visible_according_to_vm && all_visible)
		{
			visibilitymap_pin(onerel, blkno, &vmbuffer);
			LockBuffer(buf, BUFFER_LOCK_SHARE);
			if (PageIsAllVisible(page))
				visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}

		UnlockReleaseBuffer(buf);

		/* Remember the location of the last page with nonremovable tuples */
		if (hastup)
			vacrelstats->nonempty_pages = blkno + 1;

		/*
		 * If we remembered any tuples for deletion, then the page will be
		 * visited again by lazy_vacuum_heap, which will compute and record
		 * its post-compaction free space.  If not, then we're done with this
		 * page, so remember its free space as-is.  (This path will always be
		 * taken if there are no indexes.)
		 */
		if (vacrelstats->num_dead_tuples == prev_dead_count)
			RecordPageWithFreeSpace(onerel, blkno, freespace);
	}

	/* save stats for use later */
	vacrelstats->rel_tuples = num_tuples;
	vacrelstats->tuples_deleted = tups_vacuumed;

	/* If any tuples need to be deleted, perform final vacuum cycle */
	/* XXX put a threshold on min number of tuples here? */
	if (vacrelstats->num_dead_tuples > 0)
	{
		/* Log cleanup info before we touch indexes */
		vacuum_log_cleanup_info(onerel, vacrelstats);

		/* Remove index entries */
		for (i = 0; i < nindexes; i++)
			lazy_vacuum_index(Irel[i],
							  &indstats[i],
							  vacrelstats);
		/* Remove tuples from heap */
		lazy_vacuum_heap(onerel, vacrelstats);
		vacrelstats->num_index_scans++;
	}

	/* Release the pin on the visibility map page */
	if (BufferIsValid(vmbuffer))
	{
		ReleaseBuffer(vmbuffer);
		vmbuffer = InvalidBuffer;
	}

	/* Do post-vacuum cleanup and statistics update for each index */
	for (i = 0; i < nindexes; i++)
		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

	/* If no indexes, make log report that lazy_vacuum_heap would've made */
	if (vacuumed_pages)
		ereport(elevel,
				(errmsg("\"%s\": removed %.0f row versions in %u pages",
						RelationGetRelationName(onerel),
						tups_vacuumed, vacuumed_pages)));

	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
					RelationGetRelationName(onerel),
					tups_vacuumed, num_tuples, scanned_pages, nblocks),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "There were %.0f unused item pointers.\n"
					   "%u pages are entirely empty.\n"
					   "%s.",
					   nkeep, nunused,
					   empty_pages,
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_heap() -- second pass over the heap
 *
 *		This routine marks dead tuples as unused and compacts out free
 *		space on their pages.  Pages not having dead tuples recorded from
 *		lazy_scan_heap are not visited at all.
 *
 *		Note: the reason for doing this as a second pass is we cannot remove
 *		the tuples until we've removed their index entries, and we want to
 *		process index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
	int			tupindex;
	int			npages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);
	npages = 0;

	tupindex = 0;
	while (tupindex < vacrelstats->num_dead_tuples)
	{
		BlockNumber tblk;
		Buffer		buf;
		Page		page;
		Size		freespace;

		vacuum_delay_point();

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
								 vac_strategy);
		LockBufferForCleanup(buf);
		tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);

		/* Now that we've compacted the page, record its available space */
		page = BufferGetPage(buf);
		freespace = PageGetHeapFreeSpace(page);

		UnlockReleaseBuffer(buf);
		RecordPageWithFreeSpace(onerel, tblk, freespace);
		npages++;
	}

	ereport(elevel,
			(errmsg("\"%s\": removed %d row versions in %d pages",
					RelationGetRelationName(onerel),
					tupindex, npages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_page() -- free dead tuples on a page
 *					 and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats)
{
	Page		page = BufferGetPage(buffer);
	OffsetNumber unused[MaxOffsetNumber];
	int			uncnt = 0;

	START_CRIT_SECTION();

	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
	{
		BlockNumber tblk;
		OffsetNumber toff;
		ItemId		itemid;

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		if (tblk != blkno)
			break;				/* past end of tuples for this block */
		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
		itemid = PageGetItemId(page, toff);
		ItemIdSetUnused(itemid);
		unused[uncnt++] = toff;
	}

	PageRepairFragmentation(page);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!onerel->rd_istemp)
	{
		XLogRecPtr	recptr;

		recptr = log_heap_clean(onerel, buffer,
								NULL, 0, NULL, 0,
								unused, uncnt,
								vacrelstats->latestRemovedXid);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	return tupindex;
}

/*
 *	lazy_vacuum_index() -- vacuum one index relation.
 *
 *		Delete all the index entries pointing to tuples listed in
 *		vacrelstats->dead_tuples, and update running statistics.
 */
static void
lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.analyze_only = false;
	ivinfo.estimated_count = true;
	ivinfo.message_level = elevel;
	ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
	ivinfo.strategy = vac_strategy;

	/* Do bulk deletion */
	*stats = index_bulk_delete(&ivinfo, *stats,
							   lazy_tid_reaped, (void *) vacrelstats);

	ereport(elevel,
			(errmsg("scanned index \"%s\" to remove %d row versions",
					RelationGetRelationName(indrel),
					vacrelstats->num_dead_tuples),
			 errdetail("%s.", pg_rusage_show(&ru0))));
}

/*
 *	lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
 */
static void
lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.analyze_only = false;
	ivinfo.estimated_count = !vacrelstats->scanned_all;
	ivinfo.message_level = elevel;
	/* use rel_tuples only if we scanned all pages, else fall back */
	ivinfo.num_heap_tuples = vacrelstats->scanned_all ?
		vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
	ivinfo.strategy = vac_strategy;

	stats = index_vacuum_cleanup(&ivinfo, stats);

	if (!stats)
		return;

	/*
	 * Now update statistics in pg_class, but only if the index says the
	 * count is accurate.
	 */
	if (!stats->estimated_count)
		vac_update_relstats(indrel,
							stats->num_pages, stats->num_index_tuples,
							false, InvalidTransactionId);
979 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
980 RelationGetRelationName(indrel),
981 stats->num_index_tuples,
983 errdetail("%.0f index row versions were removed.\n"
984 "%u index pages have been deleted, %u are currently reusable.\n"
986 stats->tuples_removed,
987 stats->pages_deleted, stats->pages_free,
988 pg_rusage_show(&ru0))));

/*
 * lazy_truncate_heap - try to truncate off any empty pages at the end
 */
static void
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber old_rel_pages = vacrelstats->rel_pages;
	BlockNumber new_rel_pages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	/*
	 * We need full exclusive lock on the relation in order to do truncation.
	 * If we can't get it, give up rather than waiting --- we don't want to
	 * block other backends, and we don't want to deadlock (which is quite
	 * possible considering we already hold a lower-grade lock).
	 */
	if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
		return;

	/*
	 * Now that we have exclusive lock, look to see if the rel has grown
	 * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
	 * newly added pages presumably contain non-deletable tuples.
	 */
	new_rel_pages = RelationGetNumberOfBlocks(onerel);
	if (new_rel_pages != old_rel_pages)
	{
		/* might as well use the latest news when we update pg_class stats */
		vacrelstats->rel_pages = new_rel_pages;
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Scan backwards from the end to verify that the end pages actually
	 * contain no tuples.  This is *necessary*, not optional, because other
	 * backends could have added tuples to these pages whilst we were
	 * vacuuming.
	 */
	new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);

	if (new_rel_pages >= old_rel_pages)
	{
		/* can't do anything after all */
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Okay to truncate.
	 */
	RelationTruncate(onerel, new_rel_pages);

	/*
	 * We can release the exclusive lock as soon as we have truncated.  Other
	 * backends can't safely access the relation until they have processed
	 * the smgr invalidation that smgrtruncate sent out ... but that should
	 * happen as part of standard invalidation processing once they acquire
	 * lock on the relation.
	 */
	UnlockRelation(onerel, AccessExclusiveLock);

	/* update statistics */
	vacrelstats->rel_pages = new_rel_pages;
	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;

	ereport(elevel,
			(errmsg("\"%s\": truncated %u to %u pages",
					RelationGetRelationName(onerel),
					old_rel_pages, new_rel_pages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 * Rescan end pages to verify that they are (still) empty of tuples.
 *
 * Returns number of nondeletable pages (last nonempty page + 1).
 */
static BlockNumber
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber blkno;

	/* Strange coding of loop control is needed because blkno is unsigned */
	blkno = vacrelstats->rel_pages;
	while (blkno > vacrelstats->nonempty_pages)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		hastup;

		/*
		 * We don't insert a vacuum delay point here, because we have an
		 * exclusive lock on the table which we want to hold for as short a
		 * time as possible.  We still need to check for interrupts however.
		 */
		CHECK_FOR_INTERRUPTS();

		blkno--;

		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
								 RBM_NORMAL, vac_strategy);

		/* In this phase we only need shared access to the buffer */
		LockBuffer(buf, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buf);

		if (PageIsNew(page) || PageIsEmpty(page))
		{
			/* PageIsNew probably shouldn't happen... */
			UnlockReleaseBuffer(buf);
			continue;
		}

		hastup = false;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;
1124 * Note: any non-unused item should be taken as a reason to keep
1125 * this page. We formerly thought that DEAD tuples could be
1126 * thrown away, but that's not so, because we'd not have cleaned
1127 * out their index entries.
1129 if (ItemIdIsUsed(itemid))
1132 break; /* can stop scanning */
1134 } /* scan along page */

		UnlockReleaseBuffer(buf);

		/* Done scanning if we found a tuple here */
		if (hastup)
			return blkno + 1;
	}

	/*
	 * If we fall out of the loop, all the previously-thought-to-be-empty
	 * pages still are; we need not bother to look at the last known-nonempty
	 * page.
	 */
	return vacrelstats->nonempty_pages;
}

/*
 * lazy_space_alloc - space allocation decisions for lazy vacuum
 *
 * See the comments at the head of this file for rationale.
 */
static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
	long		maxtuples;

	if (vacrelstats->hasindex)
	{
		maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
		maxtuples = Min(maxtuples, INT_MAX);
		maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));

		/* curious coding here to ensure the multiplication can't overflow */
		if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
			maxtuples = relblocks * LAZY_ALLOC_TUPLES;

		/* stay sane if small maintenance_work_mem */
		maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
	}
	else
		maxtuples = MaxHeapTuplesPerPage;

	vacrelstats->num_dead_tuples = 0;
	vacrelstats->max_dead_tuples = (int) maxtuples;
	vacrelstats->dead_tuples = (ItemPointer)
		palloc(maxtuples * sizeof(ItemPointerData));
}

/*
 * lazy_record_dead_tuple - remember one deletable tuple
 */
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr)
{
	/*
	 * The array shouldn't overflow under normal behavior, but perhaps it
	 * could if we are given a really small maintenance_work_mem.  In that
	 * case, just forget the last few tuples (we'll get 'em next time).
	 */
	if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
	{
		vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
		vacrelstats->num_dead_tuples++;
	}
}

/*
 *	lazy_tid_reaped() -- is a particular tid deletable?
 *
 *		This has the right signature to be an IndexBulkDeleteCallback.
 *
 *		Assumes dead_tuples array is in sorted order.
 */
static bool
lazy_tid_reaped(ItemPointer itemptr, void *state)
{
	LVRelStats *vacrelstats = (LVRelStats *) state;
	ItemPointer res;

	res = (ItemPointer) bsearch((void *) itemptr,
								(void *) vacrelstats->dead_tuples,
								vacrelstats->num_dead_tuples,
								sizeof(ItemPointerData),
								vac_cmp_itemptr);

	return (res != NULL);
}

/*
 * Comparator routines for use with qsort() and bsearch().
 */
static int
vac_cmp_itemptr(const void *left, const void *right)
{
	BlockNumber lblk,
				rblk;
	OffsetNumber loff,
				roff;

	lblk = ItemPointerGetBlockNumber((ItemPointer) left);
	rblk = ItemPointerGetBlockNumber((ItemPointer) right);

	if (lblk < rblk)
		return -1;
	if (lblk > rblk)
		return 1;

	loff = ItemPointerGetOffsetNumber((ItemPointer) left);
	roff = ItemPointerGetOffsetNumber((ItemPointer) right);

	if (loff < roff)
		return -1;
	if (loff > roff)
		return 1;

	return 0;
}