1 /*-------------------------------------------------------------------------
4 * Concurrent ("lazy") vacuuming.
7 * The major space usage for LAZY VACUUM is storage for the array of dead
8 * tuple TIDs, with the next biggest need being storage for per-disk-page
9 * free space info. We want to ensure we can vacuum even the very largest
10 * relations with finite memory space usage. To do that, we set upper bounds
11 * on the number of tuples and pages we will keep track of at once.
13 * We are willing to use at most maintenance_work_mem (or perhaps
14 * autovacuum_work_mem) memory space to keep track of dead tuples. We
15 * initially allocate an array of TIDs of that size, with an upper limit that
16 * depends on table size (this limit ensures we don't allocate a huge area
17 * uselessly for vacuuming small tables). If the array threatens to overflow,
18 * we suspend the heap scan phase and perform a pass of index cleanup and page
19 * compaction, then resume the heap scan with an empty TID array.
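 *
 * For a rough sense of scale (an illustrative calculation, assuming the
 * default 8 kB block size and maintenance_work_mem = 64MB): each
 * ItemPointerData is 6 bytes, so the array can hold about
 * 64 * 1024 * 1024 / 6 = ~11.2 million dead-tuple TIDs.  Even in the worst
 * case of MaxHeapTuplesPerPage (291) dead line pointers on every page, that
 * covers roughly 38,000 heap pages (~300 MB of heap) between index-vacuum
 * passes; with typical dead-tuple densities it covers far more.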
21 * If we're processing a table with no indexes, we can just vacuum each page
22 * as we go; there's no need to save up multiple tuples to minimize the number
23 * of index scans performed. So we don't use maintenance_work_mem memory for
24 * the TID array, just enough to hold as many heap tuples as fit on one page.
27 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
28 * Portions Copyright (c) 1994, Regents of the University of California
32 * src/backend/commands/vacuumlazy.c
34 *-------------------------------------------------------------------------
40 #include "access/genam.h"
41 #include "access/heapam.h"
42 #include "access/heapam_xlog.h"
43 #include "access/htup_details.h"
44 #include "access/multixact.h"
45 #include "access/transam.h"
46 #include "access/visibilitymap.h"
47 #include "access/xlog.h"
48 #include "catalog/catalog.h"
49 #include "catalog/storage.h"
50 #include "commands/dbcommands.h"
51 #include "commands/vacuum.h"
52 #include "miscadmin.h"
54 #include "portability/instr_time.h"
55 #include "postmaster/autovacuum.h"
56 #include "storage/bufmgr.h"
57 #include "storage/freespace.h"
58 #include "storage/lmgr.h"
59 #include "utils/lsyscache.h"
60 #include "utils/memutils.h"
61 #include "utils/pg_rusage.h"
62 #include "utils/timestamp.h"
63 #include "utils/tqual.h"
67 * Space/time tradeoff parameters: do these need to be user-tunable?
69 * To consider truncating the relation, we want there to be at least
70 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
71 * is less) potentially-freeable pages.
73 #define REL_TRUNCATE_MINIMUM 1000
74 #define REL_TRUNCATE_FRACTION 16
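/*
 * For example (illustrative numbers only): with these values an 8,000-page
 * table must have at least min(1000, 8000/16) = 500 potentially-freeable
 * trailing pages before truncation is considered, while a 1,000,000-page
 * table needs only the flat minimum of 1,000 pages.  See
 * should_attempt_truncation() below for the actual test.
 */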
77 * Timing parameters for truncate locking heuristics.
79 * These were not exposed as user tunable GUC values because it didn't seem
80 * that the potential for improvement was great enough to merit the cost of supporting them.
83 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
84 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
85 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
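/*
 * Taken together (an illustrative reading of these constants): the truncation
 * code below retries ConditionalLockRelation() up to
 * VACUUM_TRUNCATE_LOCK_TIMEOUT / VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL = 100
 * times, i.e. a budget of roughly five seconds, before giving up; once the
 * lock is held, count_nondeletable_pages() checks for lock waiters about
 * every VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL (20 ms) so it can back off
 * quickly.
 */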
88 * Guesstimation of the number of dead tuples per page. This is used to
89 * provide an upper limit to memory allocated when vacuuming small tables.
92 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
95 * Before we consider skipping a page that's marked as clean in
96 * the visibility map, we must've seen at least this many clean pages.
98 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
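/*
 * With the default 8 kB block size, 32 consecutive pages is 256 kB of
 * sequential reading; skipping shorter all-visible runs tends to defeat OS
 * readahead for little benefit (see the discussion in lazy_scan_heap).
 */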
100 typedef struct LVRelStats
102 /* hasindex = true means two-pass strategy; false means one-pass */
104 /* Overall statistics about rel */
105 BlockNumber old_rel_pages; /* previous value of pg_class.relpages */
106 BlockNumber rel_pages; /* total number of pages */
107 BlockNumber scanned_pages; /* number of pages we examined */
108 BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
109 double scanned_tuples; /* counts only tuples on scanned pages */
110 double old_rel_tuples; /* previous value of pg_class.reltuples */
111 double new_rel_tuples; /* new estimated total # of tuples */
112 double new_dead_tuples; /* new estimated total # of dead tuples */
113 BlockNumber pages_removed;
114 double tuples_deleted;
115 BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
116 /* List of TIDs of tuples we intend to delete */
117 /* NB: this list is ordered by TID address */
118 int num_dead_tuples; /* current # of entries */
119 int max_dead_tuples; /* # slots allocated in array */
120 ItemPointer dead_tuples; /* array of ItemPointerData */
122 TransactionId latestRemovedXid;
123 bool lock_waiter_detected;
127 /* A few variables that don't seem worth passing around as parameters */
128 static int elevel = -1;
130 static TransactionId OldestXmin;
131 static TransactionId FreezeLimit;
132 static MultiXactId MultiXactCutoff;
134 static BufferAccessStrategy vac_strategy;
137 /* non-export function prototypes */
138 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
139 Relation *Irel, int nindexes, bool scan_all);
140 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
141 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
142 static void lazy_vacuum_index(Relation indrel,
143 IndexBulkDeleteResult **stats,
144 LVRelStats *vacrelstats);
145 static void lazy_cleanup_index(Relation indrel,
146 IndexBulkDeleteResult *stats,
147 LVRelStats *vacrelstats);
148 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
149 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
150 static bool should_attempt_truncation(LVRelStats *vacrelstats);
151 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
152 static BlockNumber count_nondeletable_pages(Relation onerel,
153 LVRelStats *vacrelstats);
154 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
155 static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
156 ItemPointer itemptr);
157 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
158 static int vac_cmp_itemptr(const void *left, const void *right);
159 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
160 TransactionId *visibility_cutoff_xid);
164 * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
166 * This routine vacuums a single heap, cleans out its indexes, and
167 * updates its relpages and reltuples statistics.
169 * At entry, we have already established a transaction and opened
170 * and locked the relation.
173 lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
174 BufferAccessStrategy bstrategy)
176 LVRelStats *vacrelstats;
180 TimestampTz starttime = 0;
185 bool scan_all; /* should we scan all pages? */
186 bool scanned_all; /* did we actually scan all pages? */
187 TransactionId xidFullScanLimit;
188 MultiXactId mxactFullScanLimit;
189 BlockNumber new_rel_pages;
190 double new_rel_tuples;
191 BlockNumber new_rel_allvisible;
192 double new_live_tuples;
193 TransactionId new_frozen_xid;
194 MultiXactId new_min_multi;
196 Assert(params != NULL);
198 /* measure elapsed time iff autovacuum logging requires it */
199 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
201 pg_rusage_init(&ru0);
202 starttime = GetCurrentTimestamp();
205 if (options & VACOPT_VERBOSE)
210 vac_strategy = bstrategy;
212 vacuum_set_xid_limits(onerel,
213 params->freeze_min_age,
214 params->freeze_table_age,
215 params->multixact_freeze_min_age,
216 params->multixact_freeze_table_age,
217 &OldestXmin, &FreezeLimit, &xidFullScanLimit,
218 &MultiXactCutoff, &mxactFullScanLimit);
221 * We request a full scan if the table's frozen Xid is now older
222 * than or equal to the requested Xid full-table scan limit, or if the
223 * table's minimum MultiXactId is older than or equal to the requested
224 * mxid full-table scan limit.
226 scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
228 scan_all |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
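	/*
	 * Roughly speaking (see vacuum_set_xid_limits for the exact rules, which
	 * this summary simplifies): xidFullScanLimit is the current next XID
	 * minus the table-age cutoff (vacuum_freeze_table_age, 150 million by
	 * default, clamped to 95% of autovacuum_freeze_max_age), so with default
	 * settings a table whose relfrozenxid is more than about 150 million
	 * transactions old gets scan_all = true and is scanned in full so that
	 * relfrozenxid can be advanced.
	 */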
231 vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
233 vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
234 vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
235 vacrelstats->num_index_scans = 0;
236 vacrelstats->pages_removed = 0;
237 vacrelstats->lock_waiter_detected = false;
239 /* Open all indexes of the relation */
240 vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
241 vacrelstats->hasindex = (nindexes > 0);
243 /* Do the vacuuming */
244 lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);
246 /* Done with indexes */
247 vac_close_indexes(nindexes, Irel, NoLock);
250 * Compute whether we actually scanned the whole relation. If we did, we
251 * can adjust relfrozenxid and relminmxid.
253 * NB: We need to check this before truncating the relation, because that
254 * will change ->rel_pages.
256 if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
265 * Optionally truncate the relation.
267 if (should_attempt_truncation(vacrelstats))
268 lazy_truncate_heap(onerel, vacrelstats);
270 /* Vacuum the Free Space Map */
271 FreeSpaceMapVacuum(onerel);
274 * Update statistics in pg_class.
276 * A corner case here is that if we scanned no pages at all because every
277 * page is all-visible, we should not update relpages/reltuples, because
278 * we have no new information to contribute. In particular this keeps us
279 * from replacing relpages=reltuples=0 (which means "unknown tuple
280 * density") with nonzero relpages and reltuples=0 (which means "zero
281 * tuple density") unless there's some actual evidence for the latter.
283 * We do update relallvisible even in the corner case, since if the table
284 * is all-visible we'd definitely like to know that. But clamp the value
285 * to be not more than what we're setting relpages to.
287 * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
288 * since then we don't know for certain that all tuples have a newer xmin.
290 new_rel_pages = vacrelstats->rel_pages;
291 new_rel_tuples = vacrelstats->new_rel_tuples;
292 if (vacrelstats->scanned_pages == 0 && new_rel_pages > 0)
294 new_rel_pages = vacrelstats->old_rel_pages;
295 new_rel_tuples = vacrelstats->old_rel_tuples;
298 new_rel_allvisible = visibilitymap_count(onerel);
299 if (new_rel_allvisible > new_rel_pages)
300 new_rel_allvisible = new_rel_pages;
302 new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId;
303 new_min_multi = scanned_all ? MultiXactCutoff : InvalidMultiXactId;
305 vac_update_relstats(onerel,
309 vacrelstats->hasindex,
314 /* report results to the stats collector, too */
315 new_live_tuples = new_rel_tuples - vacrelstats->new_dead_tuples;
316 if (new_live_tuples < 0)
317 new_live_tuples = 0; /* just in case */
319 pgstat_report_vacuum(RelationGetRelid(onerel),
320 onerel->rd_rel->relisshared,
322 vacrelstats->new_dead_tuples);
324 /* and log the action if appropriate */
325 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
327 TimestampTz endtime = GetCurrentTimestamp();
329 if (params->log_min_duration == 0 ||
330 TimestampDifferenceExceeds(starttime, endtime,
331 params->log_min_duration))
335 TimestampDifference(starttime, endtime, &secs, &usecs);
339 if ((secs > 0) || (usecs > 0))
341 read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
342 (secs + usecs / 1000000.0);
343 write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
344 (secs + usecs / 1000000.0);
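			/*
			 * Worked example (illustrative numbers, assuming the default
			 * 8 kB BLCKSZ): if this run caused VacuumPageMiss = 2560 buffer
			 * misses and took 4 seconds, read_rate comes out as
			 * 2560 * 8192 / (1024 * 1024) / 4 = 5.000 MB/s.
			 */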
348 * This is pretty messy, but we split it up so that we can skip
349 * emitting individual parts of the message when not applicable.
351 initStringInfo(&buf);
352 appendStringInfo(&buf, _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"),
353 get_database_name(MyDatabaseId),
354 get_namespace_name(RelationGetNamespace(onerel)),
355 RelationGetRelationName(onerel),
356 vacrelstats->num_index_scans);
357 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins\n"),
358 vacrelstats->pages_removed,
359 vacrelstats->rel_pages,
360 vacrelstats->pinskipped_pages);
361 appendStringInfo(&buf,
362 _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"),
363 vacrelstats->tuples_deleted,
364 vacrelstats->new_rel_tuples,
365 vacrelstats->new_dead_tuples);
366 appendStringInfo(&buf,
367 _("buffer usage: %d hits, %d misses, %d dirtied\n"),
371 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
372 read_rate, write_rate);
373 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
376 (errmsg_internal("%s", buf.data)));
383 * For Hot Standby we need to know the highest transaction id that will
384 * be removed by any change. VACUUM proceeds in a number of passes so
385 * we need to consider how each pass operates. The first phase runs
386 * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
387 * progresses - these will have a latestRemovedXid on each record.
388 * In some cases this removes all of the tuples to be removed, though
389 * often we have dead tuples with index pointers so we must remember them
390 * for removal in phase 3. Index records for those rows are removed
391 * in phase 2 and index blocks do not have MVCC information attached.
392 * So before we can allow removal of any index tuples we need to issue
393 * a WAL record containing the latestRemovedXid of rows that will be
394 * removed in phase 3. This allows recovery queries to block at the
395 * correct place, i.e. before phase 2, rather than during phase 3,
396 * which would be after the rows have become inaccessible.
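 *
 * Concretely (a brief summary of the mechanism): the record emitted below by
 * log_heap_cleanup_info() is an XLOG_HEAP2_CLEANUP_INFO record carrying
 * latestRemovedXid; replaying it on a standby resolves (waits out or, if
 * necessary, cancels) queries whose snapshots could still see the
 * soon-to-be-removed rows, before the phase 2 and phase 3 records arrive.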
399 vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
402 * Skip this for relations for which no WAL is to be written, or if we're
403 * not trying to support archive recovery.
405 if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
409 * No need to write the record at all unless it contains a valid value
411 if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
412 (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
416 * lazy_scan_heap() -- scan an open heap relation
418 * This routine prunes each page in the heap, which will among other
419 * things truncate dead tuples to dead line pointers, defragment the
420 * page, and set commit status bits (see heap_page_prune). It also builds
421 * lists of dead tuples and pages with free space, calculates statistics
422 * on the number of live tuples in the heap, and marks pages as
423 * all-visible if appropriate. When done, or when we run low on space for
424 * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
425 * to reclaim dead line pointers.
427 * If there are no indexes then we can reclaim line pointers on the fly;
428 * dead line pointers need only be retained until all index pointers that
429 * reference them have been killed.
432 lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
433 Relation *Irel, int nindexes, bool scan_all)
439 BlockNumber empty_pages,
445 IndexBulkDeleteResult **indstats;
448 Buffer vmbuffer = InvalidBuffer;
449 BlockNumber next_not_all_visible_block;
450 bool skipping_all_visible_blocks;
451 xl_heap_freeze_tuple *frozen;
454 pg_rusage_init(&ru0);
456 relname = RelationGetRelationName(onerel);
458 (errmsg("vacuuming \"%s.%s\"",
459 get_namespace_name(RelationGetNamespace(onerel)),
462 empty_pages = vacuumed_pages = 0;
463 num_tuples = tups_vacuumed = nkeep = nunused = 0;
465 indstats = (IndexBulkDeleteResult **)
466 palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
468 nblocks = RelationGetNumberOfBlocks(onerel);
469 vacrelstats->rel_pages = nblocks;
470 vacrelstats->scanned_pages = 0;
471 vacrelstats->nonempty_pages = 0;
472 vacrelstats->latestRemovedXid = InvalidTransactionId;
474 lazy_space_alloc(vacrelstats, nblocks);
475 frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
478 * We want to skip pages that don't require vacuuming according to the
479 * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
480 * consecutive pages. Since we're reading sequentially, the OS should be
481 * doing readahead for us, so there's no gain in skipping a page now and
482 * then; that's likely to disable readahead and so be counterproductive.
483 * Also, skipping even a single page means that we can't update
484 * relfrozenxid, so we only want to do it if we can skip a goodly number of pages.
487 * Before entering the main loop, establish the invariant that
488 * next_not_all_visible_block is the next block number >= blkno that's not
489 * all-visible according to the visibility map, or nblocks if there's no
490 * such block. Also, we set up the skipping_all_visible_blocks flag,
491 * which is needed because we need hysteresis in the decision: once we've
492 * started skipping blocks, we may as well skip everything up to the next
493 * not-all-visible block.
495 * Note: if scan_all is true, we won't actually skip any pages; but we
496 * maintain next_not_all_visible_block anyway, so as to set up the
497 * all_visible_according_to_vm flag correctly for each page.
499 * Note: The value returned by visibilitymap_test could be slightly
500 * out-of-date, since we make this test before reading the corresponding
501 * heap page or locking the buffer. This is OK. If we mistakenly think
502 * that the page is all-visible when in fact the flag's just been cleared,
503 * we might fail to vacuum the page. But it's OK to skip pages when
504 * scan_all is not set, so no great harm done; the next vacuum will find
505 * them. If we make the reverse mistake and vacuum a page unnecessarily,
506 * it'll just be a no-op.
508 * We will scan the table's last page, at least to the extent of
509 * determining whether it has tuples or not, even if it should be skipped
510 * according to the above rules; except when we've already determined that
511 * it's not worth trying to truncate the table. This avoids having
512 * lazy_truncate_heap() take access-exclusive lock on the table to attempt
513 * a truncation that just fails immediately because there are tuples in
514 * the last page. This is worth avoiding mainly because such a lock must
515 * be replayed on any hot standby, where it can be disruptive.
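	 *
	 * As a concrete illustration of the threshold: if the visibility map
	 * shows the first 1,000 blocks as all-visible (and scan_all is false),
	 * the pre-loop below finds next_not_all_visible_block = 1000, which is
	 * well past SKIP_PAGES_THRESHOLD, so those blocks are skipped (subject
	 * to the last-page exception above) and relfrozenxid cannot be advanced
	 * this time.  If only 10 leading blocks were all-visible, they would be
	 * read anyway, since 10 < SKIP_PAGES_THRESHOLD.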
517 for (next_not_all_visible_block = 0;
518 next_not_all_visible_block < nblocks;
519 next_not_all_visible_block++)
521 if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
523 vacuum_delay_point();
525 if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
526 skipping_all_visible_blocks = true;
528 skipping_all_visible_blocks = false;
530 for (blkno = 0; blkno < nblocks; blkno++)
541 bool all_visible_according_to_vm;
543 bool has_dead_tuples;
544 TransactionId visibility_cutoff_xid = InvalidTransactionId;
546 /* see note above about forcing scanning of last page */
547 #define FORCE_CHECK_PAGE() \
548 (blkno == nblocks - 1 && should_attempt_truncation(vacrelstats))
550 if (blkno == next_not_all_visible_block)
552 /* Time to advance next_not_all_visible_block */
553 for (next_not_all_visible_block++;
554 next_not_all_visible_block < nblocks;
555 next_not_all_visible_block++)
557 if (!visibilitymap_test(onerel, next_not_all_visible_block,
560 vacuum_delay_point();
564 * We know we can't skip the current block. But set up
565 * skipping_all_visible_blocks to do the right thing at the following blocks.
568 if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
569 skipping_all_visible_blocks = true;
571 skipping_all_visible_blocks = false;
572 all_visible_according_to_vm = false;
576 /* Current block is all-visible */
577 if (skipping_all_visible_blocks && !scan_all && !FORCE_CHECK_PAGE())
579 all_visible_according_to_vm = true;
582 vacuum_delay_point();
585 * If we are close to overrunning the available space for dead-tuple
586 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
588 if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
589 vacrelstats->num_dead_tuples > 0)
592 * Before beginning index vacuuming, we release any pin we may
593 * hold on the visibility map page. This isn't necessary for
594 * correctness, but we do it anyway to avoid holding the pin
595 * across a lengthy, unrelated operation.
597 if (BufferIsValid(vmbuffer))
599 ReleaseBuffer(vmbuffer);
600 vmbuffer = InvalidBuffer;
603 /* Log cleanup info before we touch indexes */
604 vacuum_log_cleanup_info(onerel, vacrelstats);
606 /* Remove index entries */
607 for (i = 0; i < nindexes; i++)
608 lazy_vacuum_index(Irel[i],
611 /* Remove tuples from heap */
612 lazy_vacuum_heap(onerel, vacrelstats);
615 * Forget the now-vacuumed tuples, and press on, but be careful
616 * not to reset latestRemovedXid since we want that value to be valid.
619 vacrelstats->num_dead_tuples = 0;
620 vacrelstats->num_index_scans++;
624 * Pin the visibility map page in case we need to mark the page
625 * all-visible. In most cases this will be very cheap, because we'll
626 * already have the correct page pinned anyway. However, it's
627 * possible that (a) next_not_all_visible_block is covered by a
628 * different VM page than the current block or (b) we released our pin
629 * and did a cycle of index vacuuming.
631 visibilitymap_pin(onerel, blkno, &vmbuffer);
633 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
634 RBM_NORMAL, vac_strategy);
636 /* We need buffer cleanup lock so that we can prune HOT chains. */
637 if (!ConditionalLockBufferForCleanup(buf))
640 * If we're not scanning the whole relation to guard against XID
641 * wraparound, and we don't want to forcibly check the page, then
642 * it's OK to skip vacuuming pages we get a lock conflict on. They
643 * will be dealt with in some future vacuum.
645 if (!scan_all && !FORCE_CHECK_PAGE())
648 vacrelstats->pinskipped_pages++;
653 * Read the page with share lock to see if any xids on it need to
654 * be frozen. If not, we just skip the page, after updating our
655 * scan statistics. If there are some, we wait for cleanup lock.
657 * We could defer the lock request further by remembering the page
658 * and coming back to it later, or we could even register
659 * ourselves for multiple buffers and then service whichever one
660 * is received first. For now, this seems good enough.
662 * If we get here with scan_all false, then we're just forcibly
663 * checking the page, and so we don't want to insist on getting
664 * the lock; we only need to know if the page contains tuples, so
665 * that we can update nonempty_pages correctly. It's convenient
666 * to use lazy_check_needs_freeze() for both situations, though.
668 LockBuffer(buf, BUFFER_LOCK_SHARE);
669 if (!lazy_check_needs_freeze(buf, &hastup))
671 UnlockReleaseBuffer(buf);
672 vacrelstats->scanned_pages++;
673 vacrelstats->pinskipped_pages++;
675 vacrelstats->nonempty_pages = blkno + 1;
681 * Here, we must not advance scanned_pages; that would amount
682 * to claiming that the page contains no freezable tuples.
684 UnlockReleaseBuffer(buf);
685 vacrelstats->pinskipped_pages++;
687 vacrelstats->nonempty_pages = blkno + 1;
690 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
691 LockBufferForCleanup(buf);
692 /* drop through to normal processing */
695 vacrelstats->scanned_pages++;
697 page = BufferGetPage(buf);
702 * An all-zeroes page could be left over if a backend extends the
703 * relation but crashes before initializing the page. Reclaim such pages for use.
706 * We have to be careful here because we could be looking at a
707 * page that someone has just added to the relation and not yet
708 * been able to initialize (see RelationGetBufferForTuple). To
709 * protect against that, release the buffer lock, grab the
710 * relation extension lock momentarily, and re-lock the buffer. If
711 * the page is still uninitialized by then, it must be left over
712 * from a crashed backend, and we can initialize it.
714 * We don't really need the relation lock when this is a new or
715 * temp relation, but it's probably not worth the code space to
716 * check that, since this surely isn't a critical path.
718 * Note: the comparable code in vacuum.c need not worry because
719 * it's got exclusive lock on the whole relation.
721 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
722 LockRelationForExtension(onerel, ExclusiveLock);
723 UnlockRelationForExtension(onerel, ExclusiveLock);
724 LockBufferForCleanup(buf);
728 (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
730 PageInit(page, BufferGetPageSize(buf), 0);
733 freespace = PageGetHeapFreeSpace(page);
734 MarkBufferDirty(buf);
735 UnlockReleaseBuffer(buf);
737 RecordPageWithFreeSpace(onerel, blkno, freespace);
741 if (PageIsEmpty(page))
744 freespace = PageGetHeapFreeSpace(page);
746 /* empty pages are always all-visible */
747 if (!PageIsAllVisible(page))
749 START_CRIT_SECTION();
751 /* mark buffer dirty before writing a WAL record */
752 MarkBufferDirty(buf);
755 * It's possible that another backend has extended the heap,
756 * initialized the page, and then failed to WAL-log the page
757 * due to an ERROR. Since heap extension is not WAL-logged,
758 * recovery might try to replay our record setting the page
759 * all-visible and find that the page isn't initialized, which
760 * will cause a PANIC. To prevent that, check whether the
761 * page has been previously WAL-logged, and if not, do that now.
764 if (RelationNeedsWAL(onerel) &&
765 PageGetLSN(page) == InvalidXLogRecPtr)
766 log_newpage_buffer(buf, true);
768 PageSetAllVisible(page);
769 visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
770 vmbuffer, InvalidTransactionId);
774 UnlockReleaseBuffer(buf);
775 RecordPageWithFreeSpace(onerel, blkno, freespace);
780 * Prune all HOT-update chains in this page.
782 * We count tuples removed by the pruning step as removed by VACUUM.
784 tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
785 &vacrelstats->latestRemovedXid);
788 * Now scan the page to collect vacuumable items and check for tuples
789 * requiring freezing.
792 has_dead_tuples = false;
795 prev_dead_count = vacrelstats->num_dead_tuples;
796 maxoff = PageGetMaxOffsetNumber(page);
799 * Note: If you change anything in the loop below, also look at
800 * heap_page_is_all_visible to see if that needs to be changed.
802 for (offnum = FirstOffsetNumber;
804 offnum = OffsetNumberNext(offnum))
808 itemid = PageGetItemId(page, offnum);
810 /* Unused items require no processing, but we count 'em */
811 if (!ItemIdIsUsed(itemid))
817 /* Redirect items mustn't be touched */
818 if (ItemIdIsRedirected(itemid))
820 hastup = true; /* this page won't be truncatable */
824 ItemPointerSet(&(tuple.t_self), blkno, offnum);
827 * DEAD item pointers are to be vacuumed normally; but we don't
828 * count them in tups_vacuumed, else we'd be double-counting (at
829 * least in the common case where heap_page_prune() just freed up a non-HOT tuple).
832 if (ItemIdIsDead(itemid))
834 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
839 Assert(ItemIdIsNormal(itemid));
841 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
842 tuple.t_len = ItemIdGetLength(itemid);
843 tuple.t_tableOid = RelationGetRelid(onerel);
847 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
852 * Ordinarily, DEAD tuples would have been removed by
853 * heap_page_prune(), but it's possible that the tuple
854 * state changed since heap_page_prune() looked. In
855 * particular an INSERT_IN_PROGRESS tuple could have
856 * changed to DEAD if the inserter aborted. So this
857 * cannot be considered an error condition.
859 * If the tuple is HOT-updated then it must only be
860 * removed by a prune operation; so we keep it just as if
861 * it were RECENTLY_DEAD. Also, if it's a heap-only
862 * tuple, we choose to keep it, because it'll be a lot
863 * cheaper to get rid of it in the next pruning pass than
864 * to treat it like an indexed tuple.
866 if (HeapTupleIsHotUpdated(&tuple) ||
867 HeapTupleIsHeapOnly(&tuple))
870 tupgone = true; /* we can delete the tuple */
874 /* Tuple is good --- but let's do some validity checks */
875 if (onerel->rd_rel->relhasoids &&
876 !OidIsValid(HeapTupleGetOid(&tuple)))
877 elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
878 relname, blkno, offnum);
881 * Is the tuple definitely visible to all transactions?
883 * NB: Like with per-tuple hint bits, we can't set the
884 * PD_ALL_VISIBLE flag if the inserter committed
885 * asynchronously. See SetHintBits for more info. Check
886 * that the tuple is hinted xmin-committed because of that.
893 if (!HeapTupleHeaderXminCommitted(tuple.t_data))
900 * The inserter definitely committed. But is it old
901 * enough that everyone sees it as committed?
903 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
904 if (!TransactionIdPrecedes(xmin, OldestXmin))
910 /* Track newest xmin on page. */
911 if (TransactionIdFollows(xmin, visibility_cutoff_xid))
912 visibility_cutoff_xid = xmin;
915 case HEAPTUPLE_RECENTLY_DEAD:
918 * If the tuple is recently deleted then we must not remove it from the relation.
924 case HEAPTUPLE_INSERT_IN_PROGRESS:
925 /* This is an expected case during concurrent vacuum */
928 case HEAPTUPLE_DELETE_IN_PROGRESS:
929 /* This is an expected case during concurrent vacuum */
933 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
939 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
940 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
941 &vacrelstats->latestRemovedXid);
943 has_dead_tuples = true;
951 * Each non-removable tuple must be checked to see if it needs
952 * freezing. Note we already have exclusive buffer lock.
954 if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
955 MultiXactCutoff, &frozen[nfrozen]))
956 frozen[nfrozen++].offset = offnum;
958 } /* scan along page */
961 * If we froze any tuples, mark the buffer dirty, and write a WAL
962 * record recording the changes. We must log the changes to be
963 * crash-safe against future truncation of CLOG.
967 START_CRIT_SECTION();
969 MarkBufferDirty(buf);
971 /* execute collected freezes */
972 for (i = 0; i < nfrozen; i++)
975 HeapTupleHeader htup;
977 itemid = PageGetItemId(page, frozen[i].offset);
978 htup = (HeapTupleHeader) PageGetItem(page, itemid);
980 heap_execute_freeze_tuple(htup, &frozen[i]);
983 /* Now WAL-log freezing if necessary */
984 if (RelationNeedsWAL(onerel))
988 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
990 PageSetLSN(page, recptr);
997 * If there are no indexes then we can vacuum the page right now
998 * instead of doing a second scan.
1000 if (nindexes == 0 &&
1001 vacrelstats->num_dead_tuples > 0)
1003 /* Remove tuples from heap */
1004 lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
1005 has_dead_tuples = false;
1008 * Forget the now-vacuumed tuples, and press on, but be careful
1009 * not to reset latestRemovedXid since we want that value to be valid.
1012 vacrelstats->num_dead_tuples = 0;
1016 freespace = PageGetHeapFreeSpace(page);
1018 /* mark page all-visible, if appropriate */
1019 if (all_visible && !all_visible_according_to_vm)
1022 * It should never be the case that the visibility map page is set
1023 * while the page-level bit is clear, but the reverse is allowed
1024 * (if checksums are not enabled). Regardless, set both bits
1025 * so that we get back in sync.
1027 * NB: If the heap page is all-visible but the VM bit is not set,
1028 * we don't need to dirty the heap page. However, if checksums
1029 * are enabled, we do need to make sure that the heap page is
1030 * dirtied before passing it to visibilitymap_set(), because it
1031 * may be logged. Given that this situation should only happen in
1032 * rare cases after a crash, it is not worth optimizing.
1034 PageSetAllVisible(page);
1035 MarkBufferDirty(buf);
1036 visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1037 vmbuffer, visibility_cutoff_xid);
1041 * As of PostgreSQL 9.2, the visibility map bit should never be set if
1042 * the page-level bit is clear. However, it's possible that the bit
1043 * got cleared after we checked it and before we took the buffer
1044 * content lock, so we must recheck before jumping to the conclusion
1045 * that something bad has happened.
1047 else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1048 && visibilitymap_test(onerel, blkno, &vmbuffer))
1050 elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1052 visibilitymap_clear(onerel, blkno, vmbuffer);
1056 * It's possible for the value returned by GetOldestXmin() to move
1057 * backwards, so it's not wrong for us to see tuples that appear to
1058 * not be visible to everyone yet, while PD_ALL_VISIBLE is already
1059 * set. The real safe xmin value never moves backwards, but
1060 * GetOldestXmin() is conservative and sometimes returns a value
1061 * that's unnecessarily small, so if we see that contradiction it just
1062 * means that the tuples that we think are not visible to everyone yet
1063 * actually are, and the PD_ALL_VISIBLE flag is correct.
1065 * There should never be dead tuples on a page with PD_ALL_VISIBLE set.
1068 else if (PageIsAllVisible(page) && has_dead_tuples)
1070 elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1072 PageClearAllVisible(page);
1073 MarkBufferDirty(buf);
1074 visibilitymap_clear(onerel, blkno, vmbuffer);
1077 UnlockReleaseBuffer(buf);
1079 /* Remember the location of the last page with nonremovable tuples */
1081 vacrelstats->nonempty_pages = blkno + 1;
1084 * If we remembered any tuples for deletion, then the page will be
1085 * visited again by lazy_vacuum_heap, which will compute and record
1086 * its post-compaction free space. If not, then we're done with this
1087 * page, so remember its free space as-is. (This path will always be
1088 * taken if there are no indexes.)
1090 if (vacrelstats->num_dead_tuples == prev_dead_count)
1091 RecordPageWithFreeSpace(onerel, blkno, freespace);
1096 /* save stats for use later */
1097 vacrelstats->scanned_tuples = num_tuples;
1098 vacrelstats->tuples_deleted = tups_vacuumed;
1099 vacrelstats->new_dead_tuples = nkeep;
1101 /* now we can compute the new value for pg_class.reltuples */
1102 vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
1104 vacrelstats->scanned_pages,
1108 * Release any remaining pin on visibility map page.
1110 if (BufferIsValid(vmbuffer))
1112 ReleaseBuffer(vmbuffer);
1113 vmbuffer = InvalidBuffer;
1116 /* If any tuples need to be deleted, perform final vacuum cycle */
1117 /* XXX put a threshold on min number of tuples here? */
1118 if (vacrelstats->num_dead_tuples > 0)
1120 /* Log cleanup info before we touch indexes */
1121 vacuum_log_cleanup_info(onerel, vacrelstats);
1123 /* Remove index entries */
1124 for (i = 0; i < nindexes; i++)
1125 lazy_vacuum_index(Irel[i],
1128 /* Remove tuples from heap */
1129 lazy_vacuum_heap(onerel, vacrelstats);
1130 vacrelstats->num_index_scans++;
1133 /* Do post-vacuum cleanup and statistics update for each index */
1134 for (i = 0; i < nindexes; i++)
1135 lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
1137 /* If no indexes, make log report that lazy_vacuum_heap would've made */
1140 (errmsg("\"%s\": removed %.0f row versions in %u pages",
1141 RelationGetRelationName(onerel),
1142 tups_vacuumed, vacuumed_pages)));
1145 * This is pretty messy, but we split it up so that we can skip emitting
1146 * individual parts of the message when not applicable.
1148 initStringInfo(&buf);
1149 appendStringInfo(&buf,
1150 _("%.0f dead row versions cannot be removed yet.\n"),
1152 appendStringInfo(&buf, _("There were %.0f unused item pointers.\n"),
1154 appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins.\n",
1155 "Skipped %u pages due to buffer pins.\n",
1156 vacrelstats->pinskipped_pages),
1157 vacrelstats->pinskipped_pages);
1158 appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
1159 "%u pages are entirely empty.\n",
1162 appendStringInfo(&buf, _("%s."),
1163 pg_rusage_show(&ru0));
1166 (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
1167 RelationGetRelationName(onerel),
1168 tups_vacuumed, num_tuples,
1169 vacrelstats->scanned_pages, nblocks),
1170 errdetail_internal("%s", buf.data)));
1176 * lazy_vacuum_heap() -- second pass over the heap
1178 * This routine marks dead tuples as unused and compacts out free
1179 * space on their pages. Pages not having dead tuples recorded from
1180 * lazy_scan_heap are not visited at all.
1182 * Note: the reason for doing this as a second pass is we cannot remove
1183 * the tuples until we've removed their index entries, and we want to
1184 * process index entry removal in batches as large as possible.
1187 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
1192 Buffer vmbuffer = InvalidBuffer;
1194 pg_rusage_init(&ru0);
1198 while (tupindex < vacrelstats->num_dead_tuples)
1205 vacuum_delay_point();
1207 tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
1208 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
1210 if (!ConditionalLockBufferForCleanup(buf))
1216 tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
1219 /* Now that we've compacted the page, record its available space */
1220 page = BufferGetPage(buf);
1221 freespace = PageGetHeapFreeSpace(page);
1223 UnlockReleaseBuffer(buf);
1224 RecordPageWithFreeSpace(onerel, tblk, freespace);
1228 if (BufferIsValid(vmbuffer))
1230 ReleaseBuffer(vmbuffer);
1231 vmbuffer = InvalidBuffer;
1235 (errmsg("\"%s\": removed %d row versions in %d pages",
1236 RelationGetRelationName(onerel),
1239 pg_rusage_show(&ru0))));
1243 * lazy_vacuum_page() -- free dead tuples on a page
1244 * and repair its fragmentation.
1246 * Caller must hold pin and buffer cleanup lock on the buffer.
1248 * tupindex is the index in vacrelstats->dead_tuples of the first dead
1249 * tuple for this page. We assume the rest follow sequentially.
1250 * The return value is the first tupindex after the tuples of this page.
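 *
 * For example, because dead_tuples is sorted by TID, all entries for one
 * block are adjacent: if entries 37..52 all refer to block 14, a call with
 * blkno = 14 and tupindex = 37 marks those 16 line pointers unused and
 * returns 53, which lazy_vacuum_heap() then passes as the next tupindex.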
1253 lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
1254 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
1256 Page page = BufferGetPage(buffer);
1257 OffsetNumber unused[MaxOffsetNumber];
1259 TransactionId visibility_cutoff_xid;
1261 START_CRIT_SECTION();
1263 for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
1269 tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
1271 break; /* past end of tuples for this block */
1272 toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
1273 itemid = PageGetItemId(page, toff);
1274 ItemIdSetUnused(itemid);
1275 unused[uncnt++] = toff;
1278 PageRepairFragmentation(page);
1281 * Mark buffer dirty before we write WAL.
1283 MarkBufferDirty(buffer);
1286 if (RelationNeedsWAL(onerel))
1290 recptr = log_heap_clean(onerel, buffer,
1293 vacrelstats->latestRemovedXid);
1294 PageSetLSN(page, recptr);
1298 * End critical section, so we can safely do visibility tests (which
1299 * possibly need to perform IO and allocate memory!). If we crash now the
1300 * page (including the corresponding vm bit) might not be marked all
1301 * visible, but that's fine. A later vacuum will fix that.
1306 * Now that we have removed the dead tuples from the page, once again
1307 * check if the page has become all-visible. The page is already marked
1308 * dirty, exclusively locked, and, if needed, a full page image has been
1309 * emitted in the log_heap_clean() above.
1311 if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
1312 PageSetAllVisible(page);
1315 * All the changes to the heap page have been done. If the all-visible
1316 * flag is now set, also set the VM bit.
1318 if (PageIsAllVisible(page) &&
1319 !visibilitymap_test(onerel, blkno, vmbuffer))
1321 Assert(BufferIsValid(*vmbuffer));
1322 visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
1323 visibility_cutoff_xid);
1330 * lazy_check_needs_freeze() -- scan page to see if any tuples
1331 * need to be cleaned to avoid wraparound
1333 * Returns true if the page needs to be vacuumed using cleanup lock.
1334 * Also returns a flag indicating whether the page contains any tuples at all.
1337 lazy_check_needs_freeze(Buffer buf, bool *hastup)
1339 Page page = BufferGetPage(buf);
1340 OffsetNumber offnum,
1342 HeapTupleHeader tupleheader;
1346 /* If we hit an uninitialized page, we want to force vacuuming it. */
1347 if (PageIsNew(page))
1350 /* Quick out for ordinary empty page. */
1351 if (PageIsEmpty(page))
1354 maxoff = PageGetMaxOffsetNumber(page);
1355 for (offnum = FirstOffsetNumber;
1357 offnum = OffsetNumberNext(offnum))
1361 itemid = PageGetItemId(page, offnum);
1363 /* this should match hastup test in count_nondeletable_pages() */
1364 if (ItemIdIsUsed(itemid))
1367 /* dead and redirect items never need freezing */
1368 if (!ItemIdIsNormal(itemid))
1371 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1373 if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
1374 MultiXactCutoff, buf))
1376 } /* scan along page */
1383 * lazy_vacuum_index() -- vacuum one index relation.
1385 * Delete all the index entries pointing to tuples listed in
1386 * vacrelstats->dead_tuples, and update running statistics.
1389 lazy_vacuum_index(Relation indrel,
1390 IndexBulkDeleteResult **stats,
1391 LVRelStats *vacrelstats)
1393 IndexVacuumInfo ivinfo;
1396 pg_rusage_init(&ru0);
1398 ivinfo.index = indrel;
1399 ivinfo.analyze_only = false;
1400 ivinfo.estimated_count = true;
1401 ivinfo.message_level = elevel;
1402 ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
1403 ivinfo.strategy = vac_strategy;
1405 /* Do bulk deletion */
1406 *stats = index_bulk_delete(&ivinfo, *stats,
1407 lazy_tid_reaped, (void *) vacrelstats);
1410 (errmsg("scanned index \"%s\" to remove %d row versions",
1411 RelationGetRelationName(indrel),
1412 vacrelstats->num_dead_tuples),
1413 errdetail("%s.", pg_rusage_show(&ru0))));
1417 * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
1420 lazy_cleanup_index(Relation indrel,
1421 IndexBulkDeleteResult *stats,
1422 LVRelStats *vacrelstats)
1424 IndexVacuumInfo ivinfo;
1427 pg_rusage_init(&ru0);
1429 ivinfo.index = indrel;
1430 ivinfo.analyze_only = false;
1431 ivinfo.estimated_count = (vacrelstats->scanned_pages < vacrelstats->rel_pages);
1432 ivinfo.message_level = elevel;
1433 ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
1434 ivinfo.strategy = vac_strategy;
1436 stats = index_vacuum_cleanup(&ivinfo, stats);
1442 * Now update statistics in pg_class, but only if the index says the count is accurate.
1445 if (!stats->estimated_count)
1446 vac_update_relstats(indrel,
1448 stats->num_index_tuples,
1451 InvalidTransactionId,
1456 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
1457 RelationGetRelationName(indrel),
1458 stats->num_index_tuples,
1460 errdetail("%.0f index row versions were removed.\n"
1461 "%u index pages have been deleted, %u are currently reusable.\n"
1463 stats->tuples_removed,
1464 stats->pages_deleted, stats->pages_free,
1465 pg_rusage_show(&ru0))));
1471 * should_attempt_truncation - should we attempt to truncate the heap?
1473 * Don't even think about it unless we have a shot at releasing a goodly
1474 * number of pages. Otherwise, the time taken isn't worth it.
1476 * This is split out so that we can test whether truncation is going to be
1477 * called for before we actually do it. If you change the logic here, be
1478 * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
1481 should_attempt_truncation(LVRelStats *vacrelstats)
1483 BlockNumber possibly_freeable;
1485 possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
1486 if (possibly_freeable > 0 &&
1487 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
1488 possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
1495 * lazy_truncate_heap - try to truncate off any empty pages at the end
1498 lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
1500 BlockNumber old_rel_pages = vacrelstats->rel_pages;
1501 BlockNumber new_rel_pages;
1505 pg_rusage_init(&ru0);
1508 * Loop until no more truncating can be done.
1513 * We need full exclusive lock on the relation in order to do
1514 * truncation. If we can't get it, give up rather than waiting --- we
1515 * don't want to block other backends, and we don't want to deadlock
1516 * (which is quite possible considering we already hold a lower-grade
1519 vacrelstats->lock_waiter_detected = false;
1523 if (ConditionalLockRelation(onerel, AccessExclusiveLock))
1527 * Check for interrupts while trying to (re-)acquire the exclusive
1530 CHECK_FOR_INTERRUPTS();
1532 if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
1533 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
1536 * We failed to establish the lock in the specified number of
1537 * retries. This means we give up truncating.
1539 vacrelstats->lock_waiter_detected = true;
1541 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
1542 RelationGetRelationName(onerel))));
1546 pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL);
1550 * Now that we have exclusive lock, look to see if the rel has grown
1551 * whilst we were vacuuming with non-exclusive lock. If so, give up;
1552 * the newly added pages presumably contain non-deletable tuples.
1554 new_rel_pages = RelationGetNumberOfBlocks(onerel);
1555 if (new_rel_pages != old_rel_pages)
1558 * Note: we intentionally don't update vacrelstats->rel_pages with
1559 * the new rel size here. If we did, it would amount to assuming
1560 * that the new pages are empty, which is unlikely. Leaving the
1561 * numbers alone amounts to assuming that the new pages have the
1562 * same tuple density as existing ones, which is less unlikely.
1564 UnlockRelation(onerel, AccessExclusiveLock);
1569 * Scan backwards from the end to verify that the end pages actually
1570 * contain no tuples. This is *necessary*, not optional, because
1571 * other backends could have added tuples to these pages whilst we were vacuuming.
1574 new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
1576 if (new_rel_pages >= old_rel_pages)
1578 /* can't do anything after all */
1579 UnlockRelation(onerel, AccessExclusiveLock);
1586 RelationTruncate(onerel, new_rel_pages);
1589 * We can release the exclusive lock as soon as we have truncated.
1590 * Other backends can't safely access the relation until they have
1591 * processed the smgr invalidation that smgrtruncate sent out ... but
1592 * that should happen as part of standard invalidation processing once
1593 * they acquire lock on the relation.
1595 UnlockRelation(onerel, AccessExclusiveLock);
1598 * Update statistics. Here, it *is* correct to adjust rel_pages
1599 * without also touching reltuples, since the tuple count wasn't
1600 * changed by the truncation.
1602 vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
1603 vacrelstats->rel_pages = new_rel_pages;
1606 (errmsg("\"%s\": truncated %u to %u pages",
1607 RelationGetRelationName(onerel),
1608 old_rel_pages, new_rel_pages),
1610 pg_rusage_show(&ru0))));
1611 old_rel_pages = new_rel_pages;
1612 } while (new_rel_pages > vacrelstats->nonempty_pages &&
1613 vacrelstats->lock_waiter_detected);
1617 * Rescan end pages to verify that they are (still) empty of tuples.
1619 * Returns number of nondeletable pages (last nonempty page + 1).
1622 count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
1625 instr_time starttime;
1627 /* Initialize the starttime if we check for conflicting lock requests */
1628 INSTR_TIME_SET_CURRENT(starttime);
1630 /* Strange coding of loop control is needed because blkno is unsigned */
1631 blkno = vacrelstats->rel_pages;
1632 while (blkno > vacrelstats->nonempty_pages)
1636 OffsetNumber offnum,
1641 * Check if another process requests a lock on our relation. We are
1642 * holding an AccessExclusiveLock here, so they will be waiting. We
1643 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
1644 * only check if that interval has elapsed once every 32 blocks to
1645 * keep the number of system calls and actual shared lock table
1646 * lookups to a minimum.
1648 if ((blkno % 32) == 0)
1650 instr_time currenttime;
1653 INSTR_TIME_SET_CURRENT(currenttime);
1654 elapsed = currenttime;
1655 INSTR_TIME_SUBTRACT(elapsed, starttime);
1656 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
1657 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
1659 if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
1662 (errmsg("\"%s\": suspending truncate due to conflicting lock request",
1663 RelationGetRelationName(onerel))));
1665 vacrelstats->lock_waiter_detected = true;
1668 starttime = currenttime;
1673 * We don't insert a vacuum delay point here, because we have an
1674 * exclusive lock on the table which we want to hold for as short a
1675 * time as possible. We still need to check for interrupts, however.
1677 CHECK_FOR_INTERRUPTS();
1681 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1682 RBM_NORMAL, vac_strategy);
1684 /* In this phase we only need shared access to the buffer */
1685 LockBuffer(buf, BUFFER_LOCK_SHARE);
1687 page = BufferGetPage(buf);
1689 if (PageIsNew(page) || PageIsEmpty(page))
1691 /* PageIsNew probably shouldn't happen... */
1692 UnlockReleaseBuffer(buf);
1697 maxoff = PageGetMaxOffsetNumber(page);
1698 for (offnum = FirstOffsetNumber;
1700 offnum = OffsetNumberNext(offnum))
1704 itemid = PageGetItemId(page, offnum);
1707 * Note: any non-unused item should be taken as a reason to keep
1708 * this page. We formerly thought that DEAD tuples could be
1709 * thrown away, but that's not so, because we'd not have cleaned
1710 * out their index entries.
1712 if (ItemIdIsUsed(itemid))
1715 break; /* can stop scanning */
1717 } /* scan along page */
1719 UnlockReleaseBuffer(buf);
1721 /* Done scanning if we found a tuple here */
1727 * If we fall out of the loop, all the previously-thought-to-be-empty
1728 * pages still are; we need not bother to look at the last known-nonempty
1731 return vacrelstats->nonempty_pages;
1735 * lazy_space_alloc - space allocation decisions for lazy vacuum
1737 * See the comments at the head of this file for rationale.
1740 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
1743 int vac_work_mem = IsAutoVacuumWorkerProcess() &&
1744 autovacuum_work_mem != -1 ?
1745 autovacuum_work_mem : maintenance_work_mem;
1747 if (vacrelstats->hasindex)
1749 maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
1750 maxtuples = Min(maxtuples, INT_MAX);
1751 maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
1753 /* curious coding here to ensure the multiplication can't overflow */
1754 if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
1755 maxtuples = relblocks * LAZY_ALLOC_TUPLES;
1757 /* stay sane if small maintenance_work_mem */
1758 maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
1762 maxtuples = MaxHeapTuplesPerPage;
1765 vacrelstats->num_dead_tuples = 0;
1766 vacrelstats->max_dead_tuples = (int) maxtuples;
1767 vacrelstats->dead_tuples = (ItemPointer)
1768 palloc(maxtuples * sizeof(ItemPointerData));
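	/*
	 * Worked example (illustrative, assuming 8 kB pages so that
	 * MaxHeapTuplesPerPage is 291, and 6-byte ItemPointerData): with
	 * maintenance_work_mem = 64MB the initial maxtuples is
	 * 65536 * 1024 / 6 = 11,184,810, far below both INT_MAX and
	 * MaxAllocSize / 6.  For a 100-block table, however, the per-table
	 * clamp applies and maxtuples becomes 100 * LAZY_ALLOC_TUPLES = 29,100,
	 * so only about 170 kB is palloc'd instead of 64MB.
	 */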
1772 * lazy_record_dead_tuple - remember one deletable tuple
1775 lazy_record_dead_tuple(LVRelStats *vacrelstats,
1776 ItemPointer itemptr)
1779 * The array shouldn't overflow under normal behavior, but perhaps it
1780 * could if we are given a really small maintenance_work_mem. In that
1781 * case, just forget the last few tuples (we'll get 'em next time).
1783 if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
1785 vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
1786 vacrelstats->num_dead_tuples++;
1791 * lazy_tid_reaped() -- is a particular tid deletable?
1793 * This has the right signature to be an IndexBulkDeleteCallback.
1795 * Assumes dead_tuples array is in sorted order.
1798 lazy_tid_reaped(ItemPointer itemptr, void *state)
1800 LVRelStats *vacrelstats = (LVRelStats *) state;
1803 res = (ItemPointer) bsearch((void *) itemptr,
1804 (void *) vacrelstats->dead_tuples,
1805 vacrelstats->num_dead_tuples,
1806 sizeof(ItemPointerData),
1809 return (res != NULL);
1813 * Comparator routines for use with qsort() and bsearch().
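 *
 * For example, under this block-then-offset ordering, (5,3) sorts before
 * (5,17), which sorts before (12,1); keeping dead_tuples in this order lets
 * lazy_tid_reaped() locate any heap TID with a binary search costing
 * O(log num_dead_tuples) comparisons per index entry.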
1816 vac_cmp_itemptr(const void *left, const void *right)
1823 lblk = ItemPointerGetBlockNumber((ItemPointer) left);
1824 rblk = ItemPointerGetBlockNumber((ItemPointer) right);
1831 loff = ItemPointerGetOffsetNumber((ItemPointer) left);
1832 roff = ItemPointerGetOffsetNumber((ItemPointer) right);
1843 * Check if every tuple in the given page is visible to all current and future
1844 * transactions. Also return the visibility_cutoff_xid which is the highest
1845 * xmin amongst the visible tuples.
1848 heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
1850 Page page = BufferGetPage(buf);
1851 BlockNumber blockno = BufferGetBlockNumber(buf);
1852 OffsetNumber offnum,
1854 bool all_visible = true;
1856 *visibility_cutoff_xid = InvalidTransactionId;
1859 * This is a stripped down version of the line pointer scan in
1860 * lazy_scan_heap(). So if you change anything here, also check that code.
1862 maxoff = PageGetMaxOffsetNumber(page);
1863 for (offnum = FirstOffsetNumber;
1864 offnum <= maxoff && all_visible;
1865 offnum = OffsetNumberNext(offnum))
1868 HeapTupleData tuple;
1870 itemid = PageGetItemId(page, offnum);
1872 /* Unused or redirect line pointers are of no interest */
1873 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
1876 ItemPointerSet(&(tuple.t_self), blockno, offnum);
1879 * Dead line pointers can have index pointers pointing to them. So
1880 * they can't be treated as visible tuples.
1882 if (ItemIdIsDead(itemid))
1884 all_visible = false;
1888 Assert(ItemIdIsNormal(itemid));
1890 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1891 tuple.t_len = ItemIdGetLength(itemid);
1892 tuple.t_tableOid = RelationGetRelid(rel);
1894 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
1896 case HEAPTUPLE_LIVE:
1900 /* Check comments in lazy_scan_heap. */
1901 if (!HeapTupleHeaderXminCommitted(tuple.t_data))
1903 all_visible = false;
1908 * The inserter definitely committed. But is it old enough
1909 * that everyone sees it as committed?
1911 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1912 if (!TransactionIdPrecedes(xmin, OldestXmin))
1914 all_visible = false;
1918 /* Track newest xmin on page. */
1919 if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
1920 *visibility_cutoff_xid = xmin;
1924 case HEAPTUPLE_DEAD:
1925 case HEAPTUPLE_RECENTLY_DEAD:
1926 case HEAPTUPLE_INSERT_IN_PROGRESS:
1927 case HEAPTUPLE_DELETE_IN_PROGRESS:
1928 all_visible = false;
1932 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1935 } /* scan along page */