granicus.if.org Git - postgresql/blob - src/backend/commands/vacuumlazy.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuumlazy.c
   4  *        Concurrent ("lazy") vacuuming.
   5  *
   6  *
   7  * The major space usage for LAZY VACUUM is storage for the array of dead
   8  * tuple TIDs, with the next biggest need being storage for per-disk-page
   9  * free space info.  We want to ensure we can vacuum even the very largest
  10  * relations with finite memory space usage.  To do that, we set upper bounds
  11  * on the number of tuples and pages we will keep track of at once.
  12  *
  13  * We are willing to use at most maintenance_work_mem (or perhaps
  14  * autovacuum_work_mem) memory space to keep track of dead tuples.  We
  15  * initially allocate an array of TIDs of that size, with an upper limit that
  16  * depends on table size (this limit ensures we don't allocate a huge area
  17  * uselessly for vacuuming small tables).  If the array threatens to overflow,
  18  * we suspend the heap scan phase and perform a pass of index cleanup and page
  19  * compaction, then resume the heap scan with an empty TID array.
  20  *
  21  * If we're processing a table with no indexes, we can just vacuum each page
  22  * as we go; there's no need to save up multiple tuples to minimize the number
  23  * of index scans performed.  So we don't use maintenance_work_mem memory for
  24  * the TID array, just enough to hold as many heap tuples as fit on one page.
  25  *
  26  *
  27  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  28  * Portions Copyright (c) 1994, Regents of the University of California
  29  *
  30  *
  31  * IDENTIFICATION
  32  *        src/backend/commands/vacuumlazy.c
  33  *
  34  *-------------------------------------------------------------------------
  35  */
  36 #include "postgres.h"
  37
  38 #include <math.h>
  39
  40 #include "access/genam.h"
  41 #include "access/heapam.h"
  42 #include "access/heapam_xlog.h"
  43 #include "access/htup_details.h"
  44 #include "access/multixact.h"
  45 #include "access/transam.h"
  46 #include "access/visibilitymap.h"
  47 #include "catalog/storage.h"
  48 #include "commands/dbcommands.h"
  49 #include "commands/vacuum.h"
  50 #include "miscadmin.h"
  51 #include "pgstat.h"
  52 #include "portability/instr_time.h"
  53 #include "postmaster/autovacuum.h"
  54 #include "storage/bufmgr.h"
  55 #include "storage/freespace.h"
  56 #include "storage/lmgr.h"
  57 #include "utils/lsyscache.h"
  58 #include "utils/memutils.h"
  59 #include "utils/pg_rusage.h"
  60 #include "utils/timestamp.h"
  61 #include "utils/tqual.h"
  62
  63
  64 /*
  65  * Space/time tradeoff parameters: do these need to be user-tunable?
  66  *
  67  * To consider truncating the relation, we want there to be at least
  68  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
  69  * is less) potentially-freeable pages.
  70  */
  71 #define REL_TRUNCATE_MINIMUM    1000
  72 #define REL_TRUNCATE_FRACTION   16
  73
  74 /*
  75  * Timing parameters for truncate locking heuristics.
  76  *
  77  * These were not exposed as user tunable GUC values because it didn't seem
  78  * that the potential for improvement was great enough to merit the cost of
  79  * supporting them.
  80  */
  81 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL             20              /* ms */
  82 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL              50              /* ms */
  83 #define VACUUM_TRUNCATE_LOCK_TIMEOUT                    5000    /* ms */
  84
  85 /*
  86  * Guesstimation of number of dead tuples per page.  This is used to
  87  * provide an upper limit to memory allocated when vacuuming small
  88  * tables.
  89  */
  90 #define LAZY_ALLOC_TUPLES               MaxHeapTuplesPerPage
  91
  92 /*
  93  * Before we consider skipping a page that's marked as clean in
  94  * visibility map, we must've seen at least this many clean pages.
  95  */
  96 #define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
  97
  98 typedef struct LVRelStats
  99 {
 100         /* hasindex = true means two-pass strategy; false means one-pass */
 101         bool            hasindex;
 102         /* Overall statistics about rel */
 103         BlockNumber old_rel_pages;      /* previous value of pg_class.relpages */
 104         BlockNumber rel_pages;          /* total number of pages */
 105         BlockNumber scanned_pages;      /* number of pages we examined */
 106         double          scanned_tuples; /* counts only tuples on scanned pages */
 107         double          old_rel_tuples; /* previous value of pg_class.reltuples */
 108         double          new_rel_tuples; /* new estimated total # of tuples */
 109         double          new_dead_tuples;        /* new estimated total # of dead tuples */
 110         BlockNumber pages_removed;
 111         double          tuples_deleted;
 112         BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
 113         /* List of TIDs of tuples we intend to delete */
 114         /* NB: this list is ordered by TID address */
 115         int                     num_dead_tuples;        /* current # of entries */
 116         int                     max_dead_tuples;        /* # slots allocated in array */
 117         ItemPointer dead_tuples;        /* array of ItemPointerData */
 118         int                     num_index_scans;
 119         TransactionId latestRemovedXid;
 120         bool            lock_waiter_detected;
 121 } LVRelStats;
 122
 123
 124 /* A few variables that don't seem worth passing around as parameters */
 125 static int      elevel = -1;
 126
 127 static TransactionId OldestXmin;
 128 static TransactionId FreezeLimit;
 129 static MultiXactId MultiXactCutoff;
 130
 131 static BufferAccessStrategy vac_strategy;
 132
 133
 134 /* non-export function prototypes */
 135 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 136                            Relation *Irel, int nindexes, bool scan_all);
 137 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
 138 static bool lazy_check_needs_freeze(Buffer buf);
 139 static void lazy_vacuum_index(Relation indrel,
 140                                   IndexBulkDeleteResult **stats,
 141                                   LVRelStats *vacrelstats);
 142 static void lazy_cleanup_index(Relation indrel,
 143                                    IndexBulkDeleteResult *stats,
 144                                    LVRelStats *vacrelstats);
 145 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 146                                  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
 147 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
 148 static BlockNumber count_nondeletable_pages(Relation onerel,
 149                                                  LVRelStats *vacrelstats);
 150 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
 151 static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
 152                                            ItemPointer itemptr);
 153 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
 154 static int      vac_cmp_itemptr(const void *left, const void *right);
 155 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
 156                                                  TransactionId *visibility_cutoff_xid);
 157
 158
 159 /*
 160  *      lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 161  *
 162  *              This routine vacuums a single heap, cleans out its indexes, and
 163  *              updates its relpages and reltuples statistics.
 164  *
 165  *              At entry, we have already established a transaction and opened
 166  *              and locked the relation.
 167  */
 168 void
 169 lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
 170                                 BufferAccessStrategy bstrategy)
 171 {
 172         LVRelStats *vacrelstats;
 173         Relation   *Irel;
 174         int                     nindexes;
 175         BlockNumber possibly_freeable;
 176         PGRUsage        ru0;
 177         TimestampTz starttime = 0;
 178         long            secs;
 179         int                     usecs;
 180         double          read_rate,
 181                                 write_rate;
 182         bool            scan_all;               /* should we scan all pages? */
 183         bool            scanned_all;    /* did we actually scan all pages? */
 184         TransactionId xidFullScanLimit;
 185         MultiXactId mxactFullScanLimit;
 186         BlockNumber new_rel_pages;
 187         double          new_rel_tuples;
 188         BlockNumber new_rel_allvisible;
 189         double          new_live_tuples;
 190         TransactionId new_frozen_xid;
 191         MultiXactId new_min_multi;
 192
 193         /* measure elapsed time iff autovacuum logging requires it */
 194         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
 195         {
 196                 pg_rusage_init(&ru0);
 197                 starttime = GetCurrentTimestamp();
 198         }
 199
 200         if (vacstmt->options & VACOPT_VERBOSE)
 201                 elevel = INFO;
 202         else
 203                 elevel = DEBUG2;
 204
 205         vac_strategy = bstrategy;
 206
 207         vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
 208                                                   vacstmt->multixact_freeze_min_age,
 209                                                   vacstmt->multixact_freeze_table_age,
 210                                                   onerel->rd_rel->relisshared,
 211                                                   &OldestXmin, &FreezeLimit, &xidFullScanLimit,
 212                                                   &MultiXactCutoff, &mxactFullScanLimit);
 213
 214         /*
 215          * We request a full scan if either the table's frozen Xid is now older
 216          * than or equal to the requested Xid full-table scan limit; or if the
 217          * table's minimum MultiXactId is older than or equal to the requested
 218          * mxid full-table scan limit.
 219          */
 220         scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
 221                                                                                          xidFullScanLimit);
 222         scan_all |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
 223                                                                                         mxactFullScanLimit);
 224
 225         vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
 226
 227         vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
 228         vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
 229         vacrelstats->num_index_scans = 0;
 230         vacrelstats->pages_removed = 0;
 231         vacrelstats->lock_waiter_detected = false;
 232
 233         /* Open all indexes of the relation */
 234         vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
 235         vacrelstats->hasindex = (nindexes > 0);
 236
 237         /* Do the vacuuming */
 238         lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);
 239
 240         /* Done with indexes */
 241         vac_close_indexes(nindexes, Irel, NoLock);
 242
 243         /*
 244          * Compute whether we actually scanned the whole relation. If we did, we
 245          * can adjust relfrozenxid and relminmxid.
 246          *
 247          * NB: We need to check this before truncating the relation, because that
 248          * will change ->rel_pages.
 249          */
 250         if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
 251         {
 252                 Assert(!scan_all);
 253                 scanned_all = false;
 254         }
 255         else
 256                 scanned_all = true;
 257
 258         /*
 259          * Optionally truncate the relation.
 260          *
 261          * Don't even think about it unless we have a shot at releasing a goodly
 262          * number of pages.  Otherwise, the time taken isn't worth it.
 263          */
 264         possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
 265         if (possibly_freeable > 0 &&
 266                 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
 267                  possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
 268                 lazy_truncate_heap(onerel, vacrelstats);
 269
 270         /* Vacuum the Free Space Map */
 271         FreeSpaceMapVacuum(onerel);
 272
 273         /*
 274          * Update statistics in pg_class.
 275          *
 276          * A corner case here is that if we scanned no pages at all because every
 277          * page is all-visible, we should not update relpages/reltuples, because
 278          * we have no new information to contribute.  In particular this keeps us
 279          * from replacing relpages=reltuples=0 (which means "unknown tuple
 280          * density") with nonzero relpages and reltuples=0 (which means "zero
 281          * tuple density") unless there's some actual evidence for the latter.
 282          *
 283          * We do update relallvisible even in the corner case, since if the table
 284          * is all-visible we'd definitely like to know that.  But clamp the value
 285          * to be not more than what we're setting relpages to.
 286          *
 287          * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
 288          * since then we don't know for certain that all tuples have a newer xmin.
 289          */
 290         new_rel_pages = vacrelstats->rel_pages;
 291         new_rel_tuples = vacrelstats->new_rel_tuples;
 292         if (vacrelstats->scanned_pages == 0 && new_rel_pages > 0)
 293         {
 294                 new_rel_pages = vacrelstats->old_rel_pages;
 295                 new_rel_tuples = vacrelstats->old_rel_tuples;
 296         }
 297
 298         new_rel_allvisible = visibilitymap_count(onerel);
 299         if (new_rel_allvisible > new_rel_pages)
 300                 new_rel_allvisible = new_rel_pages;
 301
 302         new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId;
 303         new_min_multi = scanned_all ? MultiXactCutoff : InvalidMultiXactId;
 304
 305         vac_update_relstats(onerel,
 306                                                 new_rel_pages,
 307                                                 new_rel_tuples,
 308                                                 new_rel_allvisible,
 309                                                 vacrelstats->hasindex,
 310                                                 new_frozen_xid,
 311                                                 new_min_multi);
 312
 313         /* report results to the stats collector, too */
 314         new_live_tuples = new_rel_tuples - vacrelstats->new_dead_tuples;
 315         if (new_live_tuples < 0)
 316                 new_live_tuples = 0;    /* just in case */
 317
 318         pgstat_report_vacuum(RelationGetRelid(onerel),
 319                                                  onerel->rd_rel->relisshared,
 320                                                  new_live_tuples,
 321                                                  vacrelstats->new_dead_tuples);
 322
 323         /* and log the action if appropriate */
 324         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
 325         {
 326                 TimestampTz endtime = GetCurrentTimestamp();
 327
 328                 if (Log_autovacuum_min_duration == 0 ||
 329                         TimestampDifferenceExceeds(starttime, endtime,
 330                                                                            Log_autovacuum_min_duration))
 331                 {
 332                         TimestampDifference(starttime, endtime, &secs, &usecs);
 333
 334                         read_rate = 0;
 335                         write_rate = 0;
 336                         if ((secs > 0) || (usecs > 0))
 337                         {
 338                                 read_rate = (double) BLCKSZ *VacuumPageMiss / (1024 * 1024) /
 339                                                         (secs + usecs / 1000000.0);
 340                                 write_rate = (double) BLCKSZ *VacuumPageDirty / (1024 * 1024) /
 341                                                         (secs + usecs / 1000000.0);
 342                         }
 343                         ereport(LOG,
 344                                         (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
 345                                                         "pages: %d removed, %d remain\n"
 346                                                         "tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"
 347                                                         "buffer usage: %d hits, %d misses, %d dirtied\n"
 348                                           "avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"
 349                                                         "system usage: %s",
 350                                                         get_database_name(MyDatabaseId),
 351                                                         get_namespace_name(RelationGetNamespace(onerel)),
 352                                                         RelationGetRelationName(onerel),
 353                                                         vacrelstats->num_index_scans,
 354                                                         vacrelstats->pages_removed,
 355                                                         vacrelstats->rel_pages,
 356                                                         vacrelstats->tuples_deleted,
 357                                                         vacrelstats->new_rel_tuples,
 358                                                         vacrelstats->new_dead_tuples,
 359                                                         VacuumPageHit,
 360                                                         VacuumPageMiss,
 361                                                         VacuumPageDirty,
 362                                                         read_rate, write_rate,
 363                                                         pg_rusage_show(&ru0))));
 364                 }
 365         }
 366 }
 367
 368 /*
 369  * For Hot Standby we need to know the highest transaction id that will
 370  * be removed by any change. VACUUM proceeds in a number of passes so
 371  * we need to consider how each pass operates. The first phase runs
 372  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
 373  * progresses - these will have a latestRemovedXid on each record.
 374  * In some cases this removes all of the tuples to be removed, though
 375  * often we have dead tuples with index pointers so we must remember them
 376  * for removal in phase 3. Index records for those rows are removed
 377  * in phase 2 and index blocks do not have MVCC information attached.
 378  * So before we can allow removal of any index tuples we need to issue
 379  * a WAL record containing the latestRemovedXid of rows that will be
 380  * removed in phase three. This allows recovery queries to block at the
 381  * correct place, i.e. before phase two, rather than during phase three
 382  * which would be after the rows have become inaccessible.
 383  */
 384 static void
 385 vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
 386 {
 387         /*
 388          * Skip this for relations for which no WAL is to be written, or if we're
 389          * not trying to support archive recovery.
 390          */
 391         if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
 392                 return;
 393
 394         /*
 395          * No need to write the record at all unless it contains a valid value
 396          */
 397         if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
 398                 (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
 399 }
 400
 401 /*
 402  *      lazy_scan_heap() -- scan an open heap relation
 403  *
 404  *              This routine prunes each page in the heap, which will among other
 405  *              things truncate dead tuples to dead line pointers, defragment the
 406  *              page, and set commit status bits (see heap_page_prune).  It also builds
 407  *              lists of dead tuples and pages with free space, calculates statistics
 408  *              on the number of live tuples in the heap, and marks pages as
 409  *              all-visible if appropriate.  When done, or when we run low on space for
 410  *              dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
 411  *              to reclaim dead line pointers.
 412  *
 413  *              If there are no indexes then we can reclaim line pointers on the fly;
 414  *              dead line pointers need only be retained until all index pointers that
 415  *              reference them have been killed.
 416  */
 417 static void
 418 lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 419                            Relation *Irel, int nindexes, bool scan_all)
 420 {
 421         BlockNumber nblocks,
 422                                 blkno;
 423         HeapTupleData tuple;
 424         char       *relname;
 425         BlockNumber empty_pages,
 426                                 vacuumed_pages;
 427         double          num_tuples,
 428                                 tups_vacuumed,
 429                                 nkeep,
 430                                 nunused;
 431         IndexBulkDeleteResult **indstats;
 432         int                     i;
 433         PGRUsage        ru0;
 434         Buffer          vmbuffer = InvalidBuffer;
 435         BlockNumber next_not_all_visible_block;
 436         bool            skipping_all_visible_blocks;
 437         xl_heap_freeze_tuple *frozen;
 438
 439         pg_rusage_init(&ru0);
 440
 441         relname = RelationGetRelationName(onerel);
 442         ereport(elevel,
 443                         (errmsg("vacuuming \"%s.%s\"",
 444                                         get_namespace_name(RelationGetNamespace(onerel)),
 445                                         relname)));
 446
 447         empty_pages = vacuumed_pages = 0;
 448         num_tuples = tups_vacuumed = nkeep = nunused = 0;
 449
 450         indstats = (IndexBulkDeleteResult **)
 451                 palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
 452
 453         nblocks = RelationGetNumberOfBlocks(onerel);
 454         vacrelstats->rel_pages = nblocks;
 455         vacrelstats->scanned_pages = 0;
 456         vacrelstats->nonempty_pages = 0;
 457         vacrelstats->latestRemovedXid = InvalidTransactionId;
 458
 459         lazy_space_alloc(vacrelstats, nblocks);
 460         frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
 461
 462         /*
 463          * We want to skip pages that don't require vacuuming according to the
 464          * visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
 465          * consecutive pages.  Since we're reading sequentially, the OS should be
 466          * doing readahead for us, so there's no gain in skipping a page now and
 467          * then; that's likely to disable readahead and so be counterproductive.
 468          * Also, skipping even a single page means that we can't update
 469          * relfrozenxid, so we only want to do it if we can skip a goodly number
 470          * of pages.
 471          *
 472          * Before entering the main loop, establish the invariant that
 473          * next_not_all_visible_block is the next block number >= blkno that's not
 474          * all-visible according to the visibility map, or nblocks if there's no
 475          * such block.  Also, we set up the skipping_all_visible_blocks flag,
 476          * which is needed because we need hysteresis in the decision: once we've
 477          * started skipping blocks, we may as well skip everything up to the next
 478          * not-all-visible block.
 479          *
 480          * Note: if scan_all is true, we won't actually skip any pages; but we
 481          * maintain next_not_all_visible_block anyway, so as to set up the
 482          * all_visible_according_to_vm flag correctly for each page.
 483          *
 484          * Note: The value returned by visibilitymap_test could be slightly
 485          * out-of-date, since we make this test before reading the corresponding
 486          * heap page or locking the buffer.  This is OK.  If we mistakenly think
 487          * that the page is all-visible when in fact the flag's just been cleared,
 488          * we might fail to vacuum the page.  But it's OK to skip pages when
 489          * scan_all is not set, so no great harm done; the next vacuum will find
 490          * them.  If we make the reverse mistake and vacuum a page unnecessarily,
 491          * it'll just be a no-op.
 492          */
 493         for (next_not_all_visible_block = 0;
 494                  next_not_all_visible_block < nblocks;
 495                  next_not_all_visible_block++)
 496         {
 497                 if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
 498                         break;
 499                 vacuum_delay_point();
 500         }
 501         if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
 502                 skipping_all_visible_blocks = true;
 503         else
 504                 skipping_all_visible_blocks = false;
 505
 506         for (blkno = 0; blkno < nblocks; blkno++)
 507         {
 508                 Buffer          buf;
 509                 Page            page;
 510                 OffsetNumber offnum,
 511                                         maxoff;
 512                 bool            tupgone,
 513                                         hastup;
 514                 int                     prev_dead_count;
 515                 int                     nfrozen;
 516                 Size            freespace;
 517                 bool            all_visible_according_to_vm;
 518                 bool            all_visible;
 519                 bool            has_dead_tuples;
 520                 TransactionId visibility_cutoff_xid = InvalidTransactionId;
 521
 522                 if (blkno == next_not_all_visible_block)
 523                 {
 524                         /* Time to advance next_not_all_visible_block */
 525                         for (next_not_all_visible_block++;
 526                                  next_not_all_visible_block < nblocks;
 527                                  next_not_all_visible_block++)
 528                         {
 529                                 if (!visibilitymap_test(onerel, next_not_all_visible_block,
 530                                                                                 &vmbuffer))
 531                                         break;
 532                                 vacuum_delay_point();
 533                         }
 534
 535                         /*
 536                          * We know we can't skip the current block.  But set up
 537                          * skipping_all_visible_blocks to do the right thing at the
 538                          * following blocks.
 539                          */
 540                         if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
 541                                 skipping_all_visible_blocks = true;
 542                         else
 543                                 skipping_all_visible_blocks = false;
 544                         all_visible_according_to_vm = false;
 545                 }
 546                 else
 547                 {
 548                         /* Current block is all-visible */
 549                         if (skipping_all_visible_blocks && !scan_all)
 550                                 continue;
 551                         all_visible_according_to_vm = true;
 552                 }
 553
 554                 vacuum_delay_point();
 555
 556                 /*
 557                  * If we are close to overrunning the available space for dead-tuple
 558                  * TIDs, pause and do a cycle of vacuuming before we tackle this page.
 559                  */
 560                 if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
 561                         vacrelstats->num_dead_tuples > 0)
 562                 {
 563                         /*
 564                          * Before beginning index vacuuming, we release any pin we may
 565                          * hold on the visibility map page.  This isn't necessary for
 566                          * correctness, but we do it anyway to avoid holding the pin
 567                          * across a lengthy, unrelated operation.
 568                          */
 569                         if (BufferIsValid(vmbuffer))
 570                         {
 571                                 ReleaseBuffer(vmbuffer);
 572                                 vmbuffer = InvalidBuffer;
 573                         }
 574
 575                         /* Log cleanup info before we touch indexes */
 576                         vacuum_log_cleanup_info(onerel, vacrelstats);
 577
 578                         /* Remove index entries */
 579                         for (i = 0; i < nindexes; i++)
 580                                 lazy_vacuum_index(Irel[i],
 581                                                                   &indstats[i],
 582                                                                   vacrelstats);
 583                         /* Remove tuples from heap */
 584                         lazy_vacuum_heap(onerel, vacrelstats);
 585
 586                         /*
 587                          * Forget the now-vacuumed tuples, and press on, but be careful
 588                          * not to reset latestRemovedXid since we want that value to be
 589                          * valid.
 590                          */
 591                         vacrelstats->num_dead_tuples = 0;
 592                         vacrelstats->num_index_scans++;
 593                 }
 594
 595                 /*
 596                  * Pin the visibility map page in case we need to mark the page
 597                  * all-visible.  In most cases this will be very cheap, because we'll
 598                  * already have the correct page pinned anyway.  However, it's
 599                  * possible that (a) next_not_all_visible_block is covered by a
 600                  * different VM page than the current block or (b) we released our pin
 601                  * and did a cycle of index vacuuming.
 602                  */
 603                 visibilitymap_pin(onerel, blkno, &vmbuffer);
 604
 605                 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
 606                                                                  RBM_NORMAL, vac_strategy);
 607
 608                 /* We need buffer cleanup lock so that we can prune HOT chains. */
 609                 if (!ConditionalLockBufferForCleanup(buf))
 610                 {
 611                         /*
 612                          * If we're not scanning the whole relation to guard against XID
 613                          * wraparound, it's OK to skip vacuuming a page.  The next vacuum
 614                          * will clean it up.
 615                          */
 616                         if (!scan_all)
 617                         {
 618                                 ReleaseBuffer(buf);
 619                                 continue;
 620                         }
 621
 622                         /*
 623                          * If this is a wraparound checking vacuum, then we read the page
 624                          * with share lock to see if any xids need to be frozen. If the
 625                          * page doesn't need attention we just skip and continue. If it
 626                          * does, we wait for cleanup lock.
 627                          *
 628                          * We could defer the lock request further by remembering the page
 629                          * and coming back to it later, or we could even register
 630                          * ourselves for multiple buffers and then service whichever one
 631                          * is received first.  For now, this seems good enough.
 632                          */
 633                         LockBuffer(buf, BUFFER_LOCK_SHARE);
 634                         if (!lazy_check_needs_freeze(buf))
 635                         {
 636                                 UnlockReleaseBuffer(buf);
 637                                 vacrelstats->scanned_pages++;
 638                                 continue;
 639                         }
 640                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 641                         LockBufferForCleanup(buf);
 642                         /* drop through to normal processing */
 643                 }
 644
 645                 vacrelstats->scanned_pages++;
 646
 647                 page = BufferGetPage(buf);
 648
 649                 if (PageIsNew(page))
 650                 {
 651                         /*
 652                          * An all-zeroes page could be left over if a backend extends the
 653                          * relation but crashes before initializing the page. Reclaim such
 654                          * pages for use.
 655                          *
 656                          * We have to be careful here because we could be looking at a
 657                          * page that someone has just added to the relation and not yet
 658                          * been able to initialize (see RelationGetBufferForTuple). To
 659                          * protect against that, release the buffer lock, grab the
 660                          * relation extension lock momentarily, and re-lock the buffer. If
 661                          * the page is still uninitialized by then, it must be left over
 662                          * from a crashed backend, and we can initialize it.
 663                          *
 664                          * We don't really need the relation lock when this is a new or
 665                          * temp relation, but it's probably not worth the code space to
 666                          * check that, since this surely isn't a critical path.
 667                          *
 668                          * Note: the comparable code in vacuum.c need not worry because
 669                          * it's got exclusive lock on the whole relation.
 670                          */
 671                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 672                         LockRelationForExtension(onerel, ExclusiveLock);
 673                         UnlockRelationForExtension(onerel, ExclusiveLock);
 674                         LockBufferForCleanup(buf);
 675                         if (PageIsNew(page))
 676                         {
 677                                 ereport(WARNING,
 678                                 (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
 679                                                 relname, blkno)));
 680                                 PageInit(page, BufferGetPageSize(buf), 0);
 681                                 empty_pages++;
 682                         }
 683                         freespace = PageGetHeapFreeSpace(page);
 684                         MarkBufferDirty(buf);
 685                         UnlockReleaseBuffer(buf);
 686
 687                         RecordPageWithFreeSpace(onerel, blkno, freespace);
 688                         continue;
 689                 }
 690
 691                 if (PageIsEmpty(page))
 692                 {
 693                         empty_pages++;
 694                         freespace = PageGetHeapFreeSpace(page);
 695
 696                         /* empty pages are always all-visible */
 697                         if (!PageIsAllVisible(page))
 698                         {
 699                                 START_CRIT_SECTION();
 700
 701                                 /* mark buffer dirty before writing a WAL record */
 702                                 MarkBufferDirty(buf);
 703
 704                                 /*
 705                                  * It's possible that another backend has extended the heap,
 706                                  * initialized the page, and then failed to WAL-log the page
 707                                  * due to an ERROR.  Since heap extension is not WAL-logged,
 708                                  * recovery might try to replay our record setting the
 709                                  * page all-visible and find that the page isn't initialized,
 710                                  * which will cause a PANIC.  To prevent that, check whether
 711                                  * the page has been previously WAL-logged, and if not, do that
 712                                  * now.
 713                                  */
 714                                 if (RelationNeedsWAL(onerel) &&
 715                                         PageGetLSN(page) == InvalidXLogRecPtr)
 716                                         log_newpage_buffer(buf, true);
 717
 718                                 PageSetAllVisible(page);
 719                                 visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
 720                                                                   vmbuffer, InvalidTransactionId);
 721                                 END_CRIT_SECTION();
 722                         }
 723
 724                         UnlockReleaseBuffer(buf);
 725                         RecordPageWithFreeSpace(onerel, blkno, freespace);
 726                         continue;
 727                 }
 728
 729                 /*
 730                  * Prune all HOT-update chains in this page.
 731                  *
 732                  * We count tuples removed by the pruning step as removed by VACUUM.
 733                  */
 734                 tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
 735                                                                                  &vacrelstats->latestRemovedXid);
 736
 737                 /*
 738                  * Now scan the page to collect vacuumable items and check for tuples
 739                  * requiring freezing.
 740                  */
 741                 all_visible = true;
 742                 has_dead_tuples = false;
 743                 nfrozen = 0;
 744                 hastup = false;
 745                 prev_dead_count = vacrelstats->num_dead_tuples;
 746                 maxoff = PageGetMaxOffsetNumber(page);
 747
 748                 /*
 749                  * Note: If you change anything in the loop below, also look at
 750                  * heap_page_is_all_visible to see if that needs to be changed.
 751                  */
 752                 for (offnum = FirstOffsetNumber;
 753                          offnum <= maxoff;
 754                          offnum = OffsetNumberNext(offnum))
 755                 {
 756                         ItemId          itemid;
 757
 758                         itemid = PageGetItemId(page, offnum);
 759
 760                         /* Unused items require no processing, but we count 'em */
 761                         if (!ItemIdIsUsed(itemid))
 762                         {
 763                                 nunused += 1;
 764                                 continue;
 765                         }
 766
 767                         /* Redirect items mustn't be touched */
 768                         if (ItemIdIsRedirected(itemid))
 769                         {
 770                                 hastup = true;  /* this page won't be truncatable */
 771                                 continue;
 772                         }
 773
 774                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
 775
 776                         /*
 777                          * DEAD item pointers are to be vacuumed normally; but we don't
 778                          * count them in tups_vacuumed, else we'd be double-counting (at
 779                          * least in the common case where heap_page_prune() just freed up
 780                          * a non-HOT tuple).
 781                          */
 782                         if (ItemIdIsDead(itemid))
 783                         {
 784                                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
 785                                 all_visible = false;
 786                                 continue;
 787                         }
 788
 789                         Assert(ItemIdIsNormal(itemid));
 790
 791                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
 792                         tuple.t_len = ItemIdGetLength(itemid);
 793                         tuple.t_tableOid = RelationGetRelid(onerel);
 794
 795                         tupgone = false;
 796
 797                         switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
 798                         {
 799                                 case HEAPTUPLE_DEAD:
 800
 801                                         /*
 802                                          * Ordinarily, DEAD tuples would have been removed by
 803                                          * heap_page_prune(), but it's possible that the tuple
 804                                          * state changed since heap_page_prune() looked.  In
 805                                          * particular an INSERT_IN_PROGRESS tuple could have
 806                                          * changed to DEAD if the inserter aborted.  So this
 807                                          * cannot be considered an error condition.
 808                                          *
 809                                          * If the tuple is HOT-updated then it must only be
 810                                          * removed by a prune operation; so we keep it just as if
 811                                          * it were RECENTLY_DEAD.  Also, if it's a heap-only
 812                                          * tuple, we choose to keep it, because it'll be a lot
 813                                          * cheaper to get rid of it in the next pruning pass than
 814                                          * to treat it like an indexed tuple.
 815                                          */
 816                                         if (HeapTupleIsHotUpdated(&tuple) ||
 817                                                 HeapTupleIsHeapOnly(&tuple))
 818                                                 nkeep += 1;
 819                                         else
 820                                                 tupgone = true; /* we can delete the tuple */
 821                                         all_visible = false;
 822                                         break;
 823                                 case HEAPTUPLE_LIVE:
 824                                         /* Tuple is good --- but let's do some validity checks */
 825                                         if (onerel->rd_rel->relhasoids &&
 826                                                 !OidIsValid(HeapTupleGetOid(&tuple)))
 827                                                 elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
 828                                                          relname, blkno, offnum);
 829
 830                                         /*
 831                                          * Is the tuple definitely visible to all transactions?
 832                                          *
 833                                          * NB: Like with per-tuple hint bits, we can't set the
 834                                          * PD_ALL_VISIBLE flag if the inserter committed
 835                                          * asynchronously. See SetHintBits for more info. Check
 836                                          * that the tuple is hinted xmin-committed because
 837                                          * of that.
 838                                          */
 839                                         if (all_visible)
 840                                         {
 841                                                 TransactionId xmin;
 842
 843                                                 if (!HeapTupleHeaderXminCommitted(tuple.t_data))
 844                                                 {
 845                                                         all_visible = false;
 846                                                         break;
 847                                                 }
 848
 849                                                 /*
 850                                                  * The inserter definitely committed. But is it old
 851                                                  * enough that everyone sees it as committed?
 852                                                  */
 853                                                 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
 854                                                 if (!TransactionIdPrecedes(xmin, OldestXmin))
 855                                                 {
 856                                                         all_visible = false;
 857                                                         break;
 858                                                 }
 859
 860                                                 /* Track newest xmin on page. */
 861                                                 if (TransactionIdFollows(xmin, visibility_cutoff_xid))
 862                                                         visibility_cutoff_xid = xmin;
 863                                         }
 864                                         break;
 865                                 case HEAPTUPLE_RECENTLY_DEAD:
 866
 867                                         /*
 868                                          * If tuple is recently deleted then we must not remove it
 869                                          * from relation.
 870                                          */
 871                                         nkeep += 1;
 872                                         all_visible = false;
 873                                         break;
 874                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
 875                                         /* This is an expected case during concurrent vacuum */
 876                                         all_visible = false;
 877                                         break;
 878                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
 879                                         /* This is an expected case during concurrent vacuum */
 880                                         all_visible = false;
 881                                         break;
 882                                 default:
 883                                         elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
 884                                         break;
 885                         }
 886
 887                         if (tupgone)
 888                         {
 889                                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
 890                                 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
 891                                                                                          &vacrelstats->latestRemovedXid);
 892                                 tups_vacuumed += 1;
 893                                 has_dead_tuples = true;
 894                         }
 895                         else
 896                         {
 897                                 num_tuples += 1;
 898                                 hastup = true;
 899
 900                                 /*
 901                                  * Each non-removable tuple must be checked to see if it needs
 902                                  * freezing.  Note we already have exclusive buffer lock.
 903                                  */
 904                                 if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
 905                                                                                   MultiXactCutoff, &frozen[nfrozen]))
 906                                         frozen[nfrozen++].offset = offnum;
 907                         }
 908                 }                                               /* scan along page */
 909
 910                 /*
 911                  * If we froze any tuples, mark the buffer dirty, and write a WAL
 912                  * record recording the changes.  We must log the changes to be
 913                  * crash-safe against future truncation of CLOG.
 914                  */
 915                 if (nfrozen > 0)
 916                 {
 917                         START_CRIT_SECTION();
 918
 919                         MarkBufferDirty(buf);
 920
 921                         /* execute collected freezes */
 922                         for (i = 0; i < nfrozen; i++)
 923                         {
 924                                 ItemId          itemid;
 925                                 HeapTupleHeader htup;
 926
 927                                 itemid = PageGetItemId(page, frozen[i].offset);
 928                                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
 929
 930                                 heap_execute_freeze_tuple(htup, &frozen[i]);
 931                         }
 932
 933                         /* Now WAL-log freezing if neccessary */
 934                         if (RelationNeedsWAL(onerel))
 935                         {
 936                                 XLogRecPtr      recptr;
 937
 938                                 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
 939                                                                                  frozen, nfrozen);
 940                                 PageSetLSN(page, recptr);
 941                         }
 942
 943                         END_CRIT_SECTION();
 944                 }
 945
 946                 /*
 947                  * If there are no indexes then we can vacuum the page right now
 948                  * instead of doing a second scan.
 949                  */
 950                 if (nindexes == 0 &&
 951                         vacrelstats->num_dead_tuples > 0)
 952                 {
 953                         /* Remove tuples from heap */
 954                         lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
 955                         has_dead_tuples = false;
 956
 957                         /*
 958                          * Forget the now-vacuumed tuples, and press on, but be careful
 959                          * not to reset latestRemovedXid since we want that value to be
 960                          * valid.
 961                          */
 962                         vacrelstats->num_dead_tuples = 0;
 963                         vacuumed_pages++;
 964                 }
 965
 966                 freespace = PageGetHeapFreeSpace(page);
 967
 968                 /* mark page all-visible, if appropriate */
 969                 if (all_visible && !all_visible_according_to_vm)
 970                 {
 971                         /*
 972                          * It should never be the case that the visibility map page is set
 973                          * while the page-level bit is clear, but the reverse is allowed
 974                          * (if checksums are not enabled).      Regardless, set the both bits
 975                          * so that we get back in sync.
 976                          *
 977                          * NB: If the heap page is all-visible but the VM bit is not set,
 978                          * we don't need to dirty the heap page.  However, if checksums
 979                          * are enabled, we do need to make sure that the heap page is
 980                          * dirtied before passing it to visibilitymap_set(), because it
 981                          * may be logged.  Given that this situation should only happen in
 982                          * rare cases after a crash, it is not worth optimizing.
 983                          */
 984                         PageSetAllVisible(page);
 985                         MarkBufferDirty(buf);
 986                         visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
 987                                                           vmbuffer, visibility_cutoff_xid);
 988                 }
 989
 990                 /*
 991                  * As of PostgreSQL 9.2, the visibility map bit should never be set if
 992                  * the page-level bit is clear.  However, it's possible that the bit
 993                  * got cleared after we checked it and before we took the buffer
 994                  * content lock, so we must recheck before jumping to the conclusion
 995                  * that something bad has happened.
 996                  */
 997                 else if (all_visible_according_to_vm && !PageIsAllVisible(page)
 998                                  && visibilitymap_test(onerel, blkno, &vmbuffer))
 999                 {
1000                         elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1001                                  relname, blkno);
1002                         visibilitymap_clear(onerel, blkno, vmbuffer);
1003                 }
1004
1005                 /*
1006                  * It's possible for the value returned by GetOldestXmin() to move
1007                  * backwards, so it's not wrong for us to see tuples that appear to
1008                  * not be visible to everyone yet, while PD_ALL_VISIBLE is already
1009                  * set. The real safe xmin value never moves backwards, but
1010                  * GetOldestXmin() is conservative and sometimes returns a value
1011                  * that's unnecessarily small, so if we see that contradiction it just
1012                  * means that the tuples that we think are not visible to everyone yet
1013                  * actually are, and the PD_ALL_VISIBLE flag is correct.
1014                  *
1015                  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1016                  * set, however.
1017                  */
1018                 else if (PageIsAllVisible(page) && has_dead_tuples)
1019                 {
1020                         elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1021                                  relname, blkno);
1022                         PageClearAllVisible(page);
1023                         MarkBufferDirty(buf);
1024                         visibilitymap_clear(onerel, blkno, vmbuffer);
1025                 }
1026
1027                 UnlockReleaseBuffer(buf);
1028
1029                 /* Remember the location of the last page with nonremovable tuples */
1030                 if (hastup)
1031                         vacrelstats->nonempty_pages = blkno + 1;
1032
1033                 /*
1034                  * If we remembered any tuples for deletion, then the page will be
1035                  * visited again by lazy_vacuum_heap, which will compute and record
1036                  * its post-compaction free space.      If not, then we're done with this
1037                  * page, so remember its free space as-is.      (This path will always be
1038                  * taken if there are no indexes.)
1039                  */
1040                 if (vacrelstats->num_dead_tuples == prev_dead_count)
1041                         RecordPageWithFreeSpace(onerel, blkno, freespace);
1042         }
1043
1044         pfree(frozen);
1045
1046         /* save stats for use later */
1047         vacrelstats->scanned_tuples = num_tuples;
1048         vacrelstats->tuples_deleted = tups_vacuumed;
1049         vacrelstats->new_dead_tuples = nkeep;
1050
1051         /* now we can compute the new value for pg_class.reltuples */
1052         vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
1053                                                                                                                  nblocks,
1054                                                                                                   vacrelstats->scanned_pages,
1055                                                                                                                  num_tuples);
1056
1057         /*
1058          * Release any remaining pin on visibility map page.
1059          */
1060         if (BufferIsValid(vmbuffer))
1061         {
1062                 ReleaseBuffer(vmbuffer);
1063                 vmbuffer = InvalidBuffer;
1064         }
1065
1066         /* If any tuples need to be deleted, perform final vacuum cycle */
1067         /* XXX put a threshold on min number of tuples here? */
1068         if (vacrelstats->num_dead_tuples > 0)
1069         {
1070                 /* Log cleanup info before we touch indexes */
1071                 vacuum_log_cleanup_info(onerel, vacrelstats);
1072
1073                 /* Remove index entries */
1074                 for (i = 0; i < nindexes; i++)
1075                         lazy_vacuum_index(Irel[i],
1076                                                           &indstats[i],
1077                                                           vacrelstats);
1078                 /* Remove tuples from heap */
1079                 lazy_vacuum_heap(onerel, vacrelstats);
1080                 vacrelstats->num_index_scans++;
1081         }
1082
1083         /* Do post-vacuum cleanup and statistics update for each index */
1084         for (i = 0; i < nindexes; i++)
1085                 lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
1086
1087         /* If no indexes, make log report that lazy_vacuum_heap would've made */
1088         if (vacuumed_pages)
1089                 ereport(elevel,
1090                                 (errmsg("\"%s\": removed %.0f row versions in %u pages",
1091                                                 RelationGetRelationName(onerel),
1092                                                 tups_vacuumed, vacuumed_pages)));
1093
1094         ereport(elevel,
1095                         (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
1096                                         RelationGetRelationName(onerel),
1097                                         tups_vacuumed, num_tuples,
1098                                         vacrelstats->scanned_pages, nblocks),
1099                          errdetail("%.0f dead row versions cannot be removed yet.\n"
1100                                            "There were %.0f unused item pointers.\n"
1101                                            "%u pages are entirely empty.\n"
1102                                            "%s.",
1103                                            nkeep,
1104                                            nunused,
1105                                            empty_pages,
1106                                            pg_rusage_show(&ru0))));
1107 }
1108
1109
1110 /*
1111  *      lazy_vacuum_heap() -- second pass over the heap
1112  *
1113  *              This routine marks dead tuples as unused and compacts out free
1114  *              space on their pages.  Pages not having dead tuples recorded from
1115  *              lazy_scan_heap are not visited at all.
1116  *
1117  * Note: the reason for doing this as a second pass is we cannot remove
1118  * the tuples until we've removed their index entries, and we want to
1119  * process index entry removal in batches as large as possible.
1120  */
1121 static void
1122 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
1123 {
1124         int                     tupindex;
1125         int                     npages;
1126         PGRUsage        ru0;
1127         Buffer          vmbuffer = InvalidBuffer;
1128
1129         pg_rusage_init(&ru0);
1130         npages = 0;
1131
1132         tupindex = 0;
1133         while (tupindex < vacrelstats->num_dead_tuples)
1134         {
1135                 BlockNumber tblk;
1136                 Buffer          buf;
1137                 Page            page;
1138                 Size            freespace;
1139
1140                 vacuum_delay_point();
1141
1142                 tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
1143                 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
1144                                                                  vac_strategy);
1145                 if (!ConditionalLockBufferForCleanup(buf))
1146                 {
1147                         ReleaseBuffer(buf);
1148                         ++tupindex;
1149                         continue;
1150                 }
1151                 tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
1152                                                                         &vmbuffer);
1153
1154                 /* Now that we've compacted the page, record its available space */
1155                 page = BufferGetPage(buf);
1156                 freespace = PageGetHeapFreeSpace(page);
1157
1158                 UnlockReleaseBuffer(buf);
1159                 RecordPageWithFreeSpace(onerel, tblk, freespace);
1160                 npages++;
1161         }
1162
1163         if (BufferIsValid(vmbuffer))
1164         {
1165                 ReleaseBuffer(vmbuffer);
1166                 vmbuffer = InvalidBuffer;
1167         }
1168
1169         ereport(elevel,
1170                         (errmsg("\"%s\": removed %d row versions in %d pages",
1171                                         RelationGetRelationName(onerel),
1172                                         tupindex, npages),
1173                          errdetail("%s.",
1174                                            pg_rusage_show(&ru0))));
1175 }
1176
1177 /*
1178  *      lazy_vacuum_page() -- free dead tuples on a page
1179  *                                       and repair its fragmentation.
1180  *
1181  * Caller must hold pin and buffer cleanup lock on the buffer.
1182  *
1183  * tupindex is the index in vacrelstats->dead_tuples of the first dead
1184  * tuple for this page.  We assume the rest follow sequentially.
1185  * The return value is the first tupindex after the tuples of this page.
1186  */
1187 static int
1188 lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
1189                                  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
1190 {
1191         Page            page = BufferGetPage(buffer);
1192         OffsetNumber unused[MaxOffsetNumber];
1193         int                     uncnt = 0;
1194         TransactionId visibility_cutoff_xid;
1195
1196         START_CRIT_SECTION();
1197
1198         for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
1199         {
1200                 BlockNumber tblk;
1201                 OffsetNumber toff;
1202                 ItemId          itemid;
1203
1204                 tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
1205                 if (tblk != blkno)
1206                         break;                          /* past end of tuples for this block */
1207                 toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
1208                 itemid = PageGetItemId(page, toff);
1209                 ItemIdSetUnused(itemid);
1210                 unused[uncnt++] = toff;
1211         }
1212
1213         PageRepairFragmentation(page);
1214
1215         /*
1216          * Mark buffer dirty before we write WAL.
1217          */
1218         MarkBufferDirty(buffer);
1219
1220         /* XLOG stuff */
1221         if (RelationNeedsWAL(onerel))
1222         {
1223                 XLogRecPtr      recptr;
1224
1225                 recptr = log_heap_clean(onerel, buffer,
1226                                                                 NULL, 0, NULL, 0,
1227                                                                 unused, uncnt,
1228                                                                 vacrelstats->latestRemovedXid);
1229                 PageSetLSN(page, recptr);
1230         }
1231
1232         /*
1233          * Now that we have removed the dead tuples from the page, once again
1234          * check if the page has become all-visible.
1235          */
1236         if (!visibilitymap_test(onerel, blkno, vmbuffer) &&
1237                 heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
1238         {
1239                 Assert(BufferIsValid(*vmbuffer));
1240                 PageSetAllVisible(page);
1241                 visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
1242                                                   visibility_cutoff_xid);
1243         }
1244
1245         END_CRIT_SECTION();
1246
1247         return tupindex;
1248 }
1249
1250 /*
1251  *      lazy_check_needs_freeze() -- scan page to see if any tuples
1252  *                                       need to be cleaned to avoid wraparound
1253  *
1254  * Returns true if the page needs to be vacuumed using cleanup lock.
1255  */
1256 static bool
1257 lazy_check_needs_freeze(Buffer buf)
1258 {
1259         Page            page;
1260         OffsetNumber offnum,
1261                                 maxoff;
1262         HeapTupleHeader tupleheader;
1263
1264         page = BufferGetPage(buf);
1265
1266         if (PageIsNew(page) || PageIsEmpty(page))
1267         {
1268                 /* PageIsNew probably shouldn't happen... */
1269                 return false;
1270         }
1271
1272         maxoff = PageGetMaxOffsetNumber(page);
1273         for (offnum = FirstOffsetNumber;
1274                  offnum <= maxoff;
1275                  offnum = OffsetNumberNext(offnum))
1276         {
1277                 ItemId          itemid;
1278
1279                 itemid = PageGetItemId(page, offnum);
1280
1281                 if (!ItemIdIsNormal(itemid))
1282                         continue;
1283
1284                 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1285
1286                 if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
1287                                                                         MultiXactCutoff, buf))
1288                         return true;
1289         }                                                       /* scan along page */
1290
1291         return false;
1292 }
1293
1294
1295 /*
1296  *      lazy_vacuum_index() -- vacuum one index relation.
1297  *
1298  *              Delete all the index entries pointing to tuples listed in
1299  *              vacrelstats->dead_tuples, and update running statistics.
1300  */
1301 static void
1302 lazy_vacuum_index(Relation indrel,
1303                                   IndexBulkDeleteResult **stats,
1304                                   LVRelStats *vacrelstats)
1305 {
1306         IndexVacuumInfo ivinfo;
1307         PGRUsage        ru0;
1308
1309         pg_rusage_init(&ru0);
1310
1311         ivinfo.index = indrel;
1312         ivinfo.analyze_only = false;
1313         ivinfo.estimated_count = true;
1314         ivinfo.message_level = elevel;
1315         ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
1316         ivinfo.strategy = vac_strategy;
1317
1318         /* Do bulk deletion */
1319         *stats = index_bulk_delete(&ivinfo, *stats,
1320                                                            lazy_tid_reaped, (void *) vacrelstats);
1321
1322         ereport(elevel,
1323                         (errmsg("scanned index \"%s\" to remove %d row versions",
1324                                         RelationGetRelationName(indrel),
1325                                         vacrelstats->num_dead_tuples),
1326                          errdetail("%s.", pg_rusage_show(&ru0))));
1327 }
1328
1329 /*
1330  *      lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
1331  */
1332 static void
1333 lazy_cleanup_index(Relation indrel,
1334                                    IndexBulkDeleteResult *stats,
1335                                    LVRelStats *vacrelstats)
1336 {
1337         IndexVacuumInfo ivinfo;
1338         PGRUsage        ru0;
1339
1340         pg_rusage_init(&ru0);
1341
1342         ivinfo.index = indrel;
1343         ivinfo.analyze_only = false;
1344         ivinfo.estimated_count = (vacrelstats->scanned_pages < vacrelstats->rel_pages);
1345         ivinfo.message_level = elevel;
1346         ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
1347         ivinfo.strategy = vac_strategy;
1348
1349         stats = index_vacuum_cleanup(&ivinfo, stats);
1350
1351         if (!stats)
1352                 return;
1353
1354         /*
1355          * Now update statistics in pg_class, but only if the index says the count
1356          * is accurate.
1357          */
1358         if (!stats->estimated_count)
1359                 vac_update_relstats(indrel,
1360                                                         stats->num_pages,
1361                                                         stats->num_index_tuples,
1362                                                         0,
1363                                                         false,
1364                                                         InvalidTransactionId,
1365                                                         InvalidMultiXactId);
1366
1367         ereport(elevel,
1368                         (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
1369                                         RelationGetRelationName(indrel),
1370                                         stats->num_index_tuples,
1371                                         stats->num_pages),
1372                          errdetail("%.0f index row versions were removed.\n"
1373                          "%u index pages have been deleted, %u are currently reusable.\n"
1374                                            "%s.",
1375                                            stats->tuples_removed,
1376                                            stats->pages_deleted, stats->pages_free,
1377                                            pg_rusage_show(&ru0))));
1378
1379         pfree(stats);
1380 }
1381
1382 /*
1383  * lazy_truncate_heap - try to truncate off any empty pages at the end
1384  */
1385 static void
1386 lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
1387 {
1388         BlockNumber old_rel_pages = vacrelstats->rel_pages;
1389         BlockNumber new_rel_pages;
1390         PGRUsage        ru0;
1391         int                     lock_retry;
1392
1393         pg_rusage_init(&ru0);
1394
1395         /*
1396          * Loop until no more truncating can be done.
1397          */
1398         do
1399         {
1400                 /*
1401                  * We need full exclusive lock on the relation in order to do
1402                  * truncation. If we can't get it, give up rather than waiting --- we
1403                  * don't want to block other backends, and we don't want to deadlock
1404                  * (which is quite possible considering we already hold a lower-grade
1405                  * lock).
1406                  */
1407                 vacrelstats->lock_waiter_detected = false;
1408                 lock_retry = 0;
1409                 while (true)
1410                 {
1411                         if (ConditionalLockRelation(onerel, AccessExclusiveLock))
1412                                 break;
1413
1414                         /*
1415                          * Check for interrupts while trying to (re-)acquire the exclusive
1416                          * lock.
1417                          */
1418                         CHECK_FOR_INTERRUPTS();
1419
1420                         if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
1421                                                                 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
1422                         {
1423                                 /*
1424                                  * We failed to establish the lock in the specified number of
1425                                  * retries. This means we give up truncating.
1426                                  */
1427                                 vacrelstats->lock_waiter_detected = true;
1428                                 ereport(elevel,
1429                                                 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
1430                                                                 RelationGetRelationName(onerel))));
1431                                 return;
1432                         }
1433
1434                         pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL);
1435                 }
1436
1437                 /*
1438                  * Now that we have exclusive lock, look to see if the rel has grown
1439                  * whilst we were vacuuming with non-exclusive lock.  If so, give up;
1440                  * the newly added pages presumably contain non-deletable tuples.
1441                  */
1442                 new_rel_pages = RelationGetNumberOfBlocks(onerel);
1443                 if (new_rel_pages != old_rel_pages)
1444                 {
1445                         /*
1446                          * Note: we intentionally don't update vacrelstats->rel_pages with
1447                          * the new rel size here.  If we did, it would amount to assuming
1448                          * that the new pages are empty, which is unlikely. Leaving the
1449                          * numbers alone amounts to assuming that the new pages have the
1450                          * same tuple density as existing ones, which is less unlikely.
1451                          */
1452                         UnlockRelation(onerel, AccessExclusiveLock);
1453                         return;
1454                 }
1455
1456                 /*
1457                  * Scan backwards from the end to verify that the end pages actually
1458                  * contain no tuples.  This is *necessary*, not optional, because
1459                  * other backends could have added tuples to these pages whilst we
1460                  * were vacuuming.
1461                  */
1462                 new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
1463
1464                 if (new_rel_pages >= old_rel_pages)
1465                 {
1466                         /* can't do anything after all */
1467                         UnlockRelation(onerel, AccessExclusiveLock);
1468                         return;
1469                 }
1470
1471                 /*
1472                  * Okay to truncate.
1473                  */
1474                 RelationTruncate(onerel, new_rel_pages);
1475
1476                 /*
1477                  * We can release the exclusive lock as soon as we have truncated.
1478                  * Other backends can't safely access the relation until they have
1479                  * processed the smgr invalidation that smgrtruncate sent out ... but
1480                  * that should happen as part of standard invalidation processing once
1481                  * they acquire lock on the relation.
1482                  */
1483                 UnlockRelation(onerel, AccessExclusiveLock);
1484
1485                 /*
1486                  * Update statistics.  Here, it *is* correct to adjust rel_pages
1487                  * without also touching reltuples, since the tuple count wasn't
1488                  * changed by the truncation.
1489                  */
1490                 vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
1491                 vacrelstats->rel_pages = new_rel_pages;
1492
1493                 ereport(elevel,
1494                                 (errmsg("\"%s\": truncated %u to %u pages",
1495                                                 RelationGetRelationName(onerel),
1496                                                 old_rel_pages, new_rel_pages),
1497                                  errdetail("%s.",
1498                                                    pg_rusage_show(&ru0))));
1499                 old_rel_pages = new_rel_pages;
1500         } while (new_rel_pages > vacrelstats->nonempty_pages &&
1501                          vacrelstats->lock_waiter_detected);
1502 }
1503
1504 /*
1505  * Rescan end pages to verify that they are (still) empty of tuples.
1506  *
1507  * Returns number of nondeletable pages (last nonempty page + 1).
1508  */
1509 static BlockNumber
1510 count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
1511 {
1512         BlockNumber blkno;
1513         instr_time      starttime;
1514
1515         /* Initialize the starttime if we check for conflicting lock requests */
1516         INSTR_TIME_SET_CURRENT(starttime);
1517
1518         /* Strange coding of loop control is needed because blkno is unsigned */
1519         blkno = vacrelstats->rel_pages;
1520         while (blkno > vacrelstats->nonempty_pages)
1521         {
1522                 Buffer          buf;
1523                 Page            page;
1524                 OffsetNumber offnum,
1525                                         maxoff;
1526                 bool            hastup;
1527
1528                 /*
1529                  * Check if another process requests a lock on our relation. We are
1530                  * holding an AccessExclusiveLock here, so they will be waiting. We
1531                  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
1532                  * only check if that interval has elapsed once every 32 blocks to
1533                  * keep the number of system calls and actual shared lock table
1534                  * lookups to a minimum.
1535                  */
1536                 if ((blkno % 32) == 0)
1537                 {
1538                         instr_time      currenttime;
1539                         instr_time      elapsed;
1540
1541                         INSTR_TIME_SET_CURRENT(currenttime);
1542                         elapsed = currenttime;
1543                         INSTR_TIME_SUBTRACT(elapsed, starttime);
1544                         if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
1545                                 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
1546                         {
1547                                 if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
1548                                 {
1549                                         ereport(elevel,
1550                                                         (errmsg("\"%s\": suspending truncate due to conflicting lock request",
1551                                                                         RelationGetRelationName(onerel))));
1552
1553                                         vacrelstats->lock_waiter_detected = true;
1554                                         return blkno;
1555                                 }
1556                                 starttime = currenttime;
1557                         }
1558                 }
1559
1560                 /*
1561                  * We don't insert a vacuum delay point here, because we have an
1562                  * exclusive lock on the table which we want to hold for as short a
1563                  * time as possible.  We still need to check for interrupts however.
1564                  */
1565                 CHECK_FOR_INTERRUPTS();
1566
1567                 blkno--;
1568
1569                 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1570                                                                  RBM_NORMAL, vac_strategy);
1571
1572                 /* In this phase we only need shared access to the buffer */
1573                 LockBuffer(buf, BUFFER_LOCK_SHARE);
1574
1575                 page = BufferGetPage(buf);
1576
1577                 if (PageIsNew(page) || PageIsEmpty(page))
1578                 {
1579                         /* PageIsNew probably shouldn't happen... */
1580                         UnlockReleaseBuffer(buf);
1581                         continue;
1582                 }
1583
1584                 hastup = false;
1585                 maxoff = PageGetMaxOffsetNumber(page);
1586                 for (offnum = FirstOffsetNumber;
1587                          offnum <= maxoff;
1588                          offnum = OffsetNumberNext(offnum))
1589                 {
1590                         ItemId          itemid;
1591
1592                         itemid = PageGetItemId(page, offnum);
1593
1594                         /*
1595                          * Note: any non-unused item should be taken as a reason to keep
1596                          * this page.  We formerly thought that DEAD tuples could be
1597                          * thrown away, but that's not so, because we'd not have cleaned
1598                          * out their index entries.
1599                          */
1600                         if (ItemIdIsUsed(itemid))
1601                         {
1602                                 hastup = true;
1603                                 break;                  /* can stop scanning */
1604                         }
1605                 }                                               /* scan along page */
1606
1607                 UnlockReleaseBuffer(buf);
1608
1609                 /* Done scanning if we found a tuple here */
1610                 if (hastup)
1611                         return blkno + 1;
1612         }
1613
1614         /*
1615          * If we fall out of the loop, all the previously-thought-to-be-empty
1616          * pages still are; we need not bother to look at the last known-nonempty
1617          * page.
1618          */
1619         return vacrelstats->nonempty_pages;
1620 }
1621
1622 /*
1623  * lazy_space_alloc - space allocation decisions for lazy vacuum
1624  *
1625  * See the comments at the head of this file for rationale.
1626  */
1627 static void
1628 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
1629 {
1630         long            maxtuples;
1631         int                     vac_work_mem =  IsAutoVacuumWorkerProcess() &&
1632                                                                         autovacuum_work_mem != -1 ?
1633                                                                 autovacuum_work_mem : maintenance_work_mem;
1634
1635         if (vacrelstats->hasindex)
1636         {
1637                 maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
1638                 maxtuples = Min(maxtuples, INT_MAX);
1639                 maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
1640
1641                 /* curious coding here to ensure the multiplication can't overflow */
1642                 if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
1643                         maxtuples = relblocks * LAZY_ALLOC_TUPLES;
1644
1645                 /* stay sane if small maintenance_work_mem */
1646                 maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
1647         }
1648         else
1649         {
1650                 maxtuples = MaxHeapTuplesPerPage;
1651         }
1652
1653         vacrelstats->num_dead_tuples = 0;
1654         vacrelstats->max_dead_tuples = (int) maxtuples;
1655         vacrelstats->dead_tuples = (ItemPointer)
1656                 palloc(maxtuples * sizeof(ItemPointerData));
1657 }
1658
1659 /*
1660  * lazy_record_dead_tuple - remember one deletable tuple
1661  */
1662 static void
1663 lazy_record_dead_tuple(LVRelStats *vacrelstats,
1664                                            ItemPointer itemptr)
1665 {
1666         /*
1667          * The array shouldn't overflow under normal behavior, but perhaps it
1668          * could if we are given a really small maintenance_work_mem. In that
1669          * case, just forget the last few tuples (we'll get 'em next time).
1670          */
1671         if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
1672         {
1673                 vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
1674                 vacrelstats->num_dead_tuples++;
1675         }
1676 }
1677
1678 /*
1679  *      lazy_tid_reaped() -- is a particular tid deletable?
1680  *
1681  *              This has the right signature to be an IndexBulkDeleteCallback.
1682  *
1683  *              Assumes dead_tuples array is in sorted order.
1684  */
1685 static bool
1686 lazy_tid_reaped(ItemPointer itemptr, void *state)
1687 {
1688         LVRelStats *vacrelstats = (LVRelStats *) state;
1689         ItemPointer res;
1690
1691         res = (ItemPointer) bsearch((void *) itemptr,
1692                                                                 (void *) vacrelstats->dead_tuples,
1693                                                                 vacrelstats->num_dead_tuples,
1694                                                                 sizeof(ItemPointerData),
1695                                                                 vac_cmp_itemptr);
1696
1697         return (res != NULL);
1698 }
1699
1700 /*
1701  * Comparator routines for use with qsort() and bsearch().
1702  */
1703 static int
1704 vac_cmp_itemptr(const void *left, const void *right)
1705 {
1706         BlockNumber lblk,
1707                                 rblk;
1708         OffsetNumber loff,
1709                                 roff;
1710
1711         lblk = ItemPointerGetBlockNumber((ItemPointer) left);
1712         rblk = ItemPointerGetBlockNumber((ItemPointer) right);
1713
1714         if (lblk < rblk)
1715                 return -1;
1716         if (lblk > rblk)
1717                 return 1;
1718
1719         loff = ItemPointerGetOffsetNumber((ItemPointer) left);
1720         roff = ItemPointerGetOffsetNumber((ItemPointer) right);
1721
1722         if (loff < roff)
1723                 return -1;
1724         if (loff > roff)
1725                 return 1;
1726
1727         return 0;
1728 }
1729
1730 /*
1731  * Check if every tuple in the given page is visible to all current and future
1732  * transactions. Also return the visibility_cutoff_xid which is the highest
1733  * xmin amongst the visible tuples.
1734  */
1735 static bool
1736 heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
1737 {
1738         Page            page = BufferGetPage(buf);
1739         OffsetNumber offnum,
1740                                 maxoff;
1741         bool            all_visible = true;
1742
1743         *visibility_cutoff_xid = InvalidTransactionId;
1744
1745         /*
1746          * This is a stripped down version of the line pointer scan in
1747          * lazy_scan_heap(). So if you change anything here, also check that code.
1748          */
1749         maxoff = PageGetMaxOffsetNumber(page);
1750         for (offnum = FirstOffsetNumber;
1751                  offnum <= maxoff && all_visible;
1752                  offnum = OffsetNumberNext(offnum))
1753         {
1754                 ItemId          itemid;
1755                 HeapTupleData tuple;
1756
1757                 itemid = PageGetItemId(page, offnum);
1758
1759                 /* Unused or redirect line pointers are of no interest */
1760                 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
1761                         continue;
1762
1763                 ItemPointerSet(&(tuple.t_self), BufferGetBlockNumber(buf), offnum);
1764
1765                 /*
1766                  * Dead line pointers can have index pointers pointing to them. So
1767                  * they can't be treated as visible
1768                  */
1769                 if (ItemIdIsDead(itemid))
1770                 {
1771                         all_visible = false;
1772                         break;
1773                 }
1774
1775                 Assert(ItemIdIsNormal(itemid));
1776
1777                 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1778                 tuple.t_len = ItemIdGetLength(itemid);
1779                 tuple.t_tableOid = RelationGetRelid(rel);
1780
1781                 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
1782                 {
1783                         case HEAPTUPLE_LIVE:
1784                                 {
1785                                         TransactionId xmin;
1786
1787                                         /* Check comments in lazy_scan_heap. */
1788                                         if (!HeapTupleHeaderXminCommitted(tuple.t_data))
1789                                         {
1790                                                 all_visible = false;
1791                                                 break;
1792                                         }
1793
1794                                         /*
1795                                          * The inserter definitely committed. But is it old enough
1796                                          * that everyone sees it as committed?
1797                                          */
1798                                         xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1799                                         if (!TransactionIdPrecedes(xmin, OldestXmin))
1800                                         {
1801                                                 all_visible = false;
1802                                                 break;
1803                                         }
1804
1805                                         /* Track newest xmin on page. */
1806                                         if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
1807                                                 *visibility_cutoff_xid = xmin;
1808                                 }
1809                                 break;
1810
1811                         case HEAPTUPLE_DEAD:
1812                         case HEAPTUPLE_RECENTLY_DEAD:
1813                         case HEAPTUPLE_INSERT_IN_PROGRESS:
1814                         case HEAPTUPLE_DELETE_IN_PROGRESS:
1815                                 all_visible = false;
1816                                 break;
1817
1818                         default:
1819                                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1820                                 break;
1821                 }
1822         }                                                       /* scan along page */
1823
1824         return all_visible;
1825 }