1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  *        Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead
8  * tuple TIDs, with the next biggest need being storage for per-disk-page
9  * free space info.  We want to ensure we can vacuum even the very largest
10  * relations with finite memory space usage.  To do that, we set upper bounds
11  * on the number of tuples and pages we will keep track of at once.
12  *
13  * We are willing to use at most maintenance_work_mem memory space to keep
14  * track of dead tuples.  We initially allocate an array of TIDs of that size,
15  * with an upper limit that depends on table size (this limit ensures we don't
16  * allocate a huge area uselessly for vacuuming small tables).  If the array
17  * threatens to overflow, we suspend the heap scan phase and perform a pass of
18  * index cleanup and page compaction, then resume the heap scan with an empty
19  * TID array.
20  *
21  * If we're processing a table with no indexes, we can just vacuum each page
22  * as we go; there's no need to save up multiple tuples to minimize the number
23  * of index scans performed.  So we don't use maintenance_work_mem memory for
24  * the TID array, just enough to hold as many heap tuples as fit on one page.
25  *
26  *
27  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
28  * Portions Copyright (c) 1994, Regents of the University of California
29  *
30  *
31  * IDENTIFICATION
32  *        $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.133 2010/04/21 17:20:56 sriggs Exp $
33  *
34  *-------------------------------------------------------------------------
35  */
36 #include "postgres.h"
37
38 #include <math.h>
39
40 #include "access/genam.h"
41 #include "access/heapam.h"
42 #include "access/transam.h"
43 #include "access/visibilitymap.h"
44 #include "catalog/storage.h"
45 #include "commands/dbcommands.h"
46 #include "commands/vacuum.h"
47 #include "miscadmin.h"
48 #include "pgstat.h"
49 #include "postmaster/autovacuum.h"
50 #include "storage/bufmgr.h"
51 #include "storage/freespace.h"
52 #include "storage/lmgr.h"
53 #include "utils/lsyscache.h"
54 #include "utils/memutils.h"
55 #include "utils/pg_rusage.h"
56 #include "utils/tqual.h"
57
58
59 /*
60  * Space/time tradeoff parameters: do these need to be user-tunable?
61  *
62  * To consider truncating the relation, we want there to be at least
63  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
64  * is less) potentially-freeable pages.
65  */
66 #define REL_TRUNCATE_MINIMUM    1000
67 #define REL_TRUNCATE_FRACTION   16
68
69 /*
70  * Guesstimation of number of dead tuples per page.  This is used to
71  * provide an upper limit on the memory allocated when vacuuming small
72  * tables.
73  */
74 #define LAZY_ALLOC_TUPLES               MaxHeapTuplesPerPage
75
76 /*
77  * Before we consider skipping a page that's marked as clean in
78  * the visibility map, we must've seen at least this many clean pages.
79  */
80 #define SKIP_PAGES_THRESHOLD    32
81
82 typedef struct LVRelStats
83 {
84         /* hasindex = true means two-pass strategy; false means one-pass */
85         bool            hasindex;
86         bool            scanned_all;    /* have we scanned all pages (this far)? */
87         /* Overall statistics about rel */
88         BlockNumber rel_pages;
89         double          old_rel_tuples; /* previous value of pg_class.reltuples */
90         double          rel_tuples;             /* counts only tuples on scanned pages */
91         BlockNumber pages_removed;
92         double          tuples_deleted;
93         BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
94         /* List of TIDs of tuples we intend to delete */
95         /* NB: this list is ordered by TID address */
96         int                     num_dead_tuples;        /* current # of entries */
97         int                     max_dead_tuples;        /* # slots allocated in array */
98         ItemPointer dead_tuples;        /* array of ItemPointerData */
99         int                     num_index_scans;
100         TransactionId latestRemovedXid;
101 } LVRelStats;
102
103
104 /* A few variables that don't seem worth passing around as parameters */
105 static int      elevel = -1;
106
107 static TransactionId OldestXmin;
108 static TransactionId FreezeLimit;
109
110 static BufferAccessStrategy vac_strategy;
111
112
113 /* non-export function prototypes */
114 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
115                            Relation *Irel, int nindexes, bool scan_all);
116 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
117 static void lazy_vacuum_index(Relation indrel,
118                                   IndexBulkDeleteResult **stats,
119                                   LVRelStats *vacrelstats);
120 static void lazy_cleanup_index(Relation indrel,
121                                    IndexBulkDeleteResult *stats,
122                                    LVRelStats *vacrelstats);
123 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
124                                  int tupindex, LVRelStats *vacrelstats);
125 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
126 static BlockNumber count_nondeletable_pages(Relation onerel,
127                                                  LVRelStats *vacrelstats);
128 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
129 static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
130                                            ItemPointer itemptr);
131 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
132 static int      vac_cmp_itemptr(const void *left, const void *right);
133
134
135 /*
136  *      lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
137  *
138  *              This routine vacuums a single heap, cleans out its indexes, and
139  *              updates its relpages and reltuples statistics.
140  *
141  *              At entry, we have already established a transaction and opened
142  *              and locked the relation.
143  */
144 void
145 lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
146                                 BufferAccessStrategy bstrategy, bool *scanned_all)
147 {
148         LVRelStats *vacrelstats;
149         Relation   *Irel;
150         int                     nindexes;
151         BlockNumber possibly_freeable;
152         PGRUsage        ru0;
153         TimestampTz starttime = 0;
154         bool            scan_all;
155         TransactionId freezeTableLimit;
156
157         pg_rusage_init(&ru0);
158
159         /* measure elapsed time iff autovacuum logging requires it */
160         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
161                 starttime = GetCurrentTimestamp();
162
163         if (vacstmt->options & VACOPT_VERBOSE)
164                 elevel = INFO;
165         else
166                 elevel = DEBUG2;
167
168         vac_strategy = bstrategy;
169
170         vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
171                                                   onerel->rd_rel->relisshared,
172                                                   &OldestXmin, &FreezeLimit, &freezeTableLimit);
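            /*
             * scan_all requests a full scan of the table (no visibility-map
             * skipping) when relfrozenxid is old enough that it needs to be
             * advanced; only a scan of every page can safely do that.
             */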
173         scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
174                                                                                          freezeTableLimit);
175
176         vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
177
178         vacrelstats->scanned_all = true;        /* will be cleared if we skip a page */
179         vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
180         vacrelstats->num_index_scans = 0;
181
182         /* Open all indexes of the relation */
183         vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
184         vacrelstats->hasindex = (nindexes > 0);
185
186         /* Do the vacuuming */
187         lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, scan_all);
188
189         /* Done with indexes */
190         vac_close_indexes(nindexes, Irel, NoLock);
191
192         /*
193          * Optionally truncate the relation.
194          *
195          * Don't even think about it unless we have a shot at releasing a goodly
196          * number of pages.  Otherwise, the time taken isn't worth it.
197          */
198         possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
199         if (possibly_freeable > 0 &&
200                 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
201                  possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
202                 lazy_truncate_heap(onerel, vacrelstats);
203
204         /* Vacuum the Free Space Map */
205         FreeSpaceMapVacuum(onerel);
206
207         /*
208          * Update statistics in pg_class.  But only if we didn't skip any pages;
209          * the tuple count only includes tuples from the pages we've visited, and
210          * we haven't frozen tuples in unvisited pages either.  The page count is
211          * accurate in any case, but because we use the reltuples / relpages ratio
212          * in the planner, it's better to not update relpages either if we can't
213          * update reltuples.
214          */
215         if (vacrelstats->scanned_all)
216                 vac_update_relstats(onerel,
217                                                         vacrelstats->rel_pages, vacrelstats->rel_tuples,
218                                                         vacrelstats->hasindex,
219                                                         FreezeLimit);
220
221         /* report results to the stats collector, too */
222         pgstat_report_vacuum(RelationGetRelid(onerel),
223                                                  onerel->rd_rel->relisshared,
224                                                  vacrelstats->scanned_all,
225                                                  vacrelstats->rel_tuples);
226
227         /* and log the action if appropriate */
228         if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
229         {
230                 if (Log_autovacuum_min_duration == 0 ||
231                         TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
232                                                                            Log_autovacuum_min_duration))
233                         ereport(LOG,
234                                         (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
235                                                         "pages: %d removed, %d remain\n"
236                                                         "tuples: %.0f removed, %.0f remain\n"
237                                                         "system usage: %s",
238                                                         get_database_name(MyDatabaseId),
239                                                         get_namespace_name(RelationGetNamespace(onerel)),
240                                                         RelationGetRelationName(onerel),
241                                                         vacrelstats->num_index_scans,
242                                                   vacrelstats->pages_removed, vacrelstats->rel_pages,
243                                                 vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
244                                                         pg_rusage_show(&ru0))));
245         }
246
247         if (scanned_all)
248                 *scanned_all = vacrelstats->scanned_all;
249 }
250
251 /*
252  * For Hot Standby we need to know the highest transaction id that will
253  * be removed by any change. VACUUM proceeds in a number of passes so
254  * we need to consider how each pass operates. The first phase runs
255  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
256  * progresses - these will have a latestRemovedXid on each record.
257  * In some cases this removes all of the tuples to be removed, though
258  * often we have dead tuples with index pointers so we must remember them
259  * for removal in phase 3. Index records for those rows are removed
260  * in phase 2 and index blocks do not have MVCC information attached.
261  * So before we can allow removal of any index tuples we need to issue
262  * a WAL record containing the latestRemovedXid of rows that will be
263  * removed in phase 3. This allows recovery queries to block at the
264  * correct place, i.e. before phase 2, rather than during phase 3,
265  * which would be after the rows have become inaccessible.
266  */
267 static void
268 vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
269 {
270         /*
271  * No need to log changes for temp tables; they do not contain data
272          * visible on the standby server.
273          */
274         if (rel->rd_istemp || !XLogIsNeeded())
275                 return;
276
277         Assert(TransactionIdIsValid(vacrelstats->latestRemovedXid));
278
279         (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
280 }
281
282 /*
283  *      lazy_scan_heap() -- scan an open heap relation
284  *
285  *              This routine sets commit status bits, builds lists of dead tuples
286  *              and pages with free space, and calculates statistics on the number
287  *              of live tuples in the heap.  When done, or when we run low on space
288  *              for dead-tuple TIDs, invoke vacuuming of indexes and heap.
289  *
290  *              If there are no indexes then we just vacuum each dirty page as we
291  *              process it, since there's no point in gathering many tuples.
292  */
293 static void
294 lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
295                            Relation *Irel, int nindexes, bool scan_all)
296 {
297         BlockNumber nblocks,
298                                 blkno;
299         HeapTupleData tuple;
300         char       *relname;
301         BlockNumber empty_pages,
302                                 scanned_pages,
303                                 vacuumed_pages;
304         double          num_tuples,
305                                 tups_vacuumed,
306                                 nkeep,
307                                 nunused;
308         IndexBulkDeleteResult **indstats;
309         int                     i;
310         PGRUsage        ru0;
311         Buffer          vmbuffer = InvalidBuffer;
312         BlockNumber all_visible_streak;
313
314         pg_rusage_init(&ru0);
315
316         relname = RelationGetRelationName(onerel);
317         ereport(elevel,
318                         (errmsg("vacuuming \"%s.%s\"",
319                                         get_namespace_name(RelationGetNamespace(onerel)),
320                                         relname)));
321
322         empty_pages = vacuumed_pages = scanned_pages = 0;
323         num_tuples = tups_vacuumed = nkeep = nunused = 0;
324
325         indstats = (IndexBulkDeleteResult **)
326                 palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
327
328         nblocks = RelationGetNumberOfBlocks(onerel);
329         vacrelstats->rel_pages = nblocks;
330         vacrelstats->nonempty_pages = 0;
331         vacrelstats->latestRemovedXid = InvalidTransactionId;
332
333         lazy_space_alloc(vacrelstats, nblocks);
334
335         all_visible_streak = 0;
336         for (blkno = 0; blkno < nblocks; blkno++)
337         {
338                 Buffer          buf;
339                 Page            page;
340                 OffsetNumber offnum,
341                                         maxoff;
342                 bool            tupgone,
343                                         hastup;
344                 int                     prev_dead_count;
345                 OffsetNumber frozen[MaxOffsetNumber];
346                 int                     nfrozen;
347                 Size            freespace;
348                 bool            all_visible_according_to_vm = false;
349                 bool            all_visible;
350
351                 /*
352                  * Skip pages that don't require vacuuming according to the visibility
353                  * map. But only if we've seen a streak of at least
354                  * SKIP_PAGES_THRESHOLD pages marked as clean. Since we're reading
355                  * sequentially, the OS should be doing readahead for us and there's
356                  * no gain in skipping a page now and then. You need a longer run of
357                  * consecutive skipped pages before it's worthwhile. Also, skipping
358                  * even a single page means that we can't update relfrozenxid or
359                  * reltuples, so we only want to do it if there's a good chance to
360                  * skip a goodly number of pages.
361                  */
362                 if (!scan_all)
363                 {
364                         all_visible_according_to_vm =
365                                 visibilitymap_test(onerel, blkno, &vmbuffer);
366                         if (all_visible_according_to_vm)
367                         {
368                                 all_visible_streak++;
369                                 if (all_visible_streak >= SKIP_PAGES_THRESHOLD)
370                                 {
371                                         vacrelstats->scanned_all = false;
372                                         continue;
373                                 }
374                         }
375                         else
376                                 all_visible_streak = 0;
377                 }
378
379                 vacuum_delay_point();
380
381                 scanned_pages++;
382
383                 /*
384                  * If we are close to overrunning the available space for dead-tuple
385                  * TIDs, pause and do a cycle of vacuuming before we tackle this page.
386                  */
387                 if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
388                         vacrelstats->num_dead_tuples > 0)
389                 {
390                         /* Log cleanup info before we touch indexes */
391                         vacuum_log_cleanup_info(onerel, vacrelstats);
392
393                         /* Remove index entries */
394                         for (i = 0; i < nindexes; i++)
395                                 lazy_vacuum_index(Irel[i],
396                                                                   &indstats[i],
397                                                                   vacrelstats);
398                         /* Remove tuples from heap */
399                         lazy_vacuum_heap(onerel, vacrelstats);
400                         /*
401                          * Forget the now-vacuumed tuples, and press on, but be careful
402                          * not to reset latestRemovedXid since we want that value to be valid.
403                          */
404                         vacrelstats->num_dead_tuples = 0;
405                         vacrelstats->num_index_scans++;
406                 }
407
408                 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
409                                                                  RBM_NORMAL, vac_strategy);
410
411                 /* We need buffer cleanup lock so that we can prune HOT chains. */
412                 LockBufferForCleanup(buf);
413
414                 page = BufferGetPage(buf);
415
416                 if (PageIsNew(page))
417                 {
418                         /*
419                          * An all-zeroes page could be left over if a backend extends the
420                          * relation but crashes before initializing the page. Reclaim such
421                          * pages for use.
422                          *
423                          * We have to be careful here because we could be looking at a
424                          * page that someone has just added to the relation and not yet
425                          * been able to initialize (see RelationGetBufferForTuple). To
426                          * protect against that, release the buffer lock, grab the
427                          * relation extension lock momentarily, and re-lock the buffer. If
428                          * the page is still uninitialized by then, it must be left over
429                          * from a crashed backend, and we can initialize it.
430                          *
431                          * We don't really need the relation lock when this is a new or
432                          * temp relation, but it's probably not worth the code space to
433                          * check that, since this surely isn't a critical path.
434                          *
435                          * Note: the comparable code in vacuum.c need not worry because
436                          * it's got exclusive lock on the whole relation.
437                          */
438                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
439                         LockRelationForExtension(onerel, ExclusiveLock);
440                         UnlockRelationForExtension(onerel, ExclusiveLock);
441                         LockBufferForCleanup(buf);
442                         if (PageIsNew(page))
443                         {
444                                 ereport(WARNING,
445                                 (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
446                                                 relname, blkno)));
447                                 PageInit(page, BufferGetPageSize(buf), 0);
448                                 empty_pages++;
449                         }
450                         freespace = PageGetHeapFreeSpace(page);
451                         MarkBufferDirty(buf);
452                         UnlockReleaseBuffer(buf);
453
454                         RecordPageWithFreeSpace(onerel, blkno, freespace);
455                         continue;
456                 }
457
458                 if (PageIsEmpty(page))
459                 {
460                         empty_pages++;
461                         freespace = PageGetHeapFreeSpace(page);
462
463                         if (!PageIsAllVisible(page))
464                         {
465                                 PageSetAllVisible(page);
466                                 SetBufferCommitInfoNeedsSave(buf);
467                         }
468
469                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
470
471                         /* Update the visibility map */
472                         if (!all_visible_according_to_vm)
473                         {
474                                 visibilitymap_pin(onerel, blkno, &vmbuffer);
475                                 LockBuffer(buf, BUFFER_LOCK_SHARE);
476                                 if (PageIsAllVisible(page))
477                                         visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
478                                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
479                         }
480
481                         ReleaseBuffer(buf);
482                         RecordPageWithFreeSpace(onerel, blkno, freespace);
483                         continue;
484                 }
485
486                 /*
487                  * Prune all HOT-update chains in this page.
488                  *
489                  * We count tuples removed by the pruning step as removed by VACUUM.
490                  */
491                 tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
492                                                                                                         &vacrelstats->latestRemovedXid);
493                 /*
494                  * Now scan the page to collect vacuumable items and check for tuples
495                  * requiring freezing.
496                  */
497                 all_visible = true;
498                 nfrozen = 0;
499                 hastup = false;
500                 prev_dead_count = vacrelstats->num_dead_tuples;
501                 maxoff = PageGetMaxOffsetNumber(page);
502                 for (offnum = FirstOffsetNumber;
503                          offnum <= maxoff;
504                          offnum = OffsetNumberNext(offnum))
505                 {
506                         ItemId          itemid;
507
508                         itemid = PageGetItemId(page, offnum);
509
510                         /* Unused items require no processing, but we count 'em */
511                         if (!ItemIdIsUsed(itemid))
512                         {
513                                 nunused += 1;
514                                 continue;
515                         }
516
517                         /* Redirect items mustn't be touched */
518                         if (ItemIdIsRedirected(itemid))
519                         {
520                                 hastup = true;  /* this page won't be truncatable */
521                                 continue;
522                         }
523
524                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
525
526                         /*
527                          * DEAD item pointers are to be vacuumed normally; but we don't
528                          * count them in tups_vacuumed, else we'd be double-counting (at
529                          * least in the common case where heap_page_prune() just freed up
530                          * a non-HOT tuple).
531                          */
532                         if (ItemIdIsDead(itemid))
533                         {
534                                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
535                                 all_visible = false;
536                                 continue;
537                         }
538
539                         Assert(ItemIdIsNormal(itemid));
540
541                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
542                         tuple.t_len = ItemIdGetLength(itemid);
543
544                         tupgone = false;
545
546                         switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
547                         {
548                                 case HEAPTUPLE_DEAD:
549
550                                         /*
551                                          * Ordinarily, DEAD tuples would have been removed by
552                                          * heap_page_prune(), but it's possible that the tuple
553                                          * state changed since heap_page_prune() looked.  In
554                                          * particular an INSERT_IN_PROGRESS tuple could have
555                                          * changed to DEAD if the inserter aborted.  So this
556                                          * cannot be considered an error condition.
557                                          *
558                                          * If the tuple is HOT-updated then it must only be
559                                          * removed by a prune operation; so we keep it just as if
560                                          * it were RECENTLY_DEAD.  Also, if it's a heap-only
561                                          * tuple, we choose to keep it, because it'll be a lot
562                                          * cheaper to get rid of it in the next pruning pass than
563                                          * to treat it like an indexed tuple.
564                                          */
565                                         if (HeapTupleIsHotUpdated(&tuple) ||
566                                                 HeapTupleIsHeapOnly(&tuple))
567                                                 nkeep += 1;
568                                         else
569                                                 tupgone = true; /* we can delete the tuple */
570                                         all_visible = false;
571                                         break;
572                                 case HEAPTUPLE_LIVE:
573                                         /* Tuple is good --- but let's do some validity checks */
574                                         if (onerel->rd_rel->relhasoids &&
575                                                 !OidIsValid(HeapTupleGetOid(&tuple)))
576                                                 elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
577                                                          relname, blkno, offnum);
578
579                                         /*
580                                          * Is the tuple definitely visible to all transactions?
581                                          *
582                                          * NB: Like with per-tuple hint bits, we can't set the
583                                          * PD_ALL_VISIBLE flag if the inserter committed
584                                          * asynchronously. See SetHintBits for more info. That is
585                                          * why we check that the HEAP_XMIN_COMMITTED hint bit is
586                                          * set here.
587                                          */
588                                         if (all_visible)
589                                         {
590                                                 TransactionId xmin;
591
592                                                 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
593                                                 {
594                                                         all_visible = false;
595                                                         break;
596                                                 }
597
598                                                 /*
599                                                  * The inserter definitely committed. But is it old
600                                                  * enough that everyone sees it as committed?
601                                                  */
602                                                 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
603                                                 if (!TransactionIdPrecedes(xmin, OldestXmin))
604                                                 {
605                                                         all_visible = false;
606                                                         break;
607                                                 }
608                                         }
609                                         break;
610                                 case HEAPTUPLE_RECENTLY_DEAD:
611
612                                         /*
613                                          * If the tuple was recently deleted then we must not
614                                          * remove it from the relation.
615                                          */
616                                         nkeep += 1;
617                                         all_visible = false;
618                                         break;
619                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
620                                         /* This is an expected case during concurrent vacuum */
621                                         all_visible = false;
622                                         break;
623                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
624                                         /* This is an expected case during concurrent vacuum */
625                                         all_visible = false;
626                                         break;
627                                 default:
628                                         elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
629                                         break;
630                         }
631
632                         if (tupgone)
633                         {
634                                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
635                                 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
636                                                                                          &vacrelstats->latestRemovedXid);
637                                 tups_vacuumed += 1;
638                         }
639                         else
640                         {
641                                 num_tuples += 1;
642                                 hastup = true;
643
644                                 /*
645                                  * Each non-removable tuple must be checked to see if it needs
646                                  * freezing.  Note we already have exclusive buffer lock.
647                                  */
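                                    /*
                                     * We pass InvalidBuffer here; the buffer is marked dirty
                                     * and the freeze WAL record is emitted below, once, after
                                     * the item loop, covering all tuples frozen on this page.
                                     */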
648                                 if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
649                                                                           InvalidBuffer))
650                                         frozen[nfrozen++] = offnum;
651                         }
652                 }                                               /* scan along page */
653
654                 /*
655                  * If we froze any tuples, mark the buffer dirty, and write a WAL
656                  * record recording the changes.  We must log the changes to be
657                  * crash-safe against future truncation of CLOG.
658                  */
659                 if (nfrozen > 0)
660                 {
661                         MarkBufferDirty(buf);
662                         /* no XLOG for temp tables, though */
663                         if (!onerel->rd_istemp)
664                         {
665                                 XLogRecPtr      recptr;
666
667                                 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
668                                                                                  frozen, nfrozen);
669                                 PageSetLSN(page, recptr);
670                                 PageSetTLI(page, ThisTimeLineID);
671                         }
672                 }
673
674                 /*
675                  * If there are no indexes then we can vacuum the page right now
676                  * instead of doing a second scan.
677                  */
678                 if (nindexes == 0 &&
679                         vacrelstats->num_dead_tuples > 0)
680                 {
681                         /* Remove tuples from heap */
682                         lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
683                         /*
684                          * Forget the now-vacuumed tuples, and press on, but be careful
685                          * not to reset latestRemovedXid since we want that value to be valid.
686                          */
687                         Assert(TransactionIdIsValid(vacrelstats->latestRemovedXid));
688                         vacrelstats->num_dead_tuples = 0;
689                         vacuumed_pages++;
690                 }
691
692                 freespace = PageGetHeapFreeSpace(page);
693
694                 /* Update the all-visible flag on the page */
695                 if (!PageIsAllVisible(page) && all_visible)
696                 {
697                         PageSetAllVisible(page);
698                         SetBufferCommitInfoNeedsSave(buf);
699                 }
700                 else if (PageIsAllVisible(page) && !all_visible)
701                 {
702                         elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u",
703                                  relname, blkno);
704                         PageClearAllVisible(page);
705                         SetBufferCommitInfoNeedsSave(buf);
706
707                         /*
708                          * Normally, we would drop the lock on the heap page before
709                          * updating the visibility map, but since this case shouldn't
710                          * happen anyway, don't worry about that.
711                          */
712                         visibilitymap_clear(onerel, blkno);
713                 }
714
715                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
716
717                 /* Update the visibility map */
718                 if (!all_visible_according_to_vm && all_visible)
719                 {
720                         visibilitymap_pin(onerel, blkno, &vmbuffer);
721                         LockBuffer(buf, BUFFER_LOCK_SHARE);
722                         if (PageIsAllVisible(page))
723                                 visibilitymap_set(onerel, blkno, PageGetLSN(page), &vmbuffer);
724                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
725                 }
726
727                 ReleaseBuffer(buf);
728
729                 /* Remember the location of the last page with nonremovable tuples */
730                 if (hastup)
731                         vacrelstats->nonempty_pages = blkno + 1;
732
733                 /*
734                  * If we remembered any tuples for deletion, then the page will be
735                  * visited again by lazy_vacuum_heap, which will compute and record
736                  * its post-compaction free space.      If not, then we're done with this
737                  * page, so remember its free space as-is.      (This path will always be
738                  * taken if there are no indexes.)
739                  */
740                 if (vacrelstats->num_dead_tuples == prev_dead_count)
741                         RecordPageWithFreeSpace(onerel, blkno, freespace);
742         }
743
744         /* save stats for use later */
745         vacrelstats->rel_tuples = num_tuples;
746         vacrelstats->tuples_deleted = tups_vacuumed;
747
748         /* If any tuples need to be deleted, perform final vacuum cycle */
749         /* XXX put a threshold on min number of tuples here? */
750         if (vacrelstats->num_dead_tuples > 0)
751         {
752                 /* Log cleanup info before we touch indexes */
753                 vacuum_log_cleanup_info(onerel, vacrelstats);
754
755                 /* Remove index entries */
756                 for (i = 0; i < nindexes; i++)
757                         lazy_vacuum_index(Irel[i],
758                                                           &indstats[i],
759                                                           vacrelstats);
760                 /* Remove tuples from heap */
761                 lazy_vacuum_heap(onerel, vacrelstats);
762                 vacrelstats->num_index_scans++;
763         }
764
765         /* Release the pin on the visibility map page */
766         if (BufferIsValid(vmbuffer))
767         {
768                 ReleaseBuffer(vmbuffer);
769                 vmbuffer = InvalidBuffer;
770         }
771
772         /* Do post-vacuum cleanup and statistics update for each index */
773         for (i = 0; i < nindexes; i++)
774                 lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
775
776         /* If no indexes, make log report that lazy_vacuum_heap would've made */
777         if (vacuumed_pages)
778                 ereport(elevel,
779                                 (errmsg("\"%s\": removed %.0f row versions in %u pages",
780                                                 RelationGetRelationName(onerel),
781                                                 tups_vacuumed, vacuumed_pages)));
782
783         ereport(elevel,
784                         (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
785                                         RelationGetRelationName(onerel),
786                                         tups_vacuumed, num_tuples, scanned_pages, nblocks),
787                          errdetail("%.0f dead row versions cannot be removed yet.\n"
788                                            "There were %.0f unused item pointers.\n"
789                                            "%u pages are entirely empty.\n"
790                                            "%s.",
791                                            nkeep,
792                                            nunused,
793                                            empty_pages,
794                                            pg_rusage_show(&ru0))));
795 }
796
797
798 /*
799  *      lazy_vacuum_heap() -- second pass over the heap
800  *
801  *              This routine marks dead tuples as unused and compacts out free
802  *              space on their pages.  Pages not having dead tuples recorded from
803  *              lazy_scan_heap are not visited at all.
804  *
805  * Note: the reason for doing this as a second pass is we cannot remove
806  * the tuples until we've removed their index entries, and we want to
807  * process index entry removal in batches as large as possible.
808  */
809 static void
810 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
811 {
812         int                     tupindex;
813         int                     npages;
814         PGRUsage        ru0;
815
816         pg_rusage_init(&ru0);
817         npages = 0;
818
819         tupindex = 0;
820         while (tupindex < vacrelstats->num_dead_tuples)
821         {
822                 BlockNumber tblk;
823                 Buffer          buf;
824                 Page            page;
825                 Size            freespace;
826
827                 vacuum_delay_point();
828
829                 tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
830                 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
831                                                                  vac_strategy);
832                 LockBufferForCleanup(buf);
833                 tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
834
835                 /* Now that we've compacted the page, record its available space */
836                 page = BufferGetPage(buf);
837                 freespace = PageGetHeapFreeSpace(page);
838
839                 UnlockReleaseBuffer(buf);
840                 RecordPageWithFreeSpace(onerel, tblk, freespace);
841                 npages++;
842         }
843
844         ereport(elevel,
845                         (errmsg("\"%s\": removed %d row versions in %d pages",
846                                         RelationGetRelationName(onerel),
847                                         tupindex, npages),
848                          errdetail("%s.",
849                                            pg_rusage_show(&ru0))));
850 }
851
852 /*
853  *      lazy_vacuum_page() -- free dead tuples on a page
854  *                                       and repair its fragmentation.
855  *
856  * Caller must hold pin and buffer cleanup lock on the buffer.
857  *
858  * tupindex is the index in vacrelstats->dead_tuples of the first dead
859  * tuple for this page.  We assume the rest follow sequentially.
860  * The return value is the first tupindex after the tuples of this page.
861  */
862 static int
863 lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
864                                  int tupindex, LVRelStats *vacrelstats)
865 {
866         Page            page = BufferGetPage(buffer);
867         OffsetNumber unused[MaxOffsetNumber];
868         int                     uncnt = 0;
869
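            /*
             * The item changes and the WAL record describing them must be
             * applied together, so both happen inside a critical section:
             * any error in between becomes a PANIC rather than leaving the
             * page and the log out of sync.
             */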
870         START_CRIT_SECTION();
871
872         for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
873         {
874                 BlockNumber tblk;
875                 OffsetNumber toff;
876                 ItemId          itemid;
877
878                 tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
879                 if (tblk != blkno)
880                         break;                          /* past end of tuples for this block */
881                 toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
882                 itemid = PageGetItemId(page, toff);
883                 ItemIdSetUnused(itemid);
884                 unused[uncnt++] = toff;
885         }
886
887         PageRepairFragmentation(page);
888
889         MarkBufferDirty(buffer);
890
891         /* XLOG stuff */
892         if (!onerel->rd_istemp)
893         {
894                 XLogRecPtr      recptr;
895
896                 recptr = log_heap_clean(onerel, buffer,
897                                                                 NULL, 0, NULL, 0,
898                                                                 unused, uncnt,
899                                                                 vacrelstats->latestRemovedXid);
900                 PageSetLSN(page, recptr);
901                 PageSetTLI(page, ThisTimeLineID);
902         }
903
904         END_CRIT_SECTION();
905
906         return tupindex;
907 }
908
909 /*
910  *      lazy_vacuum_index() -- vacuum one index relation.
911  *
912  *              Delete all the index entries pointing to tuples listed in
913  *              vacrelstats->dead_tuples, and update running statistics.
914  */
915 static void
916 lazy_vacuum_index(Relation indrel,
917                                   IndexBulkDeleteResult **stats,
918                                   LVRelStats *vacrelstats)
919 {
920         IndexVacuumInfo ivinfo;
921         PGRUsage        ru0;
922
923         pg_rusage_init(&ru0);
924
925         ivinfo.index = indrel;
926         ivinfo.analyze_only = false;
927         ivinfo.estimated_count = true;
928         ivinfo.message_level = elevel;
929         ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
930         ivinfo.strategy = vac_strategy;
931
932         /* Do bulk deletion */
933         *stats = index_bulk_delete(&ivinfo, *stats,
934                                                            lazy_tid_reaped, (void *) vacrelstats);
935
936         ereport(elevel,
937                         (errmsg("scanned index \"%s\" to remove %d row versions",
938                                         RelationGetRelationName(indrel),
939                                         vacrelstats->num_dead_tuples),
940                          errdetail("%s.", pg_rusage_show(&ru0))));
941 }
942
943 /*
944  *      lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
945  */
946 static void
947 lazy_cleanup_index(Relation indrel,
948                                    IndexBulkDeleteResult *stats,
949                                    LVRelStats *vacrelstats)
950 {
951         IndexVacuumInfo ivinfo;
952         PGRUsage        ru0;
953
954         pg_rusage_init(&ru0);
955
956         ivinfo.index = indrel;
957         ivinfo.analyze_only = false;
958         ivinfo.estimated_count = !vacrelstats->scanned_all;
959         ivinfo.message_level = elevel;
960         /* use rel_tuples only if we scanned all pages, else the old reltuples */
961         ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
962         ivinfo.strategy = vac_strategy;
963
964         stats = index_vacuum_cleanup(&ivinfo, stats);
965
966         if (!stats)
967                 return;
968
969         /*
970          * Now update statistics in pg_class, but only if the index says the count
971          * is accurate.
972          */
973         if (!stats->estimated_count)
974                 vac_update_relstats(indrel,
975                                                         stats->num_pages, stats->num_index_tuples,
976                                                         false, InvalidTransactionId);
977
978         ereport(elevel,
979                         (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
980                                         RelationGetRelationName(indrel),
981                                         stats->num_index_tuples,
982                                         stats->num_pages),
983                          errdetail("%.0f index row versions were removed.\n"
984                          "%u index pages have been deleted, %u are currently reusable.\n"
985                                            "%s.",
986                                            stats->tuples_removed,
987                                            stats->pages_deleted, stats->pages_free,
988                                            pg_rusage_show(&ru0))));
989
990         pfree(stats);
991 }
992
993 /*
994  * lazy_truncate_heap - try to truncate off any empty pages at the end
995  */
996 static void
997 lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
998 {
999         BlockNumber old_rel_pages = vacrelstats->rel_pages;
1000         BlockNumber new_rel_pages;
1001         PGRUsage        ru0;
1002
1003         pg_rusage_init(&ru0);
1004
1005         /*
1006          * We need full exclusive lock on the relation in order to do truncation.
1007          * If we can't get it, give up rather than waiting --- we don't want to
1008          * block other backends, and we don't want to deadlock (which is quite
1009          * possible considering we already hold a lower-grade lock).
1010          */
1011         if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
1012                 return;
1013
1014         /*
1015          * Now that we have exclusive lock, look to see if the rel has grown
1016          * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
1017          * newly added pages presumably contain non-deletable tuples.
1018          */
1019         new_rel_pages = RelationGetNumberOfBlocks(onerel);
1020         if (new_rel_pages != old_rel_pages)
1021         {
1022                 /* might as well use the latest news when we update pg_class stats */
1023                 vacrelstats->rel_pages = new_rel_pages;
1024                 UnlockRelation(onerel, AccessExclusiveLock);
1025                 return;
1026         }
1027
1028         /*
1029          * Scan backwards from the end to verify that the end pages actually
1030          * contain no tuples.  This is *necessary*, not optional, because other
1031          * backends could have added tuples to these pages whilst we were
1032          * vacuuming.
1033          */
1034         new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
1035
1036         if (new_rel_pages >= old_rel_pages)
1037         {
1038                 /* can't do anything after all */
1039                 UnlockRelation(onerel, AccessExclusiveLock);
1040                 return;
1041         }
1042
1043         /*
1044          * Okay to truncate.
1045          */
1046         RelationTruncate(onerel, new_rel_pages);
1047
1048         /*
1049          * We can release the exclusive lock as soon as we have truncated.      Other
1050          * backends can't safely access the relation until they have processed the
1051          * smgr invalidation that smgrtruncate sent out ... but that should happen
1052          * as part of standard invalidation processing once they acquire lock on
1053          * the relation.
1054          */
1055         UnlockRelation(onerel, AccessExclusiveLock);
1056
1057         /* update statistics */
1058         vacrelstats->rel_pages = new_rel_pages;
1059         vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
1060
1061         ereport(elevel,
1062                         (errmsg("\"%s\": truncated %u to %u pages",
1063                                         RelationGetRelationName(onerel),
1064                                         old_rel_pages, new_rel_pages),
1065                          errdetail("%s.",
1066                                            pg_rusage_show(&ru0))));
1067 }
1068
1069 /*
1070  * Rescan end pages to verify that they are (still) empty of tuples.
1071  *
1072  * Returns number of nondeletable pages (last nonempty page + 1).
1073  */
1074 static BlockNumber
1075 count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
1076 {
1077         BlockNumber blkno;
1078
1079         /* Strange coding of loop control is needed because blkno is unsigned */
1080         blkno = vacrelstats->rel_pages;
1081         while (blkno > vacrelstats->nonempty_pages)
1082         {
1083                 Buffer          buf;
1084                 Page            page;
1085                 OffsetNumber offnum,
1086                                         maxoff;
1087                 bool            hastup;
1088
1089                 /*
1090                  * We don't insert a vacuum delay point here, because we have an
1091                  * exclusive lock on the table which we want to hold for as short a
1092                  * time as possible.  We still need to check for interrupts however.
1093                  */
1094                 CHECK_FOR_INTERRUPTS();
1095
1096                 blkno--;
1097
1098                 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1099                                                                  RBM_NORMAL, vac_strategy);
1100
1101                 /* In this phase we only need shared access to the buffer */
1102                 LockBuffer(buf, BUFFER_LOCK_SHARE);
1103
1104                 page = BufferGetPage(buf);
1105
1106                 if (PageIsNew(page) || PageIsEmpty(page))
1107                 {
1108                         /* PageIsNew probably shouldn't happen... */
1109                         UnlockReleaseBuffer(buf);
1110                         continue;
1111                 }
1112
1113                 hastup = false;
1114                 maxoff = PageGetMaxOffsetNumber(page);
1115                 for (offnum = FirstOffsetNumber;
1116                          offnum <= maxoff;
1117                          offnum = OffsetNumberNext(offnum))
1118                 {
1119                         ItemId          itemid;
1120
1121                         itemid = PageGetItemId(page, offnum);
1122
1123                         /*
1124                          * Note: any non-unused item should be taken as a reason to keep
1125                          * this page.  We formerly thought that DEAD tuples could be
1126                          * thrown away, but that's not so, because we'd not have cleaned
1127                          * out their index entries.
1128                          */
1129                         if (ItemIdIsUsed(itemid))
1130                         {
1131                                 hastup = true;
1132                                 break;                  /* can stop scanning */
1133                         }
1134                 }                                               /* scan along page */
1135
1136                 UnlockReleaseBuffer(buf);
1137
1138                 /* Done scanning if we found a tuple here */
1139                 if (hastup)
1140                         return blkno + 1;
1141         }
1142
1143         /*
1144          * If we fall out of the loop, all the previously-thought-to-be-empty
1145          * pages still are; we need not bother to look at the last known-nonempty
1146          * page.
1147          */
1148         return vacrelstats->nonempty_pages;
1149 }
1150
1151 /*
1152  * lazy_space_alloc - space allocation decisions for lazy vacuum
1153  *
1154  * See the comments at the head of this file for rationale.
1155  */
1156 static void
1157 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
1158 {
1159         long            maxtuples;
1160
1161         if (vacrelstats->hasindex)
1162         {
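                     /*
                      * maintenance_work_mem is measured in kB, so this converts it
                      * into a TID count.  As a rough worked example (assuming the
                      * usual 6-byte ItemPointerData), a 64MB setting allows about
                      * 11 million dead-tuple TIDs before an index-vacuum pass is
                      * forced.
                      */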
1163                 maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
1164                 maxtuples = Min(maxtuples, INT_MAX);
1165                 maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
1166
1167                 /* curious coding here to ensure the multiplication can't overflow */
1168                 if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
1169                         maxtuples = relblocks * LAZY_ALLOC_TUPLES;
1170
1171                 /* stay sane if small maintenance_work_mem */
1172                 maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
1173         }
1174         else
1175         {
1176                 maxtuples = MaxHeapTuplesPerPage;
1177         }
1178
1179         vacrelstats->num_dead_tuples = 0;
1180         vacrelstats->max_dead_tuples = (int) maxtuples;
1181         vacrelstats->dead_tuples = (ItemPointer)
1182                 palloc(maxtuples * sizeof(ItemPointerData));
1183 }
1184
1185 /*
1186  * lazy_record_dead_tuple - remember one deletable tuple
1187  */
1188 static void
1189 lazy_record_dead_tuple(LVRelStats *vacrelstats,
1190                                            ItemPointer itemptr)
1191 {
1192         /*
1193          * The array shouldn't overflow under normal behavior, but perhaps it
1194          * could if we are given a really small maintenance_work_mem. In that
1195          * case, just forget the last few tuples (we'll get 'em next time).
1196          */
1197         if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
1198         {
1199                 vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
1200                 vacrelstats->num_dead_tuples++;
1201         }
1202 }
1203
1204 /*
1205  *      lazy_tid_reaped() -- is a particular tid deletable?
1206  *
1207  *              This has the right signature to be an IndexBulkDeleteCallback.
1208  *
1209  *              Assumes dead_tuples array is in sorted order.
1210  */
1211 static bool
1212 lazy_tid_reaped(ItemPointer itemptr, void *state)
1213 {
1214         LVRelStats *vacrelstats = (LVRelStats *) state;
1215         ItemPointer res;
1216
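             /*
              * lazy_scan_heap records dead TIDs in physical (block, offset)
              * order during its forward heap scan, so the array is already
              * sorted and bsearch() is safe here.
              */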
1217         res = (ItemPointer) bsearch((void *) itemptr,
1218                                                                 (void *) vacrelstats->dead_tuples,
1219                                                                 vacrelstats->num_dead_tuples,
1220                                                                 sizeof(ItemPointerData),
1221                                                                 vac_cmp_itemptr);
1222
1223         return (res != NULL);
1224 }
1225
1226 /*
1227  * Comparator routine for use with bsearch() on the dead_tuples array.
1228  */
1229 static int
1230 vac_cmp_itemptr(const void *left, const void *right)
1231 {
1232         BlockNumber lblk,
1233                                 rblk;
1234         OffsetNumber loff,
1235                                 roff;
1236
1237         lblk = ItemPointerGetBlockNumber((ItemPointer) left);
1238         rblk = ItemPointerGetBlockNumber((ItemPointer) right);
1239
1240         if (lblk < rblk)
1241                 return -1;
1242         if (lblk > rblk)
1243                 return 1;
1244
1245         loff = ItemPointerGetOffsetNumber((ItemPointer) left);
1246         roff = ItemPointerGetOffsetNumber((ItemPointer) right);
1247
1248         if (loff < roff)
1249                 return -1;
1250         if (loff > roff)
1251                 return 1;
1252
1253         return 0;
1254 }