* vacuum.c
* The postgres vacuum cleaner.
*
- * This file includes the "full" version of VACUUM, as well as control code
- * used by all three of full VACUUM, lazy VACUUM, and ANALYZE. See
- * vacuumlazy.c and analyze.c for the rest of the code for the latter two.
+ * This file now includes only control and dispatch code for VACUUM and
+ * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
+ * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
+ * in cluster.c.
*
*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.404 2010/02/07 20:48:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.405 2010/02/08 04:33:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include <sys/time.h>
-#include <unistd.h>
-
#include "access/clog.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
-#include "access/visibilitymap.h"
#include "access/xact.h"
-#include "access/xlog.h"
-#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
-#include "catalog/storage.h"
#include "commands/cluster.h"
-#include "commands/dbcommands.h"
#include "commands/vacuum.h"
-#include "executor/executor.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
-#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
-#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
-#include "utils/inval.h"
-#include "utils/lsyscache.h"
#include "utils/memutils.h"
-#include "utils/pg_rusage.h"
-#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
int vacuum_freeze_min_age;
int vacuum_freeze_table_age;
-/*
- * VacPage structures keep track of each page on which we find useful
- * amounts of free space.
- */
-typedef struct VacPageData
-{
- BlockNumber blkno; /* BlockNumber of this Page */
- Size free; /* FreeSpace on this Page */
- uint16 offsets_used; /* Number of OffNums used by vacuum */
- uint16 offsets_free; /* Number of OffNums free or to be free */
- OffsetNumber offsets[1]; /* Array of free OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
- BlockNumber empty_end_pages; /* Number of "empty" end-pages */
- int num_pages; /* Number of pages in pagedesc */
- int num_allocated_pages; /* Number of allocated pages in
- * pagedesc */
- VacPage *pagedesc; /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-/*
- * The "vtlinks" array keeps information about each recently-updated tuple
- * ("recent" meaning its XMAX is too new to let us recycle the tuple).
- * We store the tuple's own TID as well as its t_ctid (its link to the next
- * newer tuple version). Searching in this array allows us to follow update
- * chains backwards from newer to older tuples. When we move a member of an
- * update chain, we must move *all* the live members of the chain, so that we
- * can maintain their t_ctid link relationships (we must not just overwrite
- * t_ctid in an existing tuple).
- *
- * Note: because t_ctid links can be stale (this would only occur if a prior
- * VACUUM crashed partway through), it is possible that new_tid points to an
- * empty slot or unrelated tuple. We have to check the linkage as we follow
- * it, just as is done in EvalPlanQualFetch.
- */
-typedef struct VTupleLinkData
-{
- ItemPointerData new_tid; /* t_ctid of an updated tuple */
- ItemPointerData this_tid; /* t_self of the tuple */
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-/*
- * We use an array of VTupleMoveData to plan a chain tuple move fully
- * before we do it.
- */
-typedef struct VTupleMoveData
-{
- ItemPointerData tid; /* tuple ID */
- VacPage vacpage; /* where to move it to */
- bool cleanVpd; /* clean vacpage before using? */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-/*
- * VRelStats contains the data acquired by scan_heap for use later
- */
-typedef struct VRelStats
-{
- /* miscellaneous statistics */
- BlockNumber rel_pages; /* pages in relation */
- double rel_tuples; /* tuples that remain after vacuuming */
- double rel_indexed_tuples; /* indexed tuples that remain */
- Size min_tlen; /* min surviving tuple size */
- Size max_tlen; /* max surviving tuple size */
- bool hasindex;
- /* vtlinks array for tuple chain following - sorted by new_tid */
- int num_vtlinks;
- VTupleLink vtlinks;
- TransactionId latestRemovedXid;
-} VRelStats;
-
-/*----------------------------------------------------------------------
- * ExecContext:
- *
- * As these variables always appear together, we put them into one struct
- * and pull initialization and cleanup into separate routines.
- * ExecContext is used by repair_frag() and move_xxx_tuple(). More
- * accurately: It is *used* only in move_xxx_tuple(), but because this
- * routine is called many times, we initialize the struct just once in
- * repair_frag() and pass it on to move_xxx_tuple().
- */
-typedef struct ExecContextData
-{
- ResultRelInfo *resultRelInfo;
- EState *estate;
- TupleTableSlot *slot;
-} ExecContextData;
-
-typedef ExecContextData *ExecContext;
-
-static void
-ExecContext_Init(ExecContext ec, Relation rel)
-{
- TupleDesc tupdesc = RelationGetDescr(rel);
-
- /*
- * We need a ResultRelInfo and an EState so we can use the regular
- * executor's index-entry-making machinery.
- */
- ec->estate = CreateExecutorState();
-
- ec->resultRelInfo = makeNode(ResultRelInfo);
- ec->resultRelInfo->ri_RangeTableIndex = 1; /* dummy */
- ec->resultRelInfo->ri_RelationDesc = rel;
- ec->resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */
-
- ExecOpenIndices(ec->resultRelInfo);
-
- ec->estate->es_result_relations = ec->resultRelInfo;
- ec->estate->es_num_result_relations = 1;
- ec->estate->es_result_relation_info = ec->resultRelInfo;
-
- /* Set up a tuple slot too */
- ec->slot = MakeSingleTupleTableSlot(tupdesc);
-}
-
-static void
-ExecContext_Finish(ExecContext ec)
-{
- ExecDropSingleTupleTableSlot(ec->slot);
- ExecCloseIndices(ec->resultRelInfo);
- FreeExecutorState(ec->estate);
-}
-
-/*
- * End of ExecContext Implementation
- *----------------------------------------------------------------------
- */
/* A few variables that don't seem worth passing around as parameters */
static MemoryContext vac_context = NULL;
-
-static int elevel = -1;
-
-static TransactionId OldestXmin;
-static TransactionId FreezeLimit;
-
static BufferAccessStrategy vac_strategy;
static void vac_truncate_clog(TransactionId frozenXID);
static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
bool for_wraparound, bool *scanned_all);
-static bool full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel,
- VacPageList vacuum_pages, VacPageList fraged_pages);
-static bool repair_frag(VRelStats *vacrelstats, Relation onerel,
- VacPageList vacuum_pages, VacPageList fraged_pages,
- int nindexes, Relation *Irel);
-static void move_chain_tuple(VRelStats *vacrelstats, Relation rel,
- Buffer old_buf, Page old_page, HeapTuple old_tup,
- Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
- ExecContext ec, ItemPointer ctid, bool cleanVpd);
-static void move_plain_tuple(Relation rel,
- Buffer old_buf, Page old_page, HeapTuple old_tup,
- Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
- ExecContext ec);
-static void update_hint_bits(Relation rel, VacPageList fraged_pages,
- int num_fraged_pages, BlockNumber last_move_dest_block,
- int num_moved);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
- VacPageList vacpagelist);
-static void vacuum_page(VRelStats *vacrelstats, Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel,
- double num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, double num_tuples);
-static bool tid_reaped(ItemPointer itemptr, void *state);
-static void vac_update_fsm(Relation onerel, VacPageList fraged_pages,
- BlockNumber rel_pages);
-static VacPage copy_vac_page(VacPage vacpage);
-static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
-static void *vac_bsearch(const void *key, const void *base,
- size_t nelem, size_t size,
- int (*compar) (const void *, const void *));
-static int vac_cmp_blk(const void *left, const void *right);
-static int vac_cmp_offno(const void *left, const void *right);
-static int vac_cmp_vtlinks(const void *left, const void *right);
-static bool enough_space(VacPage vacpage, Size len);
-static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
-
-
-/****************************************************************************
- * *
- * Code common to all flavors of VACUUM and ANALYZE *
- * *
- ****************************************************************************
- */
/*
Assert((vacstmt->options & VACOPT_VACUUM) ||
!(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
- Assert((vacstmt->options & VACOPT_FULL) ||
- !(vacstmt->options & VACOPT_INPLACE));
stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
- if (vacstmt->options & VACOPT_VERBOSE)
- elevel = INFO;
- else
- elevel = DEBUG2;
-
/*
* We cannot run VACUUM inside a user transaction block; if we were inside
* a transaction, then our commit- and start-transaction-command calls
- * would not have the intended effect! Furthermore, the forced commit that
- * occurs before truncating the relation's file would have the effect of
- * committing the rest of the user's transaction too, which would
- * certainly not be the desired behavior. (This only applies to VACUUM
- * FULL, though. We could in theory run lazy VACUUM inside a transaction
- * block, but we choose to disallow that case because we'd rather commit
- * as soon as possible after finishing the vacuum. This is mainly so that
- * we can let go the AccessExclusiveLock that we may be holding.)
+ * would not have the intended effect! There are numerous other subtle
+ * dependencies on this, too.
*
* ANALYZE (without VACUUM) can run either way.
*/
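(As context for the comment above, a minimal sketch of how this restriction is typically enforced in the caller; the surrounding code is not part of this excerpt, and isTopLevel/stmttype stand for the usual vacuum() locals, with PreventTransactionChain() being the standard check from access/xact.h:)

	/* VACUUM refuses to run inside a transaction block; plain ANALYZE may */
	if (vacstmt->options & VACOPT_VACUUM)
		PreventTransactionChain(isTopLevel, stmttype);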
* We can always ignore processes running lazy vacuum. This is because we
* use these values only for deciding which tuples we must keep in the
* tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
- * ignore it. In theory it could be problematic to ignore lazy vacuums on
+ * ignore it. In theory it could be problematic to ignore lazy vacuums in
* a full vacuum, but keep in mind that only one vacuum process can be
* working on a particular table at any time, and that each vacuum is
* always an independent transaction.
* somebody vacuuming pg_class might think they could delete a tuple
* marked with xmin = our xid.
*
- * This routine is shared by full VACUUM, lazy VACUUM, and stand-alone
- * ANALYZE.
+ * This routine is shared by VACUUM and stand-alone ANALYZE.
*/
void
vac_update_relstats(Relation relation,
* safe since the new value is correct whether or not this transaction
* commits. As with vac_update_relstats, this avoids leaving dead tuples
* behind after a VACUUM.
- *
- * This routine is shared by full and lazy VACUUM.
*/
void
vac_update_datfrozenxid(void)
* The passed XID is simply the one I just wrote into my pg_database
* entry. It's used to initialize the "min" calculation.
*
- * This routine is shared by full and lazy VACUUM. Note that it's
- * only invoked when we've managed to change our DB's datfrozenxid
- * entry, or we found that the shared XID-wrap-limit info is stale.
+ * This routine is only invoked when we've managed to change our
+ * DB's datfrozenxid entry, or we found that the shared XID-wrap-limit
+ * info is stale.
*/
static void
vac_truncate_clog(TransactionId frozenXID)
}
-/****************************************************************************
- * *
- * Code common to both flavors of VACUUM *
- * *
- ****************************************************************************
- */
-
-
/*
* vacuum_rel() -- vacuum one heap relation
*
* In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
* other concurrent VACUUMs know that they can ignore this one while
* determining their OldestXmin. (The reason we don't set it during a
- * full VACUUM is exactly that we may have to run user- defined
+ * full VACUUM is exactly that we may have to run user-defined
* functions for functional indexes, and we want to make sure that if
* they use the snapshot set above, any tuples it requires can't get
* removed from other tables. An index function that depends on the
save_nestlevel = NewGUCNestLevel();
/*
- * Do the actual work --- either FULL, FULL INPLACE, or "lazy" vacuum.
+ * Do the actual work --- either FULL or "lazy" vacuum
*/
- if (!(vacstmt->options & VACOPT_FULL))
- heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
- else if (vacstmt->options & VACOPT_INPLACE)
- heldoff = full_vacuum_rel(onerel, vacstmt);
- else
+ if (vacstmt->options & VACOPT_FULL)
{
- /* close relation before clustering, but hold lock until commit */
+ /* close relation before vacuuming, but hold lock until commit */
relation_close(onerel, NoLock);
onerel = NULL;
+ /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
cluster_rel(relid, InvalidOid, false,
(vacstmt->options & VACOPT_VERBOSE) != 0,
vacstmt->freeze_min_age, vacstmt->freeze_table_age);
heldoff = false;
}
+ else
+ heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
/* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel);
}
-/****************************************************************************
- * *
- * Code for VACUUM FULL (only) *
- * *
- ****************************************************************************
- */
-
-
-/*
- * full_vacuum_rel() -- perform FULL VACUUM for one heap relation
- *
- * This routine vacuums a single heap, cleans out its indexes, and
- * updates its num_pages and num_tuples statistics.
- *
- * At entry, we have already established a transaction and opened
- * and locked the relation.
- *
- * The return value indicates whether this function has held off
- * interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
- */
-static bool
-full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
-{
- VacPageListData vacuum_pages; /* List of pages to vacuum and/or
- * clean indexes */
- VacPageListData fraged_pages; /* List of pages with space enough for
- * re-using */
- Relation *Irel;
- int nindexes,
- i;
- VRelStats *vacrelstats;
- bool heldoff = false;
-
- vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
- onerel->rd_rel->relisshared,
- &OldestXmin, &FreezeLimit, NULL);
-
- /*
- * Flush any previous async-commit transactions. This does not guarantee
- * that we will be able to set hint bits for tuples they inserted, but it
- * improves the probability, especially in simple sequential-commands
- * cases. See scan_heap() and repair_frag() for more about this.
- */
- XLogAsyncCommitFlush();
-
- /*
- * Set up statistics-gathering machinery.
- */
- vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
- vacrelstats->rel_pages = 0;
- vacrelstats->rel_tuples = 0;
- vacrelstats->rel_indexed_tuples = 0;
- vacrelstats->hasindex = false;
- vacrelstats->latestRemovedXid = InvalidTransactionId;
-
- /* scan the heap */
- vacuum_pages.num_pages = fraged_pages.num_pages = 0;
- scan_heap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
-
- /* Now open all indexes of the relation */
- vac_open_indexes(onerel, AccessExclusiveLock, &nindexes, &Irel);
- if (nindexes > 0)
- vacrelstats->hasindex = true;
-
- /* Clean/scan index relation(s) */
- if (Irel != NULL)
- {
- if (vacuum_pages.num_pages > 0)
- {
- for (i = 0; i < nindexes; i++)
- vacuum_index(&vacuum_pages, Irel[i],
- vacrelstats->rel_indexed_tuples, 0);
- }
- else
- {
- /* just scan indexes to update statistic */
- for (i = 0; i < nindexes; i++)
- scan_index(Irel[i], vacrelstats->rel_indexed_tuples);
- }
- }
-
- if (fraged_pages.num_pages > 0)
- {
- /* Try to shrink heap */
- heldoff = repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
- nindexes, Irel);
- vac_close_indexes(nindexes, Irel, NoLock);
- }
- else
- {
- vac_close_indexes(nindexes, Irel, NoLock);
- if (vacuum_pages.num_pages > 0)
- {
- /* Clean pages from vacuum_pages list */
- vacuum_heap(vacrelstats, onerel, &vacuum_pages);
- }
- }
-
- /* update the free space map with final free space info, and vacuum it */
- vac_update_fsm(onerel, &fraged_pages, vacrelstats->rel_pages);
- FreeSpaceMapVacuum(onerel);
-
- /* update statistics in pg_class */
- vac_update_relstats(onerel,
- vacrelstats->rel_pages, vacrelstats->rel_tuples,
- vacrelstats->hasindex, FreezeLimit);
-
- /* report results to the stats collector, too */
- pgstat_report_vacuum(RelationGetRelid(onerel),
- onerel->rd_rel->relisshared,
- true,
- vacrelstats->rel_tuples);
-
- return heldoff;
-}
-
-
/*
- * scan_heap() -- scan an open heap relation
- *
- * This routine sets commit status bits, constructs vacuum_pages (list
- * of pages we need to compact free space on and/or clean indexes of
- * deleted tuples), constructs fraged_pages (list of pages with free
- * space that tuples could be moved into), and calculates statistics
- * on the number of live tuples in the heap.
+ * Open all the indexes of the given relation, obtaining the specified kind
+ * of lock on each. Return an array of Relation pointers for the indexes
+ * into *Irel, and the number of indexes into *nindexes.
*/
-static void
-scan_heap(VRelStats *vacrelstats, Relation onerel,
- VacPageList vacuum_pages, VacPageList fraged_pages)
+void
+vac_open_indexes(Relation relation, LOCKMODE lockmode,
+ int *nindexes, Relation **Irel)
{
- BlockNumber nblocks,
- blkno;
- char *relname;
- VacPage vacpage;
- BlockNumber empty_pages,
- empty_end_pages;
- double num_tuples,
- num_indexed_tuples,
- tups_vacuumed,
- nkeep,
- nunused;
- double free_space,
- usable_free_space;
- Size min_tlen = MaxHeapTupleSize;
- Size max_tlen = 0;
- bool do_shrinking = true;
- VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
- int num_vtlinks = 0;
- int free_vtlinks = 100;
- PGRUsage ru0;
-
- pg_rusage_init(&ru0);
-
- relname = RelationGetRelationName(onerel);
- ereport(elevel,
- (errmsg("vacuuming \"%s.%s\"",
- get_namespace_name(RelationGetNamespace(onerel)),
- relname)));
-
- empty_pages = empty_end_pages = 0;
- num_tuples = num_indexed_tuples = tups_vacuumed = nkeep = nunused = 0;
- free_space = 0;
-
- nblocks = RelationGetNumberOfBlocks(onerel);
-
- /*
- * We initially create each VacPage item in a maximal-sized workspace,
- * then copy the workspace into a just-large-enough copy.
- */
- vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
-
- for (blkno = 0; blkno < nblocks; blkno++)
- {
- Page page,
- tempPage = NULL;
- bool do_reap,
- do_frag;
- Buffer buf;
- OffsetNumber offnum,
- maxoff;
- bool notup;
- OffsetNumber frozen[MaxOffsetNumber];
- int nfrozen;
-
- vacuum_delay_point();
-
- buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL,
- vac_strategy);
- page = BufferGetPage(buf);
-
- /*
- * Since we are holding exclusive lock on the relation, no other
- * backend can be accessing the page; however it is possible that the
- * background writer will try to write the page if it's already marked
- * dirty. To ensure that invalid data doesn't get written to disk, we
- * must take exclusive buffer lock wherever we potentially modify
- * pages. In fact, we insist on cleanup lock so that we can safely
- * call heap_page_prune(). (This might be overkill, since the
- * bgwriter pays no attention to individual tuples, but on the other
- * hand it's unlikely that the bgwriter has this particular page
- * pinned at this instant. So violating the coding rule would buy us
- * little anyway.)
- */
- LockBufferForCleanup(buf);
-
- vacpage->blkno = blkno;
- vacpage->offsets_used = 0;
- vacpage->offsets_free = 0;
-
- if (PageIsNew(page))
- {
- VacPage vacpagecopy;
-
- ereport(WARNING,
- (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
- relname, blkno)));
- PageInit(page, BufferGetPageSize(buf), 0);
- MarkBufferDirty(buf);
- vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
- free_space += vacpage->free;
- empty_pages++;
- empty_end_pages++;
- vacpagecopy = copy_vac_page(vacpage);
- vpage_insert(vacuum_pages, vacpagecopy);
- vpage_insert(fraged_pages, vacpagecopy);
- UnlockReleaseBuffer(buf);
- continue;
- }
-
- if (PageIsEmpty(page))
- {
- VacPage vacpagecopy;
-
- vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
- free_space += vacpage->free;
- empty_pages++;
- empty_end_pages++;
- vacpagecopy = copy_vac_page(vacpage);
- vpage_insert(vacuum_pages, vacpagecopy);
- vpage_insert(fraged_pages, vacpagecopy);
- UnlockReleaseBuffer(buf);
- continue;
- }
-
- /*
- * Prune all HOT-update chains in this page.
- *
- * We use the redirect_move option so that redirecting line pointers
- * get collapsed out; this allows us to not worry about them below.
- *
- * We count tuples removed by the pruning step as removed by VACUUM.
- */
- tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
- true, false);
-
- /*
- * Now scan the page to collect vacuumable items and check for tuples
- * requiring freezing.
- */
- nfrozen = 0;
- notup = true;
- maxoff = PageGetMaxOffsetNumber(page);
- for (offnum = FirstOffsetNumber;
- offnum <= maxoff;
- offnum = OffsetNumberNext(offnum))
- {
- ItemId itemid = PageGetItemId(page, offnum);
- bool tupgone = false;
- HeapTupleData tuple;
-
- /*
- * Collect un-used items too - it's possible to have indexes
- * pointing here after crash. (That's an ancient comment and is
- * likely obsolete with WAL, but we might as well continue to
- * check for such problems.)
- */
- if (!ItemIdIsUsed(itemid))
- {
- vacpage->offsets[vacpage->offsets_free++] = offnum;
- nunused += 1;
- continue;
- }
-
- /*
- * DEAD item pointers are to be vacuumed normally; but we don't
- * count them in tups_vacuumed, else we'd be double-counting (at
- * least in the common case where heap_page_prune() just freed up
- * a non-HOT tuple).
- */
- if (ItemIdIsDead(itemid))
- {
- vacpage->offsets[vacpage->offsets_free++] = offnum;
- continue;
- }
-
- /* Shouldn't have any redirected items anymore */
- if (!ItemIdIsNormal(itemid))
- elog(ERROR, "relation \"%s\" TID %u/%u: unexpected redirect item",
- relname, blkno, offnum);
-
- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
- tuple.t_len = ItemIdGetLength(itemid);
- ItemPointerSet(&(tuple.t_self), blkno, offnum);
-
- switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
- {
- case HEAPTUPLE_LIVE:
- /* Tuple is good --- but let's do some validity checks */
- if (onerel->rd_rel->relhasoids &&
- !OidIsValid(HeapTupleGetOid(&tuple)))
- elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
- relname, blkno, offnum);
-
- /*
- * The shrinkage phase of VACUUM FULL requires that all
- * live tuples have XMIN_COMMITTED set --- see comments in
- * repair_frag()'s walk-along-page loop. Use of async
- * commit may prevent HeapTupleSatisfiesVacuum from
- * setting the bit for a recently committed tuple. Rather
- * than trying to handle this corner case, we just give up
- * and don't shrink.
- */
- if (do_shrinking &&
- !(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
- {
- ereport(LOG,
- (errmsg("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation",
- relname, blkno, offnum,
- HeapTupleHeaderGetXmin(tuple.t_data))));
- do_shrinking = false;
- }
- break;
- case HEAPTUPLE_DEAD:
-
- /*
- * Ordinarily, DEAD tuples would have been removed by
- * heap_page_prune(), but it's possible that the tuple
- * state changed since heap_page_prune() looked. In
- * particular an INSERT_IN_PROGRESS tuple could have
- * changed to DEAD if the inserter aborted. So this
- * cannot be considered an error condition, though it does
- * suggest that someone released a lock early.
- *
- * If the tuple is HOT-updated then it must only be
- * removed by a prune operation; so we keep it as if it
- * were RECENTLY_DEAD, and abandon shrinking. (XXX is it
- * worth trying to make the shrinking code smart enough to
- * handle this? It's an unusual corner case.)
- *
- * DEAD heap-only tuples can safely be removed if they
- * aren't themselves HOT-updated, although this is a bit
- * inefficient since we'll uselessly try to remove index
- * entries for them.
- */
- if (HeapTupleIsHotUpdated(&tuple))
- {
- nkeep += 1;
- if (do_shrinking)
- ereport(LOG,
- (errmsg("relation \"%s\" TID %u/%u: dead HOT-updated tuple --- cannot shrink relation",
- relname, blkno, offnum)));
- do_shrinking = false;
- }
- else
- {
- tupgone = true; /* we can delete the tuple */
-
- /*
- * We need not require XMIN_COMMITTED or
- * XMAX_COMMITTED to be set, since we will remove the
- * tuple without any further examination of its hint
- * bits.
- */
- }
- break;
- case HEAPTUPLE_RECENTLY_DEAD:
-
- /*
- * If tuple is recently deleted then we must not remove it
- * from relation.
- */
- nkeep += 1;
-
- /*
- * As with the LIVE case, shrinkage requires
- * XMIN_COMMITTED to be set.
- */
- if (do_shrinking &&
- !(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
- {
- ereport(LOG,
- (errmsg("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation",
- relname, blkno, offnum,
- HeapTupleHeaderGetXmin(tuple.t_data))));
- do_shrinking = false;
- }
-
- /*
- * If we do shrinking and this tuple is updated one then
- * remember it to construct updated tuple dependencies.
- */
- if (do_shrinking &&
- !(ItemPointerEquals(&(tuple.t_self),
- &(tuple.t_data->t_ctid))))
- {
- if (free_vtlinks == 0)
- {
- free_vtlinks = 1000;
- vtlinks = (VTupleLink) repalloc(vtlinks,
- (free_vtlinks + num_vtlinks) *
- sizeof(VTupleLinkData));
- }
- vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
- vtlinks[num_vtlinks].this_tid = tuple.t_self;
- free_vtlinks--;
- num_vtlinks++;
- }
- break;
- case HEAPTUPLE_INSERT_IN_PROGRESS:
-
- /*
- * This should not happen, since we hold exclusive lock on
- * the relation; shouldn't we raise an error? (Actually,
- * it can happen in system catalogs, since we tend to
- * release write lock before commit there.) As above, we
- * can't apply repair_frag() if the tuple state is
- * uncertain.
- */
- if (do_shrinking)
- ereport(LOG,
- (errmsg("relation \"%s\" TID %u/%u: InsertTransactionInProgress %u --- cannot shrink relation",
- relname, blkno, offnum,
- HeapTupleHeaderGetXmin(tuple.t_data))));
- do_shrinking = false;
- break;
- case HEAPTUPLE_DELETE_IN_PROGRESS:
-
- /*
- * This should not happen, since we hold exclusive lock on
- * the relation; shouldn't we raise an error? (Actually,
- * it can happen in system catalogs, since we tend to
- * release write lock before commit there.) As above, we
- * can't apply repair_frag() if the tuple state is
- * uncertain.
- */
- if (do_shrinking)
- ereport(LOG,
- (errmsg("relation \"%s\" TID %u/%u: DeleteTransactionInProgress %u --- cannot shrink relation",
- relname, blkno, offnum,
- HeapTupleHeaderGetXmax(tuple.t_data))));
- do_shrinking = false;
- break;
- default:
- elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
- break;
- }
-
- if (tupgone)
- {
- ItemId lpp;
-
- HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
- &vacrelstats->latestRemovedXid);
-
- /*
- * Here we are building a temporary copy of the page with dead
- * tuples removed. Below we will apply
- * PageRepairFragmentation to the copy, so that we can
- * determine how much space will be available after removal of
- * dead tuples. But note we are NOT changing the real page
- * yet...
- */
- if (tempPage == NULL)
- {
- Size pageSize;
-
- pageSize = PageGetPageSize(page);
- tempPage = (Page) palloc(pageSize);
- memcpy(tempPage, page, pageSize);
- }
-
- /* mark it unused on the temp page */
- lpp = PageGetItemId(tempPage, offnum);
- ItemIdSetUnused(lpp);
-
- vacpage->offsets[vacpage->offsets_free++] = offnum;
- tups_vacuumed += 1;
- }
- else
- {
- num_tuples += 1;
- if (!HeapTupleIsHeapOnly(&tuple))
- num_indexed_tuples += 1;
- notup = false;
- if (tuple.t_len < min_tlen)
- min_tlen = tuple.t_len;
- if (tuple.t_len > max_tlen)
- max_tlen = tuple.t_len;
-
- /*
- * Each non-removable tuple must be checked to see if it needs
- * freezing.
- */
- if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
- InvalidBuffer))
- frozen[nfrozen++] = offnum;
- }
- } /* scan along page */
-
- if (tempPage != NULL)
- {
- /* Some tuples are removable; figure free space after removal */
- PageRepairFragmentation(tempPage);
- vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, tempPage);
- pfree(tempPage);
- do_reap = true;
- }
- else
- {
- /* Just use current available space */
- vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
- /* Need to reap the page if it has UNUSED or DEAD line pointers */
- do_reap = (vacpage->offsets_free > 0);
- }
-
- free_space += vacpage->free;
-
- /*
- * Add the page to vacuum_pages if it requires reaping, and add it to
- * fraged_pages if it has a useful amount of free space. "Useful"
- * means enough for a minimal-sized tuple. But we don't know that
- * accurately near the start of the relation, so add pages
- * unconditionally if they have >= BLCKSZ/10 free space. Also
- * forcibly add pages with no live tuples, to avoid confusing the
- * empty_end_pages logic. (In the presence of unreasonably small
- * fillfactor, it seems possible that such pages might not pass the
- * free-space test, but they had better be in the list anyway.)
- */
- do_frag = (vacpage->free >= min_tlen || vacpage->free >= BLCKSZ / 10 ||
- notup);
-
- if (do_reap || do_frag)
- {
- VacPage vacpagecopy = copy_vac_page(vacpage);
-
- if (do_reap)
- vpage_insert(vacuum_pages, vacpagecopy);
- if (do_frag)
- vpage_insert(fraged_pages, vacpagecopy);
- }
-
- /*
- * Include the page in empty_end_pages if it will be empty after
- * vacuuming; this is to keep us from using it as a move destination.
- * Note that such pages are guaranteed to be in fraged_pages.
- */
- if (notup)
- {
- empty_pages++;
- empty_end_pages++;
- }
- else
- empty_end_pages = 0;
-
- /*
- * If we froze any tuples, mark the buffer dirty, and write a WAL
- * record recording the changes. We must log the changes to be
- * crash-safe against future truncation of CLOG.
- */
- if (nfrozen > 0)
- {
- MarkBufferDirty(buf);
- /* no XLOG for temp tables, though */
- if (!onerel->rd_istemp)
- {
- XLogRecPtr recptr;
-
- recptr = log_heap_freeze(onerel, buf, FreezeLimit,
- frozen, nfrozen);
- PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- }
- }
-
- UnlockReleaseBuffer(buf);
- }
-
- pfree(vacpage);
+ List *indexoidlist;
+ ListCell *indexoidscan;
+ int i;
- /* save stats in the rel list for use later */
- vacrelstats->rel_tuples = num_tuples;
- vacrelstats->rel_indexed_tuples = num_indexed_tuples;
- vacrelstats->rel_pages = nblocks;
- if (num_tuples == 0)
- min_tlen = max_tlen = 0;
- vacrelstats->min_tlen = min_tlen;
- vacrelstats->max_tlen = max_tlen;
+ Assert(lockmode != NoLock);
- vacuum_pages->empty_end_pages = empty_end_pages;
- fraged_pages->empty_end_pages = empty_end_pages;
+ indexoidlist = RelationGetIndexList(relation);
- /*
- * Clear the fraged_pages list if we found we couldn't shrink. Else,
- * remove any "empty" end-pages from the list, and compute usable free
- * space = free space in remaining pages.
- */
- if (do_shrinking)
- {
- int i;
+ *nindexes = list_length(indexoidlist);
- Assert((BlockNumber) fraged_pages->num_pages >= empty_end_pages);
- fraged_pages->num_pages -= empty_end_pages;
- usable_free_space = 0;
- for (i = 0; i < fraged_pages->num_pages; i++)
- usable_free_space += fraged_pages->pagedesc[i]->free;
- }
+ if (*nindexes > 0)
+ *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
else
- {
- fraged_pages->num_pages = 0;
- usable_free_space = 0;
- }
+ *Irel = NULL;
- /* don't bother to save vtlinks if we will not call repair_frag */
- if (fraged_pages->num_pages > 0 && num_vtlinks > 0)
- {
- qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData),
- vac_cmp_vtlinks);
- vacrelstats->vtlinks = vtlinks;
- vacrelstats->num_vtlinks = num_vtlinks;
- }
- else
+ i = 0;
+ foreach(indexoidscan, indexoidlist)
{
- vacrelstats->vtlinks = NULL;
- vacrelstats->num_vtlinks = 0;
- pfree(vtlinks);
+ Oid indexoid = lfirst_oid(indexoidscan);
+
+ (*Irel)[i++] = index_open(indexoid, lockmode);
}
- ereport(elevel,
- (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
- RelationGetRelationName(onerel),
- tups_vacuumed, num_tuples, nblocks),
- errdetail("%.0f dead row versions cannot be removed yet.\n"
- "Nonremovable row versions range from %lu to %lu bytes long.\n"
- "There were %.0f unused item pointers.\n"
- "Total free space (including removable row versions) is %.0f bytes.\n"
- "%u pages are or will become empty, including %u at the end of the table.\n"
- "%u pages containing %.0f free bytes are potential move destinations.\n"
- "%s.",
- nkeep,
- (unsigned long) min_tlen, (unsigned long) max_tlen,
- nunused,
- free_space,
- empty_pages, empty_end_pages,
- fraged_pages->num_pages, usable_free_space,
- pg_rusage_show(&ru0))));
+ list_free(indexoidlist);
}
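(A minimal usage sketch for this helper and its counterpart vac_close_indexes() below; the caller and lock level are hypothetical and chosen purely for illustration:)

	Relation   *Irel;
	int         nindexes;

	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	/* ... operate on each index via Irel[0 .. nindexes-1] ... */
	vac_close_indexes(nindexes, Irel, NoLock);	/* NoLock keeps the locks until commit */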
-
/*
- * repair_frag() -- try to repair relation's fragmentation
- *
- * This routine marks dead tuples as unused and tries re-use dead space
- * by moving tuples (and inserting indexes if needed). It constructs
- * Nvacpagelist list of free-ed pages (moved tuples) and clean indexes
- * for them after committing (in hack-manner - without losing locks
- * and freeing memory!) current transaction. It truncates relation
- * if some end-blocks are gone away.
- *
- * The return value indicates whether this function has held off
- * interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
+ * Release the resources acquired by vac_open_indexes. Optionally release
+ * the locks (say NoLock to keep 'em).
*/
-static bool
-repair_frag(VRelStats *vacrelstats, Relation onerel,
- VacPageList vacuum_pages, VacPageList fraged_pages,
- int nindexes, Relation *Irel)
+void
+vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
{
- TransactionId myXID = GetCurrentTransactionId();
- Buffer dst_buffer = InvalidBuffer;
- BlockNumber nblocks,
- blkno;
- BlockNumber last_move_dest_block = 0,
- last_vacuum_block;
- Page dst_page = NULL;
- ExecContextData ec;
- VacPageListData Nvacpagelist;
- VacPage dst_vacpage = NULL,
- last_vacuum_page,
- vacpage,
- *curpage;
- int i;
- int num_moved = 0,
- num_fraged_pages,
- vacuumed_pages;
- int keep_tuples = 0;
- int keep_indexed_tuples = 0;
- PGRUsage ru0;
- bool heldoff = false;
-
- pg_rusage_init(&ru0);
-
- ExecContext_Init(&ec, onerel);
-
- Nvacpagelist.num_pages = 0;
- num_fraged_pages = fraged_pages->num_pages;
- Assert((BlockNumber) vacuum_pages->num_pages >= vacuum_pages->empty_end_pages);
- vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages;
- if (vacuumed_pages > 0)
- {
- /* get last reaped page from vacuum_pages */
- last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
- last_vacuum_block = last_vacuum_page->blkno;
- }
- else
- {
- last_vacuum_page = NULL;
- last_vacuum_block = InvalidBlockNumber;
- }
-
- vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
- vacpage->offsets_used = vacpage->offsets_free = 0;
+ if (Irel == NULL)
+ return;
- /*
- * Scan pages backwards from the last nonempty page, trying to move tuples
- * down to lower pages. Quit when we reach a page that we have moved any
- * tuples onto, or the first page if we haven't moved anything, or when we
- * find a page we cannot completely empty (this last condition is handled
- * by "break" statements within the loop).
- *
- * NB: this code depends on the vacuum_pages and fraged_pages lists being
- * in order by blkno.
- */
- nblocks = vacrelstats->rel_pages;
- for (blkno = nblocks - vacuum_pages->empty_end_pages - 1;
- blkno > last_move_dest_block;
- blkno--)
+ while (nindexes--)
{
- Buffer buf;
- Page page;
- OffsetNumber offnum,
- maxoff;
- bool isempty,
- chain_tuple_moved;
-
- vacuum_delay_point();
-
- /*
- * Forget fraged_pages pages at or after this one; they're no longer
- * useful as move targets, since we only want to move down. Note that
- * since we stop the outer loop at last_move_dest_block, pages removed
- * here cannot have had anything moved onto them already.
- *
- * Also note that we don't change the stored fraged_pages list, only
- * our local variable num_fraged_pages; so the forgotten pages are
- * still available to be loaded into the free space map later.
- */
- while (num_fraged_pages > 0 &&
- fraged_pages->pagedesc[num_fraged_pages - 1]->blkno >= blkno)
- {
- Assert(fraged_pages->pagedesc[num_fraged_pages - 1]->offsets_used == 0);
- --num_fraged_pages;
- }
-
- /*
- * Process this page of relation.
- */
- buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL,
- vac_strategy);
- page = BufferGetPage(buf);
-
- vacpage->offsets_free = 0;
-
- isempty = PageIsEmpty(page);
-
- /* Is the page in the vacuum_pages list? */
- if (blkno == last_vacuum_block)
- {
- if (last_vacuum_page->offsets_free > 0)
- {
- /* there are dead tuples on this page - clean them */
- Assert(!isempty);
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- vacuum_page(vacrelstats, onerel, buf, last_vacuum_page);
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- }
- else
- Assert(isempty);
- --vacuumed_pages;
- if (vacuumed_pages > 0)
- {
- /* get prev reaped page from vacuum_pages */
- last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
- last_vacuum_block = last_vacuum_page->blkno;
- }
- else
- {
- last_vacuum_page = NULL;
- last_vacuum_block = InvalidBlockNumber;
- }
- if (isempty)
- {
- ReleaseBuffer(buf);
- continue;
- }
- }
- else
- Assert(!isempty);
-
- chain_tuple_moved = false; /* no one chain-tuple was moved off
- * this page, yet */
- vacpage->blkno = blkno;
- maxoff = PageGetMaxOffsetNumber(page);
- for (offnum = FirstOffsetNumber;
- offnum <= maxoff;
- offnum = OffsetNumberNext(offnum))
- {
- Size tuple_len;
- HeapTupleData tuple;
- ItemId itemid = PageGetItemId(page, offnum);
-
- if (!ItemIdIsUsed(itemid))
- continue;
-
- if (ItemIdIsDead(itemid))
- {
- /* just remember it for vacuum_page() */
- vacpage->offsets[vacpage->offsets_free++] = offnum;
- continue;
- }
-
- /* Shouldn't have any redirected items now */
- Assert(ItemIdIsNormal(itemid));
-
- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
- tuple_len = tuple.t_len = ItemIdGetLength(itemid);
- ItemPointerSet(&(tuple.t_self), blkno, offnum);
-
- /* ---
- * VACUUM FULL has an exclusive lock on the relation. So
- * normally no other transaction can have pending INSERTs or
- * DELETEs in this relation. A tuple is either:
- * (a) live (XMIN_COMMITTED)
- * (b) known dead (XMIN_INVALID, or XMAX_COMMITTED and xmax
- * is visible to all active transactions)
- * (c) inserted and deleted (XMIN_COMMITTED+XMAX_COMMITTED)
- * but at least one active transaction does not see the
- * deleting transaction (ie, it's RECENTLY_DEAD)
- * (d) moved by the currently running VACUUM
- * (e) inserted or deleted by a not yet committed transaction,
- * or by a transaction we couldn't set XMIN_COMMITTED for.
- * In case (e) we wouldn't be in repair_frag() at all, because
- * scan_heap() detects those cases and shuts off shrinking.
- * We can't see case (b) here either, because such tuples were
- * already removed by vacuum_page(). Cases (a) and (c) are
- * normal and will have XMIN_COMMITTED set. Case (d) is only
- * possible if a whole tuple chain has been moved while
- * processing this or a higher numbered block.
- * ---
- */
- if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
- {
- if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
- elog(ERROR, "HEAP_MOVED_IN was not expected");
- if (!(tuple.t_data->t_infomask & HEAP_MOVED_OFF))
- elog(ERROR, "HEAP_MOVED_OFF was expected");
-
- /*
- * MOVED_OFF by another VACUUM would have caused the
- * visibility check to set XMIN_COMMITTED or XMIN_INVALID.
- */
- if (HeapTupleHeaderGetXvac(tuple.t_data) != myXID)
- elog(ERROR, "invalid XVAC in tuple header");
-
- /*
- * If this (chain) tuple is moved by me already then I have to
- * check is it in vacpage or not - i.e. is it moved while
- * cleaning this page or some previous one.
- */
-
- /* Can't we Assert(keep_tuples > 0) here? */
- if (keep_tuples == 0)
- continue;
- if (chain_tuple_moved)
- {
- /* some chains were moved while cleaning this page */
- Assert(vacpage->offsets_free > 0);
- for (i = 0; i < vacpage->offsets_free; i++)
- {
- if (vacpage->offsets[i] == offnum)
- break;
- }
- if (i >= vacpage->offsets_free) /* not found */
- {
- vacpage->offsets[vacpage->offsets_free++] = offnum;
-
- /*
- * If this is not a heap-only tuple, there must be an
- * index entry for this item which will be removed in
- * the index cleanup. Decrement the
- * keep_indexed_tuples count to remember this.
- */
- if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
- keep_indexed_tuples--;
- keep_tuples--;
- }
- }
- else
- {
- vacpage->offsets[vacpage->offsets_free++] = offnum;
-
- /*
- * If this is not a heap-only tuple, there must be an
- * index entry for this item which will be removed in the
- * index cleanup. Decrement the keep_indexed_tuples count
- * to remember this.
- */
- if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
- keep_indexed_tuples--;
- keep_tuples--;
- }
- continue;
- }
-
- /*
- * If this tuple is in a chain of tuples created in updates by
- * "recent" transactions then we have to move the whole chain of
- * tuples to other places, so that we can write new t_ctid links
- * that preserve the chain relationship.
- *
- * This test is complicated. Read it as "if tuple is a recently
- * created updated version, OR if it is an obsoleted version". (In
- * the second half of the test, we needn't make any check on XMAX
- * --- it must be recently obsoleted, else scan_heap would have
- * deemed it removable.)
- *
- * NOTE: this test is not 100% accurate: it is possible for a
- * tuple to be an updated one with recent xmin, and yet not match
- * any new_tid entry in the vtlinks list. Presumably there was
- * once a parent tuple with xmax matching the xmin, but it's
- * possible that that tuple has been removed --- for example, if
- * it had xmin = xmax and wasn't itself an updated version, then
- * HeapTupleSatisfiesVacuum would deem it removable as soon as the
- * xmin xact completes.
- *
- * To be on the safe side, we abandon the repair_frag process if
- * we cannot find the parent tuple in vtlinks. This may be overly
- * conservative; AFAICS it would be safe to move the chain.
- *
- * Also, because we distinguish DEAD and RECENTLY_DEAD tuples
- * using OldestXmin, which is a rather coarse test, it is quite
- * possible to have an update chain in which a tuple we think is
- * RECENTLY_DEAD links forward to one that is definitely DEAD. In
- * such a case the RECENTLY_DEAD tuple must actually be dead, but
- * it seems too complicated to try to make VACUUM remove it. We
- * treat each contiguous set of RECENTLY_DEAD tuples as a
- * separately movable chain, ignoring any intervening DEAD ones.
- */
- if (((tuple.t_data->t_infomask & HEAP_UPDATED) &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
- OldestXmin)) ||
- (!(tuple.t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED)) &&
- !(ItemPointerEquals(&(tuple.t_self),
- &(tuple.t_data->t_ctid)))))
- {
- Buffer Cbuf = buf;
- bool freeCbuf = false;
- bool chain_move_failed = false;
- bool moved_target = false;
- ItemPointerData Ctid;
- HeapTupleData tp = tuple;
- Size tlen = tuple_len;
- VTupleMove vtmove;
- int num_vtmove;
- int free_vtmove;
- VacPage to_vacpage = NULL;
- int to_item = 0;
- int ti;
-
- if (dst_buffer != InvalidBuffer)
- {
- ReleaseBuffer(dst_buffer);
- dst_buffer = InvalidBuffer;
- }
-
- /* Quick exit if we have no vtlinks to search in */
- if (vacrelstats->vtlinks == NULL)
- {
- elog(DEBUG2, "parent item in update-chain not found --- cannot continue repair_frag");
- break; /* out of walk-along-page loop */
- }
-
- /*
- * If this tuple is in the begin/middle of the chain then we
- * have to move to the end of chain. As with any t_ctid
- * chase, we have to verify that each new tuple is really the
- * descendant of the tuple we came from; however, here we need
- * even more than the normal amount of paranoia. If t_ctid
- * links forward to a tuple determined to be DEAD, then
- * depending on where that tuple is, it might already have
- * been removed, and perhaps even replaced by a MOVED_IN
- * tuple. We don't want to include any DEAD tuples in the
- * chain, so we have to recheck HeapTupleSatisfiesVacuum.
- */
- while (!(tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED)) &&
- !(ItemPointerEquals(&(tp.t_self),
- &(tp.t_data->t_ctid))))
- {
- ItemPointerData nextTid;
- TransactionId priorXmax;
- Buffer nextBuf;
- Page nextPage;
- OffsetNumber nextOffnum;
- ItemId nextItemid;
- HeapTupleHeader nextTdata;
- HTSV_Result nextTstatus;
-
- nextTid = tp.t_data->t_ctid;
- priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
- /* assume block# is OK (see heap_fetch comments) */
- nextBuf = ReadBufferExtended(onerel, MAIN_FORKNUM,
- ItemPointerGetBlockNumber(&nextTid),
- RBM_NORMAL, vac_strategy);
- nextPage = BufferGetPage(nextBuf);
- /* If bogus or unused slot, assume tp is end of chain */
- nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
- if (nextOffnum < FirstOffsetNumber ||
- nextOffnum > PageGetMaxOffsetNumber(nextPage))
- {
- ReleaseBuffer(nextBuf);
- break;
- }
- nextItemid = PageGetItemId(nextPage, nextOffnum);
- if (!ItemIdIsNormal(nextItemid))
- {
- ReleaseBuffer(nextBuf);
- break;
- }
- /* if not matching XMIN, assume tp is end of chain */
- nextTdata = (HeapTupleHeader) PageGetItem(nextPage,
- nextItemid);
- if (!TransactionIdEquals(HeapTupleHeaderGetXmin(nextTdata),
- priorXmax))
- {
- ReleaseBuffer(nextBuf);
- break;
- }
-
- /*
- * Must check for DEAD or MOVED_IN tuple, too. This could
- * potentially update hint bits, so we'd better hold the
- * buffer content lock.
- */
- LockBuffer(nextBuf, BUFFER_LOCK_SHARE);
- nextTstatus = HeapTupleSatisfiesVacuum(nextTdata,
- OldestXmin,
- nextBuf);
- if (nextTstatus == HEAPTUPLE_DEAD ||
- nextTstatus == HEAPTUPLE_INSERT_IN_PROGRESS)
- {
- UnlockReleaseBuffer(nextBuf);
- break;
- }
- LockBuffer(nextBuf, BUFFER_LOCK_UNLOCK);
- /* if it's MOVED_OFF we shoulda moved this one with it */
- if (nextTstatus == HEAPTUPLE_DELETE_IN_PROGRESS)
- elog(ERROR, "updated tuple is already HEAP_MOVED_OFF");
- /* OK, switch our attention to the next tuple in chain */
- tp.t_data = nextTdata;
- tp.t_self = nextTid;
- tlen = tp.t_len = ItemIdGetLength(nextItemid);
- if (freeCbuf)
- ReleaseBuffer(Cbuf);
- Cbuf = nextBuf;
- freeCbuf = true;
- }
-
- /* Set up workspace for planning the chain move */
- vtmove = (VTupleMove) palloc(100 * sizeof(VTupleMoveData));
- num_vtmove = 0;
- free_vtmove = 100;
-
- /*
- * Now, walk backwards up the chain (towards older tuples) and
- * check if all items in chain can be moved. We record all
- * the moves that need to be made in the vtmove array.
- */
- for (;;)
- {
- Buffer Pbuf;
- Page Ppage;
- ItemId Pitemid;
- HeapTupleHeader PTdata;
- VTupleLinkData vtld,
- *vtlp;
-
- /* Identify a target page to move this tuple to */
- if (to_vacpage == NULL ||
- !enough_space(to_vacpage, tlen))
- {
- for (i = 0; i < num_fraged_pages; i++)
- {
- if (enough_space(fraged_pages->pagedesc[i], tlen))
- break;
- }
-
- if (i == num_fraged_pages)
- {
- /* can't move item anywhere */
- chain_move_failed = true;
- break; /* out of check-all-items loop */
- }
- to_item = i;
- to_vacpage = fraged_pages->pagedesc[to_item];
- }
- to_vacpage->free -= MAXALIGN(tlen);
- if (to_vacpage->offsets_used >= to_vacpage->offsets_free)
- to_vacpage->free -= sizeof(ItemIdData);
- (to_vacpage->offsets_used)++;
-
- /* Add an entry to vtmove list */
- if (free_vtmove == 0)
- {
- free_vtmove = 1000;
- vtmove = (VTupleMove)
- repalloc(vtmove,
- (free_vtmove + num_vtmove) *
- sizeof(VTupleMoveData));
- }
- vtmove[num_vtmove].tid = tp.t_self;
- vtmove[num_vtmove].vacpage = to_vacpage;
- if (to_vacpage->offsets_used == 1)
- vtmove[num_vtmove].cleanVpd = true;
- else
- vtmove[num_vtmove].cleanVpd = false;
- free_vtmove--;
- num_vtmove++;
-
- /* Remember if we reached the original target tuple */
- if (ItemPointerGetBlockNumber(&tp.t_self) == blkno &&
- ItemPointerGetOffsetNumber(&tp.t_self) == offnum)
- moved_target = true;
-
- /* Done if at beginning of chain */
- if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
- TransactionIdPrecedes(HeapTupleHeaderGetXmin(tp.t_data),
- OldestXmin))
- break; /* out of check-all-items loop */
-
- /* Move to tuple with prior row version */
- vtld.new_tid = tp.t_self;
- vtlp = (VTupleLink)
- vac_bsearch((void *) &vtld,
- (void *) (vacrelstats->vtlinks),
- vacrelstats->num_vtlinks,
- sizeof(VTupleLinkData),
- vac_cmp_vtlinks);
- if (vtlp == NULL)
- {
- /* see discussion above */
- elog(DEBUG2, "parent item in update-chain not found --- cannot continue repair_frag");
- chain_move_failed = true;
- break; /* out of check-all-items loop */
- }
- tp.t_self = vtlp->this_tid;
- Pbuf = ReadBufferExtended(onerel, MAIN_FORKNUM,
- ItemPointerGetBlockNumber(&(tp.t_self)),
- RBM_NORMAL, vac_strategy);
- Ppage = BufferGetPage(Pbuf);
- Pitemid = PageGetItemId(Ppage,
- ItemPointerGetOffsetNumber(&(tp.t_self)));
- /* this can't happen since we saw tuple earlier: */
- if (!ItemIdIsNormal(Pitemid))
- elog(ERROR, "parent itemid marked as unused");
- PTdata = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
-
- /* ctid should not have changed since we saved it */
- Assert(ItemPointerEquals(&(vtld.new_tid),
- &(PTdata->t_ctid)));
-
- /*
- * Read above about cases when !ItemIdIsUsed(nextItemid)
- * (child item is removed)... Due to the fact that at the
- * moment we don't remove unuseful part of update-chain,
- * it's possible to get non-matching parent row here. Like
- * as in the case which caused this problem, we stop
- * shrinking here. I could try to find real parent row but
- * want not to do it because of real solution will be
- * implemented anyway, later, and we are too close to 6.5
- * release. - vadim 06/11/99
- */
- if ((PTdata->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !(TransactionIdEquals(HeapTupleHeaderGetXmax(PTdata),
- HeapTupleHeaderGetXmin(tp.t_data))))
- {
- ReleaseBuffer(Pbuf);
- elog(DEBUG2, "too old parent tuple found --- cannot continue repair_frag");
- chain_move_failed = true;
- break; /* out of check-all-items loop */
- }
- tp.t_data = PTdata;
- tlen = tp.t_len = ItemIdGetLength(Pitemid);
- if (freeCbuf)
- ReleaseBuffer(Cbuf);
- Cbuf = Pbuf;
- freeCbuf = true;
- } /* end of check-all-items loop */
-
- if (freeCbuf)
- ReleaseBuffer(Cbuf);
- freeCbuf = false;
-
- /* Double-check that we will move the current target tuple */
- if (!moved_target && !chain_move_failed)
- {
- elog(DEBUG2, "failed to chain back to target --- cannot continue repair_frag");
- chain_move_failed = true;
- }
-
- if (chain_move_failed)
- {
- /*
- * Undo changes to offsets_used state. We don't bother
- * cleaning up the amount-free state, since we're not
- * going to do any further tuple motion.
- */
- for (i = 0; i < num_vtmove; i++)
- {
- Assert(vtmove[i].vacpage->offsets_used > 0);
- (vtmove[i].vacpage->offsets_used)--;
- }
- pfree(vtmove);
- break; /* out of walk-along-page loop */
- }
-
- /*
- * Okay, move the whole tuple chain in reverse order.
- *
- * Ctid tracks the new location of the previously-moved tuple.
- */
- ItemPointerSetInvalid(&Ctid);
- for (ti = 0; ti < num_vtmove; ti++)
- {
- VacPage destvacpage = vtmove[ti].vacpage;
- Page Cpage;
- ItemId Citemid;
-
- /* Get page to move from */
- tuple.t_self = vtmove[ti].tid;
- Cbuf = ReadBufferExtended(onerel, MAIN_FORKNUM,
- ItemPointerGetBlockNumber(&(tuple.t_self)),
- RBM_NORMAL, vac_strategy);
-
- /* Get page to move to */
- dst_buffer = ReadBufferExtended(onerel, MAIN_FORKNUM,
- destvacpage->blkno,
- RBM_NORMAL, vac_strategy);
-
- LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
- if (dst_buffer != Cbuf)
- LockBuffer(Cbuf, BUFFER_LOCK_EXCLUSIVE);
-
- dst_page = BufferGetPage(dst_buffer);
- Cpage = BufferGetPage(Cbuf);
-
- Citemid = PageGetItemId(Cpage,
- ItemPointerGetOffsetNumber(&(tuple.t_self)));
- tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
- tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
-
- move_chain_tuple(vacrelstats, onerel, Cbuf, Cpage, &tuple,
- dst_buffer, dst_page, destvacpage,
- &ec, &Ctid, vtmove[ti].cleanVpd);
-
- /*
- * If the tuple we are moving is a heap-only tuple, this
- * move will generate an additional index entry, so
- * increment the rel_indexed_tuples count.
- */
- if (HeapTupleHeaderIsHeapOnly(tuple.t_data))
- vacrelstats->rel_indexed_tuples++;
-
- num_moved++;
- if (destvacpage->blkno > last_move_dest_block)
- last_move_dest_block = destvacpage->blkno;
-
- /*
- * Remember that we moved tuple from the current page
- * (corresponding index tuple will be cleaned).
- */
- if (Cbuf == buf)
- vacpage->offsets[vacpage->offsets_free++] =
- ItemPointerGetOffsetNumber(&(tuple.t_self));
- else
- {
- /*
- * When we move tuple chains, we may need to move
- * tuples from a block that we haven't yet scanned in
- * the outer walk-along-the-relation loop. Note that
- * we can't be moving a tuple from a block that we
- * have already scanned because if such a tuple
- * exists, then we must have moved the chain along
- * with that tuple when we scanned that block. IOW the
- * test of (Cbuf != buf) guarantees that the tuple we
- * are looking at right now is in a block which is yet
- * to be scanned.
- *
- * We maintain two counters to correctly count the
- * moved-off tuples from blocks that are not yet
- * scanned (keep_tuples) and how many of them have
- * index pointers (keep_indexed_tuples). The main
- * reason to track the latter is to help verify that
- * indexes have the expected number of entries when
- * all the dust settles.
- */
- if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
- keep_indexed_tuples++;
- keep_tuples++;
- }
-
- ReleaseBuffer(dst_buffer);
- ReleaseBuffer(Cbuf);
- } /* end of move-the-tuple-chain loop */
-
- dst_buffer = InvalidBuffer;
- pfree(vtmove);
- chain_tuple_moved = true;
-
- /* advance to next tuple in walk-along-page loop */
- continue;
- } /* end of is-tuple-in-chain test */
-
- /* try to find new page for this tuple */
- if (dst_buffer == InvalidBuffer ||
- !enough_space(dst_vacpage, tuple_len))
- {
- if (dst_buffer != InvalidBuffer)
- {
- ReleaseBuffer(dst_buffer);
- dst_buffer = InvalidBuffer;
- }
- for (i = 0; i < num_fraged_pages; i++)
- {
- if (enough_space(fraged_pages->pagedesc[i], tuple_len))
- break;
- }
- if (i == num_fraged_pages)
- break; /* can't move item anywhere */
- dst_vacpage = fraged_pages->pagedesc[i];
- dst_buffer = ReadBufferExtended(onerel, MAIN_FORKNUM,
- dst_vacpage->blkno,
- RBM_NORMAL, vac_strategy);
- LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
- dst_page = BufferGetPage(dst_buffer);
- /* if this page was not used before - clean it */
- if (!PageIsEmpty(dst_page) && dst_vacpage->offsets_used == 0)
- vacuum_page(vacrelstats, onerel, dst_buffer, dst_vacpage);
- }
- else
- LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
-
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-
- move_plain_tuple(onerel, buf, page, &tuple,
- dst_buffer, dst_page, dst_vacpage, &ec);
-
- /*
- * If the tuple we are moving is a heap-only tuple, this move will
- * generate an additional index entry, so increment the
- * rel_indexed_tuples count.
- */
- if (HeapTupleHeaderIsHeapOnly(tuple.t_data))
- vacrelstats->rel_indexed_tuples++;
-
- num_moved++;
- if (dst_vacpage->blkno > last_move_dest_block)
- last_move_dest_block = dst_vacpage->blkno;
-
- /*
- * Remember that we moved tuple from the current page
- * (corresponding index tuple will be cleaned).
- */
- vacpage->offsets[vacpage->offsets_free++] = offnum;
- } /* walk along page */
-
- /*
- * If we broke out of the walk-along-page loop early (ie, still have
- * offnum <= maxoff), then we failed to move some tuple off this page.
- * No point in shrinking any more, so clean up and exit the per-page
- * loop.
- */
- if (offnum < maxoff && keep_tuples > 0)
- {
- OffsetNumber off;
-
- /*
- * Fix vacpage state for any unvisited tuples remaining on page
- */
- for (off = OffsetNumberNext(offnum);
- off <= maxoff;
- off = OffsetNumberNext(off))
- {
- ItemId itemid = PageGetItemId(page, off);
- HeapTupleHeader htup;
-
- if (!ItemIdIsUsed(itemid))
- continue;
- /* Shouldn't be any DEAD or REDIRECT items anymore */
- Assert(ItemIdIsNormal(itemid));
-
- htup = (HeapTupleHeader) PageGetItem(page, itemid);
- if (htup->t_infomask & HEAP_XMIN_COMMITTED)
- continue;
-
- /*
- * See comments in the walk-along-page loop above about why
- * only MOVED_OFF tuples should be found here.
- */
- if (htup->t_infomask & HEAP_MOVED_IN)
- elog(ERROR, "HEAP_MOVED_IN was not expected");
- if (!(htup->t_infomask & HEAP_MOVED_OFF))
- elog(ERROR, "HEAP_MOVED_OFF was expected");
- if (HeapTupleHeaderGetXvac(htup) != myXID)
- elog(ERROR, "invalid XVAC in tuple header");
-
- if (chain_tuple_moved)
- {
- /* some chains were moved while cleaning this page */
- Assert(vacpage->offsets_free > 0);
- for (i = 0; i < vacpage->offsets_free; i++)
- {
- if (vacpage->offsets[i] == off)
- break;
- }
- if (i >= vacpage->offsets_free) /* not found */
- {
- vacpage->offsets[vacpage->offsets_free++] = off;
- Assert(keep_tuples > 0);
-
- /*
- * If this is not a heap-only tuple, there must be an
- * index entry for this item which will be removed in
- * the index cleanup. Decrement the
- * keep_indexed_tuples count to remember this.
- */
- if (!HeapTupleHeaderIsHeapOnly(htup))
- keep_indexed_tuples--;
- keep_tuples--;
- }
- }
- else
- {
- vacpage->offsets[vacpage->offsets_free++] = off;
- Assert(keep_tuples > 0);
- if (!HeapTupleHeaderIsHeapOnly(htup))
- keep_indexed_tuples--;
- keep_tuples--;
- }
- }
- }
-
- if (vacpage->offsets_free > 0) /* some tuples were moved */
- {
- if (chain_tuple_moved) /* else - they are ordered */
- {
- qsort((char *) (vacpage->offsets), vacpage->offsets_free,
- sizeof(OffsetNumber), vac_cmp_offno);
- }
- vpage_insert(&Nvacpagelist, copy_vac_page(vacpage));
- }
-
- ReleaseBuffer(buf);
-
- if (offnum <= maxoff)
- break; /* had to quit early, see above note */
-
- } /* walk along relation */
-
- blkno++; /* new number of blocks */
-
- if (dst_buffer != InvalidBuffer)
- {
- Assert(num_moved > 0);
- ReleaseBuffer(dst_buffer);
- }
-
- if (num_moved > 0)
- {
- /*
- * We have to commit our tuple moves before we truncate the
- * relation. Ideally we should do Commit/StartTransactionCommand
- * here, relying on the session-level table lock to protect our
- * exclusive access to the relation. However, that would require a
- * lot of extra code to close and re-open the relation, indexes, etc.
- * For now, a quick hack: record status of current transaction as
- * committed, and continue. We force the commit to be synchronous so
- * that it's down to disk before we truncate. (Note: tqual.c knows
- * that VACUUM FULL always uses sync commit, too.) The transaction
- * continues to be shown as running in the ProcArray.
- *
- * XXX This desperately needs to be revisited. Any failure after this
- * point will result in a PANIC "cannot abort transaction nnn, it was
- * already committed"! As a precaution, we prevent cancel interrupts
- * after this point to mitigate this problem; the caller is responsible for
- * re-enabling them after committing the transaction.
- */
- HOLD_INTERRUPTS();
- heldoff = true;
- ForceSyncCommit();
- (void) RecordTransactionCommit(true);
- }
-
- /*
- * We are not going to move any more tuples across pages, but we still
- * need to apply vacuum_page to compact free space in the remaining pages
- * in vacuum_pages list. Note that some of these pages may also be in the
- * fraged_pages list, and may have had tuples moved onto them; if so, we
- * already did vacuum_page and needn't do it again.
- */
- for (i = 0, curpage = vacuum_pages->pagedesc;
- i < vacuumed_pages;
- i++, curpage++)
- {
- vacuum_delay_point();
-
- Assert((*curpage)->blkno < blkno);
- if ((*curpage)->offsets_used == 0)
- {
- Buffer buf;
- Page page;
-
- /* this page was not used as a move target, so must clean it */
- buf = ReadBufferExtended(onerel, MAIN_FORKNUM, (*curpage)->blkno,
- RBM_NORMAL, vac_strategy);
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- page = BufferGetPage(buf);
- if (!PageIsEmpty(page))
- vacuum_page(vacrelstats, onerel, buf, *curpage);
- UnlockReleaseBuffer(buf);
- }
- }
-
- /*
- * Now scan all the pages that we moved tuples onto and update tuple
- * status bits. This is not really necessary, but it will save time for
- * future transactions examining these tuples.
- */
- update_hint_bits(onerel, fraged_pages, num_fraged_pages,
- last_move_dest_block, num_moved);
-
- /*
- * It'd be cleaner to make this report at the bottom of this routine, but
- * then the rusage would double-count the second pass of index vacuuming.
- * So do it here and ignore the relatively small amount of processing that
- * occurs below.
- */
- ereport(elevel,
- (errmsg("\"%s\": moved %u row versions, truncated %u to %u pages",
- RelationGetRelationName(onerel),
- num_moved, nblocks, blkno),
- errdetail("%s.",
- pg_rusage_show(&ru0))));
-
- /*
- * Reflect the motion of system tuples to the catalog caches here.
- */
- CommandCounterIncrement();
-
- if (Nvacpagelist.num_pages > 0)
- {
- /* vacuum indexes again if needed */
- if (Irel != NULL)
- {
- VacPage *vpleft,
- *vpright,
- vpsave;
-
- /* re-sort Nvacpagelist.pagedesc */
- for (vpleft = Nvacpagelist.pagedesc,
- vpright = Nvacpagelist.pagedesc + Nvacpagelist.num_pages - 1;
- vpleft < vpright; vpleft++, vpright--)
- {
- vpsave = *vpleft;
- *vpleft = *vpright;
- *vpright = vpsave;
- }
-
- /*
- * keep_tuples is the number of tuples that have been moved off a
- * page during chain moves but have not been scanned over subsequently.
- * The tuple ids of these tuples are not recorded as free offsets
- * for any VacPage, so they will not be cleared from the indexes.
- * keep_indexed_tuples is the portion of these that are expected
- * to have index entries.
- */
- Assert(keep_tuples >= 0);
- for (i = 0; i < nindexes; i++)
- vacuum_index(&Nvacpagelist, Irel[i],
- vacrelstats->rel_indexed_tuples,
- keep_indexed_tuples);
- }
-
- /*
- * Clean moved-off tuples from last page in Nvacpagelist list.
- *
- * We need only do this on this one page, because higher-numbered
- * pages are going to be truncated from the relation entirely. But see
- * comments for update_hint_bits().
- */
- if (vacpage->blkno == (blkno - 1) &&
- vacpage->offsets_free > 0)
- {
- Buffer buf;
- Page page;
- OffsetNumber unused[MaxOffsetNumber];
- OffsetNumber offnum,
- maxoff;
- int uncnt = 0;
- int num_tuples = 0;
-
- buf = ReadBufferExtended(onerel, MAIN_FORKNUM, vacpage->blkno,
- RBM_NORMAL, vac_strategy);
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- page = BufferGetPage(buf);
- maxoff = PageGetMaxOffsetNumber(page);
- for (offnum = FirstOffsetNumber;
- offnum <= maxoff;
- offnum = OffsetNumberNext(offnum))
- {
- ItemId itemid = PageGetItemId(page, offnum);
- HeapTupleHeader htup;
-
- if (!ItemIdIsUsed(itemid))
- continue;
- /* Shouldn't be any DEAD or REDIRECT items anymore */
- Assert(ItemIdIsNormal(itemid));
-
- htup = (HeapTupleHeader) PageGetItem(page, itemid);
- if (htup->t_infomask & HEAP_XMIN_COMMITTED)
- continue;
-
- /*
- * See comments in the walk-along-page loop above about why
- * only MOVED_OFF tuples should be found here.
- */
- if (htup->t_infomask & HEAP_MOVED_IN)
- elog(ERROR, "HEAP_MOVED_IN was not expected");
- if (!(htup->t_infomask & HEAP_MOVED_OFF))
- elog(ERROR, "HEAP_MOVED_OFF was expected");
- if (HeapTupleHeaderGetXvac(htup) != myXID)
- elog(ERROR, "invalid XVAC in tuple header");
-
- ItemIdSetUnused(itemid);
- num_tuples++;
-
- unused[uncnt++] = offnum;
- }
- Assert(vacpage->offsets_free == num_tuples);
-
- START_CRIT_SECTION();
-
- PageRepairFragmentation(page);
-
- MarkBufferDirty(buf);
-
- /* XLOG stuff */
- if (!onerel->rd_istemp)
- {
- XLogRecPtr recptr;
-
- recptr = log_heap_clean(onerel, buf,
- NULL, 0, NULL, 0,
- unused, uncnt,
- vacrelstats->latestRemovedXid, false);
- PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- }
-
- END_CRIT_SECTION();
-
- UnlockReleaseBuffer(buf);
- }
-
- /* now - free new list of reaped pages */
- curpage = Nvacpagelist.pagedesc;
- for (i = 0; i < Nvacpagelist.num_pages; i++, curpage++)
- pfree(*curpage);
- pfree(Nvacpagelist.pagedesc);
- }
-
- /* Truncate relation, if needed */
- if (blkno < nblocks)
- {
- RelationTruncate(onerel, blkno);
-
- /* force relcache inval so all backends reset their rd_targblock */
- CacheInvalidateRelcache(onerel);
-
- vacrelstats->rel_pages = blkno; /* set new number of blocks */
- }
-
- /* clean up */
- pfree(vacpage);
- if (vacrelstats->vtlinks != NULL)
- pfree(vacrelstats->vtlinks);
-
- ExecContext_Finish(&ec);
-
- return heldoff;
-}
-
-/*
- * move_chain_tuple() -- move one tuple that is part of a tuple chain
- *
- * This routine moves old_tup from old_page to dst_page.
- * old_page and dst_page might be the same page.
- * On entry old_buf and dst_buf are locked exclusively, both locks (or
- * the single lock, if this is an intra-page move) are released before
- * exit.
- *
- * Yes, a routine with eleven parameters is ugly, but it's still better
- * than having these 120 lines of code in repair_frag(), which is
- * already too long and almost unreadable.
- */
-static void
-move_chain_tuple(VRelStats *vacrelstats, Relation rel,
- Buffer old_buf, Page old_page, HeapTuple old_tup,
- Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
- ExecContext ec, ItemPointer ctid, bool cleanVpd)
-{
- TransactionId myXID = GetCurrentTransactionId();
- HeapTupleData newtup;
- OffsetNumber newoff;
- ItemId newitemid;
- Size tuple_len = old_tup->t_len;
- bool all_visible_cleared = false;
- bool all_visible_cleared_new = false;
-
- /*
- * make a modifiable copy of the source tuple.
- */
- heap_copytuple_with_tuple(old_tup, &newtup);
-
- /*
- * register invalidation of source tuple in catcaches.
- */
- CacheInvalidateHeapTuple(rel, old_tup);
-
- /* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */
- START_CRIT_SECTION();
-
- /*
- * mark the source tuple MOVED_OFF.
- */
- old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
- HEAP_XMIN_INVALID |
- HEAP_MOVED_IN);
- old_tup->t_data->t_infomask |= HEAP_MOVED_OFF;
- HeapTupleHeaderSetXvac(old_tup->t_data, myXID);
-
- /*
- * If this page was not used before - clean it.
- *
- * NOTE: a nasty bug used to lurk here. It is possible for the source and
- * destination pages to be the same (since this tuple-chain member can be
- * on a page lower than the one we're currently processing in the outer
- * loop). If that's true, then after vacuum_page() the source tuple will
- * have been moved, and tuple.t_data will be pointing at garbage.
- * Therefore we must do everything that uses old_tup->t_data BEFORE this
- * step!!
- *
- * This path is different from the other callers of vacuum_page, because
- * we have already incremented the vacpage's offsets_used field to account
- * for the tuple(s) we expect to move onto the page. Therefore
- * vacuum_page's check for offsets_used == 0 is wrong. But since that's a
- * good debugging check for all other callers, we work around it here
- * rather than remove it.
- */
- if (!PageIsEmpty(dst_page) && cleanVpd)
- {
- int sv_offsets_used = dst_vacpage->offsets_used;
-
- dst_vacpage->offsets_used = 0;
- vacuum_page(vacrelstats, rel, dst_buf, dst_vacpage);
- dst_vacpage->offsets_used = sv_offsets_used;
- }
-
- /*
- * Update the state of the copied tuple, and store it on the destination
- * page. The copied tuple is never part of a HOT chain.
- */
- newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
- HEAP_XMIN_INVALID |
- HEAP_MOVED_OFF);
- newtup.t_data->t_infomask |= HEAP_MOVED_IN;
- HeapTupleHeaderClearHotUpdated(newtup.t_data);
- HeapTupleHeaderClearHeapOnly(newtup.t_data);
- HeapTupleHeaderSetXvac(newtup.t_data, myXID);
- newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
- InvalidOffsetNumber, false, true);
- if (newoff == InvalidOffsetNumber)
- elog(PANIC, "failed to add item with len = %lu to page %u while moving tuple chain",
- (unsigned long) tuple_len, dst_vacpage->blkno);
- newitemid = PageGetItemId(dst_page, newoff);
- /* drop temporary copy, and point to the version on the dest page */
- pfree(newtup.t_data);
- newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid);
-
- ItemPointerSet(&(newtup.t_self), dst_vacpage->blkno, newoff);
-
- /*
- * Set new tuple's t_ctid pointing to itself if last tuple in chain, and
- * to next tuple in chain otherwise. (Since we move the chain in reverse
- * order, this is actually the previously processed tuple.)
- */
- if (!ItemPointerIsValid(ctid))
- newtup.t_data->t_ctid = newtup.t_self;
- else
- newtup.t_data->t_ctid = *ctid;
- *ctid = newtup.t_self;
-
- /* clear PD_ALL_VISIBLE flags */
- if (PageIsAllVisible(old_page))
- {
- all_visible_cleared = true;
- PageClearAllVisible(old_page);
- }
- if (dst_buf != old_buf && PageIsAllVisible(dst_page))
- {
- all_visible_cleared_new = true;
- PageClearAllVisible(dst_page);
- }
-
- MarkBufferDirty(dst_buf);
- if (dst_buf != old_buf)
- MarkBufferDirty(old_buf);
-
- /* XLOG stuff */
- if (!rel->rd_istemp)
- {
- XLogRecPtr recptr = log_heap_move(rel, old_buf, old_tup->t_self,
- dst_buf, &newtup,
- all_visible_cleared,
- all_visible_cleared_new);
-
- if (old_buf != dst_buf)
- {
- PageSetLSN(old_page, recptr);
- PageSetTLI(old_page, ThisTimeLineID);
- }
- PageSetLSN(dst_page, recptr);
- PageSetTLI(dst_page, ThisTimeLineID);
- }
-
- END_CRIT_SECTION();
-
- LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK);
- if (dst_buf != old_buf)
- LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
-
- /* Clear bits in visibility map */
- if (all_visible_cleared)
- visibilitymap_clear(rel, BufferGetBlockNumber(old_buf));
- if (all_visible_cleared_new)
- visibilitymap_clear(rel, BufferGetBlockNumber(dst_buf));
-
- /* Create index entries for the moved tuple */
- if (ec->resultRelInfo->ri_NumIndices > 0)
- {
- ExecStoreTuple(&newtup, ec->slot, InvalidBuffer, false);
- ExecInsertIndexTuples(ec->slot, &(newtup.t_self), ec->estate, true);
- ResetPerTupleExprContext(ec->estate);
- }
-}
-
-/*
- * move_plain_tuple() -- move one tuple that is not part of a chain
- *
- * This routine moves old_tup from old_page to dst_page.
- * On entry old_buf and dst_buf are locked exclusively, both locks are
- * released before exit.
- *
- * Yes, a routine with eight parameters is ugly, but it's still better
- * than having these 90 lines of code in repair_frag(), which is already
- * too long and almost unreadable.
- */
-static void
-move_plain_tuple(Relation rel,
- Buffer old_buf, Page old_page, HeapTuple old_tup,
- Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
- ExecContext ec)
-{
- TransactionId myXID = GetCurrentTransactionId();
- HeapTupleData newtup;
- OffsetNumber newoff;
- ItemId newitemid;
- Size tuple_len = old_tup->t_len;
- bool all_visible_cleared = false;
- bool all_visible_cleared_new = false;
-
- /* copy tuple */
- heap_copytuple_with_tuple(old_tup, &newtup);
-
- /*
- * register invalidation of source tuple in catcaches.
- *
- * (Note: we do not need to register the copied tuple, because we are not
- * changing the tuple contents and so there cannot be any need to flush
- * negative catcache entries.)
- */
- CacheInvalidateHeapTuple(rel, old_tup);
-
- /* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */
- START_CRIT_SECTION();
-
- /*
- * Mark new tuple as MOVED_IN by me; also mark it not HOT.
- */
- newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
- HEAP_XMIN_INVALID |
- HEAP_MOVED_OFF);
- newtup.t_data->t_infomask |= HEAP_MOVED_IN;
- HeapTupleHeaderClearHotUpdated(newtup.t_data);
- HeapTupleHeaderClearHeapOnly(newtup.t_data);
- HeapTupleHeaderSetXvac(newtup.t_data, myXID);
-
- /* add tuple to the page */
- newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
- InvalidOffsetNumber, false, true);
- if (newoff == InvalidOffsetNumber)
- elog(PANIC, "failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)",
- (unsigned long) tuple_len,
- dst_vacpage->blkno, (unsigned long) dst_vacpage->free,
- dst_vacpage->offsets_used, dst_vacpage->offsets_free);
- newitemid = PageGetItemId(dst_page, newoff);
- pfree(newtup.t_data);
- newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid);
- ItemPointerSet(&(newtup.t_data->t_ctid), dst_vacpage->blkno, newoff);
- newtup.t_self = newtup.t_data->t_ctid;
-
- /*
- * Mark old tuple as MOVED_OFF by me.
- */
- old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
- HEAP_XMIN_INVALID |
- HEAP_MOVED_IN);
- old_tup->t_data->t_infomask |= HEAP_MOVED_OFF;
- HeapTupleHeaderSetXvac(old_tup->t_data, myXID);
-
- /* clear PD_ALL_VISIBLE flags */
- if (PageIsAllVisible(old_page))
- {
- all_visible_cleared = true;
- PageClearAllVisible(old_page);
- }
- if (PageIsAllVisible(dst_page))
- {
- all_visible_cleared_new = true;
- PageClearAllVisible(dst_page);
- }
-
- MarkBufferDirty(dst_buf);
- MarkBufferDirty(old_buf);
-
- /* XLOG stuff */
- if (!rel->rd_istemp)
- {
- XLogRecPtr recptr = log_heap_move(rel, old_buf, old_tup->t_self,
- dst_buf, &newtup,
- all_visible_cleared,
- all_visible_cleared_new);
-
- PageSetLSN(old_page, recptr);
- PageSetTLI(old_page, ThisTimeLineID);
- PageSetLSN(dst_page, recptr);
- PageSetTLI(dst_page, ThisTimeLineID);
- }
-
- END_CRIT_SECTION();
-
- dst_vacpage->free = PageGetFreeSpaceWithFillFactor(rel, dst_page);
- LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK);
- LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
-
- dst_vacpage->offsets_used++;
-
- /* Clear bits in visibility map */
- if (all_visible_cleared)
- visibilitymap_clear(rel, BufferGetBlockNumber(old_buf));
- if (all_visible_cleared_new)
- visibilitymap_clear(rel, BufferGetBlockNumber(dst_buf));
-
- /* insert index tuples if needed */
- if (ec->resultRelInfo->ri_NumIndices > 0)
- {
- ExecStoreTuple(&newtup, ec->slot, InvalidBuffer, false);
- ExecInsertIndexTuples(ec->slot, &(newtup.t_self), ec->estate, true);
- ResetPerTupleExprContext(ec->estate);
- }
-}
-
-/*
- * update_hint_bits() -- update hint bits in destination pages
- *
- * Scan all the pages that we moved tuples onto and update tuple status bits.
- * This is not really necessary, but it will save time for future transactions
- * examining these tuples.
- *
- * This pass guarantees that all HEAP_MOVED_IN tuples are marked as
- * XMIN_COMMITTED, so that future tqual tests won't need to check their XVAC.
- *
- * BUT NOTICE that this code fails to clear HEAP_MOVED_OFF tuples from
- * pages that were move source pages but not move dest pages. The bulk
- * of the move source pages will be physically truncated from the relation,
- * and the last page remaining in the rel will be fixed separately in
- * repair_frag(), so the only cases where a MOVED_OFF tuple won't get its
- * hint bits updated are tuples that are moved as part of a chain and were
- * on pages that were neither move destinations nor at the end of the rel.
- * To completely ensure that no MOVED_OFF tuples remain unmarked, we'd have
- * to remember and revisit those pages too.
- *
- * One wonders whether it wouldn't be better to skip this work entirely,
- * and let the tuple status updates happen someplace that's not holding an
- * exclusive lock on the relation.
- */
-static void
-update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
- BlockNumber last_move_dest_block, int num_moved)
-{
- TransactionId myXID = GetCurrentTransactionId();
- int checked_moved = 0;
- int i;
- VacPage *curpage;
-
- for (i = 0, curpage = fraged_pages->pagedesc;
- i < num_fraged_pages;
- i++, curpage++)
- {
- Buffer buf;
- Page page;
- OffsetNumber max_offset;
- OffsetNumber off;
- int num_tuples = 0;
-
- vacuum_delay_point();
-
- if ((*curpage)->blkno > last_move_dest_block)
- break; /* no need to scan any further */
- if ((*curpage)->offsets_used == 0)
- continue; /* this page was never used as a move dest */
- buf = ReadBufferExtended(rel, MAIN_FORKNUM, (*curpage)->blkno,
- RBM_NORMAL, vac_strategy);
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- page = BufferGetPage(buf);
- max_offset = PageGetMaxOffsetNumber(page);
- for (off = FirstOffsetNumber;
- off <= max_offset;
- off = OffsetNumberNext(off))
- {
- ItemId itemid = PageGetItemId(page, off);
- HeapTupleHeader htup;
-
- if (!ItemIdIsUsed(itemid))
- continue;
- /* Shouldn't be any DEAD or REDIRECT items anymore */
- Assert(ItemIdIsNormal(itemid));
-
- htup = (HeapTupleHeader) PageGetItem(page, itemid);
- if (htup->t_infomask & HEAP_XMIN_COMMITTED)
- continue;
-
- /*
- * Here we may see either MOVED_OFF or MOVED_IN tuples.
- */
- if (!(htup->t_infomask & HEAP_MOVED))
- elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
- if (HeapTupleHeaderGetXvac(htup) != myXID)
- elog(ERROR, "invalid XVAC in tuple header");
-
- if (htup->t_infomask & HEAP_MOVED_IN)
- {
- htup->t_infomask |= HEAP_XMIN_COMMITTED;
- htup->t_infomask &= ~HEAP_MOVED;
- num_tuples++;
- }
- else
- htup->t_infomask |= HEAP_XMIN_INVALID;
- }
- MarkBufferDirty(buf);
- UnlockReleaseBuffer(buf);
- Assert((*curpage)->offsets_used == num_tuples);
- checked_moved += num_tuples;
- }
- Assert(num_moved == checked_moved);
-}
-
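
The hint-bit pass above boils down to a small flag-manipulation pattern: MOVED_IN tuples get XMIN_COMMITTED set and the MOVED bits cleared, while MOVED_OFF tuples get XMIN_INVALID. A standalone sketch of that pattern follows; the flag values are illustrative placeholders, not the real HEAP_* bits from access/htup.h.

#include <stdio.h>
#include <stdint.h>

#define XMIN_COMMITTED 0x0100          /* hypothetical flag values, for illustration only */
#define XMIN_INVALID   0x0200
#define MOVED_OFF      0x4000
#define MOVED_IN       0x8000
#define MOVED          (MOVED_OFF | MOVED_IN)

/* Apply the same hint-bit transitions as the update_hint_bits() loop above. */
static uint16_t
finish_moved_tuple(uint16_t infomask)
{
    if (infomask & MOVED_IN)
    {
        /* moved in by us: xmin is committed, MOVED bits no longer needed */
        infomask |= XMIN_COMMITTED;
        infomask &= ~MOVED;
    }
    else if (infomask & MOVED_OFF)
    {
        /* moved off by us: the old copy's xmin is treated as invalid */
        infomask |= XMIN_INVALID;
    }
    return infomask;
}

int
main(void)
{
    printf("%#x\n", (unsigned) finish_moved_tuple(MOVED_IN));   /* prints 0x100 */
    printf("%#x\n", (unsigned) finish_moved_tuple(MOVED_OFF));  /* prints 0x4200 */
    return 0;
}
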
-/*
- * vacuum_heap() -- free dead tuples
- *
- * This routine marks dead tuples as unused and truncates the relation
- * if there are "empty" end-blocks.
- */
-static void
-vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
-{
- Buffer buf;
- VacPage *vacpage;
- BlockNumber relblocks;
- int nblocks;
- int i;
-
- nblocks = vacuum_pages->num_pages;
- nblocks -= vacuum_pages->empty_end_pages; /* nothing to do with them */
-
- for (i = 0, vacpage = vacuum_pages->pagedesc; i < nblocks; i++, vacpage++)
- {
- vacuum_delay_point();
-
- if ((*vacpage)->offsets_free > 0)
- {
- buf = ReadBufferExtended(onerel, MAIN_FORKNUM, (*vacpage)->blkno,
- RBM_NORMAL, vac_strategy);
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- vacuum_page(vacrelstats, onerel, buf, *vacpage);
- UnlockReleaseBuffer(buf);
- }
- }
-
- /* Truncate relation if there are some empty end-pages */
- Assert(vacrelstats->rel_pages >= vacuum_pages->empty_end_pages);
- if (vacuum_pages->empty_end_pages > 0)
- {
- relblocks = vacrelstats->rel_pages - vacuum_pages->empty_end_pages;
- ereport(elevel,
- (errmsg("\"%s\": truncated %u to %u pages",
- RelationGetRelationName(onerel),
- vacrelstats->rel_pages, relblocks)));
- RelationTruncate(onerel, relblocks);
-
- /* force relcache inval so all backends reset their rd_targblock */
- CacheInvalidateRelcache(onerel);
-
- vacrelstats->rel_pages = relblocks; /* set new number of blocks */
- }
-}
-
-/*
- * vacuum_page() -- free dead tuples on a page
- * and repair its fragmentation.
- *
- * Caller must hold pin and lock on buffer.
- */
-static void
-vacuum_page(VRelStats *vacrelstats, Relation onerel, Buffer buffer, VacPage vacpage)
-{
- Page page = BufferGetPage(buffer);
- int i;
-
- /* There shouldn't be any tuples moved onto the page yet! */
- Assert(vacpage->offsets_used == 0);
-
- START_CRIT_SECTION();
-
- for (i = 0; i < vacpage->offsets_free; i++)
- {
- ItemId itemid = PageGetItemId(page, vacpage->offsets[i]);
-
- ItemIdSetUnused(itemid);
- }
-
- PageRepairFragmentation(page);
-
- MarkBufferDirty(buffer);
-
- /* XLOG stuff */
- if (!onerel->rd_istemp)
- {
- XLogRecPtr recptr;
-
- recptr = log_heap_clean(onerel, buffer,
- NULL, 0, NULL, 0,
- vacpage->offsets, vacpage->offsets_free,
- vacrelstats->latestRemovedXid, false);
- PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- }
-
- END_CRIT_SECTION();
-}
-
-/*
- * scan_index() -- scan one index relation to update pg_class statistics.
- *
- * We use this when we have no deletions to do.
- */
-static void
-scan_index(Relation indrel, double num_tuples)
-{
- IndexBulkDeleteResult *stats;
- IndexVacuumInfo ivinfo;
- PGRUsage ru0;
-
- pg_rusage_init(&ru0);
-
- ivinfo.index = indrel;
- ivinfo.vacuum_full = true;
- ivinfo.analyze_only = false;
- ivinfo.estimated_count = false;
- ivinfo.message_level = elevel;
- ivinfo.num_heap_tuples = num_tuples;
- ivinfo.strategy = vac_strategy;
-
- stats = index_vacuum_cleanup(&ivinfo, NULL);
-
- if (!stats)
- return;
-
- /*
- * Now update statistics in pg_class, but only if the index says the count
- * is accurate.
- */
- if (!stats->estimated_count)
- vac_update_relstats(indrel,
- stats->num_pages, stats->num_index_tuples,
- false, InvalidTransactionId);
-
- ereport(elevel,
- (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
- RelationGetRelationName(indrel),
- stats->num_index_tuples,
- stats->num_pages),
- errdetail("%u index pages have been deleted, %u are currently reusable.\n"
- "%s.",
- stats->pages_deleted, stats->pages_free,
- pg_rusage_show(&ru0))));
-
- /*
- * Check for tuple count mismatch. If the index is partial, then it's OK
- * for it to have fewer tuples than the heap; otherwise we have a problem.
- */
- if (!stats->estimated_count &&
- stats->num_index_tuples != num_tuples)
- {
- if (stats->num_index_tuples > num_tuples ||
- !vac_is_partial_index(indrel))
- ereport(WARNING,
- (errmsg("index \"%s\" contains %.0f row versions, but table contains %.0f row versions",
- RelationGetRelationName(indrel),
- stats->num_index_tuples, num_tuples),
- errhint("Rebuild the index with REINDEX.")));
- }
-
- pfree(stats);
-}
-
-/*
- * vacuum_index() -- vacuum one index relation.
- *
- * vacpagelist is the VacPageList of the heap we're currently vacuuming;
- * the heap is already locked. Indrel is an index relation on the vacuumed heap.
- *
- * We don't bother to set locks on the index relation here, since
- * the parent table is exclusive-locked already.
- *
- * Finally, we arrange to update the index relation's statistics in
- * pg_class.
- */
-static void
-vacuum_index(VacPageList vacpagelist, Relation indrel,
- double num_tuples, int keep_tuples)
-{
- IndexBulkDeleteResult *stats;
- IndexVacuumInfo ivinfo;
- PGRUsage ru0;
-
- pg_rusage_init(&ru0);
-
- ivinfo.index = indrel;
- ivinfo.vacuum_full = true;
- ivinfo.analyze_only = false;
- ivinfo.estimated_count = false;
- ivinfo.message_level = elevel;
- ivinfo.num_heap_tuples = num_tuples + keep_tuples;
- ivinfo.strategy = vac_strategy;
-
- /* Do bulk deletion */
- stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
-
- /* Do post-VACUUM cleanup */
- stats = index_vacuum_cleanup(&ivinfo, stats);
-
- if (!stats)
- return;
-
- /*
- * Now update statistics in pg_class, but only if the index says the count
- * is accurate.
- */
- if (!stats->estimated_count)
- vac_update_relstats(indrel,
- stats->num_pages, stats->num_index_tuples,
- false, InvalidTransactionId);
-
- ereport(elevel,
- (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
- RelationGetRelationName(indrel),
- stats->num_index_tuples,
- stats->num_pages),
- errdetail("%.0f index row versions were removed.\n"
- "%u index pages have been deleted, %u are currently reusable.\n"
- "%s.",
- stats->tuples_removed,
- stats->pages_deleted, stats->pages_free,
- pg_rusage_show(&ru0))));
-
- /*
- * Check for tuple count mismatch. If the index is partial, then it's OK
- * for it to have fewer tuples than the heap; otherwise we have a problem.
- */
- if (!stats->estimated_count &&
- stats->num_index_tuples != num_tuples + keep_tuples)
- {
- if (stats->num_index_tuples > num_tuples + keep_tuples ||
- !vac_is_partial_index(indrel))
- ereport(WARNING,
- (errmsg("index \"%s\" contains %.0f row versions, but table contains %.0f row versions",
- RelationGetRelationName(indrel),
- stats->num_index_tuples, num_tuples + keep_tuples),
- errhint("Rebuild the index with REINDEX.")));
- }
-
- pfree(stats);
-}
-
-/*
- * tid_reaped() -- is a particular tid reaped?
- *
- * This has the right signature to be an IndexBulkDeleteCallback.
- *
- * vacpagelist->pagedesc is sorted in the right order.
- */
-static bool
-tid_reaped(ItemPointer itemptr, void *state)
-{
- VacPageList vacpagelist = (VacPageList) state;
- OffsetNumber ioffno;
- OffsetNumber *voff;
- VacPage vp,
- *vpp;
- VacPageData vacpage;
-
- vacpage.blkno = ItemPointerGetBlockNumber(itemptr);
- ioffno = ItemPointerGetOffsetNumber(itemptr);
-
- vp = &vacpage;
- vpp = (VacPage *) vac_bsearch((void *) &vp,
- (void *) (vacpagelist->pagedesc),
- vacpagelist->num_pages,
- sizeof(VacPage),
- vac_cmp_blk);
-
- if (vpp == NULL)
- return false;
-
- /* ok - we are on a partially or fully reaped page */
- vp = *vpp;
-
- if (vp->offsets_free == 0)
- {
- /* this is EmptyPage, so claim all tuples on it are reaped!!! */
- return true;
- }
-
- voff = (OffsetNumber *) vac_bsearch((void *) &ioffno,
- (void *) (vp->offsets),
- vp->offsets_free,
- sizeof(OffsetNumber),
- vac_cmp_offno);
-
- if (voff == NULL)
- return false;
-
- /* tid is reaped */
- return true;
-}
-
-/*
- * Update the Free Space Map with the info we now have about free space in
- * the relation.
- */
-static void
-vac_update_fsm(Relation onerel, VacPageList fraged_pages,
- BlockNumber rel_pages)
-{
- int nPages = fraged_pages->num_pages;
- VacPage *pagedesc = fraged_pages->pagedesc;
- int i;
-
- for (i = 0; i < nPages; i++)
- {
- /*
- * fraged_pages may contain entries for pages that we later decided to
- * truncate from the relation; don't enter them into the free space
- * map!
- */
- if (pagedesc[i]->blkno >= rel_pages)
- break;
-
- RecordPageWithFreeSpace(onerel, pagedesc[i]->blkno, pagedesc[i]->free);
- }
-
-}
-
-/* Copy a VacPage structure */
-static VacPage
-copy_vac_page(VacPage vacpage)
-{
- VacPage newvacpage;
-
- /* allocate a VacPageData entry */
- newvacpage = (VacPage) palloc(sizeof(VacPageData) +
- vacpage->offsets_free * sizeof(OffsetNumber));
-
- /* fill it in */
- if (vacpage->offsets_free > 0)
- memcpy(newvacpage->offsets, vacpage->offsets,
- vacpage->offsets_free * sizeof(OffsetNumber));
- newvacpage->blkno = vacpage->blkno;
- newvacpage->free = vacpage->free;
- newvacpage->offsets_used = vacpage->offsets_used;
- newvacpage->offsets_free = vacpage->offsets_free;
-
- return newvacpage;
-}
-
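
copy_vac_page duplicates a structure whose last member is a variable-length offset array (declared offsets[1] in the pre-C99 style). The same copy pattern, sketched standalone with a C99 flexible array member and malloc in place of palloc, using illustrative type and field names:

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

typedef struct PageSlots            /* illustrative stand-in for VacPageData */
{
    uint32_t    blkno;              /* block this entry describes */
    uint16_t    nslots;             /* number of entries in slots[] */
    uint16_t    slots[];            /* C99 flexible array member */
} PageSlots;

/* Duplicate src, including however many slot numbers it carries. */
static PageSlots *
copy_page_slots(const PageSlots *src)
{
    PageSlots  *dst = malloc(sizeof(PageSlots) + src->nslots * sizeof(uint16_t));

    if (dst == NULL)
        abort();
    dst->blkno = src->blkno;
    dst->nslots = src->nslots;
    if (src->nslots > 0)
        memcpy(dst->slots, src->slots, src->nslots * sizeof(uint16_t));
    return dst;
}

int
main(void)
{
    PageSlots  *p = malloc(sizeof(PageSlots) + 3 * sizeof(uint16_t));
    PageSlots  *q;

    p->blkno = 42;
    p->nslots = 3;
    p->slots[0] = 1;
    p->slots[1] = 5;
    p->slots[2] = 9;
    q = copy_page_slots(p);
    free(p);
    free(q);
    return 0;
}
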
-/*
- * Add a VacPage pointer to a VacPageList.
- *
- * As a side effect of the way that scan_heap works,
- * higher pages come after lower pages in the array
- * (and the highest tid on a page is last).
- */
-static void
-vpage_insert(VacPageList vacpagelist, VacPage vpnew)
-{
-#define PG_NPAGEDESC 1024
-
- /* allocate a VacPage entry if needed */
- if (vacpagelist->num_pages == 0)
- {
- vacpagelist->pagedesc = (VacPage *) palloc(PG_NPAGEDESC * sizeof(VacPage));
- vacpagelist->num_allocated_pages = PG_NPAGEDESC;
- }
- else if (vacpagelist->num_pages >= vacpagelist->num_allocated_pages)
- {
- vacpagelist->num_allocated_pages *= 2;
- vacpagelist->pagedesc = (VacPage *) repalloc(vacpagelist->pagedesc, vacpagelist->num_allocated_pages * sizeof(VacPage));
- }
- vacpagelist->pagedesc[vacpagelist->num_pages] = vpnew;
- (vacpagelist->num_pages)++;
-}
-
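
vpage_insert grows its pointer array geometrically: allocate a fixed initial size on first use, then double whenever the array fills. A standalone version of the same append logic, using malloc/realloc in place of palloc/repalloc and illustrative names:

#include <stdlib.h>

#define INITIAL_CAPACITY 1024          /* counterpart of PG_NPAGEDESC */

typedef struct PtrList                 /* illustrative stand-in for VacPageListData */
{
    void      **items;
    int         nitems;
    int         nallocated;
} PtrList;

/*
 * Append one pointer, allocating on first use and doubling the array
 * whenever it fills up.  The list must start out zero-initialized.
 */
static void
ptrlist_append(PtrList *list, void *item)
{
    if (list->nitems == 0)
    {
        list->items = malloc(INITIAL_CAPACITY * sizeof(void *));
        list->nallocated = INITIAL_CAPACITY;
    }
    else if (list->nitems >= list->nallocated)
    {
        list->nallocated *= 2;
        list->items = realloc(list->items, list->nallocated * sizeof(void *));
    }
    if (list->items == NULL)
        abort();                       /* out of memory */
    list->items[list->nitems++] = item;
}

int
main(void)
{
    PtrList     list = {0};
    int         x = 7;

    ptrlist_append(&list, &x);
    return list.nitems - 1;            /* exits 0 after one append */
}
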
-/*
- * vac_bsearch: just like standard C library routine bsearch(),
- * except that we first test to see whether the target key is outside
- * the range of the table entries. This case is handled relatively slowly
- * by the normal binary search algorithm (ie, no faster than any other key)
- * but it occurs often enough in VACUUM to be worth optimizing.
- */
-static void *
-vac_bsearch(const void *key, const void *base,
- size_t nelem, size_t size,
- int (*compar) (const void *, const void *))
-{
- int res;
- const void *last;
-
- if (nelem == 0)
- return NULL;
- res = compar(key, base);
- if (res < 0)
- return NULL;
- if (res == 0)
- return (void *) base;
- if (nelem > 1)
- {
- last = (const void *) ((const char *) base + (nelem - 1) * size);
- res = compar(key, last);
- if (res > 0)
- return NULL;
- if (res == 0)
- return (void *) last;
- }
- if (nelem <= 2)
- return NULL; /* already checked 'em all */
- return bsearch(key, base, nelem, size, compar);
-}
-
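
vac_bsearch exists only to reject keys that fall outside the table's range before paying for a full binary search, since such probes are common when index entries point at pages VACUUM never touched. The same idea over a sorted array of ints, in standalone C with illustrative names:

#include <stdio.h>
#include <stdlib.h>

static int
cmp_int(const void *a, const void *b)
{
    int         l = *(const int *) a;
    int         r = *(const int *) b;

    return (l > r) - (l < r);
}

/* bsearch(), but test the endpoints first so out-of-range keys fail fast. */
static void *
bounded_bsearch(const void *key, const void *base, size_t nelem, size_t size,
                int (*compar) (const void *, const void *))
{
    const void *last;
    int         res;

    if (nelem == 0)
        return NULL;
    res = compar(key, base);
    if (res < 0)
        return NULL;                   /* below the smallest entry */
    if (res == 0)
        return (void *) base;
    if (nelem > 1)
    {
        last = (const char *) base + (nelem - 1) * size;
        res = compar(key, last);
        if (res > 0)
            return NULL;               /* above the largest entry */
        if (res == 0)
            return (void *) last;
    }
    if (nelem <= 2)
        return NULL;                   /* both candidates already checked */
    return bsearch(key, base, nelem, size, compar);
}

int
main(void)
{
    int         sorted[] = {2, 4, 8, 16, 32};
    int         key = 99;

    if (bounded_bsearch(&key, sorted, 5, sizeof(int), cmp_int) == NULL)
        printf("not found\n");         /* rejected without a full search */
    return 0;
}
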
-/*
- * Comparator routines for use with qsort() and bsearch().
- */
-static int
-vac_cmp_blk(const void *left, const void *right)
-{
- BlockNumber lblk,
- rblk;
-
- lblk = (*((VacPage *) left))->blkno;
- rblk = (*((VacPage *) right))->blkno;
-
- if (lblk < rblk)
- return -1;
- if (lblk == rblk)
- return 0;
- return 1;
-}
-
-static int
-vac_cmp_offno(const void *left, const void *right)
-{
- if (*(OffsetNumber *) left < *(OffsetNumber *) right)
- return -1;
- if (*(OffsetNumber *) left == *(OffsetNumber *) right)
- return 0;
- return 1;
-}
-
-static int
-vac_cmp_vtlinks(const void *left, const void *right)
-{
- if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi <
- ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
- return -1;
- if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi >
- ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
- return 1;
- /* bi_hi-es are equal */
- if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo <
- ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
- return -1;
- if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo >
- ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
- return 1;
- /* bi_lo-es are equal */
- if (((VTupleLink) left)->new_tid.ip_posid <
- ((VTupleLink) right)->new_tid.ip_posid)
- return -1;
- if (((VTupleLink) left)->new_tid.ip_posid >
- ((VTupleLink) right)->new_tid.ip_posid)
- return 1;
- return 0;
-}
-
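
vac_cmp_vtlinks is a three-level lexicographic comparator: block-id high word, then low word, then line pointer position. The same pattern can be written more compactly for qsort over a TID-like struct; the struct below is an illustrative stand-in, not the real ItemPointerData.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef struct TidKey                  /* illustrative stand-in for a ctid */
{
    uint16_t    hi;                    /* block number, high word */
    uint16_t    lo;                    /* block number, low word */
    uint16_t    pos;                   /* line pointer position */
} TidKey;

/* Compare field by field; each (a > b) - (a < b) term yields -1, 0 or 1. */
static int
cmp_tidkey(const void *left, const void *right)
{
    const TidKey *l = left;
    const TidKey *r = right;

    if (l->hi != r->hi)
        return (l->hi > r->hi) - (l->hi < r->hi);
    if (l->lo != r->lo)
        return (l->lo > r->lo) - (l->lo < r->lo);
    return (l->pos > r->pos) - (l->pos < r->pos);
}

int
main(void)
{
    TidKey      tids[] = {{0, 7, 2}, {0, 3, 9}, {0, 3, 1}};

    qsort(tids, 3, sizeof(TidKey), cmp_tidkey);
    printf("(%u,%u)\n", (unsigned) tids[0].lo, (unsigned) tids[0].pos);  /* (3,1) */
    return 0;
}
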

/*
 * Open all the indexes of the given relation, obtaining the specified kind
 * of lock on each.  Return an array of Relation pointers for the indexes
 * into *Irel, and the number of indexes into *nindexes.
 */
void
vac_open_indexes(Relation relation, LOCKMODE lockmode,
 int *nindexes, Relation **Irel)
{
 List *indexoidlist;
 ListCell *indexoidscan;
 int i;

 Assert(lockmode != NoLock);

 indexoidlist = RelationGetIndexList(relation);

 *nindexes = list_length(indexoidlist);

 if (*nindexes > 0)
 *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
 else
 *Irel = NULL;

 i = 0;
 foreach(indexoidscan, indexoidlist)
 {
 Oid indexoid = lfirst_oid(indexoidscan);

 (*Irel)[i++] = index_open(indexoid, lockmode);
 }

 list_free(indexoidlist);
}

/*
 * Release the resources acquired by vac_open_indexes.  Optionally release
 * the locks (say NoLock to keep 'em).
 */
void
vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
{
 if (Irel == NULL)
 return;

 while (nindexes--)
 {
 Relation ind = Irel[nindexes];
index_close(ind, lockmode);
}
pfree(Irel);
}
-
-/*
- * Is an index partial (ie, could it contain fewer tuples than the heap?)
- */
-bool
-vac_is_partial_index(Relation indrel)
-{
- /*
- * If the index's AM doesn't support nulls, it's partial for our purposes
- */
- if (!indrel->rd_am->amindexnulls)
- return true;
-
- /* Otherwise, look to see if there's a partial-index predicate */
- if (!heap_attisnull(indrel->rd_indextuple, Anum_pg_index_indpred))
- return true;
-
- return false;
-}
-
-
-static bool
-enough_space(VacPage vacpage, Size len)
-{
- len = MAXALIGN(len);
-
- if (len > vacpage->free)
- return false;
-
- /* if there are free itemid(s) and len <= free_space... */
- if (vacpage->offsets_used < vacpage->offsets_free)
- return true;
-
- /* noff_used >= noff_free, so we'll have to allocate a new itemid */
- if (len + sizeof(ItemIdData) <= vacpage->free)
- return true;
-
- return false;
-}
-
-static Size
-PageGetFreeSpaceWithFillFactor(Relation relation, Page page)
-{
- /*
- * It is correct to use PageGetExactFreeSpace() here, *not*
- * PageGetHeapFreeSpace(). This is because (a) we do our own, exact
- * accounting for whether line pointers must be added, and (b) we will
- * recycle any LP_DEAD line pointers before starting to add rows to a
- * page, but that may not have happened yet at the time this function is
- * applied to a page, which means PageGetHeapFreeSpace()'s protection
- * against too many line pointers on a page could fire incorrectly. We do
- * not need that protection here: since VACUUM FULL always recycles all
- * dead line pointers first, it'd be physically impossible to insert more
- * than MaxHeapTuplesPerPage tuples anyway.
- */
- Size freespace = PageGetExactFreeSpace(page);
- Size targetfree;
-
- targetfree = RelationGetTargetPageFreeSpace(relation,
- HEAP_DEFAULT_FILLFACTOR);
- if (freespace > targetfree)
- return freespace - targetfree;
- else
- return 0;
-}
-
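
Taken together, the last two helpers answer one question: does a page have room for a MAXALIGN'd tuple, counting a new line pointer when no free one can be reused, after reserving the relation's fillfactor target? A standalone sketch with illustrative constants (the real values come from MAXALIGN, sizeof(ItemIdData) and the relation's reloptions):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ALIGN_BOUNDARY  8              /* illustrative MAXALIGN boundary */
#define LINE_POINTER_SZ 4              /* illustrative sizeof(ItemIdData) */

static size_t
align_up(size_t len)
{
    return (len + ALIGN_BOUNDARY - 1) & ~((size_t) (ALIGN_BOUNDARY - 1));
}

/* Free space usable for new tuples, after reserving the fillfactor target. */
static size_t
usable_free_space(size_t exact_free, size_t fillfactor_target)
{
    return exact_free > fillfactor_target ? exact_free - fillfactor_target : 0;
}

/*
 * Will a tuple of 'len' bytes fit?  When every free line pointer is already
 * spoken for (used >= free slots), a new line pointer must fit as well.
 */
static bool
enough_space_for(size_t len, size_t free_space,
                 unsigned slots_used, unsigned slots_free)
{
    len = align_up(len);
    if (len > free_space)
        return false;
    if (slots_used < slots_free)
        return true;                   /* can reuse an existing line pointer */
    return len + LINE_POINTER_SZ <= free_space;
}

int
main(void)
{
    size_t      free_space = usable_free_space(120, 40);     /* 80 bytes usable */

    printf("%d\n", enough_space_for(76, free_space, 2, 3));  /* 1: reuses a free slot */
    printf("%d\n", enough_space_for(76, free_space, 3, 3));  /* 0: no room for a new slot */
    return 0;
}
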
/*
* vacuum_delay_point --- check for interrupts and cost-based delay.
*