1 /*-------------------------------------------------------------------------
4 * The postgres vacuum cleaner.
6 * This file includes the "full" version of VACUUM, as well as control code
7 * used by all three of full VACUUM, lazy VACUUM, and ANALYZE. See
8 * vacuumlazy.c and analyze.c for the rest of the code for the latter two.
11 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
16 * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.402 2010/01/02 16:57:40 momjian Exp $
18 *-------------------------------------------------------------------------
25 #include "access/clog.h"
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/transam.h"
29 #include "access/visibilitymap.h"
30 #include "access/xact.h"
31 #include "access/xlog.h"
32 #include "catalog/namespace.h"
33 #include "catalog/pg_database.h"
34 #include "catalog/pg_namespace.h"
35 #include "catalog/storage.h"
36 #include "commands/dbcommands.h"
37 #include "commands/vacuum.h"
38 #include "executor/executor.h"
39 #include "miscadmin.h"
41 #include "postmaster/autovacuum.h"
42 #include "storage/bufmgr.h"
43 #include "storage/freespace.h"
44 #include "storage/lmgr.h"
45 #include "storage/proc.h"
46 #include "storage/procarray.h"
47 #include "utils/acl.h"
48 #include "utils/builtins.h"
49 #include "utils/fmgroids.h"
50 #include "utils/guc.h"
51 #include "utils/inval.h"
52 #include "utils/lsyscache.h"
53 #include "utils/memutils.h"
54 #include "utils/pg_rusage.h"
55 #include "utils/relcache.h"
56 #include "utils/snapmgr.h"
57 #include "utils/syscache.h"
58 #include "utils/tqual.h"
64 int vacuum_freeze_min_age;
65 int vacuum_freeze_table_age;
68 * VacPage structures keep track of each page on which we find useful
69 * amounts of free space.
71 typedef struct VacPageData
73 BlockNumber blkno; /* BlockNumber of this Page */
74 Size free; /* FreeSpace on this Page */
75 uint16 offsets_used; /* Number of OffNums used by vacuum */
76 uint16 offsets_free; /* Number of OffNums free or to be freed */
77 OffsetNumber offsets[1]; /* Array of free OffNums */
80 typedef VacPageData *VacPage;
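
/*
 * Note: offsets[1] above is the traditional pre-C99 variable-length-array
 * trick.  scan_heap() and repair_frag() palloc each working VacPageData with
 * room for MaxOffsetNumber offsets, and copy_vac_page() then makes a
 * just-large-enough copy containing only the offsets actually recorded.
 */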
82 typedef struct VacPageListData
84 BlockNumber empty_end_pages; /* Number of "empty" end-pages */
85 int num_pages; /* Number of pages in pagedesc */
86 int num_allocated_pages; /* Number of allocated pages in
88 VacPage *pagedesc; /* Descriptions of pages */
91 typedef VacPageListData *VacPageList;
94 * The "vtlinks" array keeps information about each recently-updated tuple
95 * ("recent" meaning its XMAX is too new to let us recycle the tuple).
96 * We store the tuple's own TID as well as its t_ctid (its link to the next
97 * newer tuple version). Searching in this array allows us to follow update
98 * chains backwards from newer to older tuples. When we move a member of an
99 * update chain, we must move *all* the live members of the chain, so that we
100 * can maintain their t_ctid link relationships (we must not just overwrite
101 * t_ctid in an existing tuple).
103 * Note: because t_ctid links can be stale (this would only occur if a prior
104 * VACUUM crashed partway through), it is possible that new_tid points to an
105 * empty slot or unrelated tuple. We have to check the linkage as we follow
106 * it, just as is done in EvalPlanQualFetch.
108 typedef struct VTupleLinkData
110 ItemPointerData new_tid; /* t_ctid of an updated tuple */
111 ItemPointerData this_tid; /* t_self of the tuple */
114 typedef VTupleLinkData *VTupleLink;
117 * We use an array of VTupleMoveData to plan a chain tuple move fully
120 typedef struct VTupleMoveData
122 ItemPointerData tid; /* tuple ID */
123 VacPage vacpage; /* where to move it to */
124 bool cleanVpd; /* clean vacpage before using? */
127 typedef VTupleMoveData *VTupleMove;
130 * VRelStats contains the data acquired by scan_heap for use later
132 typedef struct VRelStats
134 /* miscellaneous statistics */
135 BlockNumber rel_pages; /* pages in relation */
136 double rel_tuples; /* tuples that remain after vacuuming */
137 double rel_indexed_tuples; /* indexed tuples that remain */
138 Size min_tlen; /* min surviving tuple size */
139 Size max_tlen; /* max surviving tuple size */
141 /* vtlinks array for tuple chain following - sorted by new_tid */
144 TransactionId latestRemovedXid;
147 /*----------------------------------------------------------------------
150 * As these variables always appear together, we put them into one struct
151 * and pull initialization and cleanup into separate routines.
152 * ExecContext is used by repair_frag() and move_xxx_tuple(). More
153 * accurately: It is *used* only in move_xxx_tuple(), but because this
154 * routine is called many times, we initialize the struct just once in
155 * repair_frag() and pass it on to move_xxx_tuple().
157 typedef struct ExecContextData
159 ResultRelInfo *resultRelInfo;
161 TupleTableSlot *slot;
164 typedef ExecContextData *ExecContext;
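
/*
 * Typical usage, as in repair_frag() (a sketch):
 *
 *		ExecContextData ec;
 *
 *		ExecContext_Init(&ec, onerel);
 *		... move_chain_tuple()/move_plain_tuple() use ec to build index
 *		... entries for each tuple they move ...
 *		ExecContext_Finish(&ec);
 */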
167 ExecContext_Init(ExecContext ec, Relation rel)
169 TupleDesc tupdesc = RelationGetDescr(rel);
172 * We need a ResultRelInfo and an EState so we can use the regular
173 * executor's index-entry-making machinery.
175 ec->estate = CreateExecutorState();
177 ec->resultRelInfo = makeNode(ResultRelInfo);
178 ec->resultRelInfo->ri_RangeTableIndex = 1; /* dummy */
179 ec->resultRelInfo->ri_RelationDesc = rel;
180 ec->resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */
182 ExecOpenIndices(ec->resultRelInfo);
184 ec->estate->es_result_relations = ec->resultRelInfo;
185 ec->estate->es_num_result_relations = 1;
186 ec->estate->es_result_relation_info = ec->resultRelInfo;
188 /* Set up a tuple slot too */
189 ec->slot = MakeSingleTupleTableSlot(tupdesc);
193 ExecContext_Finish(ExecContext ec)
195 ExecDropSingleTupleTableSlot(ec->slot);
196 ExecCloseIndices(ec->resultRelInfo);
197 FreeExecutorState(ec->estate);
201 * End of ExecContext Implementation
202 *----------------------------------------------------------------------
205 /* A few variables that don't seem worth passing around as parameters */
206 static MemoryContext vac_context = NULL;
208 static int elevel = -1;
210 static TransactionId OldestXmin;
211 static TransactionId FreezeLimit;
213 static BufferAccessStrategy vac_strategy;
216 /* non-export function prototypes */
217 static List *get_rel_oids(Oid relid, const RangeVar *vacrel,
218 const char *stmttype);
219 static void vac_truncate_clog(TransactionId frozenXID);
220 static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
221 bool for_wraparound, bool *scanned_all);
222 static bool full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
223 static void scan_heap(VRelStats *vacrelstats, Relation onerel,
224 VacPageList vacuum_pages, VacPageList fraged_pages);
225 static bool repair_frag(VRelStats *vacrelstats, Relation onerel,
226 VacPageList vacuum_pages, VacPageList fraged_pages,
227 int nindexes, Relation *Irel);
228 static void move_chain_tuple(VRelStats *vacrelstats, Relation rel,
229 Buffer old_buf, Page old_page, HeapTuple old_tup,
230 Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
231 ExecContext ec, ItemPointer ctid, bool cleanVpd);
232 static void move_plain_tuple(Relation rel,
233 Buffer old_buf, Page old_page, HeapTuple old_tup,
234 Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
236 static void update_hint_bits(Relation rel, VacPageList fraged_pages,
237 int num_fraged_pages, BlockNumber last_move_dest_block,
239 static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
240 VacPageList vacpagelist);
241 static void vacuum_page(VRelStats *vacrelstats, Relation onerel, Buffer buffer, VacPage vacpage);
242 static void vacuum_index(VacPageList vacpagelist, Relation indrel,
243 double num_tuples, int keep_tuples);
244 static void scan_index(Relation indrel, double num_tuples);
245 static bool tid_reaped(ItemPointer itemptr, void *state);
246 static void vac_update_fsm(Relation onerel, VacPageList fraged_pages,
247 BlockNumber rel_pages);
248 static VacPage copy_vac_page(VacPage vacpage);
249 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
250 static void *vac_bsearch(const void *key, const void *base,
251 size_t nelem, size_t size,
252 int (*compar) (const void *, const void *));
253 static int vac_cmp_blk(const void *left, const void *right);
254 static int vac_cmp_offno(const void *left, const void *right);
255 static int vac_cmp_vtlinks(const void *left, const void *right);
256 static bool enough_space(VacPage vacpage, Size len);
257 static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
260 /****************************************************************************
262 * Code common to all flavors of VACUUM and ANALYZE *
264 ****************************************************************************
269 * Primary entry point for VACUUM and ANALYZE commands.
271 * relid is normally InvalidOid; if it is not, then it provides the relation
272 * OID to be processed, and vacstmt->relation is ignored. (The non-invalid
273 * case is currently only used by autovacuum.)
275 * do_toast is passed as FALSE by autovacuum, because it processes TOAST tables separately.
278 * for_wraparound is used by autovacuum to let us know when it's forcing
279 * a vacuum for wraparound, which should not be auto-cancelled.
281 * bstrategy is normally given as NULL, but in autovacuum it can be passed
282 * in to use the same buffer strategy object across multiple vacuum() calls.
284 * isTopLevel should be passed down from ProcessUtility.
286 * It is the caller's responsibility that vacstmt and bstrategy
287 * (if given) be allocated in a memory context that won't disappear
288 * at transaction commit.
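 *
 * For example (a sketch of the common path): a utility statement such as
 *		VACUUM FULL VERBOSE mytable;
 * reaches this function from ProcessUtility with relid = InvalidOid,
 * vacstmt->relation naming "mytable" (a hypothetical table), and
 * vacstmt->options containing VACOPT_VACUUM | VACOPT_FULL | VACOPT_VERBOSE.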
291 vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
292 BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
294 const char *stmttype;
295 volatile bool all_rels,
300 /* sanity checks on options */
301 Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
302 Assert((vacstmt->options & VACOPT_VACUUM) ||
303 !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
304 Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
306 stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
308 if (vacstmt->options & VACOPT_VERBOSE)
314 * We cannot run VACUUM inside a user transaction block; if we were inside
315 * a transaction, then our commit- and start-transaction-command calls
316 * would not have the intended effect! Furthermore, the forced commit that
317 * occurs before truncating the relation's file would have the effect of
318 * committing the rest of the user's transaction too, which would
319 * certainly not be the desired behavior. (This only applies to VACUUM
320 * FULL, though. We could in theory run lazy VACUUM inside a transaction
321 * block, but we choose to disallow that case because we'd rather commit
322 * as soon as possible after finishing the vacuum. This is mainly so that
323 * we can let go the AccessExclusiveLock that we may be holding.)
325 * ANALYZE (without VACUUM) can run either way.
327 if (vacstmt->options & VACOPT_VACUUM)
329 PreventTransactionChain(isTopLevel, stmttype);
330 in_outer_xact = false;
333 in_outer_xact = IsInTransactionChain(isTopLevel);
336 * Send info about dead objects to the statistics collector, unless we are
337 * in autovacuum --- autovacuum.c does this for itself.
339 if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
340 pgstat_vacuum_stat();
343 * Create special memory context for cross-transaction storage.
345 * Since it is a child of PortalContext, it will go away eventually even
346 * if we suffer an error; there's no need for special abort cleanup logic.
348 vac_context = AllocSetContextCreate(PortalContext,
350 ALLOCSET_DEFAULT_MINSIZE,
351 ALLOCSET_DEFAULT_INITSIZE,
352 ALLOCSET_DEFAULT_MAXSIZE);
355 * If caller didn't give us a buffer strategy object, make one in the
356 * cross-transaction memory context.
358 if (bstrategy == NULL)
360 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
362 bstrategy = GetAccessStrategy(BAS_VACUUM);
363 MemoryContextSwitchTo(old_context);
365 vac_strategy = bstrategy;
367 /* Remember whether we are processing everything in the DB */
368 all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL);
371 * Build list of relations to process, unless caller gave us one. (If we
372 * build one, we put it in vac_context for safekeeping.)
374 relations = get_rel_oids(relid, vacstmt->relation, stmttype);
377 * Decide whether we need to start/commit our own transactions.
379 * For VACUUM (with or without ANALYZE): always do so, so that we can
380 * release locks as soon as possible. (We could possibly use the outer
381 * transaction for a one-table VACUUM, but handling TOAST tables would be problematic.)
384 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
385 * start/commit our own transactions. Also, there's no need to do so if
386 * only processing one relation. For multiple relations when not within a
387 * transaction block, and also in an autovacuum worker, use own
388 * transactions so we can release locks sooner.
390 if (vacstmt->options & VACOPT_VACUUM)
391 use_own_xacts = true;
394 Assert(vacstmt->options & VACOPT_ANALYZE);
395 if (IsAutoVacuumWorkerProcess())
396 use_own_xacts = true;
397 else if (in_outer_xact)
398 use_own_xacts = false;
399 else if (list_length(relations) > 1)
400 use_own_xacts = true;
402 use_own_xacts = false;
406 * vacuum_rel expects to be entered with no transaction active; it will
407 * start and commit its own transaction. But we are called by an SQL
408 * command, and so we are executing inside a transaction already. We
409 * commit the transaction started in PostgresMain() here, and start
410 * another one before exiting to match the commit waiting for us back in
415 /* ActiveSnapshot is not set by autovacuum */
416 if (ActiveSnapshotSet())
419 /* matches the StartTransaction in PostgresMain() */
420 CommitTransactionCommand();
423 /* Turn vacuum cost accounting on or off */
428 VacuumCostActive = (VacuumCostDelay > 0);
429 VacuumCostBalance = 0;
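
/*
 * When VacuumCostActive is set, vacuum_delay_point() accumulates per-page
 * costs in VacuumCostBalance and, once the balance reaches VacuumCostLimit,
 * sleeps for VacuumCostDelay milliseconds and resets the balance, throttling
 * the I/O that VACUUM generates.
 */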
432 * Loop to process each selected relation.
434 foreach(cur, relations)
436 Oid relid = lfirst_oid(cur);
437 bool scanned_all = false;
439 if (vacstmt->options & VACOPT_VACUUM)
440 vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
443 if (vacstmt->options & VACOPT_ANALYZE)
446 * If using separate xacts, start one for analyze. Otherwise,
447 * we can use the outer transaction.
451 StartTransactionCommand();
452 /* functions in indexes may want a snapshot set */
453 PushActiveSnapshot(GetTransactionSnapshot());
456 analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
461 CommitTransactionCommand();
468 /* Make sure cost accounting is turned off after error */
469 VacuumCostActive = false;
474 /* Turn off vacuum cost accounting */
475 VacuumCostActive = false;
478 * Finish up processing.
482 /* here, we are not in a transaction */
485 * This matches the CommitTransaction waiting for us in
488 StartTransactionCommand();
491 if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
494 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
495 * (autovacuum.c does this for itself.)
497 vac_update_datfrozenxid();
501 * Clean up working storage --- note we must do this after
502 * StartTransactionCommand, else we might be trying to delete the active
505 MemoryContextDelete(vac_context);
510 * Build a list of Oids for each relation to be processed
512 * The list is built in vac_context so that it will survive across our
513 * per-relation transactions.
516 get_rel_oids(Oid relid, const RangeVar *vacrel, const char *stmttype)
518 List *oid_list = NIL;
519 MemoryContext oldcontext;
521 /* OID supplied by VACUUM's caller? */
522 if (OidIsValid(relid))
524 oldcontext = MemoryContextSwitchTo(vac_context);
525 oid_list = lappend_oid(oid_list, relid);
526 MemoryContextSwitchTo(oldcontext);
530 /* Process a specific relation */
533 relid = RangeVarGetRelid(vacrel, false);
535 /* Make a relation list entry for this guy */
536 oldcontext = MemoryContextSwitchTo(vac_context);
537 oid_list = lappend_oid(oid_list, relid);
538 MemoryContextSwitchTo(oldcontext);
542 /* Process all plain relations listed in pg_class */
549 Anum_pg_class_relkind,
550 BTEqualStrategyNumber, F_CHAREQ,
551 CharGetDatum(RELKIND_RELATION));
553 pgclass = heap_open(RelationRelationId, AccessShareLock);
555 scan = heap_beginscan(pgclass, SnapshotNow, 1, &key);
557 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
559 /* Make a relation list entry for this guy */
560 oldcontext = MemoryContextSwitchTo(vac_context);
561 oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
562 MemoryContextSwitchTo(oldcontext);
566 heap_close(pgclass, AccessShareLock);
573 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
576 vacuum_set_xid_limits(int freeze_min_age,
577 int freeze_table_age,
579 TransactionId *oldestXmin,
580 TransactionId *freezeLimit,
581 TransactionId *freezeTableLimit)
585 TransactionId safeLimit;
588 * We can always ignore processes running lazy vacuum. This is because we
589 * use these values only for deciding which tuples we must keep in the
590 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
591 * ignore it. In theory it could be problematic to ignore lazy vacuums on
592 * a full vacuum, but keep in mind that only one vacuum process can be
593 * working on a particular table at any time, and that each vacuum is
594 * always an independent transaction.
596 *oldestXmin = GetOldestXmin(sharedRel, true);
598 Assert(TransactionIdIsNormal(*oldestXmin));
601 * Determine the minimum freeze age to use: as specified by the caller, or
602 * vacuum_freeze_min_age, but in any case not more than half
603 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
604 * wraparound won't occur too frequently.
606 freezemin = freeze_min_age;
608 freezemin = vacuum_freeze_min_age;
609 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
610 Assert(freezemin >= 0);
613 * Compute the cutoff XID, being careful not to generate a "permanent" XID
615 limit = *oldestXmin - freezemin;
616 if (!TransactionIdIsNormal(limit))
617 limit = FirstNormalTransactionId;
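
/*
 * Illustration with made-up numbers: if *oldestXmin is 500000000 and
 * freezemin comes out as 50000000, the freeze cutoff is XID 450000000;
 * any committed xmin older than that is eligible to be replaced with
 * FrozenTransactionId by heap_freeze_tuple().
 */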
620 * If oldestXmin is very far back (in practice, more than
621 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
622 * freeze age of zero.
624 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
625 if (!TransactionIdIsNormal(safeLimit))
626 safeLimit = FirstNormalTransactionId;
628 if (TransactionIdPrecedes(limit, safeLimit))
631 (errmsg("oldest xmin is far in the past"),
632 errhint("Close open transactions soon to avoid wraparound problems.")));
636 *freezeLimit = limit;
638 if (freezeTableLimit != NULL)
643 * Determine the table freeze age to use: as specified by the caller,
644 * or vacuum_freeze_table_age, but in any case not more than
645 * autovacuum_freeze_max_age * 0.95, so that if you have e.g. a nightly
646 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
647 * before anti-wraparound autovacuum is launched.
649 freezetable = freeze_table_age;
651 freezetable = vacuum_freeze_table_age;
652 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
653 Assert(freezetable >= 0);
656 * Compute the cutoff XID, being careful not to generate a "permanent"
659 limit = ReadNewTransactionId() - freezetable;
660 if (!TransactionIdIsNormal(limit))
661 limit = FirstNormalTransactionId;
663 *freezeTableLimit = limit;
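
/*
 * Callers that pass a non-NULL freezeTableLimit (lazy VACUUM, in
 * vacuumlazy.c) compare the table's relfrozenxid against this limit to
 * decide whether the whole heap must be scanned, so that relfrozenxid can
 * be advanced, rather than skipping pages with the visibility map's help.
 */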
669 * vac_update_relstats() -- update statistics for one relation
671 * Update the whole-relation statistics that are kept in its pg_class
672 * row. There are additional stats that will be updated if we are
673 * doing ANALYZE, but we always update these stats. This routine works
674 * for both index and heap relation entries in pg_class.
676 * We violate transaction semantics here by overwriting the rel's
677 * existing pg_class tuple with the new values. This is reasonably
678 * safe since the new values are correct whether or not this transaction
679 * commits. The reason for this is that if we updated these tuples in
680 * the usual way, vacuuming pg_class itself wouldn't work very well ---
681 * by the time we got done with a vacuum cycle, most of the tuples in
682 * pg_class would've been obsoleted. Of course, this only works for
683 * fixed-size never-null columns, but these are.
685 * Note another assumption: that two VACUUMs/ANALYZEs on a table can't
686 * run in parallel, nor can VACUUM/ANALYZE run in parallel with a
687 * schema alteration such as adding an index, rule, or trigger. Otherwise
688 * our updates of relhasindex etc might overwrite uncommitted updates.
690 * Another reason for doing it this way is that when we are in a lazy
691 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any updates ---
692 * somebody vacuuming pg_class might think they could delete a tuple
693 * marked with xmin = our xid.
695 * This routine is shared by full VACUUM, lazy VACUUM, and stand-alone
699 vac_update_relstats(Relation relation,
700 BlockNumber num_pages, double num_tuples,
701 bool hasindex, TransactionId frozenxid)
703 Oid relid = RelationGetRelid(relation);
706 Form_pg_class pgcform;
709 rd = heap_open(RelationRelationId, RowExclusiveLock);
711 /* Fetch a copy of the tuple to scribble on */
712 ctup = SearchSysCacheCopy(RELOID,
713 ObjectIdGetDatum(relid),
715 if (!HeapTupleIsValid(ctup))
716 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
718 pgcform = (Form_pg_class) GETSTRUCT(ctup);
720 /* Apply required updates, if any, to copied tuple */
723 if (pgcform->relpages != (int32) num_pages)
725 pgcform->relpages = (int32) num_pages;
728 if (pgcform->reltuples != (float4) num_tuples)
730 pgcform->reltuples = (float4) num_tuples;
733 if (pgcform->relhasindex != hasindex)
735 pgcform->relhasindex = hasindex;
740 * If we have discovered that there are no indexes, then there's no
741 * primary key either, nor any exclusion constraints. This could be done
746 if (pgcform->relhaspkey)
748 pgcform->relhaspkey = false;
751 if (pgcform->relhasexclusion && pgcform->relkind != RELKIND_INDEX)
753 pgcform->relhasexclusion = false;
758 /* We also clear relhasrules and relhastriggers if needed */
759 if (pgcform->relhasrules && relation->rd_rules == NULL)
761 pgcform->relhasrules = false;
764 if (pgcform->relhastriggers && relation->trigdesc == NULL)
766 pgcform->relhastriggers = false;
771 * relfrozenxid should never go backward. Caller can pass
772 * InvalidTransactionId if it has no new data.
774 if (TransactionIdIsNormal(frozenxid) &&
775 TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid))
777 pgcform->relfrozenxid = frozenxid;
781 /* If anything changed, write out the tuple. */
783 heap_inplace_update(rd, ctup);
785 heap_close(rd, RowExclusiveLock);
790 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
792 * Update pg_database's datfrozenxid entry for our database to be the
793 * minimum of the pg_class.relfrozenxid values. If we are able to
794 * advance pg_database.datfrozenxid, also try to truncate pg_clog.
796 * We violate transaction semantics here by overwriting the database's
797 * existing pg_database tuple with the new value. This is reasonably
798 * safe since the new value is correct whether or not this transaction
799 * commits. As with vac_update_relstats, this avoids leaving dead tuples
800 * behind after a VACUUM.
802 * This routine is shared by full and lazy VACUUM.
805 vac_update_datfrozenxid(void)
808 Form_pg_database dbform;
812 TransactionId newFrozenXid;
816 * Initialize the "min" calculation with GetOldestXmin, which is a
817 * reasonable approximation to the minimum relfrozenxid for not-yet-
818 * committed pg_class entries for new tables; see AddNewRelationTuple().
819 * So we cannot produce a wrong minimum by starting with this.
821 newFrozenXid = GetOldestXmin(true, true);
824 * We must seqscan pg_class to find the minimum Xid, because there is no
825 * index that can help us here.
827 relation = heap_open(RelationRelationId, AccessShareLock);
829 scan = systable_beginscan(relation, InvalidOid, false,
830 SnapshotNow, 0, NULL);
832 while ((classTup = systable_getnext(scan)) != NULL)
834 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
837 * Only consider heap and TOAST tables (anything else should have
838 * InvalidTransactionId in relfrozenxid anyway.)
840 if (classForm->relkind != RELKIND_RELATION &&
841 classForm->relkind != RELKIND_TOASTVALUE)
844 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
846 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
847 newFrozenXid = classForm->relfrozenxid;
850 /* we're done with pg_class */
851 systable_endscan(scan);
852 heap_close(relation, AccessShareLock);
854 Assert(TransactionIdIsNormal(newFrozenXid));
856 /* Now fetch the pg_database tuple we need to update. */
857 relation = heap_open(DatabaseRelationId, RowExclusiveLock);
859 /* Fetch a copy of the tuple to scribble on */
860 tuple = SearchSysCacheCopy(DATABASEOID,
861 ObjectIdGetDatum(MyDatabaseId),
863 if (!HeapTupleIsValid(tuple))
864 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
865 dbform = (Form_pg_database) GETSTRUCT(tuple);
868 * Don't allow datfrozenxid to go backward (probably can't happen anyway);
869 * and detect the common case where it doesn't go forward either.
871 if (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid))
873 dbform->datfrozenxid = newFrozenXid;
878 heap_inplace_update(relation, tuple);
880 heap_freetuple(tuple);
881 heap_close(relation, RowExclusiveLock);
884 * If we were able to advance datfrozenxid, see if we can truncate pg_clog.
885 * Also do it if the shared XID-wrap-limit info is stale, since this
886 * action will update that too.
888 if (dirty || ForceTransactionIdLimitUpdate())
889 vac_truncate_clog(newFrozenXid);
894 * vac_truncate_clog() -- attempt to truncate the commit log
896 * Scan pg_database to determine the system-wide oldest datfrozenxid,
897 * and use it to truncate the transaction commit log (pg_clog).
898 * Also update the XID wrap limit info maintained by varsup.c.
900 * The passed XID is simply the one I just wrote into my pg_database
901 * entry. It's used to initialize the "min" calculation.
903 * This routine is shared by full and lazy VACUUM. Note that it's
904 * only invoked when we've managed to change our DB's datfrozenxid
905 * entry, or we found that the shared XID-wrap-limit info is stale.
908 vac_truncate_clog(TransactionId frozenXID)
910 TransactionId myXID = GetCurrentTransactionId();
915 bool frozenAlreadyWrapped = false;
917 /* init oldest_datoid to sync with my frozenXID */
918 oldest_datoid = MyDatabaseId;
921 * Scan pg_database to compute the minimum datfrozenxid
923 * Note: we need not worry about a race condition with new entries being
924 * inserted by CREATE DATABASE. Any such entry will have a copy of some
925 * existing DB's datfrozenxid, and that source DB cannot be ours because
926 * of the interlock against copying a DB containing an active backend.
927 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
928 * concurrently modify the datfrozenxid's of different databases, the
929 * worst possible outcome is that pg_clog is not truncated as aggressively
932 relation = heap_open(DatabaseRelationId, AccessShareLock);
934 scan = heap_beginscan(relation, SnapshotNow, 0, NULL);
936 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
938 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
940 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
942 if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
943 frozenAlreadyWrapped = true;
944 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
946 frozenXID = dbform->datfrozenxid;
947 oldest_datoid = HeapTupleGetOid(tuple);
953 heap_close(relation, AccessShareLock);
956 * Do not truncate CLOG if we seem to have suffered wraparound already;
957 * the computed minimum XID might be bogus. This case should now be
958 * impossible due to the defenses in GetNewTransactionId, but we keep the
961 if (frozenAlreadyWrapped)
964 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
965 errdetail("You might have already suffered transaction-wraparound data loss.")));
969 /* Truncate CLOG to the oldest frozenxid */
970 TruncateCLOG(frozenXID);
973 * Update the wrap limit for GetNewTransactionId. Note: this function
974 * will also signal the postmaster for an(other) autovac cycle if needed.
976 SetTransactionIdLimit(frozenXID, oldest_datoid);
980 /****************************************************************************
982 * Code common to both flavors of VACUUM *
984 ****************************************************************************
989 * vacuum_rel() -- vacuum one heap relation
991 * Doing one heap at a time incurs extra overhead, since we need to
992 * check that the heap exists again just before we vacuum it. The
993 * reason that we do this is so that vacuuming can be spread across
994 * many small transactions. Otherwise, two-phase locking would require
995 * us to lock the entire database during one pass of the vacuum cleaner.
997 * We'll return true in *scanned_all if the vacuum scanned all heap
998 * pages, and updated pg_class.
1000 * At entry and exit, we are not inside a transaction.
1003 vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
1011 int save_sec_context;
1016 *scanned_all = false;
1018 /* Begin a transaction for vacuuming this relation */
1019 StartTransactionCommand();
1022 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1023 * ensures that RecentGlobalXmin is kept truly recent.
1025 PushActiveSnapshot(GetTransactionSnapshot());
1027 if (!(vacstmt->options & VACOPT_FULL))
1030 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1031 * other concurrent VACUUMs know that they can ignore this one while
1032 * determining their OldestXmin. (The reason we don't set it during a
1033 * full VACUUM is exactly that we may have to run user-defined
1034 * functions for functional indexes, and we want to make sure that if
1035 * they use the snapshot set above, any tuples it requires can't get
1036 * removed from other tables. An index function that depends on the
1037 * contents of other tables is arguably broken, but we won't break it
1038 * here by violating transaction semantics.)
1040 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1041 * autovacuum; it's used to avoid cancelling a vacuum that was invoked
1044 * Note: these flags remain set until CommitTransaction or
1045 * AbortTransaction. We don't want to clear them until we reset
1046 * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
1047 * which is probably Not Good.
1049 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1050 MyProc->vacuumFlags |= PROC_IN_VACUUM;
1052 MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1053 LWLockRelease(ProcArrayLock);
1057 * Check for user-requested abort. Note we want this to be inside a
1058 * transaction, so xact.c doesn't issue useless WARNING.
1060 CHECK_FOR_INTERRUPTS();
1063 * Determine the type of lock we want --- hard exclusive lock for a FULL
1064 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1065 * way, we can be sure that no other backend is vacuuming the same table.
1067 lmode = (vacstmt->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1070 * Open the relation and get the appropriate lock on it.
1072 * There's a race condition here: the rel may have gone away since the
1073 * last time we saw it. If so, we don't need to vacuum it.
1075 onerel = try_relation_open(relid, lmode);
1079 PopActiveSnapshot();
1080 CommitTransactionCommand();
1085 * Check permissions.
1087 * We allow the user to vacuum a table if he is superuser, the table
1088 * owner, or the database owner (but in the latter case, only if it's not
1089 * a shared relation). pg_class_ownercheck includes the superuser case.
1091 * Note we choose to treat permissions failure as a WARNING and keep
1092 * trying to vacuum the rest of the DB --- is this appropriate?
1094 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1095 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1097 if (onerel->rd_rel->relisshared)
1099 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1100 RelationGetRelationName(onerel))));
1101 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1103 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1104 RelationGetRelationName(onerel))));
1107 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1108 RelationGetRelationName(onerel))));
1109 relation_close(onerel, lmode);
1110 PopActiveSnapshot();
1111 CommitTransactionCommand();
1116 * Check that it's a vacuumable table; we used to do this in
1117 * get_rel_oids() but seems safer to check after we've locked the
1120 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1121 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
1124 (errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
1125 RelationGetRelationName(onerel))));
1126 relation_close(onerel, lmode);
1127 PopActiveSnapshot();
1128 CommitTransactionCommand();
1133 * Silently ignore tables that are temp tables of other backends ---
1134 * trying to vacuum these will lead to great unhappiness, since their
1135 * contents are probably not up-to-date on disk. (We don't throw a
1136 * warning here; it would just lead to chatter during a database-wide
1139 if (RELATION_IS_OTHER_TEMP(onerel))
1141 relation_close(onerel, lmode);
1142 PopActiveSnapshot();
1143 CommitTransactionCommand();
1148 * Get a session-level lock too. This will protect our access to the
1149 * relation across multiple transactions, so that we can vacuum the
1150 * relation's TOAST table (if any) secure in the knowledge that no one is
1151 * deleting the parent relation.
1153 * NOTE: this cannot block, even if someone else is waiting for access,
1154 * because the lock manager knows that both lock requests are from the
1157 onerelid = onerel->rd_lockInfo.lockRelId;
1158 LockRelationIdForSession(&onerelid, lmode);
1161 * Remember the relation's TOAST relation for later, if the caller asked
1165 toast_relid = onerel->rd_rel->reltoastrelid;
1167 toast_relid = InvalidOid;
1170 * Switch to the table owner's userid, so that any index functions are run
1171 * as that user. Also lock down security-restricted operations and
1172 * arrange to make GUC variable changes local to this command.
1173 * (This is unnecessary, but harmless, for lazy VACUUM.)
1175 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1176 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1177 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1178 save_nestlevel = NewGUCNestLevel();
1181 * Do the actual work --- either FULL or "lazy" vacuum
1183 if (vacstmt->options & VACOPT_FULL)
1184 heldoff = full_vacuum_rel(onerel, vacstmt);
1186 heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
1188 /* Roll back any GUC changes executed by index functions */
1189 AtEOXact_GUC(false, save_nestlevel);
1191 /* Restore userid and security context */
1192 SetUserIdAndSecContext(save_userid, save_sec_context);
1194 /* all done with this class, but hold lock until commit */
1195 relation_close(onerel, NoLock);
1198 * Complete the transaction and free all temporary memory used.
1200 PopActiveSnapshot();
1201 CommitTransactionCommand();
1203 /* now we can allow interrupts again, if disabled */
1205 RESUME_INTERRUPTS();
1208 * If the relation has a secondary toast rel, vacuum that too while we
1209 * still hold the session lock on the master table. Note however that
1210 * "analyze" will not get done on the toast table. This is good, because
1211 * the toaster always uses hardcoded index access and statistics are
1212 * totally unimportant for toast relations.
1214 if (toast_relid != InvalidOid)
1215 vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
1218 * Now release the session-level lock on the master table.
1220 UnlockRelationIdForSession(&onerelid, lmode);
1224 /****************************************************************************
1226 * Code for VACUUM FULL (only) *
1228 ****************************************************************************
1233 * full_vacuum_rel() -- perform FULL VACUUM for one heap relation
1235 * This routine vacuums a single heap, cleans out its indexes, and
1236 * updates its num_pages and num_tuples statistics.
1238 * At entry, we have already established a transaction and opened
1239 * and locked the relation.
1241 * The return value indicates whether this function has held off
1242 * interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
1245 full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
1247 VacPageListData vacuum_pages; /* List of pages to vacuum and/or
1249 VacPageListData fraged_pages; /* List of pages with space enough for
1254 VRelStats *vacrelstats;
1255 bool heldoff = false;
1257 vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
1258 onerel->rd_rel->relisshared,
1259 &OldestXmin, &FreezeLimit, NULL);
1262 * Flush any previous async-commit transactions. This does not guarantee
1263 * that we will be able to set hint bits for tuples they inserted, but it
1264 * improves the probability, especially in simple sequential-commands
1265 * cases. See scan_heap() and repair_frag() for more about this.
1267 XLogAsyncCommitFlush();
1270 * Set up statistics-gathering machinery.
1272 vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
1273 vacrelstats->rel_pages = 0;
1274 vacrelstats->rel_tuples = 0;
1275 vacrelstats->rel_indexed_tuples = 0;
1276 vacrelstats->hasindex = false;
1277 vacrelstats->latestRemovedXid = InvalidTransactionId;
1280 vacuum_pages.num_pages = fraged_pages.num_pages = 0;
1281 scan_heap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
1283 /* Now open all indexes of the relation */
1284 vac_open_indexes(onerel, AccessExclusiveLock, &nindexes, &Irel);
1286 vacrelstats->hasindex = true;
1288 /* Clean/scan index relation(s) */
1291 if (vacuum_pages.num_pages > 0)
1293 for (i = 0; i < nindexes; i++)
1294 vacuum_index(&vacuum_pages, Irel[i],
1295 vacrelstats->rel_indexed_tuples, 0);
1299 /* just scan indexes to update statistics */
1300 for (i = 0; i < nindexes; i++)
1301 scan_index(Irel[i], vacrelstats->rel_indexed_tuples);
1305 if (fraged_pages.num_pages > 0)
1307 /* Try to shrink heap */
1308 heldoff = repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages,
1310 vac_close_indexes(nindexes, Irel, NoLock);
1314 vac_close_indexes(nindexes, Irel, NoLock);
1315 if (vacuum_pages.num_pages > 0)
1317 /* Clean pages from vacuum_pages list */
1318 vacuum_heap(vacrelstats, onerel, &vacuum_pages);
1322 /* update the free space map with final free space info, and vacuum it */
1323 vac_update_fsm(onerel, &fraged_pages, vacrelstats->rel_pages);
1324 FreeSpaceMapVacuum(onerel);
1326 /* update statistics in pg_class */
1327 vac_update_relstats(onerel,
1328 vacrelstats->rel_pages, vacrelstats->rel_tuples,
1329 vacrelstats->hasindex, FreezeLimit);
1331 /* report results to the stats collector, too */
1332 pgstat_report_vacuum(RelationGetRelid(onerel),
1333 onerel->rd_rel->relisshared,
1335 vacrelstats->rel_tuples);
1342 * scan_heap() -- scan an open heap relation
1344 * This routine sets commit status bits, constructs vacuum_pages (list
1345 * of pages we need to compact free space on and/or clean indexes of
1346 * deleted tuples), constructs fraged_pages (list of pages with free
1347 * space that tuples could be moved into), and calculates statistics
1348 * on the number of live tuples in the heap.
1351 scan_heap(VRelStats *vacrelstats, Relation onerel,
1352 VacPageList vacuum_pages, VacPageList fraged_pages)
1354 BlockNumber nblocks,
1358 BlockNumber empty_pages,
1367 Size min_tlen = MaxHeapTupleSize;
1369 bool do_shrinking = true;
1370 VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
1371 int num_vtlinks = 0;
1372 int free_vtlinks = 100;
1375 pg_rusage_init(&ru0);
1377 relname = RelationGetRelationName(onerel);
1379 (errmsg("vacuuming \"%s.%s\"",
1380 get_namespace_name(RelationGetNamespace(onerel)),
1383 empty_pages = empty_end_pages = 0;
1384 num_tuples = num_indexed_tuples = tups_vacuumed = nkeep = nunused = 0;
1387 nblocks = RelationGetNumberOfBlocks(onerel);
1390 * We initially create each VacPage item in a maximal-sized workspace,
1391 * then copy the workspace into a just-large-enough copy.
1393 vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
1395 for (blkno = 0; blkno < nblocks; blkno++)
1402 OffsetNumber offnum,
1405 OffsetNumber frozen[MaxOffsetNumber];
1408 vacuum_delay_point();
1410 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1412 page = BufferGetPage(buf);
1415 * Since we are holding exclusive lock on the relation, no other
1416 * backend can be accessing the page; however it is possible that the
1417 * background writer will try to write the page if it's already marked
1418 * dirty. To ensure that invalid data doesn't get written to disk, we
1419 * must take exclusive buffer lock wherever we potentially modify
1420 * pages. In fact, we insist on cleanup lock so that we can safely
1421 * call heap_page_prune(). (This might be overkill, since the
1422 * bgwriter pays no attention to individual tuples, but on the other
1423 * hand it's unlikely that the bgwriter has this particular page
1424 * pinned at this instant. So violating the coding rule would buy us
1427 LockBufferForCleanup(buf);
1429 vacpage->blkno = blkno;
1430 vacpage->offsets_used = 0;
1431 vacpage->offsets_free = 0;
1433 if (PageIsNew(page))
1435 VacPage vacpagecopy;
1438 (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
1440 PageInit(page, BufferGetPageSize(buf), 0);
1441 MarkBufferDirty(buf);
1442 vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
1443 free_space += vacpage->free;
1446 vacpagecopy = copy_vac_page(vacpage);
1447 vpage_insert(vacuum_pages, vacpagecopy);
1448 vpage_insert(fraged_pages, vacpagecopy);
1449 UnlockReleaseBuffer(buf);
1453 if (PageIsEmpty(page))
1455 VacPage vacpagecopy;
1457 vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
1458 free_space += vacpage->free;
1461 vacpagecopy = copy_vac_page(vacpage);
1462 vpage_insert(vacuum_pages, vacpagecopy);
1463 vpage_insert(fraged_pages, vacpagecopy);
1464 UnlockReleaseBuffer(buf);
1469 * Prune all HOT-update chains in this page.
1471 * We use the redirect_move option so that redirecting line pointers
1472 * get collapsed out; this allows us to not worry about them below.
1474 * We count tuples removed by the pruning step as removed by VACUUM.
1476 tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
1480 * Now scan the page to collect vacuumable items and check for tuples
1481 * requiring freezing.
1485 maxoff = PageGetMaxOffsetNumber(page);
1486 for (offnum = FirstOffsetNumber;
1488 offnum = OffsetNumberNext(offnum))
1490 ItemId itemid = PageGetItemId(page, offnum);
1491 bool tupgone = false;
1492 HeapTupleData tuple;
1495 * Collect un-used items too - it's possible to have indexes
1496 * pointing here after crash. (That's an ancient comment and is
1497 * likely obsolete with WAL, but we might as well continue to
1498 * check for such problems.)
1500 if (!ItemIdIsUsed(itemid))
1502 vacpage->offsets[vacpage->offsets_free++] = offnum;
1508 * DEAD item pointers are to be vacuumed normally; but we don't
1509 * count them in tups_vacuumed, else we'd be double-counting (at
1510 * least in the common case where heap_page_prune() just freed up
1513 if (ItemIdIsDead(itemid))
1515 vacpage->offsets[vacpage->offsets_free++] = offnum;
1519 /* Shouldn't have any redirected items anymore */
1520 if (!ItemIdIsNormal(itemid))
1521 elog(ERROR, "relation \"%s\" TID %u/%u: unexpected redirect item",
1522 relname, blkno, offnum);
1524 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1525 tuple.t_len = ItemIdGetLength(itemid);
1526 ItemPointerSet(&(tuple.t_self), blkno, offnum);
1528 switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
1530 case HEAPTUPLE_LIVE:
1531 /* Tuple is good --- but let's do some validity checks */
1532 if (onerel->rd_rel->relhasoids &&
1533 !OidIsValid(HeapTupleGetOid(&tuple)))
1534 elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
1535 relname, blkno, offnum);
1538 * The shrinkage phase of VACUUM FULL requires that all
1539 * live tuples have XMIN_COMMITTED set --- see comments in
1540 * repair_frag()'s walk-along-page loop. Use of async
1541 * commit may prevent HeapTupleSatisfiesVacuum from
1542 * setting the bit for a recently committed tuple. Rather
1543 * than trying to handle this corner case, we just give up
1547 !(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1550 (errmsg("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation",
1551 relname, blkno, offnum,
1552 HeapTupleHeaderGetXmin(tuple.t_data))));
1553 do_shrinking = false;
1556 case HEAPTUPLE_DEAD:
1559 * Ordinarily, DEAD tuples would have been removed by
1560 * heap_page_prune(), but it's possible that the tuple
1561 * state changed since heap_page_prune() looked. In
1562 * particular an INSERT_IN_PROGRESS tuple could have
1563 * changed to DEAD if the inserter aborted. So this
1564 * cannot be considered an error condition, though it does
1565 * suggest that someone released a lock early.
1567 * If the tuple is HOT-updated then it must only be
1568 * removed by a prune operation; so we keep it as if it
1569 * were RECENTLY_DEAD, and abandon shrinking. (XXX is it
1570 * worth trying to make the shrinking code smart enough to
1571 * handle this? It's an unusual corner case.)
1573 * DEAD heap-only tuples can safely be removed if they
1574 * aren't themselves HOT-updated, although this is a bit
1575 * inefficient since we'll uselessly try to remove index
1578 if (HeapTupleIsHotUpdated(&tuple))
1583 (errmsg("relation \"%s\" TID %u/%u: dead HOT-updated tuple --- cannot shrink relation",
1584 relname, blkno, offnum)));
1585 do_shrinking = false;
1589 tupgone = true; /* we can delete the tuple */
1592 * We need not require XMIN_COMMITTED or
1593 * XMAX_COMMITTED to be set, since we will remove the
1594 * tuple without any further examination of its hint
1599 case HEAPTUPLE_RECENTLY_DEAD:
1602 * If tuple is recently deleted then we must not remove it
1608 * As with the LIVE case, shrinkage requires
1609 * XMIN_COMMITTED to be set.
1612 !(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
1615 (errmsg("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation",
1616 relname, blkno, offnum,
1617 HeapTupleHeaderGetXmin(tuple.t_data))));
1618 do_shrinking = false;
1622 * If we are shrinking and this tuple has been updated, then
1623 * remember it so that we can track update-chain dependencies.
1626 !(ItemPointerEquals(&(tuple.t_self),
1627 &(tuple.t_data->t_ctid))))
1629 if (free_vtlinks == 0)
1631 free_vtlinks = 1000;
1632 vtlinks = (VTupleLink) repalloc(vtlinks,
1633 (free_vtlinks + num_vtlinks) *
1634 sizeof(VTupleLinkData));
1636 vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
1637 vtlinks[num_vtlinks].this_tid = tuple.t_self;
1642 case HEAPTUPLE_INSERT_IN_PROGRESS:
1645 * This should not happen, since we hold exclusive lock on
1646 * the relation; shouldn't we raise an error? (Actually,
1647 * it can happen in system catalogs, since we tend to
1648 * release write lock before commit there.) As above, we
1649 * can't apply repair_frag() if the tuple state is
1654 (errmsg("relation \"%s\" TID %u/%u: InsertTransactionInProgress %u --- cannot shrink relation",
1655 relname, blkno, offnum,
1656 HeapTupleHeaderGetXmin(tuple.t_data))));
1657 do_shrinking = false;
1659 case HEAPTUPLE_DELETE_IN_PROGRESS:
1662 * This should not happen, since we hold exclusive lock on
1663 * the relation; shouldn't we raise an error? (Actually,
1664 * it can happen in system catalogs, since we tend to
1665 * release write lock before commit there.) As above, we
1666 * can't apply repair_frag() if the tuple state is
1671 (errmsg("relation \"%s\" TID %u/%u: DeleteTransactionInProgress %u --- cannot shrink relation",
1672 relname, blkno, offnum,
1673 HeapTupleHeaderGetXmax(tuple.t_data))));
1674 do_shrinking = false;
1677 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1685 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
1686 &vacrelstats->latestRemovedXid);
1689 * Here we are building a temporary copy of the page with dead
1690 * tuples removed. Below we will apply
1691 * PageRepairFragmentation to the copy, so that we can
1692 * determine how much space will be available after removal of
1693 * dead tuples. But note we are NOT changing the real page
1696 if (tempPage == NULL)
1700 pageSize = PageGetPageSize(page);
1701 tempPage = (Page) palloc(pageSize);
1702 memcpy(tempPage, page, pageSize);
1705 /* mark it unused on the temp page */
1706 lpp = PageGetItemId(tempPage, offnum);
1707 ItemIdSetUnused(lpp);
1709 vacpage->offsets[vacpage->offsets_free++] = offnum;
1715 if (!HeapTupleIsHeapOnly(&tuple))
1716 num_indexed_tuples += 1;
1718 if (tuple.t_len < min_tlen)
1719 min_tlen = tuple.t_len;
1720 if (tuple.t_len > max_tlen)
1721 max_tlen = tuple.t_len;
1724 * Each non-removable tuple must be checked to see if it needs
1727 if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
1729 frozen[nfrozen++] = offnum;
1731 } /* scan along page */
1733 if (tempPage != NULL)
1735 /* Some tuples are removable; figure free space after removal */
1736 PageRepairFragmentation(tempPage);
1737 vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, tempPage);
1743 /* Just use current available space */
1744 vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page);
1745 /* Need to reap the page if it has UNUSED or DEAD line pointers */
1746 do_reap = (vacpage->offsets_free > 0);
1749 free_space += vacpage->free;
1752 * Add the page to vacuum_pages if it requires reaping, and add it to
1753 * fraged_pages if it has a useful amount of free space. "Useful"
1754 * means enough for a minimal-sized tuple. But we don't know that
1755 * accurately near the start of the relation, so add pages
1756 * unconditionally if they have >= BLCKSZ/10 free space. Also
1757 * forcibly add pages with no live tuples, to avoid confusing the
1758 * empty_end_pages logic. (In the presence of unreasonably small
1759 * fillfactor, it seems possible that such pages might not pass the
1760 * free-space test, but they had better be in the list anyway.)
1762 do_frag = (vacpage->free >= min_tlen || vacpage->free >= BLCKSZ / 10 ||
1765 if (do_reap || do_frag)
1767 VacPage vacpagecopy = copy_vac_page(vacpage);
1770 vpage_insert(vacuum_pages, vacpagecopy);
1772 vpage_insert(fraged_pages, vacpagecopy);
1776 * Include the page in empty_end_pages if it will be empty after
1777 * vacuuming; this is to keep us from using it as a move destination.
1778 * Note that such pages are guaranteed to be in fraged_pages.
1786 empty_end_pages = 0;
1789 * If we froze any tuples, mark the buffer dirty, and write a WAL
1790 * record recording the changes. We must log the changes to be
1791 * crash-safe against future truncation of CLOG.
1795 MarkBufferDirty(buf);
1796 /* no XLOG for temp tables, though */
1797 if (!onerel->rd_istemp)
1801 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
1803 PageSetLSN(page, recptr);
1804 PageSetTLI(page, ThisTimeLineID);
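
/*
 * Stamping the page with the freeze record's LSN enforces the usual
 * WAL-before-data rule: the buffer manager will not write this page to
 * disk until the corresponding WAL record has been flushed.
 */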
1808 UnlockReleaseBuffer(buf);
1813 /* save stats in the rel list for use later */
1814 vacrelstats->rel_tuples = num_tuples;
1815 vacrelstats->rel_indexed_tuples = num_indexed_tuples;
1816 vacrelstats->rel_pages = nblocks;
1817 if (num_tuples == 0)
1818 min_tlen = max_tlen = 0;
1819 vacrelstats->min_tlen = min_tlen;
1820 vacrelstats->max_tlen = max_tlen;
1822 vacuum_pages->empty_end_pages = empty_end_pages;
1823 fraged_pages->empty_end_pages = empty_end_pages;
1826 * Clear the fraged_pages list if we found we couldn't shrink. Else,
1827 * remove any "empty" end-pages from the list, and compute usable free
1828 * space = free space in remaining pages.
1834 Assert((BlockNumber) fraged_pages->num_pages >= empty_end_pages);
1835 fraged_pages->num_pages -= empty_end_pages;
1836 usable_free_space = 0;
1837 for (i = 0; i < fraged_pages->num_pages; i++)
1838 usable_free_space += fraged_pages->pagedesc[i]->free;
1842 fraged_pages->num_pages = 0;
1843 usable_free_space = 0;
1846 /* don't bother to save vtlinks if we will not call repair_frag */
1847 if (fraged_pages->num_pages > 0 && num_vtlinks > 0)
1849 qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData),
1851 vacrelstats->vtlinks = vtlinks;
1852 vacrelstats->num_vtlinks = num_vtlinks;
1856 vacrelstats->vtlinks = NULL;
1857 vacrelstats->num_vtlinks = 0;
1862 (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1863 RelationGetRelationName(onerel),
1864 tups_vacuumed, num_tuples, nblocks),
1865 errdetail("%.0f dead row versions cannot be removed yet.\n"
1866 "Nonremovable row versions range from %lu to %lu bytes long.\n"
1867 "There were %.0f unused item pointers.\n"
1868 "Total free space (including removable row versions) is %.0f bytes.\n"
1869 "%u pages are or will become empty, including %u at the end of the table.\n"
1870 "%u pages containing %.0f free bytes are potential move destinations.\n"
1873 (unsigned long) min_tlen, (unsigned long) max_tlen,
1876 empty_pages, empty_end_pages,
1877 fraged_pages->num_pages, usable_free_space,
1878 pg_rusage_show(&ru0))));
1883 * repair_frag() -- try to repair relation's fragmentation
1885 * This routine marks dead tuples as unused and tries to re-use dead
1886 * space by moving tuples (and making index entries if needed). It
1887 * constructs Nvacpagelist, a list of freed pages (from which tuples were
1888 * moved), and cleans their index entries after committing the current
1889 * transaction (in a hackish manner -- without releasing locks or freeing
1890 * memory!). It truncates the relation if trailing blocks have become empty.
1892 * The return value indicates whether this function has held off
1893 * interrupts -- caller must RESUME_INTERRUPTS() after commit if true.
1896 repair_frag(VRelStats *vacrelstats, Relation onerel,
1897 VacPageList vacuum_pages, VacPageList fraged_pages,
1898 int nindexes, Relation *Irel)
1900 TransactionId myXID = GetCurrentTransactionId();
1901 Buffer dst_buffer = InvalidBuffer;
1902 BlockNumber nblocks,
1904 BlockNumber last_move_dest_block = 0,
1906 Page dst_page = NULL;
1908 VacPageListData Nvacpagelist;
1909 VacPage dst_vacpage = NULL,
1917 int keep_tuples = 0;
1918 int keep_indexed_tuples = 0;
1920 bool heldoff = false;
1922 pg_rusage_init(&ru0);
1924 ExecContext_Init(&ec, onerel);
1926 Nvacpagelist.num_pages = 0;
1927 num_fraged_pages = fraged_pages->num_pages;
1928 Assert((BlockNumber) vacuum_pages->num_pages >= vacuum_pages->empty_end_pages);
1929 vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages;
1930 if (vacuumed_pages > 0)
1932 /* get last reaped page from vacuum_pages */
1933 last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
1934 last_vacuum_block = last_vacuum_page->blkno;
1938 last_vacuum_page = NULL;
1939 last_vacuum_block = InvalidBlockNumber;
1942 vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber));
1943 vacpage->offsets_used = vacpage->offsets_free = 0;
1946 * Scan pages backwards from the last nonempty page, trying to move tuples
1947 * down to lower pages. Quit when we reach a page that we have moved any
1948 * tuples onto, or the first page if we haven't moved anything, or when we
1949 * find a page we cannot completely empty (this last condition is handled
1950 * by "break" statements within the loop).
1952 * NB: this code depends on the vacuum_pages and fraged_pages lists being
1953 * in order by blkno.
1955 nblocks = vacrelstats->rel_pages;
1956 for (blkno = nblocks - vacuum_pages->empty_end_pages - 1;
1957 blkno > last_move_dest_block;
1962 OffsetNumber offnum,
1967 vacuum_delay_point();
1970 * Forget fraged_pages pages at or after this one; they're no longer
1971 * useful as move targets, since we only want to move down. Note that
1972 * since we stop the outer loop at last_move_dest_block, pages removed
1973 * here cannot have had anything moved onto them already.
1975 * Also note that we don't change the stored fraged_pages list, only
1976 * our local variable num_fraged_pages; so the forgotten pages are
1977 * still available to be loaded into the free space map later.
1979 while (num_fraged_pages > 0 &&
1980 fraged_pages->pagedesc[num_fraged_pages - 1]->blkno >= blkno)
1982 Assert(fraged_pages->pagedesc[num_fraged_pages - 1]->offsets_used == 0);
1987 * Process this page of relation.
1989 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1991 page = BufferGetPage(buf);
1993 vacpage->offsets_free = 0;
1995 isempty = PageIsEmpty(page);
1997 /* Is the page in the vacuum_pages list? */
1998 if (blkno == last_vacuum_block)
2000 if (last_vacuum_page->offsets_free > 0)
2002 /* there are dead tuples on this page - clean them */
2004 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2005 vacuum_page(vacrelstats, onerel, buf, last_vacuum_page);
2006 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
2011 if (vacuumed_pages > 0)
2013 /* get prev reaped page from vacuum_pages */
2014 last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
2015 last_vacuum_block = last_vacuum_page->blkno;
2019 last_vacuum_page = NULL;
2020 last_vacuum_block = InvalidBlockNumber;
2031 chain_tuple_moved = false; /* no chain tuple was moved off this page yet */
2033 vacpage->blkno = blkno;
2034 maxoff = PageGetMaxOffsetNumber(page);
2035 for (offnum = FirstOffsetNumber;
2037 offnum = OffsetNumberNext(offnum))
2040 HeapTupleData tuple;
2041 ItemId itemid = PageGetItemId(page, offnum);
2043 if (!ItemIdIsUsed(itemid))
2046 if (ItemIdIsDead(itemid))
2048 /* just remember it for vacuum_page() */
2049 vacpage->offsets[vacpage->offsets_free++] = offnum;
2053 /* Shouldn't have any redirected items now */
2054 Assert(ItemIdIsNormal(itemid));
2056 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2057 tuple_len = tuple.t_len = ItemIdGetLength(itemid);
2058 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2061 * VACUUM FULL has an exclusive lock on the relation. So
2062 * normally no other transaction can have pending INSERTs or
2063 * DELETEs in this relation. A tuple is either:
2064 * (a) live (XMIN_COMMITTED)
2065 * (b) known dead (XMIN_INVALID, or XMAX_COMMITTED and xmax
2066 * is visible to all active transactions)
2067 * (c) inserted and deleted (XMIN_COMMITTED+XMAX_COMMITTED)
2068 * but at least one active transaction does not see the
2069 * deleting transaction (ie, it's RECENTLY_DEAD)
2070 * (d) moved by the currently running VACUUM
2071 * (e) inserted or deleted by a not yet committed transaction,
2072 * or by a transaction we couldn't set XMIN_COMMITTED for.
2073 * In case (e) we wouldn't be in repair_frag() at all, because
2074 * scan_heap() detects those cases and shuts off shrinking.
2075 * We can't see case (b) here either, because such tuples were
2076 * already removed by vacuum_page(). Cases (a) and (c) are
2077 * normal and will have XMIN_COMMITTED set. Case (d) is only
2078 * possible if a whole tuple chain has been moved while
2079 * processing this or a higher numbered block.
2082 if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
2084 if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
2085 elog(ERROR, "HEAP_MOVED_IN was not expected");
2086 if (!(tuple.t_data->t_infomask & HEAP_MOVED_OFF))
2087 elog(ERROR, "HEAP_MOVED_OFF was expected");
2090 * MOVED_OFF by another VACUUM would have caused the
2091 * visibility check to set XMIN_COMMITTED or XMIN_INVALID.
2093 if (HeapTupleHeaderGetXvac(tuple.t_data) != myXID)
2094 elog(ERROR, "invalid XVAC in tuple header");
2097 * If this (chain) tuple has already been moved by me, then I have to
2098 * check whether it is in vacpage or not - i.e. whether it was moved while
2099 * cleaning this page or some previous one.
2102 /* Can't we Assert(keep_tuples > 0) here? */
2103 if (keep_tuples == 0)
2105 if (chain_tuple_moved)
2107 /* some chains were moved while cleaning this page */
2108 Assert(vacpage->offsets_free > 0);
2109 for (i = 0; i < vacpage->offsets_free; i++)
2111 if (vacpage->offsets[i] == offnum)
2114 if (i >= vacpage->offsets_free) /* not found */
2116 vacpage->offsets[vacpage->offsets_free++] = offnum;
2119 * If this is not a heap-only tuple, there must be an
2120 * index entry for this item which will be removed in
2121 * the index cleanup. Decrement the
2122 * keep_indexed_tuples count to remember this.
2124 if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
2125 keep_indexed_tuples--;
2131 vacpage->offsets[vacpage->offsets_free++] = offnum;
2134 * If this is not a heap-only tuple, there must be an
2135 * index entry for this item which will be removed in the
2136 * index cleanup. Decrement the keep_indexed_tuples count to remember this.
2139 if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
2140 keep_indexed_tuples--;
2147 * If this tuple is in a chain of tuples created in updates by
2148 * "recent" transactions then we have to move the whole chain of
2149 * tuples to other places, so that we can write new t_ctid links
2150 * that preserve the chain relationship.
2152 * This test is complicated. Read it as "if tuple is a recently
2153 * created updated version, OR if it is an obsoleted version". (In
2154 * the second half of the test, we needn't make any check on XMAX
2155 * --- it must be recently obsoleted, else scan_heap would have
2156 * deemed it removable.)
2158 * NOTE: this test is not 100% accurate: it is possible for a
2159 * tuple to be an updated one with recent xmin, and yet not match
2160 * any new_tid entry in the vtlinks list. Presumably there was
2161 * once a parent tuple with xmax matching the xmin, but it's
2162 * possible that that tuple has been removed --- for example, if
2163 * it had xmin = xmax and wasn't itself an updated version, then
2164 * HeapTupleSatisfiesVacuum would deem it removable as soon as the
2165 * xmin xact completes.
2167 * To be on the safe side, we abandon the repair_frag process if
2168 * we cannot find the parent tuple in vtlinks. This may be overly
2169 * conservative; AFAICS it would be safe to move the chain.
2171 * Also, because we distinguish DEAD and RECENTLY_DEAD tuples
2172 * using OldestXmin, which is a rather coarse test, it is quite
2173 * possible to have an update chain in which a tuple we think is
2174 * RECENTLY_DEAD links forward to one that is definitely DEAD. In
2175 * such a case the RECENTLY_DEAD tuple must actually be dead, but
2176 * it seems too complicated to try to make VACUUM remove it. We
2177 * treat each contiguous set of RECENTLY_DEAD tuples as a
2178 * separately movable chain, ignoring any intervening DEAD ones.
2180 if (((tuple.t_data->t_infomask & HEAP_UPDATED) &&
2181 !TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
2183 (!(tuple.t_data->t_infomask & (HEAP_XMAX_INVALID |
2185 !(ItemPointerEquals(&(tuple.t_self),
2186 &(tuple.t_data->t_ctid)))))
2189 bool freeCbuf = false;
2190 bool chain_move_failed = false;
2191 bool moved_target = false;
2192 ItemPointerData Ctid;
2193 HeapTupleData tp = tuple;
2194 Size tlen = tuple_len;
2198 VacPage to_vacpage = NULL;
2202 if (dst_buffer != InvalidBuffer)
2204 ReleaseBuffer(dst_buffer);
2205 dst_buffer = InvalidBuffer;
2208 /* Quick exit if we have no vtlinks to search in */
2209 if (vacrelstats->vtlinks == NULL)
2211 elog(DEBUG2, "parent item in update-chain not found --- cannot continue repair_frag");
2212 break; /* out of walk-along-page loop */
2216 * If this tuple is at the beginning or in the middle of the chain, then
2217 * we have to walk to the end of the chain. As with any t_ctid
2218 * chase, we have to verify that each new tuple is really the
2219 * descendant of the tuple we came from; however, here we need
2220 * even more than the normal amount of paranoia. If t_ctid
2221 * links forward to a tuple determined to be DEAD, then
2222 * depending on where that tuple is, it might already have
2223 * been removed, and perhaps even replaced by a MOVED_IN
2224 * tuple. We don't want to include any DEAD tuples in the
2225 * chain, so we have to recheck HeapTupleSatisfiesVacuum.
2227 while (!(tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
2229 !(ItemPointerEquals(&(tp.t_self),
2230 &(tp.t_data->t_ctid))))
2232 ItemPointerData nextTid;
2233 TransactionId priorXmax;
2236 OffsetNumber nextOffnum;
2238 HeapTupleHeader nextTdata;
2239 HTSV_Result nextTstatus;
2241 nextTid = tp.t_data->t_ctid;
2242 priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
2243 /* assume block# is OK (see heap_fetch comments) */
2244 nextBuf = ReadBufferExtended(onerel, MAIN_FORKNUM,
2245 ItemPointerGetBlockNumber(&nextTid),
2246 RBM_NORMAL, vac_strategy);
2247 nextPage = BufferGetPage(nextBuf);
2248 /* If bogus or unused slot, assume tp is end of chain */
2249 nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
2250 if (nextOffnum < FirstOffsetNumber ||
2251 nextOffnum > PageGetMaxOffsetNumber(nextPage))
2253 ReleaseBuffer(nextBuf);
2256 nextItemid = PageGetItemId(nextPage, nextOffnum);
2257 if (!ItemIdIsNormal(nextItemid))
2259 ReleaseBuffer(nextBuf);
2262 /* if not matching XMIN, assume tp is end of chain */
2263 nextTdata = (HeapTupleHeader) PageGetItem(nextPage,
2265 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(nextTdata),
2268 ReleaseBuffer(nextBuf);
2273 * Must check for DEAD or MOVED_IN tuple, too. This could
2274 * potentially update hint bits, so we'd better hold the
2275 * buffer content lock.
2277 LockBuffer(nextBuf, BUFFER_LOCK_SHARE);
2278 nextTstatus = HeapTupleSatisfiesVacuum(nextTdata,
2281 if (nextTstatus == HEAPTUPLE_DEAD ||
2282 nextTstatus == HEAPTUPLE_INSERT_IN_PROGRESS)
2284 UnlockReleaseBuffer(nextBuf);
2287 LockBuffer(nextBuf, BUFFER_LOCK_UNLOCK);
2288 /* if it's MOVED_OFF we should have moved this one with it */
2289 if (nextTstatus == HEAPTUPLE_DELETE_IN_PROGRESS)
2290 elog(ERROR, "updated tuple is already HEAP_MOVED_OFF");
2291 /* OK, switch our attention to the next tuple in chain */
2292 tp.t_data = nextTdata;
2293 tp.t_self = nextTid;
2294 tlen = tp.t_len = ItemIdGetLength(nextItemid);
2296 ReleaseBuffer(Cbuf);
2301 /* Set up workspace for planning the chain move */
2302 vtmove = (VTupleMove) palloc(100 * sizeof(VTupleMoveData));
2307 * Now, walk backwards up the chain (towards older tuples) and
2308 * check if all items in chain can be moved. We record all
2309 * the moves that need to be made in the vtmove array.
2316 HeapTupleHeader PTdata;
2317 VTupleLinkData vtld,
2320 /* Identify a target page to move this tuple to */
2321 if (to_vacpage == NULL ||
2322 !enough_space(to_vacpage, tlen))
2324 for (i = 0; i < num_fraged_pages; i++)
2326 if (enough_space(fraged_pages->pagedesc[i], tlen))
2330 if (i == num_fraged_pages)
2332 /* can't move item anywhere */
2333 chain_move_failed = true;
2334 break; /* out of check-all-items loop */
2337 to_vacpage = fraged_pages->pagedesc[to_item];
2339 to_vacpage->free -= MAXALIGN(tlen);
2340 if (to_vacpage->offsets_used >= to_vacpage->offsets_free)
2341 to_vacpage->free -= sizeof(ItemIdData);
2342 (to_vacpage->offsets_used)++;
2344 /* Add an entry to vtmove list */
2345 if (free_vtmove == 0)
2348 vtmove = (VTupleMove)
2350 (free_vtmove + num_vtmove) *
2351 sizeof(VTupleMoveData));
2353 vtmove[num_vtmove].tid = tp.t_self;
2354 vtmove[num_vtmove].vacpage = to_vacpage;
2355 if (to_vacpage->offsets_used == 1)
2356 vtmove[num_vtmove].cleanVpd = true;
2358 vtmove[num_vtmove].cleanVpd = false;
2362 /* Remember if we reached the original target tuple */
2363 if (ItemPointerGetBlockNumber(&tp.t_self) == blkno &&
2364 ItemPointerGetOffsetNumber(&tp.t_self) == offnum)
2365 moved_target = true;
2367 /* Done if at beginning of chain */
2368 if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
2369 TransactionIdPrecedes(HeapTupleHeaderGetXmin(tp.t_data),
2371 break; /* out of check-all-items loop */
2373 /* Move to tuple with prior row version */
2374 vtld.new_tid = tp.t_self;
2376 vac_bsearch((void *) &vtld,
2377 (void *) (vacrelstats->vtlinks),
2378 vacrelstats->num_vtlinks,
2379 sizeof(VTupleLinkData),
2383 /* see discussion above */
2384 elog(DEBUG2, "parent item in update-chain not found --- cannot continue repair_frag");
2385 chain_move_failed = true;
2386 break; /* out of check-all-items loop */
2388 tp.t_self = vtlp->this_tid;
2389 Pbuf = ReadBufferExtended(onerel, MAIN_FORKNUM,
2390 ItemPointerGetBlockNumber(&(tp.t_self)),
2391 RBM_NORMAL, vac_strategy);
2392 Ppage = BufferGetPage(Pbuf);
2393 Pitemid = PageGetItemId(Ppage,
2394 ItemPointerGetOffsetNumber(&(tp.t_self)));
2395 /* this can't happen since we saw tuple earlier: */
2396 if (!ItemIdIsNormal(Pitemid))
2397 elog(ERROR, "parent itemid marked as unused");
2398 PTdata = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
2400 /* ctid should not have changed since we saved it */
2401 Assert(ItemPointerEquals(&(vtld.new_tid),
2402 &(PTdata->t_ctid)));
2405 * See the discussion above about the cases where
2406 * !ItemIdIsUsed(nextItemid) (i.e. the child item has been removed).
2407 * Because we currently do not remove the useless part of an update
2408 * chain, it's possible to find a non-matching parent row here. As in
2409 * the case that originally exposed this problem, we simply stop
2410 * shrinking here. We could try to find the real parent row, but it's
2411 * not worth doing, since a real solution will be implemented
2412 * eventually and we are too close to the 6.5 release.
2413 * - vadim 06/11/99
2415 if ((PTdata->t_infomask & HEAP_XMAX_IS_MULTI) ||
2416 !(TransactionIdEquals(HeapTupleHeaderGetXmax(PTdata),
2417 HeapTupleHeaderGetXmin(tp.t_data))))
2419 ReleaseBuffer(Pbuf);
2420 elog(DEBUG2, "too old parent tuple found --- cannot continue repair_frag");
2421 chain_move_failed = true;
2422 break; /* out of check-all-items loop */
2425 tlen = tp.t_len = ItemIdGetLength(Pitemid);
2427 ReleaseBuffer(Cbuf);
2430 } /* end of check-all-items loop */
2433 ReleaseBuffer(Cbuf);
2436 /* Double-check that we will move the current target tuple */
2437 if (!moved_target && !chain_move_failed)
2439 elog(DEBUG2, "failed to chain back to target --- cannot continue repair_frag");
2440 chain_move_failed = true;
2443 if (chain_move_failed)
2446 * Undo changes to offsets_used state. We don't bother
2447 * cleaning up the amount-free state, since we're not
2448 * going to do any further tuple motion.
2450 for (i = 0; i < num_vtmove; i++)
2452 Assert(vtmove[i].vacpage->offsets_used > 0);
2453 (vtmove[i].vacpage->offsets_used)--;
2456 break; /* out of walk-along-page loop */
2460 * Okay, move the whole tuple chain in reverse order.
2462 * Ctid tracks the new location of the previously-moved tuple.
2464 ItemPointerSetInvalid(&Ctid);
2465 for (ti = 0; ti < num_vtmove; ti++)
2467 VacPage destvacpage = vtmove[ti].vacpage;
2471 /* Get page to move from */
2472 tuple.t_self = vtmove[ti].tid;
2473 Cbuf = ReadBufferExtended(onerel, MAIN_FORKNUM,
2474 ItemPointerGetBlockNumber(&(tuple.t_self)),
2475 RBM_NORMAL, vac_strategy);
2477 /* Get page to move to */
2478 dst_buffer = ReadBufferExtended(onerel, MAIN_FORKNUM,
2480 RBM_NORMAL, vac_strategy);
2482 LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
2483 if (dst_buffer != Cbuf)
2484 LockBuffer(Cbuf, BUFFER_LOCK_EXCLUSIVE);
2486 dst_page = BufferGetPage(dst_buffer);
2487 Cpage = BufferGetPage(Cbuf);
2489 Citemid = PageGetItemId(Cpage,
2490 ItemPointerGetOffsetNumber(&(tuple.t_self)));
2491 tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
2492 tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
2494 move_chain_tuple(vacrelstats, onerel, Cbuf, Cpage, &tuple,
2495 dst_buffer, dst_page, destvacpage,
2496 &ec, &Ctid, vtmove[ti].cleanVpd);
2499 * If the tuple we are moving is a heap-only tuple, this
2500 * move will generate an additional index entry, so
2501 * increment the rel_indexed_tuples count.
2503 if (HeapTupleHeaderIsHeapOnly(tuple.t_data))
2504 vacrelstats->rel_indexed_tuples++;
2507 if (destvacpage->blkno > last_move_dest_block)
2508 last_move_dest_block = destvacpage->blkno;
2511 * Remember that we moved tuple from the current page
2512 * (corresponding index tuple will be cleaned).
2515 vacpage->offsets[vacpage->offsets_free++] =
2516 ItemPointerGetOffsetNumber(&(tuple.t_self));
2520 * When we move tuple chains, we may need to move
2521 * tuples from a block that we haven't yet scanned in
2522 * the outer walk-along-the-relation loop. Note that
2523 * we can't be moving a tuple from a block that we
2524 * have already scanned because if such a tuple
2525 * exists, then we must have moved the chain along
2526 * with that tuple when we scanned that block. IOW the
2527 * test of (Cbuf != buf) guarantees that the tuple we
2528 * are looking at right now is in a block which is yet to be scanned.
2531 * We maintain two counters to correctly count the
2532 * moved-off tuples from blocks that are not yet
2533 * scanned (keep_tuples) and how many of them have
2534 * index pointers (keep_indexed_tuples). The main
2535 * reason to track the latter is to help verify that
2536 * indexes have the expected number of entries when
2537 * all the dust settles.
2539 if (!HeapTupleHeaderIsHeapOnly(tuple.t_data))
2540 keep_indexed_tuples++;
2544 ReleaseBuffer(dst_buffer);
2545 ReleaseBuffer(Cbuf);
2546 } /* end of move-the-tuple-chain loop */
2548 dst_buffer = InvalidBuffer;
2550 chain_tuple_moved = true;
2552 /* advance to next tuple in walk-along-page loop */
2554 } /* end of is-tuple-in-chain test */
2556 /* try to find new page for this tuple */
2557 if (dst_buffer == InvalidBuffer ||
2558 !enough_space(dst_vacpage, tuple_len))
2560 if (dst_buffer != InvalidBuffer)
2562 ReleaseBuffer(dst_buffer);
2563 dst_buffer = InvalidBuffer;
2565 for (i = 0; i < num_fraged_pages; i++)
2567 if (enough_space(fraged_pages->pagedesc[i], tuple_len))
2570 if (i == num_fraged_pages)
2571 break; /* can't move item anywhere */
2572 dst_vacpage = fraged_pages->pagedesc[i];
2573 dst_buffer = ReadBufferExtended(onerel, MAIN_FORKNUM,
2575 RBM_NORMAL, vac_strategy);
2576 LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
2577 dst_page = BufferGetPage(dst_buffer);
2578 /* if this page was not used before - clean it */
2579 if (!PageIsEmpty(dst_page) && dst_vacpage->offsets_used == 0)
2580 vacuum_page(vacrelstats, onerel, dst_buffer, dst_vacpage);
2583 LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
2585 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2587 move_plain_tuple(onerel, buf, page, &tuple,
2588 dst_buffer, dst_page, dst_vacpage, &ec);
2591 * If the tuple we are moving is a heap-only tuple, this move will
2592 * generate an additional index entry, so increment the
2593 * rel_indexed_tuples count.
2595 if (HeapTupleHeaderIsHeapOnly(tuple.t_data))
2596 vacrelstats->rel_indexed_tuples++;
2599 if (dst_vacpage->blkno > last_move_dest_block)
2600 last_move_dest_block = dst_vacpage->blkno;
2603 * Remember that we moved tuple from the current page
2604 * (corresponding index tuple will be cleaned).
2606 vacpage->offsets[vacpage->offsets_free++] = offnum;
2607 } /* walk along page */
2610 * If we broke out of the walk-along-page loop early (ie, still have
2611 * offnum <= maxoff), then we failed to move some tuple off this page.
2612 * No point in shrinking any more, so clean up and exit the per-page loop.
2615 if (offnum < maxoff && keep_tuples > 0)
2620 * Fix vacpage state for any unvisited tuples remaining on page
2622 for (off = OffsetNumberNext(offnum);
2624 off = OffsetNumberNext(off))
2626 ItemId itemid = PageGetItemId(page, off);
2627 HeapTupleHeader htup;
2629 if (!ItemIdIsUsed(itemid))
2631 /* Shouldn't be any DEAD or REDIRECT items anymore */
2632 Assert(ItemIdIsNormal(itemid));
2634 htup = (HeapTupleHeader) PageGetItem(page, itemid);
2635 if (htup->t_infomask & HEAP_XMIN_COMMITTED)
2639 * See comments in the walk-along-page loop above about why
2640 * only MOVED_OFF tuples should be found here.
2642 if (htup->t_infomask & HEAP_MOVED_IN)
2643 elog(ERROR, "HEAP_MOVED_IN was not expected");
2644 if (!(htup->t_infomask & HEAP_MOVED_OFF))
2645 elog(ERROR, "HEAP_MOVED_OFF was expected");
2646 if (HeapTupleHeaderGetXvac(htup) != myXID)
2647 elog(ERROR, "invalid XVAC in tuple header");
2649 if (chain_tuple_moved)
2651 /* some chains were moved while cleaning this page */
2652 Assert(vacpage->offsets_free > 0);
2653 for (i = 0; i < vacpage->offsets_free; i++)
2655 if (vacpage->offsets[i] == off)
2658 if (i >= vacpage->offsets_free) /* not found */
2660 vacpage->offsets[vacpage->offsets_free++] = off;
2661 Assert(keep_tuples > 0);
2664 * If this is not a heap-only tuple, there must be an
2665 * index entry for this item which will be removed in
2666 * the index cleanup. Decrement the
2667 * keep_indexed_tuples count to remember this.
2669 if (!HeapTupleHeaderIsHeapOnly(htup))
2670 keep_indexed_tuples--;
2676 vacpage->offsets[vacpage->offsets_free++] = off;
2677 Assert(keep_tuples > 0);
2678 if (!HeapTupleHeaderIsHeapOnly(htup))
2679 keep_indexed_tuples--;
2685 if (vacpage->offsets_free > 0) /* some tuples were moved */
2687 if (chain_tuple_moved) /* else - they are ordered */
2689 qsort((char *) (vacpage->offsets), vacpage->offsets_free,
2690 sizeof(OffsetNumber), vac_cmp_offno);
2692 vpage_insert(&Nvacpagelist, copy_vac_page(vacpage));
2697 if (offnum <= maxoff)
2698 break; /* had to quit early, see above note */
2700 } /* walk along relation */
2702 blkno++; /* new number of blocks */
2704 if (dst_buffer != InvalidBuffer)
2706 Assert(num_moved > 0);
2707 ReleaseBuffer(dst_buffer);
2713 * We have to commit our tuple movings before we truncate the
2714 * relation. Ideally we should do Commit/StartTransactionCommand
2715 * here, relying on the session-level table lock to protect our
2716 * exclusive access to the relation. However, that would require a
2717 * lot of extra code to close and re-open the relation, indexes, etc.
2718 * For now, a quick hack: record status of current transaction as
2719 * committed, and continue. We force the commit to be synchronous so
2720 * that it's down to disk before we truncate. (Note: tqual.c knows
2721 * that VACUUM FULL always uses sync commit, too.) The transaction
2722 * continues to be shown as running in the ProcArray.
2724 * XXX This desperately needs to be revisited. Any failure after this
2725 * point will result in a PANIC "cannot abort transaction nnn, it was
2726 * already committed"! As a precaution, we prevent cancel interrupts
2727 * after this point to mitigate this problem; caller is responsible for
2728 * re-enabling them after committing the transaction.
2733 (void) RecordTransactionCommit(true);
2737 * We are not going to move any more tuples across pages, but we still
2738 * need to apply vacuum_page to compact free space in the remaining pages
2739 * in vacuum_pages list. Note that some of these pages may also be in the
2740 * fraged_pages list, and may have had tuples moved onto them; if so, we
2741 * already did vacuum_page and needn't do it again.
2743 for (i = 0, curpage = vacuum_pages->pagedesc;
2747 vacuum_delay_point();
2749 Assert((*curpage)->blkno < blkno);
2750 if ((*curpage)->offsets_used == 0)
2755 /* this page was not used as a move target, so must clean it */
2756 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, (*curpage)->blkno,
2757 RBM_NORMAL, vac_strategy);
2758 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2759 page = BufferGetPage(buf);
2760 if (!PageIsEmpty(page))
2761 vacuum_page(vacrelstats, onerel, buf, *curpage);
2762 UnlockReleaseBuffer(buf);
2767 * Now scan all the pages that we moved tuples onto and update tuple
2768 * status bits. This is not really necessary, but will save time for
2769 * future transactions examining these tuples.
2771 update_hint_bits(onerel, fraged_pages, num_fraged_pages,
2772 last_move_dest_block, num_moved);
2775 * It'd be cleaner to make this report at the bottom of this routine, but
2776 * then the rusage would double-count the second pass of index vacuuming.
2777 * So do it here and ignore the relatively small amount of processing that follows.
2781 (errmsg("\"%s\": moved %u row versions, truncated %u to %u pages",
2782 RelationGetRelationName(onerel),
2783 num_moved, nblocks, blkno),
2785 pg_rusage_show(&ru0))));
2788 * Reflect the motion of system tuples to catalog cache here.
2790 CommandCounterIncrement();
2792 if (Nvacpagelist.num_pages > 0)
2794 /* vacuum indexes again if needed */
2801 /* re-sort Nvacpagelist.pagedesc */
2802 for (vpleft = Nvacpagelist.pagedesc,
2803 vpright = Nvacpagelist.pagedesc + Nvacpagelist.num_pages - 1;
2804 vpleft < vpright; vpleft++, vpright--)
2812 * keep_tuples is the number of tuples that have been moved off a
2813 * page during chain moves but not been scanned over subsequently.
2814 * The tuple ids of these tuples are not recorded as free offsets
2815 * for any VacPage, so they will not be cleared from the indexes.
2816 * keep_indexed_tuples is the portion of these that are expected
2817 * to have index entries.
2819 Assert(keep_tuples >= 0);
2820 for (i = 0; i < nindexes; i++)
2821 vacuum_index(&Nvacpagelist, Irel[i],
2822 vacrelstats->rel_indexed_tuples,
2823 keep_indexed_tuples);
2827 * Clean moved-off tuples from last page in Nvacpagelist list.
2829 * We need only do this in this one page, because higher-numbered
2830 * pages are going to be truncated from the relation entirely. But see
2831 * comments for update_hint_bits().
2833 if (vacpage->blkno == (blkno - 1) &&
2834 vacpage->offsets_free > 0)
2838 OffsetNumber unused[MaxOffsetNumber];
2839 OffsetNumber offnum,
2844 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, vacpage->blkno,
2845 RBM_NORMAL, vac_strategy);
2846 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2847 page = BufferGetPage(buf);
2848 maxoff = PageGetMaxOffsetNumber(page);
2849 for (offnum = FirstOffsetNumber;
2851 offnum = OffsetNumberNext(offnum))
2853 ItemId itemid = PageGetItemId(page, offnum);
2854 HeapTupleHeader htup;
2856 if (!ItemIdIsUsed(itemid))
2858 /* Shouldn't be any DEAD or REDIRECT items anymore */
2859 Assert(ItemIdIsNormal(itemid));
2861 htup = (HeapTupleHeader) PageGetItem(page, itemid);
2862 if (htup->t_infomask & HEAP_XMIN_COMMITTED)
2866 * See comments in the walk-along-page loop above about why
2867 * only MOVED_OFF tuples should be found here.
2869 if (htup->t_infomask & HEAP_MOVED_IN)
2870 elog(ERROR, "HEAP_MOVED_IN was not expected");
2871 if (!(htup->t_infomask & HEAP_MOVED_OFF))
2872 elog(ERROR, "HEAP_MOVED_OFF was expected");
2873 if (HeapTupleHeaderGetXvac(htup) != myXID)
2874 elog(ERROR, "invalid XVAC in tuple header");
2876 ItemIdSetUnused(itemid);
2879 unused[uncnt++] = offnum;
2881 Assert(vacpage->offsets_free == num_tuples);
2883 START_CRIT_SECTION();
2885 PageRepairFragmentation(page);
2887 MarkBufferDirty(buf);
2890 if (!onerel->rd_istemp)
2894 recptr = log_heap_clean(onerel, buf,
2897 vacrelstats->latestRemovedXid, false);
2898 PageSetLSN(page, recptr);
2899 PageSetTLI(page, ThisTimeLineID);
2904 UnlockReleaseBuffer(buf);
2907 /* now - free new list of reaped pages */
2908 curpage = Nvacpagelist.pagedesc;
2909 for (i = 0; i < Nvacpagelist.num_pages; i++, curpage++)
2911 pfree(Nvacpagelist.pagedesc);
2914 /* Truncate relation, if needed */
2915 if (blkno < nblocks)
2917 RelationTruncate(onerel, blkno);
2919 /* force relcache inval so all backends reset their rd_targblock */
2920 CacheInvalidateRelcache(onerel);
2922 vacrelstats->rel_pages = blkno; /* set new number of blocks */
2927 if (vacrelstats->vtlinks != NULL)
2928 pfree(vacrelstats->vtlinks);
2930 ExecContext_Finish(&ec);
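/*
 * Illustrative caller sketch (not part of this file; the surrounding calls
 * are assumptions for illustration).  repair_frag() holds off cancel
 * interrupts once it has pre-recorded the transaction commit, so the caller
 * must re-enable them after the transaction is really committed:
 *
 *		bool	heldoff;
 *
 *		heldoff = repair_frag(vacrelstats, onerel, vacuum_pages,
 *							  fraged_pages, nindexes, Irel);
 *		... vacuum remaining pages, close indexes, commit ...
 *		if (heldoff)
 *			RESUME_INTERRUPTS();
 */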
2936 * move_chain_tuple() -- move one tuple that is part of a tuple chain
2938 * This routine moves old_tup from old_page to dst_page.
2939 * old_page and dst_page might be the same page.
2940 * On entry old_buf and dst_buf are locked exclusively, both locks (or
2941 * the single lock, if this is an intra-page move) are released before
2944 * Yes, a routine with ten parameters is ugly, but it's still better
2945 * than having these 120 lines of code in repair_frag() which is
2946 * already too long and almost unreadable.
2949 move_chain_tuple(VRelStats *vacrelstats, Relation rel,
2950 Buffer old_buf, Page old_page, HeapTuple old_tup,
2951 Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
2952 ExecContext ec, ItemPointer ctid, bool cleanVpd)
2954 TransactionId myXID = GetCurrentTransactionId();
2955 HeapTupleData newtup;
2956 OffsetNumber newoff;
2958 Size tuple_len = old_tup->t_len;
2959 bool all_visible_cleared = false;
2960 bool all_visible_cleared_new = false;
2963 * make a modifiable copy of the source tuple.
2965 heap_copytuple_with_tuple(old_tup, &newtup);
2968 * register invalidation of source tuple in catcaches.
2970 CacheInvalidateHeapTuple(rel, old_tup);
2972 /* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */
2973 START_CRIT_SECTION();
2976 * mark the source tuple MOVED_OFF.
2978 old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
2981 old_tup->t_data->t_infomask |= HEAP_MOVED_OFF;
2982 HeapTupleHeaderSetXvac(old_tup->t_data, myXID);
2985 * If this page was not used before - clean it.
2987 * NOTE: a nasty bug used to lurk here. It is possible for the source and
2988 * destination pages to be the same (since this tuple-chain member can be
2989 * on a page lower than the one we're currently processing in the outer
2990 * loop). If that's true, then after vacuum_page() the source tuple will
2991 * have been moved, and tuple.t_data will be pointing at garbage.
2992 * Therefore we must do everything that uses old_tup->t_data BEFORE this
2995 * This path is different from the other callers of vacuum_page, because
2996 * we have already incremented the vacpage's offsets_used field to account
2997 * for the tuple(s) we expect to move onto the page. Therefore
2998 * vacuum_page's check for offsets_used == 0 is wrong. But since that's a
2999 * good debugging check for all other callers, we work around it here
3000 * rather than remove it.
3002 if (!PageIsEmpty(dst_page) && cleanVpd)
3004 int sv_offsets_used = dst_vacpage->offsets_used;
3006 dst_vacpage->offsets_used = 0;
3007 vacuum_page(vacrelstats, rel, dst_buf, dst_vacpage);
3008 dst_vacpage->offsets_used = sv_offsets_used;
3012 * Update the state of the copied tuple, and store it on the destination
3013 * page. The copied tuple is never part of a HOT chain.
3015 newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
3018 newtup.t_data->t_infomask |= HEAP_MOVED_IN;
3019 HeapTupleHeaderClearHotUpdated(newtup.t_data);
3020 HeapTupleHeaderClearHeapOnly(newtup.t_data);
3021 HeapTupleHeaderSetXvac(newtup.t_data, myXID);
3022 newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
3023 InvalidOffsetNumber, false, true);
3024 if (newoff == InvalidOffsetNumber)
3025 elog(PANIC, "failed to add item with len = %lu to page %u while moving tuple chain",
3026 (unsigned long) tuple_len, dst_vacpage->blkno);
3027 newitemid = PageGetItemId(dst_page, newoff);
3028 /* drop temporary copy, and point to the version on the dest page */
3029 pfree(newtup.t_data);
3030 newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid);
3032 ItemPointerSet(&(newtup.t_self), dst_vacpage->blkno, newoff);
3035 * Set new tuple's t_ctid pointing to itself if last tuple in chain, and
3036 * to next tuple in chain otherwise. (Since we move the chain in reverse
3037 * order, this is actually the previously processed tuple.)
3039 if (!ItemPointerIsValid(ctid))
3040 newtup.t_data->t_ctid = newtup.t_self;
3042 newtup.t_data->t_ctid = *ctid;
3043 *ctid = newtup.t_self;
3045 /* clear PD_ALL_VISIBLE flags */
3046 if (PageIsAllVisible(old_page))
3048 all_visible_cleared = true;
3049 PageClearAllVisible(old_page);
3051 if (dst_buf != old_buf && PageIsAllVisible(dst_page))
3053 all_visible_cleared_new = true;
3054 PageClearAllVisible(dst_page);
3057 MarkBufferDirty(dst_buf);
3058 if (dst_buf != old_buf)
3059 MarkBufferDirty(old_buf);
3062 if (!rel->rd_istemp)
3064 XLogRecPtr recptr = log_heap_move(rel, old_buf, old_tup->t_self,
3066 all_visible_cleared,
3067 all_visible_cleared_new);
3069 if (old_buf != dst_buf)
3071 PageSetLSN(old_page, recptr);
3072 PageSetTLI(old_page, ThisTimeLineID);
3074 PageSetLSN(dst_page, recptr);
3075 PageSetTLI(dst_page, ThisTimeLineID);
3080 LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK);
3081 if (dst_buf != old_buf)
3082 LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
3084 /* Clear bits in visibility map */
3085 if (all_visible_cleared)
3086 visibilitymap_clear(rel, BufferGetBlockNumber(old_buf));
3087 if (all_visible_cleared_new)
3088 visibilitymap_clear(rel, BufferGetBlockNumber(dst_buf));
3090 /* Create index entries for the moved tuple */
3091 if (ec->resultRelInfo->ri_NumIndices > 0)
3093 ExecStoreTuple(&newtup, ec->slot, InvalidBuffer, false);
3094 ExecInsertIndexTuples(ec->slot, &(newtup.t_self), ec->estate, true);
3095 ResetPerTupleExprContext(ec->estate);
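/*
 * Illustrative example of the reverse-order chain move handled above: for an
 * update chain A -> B -> C (C newest), the chain is moved newest-first.  C's
 * new copy gets t_ctid pointing to itself, B's new copy then points to C's
 * new location, and finally A's new copy points to B's new location, so the
 * t_ctid links of the moved chain stay intact.
 */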
3100 * move_plain_tuple() -- move one tuple that is not part of a chain
3102 * This routine moves old_tup from old_page to dst_page.
3103 * On entry old_buf and dst_buf are locked exclusively, both locks are
3104 * released before exit.
3106 * Yes, a routine with eight parameters is ugly, but it's still better
3107 * than having these 90 lines of code in repair_frag() which is already
3108 * too long and almost unreadable.
3111 move_plain_tuple(Relation rel,
3112 Buffer old_buf, Page old_page, HeapTuple old_tup,
3113 Buffer dst_buf, Page dst_page, VacPage dst_vacpage,
3116 TransactionId myXID = GetCurrentTransactionId();
3117 HeapTupleData newtup;
3118 OffsetNumber newoff;
3120 Size tuple_len = old_tup->t_len;
3121 bool all_visible_cleared = false;
3122 bool all_visible_cleared_new = false;
3125 heap_copytuple_with_tuple(old_tup, &newtup);
3128 * register invalidation of source tuple in catcaches.
3130 * (Note: we do not need to register the copied tuple, because we are not
3131 * changing the tuple contents and so there cannot be any need to flush
3132 * negative catcache entries.)
3134 CacheInvalidateHeapTuple(rel, old_tup);
3136 /* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */
3137 START_CRIT_SECTION();
3140 * Mark new tuple as MOVED_IN by me; also mark it not HOT.
3142 newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
3145 newtup.t_data->t_infomask |= HEAP_MOVED_IN;
3146 HeapTupleHeaderClearHotUpdated(newtup.t_data);
3147 HeapTupleHeaderClearHeapOnly(newtup.t_data);
3148 HeapTupleHeaderSetXvac(newtup.t_data, myXID);
3150 /* add tuple to the page */
3151 newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
3152 InvalidOffsetNumber, false, true);
3153 if (newoff == InvalidOffsetNumber)
3154 elog(PANIC, "failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)",
3155 (unsigned long) tuple_len,
3156 dst_vacpage->blkno, (unsigned long) dst_vacpage->free,
3157 dst_vacpage->offsets_used, dst_vacpage->offsets_free);
3158 newitemid = PageGetItemId(dst_page, newoff);
3159 pfree(newtup.t_data);
3160 newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid);
3161 ItemPointerSet(&(newtup.t_data->t_ctid), dst_vacpage->blkno, newoff);
3162 newtup.t_self = newtup.t_data->t_ctid;
3165 * Mark old tuple as MOVED_OFF by me.
3167 old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
3170 old_tup->t_data->t_infomask |= HEAP_MOVED_OFF;
3171 HeapTupleHeaderSetXvac(old_tup->t_data, myXID);
3173 /* clear PD_ALL_VISIBLE flags */
3174 if (PageIsAllVisible(old_page))
3176 all_visible_cleared = true;
3177 PageClearAllVisible(old_page);
3179 if (PageIsAllVisible(dst_page))
3181 all_visible_cleared_new = true;
3182 PageClearAllVisible(dst_page);
3185 MarkBufferDirty(dst_buf);
3186 MarkBufferDirty(old_buf);
3189 if (!rel->rd_istemp)
3191 XLogRecPtr recptr = log_heap_move(rel, old_buf, old_tup->t_self,
3193 all_visible_cleared,
3194 all_visible_cleared_new);
3196 PageSetLSN(old_page, recptr);
3197 PageSetTLI(old_page, ThisTimeLineID);
3198 PageSetLSN(dst_page, recptr);
3199 PageSetTLI(dst_page, ThisTimeLineID);
3204 dst_vacpage->free = PageGetFreeSpaceWithFillFactor(rel, dst_page);
3205 LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK);
3206 LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
3208 dst_vacpage->offsets_used++;
3210 /* Clear bits in visibility map */
3211 if (all_visible_cleared)
3212 visibilitymap_clear(rel, BufferGetBlockNumber(old_buf));
3213 if (all_visible_cleared_new)
3214 visibilitymap_clear(rel, BufferGetBlockNumber(dst_buf));
3216 /* insert index tuples if needed */
3217 if (ec->resultRelInfo->ri_NumIndices > 0)
3219 ExecStoreTuple(&newtup, ec->slot, InvalidBuffer, false);
3220 ExecInsertIndexTuples(ec->slot, &(newtup.t_self), ec->estate, true);
3221 ResetPerTupleExprContext(ec->estate);
3226 * update_hint_bits() -- update hint bits in destination pages
3228 * Scan all the pages that we moved tuples onto and update tuple status bits.
3229 * This is not really necessary, but it will save time for future transactions
3230 * examining these tuples.
3232 * This pass guarantees that all HEAP_MOVED_IN tuples are marked as
3233 * XMIN_COMMITTED, so that future tqual tests won't need to check their XVAC.
3235 * BUT NOTICE that this code fails to clear HEAP_MOVED_OFF tuples from
3236 * pages that were move source pages but not move dest pages. The bulk
3237 * of the move source pages will be physically truncated from the relation,
3238 * and the last page remaining in the rel will be fixed separately in
3239 * repair_frag(), so the only cases where a MOVED_OFF tuple won't get its
3240 * hint bits updated are tuples that are moved as part of a chain and were
3241 * on pages that were neither move destinations nor at the end of the rel.
3242 * To completely ensure that no MOVED_OFF tuples remain unmarked, we'd have
3243 * to remember and revisit those pages too.
3245 * One wonders whether it wouldn't be better to skip this work entirely,
3246 * and let the tuple status updates happen someplace that's not holding an
3247 * exclusive lock on the relation.
3250 update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
3251 BlockNumber last_move_dest_block, int num_moved)
3253 TransactionId myXID = GetCurrentTransactionId();
3254 int checked_moved = 0;
3258 for (i = 0, curpage = fraged_pages->pagedesc;
3259 i < num_fraged_pages;
3264 OffsetNumber max_offset;
3268 vacuum_delay_point();
3270 if ((*curpage)->blkno > last_move_dest_block)
3271 break; /* no need to scan any further */
3272 if ((*curpage)->offsets_used == 0)
3273 continue; /* this page was never used as a move dest */
3274 buf = ReadBufferExtended(rel, MAIN_FORKNUM, (*curpage)->blkno,
3275 RBM_NORMAL, vac_strategy);
3276 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
3277 page = BufferGetPage(buf);
3278 max_offset = PageGetMaxOffsetNumber(page);
3279 for (off = FirstOffsetNumber;
3281 off = OffsetNumberNext(off))
3283 ItemId itemid = PageGetItemId(page, off);
3284 HeapTupleHeader htup;
3286 if (!ItemIdIsUsed(itemid))
3288 /* Shouldn't be any DEAD or REDIRECT items anymore */
3289 Assert(ItemIdIsNormal(itemid));
3291 htup = (HeapTupleHeader) PageGetItem(page, itemid);
3292 if (htup->t_infomask & HEAP_XMIN_COMMITTED)
3296 * Here we may see either MOVED_OFF or MOVED_IN tuples.
3298 if (!(htup->t_infomask & HEAP_MOVED))
3299 elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
3300 if (HeapTupleHeaderGetXvac(htup) != myXID)
3301 elog(ERROR, "invalid XVAC in tuple header");
3303 if (htup->t_infomask & HEAP_MOVED_IN)
3305 htup->t_infomask |= HEAP_XMIN_COMMITTED;
3306 htup->t_infomask &= ~HEAP_MOVED;
3310 htup->t_infomask |= HEAP_XMIN_INVALID;
3312 MarkBufferDirty(buf);
3313 UnlockReleaseBuffer(buf);
3314 Assert((*curpage)->offsets_used == num_tuples);
3315 checked_moved += num_tuples;
3317 Assert(num_moved == checked_moved);
3321 * vacuum_heap() -- free dead tuples
3323 * This routine marks dead tuples as unused and truncates the relation
3324 * if there are "empty" end-blocks.
3327 vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
3331 BlockNumber relblocks;
3335 nblocks = vacuum_pages->num_pages;
3336 nblocks -= vacuum_pages->empty_end_pages; /* nothing to do with them */
3338 for (i = 0, vacpage = vacuum_pages->pagedesc; i < nblocks; i++, vacpage++)
3340 vacuum_delay_point();
3342 if ((*vacpage)->offsets_free > 0)
3344 buf = ReadBufferExtended(onerel, MAIN_FORKNUM, (*vacpage)->blkno,
3345 RBM_NORMAL, vac_strategy);
3346 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
3347 vacuum_page(vacrelstats, onerel, buf, *vacpage);
3348 UnlockReleaseBuffer(buf);
3352 /* Truncate relation if there are some empty end-pages */
3353 Assert(vacrelstats->rel_pages >= vacuum_pages->empty_end_pages);
3354 if (vacuum_pages->empty_end_pages > 0)
3356 relblocks = vacrelstats->rel_pages - vacuum_pages->empty_end_pages;
3358 (errmsg("\"%s\": truncated %u to %u pages",
3359 RelationGetRelationName(onerel),
3360 vacrelstats->rel_pages, relblocks)));
3361 RelationTruncate(onerel, relblocks);
3363 /* force relcache inval so all backends reset their rd_targblock */
3364 CacheInvalidateRelcache(onerel);
3366 vacrelstats->rel_pages = relblocks; /* set new number of blocks */
3371 * vacuum_page() -- free dead tuples on a page
3372 * and repair its fragmentation.
3374 * Caller must hold pin and lock on buffer.
3377 vacuum_page(VRelStats *vacrelstats, Relation onerel, Buffer buffer, VacPage vacpage)
3379 Page page = BufferGetPage(buffer);
3382 /* There shouldn't be any tuples moved onto the page yet! */
3383 Assert(vacpage->offsets_used == 0);
3385 START_CRIT_SECTION();
3387 for (i = 0; i < vacpage->offsets_free; i++)
3389 ItemId itemid = PageGetItemId(page, vacpage->offsets[i]);
3391 ItemIdSetUnused(itemid);
3394 PageRepairFragmentation(page);
3396 MarkBufferDirty(buffer);
3399 if (!onerel->rd_istemp)
3403 recptr = log_heap_clean(onerel, buffer,
3405 vacpage->offsets, vacpage->offsets_free,
3406 vacrelstats->latestRemovedXid, false);
3407 PageSetLSN(page, recptr);
3408 PageSetTLI(page, ThisTimeLineID);
3415 * scan_index() -- scan one index relation to update pg_class statistics.
3417 * We use this when we have no deletions to do.
3420 scan_index(Relation indrel, double num_tuples)
3422 IndexBulkDeleteResult *stats;
3423 IndexVacuumInfo ivinfo;
3426 pg_rusage_init(&ru0);
3428 ivinfo.index = indrel;
3429 ivinfo.vacuum_full = true;
3430 ivinfo.analyze_only = false;
3431 ivinfo.estimated_count = false;
3432 ivinfo.message_level = elevel;
3433 ivinfo.num_heap_tuples = num_tuples;
3434 ivinfo.strategy = vac_strategy;
3436 stats = index_vacuum_cleanup(&ivinfo, NULL);
3442 * Now update statistics in pg_class, but only if the index says the count
3445 if (!stats->estimated_count)
3446 vac_update_relstats(indrel,
3447 stats->num_pages, stats->num_index_tuples,
3448 false, InvalidTransactionId);
3451 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
3452 RelationGetRelationName(indrel),
3453 stats->num_index_tuples,
3455 errdetail("%u index pages have been deleted, %u are currently reusable.\n"
3457 stats->pages_deleted, stats->pages_free,
3458 pg_rusage_show(&ru0))));
3461 * Check for tuple count mismatch. If the index is partial, then it's OK
3462 * for it to have fewer tuples than the heap; else we have trouble.
3464 if (!stats->estimated_count &&
3465 stats->num_index_tuples != num_tuples)
3467 if (stats->num_index_tuples > num_tuples ||
3468 !vac_is_partial_index(indrel))
3470 (errmsg("index \"%s\" contains %.0f row versions, but table contains %.0f row versions",
3471 RelationGetRelationName(indrel),
3472 stats->num_index_tuples, num_tuples),
3473 errhint("Rebuild the index with REINDEX.")));
3480 * vacuum_index() -- vacuum one index relation.
3482 * vacpagelist is the VacPageList of the heap we're currently vacuuming.
3483 * It's locked. Indrel is an index relation on the vacuumed heap.
3485 * We don't bother to set locks on the index relation here, since
3486 * the parent table is exclusive-locked already.
3488 * Finally, we arrange to update the index relation's statistics in pg_class.
3492 vacuum_index(VacPageList vacpagelist, Relation indrel,
3493 double num_tuples, int keep_tuples)
3495 IndexBulkDeleteResult *stats;
3496 IndexVacuumInfo ivinfo;
3499 pg_rusage_init(&ru0);
3501 ivinfo.index = indrel;
3502 ivinfo.vacuum_full = true;
3503 ivinfo.analyze_only = false;
3504 ivinfo.estimated_count = false;
3505 ivinfo.message_level = elevel;
3506 ivinfo.num_heap_tuples = num_tuples + keep_tuples;
3507 ivinfo.strategy = vac_strategy;
3509 /* Do bulk deletion */
3510 stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
3512 /* Do post-VACUUM cleanup */
3513 stats = index_vacuum_cleanup(&ivinfo, stats);
3519 * Now update statistics in pg_class, but only if the index says the count
3522 if (!stats->estimated_count)
3523 vac_update_relstats(indrel,
3524 stats->num_pages, stats->num_index_tuples,
3525 false, InvalidTransactionId);
3528 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
3529 RelationGetRelationName(indrel),
3530 stats->num_index_tuples,
3532 errdetail("%.0f index row versions were removed.\n"
3533 "%u index pages have been deleted, %u are currently reusable.\n"
3535 stats->tuples_removed,
3536 stats->pages_deleted, stats->pages_free,
3537 pg_rusage_show(&ru0))));
3540 * Check for tuple count mismatch. If the index is partial, then it's OK
3541 * for it to have fewer tuples than the heap; else we have trouble.
3543 if (!stats->estimated_count &&
3544 stats->num_index_tuples != num_tuples + keep_tuples)
3546 if (stats->num_index_tuples > num_tuples + keep_tuples ||
3547 !vac_is_partial_index(indrel))
3549 (errmsg("index \"%s\" contains %.0f row versions, but table contains %.0f row versions",
3550 RelationGetRelationName(indrel),
3551 stats->num_index_tuples, num_tuples + keep_tuples),
3552 errhint("Rebuild the index with REINDEX.")));
3559 * tid_reaped() -- is a particular tid reaped?
3561 * This has the right signature to be an IndexBulkDeleteCallback.
3563 * The vacpagelist->pagedesc array is sorted in the right (block-number) order.
3566 tid_reaped(ItemPointer itemptr, void *state)
3568 VacPageList vacpagelist = (VacPageList) state;
3569 OffsetNumber ioffno;
3573 VacPageData vacpage;
3575 vacpage.blkno = ItemPointerGetBlockNumber(itemptr);
3576 ioffno = ItemPointerGetOffsetNumber(itemptr);
3579 vpp = (VacPage *) vac_bsearch((void *) &vp,
3580 (void *) (vacpagelist->pagedesc),
3581 vacpagelist->num_pages,
3588 /* ok - we are on a partially or fully reaped page */
3591 if (vp->offsets_free == 0)
3593 /* this is EmptyPage, so claim all tuples on it are reaped!!! */
3597 voff = (OffsetNumber *) vac_bsearch((void *) &ioffno,
3598 (void *) (vp->offsets),
3600 sizeof(OffsetNumber),
3611 * Update the Free Space Map with the info we now have about free space in the relation.
3615 vac_update_fsm(Relation onerel, VacPageList fraged_pages,
3616 BlockNumber rel_pages)
3618 int nPages = fraged_pages->num_pages;
3619 VacPage *pagedesc = fraged_pages->pagedesc;
3622 for (i = 0; i < nPages; i++)
3625 * fraged_pages may contain entries for pages that we later decided to
3626 * truncate from the relation; don't enter them into the free space map.
3629 if (pagedesc[i]->blkno >= rel_pages)
3632 RecordPageWithFreeSpace(onerel, pagedesc[i]->blkno, pagedesc[i]->free);
3637 /* Copy a VacPage structure */
3639 copy_vac_page(VacPage vacpage)
3643 /* allocate a VacPageData entry */
3644 newvacpage = (VacPage) palloc(sizeof(VacPageData) +
3645 vacpage->offsets_free * sizeof(OffsetNumber));
3648 if (vacpage->offsets_free > 0)
3649 memcpy(newvacpage->offsets, vacpage->offsets,
3650 vacpage->offsets_free * sizeof(OffsetNumber));
3651 newvacpage->blkno = vacpage->blkno;
3652 newvacpage->free = vacpage->free;
3653 newvacpage->offsets_used = vacpage->offsets_used;
3654 newvacpage->offsets_free = vacpage->offsets_free;
3660 * Add a VacPage pointer to a VacPageList.
3662 * As a side effect of the way that scan_heap works,
3663 * higher pages come after lower pages in the array
3664 * (and highest tid on a page is last).
3667 vpage_insert(VacPageList vacpagelist, VacPage vpnew)
3669 #define PG_NPAGEDESC 1024
3671 /* allocate a VacPage entry if needed */
3672 if (vacpagelist->num_pages == 0)
3674 vacpagelist->pagedesc = (VacPage *) palloc(PG_NPAGEDESC * sizeof(VacPage));
3675 vacpagelist->num_allocated_pages = PG_NPAGEDESC;
3677 else if (vacpagelist->num_pages >= vacpagelist->num_allocated_pages)
3679 vacpagelist->num_allocated_pages *= 2;
3680 vacpagelist->pagedesc = (VacPage *) repalloc(vacpagelist->pagedesc, vacpagelist->num_allocated_pages * sizeof(VacPage));
3682 vacpagelist->pagedesc[vacpagelist->num_pages] = vpnew;
3683 (vacpagelist->num_pages)++;
3687 * vac_bsearch: just like standard C library routine bsearch(),
3688 * except that we first test to see whether the target key is outside
3689 * the range of the table entries. This case is handled relatively slowly
3690 * by the normal binary search algorithm (ie, no faster than any other key)
3691 * but it occurs often enough in VACUUM to be worth optimizing.
3694 vac_bsearch(const void *key, const void *base,
3695 size_t nelem, size_t size,
3696 int (*compar) (const void *, const void *))
3703 res = compar(key, base);
3707 return (void *) base;
3710 last = (const void *) ((const char *) base + (nelem - 1) * size);
3711 res = compar(key, last);
3715 return (void *) last;
3718 return NULL; /* already checked 'em all */
3719 return bsearch(key, base, nelem, size, compar);
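/*
 * Typical use of vac_bsearch(), as in tid_reaped() above (argument layout
 * reproduced here for illustration):
 *
 *		vpp = (VacPage *) vac_bsearch((void *) &vp,
 *									  (void *) (vacpagelist->pagedesc),
 *									  vacpagelist->num_pages,
 *									  sizeof(VacPage), vac_cmp_blk);
 */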
3723 * Comparator routines for use with qsort() and bsearch().
3726 vac_cmp_blk(const void *left, const void *right)
3731 lblk = (*((VacPage *) left))->blkno;
3732 rblk = (*((VacPage *) right))->blkno;
3742 vac_cmp_offno(const void *left, const void *right)
3744 if (*(OffsetNumber *) left < *(OffsetNumber *) right)
3746 if (*(OffsetNumber *) left == *(OffsetNumber *) right)
3752 vac_cmp_vtlinks(const void *left, const void *right)
3754 if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi <
3755 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
3757 if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi >
3758 ((VTupleLink) right)->new_tid.ip_blkid.bi_hi)
3760 /* bi_hi-es are equal */
3761 if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo <
3762 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
3764 if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo >
3765 ((VTupleLink) right)->new_tid.ip_blkid.bi_lo)
3767 /* bi_lo-es are equal */
3768 if (((VTupleLink) left)->new_tid.ip_posid <
3769 ((VTupleLink) right)->new_tid.ip_posid)
3771 if (((VTupleLink) left)->new_tid.ip_posid >
3772 ((VTupleLink) right)->new_tid.ip_posid)
3779 * Open all the indexes of the given relation, obtaining the specified kind
3780 * of lock on each. Return an array of Relation pointers for the indexes
3781 * into *Irel, and the number of indexes into *nindexes.
3784 vac_open_indexes(Relation relation, LOCKMODE lockmode,
3785 int *nindexes, Relation **Irel)
3788 ListCell *indexoidscan;
3791 Assert(lockmode != NoLock);
3793 indexoidlist = RelationGetIndexList(relation);
3795 *nindexes = list_length(indexoidlist);
3798 *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
3803 foreach(indexoidscan, indexoidlist)
3805 Oid indexoid = lfirst_oid(indexoidscan);
3807 (*Irel)[i++] = index_open(indexoid, lockmode);
3810 list_free(indexoidlist);
3814 * Release the resources acquired by vac_open_indexes. Optionally release
3815 * the locks (say NoLock to keep 'em).
3818 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
3825 Relation ind = Irel[nindexes];
3827 index_close(ind, lockmode);
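/*
 * Illustrative usage of the index open/close helpers (sketch only; the lock
 * mode shown is an assumption for illustration):
 *
 *		Relation   *Irel;
 *		int			nindexes;
 *
 *		vac_open_indexes(onerel, AccessExclusiveLock, &nindexes, &Irel);
 *		... vacuum each of the nindexes index relations ...
 *		vac_close_indexes(nindexes, Irel, NoLock);
 */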
3834 * Is an index partial (ie, could it contain fewer tuples than the heap)?
3837 vac_is_partial_index(Relation indrel)
3840 * If the index's AM doesn't support nulls, it's partial for our purposes
3842 if (!indrel->rd_am->amindexnulls)
3845 /* Otherwise, look to see if there's a partial-index predicate */
3846 if (!heap_attisnull(indrel->rd_indextuple, Anum_pg_index_indpred))
3854 enough_space(VacPage vacpage, Size len)
3856 len = MAXALIGN(len);
3858 if (len > vacpage->free)
3861 /* if there are free itemid(s) and len <= free_space... */
3862 if (vacpage->offsets_used < vacpage->offsets_free)
3865 /* noff_used >= noff_free, so we'll have to allocate a new itemid */
3866 if (len + sizeof(ItemIdData) <= vacpage->free)
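/*
 * Illustrative example (assuming 8-byte MAXALIGN and 4-byte line pointers):
 * a page with free = 128, offsets_used = 3 and offsets_free = 5 accepts a
 * 120-byte tuple by recycling one of its free line pointers.  With
 * offsets_used = 5 and offsets_free = 3 the tuple also needs a new line
 * pointer, so 120 + 4 <= 128 still fits, but a 126-byte tuple (MAXALIGNed
 * to 128) would not.
 */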
3873 PageGetFreeSpaceWithFillFactor(Relation relation, Page page)
3876 * It is correct to use PageGetExactFreeSpace() here, *not*
3877 * PageGetHeapFreeSpace(). This is because (a) we do our own, exact
3878 * accounting for whether line pointers must be added, and (b) we will
3879 * recycle any LP_DEAD line pointers before starting to add rows to a
3880 * page, but that may not have happened yet at the time this function is
3881 * applied to a page, which means PageGetHeapFreeSpace()'s protection
3882 * against too many line pointers on a page could fire incorrectly. We do
3883 * not need that protection here: since VACUUM FULL always recycles all
3884 * dead line pointers first, it'd be physically impossible to insert more
3885 * than MaxHeapTuplesPerPage tuples anyway.
3887 Size freespace = PageGetExactFreeSpace(page);
3890 targetfree = RelationGetTargetPageFreeSpace(relation,
3891 HEAP_DEFAULT_FILLFACTOR);
3892 if (freespace > targetfree)
3893 return freespace - targetfree;
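/*
 * Illustrative example (assuming the default 8 kB BLCKSZ): with
 * fillfactor = 70 the reserved target is 8192 * (100 - 70) / 100 = 2457
 * bytes, so a page with 3000 bytes of exact free space reports
 * 3000 - 2457 = 543 bytes as usable for tuple moves.
 */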
3899 * vacuum_delay_point --- check for interrupts and cost-based delay.
3901 * This should be called in each major loop of VACUUM processing,
3902 * typically once per page processed.
3905 vacuum_delay_point(void)
3907 /* Always check for interrupts */
3908 CHECK_FOR_INTERRUPTS();
3910 /* Nap if appropriate */
3911 if (VacuumCostActive && !InterruptPending &&
3912 VacuumCostBalance >= VacuumCostLimit)
3916 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
3917 if (msec > VacuumCostDelay * 4)
3918 msec = VacuumCostDelay * 4;
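/*
 * Worked example with illustrative numbers: VacuumCostDelay = 20 ms and
 * VacuumCostLimit = 200 with an accumulated VacuumCostBalance of 2500 would
 * give 20 * 2500 / 200 = 250 ms, which the cap above reduces to
 * 20 * 4 = 80 ms.
 */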
3920 pg_usleep(msec * 1000L);
3922 VacuumCostBalance = 0;
3924 /* update balance values for workers */
3925 AutoVacuumUpdateDelay();
3927 /* Might have gotten an interrupt while sleeping */
3928 CHECK_FOR_INTERRUPTS();