1 /*-------------------------------------------------------------------------
4 * The postgres vacuum cleaner.
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled in cluster.c.
12 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
17 * src/backend/commands/vacuum.c
19 *-------------------------------------------------------------------------
25 #include "access/clog.h"
26 #include "access/genam.h"
27 #include "access/heapam.h"
28 #include "access/htup_details.h"
29 #include "access/multixact.h"
30 #include "access/transam.h"
31 #include "access/xact.h"
32 #include "catalog/namespace.h"
33 #include "catalog/pg_database.h"
34 #include "catalog/pg_namespace.h"
35 #include "commands/cluster.h"
36 #include "commands/vacuum.h"
37 #include "miscadmin.h"
39 #include "postmaster/autovacuum.h"
40 #include "storage/bufmgr.h"
41 #include "storage/lmgr.h"
42 #include "storage/proc.h"
43 #include "storage/procarray.h"
44 #include "utils/acl.h"
45 #include "utils/fmgroids.h"
46 #include "utils/guc.h"
47 #include "utils/memutils.h"
48 #include "utils/snapmgr.h"
49 #include "utils/syscache.h"
50 #include "utils/tqual.h"
/*
 * Freeze-age settings. vacuum_set_xid_limits() in this file reads them when
 * choosing the freeze ages it works with.
 */
56 int vacuum_freeze_min_age;
57 int vacuum_freeze_table_age;
60 /* A few variables that don't seem worth passing around as parameters */
/* vac_context: created in vacuum() as a child of PortalContext, deleted before vacuum() finishes */
61 static MemoryContext vac_context = NULL;
/* vac_strategy: buffer access strategy selected in vacuum() and passed on to analyze_rel() */
62 static BufferAccessStrategy vac_strategy;
65 /* non-export function prototypes */
66 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
67 static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti);
/* NOTE(review): this prototype ends on a comma here; the definition later in the file takes a fourth parameter, bool for_wraparound */
68 static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
73 * Primary entry point for VACUUM and ANALYZE commands.
75 * relid is normally InvalidOid; if it is not, then it provides the relation
76 * OID to be processed, and vacstmt->relation is ignored. (The non-invalid
77 * case is currently only used by autovacuum.)
79 * do_toast is passed as FALSE by autovacuum, because it processes TOAST tables separately.
82 * for_wraparound is used by autovacuum to let us know when it's forcing
83 * a vacuum for wraparound, which should not be auto-canceled.
85 * bstrategy is normally given as NULL, but in autovacuum it can be passed
86 * in to use the same buffer strategy object across multiple vacuum() calls.
88 * isTopLevel should be passed down from ProcessUtility.
90 * It is the caller's responsibility that vacstmt and bstrategy
91 * (if given) be allocated in a memory context that won't disappear
92 * at transaction commit.
/*
 * vacuum -- driver for the VACUUM and ANALYZE commands.
 *
 * Parameters (as used by the visible statements):
 *   vacstmt         statement node; vacstmt->options selects VACUUM and/or
 *                   ANALYZE, vacstmt->relation is handed to get_rel_oids()
 *   relid           handed to get_rel_oids() to build the work list
 *   do_toast        forwarded to vacuum_rel()
 *   bstrategy       buffer access strategy; when NULL, one is obtained from
 *                   GetAccessStrategy(BAS_VACUUM) while vac_context is current
 *   for_wraparound  forwarded to vacuum_rel()
 *   isTopLevel      forwarded to PreventTransactionChain() and
 *                   IsInTransactionChain()
 *
 * Side effects: creates and finally deletes vac_context, sets vac_strategy,
 * resets VacuumCostBalance and toggles VacuumCostActive. For the VACUUM case
 * outside an autovacuum worker it also calls pgstat_vacuum_stat() up front
 * and vac_update_datfrozenxid() at the end.
 *
 * NOTE(review): several structural lines of this function (braces, some
 * declarations, the guards around the transaction calls) are not visible in
 * this copy; the summary above covers only the statements that are.
 */
95 vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
96 BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
99 volatile bool in_outer_xact,
103 /* sanity checks on options */
/* at least one of VACUUM / ANALYZE must be requested */
104 Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
/* FULL and FREEZE are only accepted together with VACUUM */
105 Assert((vacstmt->options & VACOPT_VACUUM) ||
106 !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
/* a column list is only accepted together with ANALYZE */
107 Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
/* stmttype is the command name handed to PreventTransactionChain() below */
109 stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
112 * We cannot run VACUUM inside a user transaction block; if we were inside
113 * a transaction, then our commit- and start-transaction-command calls
114 * would not have the intended effect! There are numerous other subtle
115 * dependencies on this, too.
117 * ANALYZE (without VACUUM) can run either way.
119 if (vacstmt->options & VACOPT_VACUUM)
121 PreventTransactionChain(isTopLevel, stmttype);
122 in_outer_xact = false;
125 in_outer_xact = IsInTransactionChain(isTopLevel);
128 * Send info about dead objects to the statistics collector, unless we are
129 * in autovacuum --- autovacuum.c does this for itself.
131 if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
132 pgstat_vacuum_stat();
135 * Create special memory context for cross-transaction storage.
137 * Since it is a child of PortalContext, it will go away eventually even
138 * if we suffer an error; there's no need for special abort cleanup logic.
140 vac_context = AllocSetContextCreate(PortalContext,
142 ALLOCSET_DEFAULT_MINSIZE,
143 ALLOCSET_DEFAULT_INITSIZE,
144 ALLOCSET_DEFAULT_MAXSIZE);
147 * If caller didn't give us a buffer strategy object, make one in the
148 * cross-transaction memory context.
150 if (bstrategy == NULL)
152 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
154 bstrategy = GetAccessStrategy(BAS_VACUUM);
155 MemoryContextSwitchTo(old_context);
/* publish the strategy in the file-level variable; analyze_rel() below receives it from there */
157 vac_strategy = bstrategy;
160 * Build list of relations to process, unless caller gave us one. (If we
161 * build one, we put it in vac_context for safekeeping.)
163 relations = get_rel_oids(relid, vacstmt->relation);
166 * Decide whether we need to start/commit our own transactions.
168 * For VACUUM (with or without ANALYZE): always do so, so that we can
169 * release locks as soon as possible. (We could possibly use the outer
170 * transaction for a one-table VACUUM, but handling TOAST tables would be
173 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
174 * start/commit our own transactions. Also, there's no need to do so if
175 * only processing one relation. For multiple relations when not within a
176 * transaction block, and also in an autovacuum worker, use own
177 * transactions so we can release locks sooner.
179 if (vacstmt->options & VACOPT_VACUUM)
180 use_own_xacts = true;
183 Assert(vacstmt->options & VACOPT_ANALYZE);
184 if (IsAutoVacuumWorkerProcess())
185 use_own_xacts = true;
186 else if (in_outer_xact)
187 use_own_xacts = false;
188 else if (list_length(relations) > 1)
189 use_own_xacts = true;
191 use_own_xacts = false;
195 * vacuum_rel expects to be entered with no transaction active; it will
196 * start and commit its own transaction. But we are called by an SQL
197 * command, and so we are executing inside a transaction already. We
198 * commit the transaction started in PostgresMain() here, and start
199 * another one before exiting to match the commit waiting for us back in
204 /* ActiveSnapshot is not set by autovacuum */
205 if (ActiveSnapshotSet())
208 /* matches the StartTransaction in PostgresMain() */
209 CommitTransactionCommand();
212 /* Turn vacuum cost accounting on or off */
217 VacuumCostActive = (VacuumCostDelay > 0);
218 VacuumCostBalance = 0;
224 * Loop to process each selected relation.
226 foreach(cur, relations)
228 Oid relid = lfirst_oid(cur);
230 if (vacstmt->options & VACOPT_VACUUM)
232 if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
236 if (vacstmt->options & VACOPT_ANALYZE)
239 * If using separate xacts, start one for analyze. Otherwise,
240 * we can use the outer transaction.
244 StartTransactionCommand();
245 /* functions in indexes may want a snapshot set */
246 PushActiveSnapshot(GetTransactionSnapshot());
249 analyze_rel(relid, vacstmt, vac_strategy);
254 CommitTransactionCommand();
261 /* Make sure cost accounting is turned off after error */
262 VacuumCostActive = false;
267 /* Turn off vacuum cost accounting */
268 VacuumCostActive = false;
271 * Finish up processing.
275 /* here, we are not in a transaction */
278 * This matches the CommitTransaction waiting for us in
281 StartTransactionCommand();
284 if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
287 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
288 * (autovacuum.c does this for itself.)
290 vac_update_datfrozenxid();
294 * Clean up working storage --- note we must do this after
295 * StartTransactionCommand, else we might be trying to delete the active
298 MemoryContextDelete(vac_context);
303 * Build a list of Oids for each relation to be processed
305 * The list is built in vac_context so that it will survive across our
306 * per-relation transactions.
/*
 * get_rel_oids -- collect the OIDs of the relations to process.
 *
 *   relid   when valid (OidIsValid), it becomes a list entry directly.
 *   vacrel  otherwise resolved with RangeVarGetRelid(vacrel, NoLock, false)
 *           to produce an entry; the remaining path scans pg_class and
 *           tests each row for RELKIND_RELATION / RELKIND_MATVIEW before
 *           appending its OID.
 *
 * Every lappend_oid() call is made with vac_context as the current memory
 * context, and the previous context is restored right after.
 *
 * NOTE(review): the tests choosing between the second and third paths, the
 * scan cleanup and the final return are not visible in this copy; vacuum()
 * assigns the result of this function to its relations list.
 */
309 get_rel_oids(Oid relid, const RangeVar *vacrel)
311 List *oid_list = NIL;
312 MemoryContext oldcontext;
314 /* OID supplied by VACUUM's caller? */
315 if (OidIsValid(relid))
/* append while vac_context is current, then switch back to the previous context */
317 oldcontext = MemoryContextSwitchTo(vac_context);
318 oid_list = lappend_oid(oid_list, relid);
319 MemoryContextSwitchTo(oldcontext);
323 /* Process a specific relation */
327 * Since we don't take a lock here, the relation might be gone, or the
328 * RangeVar might no longer refer to the OID we look up here. In the
329 * former case, VACUUM will do nothing; in the latter case, it will
330 * process the OID we looked up here, rather than the new one. Neither
331 * is ideal, but there's little practical alternative, since we're
332 * going to commit this transaction and begin a new one between now
335 relid = RangeVarGetRelid(vacrel, NoLock, false);
337 /* Make a relation list entry for this guy */
338 oldcontext = MemoryContextSwitchTo(vac_context);
339 oid_list = lappend_oid(oid_list, relid);
340 MemoryContextSwitchTo(oldcontext);
345 * Process all plain relations and materialized views listed in
352 pgclass = heap_open(RelationRelationId, AccessShareLock);
354 scan = heap_beginscan_catalog(pgclass, 0, NULL);
356 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
358 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
360 if (classForm->relkind != RELKIND_RELATION &&
361 classForm->relkind != RELKIND_MATVIEW)
364 /* Make a relation list entry for this guy */
365 oldcontext = MemoryContextSwitchTo(vac_context);
366 oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
367 MemoryContextSwitchTo(oldcontext);
371 heap_close(pgclass, AccessShareLock);
378 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
380 * The output parameters are:
381 * - oldestXmin is the cutoff value used to distinguish whether tuples are
382 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
383 * - freezeLimit is the Xid below which all Xids are replaced by
384 * FrozenTransactionId during vacuum.
385 * - xidFullScanLimit (computed from table_freeze_age parameter)
386 * represents a minimum Xid value; a table whose relfrozenxid is older than
387 * this will have a full-table vacuum applied to it, to freeze tuples across
388 * the whole table. Vacuuming a table younger than this value can use a
390 * - multiXactCutoff is the value below which all MultiXactIds are removed from
392 * - mxactFullScanLimit is a value against which a table's relminmxid value is
393 * compared to produce a full-table vacuum, as with xidFullScanLimit.
395 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is not interested in them.
/*
 * vacuum_set_xid_limits -- compute the XID and MultiXactId cutoffs for a vacuum.
 *
 * Inputs:
 *   freeze_min_age, freeze_table_age
 *       ages supplied by the caller; the code also consults
 *       vacuum_freeze_min_age / vacuum_freeze_table_age and caps the results
 *       at autovacuum_freeze_max_age / 2 and autovacuum_freeze_max_age * 0.95
 *       respectively.
 * Outputs:
 *   *oldestXmin          result of GetOldestXmin(sharedRel, true)
 *   *freezeLimit         *oldestXmin - freezemin, replaced by
 *                        FirstNormalTransactionId when not a normal XID
 *   *multiXactCutoff     GetOldestMultiXactId() - freezemin, raised to
 *                        FirstMultiXactId when below it
 *   *xidFullScanLimit    ReadNewTransactionId() - freezetable, same clamping
 *   *mxactFullScanLimit  ReadNextMultiXactId() - freezetable, same clamping
 *
 * The last two are computed under the xidFullScanLimit != NULL test, where
 * mxactFullScanLimit is asserted to be non-NULL as well.
 *
 * A message with a hint (oldest xmin is far in the past) is prepared for the
 * case where the freeze limit precedes
 * ReadNewTransactionId() - autovacuum_freeze_max_age.
 *
 * NOTE(review): the tests selecting the fallback ages, the report call that
 * wraps the message, and the follow-up adjustment of the limit are not
 * visible in this copy.
 */
399 vacuum_set_xid_limits(int freeze_min_age,
400 int freeze_table_age,
402 TransactionId *oldestXmin,
403 TransactionId *freezeLimit,
404 TransactionId *xidFullScanLimit,
405 MultiXactId *multiXactCutoff,
406 MultiXactId *mxactFullScanLimit)
410 TransactionId safeLimit;
411 MultiXactId mxactLimit;
414 * We can always ignore processes running lazy vacuum. This is because we
415 * use these values only for deciding which tuples we must keep in the
416 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
417 * ignore it. In theory it could be problematic to ignore lazy vacuums in
418 * a full vacuum, but keep in mind that only one vacuum process can be
419 * working on a particular table at any time, and that each vacuum is
420 * always an independent transaction.
/* NOTE(review): sharedRel is used here but its declaration is not among the visible parameter lines */
422 *oldestXmin = GetOldestXmin(sharedRel, true);
424 Assert(TransactionIdIsNormal(*oldestXmin));
427 * Determine the minimum freeze age to use: as specified by the caller, or
428 * vacuum_freeze_min_age, but in any case not more than half
429 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
430 * wraparound won't occur too frequently.
432 freezemin = freeze_min_age;
434 freezemin = vacuum_freeze_min_age;
435 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
436 Assert(freezemin >= 0);
439 * Compute the cutoff XID, being careful not to generate a "permanent" XID
441 limit = *oldestXmin - freezemin;
442 if (!TransactionIdIsNormal(limit))
443 limit = FirstNormalTransactionId;
446 * If oldestXmin is very far back (in practice, more than
447 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
448 * freeze age of zero.
450 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
451 if (!TransactionIdIsNormal(safeLimit))
452 safeLimit = FirstNormalTransactionId;
454 if (TransactionIdPrecedes(limit, safeLimit))
457 (errmsg("oldest xmin is far in the past"),
458 errhint("Close open transactions soon to avoid wraparound problems.")));
462 *freezeLimit = limit;
465 * simplistic MultiXactId removal limit: use the same policy as for
466 * freezing Xids (except we use the oldest known mxact instead of the
467 * current next value).
469 mxactLimit = GetOldestMultiXactId() - freezemin;
470 if (mxactLimit < FirstMultiXactId)
471 mxactLimit = FirstMultiXactId;
472 *multiXactCutoff = mxactLimit;
474 if (xidFullScanLimit != NULL)
478 Assert(mxactFullScanLimit != NULL);
481 * Determine the table freeze age to use: as specified by the caller,
482 * or vacuum_freeze_table_age, but in any case not more than
483 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
484 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
485 * before anti-wraparound autovacuum is launched.
487 freezetable = freeze_table_age;
489 freezetable = vacuum_freeze_table_age;
490 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
491 Assert(freezetable >= 0);
494 * Compute XID limit causing a full-table vacuum, being careful not to
495 * generate a "permanent" XID.
497 limit = ReadNewTransactionId() - freezetable;
498 if (!TransactionIdIsNormal(limit))
499 limit = FirstNormalTransactionId;
501 *xidFullScanLimit = limit;
504 * Compute MultiXactId limit to cause a full-table vacuum, being
505 * careful not to generate an invalid multi. We just copy the logic
506 * (and limits) from plain XIDs here.
508 mxactLimit = ReadNextMultiXactId() - freezetable;
509 if (mxactLimit < FirstMultiXactId)
510 mxactLimit = FirstMultiXactId;
512 *mxactFullScanLimit = mxactLimit;
517 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
519 * If we scanned the whole relation then we should just use the count of
520 * live tuples seen; but if we did not, we should not trust the count
521 * unreservedly, especially not in VACUUM, which may have scanned a quite
522 * nonrandom subset of the table. When we have only partial information,
523 * we take the old value of pg_class.reltuples as a measurement of the
524 * tuple density in the unscanned pages.
526 * This routine is shared by VACUUM and ANALYZE.
/*
 * vac_estimate_reltuples -- estimate the new pg_class.reltuples value.
 *
 *   relation        supplies the previous relpages / reltuples via rd_rel
 *   is_analyze      not referenced by any visible statement
 *   total_pages     page count the estimate is scaled to
 *   scanned_pages   number of pages examined
 *   scanned_tuples  tuple count obtained from the examined pages
 *
 * Returns, in order of precedence:
 *   - scanned_tuples unchanged when scanned_pages >= total_pages;
 *   - the old reltuples when scanned_pages is zero;
 *   - (scanned_tuples / scanned_pages) * total_pages, rounded, when the old
 *     relpages is zero;
 *   - otherwise the old density moved toward the new density by the fraction
 *     scanned_pages / total_pages, multiplied by total_pages and rounded.
 * Rounding is done as floor(x + 0.5).
 *
 * NOTE(review): the declarations of old_density, new_density and multiplier
 * are not visible in this copy.
 */
529 vac_estimate_reltuples(Relation relation, bool is_analyze,
530 BlockNumber total_pages,
531 BlockNumber scanned_pages,
532 double scanned_tuples)
534 BlockNumber old_rel_pages = relation->rd_rel->relpages;
535 double old_rel_tuples = relation->rd_rel->reltuples;
539 double updated_density;
541 /* If we did scan the whole table, just use the count as-is */
542 if (scanned_pages >= total_pages)
543 return scanned_tuples;
546 * If scanned_pages is zero but total_pages isn't, keep the existing value
547 * of reltuples. (Note: callers should avoid updating the pg_class
548 * statistics in this situation, since no new information has been
551 if (scanned_pages == 0)
552 return old_rel_tuples;
555 * If old value of relpages is zero, old density is indeterminate; we
556 * can't do much except scale up scanned_tuples to match total_pages.
/* no prior density: extrapolate the sampled density to all pages, rounded to the nearest whole number */
558 if (old_rel_pages == 0)
559 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
562 * Okay, we've covered the corner cases. The normal calculation is to
563 * convert the old measurement to a density (tuples per page), then update
564 * the density using an exponential-moving-average approach, and finally
565 * compute reltuples as updated_density * total_pages.
567 * For ANALYZE, the moving average multiplier is just the fraction of the
568 * table's pages we scanned. This is equivalent to assuming that the
569 * tuple density in the unscanned pages didn't change. Of course, it
570 * probably did, if the new density measurement is different. But over
571 * repeated cycles, the value of reltuples will converge towards the
572 * correct value, if repeated measurements show the same new density.
574 * For VACUUM, the situation is a bit different: we have looked at a
575 * nonrandom sample of pages, but we know for certain that the pages we
576 * didn't look at are precisely the ones that haven't changed lately.
577 * Thus, there is a reasonable argument for doing exactly the same thing
578 * as for the ANALYZE case, that is use the old density measurement as the
579 * value for the unscanned pages.
581 * This logic could probably use further refinement.
583 old_density = old_rel_tuples / old_rel_pages;
584 new_density = scanned_tuples / scanned_pages;
585 multiplier = (double) scanned_pages / (double) total_pages;
586 updated_density = old_density + (new_density - old_density) * multiplier;
587 return floor(updated_density * total_pages + 0.5);
592 * vac_update_relstats() -- update statistics for one relation
594 * Update the whole-relation statistics that are kept in its pg_class
595 * row. There are additional stats that will be updated if we are
596 * doing ANALYZE, but we always update these stats. This routine works
597 * for both index and heap relation entries in pg_class.
599 * We violate transaction semantics here by overwriting the rel's
600 * existing pg_class tuple with the new values. This is reasonably
601 * safe since the new values are correct whether or not this transaction
602 * commits. The reason for this is that if we updated these tuples in
603 * the usual way, vacuuming pg_class itself wouldn't work very well ---
604 * by the time we got done with a vacuum cycle, most of the tuples in
605 * pg_class would've been obsoleted. Of course, this only works for
606 * fixed-size never-null columns, but these are.
608 * Note another assumption: that two VACUUMs/ANALYZEs on a table can't
609 * run in parallel, nor can VACUUM/ANALYZE run in parallel with a
610 * schema alteration such as adding an index, rule, or trigger. Otherwise
611 * our updates of relhasindex etc might overwrite uncommitted updates.
613 * Another reason for doing it this way is that when we are in a lazy
614 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any updates ---
615 * somebody vacuuming pg_class might think they could delete a tuple
616 * marked with xmin = our xid.
618 * This routine is shared by VACUUM and ANALYZE.
/*
 * vac_update_relstats -- overwrite whole-relation statistics in pg_class.
 *
 *   relation               relation whose pg_class row is updated
 *   num_pages              new relpages (stored through an int32 cast)
 *   num_tuples             new reltuples (stored through a float4 cast)
 *   num_all_visible_pages  new relallvisible (stored through an int32 cast)
 *   hasindex               new relhasindex; when false, relhaspkey is
 *                          cleared as well
 *   frozenxid              candidate relfrozenxid; tested with
 *                          TransactionIdIsNormal() and against the stored
 *                          value before being assigned
 *   minmulti               candidate relminmxid; tested with
 *                          MultiXactIdIsValid() and against the stored value
 *                          before being assigned
 *
 * Also clears relhasrules / relhastriggers when the relation carries no
 * rd_rules / trigdesc. The row is fetched as a syscache copy keyed by the
 * relation OID (elog ERROR when it cannot be found) and written back with
 * heap_inplace_update().
 *
 * NOTE(review): the flag that records whether anything changed, and the test
 * guarding heap_inplace_update(), are not visible in this copy.
 */
621 vac_update_relstats(Relation relation,
622 BlockNumber num_pages, double num_tuples,
623 BlockNumber num_all_visible_pages,
624 bool hasindex, TransactionId frozenxid,
625 MultiXactId minmulti)
627 Oid relid = RelationGetRelid(relation);
630 Form_pg_class pgcform;
/* pg_class is opened with RowExclusiveLock here and closed with the same lock mode at the end */
633 rd = heap_open(RelationRelationId, RowExclusiveLock);
635 /* Fetch a copy of the tuple to scribble on */
636 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
637 if (!HeapTupleIsValid(ctup))
638 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
640 pgcform = (Form_pg_class) GETSTRUCT(ctup);
642 /* Apply required updates, if any, to copied tuple */
/* each value is compared and stored through the same cast, so the test matches what would be written */
645 if (pgcform->relpages != (int32) num_pages)
647 pgcform->relpages = (int32) num_pages;
650 if (pgcform->reltuples != (float4) num_tuples)
652 pgcform->reltuples = (float4) num_tuples;
655 if (pgcform->relallvisible != (int32) num_all_visible_pages)
657 pgcform->relallvisible = (int32) num_all_visible_pages;
660 if (pgcform->relhasindex != hasindex)
662 pgcform->relhasindex = hasindex;
667 * If we have discovered that there are no indexes, then there's no
668 * primary key either. This could be done more thoroughly...
670 if (pgcform->relhaspkey && !hasindex)
672 pgcform->relhaspkey = false;
676 /* We also clear relhasrules and relhastriggers if needed */
677 if (pgcform->relhasrules && relation->rd_rules == NULL)
679 pgcform->relhasrules = false;
682 if (pgcform->relhastriggers && relation->trigdesc == NULL)
684 pgcform->relhastriggers = false;
689 * relfrozenxid should never go backward. Caller can pass
690 * InvalidTransactionId if it has no new data.
692 if (TransactionIdIsNormal(frozenxid) &&
693 TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid))
695 pgcform->relfrozenxid = frozenxid;
699 /* relminmxid must never go backward, either */
700 if (MultiXactIdIsValid(minmulti) &&
701 MultiXactIdPrecedes(pgcform->relminmxid, minmulti))
703 pgcform->relminmxid = minmulti;
707 /* If anything changed, write out the tuple. */
709 heap_inplace_update(rd, ctup);
711 heap_close(rd, RowExclusiveLock);
716 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
718 * Update pg_database's datfrozenxid entry for our database to be the
719 * minimum of the pg_class.relfrozenxid values.
721 * Similarly, update our datminmxid to be the minimum of the
722 * pg_class.relminmxid values.
724 * If we are able to advance either pg_database value, also try to
725 * truncate pg_clog and pg_multixact.
727 * We violate transaction semantics here by overwriting the database's
728 * existing pg_database tuple with the new value. This is reasonably
729 * safe since the new value is correct whether or not this transaction
730 * commits. As with vac_update_relstats, this avoids leaving dead tuples
731 * behind after a VACUUM.
/*
 * vac_update_datfrozenxid -- advance this database's datfrozenxid / datminmxid.
 *
 * Takes no arguments. Starts from GetOldestXmin(true, true) and
 * GetOldestMultiXactId(), scans pg_class under AccessShareLock, tests each
 * row's relkind against RELKIND_RELATION, RELKIND_MATVIEW and
 * RELKIND_TOASTVALUE, and lowers the two running minimums to the smallest
 * relfrozenxid / relminmxid seen.
 *
 * It then fetches a copy of the pg_database row for MyDatabaseId (elog ERROR
 * when it cannot be found), assigns datfrozenxid and datminmxid when the
 * stored values precede the new ones, writes the row with
 * heap_inplace_update(), frees the copy, and calls vac_truncate_clog() when
 * dirty is set or ForceTransactionIdLimitUpdate() returns true.
 *
 * NOTE(review): the declaration and assignments of dirty, the remaining
 * arguments of systable_beginscan(), and any guard around
 * heap_inplace_update() are not visible in this copy.
 */
734 vac_update_datfrozenxid(void)
737 Form_pg_database dbform;
741 TransactionId newFrozenXid;
742 MultiXactId newMinMulti;
746 * Initialize the "min" calculation with GetOldestXmin, which is a
747 * reasonable approximation to the minimum relfrozenxid for not-yet-
748 * committed pg_class entries for new tables; see AddNewRelationTuple().
749 * So we cannot produce a wrong minimum by starting with this.
751 newFrozenXid = GetOldestXmin(true, true);
754 * Similarly, initialize the MultiXact "min" with the value that would be
755 * used on pg_class for new tables. See AddNewRelationTuple().
757 newMinMulti = GetOldestMultiXactId();
760 * We must seqscan pg_class to find the minimum Xid, because there is no
761 * index that can help us here.
763 relation = heap_open(RelationRelationId, AccessShareLock);
765 scan = systable_beginscan(relation, InvalidOid, false,
768 while ((classTup = systable_getnext(scan)) != NULL)
770 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
773 * Only consider relations able to hold unfrozen XIDs (anything else
774 * should have InvalidTransactionId in relfrozenxid anyway.)
776 if (classForm->relkind != RELKIND_RELATION &&
777 classForm->relkind != RELKIND_MATVIEW &&
778 classForm->relkind != RELKIND_TOASTVALUE)
781 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
782 Assert(MultiXactIdIsValid(classForm->relminmxid));
/* track the smallest relfrozenxid and relminmxid seen during the scan */
784 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
785 newFrozenXid = classForm->relfrozenxid;
787 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
788 newMinMulti = classForm->relminmxid;
791 /* we're done with pg_class */
792 systable_endscan(scan);
793 heap_close(relation, AccessShareLock);
795 Assert(TransactionIdIsNormal(newFrozenXid));
796 Assert(MultiXactIdIsValid(newMinMulti));
798 /* Now fetch the pg_database tuple we need to update. */
799 relation = heap_open(DatabaseRelationId, RowExclusiveLock);
801 /* Fetch a copy of the tuple to scribble on */
802 tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
803 if (!HeapTupleIsValid(tuple))
804 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
805 dbform = (Form_pg_database) GETSTRUCT(tuple);
808 * Don't allow datfrozenxid to go backward (probably can't happen anyway);
809 * and detect the common case where it doesn't go forward either.
811 if (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid))
813 dbform->datfrozenxid = newFrozenXid;
818 if (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti))
820 dbform->datminmxid = newMinMulti;
825 heap_inplace_update(relation, tuple);
827 heap_freetuple(tuple);
828 heap_close(relation, RowExclusiveLock);
831 * If we were able to advance datfrozenxid, see if we can truncate
832 * pg_clog. Also do it if the shared XID-wrap-limit info is stale, since
833 * this action will update that too.
835 if (dirty || ForceTransactionIdLimitUpdate())
836 vac_truncate_clog(newFrozenXid, newMinMulti);
841 * vac_truncate_clog() -- attempt to truncate the commit log
843 * Scan pg_database to determine the system-wide oldest datfrozenxid,
844 * and use it to truncate the transaction commit log (pg_clog).
845 * Also update the XID wrap limit info maintained by varsup.c.
847 * The passed XID is simply the one I just wrote into my pg_database
848 * entry. It's used to initialize the "min" calculation.
850 * This routine is only invoked when we've managed to change our
851 * DB's datfrozenxid entry, or we found that the shared XID-wrap-limit info is stale.
/*
 * vac_truncate_clog -- truncate pg_clog / MultiXact data and refresh wrap limits.
 *
 *   frozenXID  starting value for the minimum datfrozenxid computation
 *   minMulti   starting value for the minimum datminmxid computation
 *
 * Scans pg_database under AccessShareLock, lowering frozenXID and minMulti
 * to the smallest datfrozenxid / datminmxid found and remembering the OID of
 * the database each minimum came from (both start as MyDatabaseId). A row
 * whose datfrozenxid follows the value obtained from
 * GetCurrentTransactionId() sets frozenAlreadyWrapped instead of lowering
 * frozenXID. Afterwards the results are passed to TruncateCLOG(),
 * TruncateMultiXact(), SetTransactionIdLimit() and MultiXactAdvanceOldest().
 *
 * NOTE(review): per the existing comment, truncation is skipped when
 * frozenAlreadyWrapped is set; the report call wrapping the message and the
 * statement that leaves the function in that case are not visible in this
 * copy, nor is the scan cleanup.
 */
855 vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti)
/* reference XID for the wraparound test made on every pg_database row below */
857 TransactionId myXID = GetCurrentTransactionId();
861 Oid oldestxid_datoid;
863 bool frozenAlreadyWrapped = false;
865 /* init oldest datoids to sync with my frozen values */
866 oldestxid_datoid = MyDatabaseId;
867 minmulti_datoid = MyDatabaseId;
870 * Scan pg_database to compute the minimum datfrozenxid
872 * Note: we need not worry about a race condition with new entries being
873 * inserted by CREATE DATABASE. Any such entry will have a copy of some
874 * existing DB's datfrozenxid, and that source DB cannot be ours because
875 * of the interlock against copying a DB containing an active backend.
876 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
877 * concurrently modify the datfrozenxid's of different databases, the
878 * worst possible outcome is that pg_clog is not truncated as aggressively
881 relation = heap_open(DatabaseRelationId, AccessShareLock);
883 scan = heap_beginscan_catalog(relation, 0, NULL);
885 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
887 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
889 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
890 Assert(MultiXactIdIsValid(dbform->datminmxid));
892 if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
893 frozenAlreadyWrapped = true;
894 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
896 frozenXID = dbform->datfrozenxid;
897 oldestxid_datoid = HeapTupleGetOid(tuple);
900 if (MultiXactIdPrecedes(dbform->datminmxid, minMulti))
902 minMulti = dbform->datminmxid;
903 minmulti_datoid = HeapTupleGetOid(tuple);
909 heap_close(relation, AccessShareLock);
912 * Do not truncate CLOG if we seem to have suffered wraparound already;
913 * the computed minimum XID might be bogus. This case should now be
914 * impossible due to the defenses in GetNewTransactionId, but we keep the
917 if (frozenAlreadyWrapped)
920 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
921 errdetail("You might have already suffered transaction-wraparound data loss.")));
925 /* Truncate CLOG and Multi to the oldest computed value */
926 TruncateCLOG(frozenXID);
927 TruncateMultiXact(minMulti);
930 * Update the wrap limit for GetNewTransactionId and creation of new
931 * MultiXactIds. Note: these functions will also signal the postmaster
932 * for an(other) autovac cycle if needed. XXX should we avoid possibly
935 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
936 MultiXactAdvanceOldest(minMulti, minmulti_datoid);
941 * vacuum_rel() -- vacuum one heap relation
943 * Doing one heap at a time incurs extra overhead, since we need to
944 * check that the heap exists again just before we vacuum it. The
945 * reason that we do this is so that vacuuming can be spread across
946 * many small transactions. Otherwise, two-phase locking would require
947 * us to lock the entire database during one pass of the vacuum cleaner.
949 * At entry and exit, we are not inside a transaction.
952 vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
959 int save_sec_context;
962 /* Begin a transaction for vacuuming this relation */
963 StartTransactionCommand();
966 * Functions in indexes may want a snapshot set. Also, setting a snapshot
967 * ensures that RecentGlobalXmin is kept truly recent.
969 PushActiveSnapshot(GetTransactionSnapshot());
971 if (!(vacstmt->options & VACOPT_FULL))
974 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
975 * other concurrent VACUUMs know that they can ignore this one while
976 * determining their OldestXmin. (The reason we don't set it during a
977 * full VACUUM is exactly that we may have to run user-defined
978 * functions for functional indexes, and we want to make sure that if
979 * they use the snapshot set above, any tuples it requires can't get
980 * removed from other tables. An index function that depends on the
981 * contents of other tables is arguably broken, but we won't break it
982 * here by violating transaction semantics.)
984 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
985 * autovacuum; it's used to avoid canceling a vacuum that was invoked
988 * Note: these flags remain set until CommitTransaction or
989 * AbortTransaction. We don't want to clear them until we reset
990 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
991 * which is probably Not Good.
993 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
994 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
996 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
997 LWLockRelease(ProcArrayLock);
1001 * Check for user-requested abort. Note we want this to be inside a
1002 * transaction, so xact.c doesn't issue useless WARNING.
1004 CHECK_FOR_INTERRUPTS();
1007 * Determine the type of lock we want --- hard exclusive lock for a FULL
1008 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1009 * way, we can be sure that no other backend is vacuuming the same table.
1011 lmode = (vacstmt->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1014 * Open the relation and get the appropriate lock on it.
1016 * There's a race condition here: the rel may have gone away since the
1017 * last time we saw it. If so, we don't need to vacuum it.
1019 * If we've been asked not to wait for the relation lock, acquire it first
1020 * in non-blocking mode, before calling try_relation_open().
1022 if (!(vacstmt->options & VACOPT_NOWAIT))
1023 onerel = try_relation_open(relid, lmode);
1024 else if (ConditionalLockRelationOid(relid, lmode))
1025 onerel = try_relation_open(relid, NoLock);
1029 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
1031 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1032 errmsg("skipping vacuum of \"%s\" --- lock not available",
1033 vacstmt->relation->relname)));
1038 PopActiveSnapshot();
1039 CommitTransactionCommand();
1044 * Check permissions.
1046 * We allow the user to vacuum a table if he is superuser, the table
1047 * owner, or the database owner (but in the latter case, only if it's not
1048 * a shared relation). pg_class_ownercheck includes the superuser case.
1050 * Note we choose to treat permissions failure as a WARNING and keep
1051 * trying to vacuum the rest of the DB --- is this appropriate?
1053 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1054 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1056 if (onerel->rd_rel->relisshared)
1058 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1059 RelationGetRelationName(onerel))));
1060 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1062 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1063 RelationGetRelationName(onerel))));
1066 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1067 RelationGetRelationName(onerel))));
1068 relation_close(onerel, lmode);
1069 PopActiveSnapshot();
1070 CommitTransactionCommand();
1075 * Check that it's a vacuumable relation; we used to do this in
1076 * get_rel_oids() but seems safer to check after we've locked the
1079 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1080 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1081 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
1084 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1085 RelationGetRelationName(onerel))));
1086 relation_close(onerel, lmode);
1087 PopActiveSnapshot();
1088 CommitTransactionCommand();
1093 * Silently ignore tables that are temp tables of other backends ---
1094 * trying to vacuum these will lead to great unhappiness, since their
1095 * contents are probably not up-to-date on disk. (We don't throw a
1096 * warning here; it would just lead to chatter during a database-wide
1099 if (RELATION_IS_OTHER_TEMP(onerel))
1101 relation_close(onerel, lmode);
1102 PopActiveSnapshot();
1103 CommitTransactionCommand();
1108 * Get a session-level lock too. This will protect our access to the
1109 * relation across multiple transactions, so that we can vacuum the
1110 * relation's TOAST table (if any) secure in the knowledge that no one is
1111 * deleting the parent relation.
1113 * NOTE: this cannot block, even if someone else is waiting for access,
1114 * because the lock manager knows that both lock requests are from the
1117 onerelid = onerel->rd_lockInfo.lockRelId;
1118 LockRelationIdForSession(&onerelid, lmode);
1121 * Remember the relation's TOAST relation for later, if the caller asked
1122 * us to process it. In VACUUM FULL, though, the toast table is
1123 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1125 if (do_toast && !(vacstmt->options & VACOPT_FULL))
1126 toast_relid = onerel->rd_rel->reltoastrelid;
1128 toast_relid = InvalidOid;
1131 * Switch to the table owner's userid, so that any index functions are run
1132 * as that user. Also lock down security-restricted operations and
1133 * arrange to make GUC variable changes local to this command. (This is
1134 * unnecessary, but harmless, for lazy VACUUM.)
1136 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1137 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1138 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1139 save_nestlevel = NewGUCNestLevel();
1142 * Do the actual work --- either FULL or "lazy" vacuum
1144 if (vacstmt->options & VACOPT_FULL)
1146 /* close relation before vacuuming, but hold lock until commit */
1147 relation_close(onerel, NoLock);
1150 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1151 cluster_rel(relid, InvalidOid, false,
1152 (vacstmt->options & VACOPT_VERBOSE) != 0);
1155 lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
1157 /* Roll back any GUC changes executed by index functions */
1158 AtEOXact_GUC(false, save_nestlevel);
1160 /* Restore userid and security context */
1161 SetUserIdAndSecContext(save_userid, save_sec_context);
1163 /* all done with this class, but hold lock until commit */
1165 relation_close(onerel, NoLock);
1168 * Complete the transaction and free all temporary memory used.
1170 PopActiveSnapshot();
1171 CommitTransactionCommand();
1174 * If the relation has a secondary toast rel, vacuum that too while we
1175 * still hold the session lock on the master table. Note however that
1176 * "analyze" will not get done on the toast table. This is good, because
1177 * the toaster always uses hardcoded index access and statistics are
1178 * totally unimportant for toast relations.
1180 if (toast_relid != InvalidOid)
1181 vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
1184 * Now release the session-level lock on the master table.
1186 UnlockRelationIdForSession(&onerelid, lmode);
1188 /* Report that we really did it. */
1194 * Open all the vacuumable indexes of the given relation, obtaining the
1195 * specified kind of lock on each. Return an array of Relation pointers for
1196 * the indexes into *Irel, and the number of indexes into *nindexes.
1198 * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1199 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1200 * execution, and what we have is too corrupt to be processable. We will
1201 * vacuum even if the index isn't indisvalid; this is important because in a
1202 * unique index, uniqueness checks will be performed anyway and had better not
1203 * hit dangling index pointers.
1206 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1207 int *nindexes, Relation **Irel)
1210 ListCell *indexoidscan;
1213 Assert(lockmode != NoLock);
1215 indexoidlist = RelationGetIndexList(relation);
1217 /* allocate enough memory for all indexes */
1218 i = list_length(indexoidlist);
1221 *Irel = (Relation *) palloc(i * sizeof(Relation));
1225 /* collect just the ready indexes */
1227 foreach(indexoidscan, indexoidlist)
1229 Oid indexoid = lfirst_oid(indexoidscan);
1232 indrel = index_open(indexoid, lockmode);
1233 if (IndexIsReady(indrel->rd_index))
1234 (*Irel)[i++] = indrel;
1236 index_close(indrel, lockmode);
1241 list_free(indexoidlist);
1245 * Release the resources acquired by vac_open_indexes. Optionally release
1246 * the locks (say NoLock to keep 'em).
1249 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1256 Relation ind = Irel[nindexes];
1258 index_close(ind, lockmode);
1264 * vacuum_delay_point --- check for interrupts and cost-based delay.
1266 * This should be called in each major loop of VACUUM processing,
1267 * typically once per page processed.
1270 vacuum_delay_point(void)
1272 /* Always check for interrupts */
1273 CHECK_FOR_INTERRUPTS();
1275 /* Nap if appropriate */
1276 if (VacuumCostActive && !InterruptPending &&
1277 VacuumCostBalance >= VacuumCostLimit)
1281 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1282 if (msec > VacuumCostDelay * 4)
1283 msec = VacuumCostDelay * 4;
1285 pg_usleep(msec * 1000L);
1287 VacuumCostBalance = 0;
1289 /* update balance values for workers */
1290 AutoVacuumUpdateDelay();
1292 /* Might have gotten an interrupt while sleeping */
1293 CHECK_FOR_INTERRUPTS();