1 /*-------------------------------------------------------------------------
4 * The postgres vacuum cleaner.
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
12 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
17 * src/backend/commands/vacuum.c
19 *-------------------------------------------------------------------------
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_namespace.h"
36 #include "commands/cluster.h"
37 #include "commands/vacuum.h"
38 #include "miscadmin.h"
40 #include "postmaster/autovacuum.h"
41 #include "storage/bufmgr.h"
42 #include "storage/lmgr.h"
43 #include "storage/proc.h"
44 #include "storage/procarray.h"
45 #include "utils/acl.h"
46 #include "utils/fmgroids.h"
47 #include "utils/guc.h"
48 #include "utils/memutils.h"
49 #include "utils/snapmgr.h"
50 #include "utils/syscache.h"
51 #include "utils/tqual.h"
57 int vacuum_freeze_min_age;
58 int vacuum_freeze_table_age;
59 int vacuum_multixact_freeze_min_age;
60 int vacuum_multixact_freeze_table_age;
63 /* A few variables that don't seem worth passing around as parameters */
64 static MemoryContext vac_context = NULL;
65 static BufferAccessStrategy vac_strategy;
68 /* non-export function prototypes */
69 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
70 static void vac_truncate_clog(TransactionId frozenXID,
72 TransactionId lastSaneFrozenXid,
73 MultiXactId lastSaneMinMulti);
74 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
75 VacuumParams *params);
78 * Primary entry point for manual VACUUM and ANALYZE commands
80 * This is mainly a preparation wrapper for the real operations that will
84 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
88 /* sanity checks on options */
89 Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
90 Assert((vacstmt->options & VACOPT_VACUUM) ||
91 !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
92 Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
93 Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
96 * All freeze ages are zero if the FREEZE option is given; otherwise pass
97 * them as -1 which means to use the default values.
99 if (vacstmt->options & VACOPT_FREEZE)
101 params.freeze_min_age = 0;
102 params.freeze_table_age = 0;
103 params.multixact_freeze_min_age = 0;
104 params.multixact_freeze_table_age = 0;
108 params.freeze_min_age = -1;
109 params.freeze_table_age = -1;
110 params.multixact_freeze_min_age = -1;
111 params.multixact_freeze_table_age = -1;
114 /* user-invoked vacuum is never "for wraparound" */
115 params.is_wraparound = false;
117 /* user-invoked vacuum never uses this parameter */
118 params.log_min_duration = -1;
120 /* Now go through the common routine */
121 vacuum(vacstmt->options, vacstmt->relation, InvalidOid, &params,
122 vacstmt->va_cols, NULL, isTopLevel);
126 * Primary entry point for VACUUM and ANALYZE commands.
128 * options is a bitmask of VacuumOption flags, indicating what to do.
130 * relid, if not InvalidOid, indicates the relation to process; otherwise,
131 * the RangeVar is used. (The latter must always be passed, because it's
132 * used for error messages.)
134 * params contains a set of parameters that can be used to customize the
137 * va_cols is a list of columns to analyze, or NIL to process them all.
139 * bstrategy is normally given as NULL, but in autovacuum it can be passed
140 * in to use the same buffer strategy object across multiple vacuum() calls.
142 * isTopLevel should be passed down from ProcessUtility.
144 * It is the caller's responsibility that all parameters are allocated in a
145 * memory context that will not disappear at transaction commit.
148 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
149 List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
151 const char *stmttype;
152 volatile bool in_outer_xact,
155 static bool in_vacuum = false;
157 Assert(params != NULL);
159 stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
162 * We cannot run VACUUM inside a user transaction block; if we were inside
163 * a transaction, then our commit- and start-transaction-command calls
164 * would not have the intended effect! There are numerous other subtle
165 * dependencies on this, too.
167 * ANALYZE (without VACUUM) can run either way.
169 if (options & VACOPT_VACUUM)
171 PreventTransactionChain(isTopLevel, stmttype);
172 in_outer_xact = false;
175 in_outer_xact = IsInTransactionChain(isTopLevel);
178 * Due to static variables vac_context, anl_context and vac_strategy,
179 * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
180 * calls a hostile index expression that itself calls ANALYZE.
184 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
185 errmsg("%s cannot be executed from VACUUM or ANALYZE",
189 * Send info about dead objects to the statistics collector, unless we are
190 * in autovacuum --- autovacuum.c does this for itself.
192 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
193 pgstat_vacuum_stat();
196 * Create special memory context for cross-transaction storage.
198 * Since it is a child of PortalContext, it will go away eventually even
199 * if we suffer an error; there's no need for special abort cleanup logic.
201 vac_context = AllocSetContextCreate(PortalContext,
203 ALLOCSET_DEFAULT_MINSIZE,
204 ALLOCSET_DEFAULT_INITSIZE,
205 ALLOCSET_DEFAULT_MAXSIZE);
208 * If caller didn't give us a buffer strategy object, make one in the
209 * cross-transaction memory context.
211 if (bstrategy == NULL)
213 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
215 bstrategy = GetAccessStrategy(BAS_VACUUM);
216 MemoryContextSwitchTo(old_context);
218 vac_strategy = bstrategy;
221 * Build list of relations to process. (get_rel_oids() builds it in
222 * vac_context for safekeeping, even when the caller supplied a relation OID.)
224 relations = get_rel_oids(relid, relation);
227 * Decide whether we need to start/commit our own transactions.
229 * For VACUUM (with or without ANALYZE): always do so, so that we can
230 * release locks as soon as possible. (We could possibly use the outer
231 * transaction for a one-table VACUUM, but handling TOAST tables would be
234 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
235 * start/commit our own transactions. Also, there's no need to do so if
236 * only processing one relation. For multiple relations when not within a
237 * transaction block, and also in an autovacuum worker, use own
238 * transactions so we can release locks sooner.
240 if (options & VACOPT_VACUUM)
241 use_own_xacts = true;
244 Assert(options & VACOPT_ANALYZE);
245 if (IsAutoVacuumWorkerProcess())
246 use_own_xacts = true;
247 else if (in_outer_xact)
248 use_own_xacts = false;
249 else if (list_length(relations) > 1)
250 use_own_xacts = true;
252 use_own_xacts = false;
256 * vacuum_rel expects to be entered with no transaction active; it will
257 * start and commit its own transaction. But we are called by an SQL
258 * command, and so we are executing inside a transaction already. We
259 * commit the transaction started in PostgresMain() here, and start
260 * another one before exiting to match the commit waiting for us back in
265 Assert(!in_outer_xact);
267 /* ActiveSnapshot is not set by autovacuum */
268 if (ActiveSnapshotSet())
271 /* matches the StartTransaction in PostgresMain() */
272 CommitTransactionCommand();
275 /* Turn vacuum cost accounting on or off */
281 VacuumCostActive = (VacuumCostDelay > 0);
282 VacuumCostBalance = 0;
288 * Loop to process each selected relation.
290 foreach(cur, relations)
292 Oid relid = lfirst_oid(cur);
294 if (options & VACOPT_VACUUM)
296 if (!vacuum_rel(relid, relation, options, params))
300 if (options & VACOPT_ANALYZE)
303 * If using separate xacts, start one for analyze. Otherwise,
304 * we can use the outer transaction.
308 StartTransactionCommand();
309 /* functions in indexes may want a snapshot set */
310 PushActiveSnapshot(GetTransactionSnapshot());
313 analyze_rel(relid, relation, options, params,
314 va_cols, in_outer_xact, vac_strategy);
319 CommitTransactionCommand();
327 VacuumCostActive = false;
333 VacuumCostActive = false;
336 * Finish up processing.
340 /* here, we are not in a transaction */
343 * This matches the CommitTransaction waiting for us in
346 StartTransactionCommand();
349 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
352 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
353 * (autovacuum.c does this for itself.)
355 vac_update_datfrozenxid();
359 * Clean up working storage --- note we must do this after
360 * StartTransactionCommand, else we might be trying to delete the active
363 MemoryContextDelete(vac_context);
368 * Build a list of Oids for each relation to be processed
370 * The list is built in vac_context so that it will survive across our
371 * per-relation transactions.
374 get_rel_oids(Oid relid, const RangeVar *vacrel)
376 List *oid_list = NIL;
377 MemoryContext oldcontext;
379 /* OID supplied by VACUUM's caller? */
380 if (OidIsValid(relid))
382 oldcontext = MemoryContextSwitchTo(vac_context);
383 oid_list = lappend_oid(oid_list, relid);
384 MemoryContextSwitchTo(oldcontext);
388 /* Process a specific relation */
392 * Since we don't take a lock here, the relation might be gone, or the
393 * RangeVar might no longer refer to the OID we look up here. In the
394 * former case, VACUUM will do nothing; in the latter case, it will
395 * process the OID we looked up here, rather than the new one. Neither
396 * is ideal, but there's little practical alternative, since we're
397 * going to commit this transaction and begin a new one between now
400 relid = RangeVarGetRelid(vacrel, NoLock, false);
402 /* Make a relation list entry for this guy */
403 oldcontext = MemoryContextSwitchTo(vac_context);
404 oid_list = lappend_oid(oid_list, relid);
405 MemoryContextSwitchTo(oldcontext);
410 * Process all plain relations and materialized views listed in
417 pgclass = heap_open(RelationRelationId, AccessShareLock);
419 scan = heap_beginscan_catalog(pgclass, 0, NULL);
421 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
423 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
425 if (classForm->relkind != RELKIND_RELATION &&
426 classForm->relkind != RELKIND_MATVIEW)
429 /* Make a relation list entry for this guy */
430 oldcontext = MemoryContextSwitchTo(vac_context);
431 oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
432 MemoryContextSwitchTo(oldcontext);
436 heap_close(pgclass, AccessShareLock);
443 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
445 * The output parameters are:
446 * - oldestXmin is the cutoff value used to distinguish whether tuples are
447 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
448 * - freezeLimit is the Xid below which all Xids are replaced by
449 * FrozenTransactionId during vacuum.
450 * - xidFullScanLimit (computed from freeze_table_age parameter)
451 * represents a minimum Xid value; a table whose relfrozenxid is older than
452 * this will have a full-table vacuum applied to it, to freeze tuples across
453 * the whole table. Vacuuming a table younger than this value can use a
455 * - multiXactCutoff is the value below which all MultiXactIds are removed from
457 * - mxactFullScanLimit is a value against which a table's relminmxid value is
458 * compared to produce a full-table vacuum, as with xidFullScanLimit.
460 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
464 vacuum_set_xid_limits(Relation rel,
466 int freeze_table_age,
467 int multixact_freeze_min_age,
468 int multixact_freeze_table_age,
469 TransactionId *oldestXmin,
470 TransactionId *freezeLimit,
471 TransactionId *xidFullScanLimit,
472 MultiXactId *multiXactCutoff,
473 MultiXactId *mxactFullScanLimit)
477 int effective_multixact_freeze_max_age;
479 TransactionId safeLimit;
480 MultiXactId mxactLimit;
481 MultiXactId safeMxactLimit;
484 * We can always ignore processes running lazy vacuum. This is because we
485 * use these values only for deciding which tuples we must keep in the
486 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
487 * ignore it. In theory it could be problematic to ignore lazy vacuums in
488 * a full vacuum, but keep in mind that only one vacuum process can be
489 * working on a particular table at any time, and that each vacuum is
490 * always an independent transaction.
493 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, true), rel);
495 Assert(TransactionIdIsNormal(*oldestXmin));
498 * Determine the minimum freeze age to use: as specified by the caller, or
499 * vacuum_freeze_min_age, but in any case not more than half
500 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
501 * wraparound won't occur too frequently.
503 freezemin = freeze_min_age;
505 freezemin = vacuum_freeze_min_age;
506 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
507 Assert(freezemin >= 0);
510 * Compute the cutoff XID, being careful not to generate a "permanent" XID
512 limit = *oldestXmin - freezemin;
513 if (!TransactionIdIsNormal(limit))
514 limit = FirstNormalTransactionId;
517 * If oldestXmin is very far back (in practice, more than
518 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
519 * freeze age of zero.
521 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
522 if (!TransactionIdIsNormal(safeLimit))
523 safeLimit = FirstNormalTransactionId;
525 if (TransactionIdPrecedes(limit, safeLimit))
528 (errmsg("oldest xmin is far in the past"),
529 errhint("Close open transactions soon to avoid wraparound problems.")));
533 *freezeLimit = limit;
536 * Compute the multixact age for which freezing is urgent. This is
537 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
538 * short of multixact member space.
540 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
543 * Determine the minimum multixact freeze age to use: as specified by
544 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
545 * than half effective_multixact_freeze_max_age, so that autovacuums to
546 * prevent MultiXact wraparound won't occur too frequently.
548 mxid_freezemin = multixact_freeze_min_age;
549 if (mxid_freezemin < 0)
550 mxid_freezemin = vacuum_multixact_freeze_min_age;
551 mxid_freezemin = Min(mxid_freezemin,
552 effective_multixact_freeze_max_age / 2);
553 Assert(mxid_freezemin >= 0);
555 /* compute the cutoff multi, being careful to generate a valid value */
556 mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
557 if (mxactLimit < FirstMultiXactId)
558 mxactLimit = FirstMultiXactId;
561 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
562 if (safeMxactLimit < FirstMultiXactId)
563 safeMxactLimit = FirstMultiXactId;
565 if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
568 (errmsg("oldest multixact is far in the past"),
569 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
570 mxactLimit = safeMxactLimit;
573 *multiXactCutoff = mxactLimit;
575 if (xidFullScanLimit != NULL)
579 Assert(mxactFullScanLimit != NULL);
582 * Determine the table freeze age to use: as specified by the caller,
583 * or vacuum_freeze_table_age, but in any case not more than
584 * autovacuum_freeze_max_age * 0.95, so that if you have e.g. nightly
585 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
586 * before anti-wraparound autovacuum is launched.
588 freezetable = freeze_table_age;
590 freezetable = vacuum_freeze_table_age;
591 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
592 Assert(freezetable >= 0);
595 * Compute XID limit causing a full-table vacuum, being careful not to
596 * generate a "permanent" XID.
598 limit = ReadNewTransactionId() - freezetable;
599 if (!TransactionIdIsNormal(limit))
600 limit = FirstNormalTransactionId;
602 *xidFullScanLimit = limit;
605 * Similar to the above, determine the table freeze age to use for
606 * multixacts: as specified by the caller, or
607 * vacuum_multixact_freeze_table_age, but in any case not more than
608 * effective_multixact_freeze_max_age * 0.95, so that if you have
609 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
610 * freeze multixacts before anti-wraparound autovacuum is launched.
612 freezetable = multixact_freeze_table_age;
614 freezetable = vacuum_multixact_freeze_table_age;
615 freezetable = Min(freezetable,
616 effective_multixact_freeze_max_age * 0.95);
617 Assert(freezetable >= 0);
620 * Compute MultiXact limit causing a full-table vacuum, being careful
621 * to generate a valid MultiXact value.
623 mxactLimit = ReadNextMultiXactId() - freezetable;
624 if (mxactLimit < FirstMultiXactId)
625 mxactLimit = FirstMultiXactId;
627 *mxactFullScanLimit = mxactLimit;
631 Assert(mxactFullScanLimit == NULL);
636 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
638 * If we scanned the whole relation then we should just use the count of
639 * live tuples seen; but if we did not, we should not trust the count
640 * unreservedly, especially not in VACUUM, which may have scanned a quite
641 * nonrandom subset of the table. When we have only partial information,
642 * we take the old value of pg_class.reltuples as a measurement of the
643 * tuple density in the unscanned pages.
645 * This routine is shared by VACUUM and ANALYZE.
648 vac_estimate_reltuples(Relation relation, bool is_analyze,
649 BlockNumber total_pages,
650 BlockNumber scanned_pages,
651 double scanned_tuples)
653 BlockNumber old_rel_pages = relation->rd_rel->relpages;
654 double old_rel_tuples = relation->rd_rel->reltuples;
658 double updated_density;
660 /* If we did scan the whole table, just use the count as-is */
661 if (scanned_pages >= total_pages)
662 return scanned_tuples;
665 * If scanned_pages is zero but total_pages isn't, keep the existing value
666 * of reltuples. (Note: callers should avoid updating the pg_class
667 * statistics in this situation, since no new information has been
670 if (scanned_pages == 0)
671 return old_rel_tuples;
674 * If old value of relpages is zero, old density is indeterminate; we
675 * can't do much except scale up scanned_tuples to match total_pages.
677 if (old_rel_pages == 0)
678 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
681 * Okay, we've covered the corner cases. The normal calculation is to
682 * convert the old measurement to a density (tuples per page), then update
683 * the density using an exponential-moving-average approach, and finally
684 * compute reltuples as updated_density * total_pages.
686 * For ANALYZE, the moving average multiplier is just the fraction of the
687 * table's pages we scanned. This is equivalent to assuming that the
688 * tuple density in the unscanned pages didn't change. Of course, it
689 * probably did, if the new density measurement is different. But over
690 * repeated cycles, the value of reltuples will converge towards the
691 * correct value, if repeated measurements show the same new density.
693 * For VACUUM, the situation is a bit different: we have looked at a
694 * nonrandom sample of pages, but we know for certain that the pages we
695 * didn't look at are precisely the ones that haven't changed lately.
696 * Thus, there is a reasonable argument for doing exactly the same thing
697 * as for the ANALYZE case, that is use the old density measurement as the
698 * value for the unscanned pages.
700 * This logic could probably use further refinement.
702 old_density = old_rel_tuples / old_rel_pages;
703 new_density = scanned_tuples / scanned_pages;
704 multiplier = (double) scanned_pages / (double) total_pages;
705 updated_density = old_density + (new_density - old_density) * multiplier;
706 return floor(updated_density * total_pages + 0.5);
711 * vac_update_relstats() -- update statistics for one relation
713 * Update the whole-relation statistics that are kept in its pg_class
714 * row. There are additional stats that will be updated if we are
715 * doing ANALYZE, but we always update these stats. This routine works
716 * for both index and heap relation entries in pg_class.
718 * We violate transaction semantics here by overwriting the rel's
719 * existing pg_class tuple with the new values. This is reasonably
720 * safe as long as we're sure that the new values are correct whether or
721 * not this transaction commits. The reason for doing this is that if
722 * we updated these tuples in the usual way, vacuuming pg_class itself
723 * wouldn't work very well --- by the time we got done with a vacuum
724 * cycle, most of the tuples in pg_class would've been obsoleted. Of
725 * course, this only works for fixed-size not-null columns, but these are.
727 * Another reason for doing it this way is that when we are in a lazy
728 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
729 * Somebody vacuuming pg_class might think they could delete a tuple
730 * marked with xmin = our xid.
732 * In addition to fundamentally nontransactional statistics such as
733 * relpages and relallvisible, we try to maintain certain lazily-updated
734 * DDL flags such as relhasindex, by clearing them if no longer correct.
735 * It's safe to do this in VACUUM, which can't run in parallel with
736 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
737 * However, it's *not* safe to do it in an ANALYZE that's within an
738 * outer transaction, because for example the current transaction might
739 * have dropped the last index; then we'd think relhasindex should be
740 * cleared, but if the transaction later rolls back this would be wrong.
741 * So we refrain from updating the DDL flags if we're inside an outer
742 * transaction. This is OK since postponing the flag maintenance is
745 * This routine is shared by VACUUM and ANALYZE.
748 vac_update_relstats(Relation relation,
749 BlockNumber num_pages, double num_tuples,
750 BlockNumber num_all_visible_pages,
751 bool hasindex, TransactionId frozenxid,
752 MultiXactId minmulti,
755 Oid relid = RelationGetRelid(relation);
758 Form_pg_class pgcform;
761 rd = heap_open(RelationRelationId, RowExclusiveLock);
763 /* Fetch a copy of the tuple to scribble on */
764 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
765 if (!HeapTupleIsValid(ctup))
766 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
768 pgcform = (Form_pg_class) GETSTRUCT(ctup);
770 /* Apply statistical updates, if any, to copied tuple */
773 if (pgcform->relpages != (int32) num_pages)
775 pgcform->relpages = (int32) num_pages;
778 if (pgcform->reltuples != (float4) num_tuples)
780 pgcform->reltuples = (float4) num_tuples;
783 if (pgcform->relallvisible != (int32) num_all_visible_pages)
785 pgcform->relallvisible = (int32) num_all_visible_pages;
789 /* Apply DDL updates, but not inside an outer transaction (see above) */
794 * If we didn't find any indexes, reset relhasindex.
796 if (pgcform->relhasindex && !hasindex)
798 pgcform->relhasindex = false;
803 * If we have discovered that there are no indexes, then there's no
804 * primary key either. This could be done more thoroughly...
806 if (pgcform->relhaspkey && !hasindex)
808 pgcform->relhaspkey = false;
812 /* We also clear relhasrules and relhastriggers if needed */
813 if (pgcform->relhasrules && relation->rd_rules == NULL)
815 pgcform->relhasrules = false;
818 if (pgcform->relhastriggers && relation->trigdesc == NULL)
820 pgcform->relhastriggers = false;
826 * Update relfrozenxid, unless caller passed InvalidTransactionId
827 * indicating it has no new data.
829 * Ordinarily, we don't let relfrozenxid go backwards: if things are
830 * working correctly, the only way the new frozenxid could be older would
831 * be if a previous VACUUM was done with a tighter freeze_min_age, in
832 * which case we don't want to forget the work it already did. However,
833 * if the stored relfrozenxid is "in the future", then it must be corrupt
834 * and it seems best to overwrite it with the cutoff we used this time.
835 * This should match vac_update_datfrozenxid() concerning what we consider
836 * to be "in the future".
838 if (TransactionIdIsNormal(frozenxid) &&
839 pgcform->relfrozenxid != frozenxid &&
840 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
841 TransactionIdPrecedes(ReadNewTransactionId(),
842 pgcform->relfrozenxid)))
844 pgcform->relfrozenxid = frozenxid;
848 /* Similarly for relminmxid */
849 if (MultiXactIdIsValid(minmulti) &&
850 pgcform->relminmxid != minmulti &&
851 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
852 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
854 pgcform->relminmxid = minmulti;
858 /* If anything changed, write out the tuple. */
860 heap_inplace_update(rd, ctup);
862 heap_close(rd, RowExclusiveLock);
867 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
869 * Update pg_database's datfrozenxid entry for our database to be the
870 * minimum of the pg_class.relfrozenxid values.
872 * Similarly, update our datminmxid to be the minimum of the
873 * pg_class.relminmxid values.
875 * If we are able to advance either pg_database value, also try to
876 * truncate pg_clog and pg_multixact.
878 * We violate transaction semantics here by overwriting the database's
879 * existing pg_database tuple with the new values. This is reasonably
880 * safe since the new values are correct whether or not this transaction
881 * commits. As with vac_update_relstats, this avoids leaving dead tuples
882 * behind after a VACUUM.
885 vac_update_datfrozenxid(void)
888 Form_pg_database dbform;
892 TransactionId newFrozenXid;
893 MultiXactId newMinMulti;
894 TransactionId lastSaneFrozenXid;
895 MultiXactId lastSaneMinMulti;
900 * Initialize the "min" calculation with GetOldestXmin, which is a
901 * reasonable approximation to the minimum relfrozenxid for not-yet-
902 * committed pg_class entries for new tables; see AddNewRelationTuple().
903 * So we cannot produce a wrong minimum by starting with this.
905 newFrozenXid = GetOldestXmin(NULL, true);
908 * Similarly, initialize the MultiXact "min" with the value that would be
909 * used on pg_class for new tables. See AddNewRelationTuple().
911 newMinMulti = GetOldestMultiXactId();
914 * Identify the latest relfrozenxid and relminmxid values that we could
915 * validly see during the scan. These are conservative values, but it's
916 * not really worth trying to be more exact.
918 lastSaneFrozenXid = ReadNewTransactionId();
919 lastSaneMinMulti = ReadNextMultiXactId();
922 * We must seqscan pg_class to find the minimum Xid, because there is no
923 * index that can help us here.
925 relation = heap_open(RelationRelationId, AccessShareLock);
927 scan = systable_beginscan(relation, InvalidOid, false,
930 while ((classTup = systable_getnext(scan)) != NULL)
932 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
935 * Only consider relations able to hold unfrozen XIDs (anything else
936 * should have InvalidTransactionId in relfrozenxid anyway.)
938 if (classForm->relkind != RELKIND_RELATION &&
939 classForm->relkind != RELKIND_MATVIEW &&
940 classForm->relkind != RELKIND_TOASTVALUE)
943 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
944 Assert(MultiXactIdIsValid(classForm->relminmxid));
947 * If things are working properly, no relation should have a
948 * relfrozenxid or relminmxid that is "in the future". However, such
949 * cases have been known to arise due to bugs in pg_upgrade. If we
950 * see any entries that are "in the future", chicken out and don't do
951 * anything. This ensures we won't truncate clog before those
952 * relations have been scanned and cleaned up.
954 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
955 MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
961 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
962 newFrozenXid = classForm->relfrozenxid;
964 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
965 newMinMulti = classForm->relminmxid;
968 /* we're done with pg_class */
969 systable_endscan(scan);
970 heap_close(relation, AccessShareLock);
972 /* chicken out if bogus data found */
976 Assert(TransactionIdIsNormal(newFrozenXid));
977 Assert(MultiXactIdIsValid(newMinMulti));
979 /* Now fetch the pg_database tuple we need to update. */
980 relation = heap_open(DatabaseRelationId, RowExclusiveLock);
982 /* Fetch a copy of the tuple to scribble on */
983 tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
984 if (!HeapTupleIsValid(tuple))
985 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
986 dbform = (Form_pg_database) GETSTRUCT(tuple);
989 * As in vac_update_relstats(), we ordinarily don't want to let
990 * datfrozenxid go backward; but if it's "in the future" then it must be
991 * corrupt and it seems best to overwrite it.
993 if (dbform->datfrozenxid != newFrozenXid &&
994 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
995 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
997 dbform->datfrozenxid = newFrozenXid;
1001 newFrozenXid = dbform->datfrozenxid;
1003 /* Ditto for datminmxid */
1004 if (dbform->datminmxid != newMinMulti &&
1005 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1006 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1008 dbform->datminmxid = newMinMulti;
1012 newMinMulti = dbform->datminmxid;
1015 heap_inplace_update(relation, tuple);
1017 heap_freetuple(tuple);
1018 heap_close(relation, RowExclusiveLock);
1021 * If we were able to advance datfrozenxid or datminmxid, see if we can
1022 * truncate pg_clog and/or pg_multixact. Also do it if the shared
1023 * XID-wrap-limit info is stale, since this action will update that too.
1025 if (dirty || ForceTransactionIdLimitUpdate())
1026 vac_truncate_clog(newFrozenXid, newMinMulti,
1027 lastSaneFrozenXid, lastSaneMinMulti);
1032 * vac_truncate_clog() -- attempt to truncate the commit log
1034 * Scan pg_database to determine the system-wide oldest datfrozenxid,
1035 * and use it to truncate the transaction commit log (pg_clog).
1036 * Also update the XID wrap limit info maintained by varsup.c.
1037 * Likewise for datminmxid.
1039 * The passed frozenXID and minMulti are the updated values for my own
1040 * pg_database entry. They're used to initialize the "min" calculations.
1041 * The caller also passes the "last sane" XID and MXID, since it has
1042 * those at hand already.
1044 * This routine is only invoked when we've managed to change our
1045 * DB's datfrozenxid/datminmxid values, or we found that the shared
1046 * XID-wrap-limit info is stale.
1049 vac_truncate_clog(TransactionId frozenXID,
1050 MultiXactId minMulti,
1051 TransactionId lastSaneFrozenXid,
1052 MultiXactId lastSaneMinMulti)
1054 TransactionId nextXID = ReadNewTransactionId();
1058 Oid oldestxid_datoid;
1059 Oid minmulti_datoid;
1061 bool frozenAlreadyWrapped = false;
1063 /* init oldest datoids to sync with my frozenXID/minMulti values */
1064 oldestxid_datoid = MyDatabaseId;
1065 minmulti_datoid = MyDatabaseId;
1068 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1070 * Note: we need not worry about a race condition with new entries being
1071 * inserted by CREATE DATABASE. Any such entry will have a copy of some
1072 * existing DB's datfrozenxid, and that source DB cannot be ours because
1073 * of the interlock against copying a DB containing an active backend.
1074 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1075 * concurrently modify the datfrozenxid's of different databases, the
1076 * worst possible outcome is that pg_clog is not truncated as aggressively
1079 relation = heap_open(DatabaseRelationId, AccessShareLock);
1081 scan = heap_beginscan_catalog(relation, 0, NULL);
1083 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1085 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
1087 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
1088 Assert(MultiXactIdIsValid(dbform->datminmxid));
1091 * If things are working properly, no database should have a
1092 * datfrozenxid or datminmxid that is "in the future". However, such
1093 * cases have been known to arise due to bugs in pg_upgrade. If we
1094 * see any entries that are "in the future", chicken out and don't do
1095 * anything. This ensures we won't truncate clog before those
1096 * databases have been scanned and cleaned up. (We will issue the
1097 * "already wrapped" warning if appropriate, though.)
1099 if (TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid) ||
1100 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid))
1103 if (TransactionIdPrecedes(nextXID, dbform->datfrozenxid))
1104 frozenAlreadyWrapped = true;
1105 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
1107 frozenXID = dbform->datfrozenxid;
1108 oldestxid_datoid = HeapTupleGetOid(tuple);
1111 if (MultiXactIdPrecedes(dbform->datminmxid, minMulti))
1113 minMulti = dbform->datminmxid;
1114 minmulti_datoid = HeapTupleGetOid(tuple);
1120 heap_close(relation, AccessShareLock);
1123 * Do not truncate CLOG if we seem to have suffered wraparound already;
1124 * the computed minimum XID might be bogus. This case should now be
1125 * impossible due to the defenses in GetNewTransactionId, but we keep the
1128 if (frozenAlreadyWrapped)
1131 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1132 errdetail("You might have already suffered transaction-wraparound data loss.")));
1136 /* chicken out if data is bogus in any other way */
1141 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1143 TruncateCLOG(frozenXID);
1144 TruncateCommitTs(frozenXID);
1145 TruncateMultiXact(minMulti, minmulti_datoid);
1148 * Update the wrap limit for GetNewTransactionId and creation of new
1149 * MultiXactIds. Note: these functions will also signal the postmaster
1150 * for an(other) autovac cycle if needed. XXX should we avoid possibly
1153 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1154 SetMultiXactIdLimit(minMulti, minmulti_datoid);
1155 AdvanceOldestCommitTsXid(frozenXID);
1160 * vacuum_rel() -- vacuum one heap relation
1162 * Doing one heap at a time incurs extra overhead, since we need to
1163 * check that the heap exists again just before we vacuum it. The
1164 * reason that we do this is so that vacuuming can be spread across
1165 * many small transactions. Otherwise, two-phase locking would require
1166 * us to lock the entire database during one pass of the vacuum cleaner.
1168 * At entry and exit, we are not inside a transaction.
1171 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1178 int save_sec_context;
1181 Assert(params != NULL);
1183 /* Begin a transaction for vacuuming this relation */
1184 StartTransactionCommand();
1187 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1188 * ensures that RecentGlobalXmin is kept truly recent.
1190 PushActiveSnapshot(GetTransactionSnapshot());
1192 if (!(options & VACOPT_FULL))
1195 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1196 * other concurrent VACUUMs know that they can ignore this one while
1197 * determining their OldestXmin. (The reason we don't set it during a
1198 * full VACUUM is exactly that we may have to run user-defined
1199 * functions for functional indexes, and we want to make sure that if
1200 * they use the snapshot set above, any tuples it requires can't get
1201 * removed from other tables. An index function that depends on the
1202 * contents of other tables is arguably broken, but we won't break it
1203 * here by violating transaction semantics.)
1205 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1206 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1209 * Note: these flags remain set until CommitTransaction or
1210 * AbortTransaction. We don't want to clear them until we reset
1211 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1212 * which is probably Not Good.
1214 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1215 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1216 if (params->is_wraparound)
1217 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1218 LWLockRelease(ProcArrayLock);
1222 * Check for user-requested abort. Note we want this to be inside a
1223 * transaction, so xact.c doesn't issue useless WARNING.
1225 CHECK_FOR_INTERRUPTS();
1228 * Determine the type of lock we want --- hard exclusive lock for a FULL
1229 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1230 * way, we can be sure that no other backend is vacuuming the same table.
1232 lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1235 * Open the relation and get the appropriate lock on it.
1237 * There's a race condition here: the rel may have gone away since the
1238 * last time we saw it. If so, we don't need to vacuum it.
1240 * If we've been asked not to wait for the relation lock, acquire it first
1241 * in non-blocking mode, before calling try_relation_open().
1243 if (!(options & VACOPT_NOWAIT))
1244 onerel = try_relation_open(relid, lmode);
1245 else if (ConditionalLockRelationOid(relid, lmode))
1246 onerel = try_relation_open(relid, NoLock);
1250 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
1252 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1253 errmsg("skipping vacuum of \"%s\" --- lock not available",
1254 relation->relname)));
1259 PopActiveSnapshot();
1260 CommitTransactionCommand();
1265 * Check permissions.
1267 * We allow the user to vacuum a table if he is superuser, the table
1268 * owner, or the database owner (but in the latter case, only if it's not
1269 * a shared relation). pg_class_ownercheck includes the superuser case.
1271 * Note we choose to treat permissions failure as a WARNING and keep
1272 * trying to vacuum the rest of the DB --- is this appropriate?
1274 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1275 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1277 if (onerel->rd_rel->relisshared)
1279 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1280 RelationGetRelationName(onerel))));
1281 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1283 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1284 RelationGetRelationName(onerel))));
1287 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1288 RelationGetRelationName(onerel))));
1289 relation_close(onerel, lmode);
1290 PopActiveSnapshot();
1291 CommitTransactionCommand();
1296 * Check that it's a vacuumable relation; we used to do this in
1297 * get_rel_oids() but seems safer to check after we've locked the
1300 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1301 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1302 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
1305 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1306 RelationGetRelationName(onerel))));
1307 relation_close(onerel, lmode);
1308 PopActiveSnapshot();
1309 CommitTransactionCommand();
1314 * Silently ignore tables that are temp tables of other backends ---
1315 * trying to vacuum these will lead to great unhappiness, since their
1316 * contents are probably not up-to-date on disk. (We don't throw a
1317 * warning here; it would just lead to chatter during a database-wide
1320 if (RELATION_IS_OTHER_TEMP(onerel))
1322 relation_close(onerel, lmode);
1323 PopActiveSnapshot();
1324 CommitTransactionCommand();
1329 * Get a session-level lock too. This will protect our access to the
1330 * relation across multiple transactions, so that we can vacuum the
1331 * relation's TOAST table (if any) secure in the knowledge that no one is
1332 * deleting the parent relation.
1334 * NOTE: this cannot block, even if someone else is waiting for access,
1335 * because the lock manager knows that both lock requests are from the
1338 onerelid = onerel->rd_lockInfo.lockRelId;
1339 LockRelationIdForSession(&onerelid, lmode);
1342 * Remember the relation's TOAST relation for later, if the caller asked
1343 * us to process it. In VACUUM FULL, though, the toast table is
1344 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1346 if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1347 toast_relid = onerel->rd_rel->reltoastrelid;
1349 toast_relid = InvalidOid;
1352 * Switch to the table owner's userid, so that any index functions are run
1353 * as that user. Also lock down security-restricted operations and
1354 * arrange to make GUC variable changes local to this command. (This is
1355 * unnecessary, but harmless, for lazy VACUUM.)
1357 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1358 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1359 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1360 save_nestlevel = NewGUCNestLevel();
1363 * Do the actual work --- either FULL or "lazy" vacuum
1365 if (options & VACOPT_FULL)
1367 /* close relation before vacuuming, but hold lock until commit */
1368 relation_close(onerel, NoLock);
1371 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1372 cluster_rel(relid, InvalidOid, false,
1373 (options & VACOPT_VERBOSE) != 0);
1376 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1378 /* Roll back any GUC changes executed by index functions */
1379 AtEOXact_GUC(false, save_nestlevel);
1381 /* Restore userid and security context */
1382 SetUserIdAndSecContext(save_userid, save_sec_context);
1384 /* all done with this class, but hold lock until commit */
1386 relation_close(onerel, NoLock);
1389 * Complete the transaction and free all temporary memory used.
1391 PopActiveSnapshot();
1392 CommitTransactionCommand();
1395 * If the relation has a secondary toast rel, vacuum that too while we
1396 * still hold the session lock on the master table. Note however that
1397 * "analyze" will not get done on the toast table. This is good, because
1398 * the toaster always uses hardcoded index access and statistics are
1399 * totally unimportant for toast relations.
1401 if (toast_relid != InvalidOid)
1402 vacuum_rel(toast_relid, relation, options, params);
1405 * Now release the session-level lock on the master table.
1407 UnlockRelationIdForSession(&onerelid, lmode);
1409 /* Report that we really did it. */
1415 * Open all the vacuumable indexes of the given relation, obtaining the
1416 * specified kind of lock on each. Return an array of Relation pointers for
1417 * the indexes into *Irel, and the number of indexes into *nindexes.
1419 * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1420 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1421 * execution, and what we have is too corrupt to be processable. We will
1422 * vacuum even if the index isn't indisvalid; this is important because in a
1423 * unique index, uniqueness checks will be performed anyway and had better not
1424 * hit dangling index pointers.
1427 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1428 int *nindexes, Relation **Irel)
1431 ListCell *indexoidscan;
1434 Assert(lockmode != NoLock);
1436 indexoidlist = RelationGetIndexList(relation);
1438 /* allocate enough memory for all indexes */
1439 i = list_length(indexoidlist);
1442 *Irel = (Relation *) palloc(i * sizeof(Relation));
1446 /* collect just the ready indexes */
1448 foreach(indexoidscan, indexoidlist)
1450 Oid indexoid = lfirst_oid(indexoidscan);
1453 indrel = index_open(indexoid, lockmode);
1454 if (IndexIsReady(indrel->rd_index))
1455 (*Irel)[i++] = indrel;
1457 index_close(indrel, lockmode);
1462 list_free(indexoidlist);
1466 * Release the resources acquired by vac_open_indexes. Optionally release
1467 * the locks (say NoLock to keep 'em).
1470 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1477 Relation ind = Irel[nindexes];
1479 index_close(ind, lockmode);
1485 * vacuum_delay_point --- check for interrupts and cost-based delay.
1487 * This should be called in each major loop of VACUUM processing,
1488 * typically once per page processed.
1491 vacuum_delay_point(void)
1493 /* Always check for interrupts */
1494 CHECK_FOR_INTERRUPTS();
1496 /* Nap if appropriate */
1497 if (VacuumCostActive && !InterruptPending &&
1498 VacuumCostBalance >= VacuumCostLimit)
1502 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1503 if (msec > VacuumCostDelay * 4)
1504 msec = VacuumCostDelay * 4;
1506 pg_usleep(msec * 1000L);
1508 VacuumCostBalance = 0;
1510 /* update balance values for workers */
1511 AutoVacuumUpdateDelay();
1513 /* Might have gotten an interrupt while sleeping */
1514 CHECK_FOR_INTERRUPTS();