]> granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c
Revise parse tree representation for VACUUM and ANALYZE.
[postgresql] / src / backend / commands / vacuum.c
1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  *        The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  *        src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22
23 #include <math.h>
24
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/tableam.h"
32 #include "access/transam.h"
33 #include "access/xact.h"
34 #include "catalog/namespace.h"
35 #include "catalog/pg_database.h"
36 #include "catalog/pg_inherits.h"
37 #include "catalog/pg_namespace.h"
38 #include "commands/cluster.h"
39 #include "commands/vacuum.h"
40 #include "miscadmin.h"
41 #include "nodes/makefuncs.h"
42 #include "pgstat.h"
43 #include "postmaster/autovacuum.h"
44 #include "storage/bufmgr.h"
45 #include "storage/lmgr.h"
46 #include "storage/proc.h"
47 #include "storage/procarray.h"
48 #include "utils/acl.h"
49 #include "utils/fmgroids.h"
50 #include "utils/guc.h"
51 #include "utils/memutils.h"
52 #include "utils/snapmgr.h"
53 #include "utils/syscache.h"
54
55
56 /*
57  * GUC parameters
58  */
59 int                     vacuum_freeze_min_age;
60 int                     vacuum_freeze_table_age;
61 int                     vacuum_multixact_freeze_min_age;
62 int                     vacuum_multixact_freeze_table_age;
63
64
65 /* A few variables that don't seem worth passing around as parameters */
66 static MemoryContext vac_context = NULL;
67 static BufferAccessStrategy vac_strategy;
68
69
70 /* non-export function prototypes */
71 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
72 static List *get_all_vacuum_rels(int options);
73 static void vac_truncate_clog(TransactionId frozenXID,
74                                   MultiXactId minMulti,
75                                   TransactionId lastSaneFrozenXid,
76                                   MultiXactId lastSaneMinMulti);
77 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
78
79 /*
80  * Primary entry point for manual VACUUM and ANALYZE commands
81  *
82  * This is mainly a preparation wrapper for the real operations that will
83  * happen in vacuum().
84  */
85 void
86 ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
87 {
88         VacuumParams params;
89         ListCell        *lc;
90
91         params.options = vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE;
92
93         /* Parse options list */
94         foreach(lc, vacstmt->options)
95         {
96                 DefElem *opt = (DefElem *) lfirst(lc);
97
98                 /* Parse common options for VACUUM and ANALYZE */
99                 if (strcmp(opt->defname, "verbose") == 0)
100                         params.options |= VACOPT_VERBOSE;
101                 else if (strcmp(opt->defname, "skip_locked") == 0)
102                         params.options |= VACOPT_SKIP_LOCKED;
103                 else if (!vacstmt->is_vacuumcmd)
104                         ereport(ERROR,
105                                         (errcode(ERRCODE_SYNTAX_ERROR),
106                                          errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
107                                          parser_errposition(pstate, opt->location)));
108
109                 /* Parse options available on VACUUM */
110                 else if (strcmp(opt->defname, "analyze") == 0)
111                                 params.options |= VACOPT_ANALYZE;
112                 else if (strcmp(opt->defname, "freeze") == 0)
113                                 params.options |= VACOPT_FREEZE;
114                 else if (strcmp(opt->defname, "full") == 0)
115                         params.options |= VACOPT_FULL;
116                 else if (strcmp(opt->defname, "disable_page_skipping") == 0)
117                         params.options |= VACOPT_DISABLE_PAGE_SKIPPING;
118                 else
119                         ereport(ERROR,
120                                         (errcode(ERRCODE_SYNTAX_ERROR),
121                                          errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
122                                          parser_errposition(pstate, opt->location)));
123         }
124
125         /* sanity checks on options */
126         Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
127         Assert((params.options & VACOPT_VACUUM) ||
128                    !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
129         Assert(!(params.options & VACOPT_SKIPTOAST));
130
131         /*
132          * Make sure VACOPT_ANALYZE is specified if any column lists are present.
133          */
134         if (!(params.options & VACOPT_ANALYZE))
135         {
136                 ListCell   *lc;
137
138                 foreach(lc, vacstmt->rels)
139                 {
140                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
141
142                         if (vrel->va_cols != NIL)
143                                 ereport(ERROR,
144                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
145                                                  errmsg("ANALYZE option must be specified when a column list is provided")));
146                 }
147         }
148
149         /*
150          * All freeze ages are zero if the FREEZE option is given; otherwise pass
151          * them as -1 which means to use the default values.
152          */
153         if (params.options & VACOPT_FREEZE)
154         {
155                 params.freeze_min_age = 0;
156                 params.freeze_table_age = 0;
157                 params.multixact_freeze_min_age = 0;
158                 params.multixact_freeze_table_age = 0;
159         }
160         else
161         {
162                 params.freeze_min_age = -1;
163                 params.freeze_table_age = -1;
164                 params.multixact_freeze_min_age = -1;
165                 params.multixact_freeze_table_age = -1;
166         }
167
168         /* user-invoked vacuum is never "for wraparound" */
169         params.is_wraparound = false;
170
171         /* user-invoked vacuum never uses this parameter */
172         params.log_min_duration = -1;
173
174         /* Now go through the common routine */
175         vacuum(vacstmt->rels, &params, NULL, isTopLevel);
176 }
177
178 /*
179  * Internal entry point for VACUUM and ANALYZE commands.
180  *
181  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
182  * we process all relevant tables in the database.  For each VacuumRelation,
183  * if a valid OID is supplied, the table with that OID is what to process;
184  * otherwise, the VacuumRelation's RangeVar indicates what to process.
185  *
186  * params contains a set of parameters that can be used to customize the
187  * behavior.
188  *
189  * bstrategy is normally given as NULL, but in autovacuum it can be passed
190  * in to use the same buffer strategy object across multiple vacuum() calls.
191  *
192  * isTopLevel should be passed down from ProcessUtility.
193  *
194  * It is the caller's responsibility that all parameters are allocated in a
195  * memory context that will not disappear at transaction commit.
196  */
197 void
198 vacuum(List *relations, VacuumParams *params,
199            BufferAccessStrategy bstrategy, bool isTopLevel)
200 {
201         static bool in_vacuum = false;
202
203         const char *stmttype;
204         volatile bool in_outer_xact,
205                                 use_own_xacts;
206
207         Assert(params != NULL);
208
209         stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
210
211         /*
212          * We cannot run VACUUM inside a user transaction block; if we were inside
213          * a transaction, then our commit- and start-transaction-command calls
214          * would not have the intended effect!  There are numerous other subtle
215          * dependencies on this, too.
216          *
217          * ANALYZE (without VACUUM) can run either way.
218          */
219         if (params->options & VACOPT_VACUUM)
220         {
221                 PreventInTransactionBlock(isTopLevel, stmttype);
222                 in_outer_xact = false;
223         }
224         else
225                 in_outer_xact = IsInTransactionBlock(isTopLevel);
226
227         /*
228          * Due to static variables vac_context, anl_context and vac_strategy,
229          * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
230          * calls a hostile index expression that itself calls ANALYZE.
231          */
232         if (in_vacuum)
233                 ereport(ERROR,
234                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
235                                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
236                                                 stmttype)));
237
238         /*
239          * Sanity check DISABLE_PAGE_SKIPPING option.
240          */
241         if ((params->options & VACOPT_FULL) != 0 &&
242                 (params->options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
243                 ereport(ERROR,
244                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
245                                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
246
247         /*
248          * Send info about dead objects to the statistics collector, unless we are
249          * in autovacuum --- autovacuum.c does this for itself.
250          */
251         if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
252                 pgstat_vacuum_stat();
253
254         /*
255          * Create special memory context for cross-transaction storage.
256          *
257          * Since it is a child of PortalContext, it will go away eventually even
258          * if we suffer an error; there's no need for special abort cleanup logic.
259          */
260         vac_context = AllocSetContextCreate(PortalContext,
261                                                                                 "Vacuum",
262                                                                                 ALLOCSET_DEFAULT_SIZES);
263
264         /*
265          * If caller didn't give us a buffer strategy object, make one in the
266          * cross-transaction memory context.
267          */
268         if (bstrategy == NULL)
269         {
270                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
271
272                 bstrategy = GetAccessStrategy(BAS_VACUUM);
273                 MemoryContextSwitchTo(old_context);
274         }
275         vac_strategy = bstrategy;
276
277         /*
278          * Build list of relation(s) to process, putting any new data in
279          * vac_context for safekeeping.
280          */
281         if (relations != NIL)
282         {
283                 List       *newrels = NIL;
284                 ListCell   *lc;
285
286                 foreach(lc, relations)
287                 {
288                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
289                         List       *sublist;
290                         MemoryContext old_context;
291
292                         sublist = expand_vacuum_rel(vrel, params->options);
293                         old_context = MemoryContextSwitchTo(vac_context);
294                         newrels = list_concat(newrels, sublist);
295                         MemoryContextSwitchTo(old_context);
296                 }
297                 relations = newrels;
298         }
299         else
300                 relations = get_all_vacuum_rels(params->options);
301
302         /*
303          * Decide whether we need to start/commit our own transactions.
304          *
305          * For VACUUM (with or without ANALYZE): always do so, so that we can
306          * release locks as soon as possible.  (We could possibly use the outer
307          * transaction for a one-table VACUUM, but handling TOAST tables would be
308          * problematic.)
309          *
310          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
311          * start/commit our own transactions.  Also, there's no need to do so if
312          * only processing one relation.  For multiple relations when not within a
313          * transaction block, and also in an autovacuum worker, use own
314          * transactions so we can release locks sooner.
315          */
316         if (params->options & VACOPT_VACUUM)
317                 use_own_xacts = true;
318         else
319         {
320                 Assert(params->options & VACOPT_ANALYZE);
321                 if (IsAutoVacuumWorkerProcess())
322                         use_own_xacts = true;
323                 else if (in_outer_xact)
324                         use_own_xacts = false;
325                 else if (list_length(relations) > 1)
326                         use_own_xacts = true;
327                 else
328                         use_own_xacts = false;
329         }
330
331         /*
332          * vacuum_rel expects to be entered with no transaction active; it will
333          * start and commit its own transaction.  But we are called by an SQL
334          * command, and so we are executing inside a transaction already. We
335          * commit the transaction started in PostgresMain() here, and start
336          * another one before exiting to match the commit waiting for us back in
337          * PostgresMain().
338          */
339         if (use_own_xacts)
340         {
341                 Assert(!in_outer_xact);
342
343                 /* ActiveSnapshot is not set by autovacuum */
344                 if (ActiveSnapshotSet())
345                         PopActiveSnapshot();
346
347                 /* matches the StartTransaction in PostgresMain() */
348                 CommitTransactionCommand();
349         }
350
351         /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
352         PG_TRY();
353         {
354                 ListCell   *cur;
355
356                 in_vacuum = true;
357                 VacuumCostActive = (VacuumCostDelay > 0);
358                 VacuumCostBalance = 0;
359                 VacuumPageHit = 0;
360                 VacuumPageMiss = 0;
361                 VacuumPageDirty = 0;
362
363                 /*
364                  * Loop to process each selected relation.
365                  */
366                 foreach(cur, relations)
367                 {
368                         VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
369
370                         if (params->options & VACOPT_VACUUM)
371                         {
372                                 if (!vacuum_rel(vrel->oid, vrel->relation, params))
373                                         continue;
374                         }
375
376                         if (params->options & VACOPT_ANALYZE)
377                         {
378                                 /*
379                                  * If using separate xacts, start one for analyze. Otherwise,
380                                  * we can use the outer transaction.
381                                  */
382                                 if (use_own_xacts)
383                                 {
384                                         StartTransactionCommand();
385                                         /* functions in indexes may want a snapshot set */
386                                         PushActiveSnapshot(GetTransactionSnapshot());
387                                 }
388
389                                 analyze_rel(vrel->oid, vrel->relation, params,
390                                                         vrel->va_cols, in_outer_xact, vac_strategy);
391
392                                 if (use_own_xacts)
393                                 {
394                                         PopActiveSnapshot();
395                                         CommitTransactionCommand();
396                                 }
397                         }
398                 }
399         }
400         PG_CATCH();
401         {
402                 in_vacuum = false;
403                 VacuumCostActive = false;
404                 PG_RE_THROW();
405         }
406         PG_END_TRY();
407
408         in_vacuum = false;
409         VacuumCostActive = false;
410
411         /*
412          * Finish up processing.
413          */
414         if (use_own_xacts)
415         {
416                 /* here, we are not in a transaction */
417
418                 /*
419                  * This matches the CommitTransaction waiting for us in
420                  * PostgresMain().
421                  */
422                 StartTransactionCommand();
423         }
424
425         if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
426         {
427                 /*
428                  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
429                  * (autovacuum.c does this for itself.)
430                  */
431                 vac_update_datfrozenxid();
432         }
433
434         /*
435          * Clean up working storage --- note we must do this after
436          * StartTransactionCommand, else we might be trying to delete the active
437          * context!
438          */
439         MemoryContextDelete(vac_context);
440         vac_context = NULL;
441 }
442
443 /*
444  * Check if a given relation can be safely vacuumed or analyzed.  If the
445  * user is not the relation owner, issue a WARNING log message and return
446  * false to let the caller decide what to do with this relation.  This
447  * routine is used to decide if a relation can be processed for VACUUM or
448  * ANALYZE.
449  */
450 bool
451 vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options)
452 {
453         char       *relname;
454
455         Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
456
457         /*
458          * Check permissions.
459          *
460          * We allow the user to vacuum or analyze a table if he is superuser, the
461          * table owner, or the database owner (but in the latter case, only if
462          * it's not a shared relation).  pg_class_ownercheck includes the
463          * superuser case.
464          *
465          * Note we choose to treat permissions failure as a WARNING and keep
466          * trying to vacuum or analyze the rest of the DB --- is this appropriate?
467          */
468         if (pg_class_ownercheck(relid, GetUserId()) ||
469                 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !reltuple->relisshared))
470                 return true;
471
472         relname = NameStr(reltuple->relname);
473
474         if ((options & VACOPT_VACUUM) != 0)
475         {
476                 if (reltuple->relisshared)
477                         ereport(WARNING,
478                                         (errmsg("skipping \"%s\" --- only superuser can vacuum it",
479                                                         relname)));
480                 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
481                         ereport(WARNING,
482                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
483                                                         relname)));
484                 else
485                         ereport(WARNING,
486                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
487                                                         relname)));
488
489                 /*
490                  * For VACUUM ANALYZE, both logs could show up, but just generate
491                  * information for VACUUM as that would be the first one to be
492                  * processed.
493                  */
494                 return false;
495         }
496
497         if ((options & VACOPT_ANALYZE) != 0)
498         {
499                 if (reltuple->relisshared)
500                         ereport(WARNING,
501                                         (errmsg("skipping \"%s\" --- only superuser can analyze it",
502                                                         relname)));
503                 else if (reltuple->relnamespace == PG_CATALOG_NAMESPACE)
504                         ereport(WARNING,
505                                         (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
506                                                         relname)));
507                 else
508                         ereport(WARNING,
509                                         (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
510                                                         relname)));
511         }
512
513         return false;
514 }
515
516
517 /*
518  * vacuum_open_relation
519  *
520  * This routine is used for attempting to open and lock a relation which
521  * is going to be vacuumed or analyzed.  If the relation cannot be opened
522  * or locked, a log is emitted if possible.
523  */
524 Relation
525 vacuum_open_relation(Oid relid, RangeVar *relation, int options,
526                                          bool verbose, LOCKMODE lmode)
527 {
528         Relation        onerel;
529         bool            rel_lock = true;
530         int                     elevel;
531
532         Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
533
534         /*
535          * Open the relation and get the appropriate lock on it.
536          *
537          * There's a race condition here: the relation may have gone away since
538          * the last time we saw it.  If so, we don't need to vacuum or analyze it.
539          *
540          * If we've been asked not to wait for the relation lock, acquire it first
541          * in non-blocking mode, before calling try_relation_open().
542          */
543         if (!(options & VACOPT_SKIP_LOCKED))
544                 onerel = try_relation_open(relid, lmode);
545         else if (ConditionalLockRelationOid(relid, lmode))
546                 onerel = try_relation_open(relid, NoLock);
547         else
548         {
549                 onerel = NULL;
550                 rel_lock = false;
551         }
552
553         /* if relation is opened, leave */
554         if (onerel)
555                 return onerel;
556
557         /*
558          * Relation could not be opened, hence generate if possible a log
559          * informing on the situation.
560          *
561          * If the RangeVar is not defined, we do not have enough information to
562          * provide a meaningful log statement.  Chances are that the caller has
563          * intentionally not provided this information so that this logging is
564          * skipped, anyway.
565          */
566         if (relation == NULL)
567                 return NULL;
568
569         /*
570          * Determine the log level.
571          *
572          * For manual VACUUM or ANALYZE, we emit a WARNING to match the log statements
573          * in the permission checks; otherwise, only log if the caller so requested.
574          */
575         if (!IsAutoVacuumWorkerProcess())
576                 elevel = WARNING;
577         else if (verbose)
578                 elevel = LOG;
579         else
580                 return NULL;
581
582         if ((options & VACOPT_VACUUM) != 0)
583         {
584                 if (!rel_lock)
585                         ereport(elevel,
586                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
587                                          errmsg("skipping vacuum of \"%s\" --- lock not available",
588                                                         relation->relname)));
589                 else
590                         ereport(elevel,
591                                         (errcode(ERRCODE_UNDEFINED_TABLE),
592                                          errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
593                                                         relation->relname)));
594
595                 /*
596                  * For VACUUM ANALYZE, both logs could show up, but just generate
597                  * information for VACUUM as that would be the first one to be
598                  * processed.
599                  */
600                 return NULL;
601         }
602
603         if ((options & VACOPT_ANALYZE) != 0)
604         {
605                 if (!rel_lock)
606                         ereport(elevel,
607                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
608                                          errmsg("skipping analyze of \"%s\" --- lock not available",
609                                                         relation->relname)));
610                 else
611                         ereport(elevel,
612                                         (errcode(ERRCODE_UNDEFINED_TABLE),
613                                          errmsg("skipping analyze of \"%s\" --- relation no longer exists",
614                                                         relation->relname)));
615         }
616
617         return NULL;
618 }
619
620
621 /*
622  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
623  * and optionally add VacuumRelations for partitions of the table.
624  *
625  * If a VacuumRelation does not have an OID supplied and is a partitioned
626  * table, an extra entry will be added to the output for each partition.
627  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
628  * it does not want us to expand partitioned tables.
629  *
630  * We take care not to modify the input data structure, but instead build
631  * new VacuumRelation(s) to return.  (But note that they will reference
632  * unmodified parts of the input, eg column lists.)  New data structures
633  * are made in vac_context.
634  */
635 static List *
636 expand_vacuum_rel(VacuumRelation *vrel, int options)
637 {
638         List       *vacrels = NIL;
639         MemoryContext oldcontext;
640
641         /* If caller supplied OID, there's nothing we need do here. */
642         if (OidIsValid(vrel->oid))
643         {
644                 oldcontext = MemoryContextSwitchTo(vac_context);
645                 vacrels = lappend(vacrels, vrel);
646                 MemoryContextSwitchTo(oldcontext);
647         }
648         else
649         {
650                 /* Process a specific relation, and possibly partitions thereof */
651                 Oid                     relid;
652                 HeapTuple       tuple;
653                 Form_pg_class classForm;
654                 bool            include_parts;
655                 int                     rvr_opts;
656
657                 /*
658                  * Since autovacuum workers supply OIDs when calling vacuum(), no
659                  * autovacuum worker should reach this code.
660                  */
661                 Assert(!IsAutoVacuumWorkerProcess());
662
663                 /*
664                  * We transiently take AccessShareLock to protect the syscache lookup
665                  * below, as well as find_all_inheritors's expectation that the caller
666                  * holds some lock on the starting relation.
667                  */
668                 rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
669                 relid = RangeVarGetRelidExtended(vrel->relation,
670                                                                                  AccessShareLock,
671                                                                                  rvr_opts,
672                                                                                  NULL, NULL);
673
674                 /*
675                  * If the lock is unavailable, emit the same log statement that
676                  * vacuum_rel() and analyze_rel() would.
677                  */
678                 if (!OidIsValid(relid))
679                 {
680                         if (options & VACOPT_VACUUM)
681                                 ereport(WARNING,
682                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
683                                                  errmsg("skipping vacuum of \"%s\" --- lock not available",
684                                                                 vrel->relation->relname)));
685                         else
686                                 ereport(WARNING,
687                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
688                                                  errmsg("skipping analyze of \"%s\" --- lock not available",
689                                                                 vrel->relation->relname)));
690                         return vacrels;
691                 }
692
693                 /*
694                  * To check whether the relation is a partitioned table and its
695                  * ownership, fetch its syscache entry.
696                  */
697                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
698                 if (!HeapTupleIsValid(tuple))
699                         elog(ERROR, "cache lookup failed for relation %u", relid);
700                 classForm = (Form_pg_class) GETSTRUCT(tuple);
701
702                 /*
703                  * Make a returnable VacuumRelation for this rel if user is a proper
704                  * owner.
705                  */
706                 if (vacuum_is_relation_owner(relid, classForm, options))
707                 {
708                         oldcontext = MemoryContextSwitchTo(vac_context);
709                         vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
710                                                                                                                   relid,
711                                                                                                                   vrel->va_cols));
712                         MemoryContextSwitchTo(oldcontext);
713                 }
714
715
716                 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
717                 ReleaseSysCache(tuple);
718
719                 /*
720                  * If it is, make relation list entries for its partitions.  Note that
721                  * the list returned by find_all_inheritors() includes the passed-in
722                  * OID, so we have to skip that.  There's no point in taking locks on
723                  * the individual partitions yet, and doing so would just add
724                  * unnecessary deadlock risk.  For this last reason we do not check
725                  * yet the ownership of the partitions, which get added to the list to
726                  * process.  Ownership will be checked later on anyway.
727                  */
728                 if (include_parts)
729                 {
730                         List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
731                         ListCell   *part_lc;
732
733                         foreach(part_lc, part_oids)
734                         {
735                                 Oid                     part_oid = lfirst_oid(part_lc);
736
737                                 if (part_oid == relid)
738                                         continue;       /* ignore original table */
739
740                                 /*
741                                  * We omit a RangeVar since it wouldn't be appropriate to
742                                  * complain about failure to open one of these relations
743                                  * later.
744                                  */
745                                 oldcontext = MemoryContextSwitchTo(vac_context);
746                                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
747                                                                                                                           part_oid,
748                                                                                                                           vrel->va_cols));
749                                 MemoryContextSwitchTo(oldcontext);
750                         }
751                 }
752
753                 /*
754                  * Release lock again.  This means that by the time we actually try to
755                  * process the table, it might be gone or renamed.  In the former case
756                  * we'll silently ignore it; in the latter case we'll process it
757                  * anyway, but we must beware that the RangeVar doesn't necessarily
758                  * identify it anymore.  This isn't ideal, perhaps, but there's little
759                  * practical alternative, since we're typically going to commit this
760                  * transaction and begin a new one between now and then.  Moreover,
761                  * holding locks on multiple relations would create significant risk
762                  * of deadlock.
763                  */
764                 UnlockRelationOid(relid, AccessShareLock);
765         }
766
767         return vacrels;
768 }
769
770 /*
771  * Construct a list of VacuumRelations for all vacuumable rels in
772  * the current database.  The list is built in vac_context.
773  */
774 static List *
775 get_all_vacuum_rels(int options)
776 {
777         List       *vacrels = NIL;
778         Relation        pgclass;
779         TableScanDesc scan;
780         HeapTuple       tuple;
781
782         pgclass = table_open(RelationRelationId, AccessShareLock);
783
784         scan = table_beginscan_catalog(pgclass, 0, NULL);
785
786         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
787         {
788                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
789                 MemoryContext oldcontext;
790                 Oid                     relid = classForm->oid;
791
792                 /* check permissions of relation */
793                 if (!vacuum_is_relation_owner(relid, classForm, options))
794                         continue;
795
796                 /*
797                  * We include partitioned tables here; depending on which operation is
798                  * to be performed, caller will decide whether to process or ignore
799                  * them.
800                  */
801                 if (classForm->relkind != RELKIND_RELATION &&
802                         classForm->relkind != RELKIND_MATVIEW &&
803                         classForm->relkind != RELKIND_PARTITIONED_TABLE)
804                         continue;
805
806                 /*
807                  * Build VacuumRelation(s) specifying the table OIDs to be processed.
808                  * We omit a RangeVar since it wouldn't be appropriate to complain
809                  * about failure to open one of these relations later.
810                  */
811                 oldcontext = MemoryContextSwitchTo(vac_context);
812                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
813                                                                                                           relid,
814                                                                                                           NIL));
815                 MemoryContextSwitchTo(oldcontext);
816         }
817
818         table_endscan(scan);
819         table_close(pgclass, AccessShareLock);
820
821         return vacrels;
822 }
823
824 /*
825  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
826  *
827  * The output parameters are:
828  * - oldestXmin is the cutoff value used to distinguish whether tuples are
829  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
830  * - freezeLimit is the Xid below which all Xids are replaced by
831  *       FrozenTransactionId during vacuum.
832  * - xidFullScanLimit (computed from table_freeze_age parameter)
833  *       represents a minimum Xid value; a table whose relfrozenxid is older than
834  *       this will have a full-table vacuum applied to it, to freeze tuples across
835  *       the whole table.  Vacuuming a table younger than this value can use a
836  *       partial scan.
837  * - multiXactCutoff is the value below which all MultiXactIds are removed from
838  *       Xmax.
839  * - mxactFullScanLimit is a value against which a table's relminmxid value is
840  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
841  *
842  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
843  * not interested.
844  */
845 void
846 vacuum_set_xid_limits(Relation rel,
847                                           int freeze_min_age,
848                                           int freeze_table_age,
849                                           int multixact_freeze_min_age,
850                                           int multixact_freeze_table_age,
851                                           TransactionId *oldestXmin,
852                                           TransactionId *freezeLimit,
853                                           TransactionId *xidFullScanLimit,
854                                           MultiXactId *multiXactCutoff,
855                                           MultiXactId *mxactFullScanLimit)
856 {
857         int                     freezemin;
858         int                     mxid_freezemin;
859         int                     effective_multixact_freeze_max_age;
860         TransactionId limit;
861         TransactionId safeLimit;
862         MultiXactId mxactLimit;
863         MultiXactId safeMxactLimit;
864
865         /*
866          * We can always ignore processes running lazy vacuum.  This is because we
867          * use these values only for deciding which tuples we must keep in the
868          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
869          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
870          * a full vacuum, but keep in mind that only one vacuum process can be
871          * working on a particular table at any time, and that each vacuum is
872          * always an independent transaction.
873          */
874         *oldestXmin =
875                 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
876
877         Assert(TransactionIdIsNormal(*oldestXmin));
878
879         /*
880          * Determine the minimum freeze age to use: as specified by the caller, or
881          * vacuum_freeze_min_age, but in any case not more than half
882          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
883          * wraparound won't occur too frequently.
884          */
885         freezemin = freeze_min_age;
886         if (freezemin < 0)
887                 freezemin = vacuum_freeze_min_age;
888         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
889         Assert(freezemin >= 0);
890
891         /*
892          * Compute the cutoff XID, being careful not to generate a "permanent" XID
893          */
894         limit = *oldestXmin - freezemin;
895         if (!TransactionIdIsNormal(limit))
896                 limit = FirstNormalTransactionId;
897
898         /*
899          * If oldestXmin is very far back (in practice, more than
900          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
901          * freeze age of zero.
902          */
903         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
904         if (!TransactionIdIsNormal(safeLimit))
905                 safeLimit = FirstNormalTransactionId;
906
907         if (TransactionIdPrecedes(limit, safeLimit))
908         {
909                 ereport(WARNING,
910                                 (errmsg("oldest xmin is far in the past"),
911                                  errhint("Close open transactions soon to avoid wraparound problems.\n"
912                                                  "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
913                 limit = *oldestXmin;
914         }
915
916         *freezeLimit = limit;
917
918         /*
919          * Compute the multixact age for which freezing is urgent.  This is
920          * normally autovacuum_multixact_freeze_max_age, but may be less if we are
921          * short of multixact member space.
922          */
923         effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
924
925         /*
926          * Determine the minimum multixact freeze age to use: as specified by
927          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
928          * than half effective_multixact_freeze_max_age, so that autovacuums to
929          * prevent MultiXact wraparound won't occur too frequently.
930          */
931         mxid_freezemin = multixact_freeze_min_age;
932         if (mxid_freezemin < 0)
933                 mxid_freezemin = vacuum_multixact_freeze_min_age;
934         mxid_freezemin = Min(mxid_freezemin,
935                                                  effective_multixact_freeze_max_age / 2);
936         Assert(mxid_freezemin >= 0);
937
938         /* compute the cutoff multi, being careful to generate a valid value */
939         mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
940         if (mxactLimit < FirstMultiXactId)
941                 mxactLimit = FirstMultiXactId;
942
943         safeMxactLimit =
944                 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
945         if (safeMxactLimit < FirstMultiXactId)
946                 safeMxactLimit = FirstMultiXactId;
947
948         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
949         {
950                 ereport(WARNING,
951                                 (errmsg("oldest multixact is far in the past"),
952                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
953                 mxactLimit = safeMxactLimit;
954         }
955
956         *multiXactCutoff = mxactLimit;
957
958         if (xidFullScanLimit != NULL)
959         {
960                 int                     freezetable;
961
962                 Assert(mxactFullScanLimit != NULL);
963
964                 /*
965                  * Determine the table freeze age to use: as specified by the caller,
966                  * or vacuum_freeze_table_age, but in any case not more than
967                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
968                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
969                  * before anti-wraparound autovacuum is launched.
970                  */
971                 freezetable = freeze_table_age;
972                 if (freezetable < 0)
973                         freezetable = vacuum_freeze_table_age;
974                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
975                 Assert(freezetable >= 0);
976
977                 /*
978                  * Compute XID limit causing a full-table vacuum, being careful not to
979                  * generate a "permanent" XID.
980                  */
981                 limit = ReadNewTransactionId() - freezetable;
982                 if (!TransactionIdIsNormal(limit))
983                         limit = FirstNormalTransactionId;
984
985                 *xidFullScanLimit = limit;
986
987                 /*
988                  * Similar to the above, determine the table freeze age to use for
989                  * multixacts: as specified by the caller, or
990                  * vacuum_multixact_freeze_table_age, but in any case not more than
991                  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
992                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
993                  * freeze multixacts before anti-wraparound autovacuum is launched.
994                  */
995                 freezetable = multixact_freeze_table_age;
996                 if (freezetable < 0)
997                         freezetable = vacuum_multixact_freeze_table_age;
998                 freezetable = Min(freezetable,
999                                                   effective_multixact_freeze_max_age * 0.95);
1000                 Assert(freezetable >= 0);
1001
1002                 /*
1003                  * Compute MultiXact limit causing a full-table vacuum, being careful
1004                  * to generate a valid MultiXact value.
1005                  */
1006                 mxactLimit = ReadNextMultiXactId() - freezetable;
1007                 if (mxactLimit < FirstMultiXactId)
1008                         mxactLimit = FirstMultiXactId;
1009
1010                 *mxactFullScanLimit = mxactLimit;
1011         }
1012         else
1013         {
1014                 Assert(mxactFullScanLimit == NULL);
1015         }
1016 }
1017
1018 /*
1019  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1020  *
1021  *              If we scanned the whole relation then we should just use the count of
1022  *              live tuples seen; but if we did not, we should not blindly extrapolate
1023  *              from that number, since VACUUM may have scanned a quite nonrandom
1024  *              subset of the table.  When we have only partial information, we take
1025  *              the old value of pg_class.reltuples as a measurement of the
1026  *              tuple density in the unscanned pages.
1027  *
1028  *              Note: scanned_tuples should count only *live* tuples, since
1029  *              pg_class.reltuples is defined that way.
1030  */
1031 double
1032 vac_estimate_reltuples(Relation relation,
1033                                            BlockNumber total_pages,
1034                                            BlockNumber scanned_pages,
1035                                            double scanned_tuples)
1036 {
1037         BlockNumber old_rel_pages = relation->rd_rel->relpages;
1038         double          old_rel_tuples = relation->rd_rel->reltuples;
1039         double          old_density;
1040         double          unscanned_pages;
1041         double          total_tuples;
1042
1043         /* If we did scan the whole table, just use the count as-is */
1044         if (scanned_pages >= total_pages)
1045                 return scanned_tuples;
1046
1047         /*
1048          * If scanned_pages is zero but total_pages isn't, keep the existing value
1049          * of reltuples.  (Note: callers should avoid updating the pg_class
1050          * statistics in this situation, since no new information has been
1051          * provided.)
1052          */
1053         if (scanned_pages == 0)
1054                 return old_rel_tuples;
1055
1056         /*
1057          * If old value of relpages is zero, old density is indeterminate; we
1058          * can't do much except scale up scanned_tuples to match total_pages.
1059          */
1060         if (old_rel_pages == 0)
1061                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1062
1063         /*
1064          * Okay, we've covered the corner cases.  The normal calculation is to
1065          * convert the old measurement to a density (tuples per page), then
1066          * estimate the number of tuples in the unscanned pages using that figure,
1067          * and finally add on the number of tuples in the scanned pages.
1068          */
1069         old_density = old_rel_tuples / old_rel_pages;
1070         unscanned_pages = (double) total_pages - (double) scanned_pages;
1071         total_tuples = old_density * unscanned_pages + scanned_tuples;
1072         return floor(total_tuples + 0.5);
1073 }
1074
1075
1076 /*
1077  *      vac_update_relstats() -- update statistics for one relation
1078  *
1079  *              Update the whole-relation statistics that are kept in its pg_class
1080  *              row.  There are additional stats that will be updated if we are
1081  *              doing ANALYZE, but we always update these stats.  This routine works
1082  *              for both index and heap relation entries in pg_class.
1083  *
1084  *              We violate transaction semantics here by overwriting the rel's
1085  *              existing pg_class tuple with the new values.  This is reasonably
1086  *              safe as long as we're sure that the new values are correct whether or
1087  *              not this transaction commits.  The reason for doing this is that if
1088  *              we updated these tuples in the usual way, vacuuming pg_class itself
1089  *              wouldn't work very well --- by the time we got done with a vacuum
1090  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
1091  *              course, this only works for fixed-size not-null columns, but these are.
1092  *
1093  *              Another reason for doing it this way is that when we are in a lazy
1094  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1095  *              Somebody vacuuming pg_class might think they could delete a tuple
1096  *              marked with xmin = our xid.
1097  *
1098  *              In addition to fundamentally nontransactional statistics such as
1099  *              relpages and relallvisible, we try to maintain certain lazily-updated
1100  *              DDL flags such as relhasindex, by clearing them if no longer correct.
1101  *              It's safe to do this in VACUUM, which can't run in parallel with
1102  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1103  *              However, it's *not* safe to do it in an ANALYZE that's within an
1104  *              outer transaction, because for example the current transaction might
1105  *              have dropped the last index; then we'd think relhasindex should be
1106  *              cleared, but if the transaction later rolls back this would be wrong.
1107  *              So we refrain from updating the DDL flags if we're inside an outer
1108  *              transaction.  This is OK since postponing the flag maintenance is
1109  *              always allowable.
1110  *
1111  *              Note: num_tuples should count only *live* tuples, since
1112  *              pg_class.reltuples is defined that way.
1113  *
1114  *              This routine is shared by VACUUM and ANALYZE.
1115  */
1116 void
1117 vac_update_relstats(Relation relation,
1118                                         BlockNumber num_pages, double num_tuples,
1119                                         BlockNumber num_all_visible_pages,
1120                                         bool hasindex, TransactionId frozenxid,
1121                                         MultiXactId minmulti,
1122                                         bool in_outer_xact)
1123 {
1124         Oid                     relid = RelationGetRelid(relation);
1125         Relation        rd;
1126         HeapTuple       ctup;
1127         Form_pg_class pgcform;
1128         bool            dirty;
1129
1130         rd = table_open(RelationRelationId, RowExclusiveLock);
1131
1132         /* Fetch a copy of the tuple to scribble on */
1133         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1134         if (!HeapTupleIsValid(ctup))
1135                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1136                          relid);
1137         pgcform = (Form_pg_class) GETSTRUCT(ctup);
1138
1139         /* Apply statistical updates, if any, to copied tuple */
1140
1141         dirty = false;
1142         if (pgcform->relpages != (int32) num_pages)
1143         {
1144                 pgcform->relpages = (int32) num_pages;
1145                 dirty = true;
1146         }
1147         if (pgcform->reltuples != (float4) num_tuples)
1148         {
1149                 pgcform->reltuples = (float4) num_tuples;
1150                 dirty = true;
1151         }
1152         if (pgcform->relallvisible != (int32) num_all_visible_pages)
1153         {
1154                 pgcform->relallvisible = (int32) num_all_visible_pages;
1155                 dirty = true;
1156         }
1157
1158         /* Apply DDL updates, but not inside an outer transaction (see above) */
1159
1160         if (!in_outer_xact)
1161         {
1162                 /*
1163                  * If we didn't find any indexes, reset relhasindex.
1164                  */
1165                 if (pgcform->relhasindex && !hasindex)
1166                 {
1167                         pgcform->relhasindex = false;
1168                         dirty = true;
1169                 }
1170
1171                 /* We also clear relhasrules and relhastriggers if needed */
1172                 if (pgcform->relhasrules && relation->rd_rules == NULL)
1173                 {
1174                         pgcform->relhasrules = false;
1175                         dirty = true;
1176                 }
1177                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
1178                 {
1179                         pgcform->relhastriggers = false;
1180                         dirty = true;
1181                 }
1182         }
1183
1184         /*
1185          * Update relfrozenxid, unless caller passed InvalidTransactionId
1186          * indicating it has no new data.
1187          *
1188          * Ordinarily, we don't let relfrozenxid go backwards: if things are
1189          * working correctly, the only way the new frozenxid could be older would
1190          * be if a previous VACUUM was done with a tighter freeze_min_age, in
1191          * which case we don't want to forget the work it already did.  However,
1192          * if the stored relfrozenxid is "in the future", then it must be corrupt
1193          * and it seems best to overwrite it with the cutoff we used this time.
1194          * This should match vac_update_datfrozenxid() concerning what we consider
1195          * to be "in the future".
1196          */
1197         if (TransactionIdIsNormal(frozenxid) &&
1198                 pgcform->relfrozenxid != frozenxid &&
1199                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
1200                  TransactionIdPrecedes(ReadNewTransactionId(),
1201                                                            pgcform->relfrozenxid)))
1202         {
1203                 pgcform->relfrozenxid = frozenxid;
1204                 dirty = true;
1205         }
1206
1207         /* Similarly for relminmxid */
1208         if (MultiXactIdIsValid(minmulti) &&
1209                 pgcform->relminmxid != minmulti &&
1210                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
1211                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
1212         {
1213                 pgcform->relminmxid = minmulti;
1214                 dirty = true;
1215         }
1216
1217         /* If anything changed, write out the tuple. */
1218         if (dirty)
1219                 heap_inplace_update(rd, ctup);
1220
1221         table_close(rd, RowExclusiveLock);
1222 }
1223
1224
1225 /*
1226  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1227  *
1228  *              Update pg_database's datfrozenxid entry for our database to be the
1229  *              minimum of the pg_class.relfrozenxid values.
1230  *
1231  *              Similarly, update our datminmxid to be the minimum of the
1232  *              pg_class.relminmxid values.
1233  *
1234  *              If we are able to advance either pg_database value, also try to
1235  *              truncate pg_xact and pg_multixact.
1236  *
1237  *              We violate transaction semantics here by overwriting the database's
1238  *              existing pg_database tuple with the new values.  This is reasonably
1239  *              safe since the new values are correct whether or not this transaction
1240  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
1241  *              behind after a VACUUM.
1242  */
1243 void
1244 vac_update_datfrozenxid(void)
1245 {
1246         HeapTuple       tuple;
1247         Form_pg_database dbform;
1248         Relation        relation;
1249         SysScanDesc scan;
1250         HeapTuple       classTup;
1251         TransactionId newFrozenXid;
1252         MultiXactId newMinMulti;
1253         TransactionId lastSaneFrozenXid;
1254         MultiXactId lastSaneMinMulti;
1255         bool            bogus = false;
1256         bool            dirty = false;
1257
1258         /*
1259          * Initialize the "min" calculation with GetOldestXmin, which is a
1260          * reasonable approximation to the minimum relfrozenxid for not-yet-
1261          * committed pg_class entries for new tables; see AddNewRelationTuple().
1262          * So we cannot produce a wrong minimum by starting with this.
1263          */
1264         newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1265
1266         /*
1267          * Similarly, initialize the MultiXact "min" with the value that would be
1268          * used on pg_class for new tables.  See AddNewRelationTuple().
1269          */
1270         newMinMulti = GetOldestMultiXactId();
1271
1272         /*
1273          * Identify the latest relfrozenxid and relminmxid values that we could
1274          * validly see during the scan.  These are conservative values, but it's
1275          * not really worth trying to be more exact.
1276          */
1277         lastSaneFrozenXid = ReadNewTransactionId();
1278         lastSaneMinMulti = ReadNextMultiXactId();
1279
1280         /*
1281          * We must seqscan pg_class to find the minimum Xid, because there is no
1282          * index that can help us here.
1283          */
1284         relation = table_open(RelationRelationId, AccessShareLock);
1285
1286         scan = systable_beginscan(relation, InvalidOid, false,
1287                                                           NULL, 0, NULL);
1288
1289         while ((classTup = systable_getnext(scan)) != NULL)
1290         {
1291                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1292
1293                 /*
1294                  * Only consider relations able to hold unfrozen XIDs (anything else
1295                  * should have InvalidTransactionId in relfrozenxid anyway.)
1296                  */
1297                 if (classForm->relkind != RELKIND_RELATION &&
1298                         classForm->relkind != RELKIND_MATVIEW &&
1299                         classForm->relkind != RELKIND_TOASTVALUE)
1300                         continue;
1301
1302                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1303                 Assert(MultiXactIdIsValid(classForm->relminmxid));
1304
1305                 /*
1306                  * If things are working properly, no relation should have a
1307                  * relfrozenxid or relminmxid that is "in the future".  However, such
1308                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1309                  * see any entries that are "in the future", chicken out and don't do
1310                  * anything.  This ensures we won't truncate clog before those
1311                  * relations have been scanned and cleaned up.
1312                  */
1313                 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1314                         MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1315                 {
1316                         bogus = true;
1317                         break;
1318                 }
1319
1320                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1321                         newFrozenXid = classForm->relfrozenxid;
1322
1323                 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1324                         newMinMulti = classForm->relminmxid;
1325         }
1326
1327         /* we're done with pg_class */
1328         systable_endscan(scan);
1329         table_close(relation, AccessShareLock);
1330
1331         /* chicken out if bogus data found */
1332         if (bogus)
1333                 return;
1334
1335         Assert(TransactionIdIsNormal(newFrozenXid));
1336         Assert(MultiXactIdIsValid(newMinMulti));
1337
1338         /* Now fetch the pg_database tuple we need to update. */
1339         relation = table_open(DatabaseRelationId, RowExclusiveLock);
1340
1341         /* Fetch a copy of the tuple to scribble on */
1342         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1343         if (!HeapTupleIsValid(tuple))
1344                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1345         dbform = (Form_pg_database) GETSTRUCT(tuple);
1346
1347         /*
1348          * As in vac_update_relstats(), we ordinarily don't want to let
1349          * datfrozenxid go backward; but if it's "in the future" then it must be
1350          * corrupt and it seems best to overwrite it.
1351          */
1352         if (dbform->datfrozenxid != newFrozenXid &&
1353                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1354                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1355         {
1356                 dbform->datfrozenxid = newFrozenXid;
1357                 dirty = true;
1358         }
1359         else
1360                 newFrozenXid = dbform->datfrozenxid;
1361
1362         /* Ditto for datminmxid */
1363         if (dbform->datminmxid != newMinMulti &&
1364                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1365                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1366         {
1367                 dbform->datminmxid = newMinMulti;
1368                 dirty = true;
1369         }
1370         else
1371                 newMinMulti = dbform->datminmxid;
1372
1373         if (dirty)
1374                 heap_inplace_update(relation, tuple);
1375
1376         heap_freetuple(tuple);
1377         table_close(relation, RowExclusiveLock);
1378
1379         /*
1380          * If we were able to advance datfrozenxid or datminmxid, see if we can
1381          * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1382          * XID-wrap-limit info is stale, since this action will update that too.
1383          */
1384         if (dirty || ForceTransactionIdLimitUpdate())
1385                 vac_truncate_clog(newFrozenXid, newMinMulti,
1386                                                   lastSaneFrozenXid, lastSaneMinMulti);
1387 }
1388
1389
1390 /*
1391  *      vac_truncate_clog() -- attempt to truncate the commit log
1392  *
1393  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
1394  *              and use it to truncate the transaction commit log (pg_xact).
1395  *              Also update the XID wrap limit info maintained by varsup.c.
1396  *              Likewise for datminmxid.
1397  *
1398  *              The passed frozenXID and minMulti are the updated values for my own
1399  *              pg_database entry. They're used to initialize the "min" calculations.
1400  *              The caller also passes the "last sane" XID and MXID, since it has
1401  *              those at hand already.
1402  *
1403  *              This routine is only invoked when we've managed to change our
1404  *              DB's datfrozenxid/datminmxid values, or we found that the shared
1405  *              XID-wrap-limit info is stale.
1406  */
1407 static void
1408 vac_truncate_clog(TransactionId frozenXID,
1409                                   MultiXactId minMulti,
1410                                   TransactionId lastSaneFrozenXid,
1411                                   MultiXactId lastSaneMinMulti)
1412 {
1413         TransactionId nextXID = ReadNewTransactionId();
1414         Relation        relation;
1415         TableScanDesc scan;
1416         HeapTuple       tuple;
1417         Oid                     oldestxid_datoid;
1418         Oid                     minmulti_datoid;
1419         bool            bogus = false;
1420         bool            frozenAlreadyWrapped = false;
1421
1422         /* init oldest datoids to sync with my frozenXID/minMulti values */
1423         oldestxid_datoid = MyDatabaseId;
1424         minmulti_datoid = MyDatabaseId;
1425
1426         /*
1427          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1428          *
1429          * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1430          * the values could change while we look at them.  Fetch each one just
1431          * once to ensure sane behavior of the comparison logic.  (Here, as in
1432          * many other places, we assume that fetching or updating an XID in shared
1433          * storage is atomic.)
1434          *
1435          * Note: we need not worry about a race condition with new entries being
1436          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1437          * existing DB's datfrozenxid, and that source DB cannot be ours because
1438          * of the interlock against copying a DB containing an active backend.
1439          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1440          * concurrently modify the datfrozenxid's of different databases, the
1441          * worst possible outcome is that pg_xact is not truncated as aggressively
1442          * as it could be.
1443          */
1444         relation = table_open(DatabaseRelationId, AccessShareLock);
1445
1446         scan = table_beginscan_catalog(relation, 0, NULL);
1447
1448         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1449         {
1450                 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1451                 TransactionId datfrozenxid = dbform->datfrozenxid;
1452                 TransactionId datminmxid = dbform->datminmxid;
1453
1454                 Assert(TransactionIdIsNormal(datfrozenxid));
1455                 Assert(MultiXactIdIsValid(datminmxid));
1456
1457                 /*
1458                  * If things are working properly, no database should have a
1459                  * datfrozenxid or datminmxid that is "in the future".  However, such
1460                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1461                  * see any entries that are "in the future", chicken out and don't do
1462                  * anything.  This ensures we won't truncate clog before those
1463                  * databases have been scanned and cleaned up.  (We will issue the
1464                  * "already wrapped" warning if appropriate, though.)
1465                  */
1466                 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1467                         MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1468                         bogus = true;
1469
1470                 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1471                         frozenAlreadyWrapped = true;
1472                 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1473                 {
1474                         frozenXID = datfrozenxid;
1475                         oldestxid_datoid = dbform->oid;
1476                 }
1477
1478                 if (MultiXactIdPrecedes(datminmxid, minMulti))
1479                 {
1480                         minMulti = datminmxid;
1481                         minmulti_datoid = dbform->oid;
1482                 }
1483         }
1484
1485         table_endscan(scan);
1486
1487         table_close(relation, AccessShareLock);
1488
1489         /*
1490          * Do not truncate CLOG if we seem to have suffered wraparound already;
1491          * the computed minimum XID might be bogus.  This case should now be
1492          * impossible due to the defenses in GetNewTransactionId, but we keep the
1493          * test anyway.
1494          */
1495         if (frozenAlreadyWrapped)
1496         {
1497                 ereport(WARNING,
1498                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1499                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1500                 return;
1501         }
1502
1503         /* chicken out if data is bogus in any other way */
1504         if (bogus)
1505                 return;
1506
1507         /*
1508          * Advance the oldest value for commit timestamps before truncating, so
1509          * that if a user requests a timestamp for a transaction we're truncating
1510          * away right after this point, they get NULL instead of an ugly "file not
1511          * found" error from slru.c.  This doesn't matter for xact/multixact
1512          * because they are not subject to arbitrary lookups from users.
1513          */
1514         AdvanceOldestCommitTsXid(frozenXID);
1515
1516         /*
1517          * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1518          */
1519         TruncateCLOG(frozenXID, oldestxid_datoid);
1520         TruncateCommitTs(frozenXID);
1521         TruncateMultiXact(minMulti, minmulti_datoid);
1522
1523         /*
1524          * Update the wrap limit for GetNewTransactionId and creation of new
1525          * MultiXactIds.  Note: these functions will also signal the postmaster
1526          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1527          * signalling twice?
1528          */
1529         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1530         SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1531 }
1532
1533
1534 /*
1535  *      vacuum_rel() -- vacuum one heap relation
1536  *
1537  *              relid identifies the relation to vacuum.  If relation is supplied,
1538  *              use the name therein for reporting any failure to open/lock the rel;
1539  *              do not use it once we've successfully opened the rel, since it might
1540  *              be stale.
1541  *
1542  *              Returns true if it's okay to proceed with a requested ANALYZE
1543  *              operation on this table.
1544  *
1545  *              Doing one heap at a time incurs extra overhead, since we need to
1546  *              check that the heap exists again just before we vacuum it.  The
1547  *              reason that we do this is so that vacuuming can be spread across
1548  *              many small transactions.  Otherwise, two-phase locking would require
1549  *              us to lock the entire database during one pass of the vacuum cleaner.
1550  *
1551  *              At entry and exit, we are not inside a transaction.
1552  */
1553 static bool
1554 vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
1555 {
1556         LOCKMODE        lmode;
1557         Relation        onerel;
1558         LockRelId       onerelid;
1559         Oid                     toast_relid;
1560         Oid                     save_userid;
1561         int                     save_sec_context;
1562         int                     save_nestlevel;
1563
1564         Assert(params != NULL);
1565
1566         /* Begin a transaction for vacuuming this relation */
1567         StartTransactionCommand();
1568
1569         /*
1570          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1571          * ensures that RecentGlobalXmin is kept truly recent.
1572          */
1573         PushActiveSnapshot(GetTransactionSnapshot());
1574
1575         if (!(params->options & VACOPT_FULL))
1576         {
1577                 /*
1578                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1579                  * other concurrent VACUUMs know that they can ignore this one while
1580                  * determining their OldestXmin.  (The reason we don't set it during a
1581                  * full VACUUM is exactly that we may have to run user-defined
1582                  * functions for functional indexes, and we want to make sure that if
1583                  * they use the snapshot set above, any tuples it requires can't get
1584                  * removed from other tables.  An index function that depends on the
1585                  * contents of other tables is arguably broken, but we won't break it
1586                  * here by violating transaction semantics.)
1587                  *
1588                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1589                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1590                  * in an emergency.
1591                  *
1592                  * Note: these flags remain set until CommitTransaction or
1593                  * AbortTransaction.  We don't want to clear them until we reset
1594                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1595                  * which is probably Not Good.
1596                  */
1597                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1598                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1599                 if (params->is_wraparound)
1600                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1601                 LWLockRelease(ProcArrayLock);
1602         }
1603
1604         /*
1605          * Check for user-requested abort.  Note we want this to be inside a
1606          * transaction, so xact.c doesn't issue useless WARNING.
1607          */
1608         CHECK_FOR_INTERRUPTS();
1609
1610         /*
1611          * Determine the type of lock we want --- hard exclusive lock for a FULL
1612          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1613          * way, we can be sure that no other backend is vacuuming the same table.
1614          */
1615         lmode = (params->options & VACOPT_FULL) ?
1616                 AccessExclusiveLock : ShareUpdateExclusiveLock;
1617
1618         /* open the relation and get the appropriate lock on it */
1619         onerel = vacuum_open_relation(relid, relation, params->options,
1620                                                                   params->log_min_duration >= 0, lmode);
1621
1622         /* leave if relation could not be opened or locked */
1623         if (!onerel)
1624         {
1625                 PopActiveSnapshot();
1626                 CommitTransactionCommand();
1627                 return false;
1628         }
1629
1630         /*
1631          * Check if relation needs to be skipped based on ownership.  This check
1632          * happens also when building the relation list to vacuum for a manual
1633          * operation, and needs to be done additionally here as VACUUM could
1634          * happen across multiple transactions where relation ownership could have
1635          * changed in-between.  Make sure to only generate logs for VACUUM in this
1636          * case.
1637          */
1638         if (!vacuum_is_relation_owner(RelationGetRelid(onerel),
1639                                                                   onerel->rd_rel,
1640                                                                   params->options & VACOPT_VACUUM))
1641         {
1642                 relation_close(onerel, lmode);
1643                 PopActiveSnapshot();
1644                 CommitTransactionCommand();
1645                 return false;
1646         }
1647
1648         /*
1649          * Check that it's of a vacuumable relkind.
1650          */
1651         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1652                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1653                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1654                 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1655         {
1656                 ereport(WARNING,
1657                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1658                                                 RelationGetRelationName(onerel))));
1659                 relation_close(onerel, lmode);
1660                 PopActiveSnapshot();
1661                 CommitTransactionCommand();
1662                 return false;
1663         }
1664
1665         /*
1666          * Silently ignore tables that are temp tables of other backends ---
1667          * trying to vacuum these will lead to great unhappiness, since their
1668          * contents are probably not up-to-date on disk.  (We don't throw a
1669          * warning here; it would just lead to chatter during a database-wide
1670          * VACUUM.)
1671          */
1672         if (RELATION_IS_OTHER_TEMP(onerel))
1673         {
1674                 relation_close(onerel, lmode);
1675                 PopActiveSnapshot();
1676                 CommitTransactionCommand();
1677                 return false;
1678         }
1679
1680         /*
1681          * Silently ignore partitioned tables as there is no work to be done.  The
1682          * useful work is on their child partitions, which have been queued up for
1683          * us separately.
1684          */
1685         if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1686         {
1687                 relation_close(onerel, lmode);
1688                 PopActiveSnapshot();
1689                 CommitTransactionCommand();
1690                 /* It's OK to proceed with ANALYZE on this table */
1691                 return true;
1692         }
1693
1694         /*
1695          * Get a session-level lock too. This will protect our access to the
1696          * relation across multiple transactions, so that we can vacuum the
1697          * relation's TOAST table (if any) secure in the knowledge that no one is
1698          * deleting the parent relation.
1699          *
1700          * NOTE: this cannot block, even if someone else is waiting for access,
1701          * because the lock manager knows that both lock requests are from the
1702          * same process.
1703          */
1704         onerelid = onerel->rd_lockInfo.lockRelId;
1705         LockRelationIdForSession(&onerelid, lmode);
1706
1707         /*
1708          * Remember the relation's TOAST relation for later, if the caller asked
1709          * us to process it.  In VACUUM FULL, though, the toast table is
1710          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1711          */
1712         if (!(params->options & VACOPT_SKIPTOAST) && !(params->options & VACOPT_FULL))
1713                 toast_relid = onerel->rd_rel->reltoastrelid;
1714         else
1715                 toast_relid = InvalidOid;
1716
1717         /*
1718          * Switch to the table owner's userid, so that any index functions are run
1719          * as that user.  Also lock down security-restricted operations and
1720          * arrange to make GUC variable changes local to this command. (This is
1721          * unnecessary, but harmless, for lazy VACUUM.)
1722          */
1723         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1724         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1725                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1726         save_nestlevel = NewGUCNestLevel();
1727
1728         /*
1729          * Do the actual work --- either FULL or "lazy" vacuum
1730          */
1731         if (params->options & VACOPT_FULL)
1732         {
1733                 int                     cluster_options = 0;
1734
1735                 /* close relation before vacuuming, but hold lock until commit */
1736                 relation_close(onerel, NoLock);
1737                 onerel = NULL;
1738
1739                 if ((params->options & VACOPT_VERBOSE) != 0)
1740                         cluster_options |= CLUOPT_VERBOSE;
1741
1742                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1743                 cluster_rel(relid, InvalidOid, cluster_options);
1744         }
1745         else
1746                 heap_vacuum_rel(onerel, params, vac_strategy);
1747
1748         /* Roll back any GUC changes executed by index functions */
1749         AtEOXact_GUC(false, save_nestlevel);
1750
1751         /* Restore userid and security context */
1752         SetUserIdAndSecContext(save_userid, save_sec_context);
1753
1754         /* all done with this class, but hold lock until commit */
1755         if (onerel)
1756                 relation_close(onerel, NoLock);
1757
1758         /*
1759          * Complete the transaction and free all temporary memory used.
1760          */
1761         PopActiveSnapshot();
1762         CommitTransactionCommand();
1763
1764         /*
1765          * If the relation has a secondary toast rel, vacuum that too while we
1766          * still hold the session lock on the master table.  Note however that
1767          * "analyze" will not get done on the toast table.  This is good, because
1768          * the toaster always uses hardcoded index access and statistics are
1769          * totally unimportant for toast relations.
1770          */
1771         if (toast_relid != InvalidOid)
1772                 vacuum_rel(toast_relid, NULL, params);
1773
1774         /*
1775          * Now release the session-level lock on the master table.
1776          */
1777         UnlockRelationIdForSession(&onerelid, lmode);
1778
1779         /* Report that we really did it. */
1780         return true;
1781 }
1782
1783
1784 /*
1785  * Open all the vacuumable indexes of the given relation, obtaining the
1786  * specified kind of lock on each.  Return an array of Relation pointers for
1787  * the indexes into *Irel, and the number of indexes into *nindexes.
1788  *
1789  * We consider an index vacuumable if it is marked insertable (indisready).
1790  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1791  * execution, and what we have is too corrupt to be processable.  We will
1792  * vacuum even if the index isn't indisvalid; this is important because in a
1793  * unique index, uniqueness checks will be performed anyway and had better not
1794  * hit dangling index pointers.
1795  */
1796 void
1797 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1798                                  int *nindexes, Relation **Irel)
1799 {
1800         List       *indexoidlist;
1801         ListCell   *indexoidscan;
1802         int                     i;
1803
1804         Assert(lockmode != NoLock);
1805
1806         indexoidlist = RelationGetIndexList(relation);
1807
1808         /* allocate enough memory for all indexes */
1809         i = list_length(indexoidlist);
1810
1811         if (i > 0)
1812                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1813         else
1814                 *Irel = NULL;
1815
1816         /* collect just the ready indexes */
1817         i = 0;
1818         foreach(indexoidscan, indexoidlist)
1819         {
1820                 Oid                     indexoid = lfirst_oid(indexoidscan);
1821                 Relation        indrel;
1822
1823                 indrel = index_open(indexoid, lockmode);
1824                 if (indrel->rd_index->indisready)
1825                         (*Irel)[i++] = indrel;
1826                 else
1827                         index_close(indrel, lockmode);
1828         }
1829
1830         *nindexes = i;
1831
1832         list_free(indexoidlist);
1833 }
1834
1835 /*
1836  * Release the resources acquired by vac_open_indexes.  Optionally release
1837  * the locks (say NoLock to keep 'em).
1838  */
1839 void
1840 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1841 {
1842         if (Irel == NULL)
1843                 return;
1844
1845         while (nindexes--)
1846         {
1847                 Relation        ind = Irel[nindexes];
1848
1849                 index_close(ind, lockmode);
1850         }
1851         pfree(Irel);
1852 }
1853
1854 /*
1855  * vacuum_delay_point --- check for interrupts and cost-based delay.
1856  *
1857  * This should be called in each major loop of VACUUM processing,
1858  * typically once per page processed.
1859  */
1860 void
1861 vacuum_delay_point(void)
1862 {
1863         /* Always check for interrupts */
1864         CHECK_FOR_INTERRUPTS();
1865
1866         /* Nap if appropriate */
1867         if (VacuumCostActive && !InterruptPending &&
1868                 VacuumCostBalance >= VacuumCostLimit)
1869         {
1870                 double          msec;
1871
1872                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1873                 if (msec > VacuumCostDelay * 4)
1874                         msec = VacuumCostDelay * 4;
1875
1876                 pg_usleep((long) (msec * 1000));
1877
1878                 VacuumCostBalance = 0;
1879
1880                 /* update balance values for workers */
1881                 AutoVacuumUpdateDelay();
1882
1883                 /* Might have gotten an interrupt while sleeping */
1884                 CHECK_FOR_INTERRUPTS();
1885         }
1886 }