granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/commit_ts.h"
  27 #include "access/genam.h"
  28 #include "access/heapam.h"
  29 #include "access/htup_details.h"
  30 #include "access/multixact.h"
  31 #include "access/transam.h"
  32 #include "access/xact.h"
  33 #include "catalog/namespace.h"
  34 #include "catalog/pg_database.h"
  35 #include "catalog/pg_inherits_fn.h"
  36 #include "catalog/pg_namespace.h"
  37 #include "commands/cluster.h"
  38 #include "commands/vacuum.h"
  39 #include "miscadmin.h"
  40 #include "nodes/makefuncs.h"
  41 #include "pgstat.h"
  42 #include "postmaster/autovacuum.h"
  43 #include "storage/bufmgr.h"
  44 #include "storage/lmgr.h"
  45 #include "storage/proc.h"
  46 #include "storage/procarray.h"
  47 #include "utils/acl.h"
  48 #include "utils/fmgroids.h"
  49 #include "utils/guc.h"
  50 #include "utils/memutils.h"
  51 #include "utils/snapmgr.h"
  52 #include "utils/syscache.h"
  53 #include "utils/tqual.h"
  54
  55
  56 /*
  57  * GUC parameters
  58  */
  59 int                     vacuum_freeze_min_age;
  60 int                     vacuum_freeze_table_age;
  61 int                     vacuum_multixact_freeze_min_age;
  62 int                     vacuum_multixact_freeze_table_age;
  63
  64
  65 /* A few variables that don't seem worth passing around as parameters */
  66 static MemoryContext vac_context = NULL;
  67 static BufferAccessStrategy vac_strategy;
  68
  69
  70 /* non-export function prototypes */
  71 static List *expand_vacuum_rel(VacuumRelation *vrel);
  72 static List *get_all_vacuum_rels(void);
  73 static void vac_truncate_clog(TransactionId frozenXID,
  74                                   MultiXactId minMulti,
  75                                   TransactionId lastSaneFrozenXid,
  76                                   MultiXactId lastSaneMinMulti);
  77 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
  78                    VacuumParams *params);
  79
  80 /*
  81  * Primary entry point for manual VACUUM and ANALYZE commands
  82  *
  83  * This is mainly a preparation wrapper for the real operations that will
  84  * happen in vacuum().
  85  */
  86 void
  87 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
  88 {
  89         VacuumParams params;
  90
  91         /* sanity checks on options */
  92         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
  93         Assert((vacstmt->options & VACOPT_VACUUM) ||
  94                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
  95         Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
  96
  97         /*
  98          * Make sure VACOPT_ANALYZE is specified if any column lists are present.
  99          */
 100         if (!(vacstmt->options & VACOPT_ANALYZE))
 101         {
 102                 ListCell   *lc;
 103
 104                 foreach(lc, vacstmt->rels)
 105                 {
 106                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
 107
 108                         if (vrel->va_cols != NIL)
 109                                 ereport(ERROR,
 110                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 111                                                  errmsg("ANALYZE option must be specified when a column list is provided")));
 112                 }
 113         }
 114
 115         /*
 116          * All freeze ages are zero if the FREEZE option is given; otherwise pass
 117          * them as -1 which means to use the default values.
 118          */
 119         if (vacstmt->options & VACOPT_FREEZE)
 120         {
 121                 params.freeze_min_age = 0;
 122                 params.freeze_table_age = 0;
 123                 params.multixact_freeze_min_age = 0;
 124                 params.multixact_freeze_table_age = 0;
 125         }
 126         else
 127         {
 128                 params.freeze_min_age = -1;
 129                 params.freeze_table_age = -1;
 130                 params.multixact_freeze_min_age = -1;
 131                 params.multixact_freeze_table_age = -1;
 132         }
 133
 134         /* user-invoked vacuum is never "for wraparound" */
 135         params.is_wraparound = false;
 136
 137         /* user-invoked vacuum never uses this parameter */
 138         params.log_min_duration = -1;
 139
 140         /* Now go through the common routine */
 141         vacuum(vacstmt->options, vacstmt->rels, &params, NULL, isTopLevel);
 142 }
 143
 144 /*
 145  * Internal entry point for VACUUM and ANALYZE commands.
 146  *
 147  * options is a bitmask of VacuumOption flags, indicating what to do.
 148  *
 149  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
 150  * we process all relevant tables in the database.  For each VacuumRelation,
 151  * if a valid OID is supplied, the table with that OID is what to process;
 152  * otherwise, the VacuumRelation's RangeVar indicates what to process.
 153  *
 154  * params contains a set of parameters that can be used to customize the
 155  * behavior.
 156  *
 157  * bstrategy is normally given as NULL, but in autovacuum it can be passed
 158  * in to use the same buffer strategy object across multiple vacuum() calls.
 159  *
 160  * isTopLevel should be passed down from ProcessUtility.
 161  *
 162  * It is the caller's responsibility that all parameters are allocated in a
 163  * memory context that will not disappear at transaction commit.
 164  */
 165 void
 166 vacuum(int options, List *relations, VacuumParams *params,
 167            BufferAccessStrategy bstrategy, bool isTopLevel)
 168 {
 169         static bool in_vacuum = false;
 170
 171         const char *stmttype;
 172         volatile bool in_outer_xact,
 173                                 use_own_xacts;
 174
 175         Assert(params != NULL);
 176
 177         stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 178
 179         /*
 180          * We cannot run VACUUM inside a user transaction block; if we were inside
 181          * a transaction, then our commit- and start-transaction-command calls
 182          * would not have the intended effect!  There are numerous other subtle
 183          * dependencies on this, too.
 184          *
 185          * ANALYZE (without VACUUM) can run either way.
 186          */
 187         if (options & VACOPT_VACUUM)
 188         {
 189                 PreventTransactionChain(isTopLevel, stmttype);
 190                 in_outer_xact = false;
 191         }
 192         else
 193                 in_outer_xact = IsInTransactionChain(isTopLevel);
 194
 195         /*
 196          * Due to static variables vac_context, anl_context and vac_strategy,
 197          * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
 198          * calls a hostile index expression that itself calls ANALYZE.
 199          */
 200         if (in_vacuum)
 201                 ereport(ERROR,
 202                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 203                                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
 204                                                 stmttype)));
 205
 206         /*
 207          * Sanity check DISABLE_PAGE_SKIPPING option.
 208          */
 209         if ((options & VACOPT_FULL) != 0 &&
 210                 (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
 211                 ereport(ERROR,
 212                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 213                                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
 214
 215         /*
 216          * Send info about dead objects to the statistics collector, unless we are
 217          * in autovacuum --- autovacuum.c does this for itself.
 218          */
 219         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 220                 pgstat_vacuum_stat();
 221
 222         /*
 223          * Create special memory context for cross-transaction storage.
 224          *
 225          * Since it is a child of PortalContext, it will go away eventually even
 226          * if we suffer an error; there's no need for special abort cleanup logic.
 227          */
 228         vac_context = AllocSetContextCreate(PortalContext,
 229                                                                                 "Vacuum",
 230                                                                                 ALLOCSET_DEFAULT_SIZES);
 231
 232         /*
 233          * If caller didn't give us a buffer strategy object, make one in the
 234          * cross-transaction memory context.
 235          */
 236         if (bstrategy == NULL)
 237         {
 238                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 239
 240                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 241                 MemoryContextSwitchTo(old_context);
 242         }
 243         vac_strategy = bstrategy;
 244
 245         /*
 246          * Build list of relation(s) to process, putting any new data in
 247          * vac_context for safekeeping.
 248          */
 249         if (relations != NIL)
 250         {
 251                 List       *newrels = NIL;
 252                 ListCell   *lc;
 253
 254                 foreach(lc, relations)
 255                 {
 256                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
 257                         List       *sublist;
 258                         MemoryContext old_context;
 259
 260                         sublist = expand_vacuum_rel(vrel);
 261                         old_context = MemoryContextSwitchTo(vac_context);
 262                         newrels = list_concat(newrels, sublist);
 263                         MemoryContextSwitchTo(old_context);
 264                 }
 265                 relations = newrels;
 266         }
 267         else
 268                 relations = get_all_vacuum_rels();
 269
 270         /*
 271          * Decide whether we need to start/commit our own transactions.
 272          *
 273          * For VACUUM (with or without ANALYZE): always do so, so that we can
 274          * release locks as soon as possible.  (We could possibly use the outer
 275          * transaction for a one-table VACUUM, but handling TOAST tables would be
 276          * problematic.)
 277          *
 278          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 279          * start/commit our own transactions.  Also, there's no need to do so if
 280          * only processing one relation.  For multiple relations when not within a
 281          * transaction block, and also in an autovacuum worker, use own
 282          * transactions so we can release locks sooner.
 283          */
 284         if (options & VACOPT_VACUUM)
 285                 use_own_xacts = true;
 286         else
 287         {
 288                 Assert(options & VACOPT_ANALYZE);
 289                 if (IsAutoVacuumWorkerProcess())
 290                         use_own_xacts = true;
 291                 else if (in_outer_xact)
 292                         use_own_xacts = false;
 293                 else if (list_length(relations) > 1)
 294                         use_own_xacts = true;
 295                 else
 296                         use_own_xacts = false;
 297         }
 298
 299         /*
 300          * vacuum_rel expects to be entered with no transaction active; it will
 301          * start and commit its own transaction.  But we are called by an SQL
 302          * command, and so we are executing inside a transaction already. We
 303          * commit the transaction started in PostgresMain() here, and start
 304          * another one before exiting to match the commit waiting for us back in
 305          * PostgresMain().
 306          */
 307         if (use_own_xacts)
 308         {
 309                 Assert(!in_outer_xact);
 310
 311                 /* ActiveSnapshot is not set by autovacuum */
 312                 if (ActiveSnapshotSet())
 313                         PopActiveSnapshot();
 314
 315                 /* matches the StartTransaction in PostgresMain() */
 316                 CommitTransactionCommand();
 317         }
 318
 319         /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
 320         PG_TRY();
 321         {
 322                 ListCell   *cur;
 323
 324                 in_vacuum = true;
 325                 VacuumCostActive = (VacuumCostDelay > 0);
 326                 VacuumCostBalance = 0;
 327                 VacuumPageHit = 0;
 328                 VacuumPageMiss = 0;
 329                 VacuumPageDirty = 0;
 330
 331                 /*
 332                  * Loop to process each selected relation.
 333                  */
 334                 foreach(cur, relations)
 335                 {
 336                         VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
 337
 338                         if (options & VACOPT_VACUUM)
 339                         {
 340                                 if (!vacuum_rel(vrel->oid, vrel->relation, options, params))
 341                                         continue;
 342                         }
 343
 344                         if (options & VACOPT_ANALYZE)
 345                         {
 346                                 /*
 347                                  * If using separate xacts, start one for analyze. Otherwise,
 348                                  * we can use the outer transaction.
 349                                  */
 350                                 if (use_own_xacts)
 351                                 {
 352                                         StartTransactionCommand();
 353                                         /* functions in indexes may want a snapshot set */
 354                                         PushActiveSnapshot(GetTransactionSnapshot());
 355                                 }
 356
 357                                 analyze_rel(vrel->oid, vrel->relation, options, params,
 358                                                         vrel->va_cols, in_outer_xact, vac_strategy);
 359
 360                                 if (use_own_xacts)
 361                                 {
 362                                         PopActiveSnapshot();
 363                                         CommitTransactionCommand();
 364                                 }
 365                         }
 366                 }
 367         }
 368         PG_CATCH();
 369         {
 370                 in_vacuum = false;
 371                 VacuumCostActive = false;
 372                 PG_RE_THROW();
 373         }
 374         PG_END_TRY();
 375
 376         in_vacuum = false;
 377         VacuumCostActive = false;
 378
 379         /*
 380          * Finish up processing.
 381          */
 382         if (use_own_xacts)
 383         {
 384                 /* here, we are not in a transaction */
 385
 386                 /*
 387                  * This matches the CommitTransaction waiting for us in
 388                  * PostgresMain().
 389                  */
 390                 StartTransactionCommand();
 391         }
 392
 393         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 394         {
 395                 /*
 396                  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
 397                  * (autovacuum.c does this for itself.)
 398                  */
 399                 vac_update_datfrozenxid();
 400         }
 401
 402         /*
 403          * Clean up working storage --- note we must do this after
 404          * StartTransactionCommand, else we might be trying to delete the active
 405          * context!
 406          */
 407         MemoryContextDelete(vac_context);
 408         vac_context = NULL;
 409 }
 410
 411 /*
 412  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
 413  * and optionally add VacuumRelations for partitions of the table.
 414  *
 415  * If a VacuumRelation does not have an OID supplied and is a partitioned
 416  * table, an extra entry will be added to the output for each partition.
 417  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
 418  * it does not want us to expand partitioned tables.
 419  *
 420  * We take care not to modify the input data structure, but instead build
 421  * new VacuumRelation(s) to return.  (But note that they will reference
 422  * unmodified parts of the input, eg column lists.)  New data structures
 423  * are made in vac_context.
 424  */
 425 static List *
 426 expand_vacuum_rel(VacuumRelation *vrel)
 427 {
 428         List       *vacrels = NIL;
 429         MemoryContext oldcontext;
 430
 431         /* If caller supplied OID, there's nothing we need do here. */
 432         if (OidIsValid(vrel->oid))
 433         {
 434                 oldcontext = MemoryContextSwitchTo(vac_context);
 435                 vacrels = lappend(vacrels, vrel);
 436                 MemoryContextSwitchTo(oldcontext);
 437         }
 438         else
 439         {
 440                 /* Process a specific relation, and possibly partitions thereof */
 441                 Oid                     relid;
 442                 HeapTuple       tuple;
 443                 Form_pg_class classForm;
 444                 bool            include_parts;
 445
 446                 /*
 447                  * We transiently take AccessShareLock to protect the syscache lookup
 448                  * below, as well as find_all_inheritors's expectation that the caller
 449                  * holds some lock on the starting relation.
 450                  */
 451                 relid = RangeVarGetRelid(vrel->relation, AccessShareLock, false);
 452
 453                 /*
 454                  * Make a returnable VacuumRelation for this rel.
 455                  */
 456                 oldcontext = MemoryContextSwitchTo(vac_context);
 457                 vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
 458                                                                                                           relid,
 459                                                                                                           vrel->va_cols));
 460                 MemoryContextSwitchTo(oldcontext);
 461
 462                 /*
 463                  * To check whether the relation is a partitioned table, fetch its
 464                  * syscache entry.
 465                  */
 466                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
 467                 if (!HeapTupleIsValid(tuple))
 468                         elog(ERROR, "cache lookup failed for relation %u", relid);
 469                 classForm = (Form_pg_class) GETSTRUCT(tuple);
 470                 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
 471                 ReleaseSysCache(tuple);
 472
 473                 /*
 474                  * If it is, make relation list entries for its partitions.  Note that
 475                  * the list returned by find_all_inheritors() includes the passed-in
 476                  * OID, so we have to skip that.  There's no point in taking locks on
 477                  * the individual partitions yet, and doing so would just add
 478                  * unnecessary deadlock risk.
 479                  */
 480                 if (include_parts)
 481                 {
 482                         List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
 483                         ListCell   *part_lc;
 484
 485                         foreach(part_lc, part_oids)
 486                         {
 487                                 Oid                     part_oid = lfirst_oid(part_lc);
 488
 489                                 if (part_oid == relid)
 490                                         continue;       /* ignore original table */
 491
 492                                 /*
 493                                  * We omit a RangeVar since it wouldn't be appropriate to
 494                                  * complain about failure to open one of these relations
 495                                  * later.
 496                                  */
 497                                 oldcontext = MemoryContextSwitchTo(vac_context);
 498                                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
 499                                                                                                                           part_oid,
 500                                                                                                                           vrel->va_cols));
 501                                 MemoryContextSwitchTo(oldcontext);
 502                         }
 503                 }
 504
 505                 /*
 506                  * Release lock again.  This means that by the time we actually try to
 507                  * process the table, it might be gone or renamed.  In the former case
 508                  * we'll silently ignore it; in the latter case we'll process it
 509                  * anyway, but we must beware that the RangeVar doesn't necessarily
 510                  * identify it anymore.  This isn't ideal, perhaps, but there's little
 511                  * practical alternative, since we're typically going to commit this
 512                  * transaction and begin a new one between now and then.  Moreover,
 513                  * holding locks on multiple relations would create significant risk
 514                  * of deadlock.
 515                  */
 516                 UnlockRelationOid(relid, AccessShareLock);
 517         }
 518
 519         return vacrels;
 520 }
 521
 522 /*
 523  * Construct a list of VacuumRelations for all vacuumable rels in
 524  * the current database.  The list is built in vac_context.
 525  */
 526 static List *
 527 get_all_vacuum_rels(void)
 528 {
 529         List       *vacrels = NIL;
 530         Relation        pgclass;
 531         HeapScanDesc scan;
 532         HeapTuple       tuple;
 533
 534         pgclass = heap_open(RelationRelationId, AccessShareLock);
 535
 536         scan = heap_beginscan_catalog(pgclass, 0, NULL);
 537
 538         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 539         {
 540                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 541                 MemoryContext oldcontext;
 542
 543                 /*
 544                  * We include partitioned tables here; depending on which operation is
 545                  * to be performed, caller will decide whether to process or ignore
 546                  * them.
 547                  */
 548                 if (classForm->relkind != RELKIND_RELATION &&
 549                         classForm->relkind != RELKIND_MATVIEW &&
 550                         classForm->relkind != RELKIND_PARTITIONED_TABLE)
 551                         continue;
 552
 553                 /*
 554                  * Build VacuumRelation(s) specifying the table OIDs to be processed.
 555                  * We omit a RangeVar since it wouldn't be appropriate to complain
 556                  * about failure to open one of these relations later.
 557                  */
 558                 oldcontext = MemoryContextSwitchTo(vac_context);
 559                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
 560                                                                                                           HeapTupleGetOid(tuple),
 561                                                                                                           NIL));
 562                 MemoryContextSwitchTo(oldcontext);
 563         }
 564
 565         heap_endscan(scan);
 566         heap_close(pgclass, AccessShareLock);
 567
 568         return vacrels;
 569 }
 570
 571 /*
 572  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 573  *
 574  * The output parameters are:
 575  * - oldestXmin is the cutoff value used to distinguish whether tuples are
 576  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 577  * - freezeLimit is the Xid below which all Xids are replaced by
 578  *       FrozenTransactionId during vacuum.
 579  * - xidFullScanLimit (computed from table_freeze_age parameter)
 580  *       represents a minimum Xid value; a table whose relfrozenxid is older than
 581  *       this will have a full-table vacuum applied to it, to freeze tuples across
 582  *       the whole table.  Vacuuming a table younger than this value can use a
 583  *       partial scan.
 584  * - multiXactCutoff is the value below which all MultiXactIds are removed from
 585  *       Xmax.
 586  * - mxactFullScanLimit is a value against which a table's relminmxid value is
 587  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
 588  *
 589  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 590  * not interested.
 591  */
 592 void
 593 vacuum_set_xid_limits(Relation rel,
 594                                           int freeze_min_age,
 595                                           int freeze_table_age,
 596                                           int multixact_freeze_min_age,
 597                                           int multixact_freeze_table_age,
 598                                           TransactionId *oldestXmin,
 599                                           TransactionId *freezeLimit,
 600                                           TransactionId *xidFullScanLimit,
 601                                           MultiXactId *multiXactCutoff,
 602                                           MultiXactId *mxactFullScanLimit)
 603 {
 604         int                     freezemin;
 605         int                     mxid_freezemin;
 606         int                     effective_multixact_freeze_max_age;
 607         TransactionId limit;
 608         TransactionId safeLimit;
 609         MultiXactId mxactLimit;
 610         MultiXactId safeMxactLimit;
 611
 612         /*
 613          * We can always ignore processes running lazy vacuum.  This is because we
 614          * use these values only for deciding which tuples we must keep in the
 615          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
 616          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 617          * a full vacuum, but keep in mind that only one vacuum process can be
 618          * working on a particular table at any time, and that each vacuum is
 619          * always an independent transaction.
 620          */
 621         *oldestXmin =
 622                 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
 623
 624         Assert(TransactionIdIsNormal(*oldestXmin));
 625
 626         /*
 627          * Determine the minimum freeze age to use: as specified by the caller, or
 628          * vacuum_freeze_min_age, but in any case not more than half
 629          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 630          * wraparound won't occur too frequently.
 631          */
 632         freezemin = freeze_min_age;
 633         if (freezemin < 0)
 634                 freezemin = vacuum_freeze_min_age;
 635         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 636         Assert(freezemin >= 0);
 637
 638         /*
 639          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 640          */
 641         limit = *oldestXmin - freezemin;
 642         if (!TransactionIdIsNormal(limit))
 643                 limit = FirstNormalTransactionId;
 644
 645         /*
 646          * If oldestXmin is very far back (in practice, more than
 647          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 648          * freeze age of zero.
 649          */
 650         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 651         if (!TransactionIdIsNormal(safeLimit))
 652                 safeLimit = FirstNormalTransactionId;
 653
 654         if (TransactionIdPrecedes(limit, safeLimit))
 655         {
 656                 ereport(WARNING,
 657                                 (errmsg("oldest xmin is far in the past"),
 658                                  errhint("Close open transactions soon to avoid wraparound problems.")));
 659                 limit = *oldestXmin;
 660         }
 661
 662         *freezeLimit = limit;
 663
 664         /*
 665          * Compute the multixact age for which freezing is urgent.  This is
 666          * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 667          * short of multixact member space.
 668          */
 669         effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 670
 671         /*
 672          * Determine the minimum multixact freeze age to use: as specified by
 673          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
 674          * than half effective_multixact_freeze_max_age, so that autovacuums to
 675          * prevent MultiXact wraparound won't occur too frequently.
 676          */
 677         mxid_freezemin = multixact_freeze_min_age;
 678         if (mxid_freezemin < 0)
 679                 mxid_freezemin = vacuum_multixact_freeze_min_age;
 680         mxid_freezemin = Min(mxid_freezemin,
 681                                                  effective_multixact_freeze_max_age / 2);
 682         Assert(mxid_freezemin >= 0);
 683
 684         /* compute the cutoff multi, being careful to generate a valid value */
 685         mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
 686         if (mxactLimit < FirstMultiXactId)
 687                 mxactLimit = FirstMultiXactId;
 688
 689         safeMxactLimit =
 690                 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
 691         if (safeMxactLimit < FirstMultiXactId)
 692                 safeMxactLimit = FirstMultiXactId;
 693
 694         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
 695         {
 696                 ereport(WARNING,
 697                                 (errmsg("oldest multixact is far in the past"),
 698                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 699                 mxactLimit = safeMxactLimit;
 700         }
 701
 702         *multiXactCutoff = mxactLimit;
 703
 704         if (xidFullScanLimit != NULL)
 705         {
 706                 int                     freezetable;
 707
 708                 Assert(mxactFullScanLimit != NULL);
 709
 710                 /*
 711                  * Determine the table freeze age to use: as specified by the caller,
 712                  * or vacuum_freeze_table_age, but in any case not more than
 713                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 714                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 715                  * before anti-wraparound autovacuum is launched.
 716                  */
 717                 freezetable = freeze_table_age;
 718                 if (freezetable < 0)
 719                         freezetable = vacuum_freeze_table_age;
 720                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 721                 Assert(freezetable >= 0);
 722
 723                 /*
 724                  * Compute XID limit causing a full-table vacuum, being careful not to
 725                  * generate a "permanent" XID.
 726                  */
 727                 limit = ReadNewTransactionId() - freezetable;
 728                 if (!TransactionIdIsNormal(limit))
 729                         limit = FirstNormalTransactionId;
 730
 731                 *xidFullScanLimit = limit;
 732
 733                 /*
 734                  * Similar to the above, determine the table freeze age to use for
 735                  * multixacts: as specified by the caller, or
 736                  * vacuum_multixact_freeze_table_age, but in any case not more than
 737                  * effective_multixact_freeze_max_age * 0.95, so that if you have
 738                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
 739                  * freeze multixacts before anti-wraparound autovacuum is launched.
 740                  */
 741                 freezetable = multixact_freeze_table_age;
 742                 if (freezetable < 0)
 743                         freezetable = vacuum_multixact_freeze_table_age;
 744                 freezetable = Min(freezetable,
 745                                                   effective_multixact_freeze_max_age * 0.95);
 746                 Assert(freezetable >= 0);
 747
 748                 /*
 749                  * Compute MultiXact limit causing a full-table vacuum, being careful
 750                  * to generate a valid MultiXact value.
 751                  */
 752                 mxactLimit = ReadNextMultiXactId() - freezetable;
 753                 if (mxactLimit < FirstMultiXactId)
 754                         mxactLimit = FirstMultiXactId;
 755
 756                 *mxactFullScanLimit = mxactLimit;
 757         }
 758         else
 759         {
 760                 Assert(mxactFullScanLimit == NULL);
 761         }
 762 }
 763
 764 /*
 765  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
 766  *
 767  *              If we scanned the whole relation then we should just use the count of
 768  *              live tuples seen; but if we did not, we should not trust the count
 769  *              unreservedly, especially not in VACUUM, which may have scanned a quite
 770  *              nonrandom subset of the table.  When we have only partial information,
 771  *              we take the old value of pg_class.reltuples as a measurement of the
 772  *              tuple density in the unscanned pages.
 773  *
 774  *              This routine is shared by VACUUM and ANALYZE.
 775  */
 776 double
 777 vac_estimate_reltuples(Relation relation, bool is_analyze,
 778                                            BlockNumber total_pages,
 779                                            BlockNumber scanned_pages,
 780                                            double scanned_tuples)
 781 {
 782         BlockNumber old_rel_pages = relation->rd_rel->relpages;
 783         double          old_rel_tuples = relation->rd_rel->reltuples;
 784         double          old_density;
 785         double          new_density;
 786         double          multiplier;
 787         double          updated_density;
 788
 789         /* If we did scan the whole table, just use the count as-is */
 790         if (scanned_pages >= total_pages)
 791                 return scanned_tuples;
 792
 793         /*
 794          * If scanned_pages is zero but total_pages isn't, keep the existing value
 795          * of reltuples.  (Note: callers should avoid updating the pg_class
 796          * statistics in this situation, since no new information has been
 797          * provided.)
 798          */
 799         if (scanned_pages == 0)
 800                 return old_rel_tuples;
 801
 802         /*
 803          * If old value of relpages is zero, old density is indeterminate; we
 804          * can't do much except scale up scanned_tuples to match total_pages.
 805          */
 806         if (old_rel_pages == 0)
 807                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 808
 809         /*
 810          * Okay, we've covered the corner cases.  The normal calculation is to
 811          * convert the old measurement to a density (tuples per page), then update
 812          * the density using an exponential-moving-average approach, and finally
 813          * compute reltuples as updated_density * total_pages.
 814          *
 815          * For ANALYZE, the moving average multiplier is just the fraction of the
 816          * table's pages we scanned.  This is equivalent to assuming that the
 817          * tuple density in the unscanned pages didn't change.  Of course, it
 818          * probably did, if the new density measurement is different. But over
 819          * repeated cycles, the value of reltuples will converge towards the
 820          * correct value, if repeated measurements show the same new density.
 821          *
 822          * For VACUUM, the situation is a bit different: we have looked at a
 823          * nonrandom sample of pages, but we know for certain that the pages we
 824          * didn't look at are precisely the ones that haven't changed lately.
 825          * Thus, there is a reasonable argument for doing exactly the same thing
 826          * as for the ANALYZE case, that is use the old density measurement as the
 827          * value for the unscanned pages.
 828          *
 829          * This logic could probably use further refinement.
 830          */
 831         old_density = old_rel_tuples / old_rel_pages;
 832         new_density = scanned_tuples / scanned_pages;
 833         multiplier = (double) scanned_pages / (double) total_pages;
 834         updated_density = old_density + (new_density - old_density) * multiplier;
 835         return floor(updated_density * total_pages + 0.5);
 836 }
 837
 838
 839 /*
 840  *      vac_update_relstats() -- update statistics for one relation
 841  *
 842  *              Update the whole-relation statistics that are kept in its pg_class
 843  *              row.  There are additional stats that will be updated if we are
 844  *              doing ANALYZE, but we always update these stats.  This routine works
 845  *              for both index and heap relation entries in pg_class.
 846  *
 847  *              We violate transaction semantics here by overwriting the rel's
 848  *              existing pg_class tuple with the new values.  This is reasonably
 849  *              safe as long as we're sure that the new values are correct whether or
 850  *              not this transaction commits.  The reason for doing this is that if
 851  *              we updated these tuples in the usual way, vacuuming pg_class itself
 852  *              wouldn't work very well --- by the time we got done with a vacuum
 853  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
 854  *              course, this only works for fixed-size not-null columns, but these are.
 855  *
 856  *              Another reason for doing it this way is that when we are in a lazy
 857  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
 858  *              Somebody vacuuming pg_class might think they could delete a tuple
 859  *              marked with xmin = our xid.
 860  *
 861  *              In addition to fundamentally nontransactional statistics such as
 862  *              relpages and relallvisible, we try to maintain certain lazily-updated
 863  *              DDL flags such as relhasindex, by clearing them if no longer correct.
 864  *              It's safe to do this in VACUUM, which can't run in parallel with
 865  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
 866  *              However, it's *not* safe to do it in an ANALYZE that's within an
 867  *              outer transaction, because for example the current transaction might
 868  *              have dropped the last index; then we'd think relhasindex should be
 869  *              cleared, but if the transaction later rolls back this would be wrong.
 870  *              So we refrain from updating the DDL flags if we're inside an outer
 871  *              transaction.  This is OK since postponing the flag maintenance is
 872  *              always allowable.
 873  *
 874  *              This routine is shared by VACUUM and ANALYZE.
 875  */
 876 void
 877 vac_update_relstats(Relation relation,
 878                                         BlockNumber num_pages, double num_tuples,
 879                                         BlockNumber num_all_visible_pages,
 880                                         bool hasindex, TransactionId frozenxid,
 881                                         MultiXactId minmulti,
 882                                         bool in_outer_xact)
 883 {
 884         Oid                     relid = RelationGetRelid(relation);
 885         Relation        rd;
 886         HeapTuple       ctup;
 887         Form_pg_class pgcform;
 888         bool            dirty;
 889
 890         rd = heap_open(RelationRelationId, RowExclusiveLock);
 891
 892         /* Fetch a copy of the tuple to scribble on */
 893         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
 894         if (!HeapTupleIsValid(ctup))
 895                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 896                          relid);
 897         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 898
 899         /* Apply statistical updates, if any, to copied tuple */
 900
 901         dirty = false;
 902         if (pgcform->relpages != (int32) num_pages)
 903         {
 904                 pgcform->relpages = (int32) num_pages;
 905                 dirty = true;
 906         }
 907         if (pgcform->reltuples != (float4) num_tuples)
 908         {
 909                 pgcform->reltuples = (float4) num_tuples;
 910                 dirty = true;
 911         }
 912         if (pgcform->relallvisible != (int32) num_all_visible_pages)
 913         {
 914                 pgcform->relallvisible = (int32) num_all_visible_pages;
 915                 dirty = true;
 916         }
 917
 918         /* Apply DDL updates, but not inside an outer transaction (see above) */
 919
 920         if (!in_outer_xact)
 921         {
 922                 /*
 923                  * If we didn't find any indexes, reset relhasindex.
 924                  */
 925                 if (pgcform->relhasindex && !hasindex)
 926                 {
 927                         pgcform->relhasindex = false;
 928                         dirty = true;
 929                 }
 930
 931                 /*
 932                  * If we have discovered that there are no indexes, then there's no
 933                  * primary key either.  This could be done more thoroughly...
 934                  */
 935                 if (pgcform->relhaspkey && !hasindex)
 936                 {
 937                         pgcform->relhaspkey = false;
 938                         dirty = true;
 939                 }
 940
 941                 /* We also clear relhasrules and relhastriggers if needed */
 942                 if (pgcform->relhasrules && relation->rd_rules == NULL)
 943                 {
 944                         pgcform->relhasrules = false;
 945                         dirty = true;
 946                 }
 947                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
 948                 {
 949                         pgcform->relhastriggers = false;
 950                         dirty = true;
 951                 }
 952         }
 953
 954         /*
 955          * Update relfrozenxid, unless caller passed InvalidTransactionId
 956          * indicating it has no new data.
 957          *
 958          * Ordinarily, we don't let relfrozenxid go backwards: if things are
 959          * working correctly, the only way the new frozenxid could be older would
 960          * be if a previous VACUUM was done with a tighter freeze_min_age, in
 961          * which case we don't want to forget the work it already did.  However,
 962          * if the stored relfrozenxid is "in the future", then it must be corrupt
 963          * and it seems best to overwrite it with the cutoff we used this time.
 964          * This should match vac_update_datfrozenxid() concerning what we consider
 965          * to be "in the future".
 966          */
 967         if (TransactionIdIsNormal(frozenxid) &&
 968                 pgcform->relfrozenxid != frozenxid &&
 969                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
 970                  TransactionIdPrecedes(ReadNewTransactionId(),
 971                                                            pgcform->relfrozenxid)))
 972         {
 973                 pgcform->relfrozenxid = frozenxid;
 974                 dirty = true;
 975         }
 976
 977         /* Similarly for relminmxid */
 978         if (MultiXactIdIsValid(minmulti) &&
 979                 pgcform->relminmxid != minmulti &&
 980                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
 981                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
 982         {
 983                 pgcform->relminmxid = minmulti;
 984                 dirty = true;
 985         }
 986
 987         /* If anything changed, write out the tuple. */
 988         if (dirty)
 989                 heap_inplace_update(rd, ctup);
 990
 991         heap_close(rd, RowExclusiveLock);
 992 }
 993
 994
 995 /*
 996  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 997  *
 998  *              Update pg_database's datfrozenxid entry for our database to be the
 999  *              minimum of the pg_class.relfrozenxid values.
1000  *
1001  *              Similarly, update our datminmxid to be the minimum of the
1002  *              pg_class.relminmxid values.
1003  *
1004  *              If we are able to advance either pg_database value, also try to
1005  *              truncate pg_xact and pg_multixact.
1006  *
1007  *              We violate transaction semantics here by overwriting the database's
1008  *              existing pg_database tuple with the new values.  This is reasonably
1009  *              safe since the new values are correct whether or not this transaction
1010  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
1011  *              behind after a VACUUM.
1012  */
1013 void
1014 vac_update_datfrozenxid(void)
1015 {
1016         HeapTuple       tuple;
1017         Form_pg_database dbform;
1018         Relation        relation;
1019         SysScanDesc scan;
1020         HeapTuple       classTup;
1021         TransactionId newFrozenXid;
1022         MultiXactId newMinMulti;
1023         TransactionId lastSaneFrozenXid;
1024         MultiXactId lastSaneMinMulti;
1025         bool            bogus = false;
1026         bool            dirty = false;
1027
1028         /*
1029          * Initialize the "min" calculation with GetOldestXmin, which is a
1030          * reasonable approximation to the minimum relfrozenxid for not-yet-
1031          * committed pg_class entries for new tables; see AddNewRelationTuple().
1032          * So we cannot produce a wrong minimum by starting with this.
1033          */
1034         newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1035
1036         /*
1037          * Similarly, initialize the MultiXact "min" with the value that would be
1038          * used on pg_class for new tables.  See AddNewRelationTuple().
1039          */
1040         newMinMulti = GetOldestMultiXactId();
1041
1042         /*
1043          * Identify the latest relfrozenxid and relminmxid values that we could
1044          * validly see during the scan.  These are conservative values, but it's
1045          * not really worth trying to be more exact.
1046          */
1047         lastSaneFrozenXid = ReadNewTransactionId();
1048         lastSaneMinMulti = ReadNextMultiXactId();
1049
1050         /*
1051          * We must seqscan pg_class to find the minimum Xid, because there is no
1052          * index that can help us here.
1053          */
1054         relation = heap_open(RelationRelationId, AccessShareLock);
1055
1056         scan = systable_beginscan(relation, InvalidOid, false,
1057                                                           NULL, 0, NULL);
1058
1059         while ((classTup = systable_getnext(scan)) != NULL)
1060         {
1061                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1062
1063                 /*
1064                  * Only consider relations able to hold unfrozen XIDs (anything else
1065                  * should have InvalidTransactionId in relfrozenxid anyway.)
1066                  */
1067                 if (classForm->relkind != RELKIND_RELATION &&
1068                         classForm->relkind != RELKIND_MATVIEW &&
1069                         classForm->relkind != RELKIND_TOASTVALUE)
1070                         continue;
1071
1072                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1073                 Assert(MultiXactIdIsValid(classForm->relminmxid));
1074
1075                 /*
1076                  * If things are working properly, no relation should have a
1077                  * relfrozenxid or relminmxid that is "in the future".  However, such
1078                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1079                  * see any entries that are "in the future", chicken out and don't do
1080                  * anything.  This ensures we won't truncate clog before those
1081                  * relations have been scanned and cleaned up.
1082                  */
1083                 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1084                         MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1085                 {
1086                         bogus = true;
1087                         break;
1088                 }
1089
1090                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1091                         newFrozenXid = classForm->relfrozenxid;
1092
1093                 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1094                         newMinMulti = classForm->relminmxid;
1095         }
1096
1097         /* we're done with pg_class */
1098         systable_endscan(scan);
1099         heap_close(relation, AccessShareLock);
1100
1101         /* chicken out if bogus data found */
1102         if (bogus)
1103                 return;
1104
1105         Assert(TransactionIdIsNormal(newFrozenXid));
1106         Assert(MultiXactIdIsValid(newMinMulti));
1107
1108         /* Now fetch the pg_database tuple we need to update. */
1109         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1110
1111         /* Fetch a copy of the tuple to scribble on */
1112         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1113         if (!HeapTupleIsValid(tuple))
1114                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1115         dbform = (Form_pg_database) GETSTRUCT(tuple);
1116
1117         /*
1118          * As in vac_update_relstats(), we ordinarily don't want to let
1119          * datfrozenxid go backward; but if it's "in the future" then it must be
1120          * corrupt and it seems best to overwrite it.
1121          */
1122         if (dbform->datfrozenxid != newFrozenXid &&
1123                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1124                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1125         {
1126                 dbform->datfrozenxid = newFrozenXid;
1127                 dirty = true;
1128         }
1129         else
1130                 newFrozenXid = dbform->datfrozenxid;
1131
1132         /* Ditto for datminmxid */
1133         if (dbform->datminmxid != newMinMulti &&
1134                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1135                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1136         {
1137                 dbform->datminmxid = newMinMulti;
1138                 dirty = true;
1139         }
1140         else
1141                 newMinMulti = dbform->datminmxid;
1142
1143         if (dirty)
1144                 heap_inplace_update(relation, tuple);
1145
1146         heap_freetuple(tuple);
1147         heap_close(relation, RowExclusiveLock);
1148
1149         /*
1150          * If we were able to advance datfrozenxid or datminmxid, see if we can
1151          * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1152          * XID-wrap-limit info is stale, since this action will update that too.
1153          */
1154         if (dirty || ForceTransactionIdLimitUpdate())
1155                 vac_truncate_clog(newFrozenXid, newMinMulti,
1156                                                   lastSaneFrozenXid, lastSaneMinMulti);
1157 }
1158
1159
1160 /*
1161  *      vac_truncate_clog() -- attempt to truncate the commit log
1162  *
1163  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
1164  *              and use it to truncate the transaction commit log (pg_xact).
1165  *              Also update the XID wrap limit info maintained by varsup.c.
1166  *              Likewise for datminmxid.
1167  *
1168  *              The passed frozenXID and minMulti are the updated values for my own
1169  *              pg_database entry. They're used to initialize the "min" calculations.
1170  *              The caller also passes the "last sane" XID and MXID, since it has
1171  *              those at hand already.
1172  *
1173  *              This routine is only invoked when we've managed to change our
1174  *              DB's datfrozenxid/datminmxid values, or we found that the shared
1175  *              XID-wrap-limit info is stale.
1176  */
1177 static void
1178 vac_truncate_clog(TransactionId frozenXID,
1179                                   MultiXactId minMulti,
1180                                   TransactionId lastSaneFrozenXid,
1181                                   MultiXactId lastSaneMinMulti)
1182 {
1183         TransactionId nextXID = ReadNewTransactionId();
1184         Relation        relation;
1185         HeapScanDesc scan;
1186         HeapTuple       tuple;
1187         Oid                     oldestxid_datoid;
1188         Oid                     minmulti_datoid;
1189         bool            bogus = false;
1190         bool            frozenAlreadyWrapped = false;
1191
1192         /* init oldest datoids to sync with my frozenXID/minMulti values */
1193         oldestxid_datoid = MyDatabaseId;
1194         minmulti_datoid = MyDatabaseId;
1195
1196         /*
1197          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1198          *
1199          * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1200          * the values could change while we look at them.  Fetch each one just
1201          * once to ensure sane behavior of the comparison logic.  (Here, as in
1202          * many other places, we assume that fetching or updating an XID in shared
1203          * storage is atomic.)
1204          *
1205          * Note: we need not worry about a race condition with new entries being
1206          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1207          * existing DB's datfrozenxid, and that source DB cannot be ours because
1208          * of the interlock against copying a DB containing an active backend.
1209          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1210          * concurrently modify the datfrozenxid's of different databases, the
1211          * worst possible outcome is that pg_xact is not truncated as aggressively
1212          * as it could be.
1213          */
1214         relation = heap_open(DatabaseRelationId, AccessShareLock);
1215
1216         scan = heap_beginscan_catalog(relation, 0, NULL);
1217
1218         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1219         {
1220                 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1221                 TransactionId datfrozenxid = dbform->datfrozenxid;
1222                 TransactionId datminmxid = dbform->datminmxid;
1223
1224                 Assert(TransactionIdIsNormal(datfrozenxid));
1225                 Assert(MultiXactIdIsValid(datminmxid));
1226
1227                 /*
1228                  * If things are working properly, no database should have a
1229                  * datfrozenxid or datminmxid that is "in the future".  However, such
1230                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1231                  * see any entries that are "in the future", chicken out and don't do
1232                  * anything.  This ensures we won't truncate clog before those
1233                  * databases have been scanned and cleaned up.  (We will issue the
1234                  * "already wrapped" warning if appropriate, though.)
1235                  */
1236                 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1237                         MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1238                         bogus = true;
1239
1240                 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1241                         frozenAlreadyWrapped = true;
1242                 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1243                 {
1244                         frozenXID = datfrozenxid;
1245                         oldestxid_datoid = HeapTupleGetOid(tuple);
1246                 }
1247
1248                 if (MultiXactIdPrecedes(datminmxid, minMulti))
1249                 {
1250                         minMulti = datminmxid;
1251                         minmulti_datoid = HeapTupleGetOid(tuple);
1252                 }
1253         }
1254
1255         heap_endscan(scan);
1256
1257         heap_close(relation, AccessShareLock);
1258
1259         /*
1260          * Do not truncate CLOG if we seem to have suffered wraparound already;
1261          * the computed minimum XID might be bogus.  This case should now be
1262          * impossible due to the defenses in GetNewTransactionId, but we keep the
1263          * test anyway.
1264          */
1265         if (frozenAlreadyWrapped)
1266         {
1267                 ereport(WARNING,
1268                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1269                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1270                 return;
1271         }
1272
1273         /* chicken out if data is bogus in any other way */
1274         if (bogus)
1275                 return;
1276
1277         /*
1278          * Advance the oldest value for commit timestamps before truncating, so
1279          * that if a user requests a timestamp for a transaction we're truncating
1280          * away right after this point, they get NULL instead of an ugly "file not
1281          * found" error from slru.c.  This doesn't matter for xact/multixact
1282          * because they are not subject to arbitrary lookups from users.
1283          */
1284         AdvanceOldestCommitTsXid(frozenXID);
1285
1286         /*
1287          * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1288          */
1289         TruncateCLOG(frozenXID, oldestxid_datoid);
1290         TruncateCommitTs(frozenXID);
1291         TruncateMultiXact(minMulti, minmulti_datoid);
1292
1293         /*
1294          * Update the wrap limit for GetNewTransactionId and creation of new
1295          * MultiXactIds.  Note: these functions will also signal the postmaster
1296          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1297          * signalling twice?
1298          */
1299         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1300         SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1301 }
1302
1303
1304 /*
1305  *      vacuum_rel() -- vacuum one heap relation
1306  *
1307  *              relid identifies the relation to vacuum.  If relation is supplied,
1308  *              use the name therein for reporting any failure to open/lock the rel;
1309  *              do not use it once we've successfully opened the rel, since it might
1310  *              be stale.
1311  *
1312  *              Returns true if it's okay to proceed with a requested ANALYZE
1313  *              operation on this table.
1314  *
1315  *              Doing one heap at a time incurs extra overhead, since we need to
1316  *              check that the heap exists again just before we vacuum it.  The
1317  *              reason that we do this is so that vacuuming can be spread across
1318  *              many small transactions.  Otherwise, two-phase locking would require
1319  *              us to lock the entire database during one pass of the vacuum cleaner.
1320  *
1321  *              At entry and exit, we are not inside a transaction.
1322  */
1323 static bool
1324 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1325 {
1326         LOCKMODE        lmode;
1327         Relation        onerel;
1328         LockRelId       onerelid;
1329         Oid                     toast_relid;
1330         Oid                     save_userid;
1331         int                     save_sec_context;
1332         int                     save_nestlevel;
1333         bool            rel_lock = true;
1334
1335         Assert(params != NULL);
1336
1337         /* Begin a transaction for vacuuming this relation */
1338         StartTransactionCommand();
1339
1340         /*
1341          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1342          * ensures that RecentGlobalXmin is kept truly recent.
1343          */
1344         PushActiveSnapshot(GetTransactionSnapshot());
1345
1346         if (!(options & VACOPT_FULL))
1347         {
1348                 /*
1349                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1350                  * other concurrent VACUUMs know that they can ignore this one while
1351                  * determining their OldestXmin.  (The reason we don't set it during a
1352                  * full VACUUM is exactly that we may have to run user-defined
1353                  * functions for functional indexes, and we want to make sure that if
1354                  * they use the snapshot set above, any tuples it requires can't get
1355                  * removed from other tables.  An index function that depends on the
1356                  * contents of other tables is arguably broken, but we won't break it
1357                  * here by violating transaction semantics.)
1358                  *
1359                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1360                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1361                  * in an emergency.
1362                  *
1363                  * Note: these flags remain set until CommitTransaction or
1364                  * AbortTransaction.  We don't want to clear them until we reset
1365                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1366                  * which is probably Not Good.
1367                  */
1368                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1369                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1370                 if (params->is_wraparound)
1371                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1372                 LWLockRelease(ProcArrayLock);
1373         }
1374
1375         /*
1376          * Check for user-requested abort.  Note we want this to be inside a
1377          * transaction, so xact.c doesn't issue useless WARNING.
1378          */
1379         CHECK_FOR_INTERRUPTS();
1380
1381         /*
1382          * Determine the type of lock we want --- hard exclusive lock for a FULL
1383          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1384          * way, we can be sure that no other backend is vacuuming the same table.
1385          */
1386         lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1387
1388         /*
1389          * Open the relation and get the appropriate lock on it.
1390          *
1391          * There's a race condition here: the rel may have gone away since the
1392          * last time we saw it.  If so, we don't need to vacuum it.
1393          *
1394          * If we've been asked not to wait for the relation lock, acquire it first
1395          * in non-blocking mode, before calling try_relation_open().
1396          */
1397         if (!(options & VACOPT_NOWAIT))
1398                 onerel = try_relation_open(relid, lmode);
1399         else if (ConditionalLockRelationOid(relid, lmode))
1400                 onerel = try_relation_open(relid, NoLock);
1401         else
1402         {
1403                 onerel = NULL;
1404                 rel_lock = false;
1405         }
1406
1407         /*
1408          * If we failed to open or lock the relation, emit a log message before
1409          * exiting.
1410          */
1411         if (!onerel)
1412         {
1413                 int                     elevel = 0;
1414
1415                 /*
1416                  * Determine the log level.
1417                  *
1418                  * If the RangeVar is not defined, we do not have enough information
1419                  * to provide a meaningful log statement.  Chances are that
1420                  * vacuum_rel's caller has intentionally not provided this information
1421                  * so that this logging is skipped, anyway.
1422                  *
1423                  * Otherwise, for autovacuum logs, we emit a LOG if
1424                  * log_autovacuum_min_duration is not disabled.  For manual VACUUM, we
1425                  * emit a WARNING to match the log statements in the permission
1426                  * checks.
1427                  */
1428                 if (relation != NULL)
1429                 {
1430                         if (!IsAutoVacuumWorkerProcess())
1431                                 elevel = WARNING;
1432                         else if (params->log_min_duration >= 0)
1433                                 elevel = LOG;
1434                 }
1435
1436                 if (elevel != 0)
1437                 {
1438                         if (!rel_lock)
1439                                 ereport(elevel,
1440                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1441                                                  errmsg("skipping vacuum of \"%s\" --- lock not available",
1442                                                                 relation->relname)));
1443                         else
1444                                 ereport(elevel,
1445                                                 (errcode(ERRCODE_UNDEFINED_TABLE),
1446                                                  errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
1447                                                                 relation->relname)));
1448                 }
1449
1450                 PopActiveSnapshot();
1451                 CommitTransactionCommand();
1452                 return false;
1453         }
1454
1455         /*
1456          * Check permissions.
1457          *
1458          * We allow the user to vacuum a table if he is superuser, the table
1459          * owner, or the database owner (but in the latter case, only if it's not
1460          * a shared relation).  pg_class_ownercheck includes the superuser case.
1461          *
1462          * Note we choose to treat permissions failure as a WARNING and keep
1463          * trying to vacuum the rest of the DB --- is this appropriate?
1464          */
1465         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1466                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1467         {
1468                 if (onerel->rd_rel->relisshared)
1469                         ereport(WARNING,
1470                                         (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1471                                                         RelationGetRelationName(onerel))));
1472                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1473                         ereport(WARNING,
1474                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1475                                                         RelationGetRelationName(onerel))));
1476                 else
1477                         ereport(WARNING,
1478                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1479                                                         RelationGetRelationName(onerel))));
1480                 relation_close(onerel, lmode);
1481                 PopActiveSnapshot();
1482                 CommitTransactionCommand();
1483                 return false;
1484         }
1485
1486         /*
1487          * Check that it's of a vacuumable relkind.
1488          */
1489         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1490                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1491                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1492                 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1493         {
1494                 ereport(WARNING,
1495                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1496                                                 RelationGetRelationName(onerel))));
1497                 relation_close(onerel, lmode);
1498                 PopActiveSnapshot();
1499                 CommitTransactionCommand();
1500                 return false;
1501         }
1502
1503         /*
1504          * Silently ignore tables that are temp tables of other backends ---
1505          * trying to vacuum these will lead to great unhappiness, since their
1506          * contents are probably not up-to-date on disk.  (We don't throw a
1507          * warning here; it would just lead to chatter during a database-wide
1508          * VACUUM.)
1509          */
1510         if (RELATION_IS_OTHER_TEMP(onerel))
1511         {
1512                 relation_close(onerel, lmode);
1513                 PopActiveSnapshot();
1514                 CommitTransactionCommand();
1515                 return false;
1516         }
1517
1518         /*
1519          * Silently ignore partitioned tables as there is no work to be done.  The
1520          * useful work is on their child partitions, which have been queued up for
1521          * us separately.
1522          */
1523         if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1524         {
1525                 relation_close(onerel, lmode);
1526                 PopActiveSnapshot();
1527                 CommitTransactionCommand();
1528                 /* It's OK to proceed with ANALYZE on this table */
1529                 return true;
1530         }
1531
1532         /*
1533          * Get a session-level lock too. This will protect our access to the
1534          * relation across multiple transactions, so that we can vacuum the
1535          * relation's TOAST table (if any) secure in the knowledge that no one is
1536          * deleting the parent relation.
1537          *
1538          * NOTE: this cannot block, even if someone else is waiting for access,
1539          * because the lock manager knows that both lock requests are from the
1540          * same process.
1541          */
1542         onerelid = onerel->rd_lockInfo.lockRelId;
1543         LockRelationIdForSession(&onerelid, lmode);
1544
1545         /*
1546          * Remember the relation's TOAST relation for later, if the caller asked
1547          * us to process it.  In VACUUM FULL, though, the toast table is
1548          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1549          */
1550         if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1551                 toast_relid = onerel->rd_rel->reltoastrelid;
1552         else
1553                 toast_relid = InvalidOid;
1554
1555         /*
1556          * Switch to the table owner's userid, so that any index functions are run
1557          * as that user.  Also lock down security-restricted operations and
1558          * arrange to make GUC variable changes local to this command. (This is
1559          * unnecessary, but harmless, for lazy VACUUM.)
1560          */
1561         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1562         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1563                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1564         save_nestlevel = NewGUCNestLevel();
1565
1566         /*
1567          * Do the actual work --- either FULL or "lazy" vacuum
1568          */
1569         if (options & VACOPT_FULL)
1570         {
1571                 /* close relation before vacuuming, but hold lock until commit */
1572                 relation_close(onerel, NoLock);
1573                 onerel = NULL;
1574
1575                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1576                 cluster_rel(relid, InvalidOid, false,
1577                                         (options & VACOPT_VERBOSE) != 0);
1578         }
1579         else
1580                 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1581
1582         /* Roll back any GUC changes executed by index functions */
1583         AtEOXact_GUC(false, save_nestlevel);
1584
1585         /* Restore userid and security context */
1586         SetUserIdAndSecContext(save_userid, save_sec_context);
1587
1588         /* all done with this class, but hold lock until commit */
1589         if (onerel)
1590                 relation_close(onerel, NoLock);
1591
1592         /*
1593          * Complete the transaction and free all temporary memory used.
1594          */
1595         PopActiveSnapshot();
1596         CommitTransactionCommand();
1597
1598         /*
1599          * If the relation has a secondary toast rel, vacuum that too while we
1600          * still hold the session lock on the master table.  Note however that
1601          * "analyze" will not get done on the toast table.  This is good, because
1602          * the toaster always uses hardcoded index access and statistics are
1603          * totally unimportant for toast relations.
1604          */
1605         if (toast_relid != InvalidOid)
1606                 vacuum_rel(toast_relid, NULL, options, params);
1607
1608         /*
1609          * Now release the session-level lock on the master table.
1610          */
1611         UnlockRelationIdForSession(&onerelid, lmode);
1612
1613         /* Report that we really did it. */
1614         return true;
1615 }
1616
1617
1618 /*
1619  * Open all the vacuumable indexes of the given relation, obtaining the
1620  * specified kind of lock on each.  Return an array of Relation pointers for
1621  * the indexes into *Irel, and the number of indexes into *nindexes.
1622  *
1623  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1624  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1625  * execution, and what we have is too corrupt to be processable.  We will
1626  * vacuum even if the index isn't indisvalid; this is important because in a
1627  * unique index, uniqueness checks will be performed anyway and had better not
1628  * hit dangling index pointers.
1629  */
1630 void
1631 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1632                                  int *nindexes, Relation **Irel)
1633 {
1634         List       *indexoidlist;
1635         ListCell   *indexoidscan;
1636         int                     i;
1637
1638         Assert(lockmode != NoLock);
1639
1640         indexoidlist = RelationGetIndexList(relation);
1641
1642         /* allocate enough memory for all indexes */
1643         i = list_length(indexoidlist);
1644
1645         if (i > 0)
1646                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1647         else
1648                 *Irel = NULL;
1649
1650         /* collect just the ready indexes */
1651         i = 0;
1652         foreach(indexoidscan, indexoidlist)
1653         {
1654                 Oid                     indexoid = lfirst_oid(indexoidscan);
1655                 Relation        indrel;
1656
1657                 indrel = index_open(indexoid, lockmode);
1658                 if (IndexIsReady(indrel->rd_index))
1659                         (*Irel)[i++] = indrel;
1660                 else
1661                         index_close(indrel, lockmode);
1662         }
1663
1664         *nindexes = i;
1665
1666         list_free(indexoidlist);
1667 }
1668
1669 /*
1670  * Release the resources acquired by vac_open_indexes.  Optionally release
1671  * the locks (say NoLock to keep 'em).
1672  */
1673 void
1674 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1675 {
1676         if (Irel == NULL)
1677                 return;
1678
1679         while (nindexes--)
1680         {
1681                 Relation        ind = Irel[nindexes];
1682
1683                 index_close(ind, lockmode);
1684         }
1685         pfree(Irel);
1686 }
1687
1688 /*
1689  * vacuum_delay_point --- check for interrupts and cost-based delay.
1690  *
1691  * This should be called in each major loop of VACUUM processing,
1692  * typically once per page processed.
1693  */
1694 void
1695 vacuum_delay_point(void)
1696 {
1697         /* Always check for interrupts */
1698         CHECK_FOR_INTERRUPTS();
1699
1700         /* Nap if appropriate */
1701         if (VacuumCostActive && !InterruptPending &&
1702                 VacuumCostBalance >= VacuumCostLimit)
1703         {
1704                 int                     msec;
1705
1706                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1707                 if (msec > VacuumCostDelay * 4)
1708                         msec = VacuumCostDelay * 4;
1709
1710                 pg_usleep(msec * 1000L);
1711
1712                 VacuumCostBalance = 0;
1713
1714                 /* update balance values for workers */
1715                 AutoVacuumUpdateDelay();
1716
1717                 /* Might have gotten an interrupt while sleeping */
1718                 CHECK_FOR_INTERRUPTS();
1719         }
1720 }