granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/commit_ts.h"
  27 #include "access/genam.h"
  28 #include "access/heapam.h"
  29 #include "access/htup_details.h"
  30 #include "access/multixact.h"
  31 #include "access/transam.h"
  32 #include "access/xact.h"
  33 #include "catalog/namespace.h"
  34 #include "catalog/pg_database.h"
  35 #include "catalog/pg_inherits_fn.h"
  36 #include "catalog/pg_namespace.h"
  37 #include "commands/cluster.h"
  38 #include "commands/vacuum.h"
  39 #include "miscadmin.h"
  40 #include "nodes/makefuncs.h"
  41 #include "pgstat.h"
  42 #include "postmaster/autovacuum.h"
  43 #include "storage/bufmgr.h"
  44 #include "storage/lmgr.h"
  45 #include "storage/proc.h"
  46 #include "storage/procarray.h"
  47 #include "utils/acl.h"
  48 #include "utils/fmgroids.h"
  49 #include "utils/guc.h"
  50 #include "utils/memutils.h"
  51 #include "utils/snapmgr.h"
  52 #include "utils/syscache.h"
  53 #include "utils/tqual.h"
  54
  55
  56 /*
  57  * GUC parameters
      *
      * vacuum_set_xid_limits() falls back to these whenever the corresponding
      * age passed in by its caller is negative: vacuum_freeze_min_age for
      * freeze_min_age, vacuum_freeze_table_age for freeze_table_age, and the
      * two multixact variables for their multixact counterparts.
  58  */
  59 int                     vacuum_freeze_min_age;
  60 int                     vacuum_freeze_table_age;
  61 int                     vacuum_multixact_freeze_min_age;
  62 int                     vacuum_multixact_freeze_table_age;
  63
  64
  65 /* A few variables that don't seem worth passing around as parameters */
     /*
      * vac_context is created at the start of each vacuum() call and deleted
      * (and reset to NULL) at its end; expand_vacuum_rel() and
      * get_all_vacuum_rels() allocate their result lists in it.
      */
  66 static MemoryContext vac_context = NULL;
     /*
      * vac_strategy is set by vacuum() to the caller-supplied buffer access
      * strategy, or to a BAS_VACUUM strategy allocated in vac_context when the
      * caller passed NULL.
      */
  67 static BufferAccessStrategy vac_strategy;
  68
  69
  70 /* non-export function prototypes */
     /* relation-list builders used by vacuum() */
  71 static List *expand_vacuum_rel(VacuumRelation *vrel);
  72 static List *get_all_vacuum_rels(void);
  73 static void vac_truncate_clog(TransactionId frozenXID,
  74                                   MultiXactId minMulti,
  75                                   TransactionId lastSaneFrozenXid,
  76                                   MultiXactId lastSaneMinMulti);
     /* per-relation VACUUM step; vacuum() skips ANALYZE when it returns false */
  77 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
  78                    VacuumParams *params);
  79
  80 /*
  81  * Primary entry point for manual VACUUM and ANALYZE commands
  82  *
  83  * This is mainly a preparation wrapper for the real operations that will
  84  * happen in vacuum().
  85  */
  86 void
  87 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
  88 {
  89         VacuumParams params;
  90
  91         /* sanity checks on options */
  92         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
  93         Assert((vacstmt->options & VACOPT_VACUUM) ||
  94                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
  95         Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
  96
  97         /*
  98          * Make sure VACOPT_ANALYZE is specified if any column lists are present.
  99          */
 100         if (!(vacstmt->options & VACOPT_ANALYZE))
 101         {
 102                 ListCell   *lc;
 103
 104                 foreach(lc, vacstmt->rels)
 105                 {
 106                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
 107
 108                         if (vrel->va_cols != NIL)
 109                                 ereport(ERROR,
 110                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 111                                                  errmsg("ANALYZE option must be specified when a column list is provided")));
 112                 }
 113         }
 114
 115         /*
 116          * All freeze ages are zero if the FREEZE option is given; otherwise pass
 117          * them as -1 which means to use the default values.
 118          */
 119         if (vacstmt->options & VACOPT_FREEZE)
 120         {
 121                 params.freeze_min_age = 0;
 122                 params.freeze_table_age = 0;
 123                 params.multixact_freeze_min_age = 0;
 124                 params.multixact_freeze_table_age = 0;
 125         }
 126         else
 127         {
 128                 params.freeze_min_age = -1;
 129                 params.freeze_table_age = -1;
 130                 params.multixact_freeze_min_age = -1;
 131                 params.multixact_freeze_table_age = -1;
 132         }
 133
 134         /* user-invoked vacuum is never "for wraparound" */
 135         params.is_wraparound = false;
 136
 137         /* user-invoked vacuum never uses this parameter */
 138         params.log_min_duration = -1;
 139
 140         /* Now go through the common routine */
 141         vacuum(vacstmt->options, vacstmt->rels, &params, NULL, isTopLevel);
 142 }
 143
 144 /*
 145  * Internal entry point for VACUUM and ANALYZE commands.
 146  *
 147  * options is a bitmask of VacuumOption flags, indicating what to do.
 148  *
 149  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
 150  * we process all relevant tables in the database.  For each VacuumRelation,
 151  * if a valid OID is supplied, the table with that OID is what to process;
 152  * otherwise, the VacuumRelation's RangeVar indicates what to process.
 153  *
 154  * params contains a set of parameters that can be used to customize the
 155  * behavior.
 156  *
 157  * bstrategy is normally given as NULL, but in autovacuum it can be passed
 158  * in to use the same buffer strategy object across multiple vacuum() calls.
 159  *
 160  * isTopLevel should be passed down from ProcessUtility.
 161  *
 162  * It is the caller's responsibility that all parameters are allocated in a
 163  * memory context that will not disappear at transaction commit.
      *
      * Order of processing:
      *   1. Refuse VACUUM inside a transaction block, refuse re-entry, and
      *      refuse FULL combined with DISABLE_PAGE_SKIPPING (the latter two
      *      via ereport(ERROR)).
      *   2. Create vac_context and settle on a buffer access strategy.
      *   3. Build the list of relations to work on.
      *   4. Decide whether to run each relation in its own transaction; if
      *      so, commit the transaction we were called in.
      *   5. For each relation, call vacuum_rel() and/or analyze_rel() as the
      *      option bits request.
      *   6. If we committed the caller's transaction, start a new one; for a
      *      VACUUM outside autovacuum, call vac_update_datfrozenxid(); then
      *      delete vac_context.
 164  */
 165 void
 166 vacuum(int options, List *relations, VacuumParams *params,
 167            BufferAccessStrategy bstrategy, bool isTopLevel)
 168 {
             /* true while a vacuum() call is in progress; used to reject re-entry */
 169         static bool in_vacuum = false;
 170
 171         const char *stmttype;
             /*
              * NOTE(review): presumably declared volatile because they are set
              * before the PG_TRY block below and read after it -- confirm.
              */
 172         volatile bool in_outer_xact,
 173                                 use_own_xacts;
 174
 175         Assert(params != NULL);
 176
             /* command name used in error messages */
 177         stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 178
 179         /*
 180          * We cannot run VACUUM inside a user transaction block; if we were inside
 181          * a transaction, then our commit- and start-transaction-command calls
 182          * would not have the intended effect!  There are numerous other subtle
 183          * dependencies on this, too.
 184          *
 185          * ANALYZE (without VACUUM) can run either way.
 186          */
 187         if (options & VACOPT_VACUUM)
 188         {
 189                 PreventTransactionChain(isTopLevel, stmttype);
 190                 in_outer_xact = false;
 191         }
 192         else
 193                 in_outer_xact = IsInTransactionChain(isTopLevel);
 194
 195         /*
 196          * Due to static variables vac_context, anl_context and vac_strategy,
 197          * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
 198          * calls a hostile index expression that itself calls ANALYZE.
 199          */
 200         if (in_vacuum)
 201                 ereport(ERROR,
 202                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 203                                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
 204                                                 stmttype)));
 205
 206         /*
 207          * Sanity check DISABLE_PAGE_SKIPPING option.
 208          */
 209         if ((options & VACOPT_FULL) != 0 &&
 210                 (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
 211                 ereport(ERROR,
 212                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 213                                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
 214
 215         /*
 216          * Send info about dead objects to the statistics collector, unless we are
 217          * in autovacuum --- autovacuum.c does this for itself.
 218          */
 219         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 220                 pgstat_vacuum_stat();
 221
 222         /*
 223          * Create special memory context for cross-transaction storage.
 224          *
 225          * Since it is a child of PortalContext, it will go away eventually even
 226          * if we suffer an error; there's no need for special abort cleanup logic.
 227          */
 228         vac_context = AllocSetContextCreate(PortalContext,
 229                                                                                 "Vacuum",
 230                                                                                 ALLOCSET_DEFAULT_SIZES);
 231
 232         /*
 233          * If caller didn't give us a buffer strategy object, make one in the
 234          * cross-transaction memory context.
 235          */
 236         if (bstrategy == NULL)
 237         {
 238                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 239
 240                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 241                 MemoryContextSwitchTo(old_context);
 242         }
 243         vac_strategy = bstrategy;
 244
 245         /*
 246          * Build list of relation(s) to process, putting any new data in
 247          * vac_context for safekeeping.
 248          */
 249         if (relations != NIL)
 250         {
 251                 List       *newrels = NIL;
 252                 ListCell   *lc;
 253
 254                 foreach(lc, relations)
 255                 {
 256                         VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
 257                         List       *sublist;
 258                         MemoryContext old_context;
 259
                             /* one request may expand into several entries (partitions) */
 260                         sublist = expand_vacuum_rel(vrel);
 261                         old_context = MemoryContextSwitchTo(vac_context);
 262                         newrels = list_concat(newrels, sublist);
 263                         MemoryContextSwitchTo(old_context);
 264                 }
 265                 relations = newrels;
 266         }
 267         else
 268                 relations = get_all_vacuum_rels();
 269
 270         /*
 271          * Decide whether we need to start/commit our own transactions.
 272          *
 273          * For VACUUM (with or without ANALYZE): always do so, so that we can
 274          * release locks as soon as possible.  (We could possibly use the outer
 275          * transaction for a one-table VACUUM, but handling TOAST tables would be
 276          * problematic.)
 277          *
 278          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 279          * start/commit our own transactions.  Also, there's no need to do so if
 280          * only processing one relation.  For multiple relations when not within a
 281          * transaction block, and also in an autovacuum worker, use own
 282          * transactions so we can release locks sooner.
 283          */
 284         if (options & VACOPT_VACUUM)
 285                 use_own_xacts = true;
 286         else
 287         {
 288                 Assert(options & VACOPT_ANALYZE);
 289                 if (IsAutoVacuumWorkerProcess())
 290                         use_own_xacts = true;
 291                 else if (in_outer_xact)
 292                         use_own_xacts = false;
 293                 else if (list_length(relations) > 1)
 294                         use_own_xacts = true;
 295                 else
 296                         use_own_xacts = false;
 297         }
 298
 299         /*
 300          * vacuum_rel expects to be entered with no transaction active; it will
 301          * start and commit its own transaction.  But we are called by an SQL
 302          * command, and so we are executing inside a transaction already. We
 303          * commit the transaction started in PostgresMain() here, and start
 304          * another one before exiting to match the commit waiting for us back in
 305          * PostgresMain().
 306          */
 307         if (use_own_xacts)
 308         {
 309                 Assert(!in_outer_xact);
 310
 311                 /* ActiveSnapshot is not set by autovacuum */
 312                 if (ActiveSnapshotSet())
 313                         PopActiveSnapshot();
 314
 315                 /* matches the StartTransaction in PostgresMain() */
 316                 CommitTransactionCommand();
 317         }
 318
 319         /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
 320         PG_TRY();
 321         {
 322                 ListCell   *cur;
 323
 324                 in_vacuum = true;
                     /* cost accounting is active only when a positive delay is configured */
 325                 VacuumCostActive = (VacuumCostDelay > 0);
 326                 VacuumCostBalance = 0;
 327                 VacuumPageHit = 0;
 328                 VacuumPageMiss = 0;
 329                 VacuumPageDirty = 0;
 330
 331                 /*
 332                  * Loop to process each selected relation.
 333                  */
 334                 foreach(cur, relations)
 335                 {
 336                         VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
 337
 338                         if (options & VACOPT_VACUUM)
 339                         {
                                     /* a false result also skips the ANALYZE step below */
 340                                 if (!vacuum_rel(vrel->oid, vrel->relation, options, params))
 341                                         continue;
 342                         }
 343
 344                         if (options & VACOPT_ANALYZE)
 345                         {
 346                                 /*
 347                                  * If using separate xacts, start one for analyze. Otherwise,
 348                                  * we can use the outer transaction.
 349                                  */
 350                                 if (use_own_xacts)
 351                                 {
 352                                         StartTransactionCommand();
 353                                         /* functions in indexes may want a snapshot set */
 354                                         PushActiveSnapshot(GetTransactionSnapshot());
 355                                 }
 356
 357                                 analyze_rel(vrel->oid, vrel->relation, options, params,
 358                                                         vrel->va_cols, in_outer_xact, vac_strategy);
 359
 360                                 if (use_own_xacts)
 361                                 {
 362                                         PopActiveSnapshot();
 363                                         CommitTransactionCommand();
 364                                 }
 365                         }
 366                 }
 367         }
 368         PG_CATCH();
 369         {
                     /* error path: clear the flags set above, then propagate the error */
 370                 in_vacuum = false;
 371                 VacuumCostActive = false;
 372                 PG_RE_THROW();
 373         }
 374         PG_END_TRY();
 375
             /* normal path: clear the same flags */
 376         in_vacuum = false;
 377         VacuumCostActive = false;
 378
 379         /*
 380          * Finish up processing.
 381          */
 382         if (use_own_xacts)
 383         {
 384                 /* here, we are not in a transaction */
 385
 386                 /*
 387                  * This matches the CommitTransaction waiting for us in
 388                  * PostgresMain().
 389                  */
 390                 StartTransactionCommand();
 391         }
 392
 393         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 394         {
 395                 /*
 396                  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
 397                  * (autovacuum.c does this for itself.)
 398                  */
 399                 vac_update_datfrozenxid();
 400         }
 401
 402         /*
 403          * Clean up working storage --- note we must do this after
 404          * StartTransactionCommand, else we might be trying to delete the active
 405          * context!
 406          */
 407         MemoryContextDelete(vac_context);
 408         vac_context = NULL;
 409 }
 410
 411 /*
 412  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
 413  * and optionally add VacuumRelations for partitions of the table.
 414  *
 415  * If a VacuumRelation does not have an OID supplied and is a partitioned
 416  * table, an extra entry will be added to the output for each partition.
 417  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
 418  * it does not want us to expand partitioned tables.
 419  *
 420  * We take care not to modify the input data structure, but instead build
 421  * new VacuumRelation(s) to return.  (But note that they will reference
 422  * unmodified parts of the input, eg column lists.)  New data structures
 423  * are made in vac_context.
 424  */
 425 static List *
 426 expand_vacuum_rel(VacuumRelation *vrel)
 427 {
 428         List       *vacrels = NIL;
 429         MemoryContext oldcontext;
 430
 431         /* If caller supplied OID, there's nothing we need do here. */
 432         if (OidIsValid(vrel->oid))
 433         {
 434                 oldcontext = MemoryContextSwitchTo(vac_context);
 435                 vacrels = lappend(vacrels, vrel);
 436                 MemoryContextSwitchTo(oldcontext);
 437         }
 438         else
 439         {
 440                 /* Process a specific relation, and possibly partitions thereof */
 441                 Oid                     relid;
 442                 HeapTuple       tuple;
 443                 Form_pg_class classForm;
 444                 bool            include_parts;
 445
 446                 /*
 447                  * We transiently take AccessShareLock to protect the syscache lookup
 448                  * below, as well as find_all_inheritors's expectation that the caller
 449                  * holds some lock on the starting relation.
 450                  */
 451                 relid = RangeVarGetRelid(vrel->relation, AccessShareLock, false);
 452
 453                 /*
 454                  * Make a returnable VacuumRelation for this rel.
 455                  */
 456                 oldcontext = MemoryContextSwitchTo(vac_context);
 457                 vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
 458                                                                                                           relid,
 459                                                                                                           vrel->va_cols));
 460                 MemoryContextSwitchTo(oldcontext);
 461
 462                 /*
 463                  * To check whether the relation is a partitioned table, fetch its
 464                  * syscache entry.
 465                  */
 466                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
 467                 if (!HeapTupleIsValid(tuple))
 468                         elog(ERROR, "cache lookup failed for relation %u", relid);
 469                 classForm = (Form_pg_class) GETSTRUCT(tuple);
 470                 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
 471                 ReleaseSysCache(tuple);
 472
 473                 /*
 474                  * If it is, make relation list entries for its partitions.  Note that
 475                  * the list returned by find_all_inheritors() includes the passed-in
 476                  * OID, so we have to skip that.  There's no point in taking locks on
 477                  * the individual partitions yet, and doing so would just add
 478                  * unnecessary deadlock risk.
 479                  */
 480                 if (include_parts)
 481                 {
 482                         List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
 483                         ListCell   *part_lc;
 484
 485                         foreach(part_lc, part_oids)
 486                         {
 487                                 Oid                     part_oid = lfirst_oid(part_lc);
 488
 489                                 if (part_oid == relid)
 490                                         continue;       /* ignore original table */
 491
 492                                 /*
 493                                  * We omit a RangeVar since it wouldn't be appropriate to
 494                                  * complain about failure to open one of these relations
 495                                  * later.
 496                                  */
 497                                 oldcontext = MemoryContextSwitchTo(vac_context);
 498                                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
 499                                                                                                                           part_oid,
 500                                                                                                                           vrel->va_cols));
 501                                 MemoryContextSwitchTo(oldcontext);
 502                         }
 503                 }
 504
 505                 /*
 506                  * Release lock again.  This means that by the time we actually try to
 507                  * process the table, it might be gone or renamed.  In the former case
 508                  * we'll silently ignore it; in the latter case we'll process it
 509                  * anyway, but we must beware that the RangeVar doesn't necessarily
 510                  * identify it anymore.  This isn't ideal, perhaps, but there's little
 511                  * practical alternative, since we're typically going to commit this
 512                  * transaction and begin a new one between now and then.  Moreover,
 513                  * holding locks on multiple relations would create significant risk
 514                  * of deadlock.
 515                  */
 516                 UnlockRelationOid(relid, AccessShareLock);
 517         }
 518
 519         return vacrels;
 520 }
 521
 522 /*
 523  * Construct a list of VacuumRelations for all vacuumable rels in
 524  * the current database.  The list is built in vac_context.
 525  */
 526 static List *
 527 get_all_vacuum_rels(void)
 528 {
 529         List       *vacrels = NIL;
 530         Relation        pgclass;
 531         HeapScanDesc scan;
 532         HeapTuple       tuple;
 533
 534         pgclass = heap_open(RelationRelationId, AccessShareLock);
 535
 536         scan = heap_beginscan_catalog(pgclass, 0, NULL);
 537
 538         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 539         {
 540                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 541                 MemoryContext oldcontext;
 542
 543                 /*
 544                  * We include partitioned tables here; depending on which operation is
 545                  * to be performed, caller will decide whether to process or ignore
 546                  * them.
 547                  */
 548                 if (classForm->relkind != RELKIND_RELATION &&
 549                         classForm->relkind != RELKIND_MATVIEW &&
 550                         classForm->relkind != RELKIND_PARTITIONED_TABLE)
 551                         continue;
 552
 553                 /*
 554                  * Build VacuumRelation(s) specifying the table OIDs to be processed.
 555                  * We omit a RangeVar since it wouldn't be appropriate to complain
 556                  * about failure to open one of these relations later.
 557                  */
 558                 oldcontext = MemoryContextSwitchTo(vac_context);
 559                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
 560                                                                                                           HeapTupleGetOid(tuple),
 561                                                                                                           NIL));
 562                 MemoryContextSwitchTo(oldcontext);
 563         }
 564
 565         heap_endscan(scan);
 566         heap_close(pgclass, AccessShareLock);
 567
 568         return vacrels;
 569 }
 570
 571 /*
 572  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 573  *
 574  * The output parameters are:
 575  * - oldestXmin is the cutoff value used to distinguish whether tuples are
 576  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 577  * - freezeLimit is the Xid below which all Xids are replaced by
 578  *       FrozenTransactionId during vacuum.
 579  * - xidFullScanLimit (computed from table_freeze_age parameter)
 580  *       represents a minimum Xid value; a table whose relfrozenxid is older than
 581  *       this will have a full-table vacuum applied to it, to freeze tuples across
 582  *       the whole table.  Vacuuming a table younger than this value can use a
 583  *       partial scan.
 584  * - multiXactCutoff is the value below which all MultiXactIds are removed from
 585  *       Xmax.
 586  * - mxactFullScanLimit is a value against which a table's relminmxid value is
 587  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
 588  *
 589  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 590  * not interested.
 591  */
 592 void
 593 vacuum_set_xid_limits(Relation rel,
 594                                           int freeze_min_age,
 595                                           int freeze_table_age,
 596                                           int multixact_freeze_min_age,
 597                                           int multixact_freeze_table_age,
 598                                           TransactionId *oldestXmin,
 599                                           TransactionId *freezeLimit,
 600                                           TransactionId *xidFullScanLimit,
 601                                           MultiXactId *multiXactCutoff,
 602                                           MultiXactId *mxactFullScanLimit)
 603 {
 604         int                     freezemin;
 605         int                     mxid_freezemin;
 606         int                     effective_multixact_freeze_max_age;
 607         TransactionId limit;
 608         TransactionId safeLimit;
 609         MultiXactId mxactLimit;
 610         MultiXactId safeMxactLimit;
 611
 612         /*
 613          * We can always ignore processes running lazy vacuum.  This is because we
 614          * use these values only for deciding which tuples we must keep in the
 615          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
 616          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 617          * a full vacuum, but keep in mind that only one vacuum process can be
 618          * working on a particular table at any time, and that each vacuum is
 619          * always an independent transaction.
 620          */
 621         *oldestXmin =
 622                 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
 623
 624         Assert(TransactionIdIsNormal(*oldestXmin));
 625
 626         /*
 627          * Determine the minimum freeze age to use: as specified by the caller, or
 628          * vacuum_freeze_min_age, but in any case not more than half
 629          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 630          * wraparound won't occur too frequently.
 631          */
 632         freezemin = freeze_min_age;
 633         if (freezemin < 0)
 634                 freezemin = vacuum_freeze_min_age;
 635         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 636         Assert(freezemin >= 0);
 637
 638         /*
 639          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 640          */
 641         limit = *oldestXmin - freezemin;
 642         if (!TransactionIdIsNormal(limit))
 643                 limit = FirstNormalTransactionId;
 644
 645         /*
 646          * If oldestXmin is very far back (in practice, more than
 647          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 648          * freeze age of zero.
 649          */
 650         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 651         if (!TransactionIdIsNormal(safeLimit))
 652                 safeLimit = FirstNormalTransactionId;
 653
 654         if (TransactionIdPrecedes(limit, safeLimit))
 655         {
 656                 ereport(WARNING,
 657                                 (errmsg("oldest xmin is far in the past"),
 658                                  errhint("Close open transactions soon to avoid wraparound problems.\n"
 659                                                  "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 660                 limit = *oldestXmin;
 661         }
 662
 663         *freezeLimit = limit;
 664
 665         /*
 666          * Compute the multixact age for which freezing is urgent.  This is
 667          * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 668          * short of multixact member space.
 669          */
 670         effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 671
 672         /*
 673          * Determine the minimum multixact freeze age to use: as specified by
 674          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
 675          * than half effective_multixact_freeze_max_age, so that autovacuums to
 676          * prevent MultiXact wraparound won't occur too frequently.
 677          */
 678         mxid_freezemin = multixact_freeze_min_age;
 679         if (mxid_freezemin < 0)
 680                 mxid_freezemin = vacuum_multixact_freeze_min_age;
 681         mxid_freezemin = Min(mxid_freezemin,
 682                                                  effective_multixact_freeze_max_age / 2);
 683         Assert(mxid_freezemin >= 0);
 684
 685         /* compute the cutoff multi, being careful to generate a valid value */
 686         mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
 687         if (mxactLimit < FirstMultiXactId)
 688                 mxactLimit = FirstMultiXactId;
 689
 690         safeMxactLimit =
 691                 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
 692         if (safeMxactLimit < FirstMultiXactId)
 693                 safeMxactLimit = FirstMultiXactId;
 694
 695         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
 696         {
 697                 ereport(WARNING,
 698                                 (errmsg("oldest multixact is far in the past"),
 699                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 700                 mxactLimit = safeMxactLimit;
 701         }
 702
 703         *multiXactCutoff = mxactLimit;
 704
 705         if (xidFullScanLimit != NULL)
 706         {
 707                 int                     freezetable;
 708
 709                 Assert(mxactFullScanLimit != NULL);
 710
 711                 /*
 712                  * Determine the table freeze age to use: as specified by the caller,
 713                  * or vacuum_freeze_table_age, but in any case not more than
 714                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 715                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 716                  * before anti-wraparound autovacuum is launched.
 717                  */
 718                 freezetable = freeze_table_age;
 719                 if (freezetable < 0)
 720                         freezetable = vacuum_freeze_table_age;
 721                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 722                 Assert(freezetable >= 0);
 723
 724                 /*
 725                  * Compute XID limit causing a full-table vacuum, being careful not to
 726                  * generate a "permanent" XID.
 727                  */
 728                 limit = ReadNewTransactionId() - freezetable;
 729                 if (!TransactionIdIsNormal(limit))
 730                         limit = FirstNormalTransactionId;
 731
 732                 *xidFullScanLimit = limit;
 733
 734                 /*
 735                  * Similar to the above, determine the table freeze age to use for
 736                  * multixacts: as specified by the caller, or
 737                  * vacuum_multixact_freeze_table_age, but in any case not more than
 738                  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
 739                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
 740                  * freeze multixacts before anti-wraparound autovacuum is launched.
 741                  */
 742                 freezetable = multixact_freeze_table_age;
 743                 if (freezetable < 0)
 744                         freezetable = vacuum_multixact_freeze_table_age;
 745                 freezetable = Min(freezetable,
 746                                                   effective_multixact_freeze_max_age * 0.95);
 747                 Assert(freezetable >= 0);
 748
 749                 /*
 750                  * Compute MultiXact limit causing a full-table vacuum, being careful
 751                  * to generate a valid MultiXact value.
 752                  */
 753                 mxactLimit = ReadNextMultiXactId() - freezetable;
 754                 if (mxactLimit < FirstMultiXactId)
 755                         mxactLimit = FirstMultiXactId;
 756
 757                 *mxactFullScanLimit = mxactLimit;
 758         }
 759         else
 760         {
 761                 Assert(mxactFullScanLimit == NULL);
 762         }
 763 }
 764
 765 /*
 766  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
 767  *
 768  *              If we scanned the whole relation then we should just use the count of
 769  *              live tuples seen; but if we did not, we should not blindly extrapolate
 770  *              from that number, since VACUUM may have scanned a quite nonrandom
 771  *              subset of the table.  When we have only partial information, we take
 772  *              the old value of pg_class.reltuples as a measurement of the
 773  *              tuple density in the unscanned pages.
 774  */
 775 double
 776 vac_estimate_reltuples(Relation relation,
 777                                            BlockNumber total_pages,
 778                                            BlockNumber scanned_pages,
 779                                            double scanned_tuples)
 780 {
 781         BlockNumber old_rel_pages = relation->rd_rel->relpages;
 782         double          old_rel_tuples = relation->rd_rel->reltuples;
 783         double          old_density;
 784         double          unscanned_pages;
 785         double          total_tuples;
 786
 787         /* If we did scan the whole table, just use the count as-is */
 788         if (scanned_pages >= total_pages)
 789                 return scanned_tuples;
 790
 791         /*
 792          * If scanned_pages is zero but total_pages isn't, keep the existing value
 793          * of reltuples.  (Note: callers should avoid updating the pg_class
 794          * statistics in this situation, since no new information has been
 795          * provided.)
 796          */
 797         if (scanned_pages == 0)
 798                 return old_rel_tuples;
 799
 800         /*
 801          * If old value of relpages is zero, old density is indeterminate; we
 802          * can't do much except scale up scanned_tuples to match total_pages.
 803          */
 804         if (old_rel_pages == 0)
 805                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 806
 807         /*
 808          * Okay, we've covered the corner cases.  The normal calculation is to
 809          * convert the old measurement to a density (tuples per page), then
 810          * estimate the number of tuples in the unscanned pages using that figure,
 811          * and finally add on the number of tuples in the scanned pages.
 812          */
 813         old_density = old_rel_tuples / old_rel_pages;
 814         unscanned_pages = (double) total_pages - (double) scanned_pages;
 815         total_tuples = old_density * unscanned_pages + scanned_tuples;
 816         return floor(total_tuples + 0.5);
 817 }
 818
 819
 820 /*
 821  *      vac_update_relstats() -- update statistics for one relation
 822  *
 823  *              Update the whole-relation statistics that are kept in its pg_class
 824  *              row.  There are additional stats that will be updated if we are
 825  *              doing ANALYZE, but we always update these stats.  This routine works
 826  *              for both index and heap relation entries in pg_class.
 827  *
 828  *              We violate transaction semantics here by overwriting the rel's
 829  *              existing pg_class tuple with the new values.  This is reasonably
 830  *              safe as long as we're sure that the new values are correct whether or
 831  *              not this transaction commits.  The reason for doing this is that if
 832  *              we updated these tuples in the usual way, vacuuming pg_class itself
 833  *              wouldn't work very well --- by the time we got done with a vacuum
 834  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
 835  *              course, this only works for fixed-size not-null columns, but these are.
 836  *
 837  *              Another reason for doing it this way is that when we are in a lazy
 838  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
 839  *              Somebody vacuuming pg_class might think they could delete a tuple
 840  *              marked with xmin = our xid.
 841  *
 842  *              In addition to fundamentally nontransactional statistics such as
 843  *              relpages and relallvisible, we try to maintain certain lazily-updated
 844  *              DDL flags such as relhasindex, by clearing them if no longer correct.
 845  *              It's safe to do this in VACUUM, which can't run in parallel with
 846  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
 847  *              However, it's *not* safe to do it in an ANALYZE that's within an
 848  *              outer transaction, because for example the current transaction might
 849  *              have dropped the last index; then we'd think relhasindex should be
 850  *              cleared, but if the transaction later rolls back this would be wrong.
 851  *              So we refrain from updating the DDL flags if we're inside an outer
 852  *              transaction.  This is OK since postponing the flag maintenance is
 853  *              always allowable.
 854  *
 855  *              This routine is shared by VACUUM and ANALYZE.
 856  */
 857 void
 858 vac_update_relstats(Relation relation,
 859                                         BlockNumber num_pages, double num_tuples,
 860                                         BlockNumber num_all_visible_pages,
 861                                         bool hasindex, TransactionId frozenxid,
 862                                         MultiXactId minmulti,
 863                                         bool in_outer_xact)
 864 {
 865         Oid                     relid = RelationGetRelid(relation);
 866         Relation        rd;
 867         HeapTuple       ctup;
 868         Form_pg_class pgcform;
 869         bool            dirty;
 870
 871         rd = heap_open(RelationRelationId, RowExclusiveLock);
 872
 873         /* Fetch a copy of the tuple to scribble on */
 874         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
 875         if (!HeapTupleIsValid(ctup))
 876                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 877                          relid);
 878         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 879
 880         /* Apply statistical updates, if any, to copied tuple */
 881
 882         dirty = false;
 883         if (pgcform->relpages != (int32) num_pages)
 884         {
 885                 pgcform->relpages = (int32) num_pages;
 886                 dirty = true;
 887         }
 888         if (pgcform->reltuples != (float4) num_tuples)
 889         {
 890                 pgcform->reltuples = (float4) num_tuples;
 891                 dirty = true;
 892         }
 893         if (pgcform->relallvisible != (int32) num_all_visible_pages)
 894         {
 895                 pgcform->relallvisible = (int32) num_all_visible_pages;
 896                 dirty = true;
 897         }
 898
 899         /* Apply DDL updates, but not inside an outer transaction (see above) */
 900
 901         if (!in_outer_xact)
 902         {
 903                 /*
 904                  * If we didn't find any indexes, reset relhasindex.
 905                  */
 906                 if (pgcform->relhasindex && !hasindex)
 907                 {
 908                         pgcform->relhasindex = false;
 909                         dirty = true;
 910                 }
 911
 912                 /* We also clear relhasrules and relhastriggers if needed */
 913                 if (pgcform->relhasrules && relation->rd_rules == NULL)
 914                 {
 915                         pgcform->relhasrules = false;
 916                         dirty = true;
 917                 }
 918                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
 919                 {
 920                         pgcform->relhastriggers = false;
 921                         dirty = true;
 922                 }
 923         }
 924
 925         /*
 926          * Update relfrozenxid, unless caller passed InvalidTransactionId
 927          * indicating it has no new data.
 928          *
 929          * Ordinarily, we don't let relfrozenxid go backwards: if things are
 930          * working correctly, the only way the new frozenxid could be older would
 931          * be if a previous VACUUM was done with a tighter freeze_min_age, in
 932          * which case we don't want to forget the work it already did.  However,
 933          * if the stored relfrozenxid is "in the future", then it must be corrupt
 934          * and it seems best to overwrite it with the cutoff we used this time.
 935          * This should match vac_update_datfrozenxid() concerning what we consider
 936          * to be "in the future".
 937          */
 938         if (TransactionIdIsNormal(frozenxid) &&
 939                 pgcform->relfrozenxid != frozenxid &&
 940                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
 941                  TransactionIdPrecedes(ReadNewTransactionId(),
 942                                                            pgcform->relfrozenxid)))
 943         {
 944                 pgcform->relfrozenxid = frozenxid;
 945                 dirty = true;
 946         }
 947
 948         /* Similarly for relminmxid */
 949         if (MultiXactIdIsValid(minmulti) &&
 950                 pgcform->relminmxid != minmulti &&
 951                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
 952                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
 953         {
 954                 pgcform->relminmxid = minmulti;
 955                 dirty = true;
 956         }
 957
 958         /* If anything changed, write out the tuple. */
 959         if (dirty)
 960                 heap_inplace_update(rd, ctup);
 961
 962         heap_close(rd, RowExclusiveLock);
 963 }
 964
 965
 966 /*
 967  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 968  *
 969  *              Update pg_database's datfrozenxid entry for our database to be the
 970  *              minimum of the pg_class.relfrozenxid values.
 971  *
 972  *              Similarly, update our datminmxid to be the minimum of the
 973  *              pg_class.relminmxid values.
 974  *
 975  *              If we are able to advance either pg_database value, also try to
 976  *              truncate pg_xact and pg_multixact.
 977  *
 978  *              We violate transaction semantics here by overwriting the database's
 979  *              existing pg_database tuple with the new values.  This is reasonably
 980  *              safe since the new values are correct whether or not this transaction
 981  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
 982  *              behind after a VACUUM.
 983  */
 984 void
 985 vac_update_datfrozenxid(void)
 986 {
 987         HeapTuple       tuple;
 988         Form_pg_database dbform;
 989         Relation        relation;
 990         SysScanDesc scan;
 991         HeapTuple       classTup;
 992         TransactionId newFrozenXid;
 993         MultiXactId newMinMulti;
 994         TransactionId lastSaneFrozenXid;
 995         MultiXactId lastSaneMinMulti;
 996         bool            bogus = false;
 997         bool            dirty = false;
 998
 999         /*
1000          * Initialize the "min" calculation with GetOldestXmin, which is a
1001          * reasonable approximation to the minimum relfrozenxid for not-yet-
1002          * committed pg_class entries for new tables; see AddNewRelationTuple().
1003          * So we cannot produce a wrong minimum by starting with this.
1004          */
1005         newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1006
1007         /*
1008          * Similarly, initialize the MultiXact "min" with the value that would be
1009          * used on pg_class for new tables.  See AddNewRelationTuple().
1010          */
1011         newMinMulti = GetOldestMultiXactId();
1012
1013         /*
1014          * Identify the latest relfrozenxid and relminmxid values that we could
1015          * validly see during the scan.  These are conservative values, but it's
1016          * not really worth trying to be more exact.
1017          */
1018         lastSaneFrozenXid = ReadNewTransactionId();
1019         lastSaneMinMulti = ReadNextMultiXactId();
1020
1021         /*
1022          * We must seqscan pg_class to find the minimum Xid, because there is no
1023          * index that can help us here.
1024          */
1025         relation = heap_open(RelationRelationId, AccessShareLock);
1026
1027         scan = systable_beginscan(relation, InvalidOid, false,
1028                                                           NULL, 0, NULL);
1029
1030         while ((classTup = systable_getnext(scan)) != NULL)
1031         {
1032                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1033
1034                 /*
1035                  * Only consider relations able to hold unfrozen XIDs (anything else
1036                  * should have InvalidTransactionId in relfrozenxid anyway.)
1037                  */
1038                 if (classForm->relkind != RELKIND_RELATION &&
1039                         classForm->relkind != RELKIND_MATVIEW &&
1040                         classForm->relkind != RELKIND_TOASTVALUE)
1041                         continue;
1042
1043                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1044                 Assert(MultiXactIdIsValid(classForm->relminmxid));
1045
1046                 /*
1047                  * If things are working properly, no relation should have a
1048                  * relfrozenxid or relminmxid that is "in the future".  However, such
1049                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1050                  * see any entries that are "in the future", chicken out and don't do
1051                  * anything.  This ensures we won't truncate clog before those
1052                  * relations have been scanned and cleaned up.
1053                  */
1054                 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1055                         MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1056                 {
1057                         bogus = true;
1058                         break;
1059                 }
1060
1061                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1062                         newFrozenXid = classForm->relfrozenxid;
1063
1064                 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1065                         newMinMulti = classForm->relminmxid;
1066         }
1067
1068         /* we're done with pg_class */
1069         systable_endscan(scan);
1070         heap_close(relation, AccessShareLock);
1071
1072         /* chicken out if bogus data found */
1073         if (bogus)
1074                 return;
1075
1076         Assert(TransactionIdIsNormal(newFrozenXid));
1077         Assert(MultiXactIdIsValid(newMinMulti));
1078
1079         /* Now fetch the pg_database tuple we need to update. */
1080         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1081
1082         /* Fetch a copy of the tuple to scribble on */
1083         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1084         if (!HeapTupleIsValid(tuple))
1085                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1086         dbform = (Form_pg_database) GETSTRUCT(tuple);
1087
1088         /*
1089          * As in vac_update_relstats(), we ordinarily don't want to let
1090          * datfrozenxid go backward; but if it's "in the future" then it must be
1091          * corrupt and it seems best to overwrite it.
1092          */
1093         if (dbform->datfrozenxid != newFrozenXid &&
1094                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1095                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1096         {
1097                 dbform->datfrozenxid = newFrozenXid;
1098                 dirty = true;
1099         }
1100         else
1101                 newFrozenXid = dbform->datfrozenxid;
1102
1103         /* Ditto for datminmxid */
1104         if (dbform->datminmxid != newMinMulti &&
1105                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1106                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1107         {
1108                 dbform->datminmxid = newMinMulti;
1109                 dirty = true;
1110         }
1111         else
1112                 newMinMulti = dbform->datminmxid;
1113
1114         if (dirty)
1115                 heap_inplace_update(relation, tuple);
1116
1117         heap_freetuple(tuple);
1118         heap_close(relation, RowExclusiveLock);
1119
1120         /*
1121          * If we were able to advance datfrozenxid or datminmxid, see if we can
1122          * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1123          * XID-wrap-limit info is stale, since this action will update that too.
1124          */
1125         if (dirty || ForceTransactionIdLimitUpdate())
1126                 vac_truncate_clog(newFrozenXid, newMinMulti,
1127                                                   lastSaneFrozenXid, lastSaneMinMulti);
1128 }
1129
1130
1131 /*
1132  *      vac_truncate_clog() -- attempt to truncate the commit log
1133  *
1134  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
1135  *              and use it to truncate the transaction commit log (pg_xact).
1136  *              Also update the XID wrap limit info maintained by varsup.c.
1137  *              Likewise for datminmxid.
1138  *
1139  *              The passed frozenXID and minMulti are the updated values for my own
1140  *              pg_database entry. They're used to initialize the "min" calculations.
1141  *              The caller also passes the "last sane" XID and MXID, since it has
1142  *              those at hand already.
1143  *
1144  *              This routine is only invoked when we've managed to change our
1145  *              DB's datfrozenxid/datminmxid values, or we found that the shared
1146  *              XID-wrap-limit info is stale.
1147  */
1148 static void
1149 vac_truncate_clog(TransactionId frozenXID,
1150                                   MultiXactId minMulti,
1151                                   TransactionId lastSaneFrozenXid,
1152                                   MultiXactId lastSaneMinMulti)
1153 {
1154         TransactionId nextXID = ReadNewTransactionId();
1155         Relation        relation;
1156         HeapScanDesc scan;
1157         HeapTuple       tuple;
1158         Oid                     oldestxid_datoid;
1159         Oid                     minmulti_datoid;
1160         bool            bogus = false;
1161         bool            frozenAlreadyWrapped = false;
1162
1163         /* init oldest datoids to sync with my frozenXID/minMulti values */
1164         oldestxid_datoid = MyDatabaseId;
1165         minmulti_datoid = MyDatabaseId;
1166
1167         /*
1168          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1169          *
1170          * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1171          * the values could change while we look at them.  Fetch each one just
1172          * once to ensure sane behavior of the comparison logic.  (Here, as in
1173          * many other places, we assume that fetching or updating an XID in shared
1174          * storage is atomic.)
1175          *
1176          * Note: we need not worry about a race condition with new entries being
1177          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1178          * existing DB's datfrozenxid, and that source DB cannot be ours because
1179          * of the interlock against copying a DB containing an active backend.
1180          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1181          * concurrently modify the datfrozenxid's of different databases, the
1182          * worst possible outcome is that pg_xact is not truncated as aggressively
1183          * as it could be.
1184          */
1185         relation = heap_open(DatabaseRelationId, AccessShareLock);
1186
1187         scan = heap_beginscan_catalog(relation, 0, NULL);
1188
1189         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1190         {
1191                 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1192                 TransactionId datfrozenxid = dbform->datfrozenxid;
1193                 TransactionId datminmxid = dbform->datminmxid;
1194
1195                 Assert(TransactionIdIsNormal(datfrozenxid));
1196                 Assert(MultiXactIdIsValid(datminmxid));
1197
1198                 /*
1199                  * If things are working properly, no database should have a
1200                  * datfrozenxid or datminmxid that is "in the future".  However, such
1201                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1202                  * see any entries that are "in the future", chicken out and don't do
1203                  * anything.  This ensures we won't truncate clog before those
1204                  * databases have been scanned and cleaned up.  (We will issue the
1205                  * "already wrapped" warning if appropriate, though.)
1206                  */
1207                 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1208                         MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1209                         bogus = true;
1210
1211                 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1212                         frozenAlreadyWrapped = true;
1213                 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1214                 {
1215                         frozenXID = datfrozenxid;
1216                         oldestxid_datoid = HeapTupleGetOid(tuple);
1217                 }
1218
1219                 if (MultiXactIdPrecedes(datminmxid, minMulti))
1220                 {
1221                         minMulti = datminmxid;
1222                         minmulti_datoid = HeapTupleGetOid(tuple);
1223                 }
1224         }
1225
1226         heap_endscan(scan);
1227
1228         heap_close(relation, AccessShareLock);
1229
1230         /*
1231          * Do not truncate CLOG if we seem to have suffered wraparound already;
1232          * the computed minimum XID might be bogus.  This case should now be
1233          * impossible due to the defenses in GetNewTransactionId, but we keep the
1234          * test anyway.
1235          */
1236         if (frozenAlreadyWrapped)
1237         {
1238                 ereport(WARNING,
1239                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1240                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1241                 return;
1242         }
1243
1244         /* chicken out if data is bogus in any other way */
1245         if (bogus)
1246                 return;
1247
1248         /*
1249          * Advance the oldest value for commit timestamps before truncating, so
1250          * that if a user requests a timestamp for a transaction we're truncating
1251          * away right after this point, they get NULL instead of an ugly "file not
1252          * found" error from slru.c.  This doesn't matter for xact/multixact
1253          * because they are not subject to arbitrary lookups from users.
1254          */
1255         AdvanceOldestCommitTsXid(frozenXID);
1256
1257         /*
1258          * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1259          */
1260         TruncateCLOG(frozenXID, oldestxid_datoid);
1261         TruncateCommitTs(frozenXID);
1262         TruncateMultiXact(minMulti, minmulti_datoid);
1263
1264         /*
1265          * Update the wrap limit for GetNewTransactionId and creation of new
1266          * MultiXactIds.  Note: these functions will also signal the postmaster
1267          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1268          * signalling twice?
1269          */
1270         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1271         SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1272 }
1273
1274
1275 /*
1276  *      vacuum_rel() -- vacuum one heap relation
1277  *
1278  *              relid identifies the relation to vacuum.  If relation is supplied,
1279  *              use the name therein for reporting any failure to open/lock the rel;
1280  *              do not use it once we've successfully opened the rel, since it might
1281  *              be stale.
1282  *
1283  *              Returns true if it's okay to proceed with a requested ANALYZE
1284  *              operation on this table.
1285  *
1286  *              Doing one heap at a time incurs extra overhead, since we need to
1287  *              check that the heap exists again just before we vacuum it.  The
1288  *              reason that we do this is so that vacuuming can be spread across
1289  *              many small transactions.  Otherwise, two-phase locking would require
1290  *              us to lock the entire database during one pass of the vacuum cleaner.
1291  *
1292  *              At entry and exit, we are not inside a transaction.
1293  */
1294 static bool
1295 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1296 {
1297         LOCKMODE        lmode;
1298         Relation        onerel;
1299         LockRelId       onerelid;
1300         Oid                     toast_relid;
1301         Oid                     save_userid;
1302         int                     save_sec_context;
1303         int                     save_nestlevel;
1304         bool            rel_lock = true;
1305
1306         Assert(params != NULL);
1307
1308         /* Begin a transaction for vacuuming this relation */
1309         StartTransactionCommand();
1310
1311         /*
1312          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1313          * ensures that RecentGlobalXmin is kept truly recent.
1314          */
1315         PushActiveSnapshot(GetTransactionSnapshot());
1316
1317         if (!(options & VACOPT_FULL))
1318         {
1319                 /*
1320                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1321                  * other concurrent VACUUMs know that they can ignore this one while
1322                  * determining their OldestXmin.  (The reason we don't set it during a
1323                  * full VACUUM is exactly that we may have to run user-defined
1324                  * functions for functional indexes, and we want to make sure that if
1325                  * they use the snapshot set above, any tuples it requires can't get
1326                  * removed from other tables.  An index function that depends on the
1327                  * contents of other tables is arguably broken, but we won't break it
1328                  * here by violating transaction semantics.)
1329                  *
1330                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1331                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1332                  * in an emergency.
1333                  *
1334                  * Note: these flags remain set until CommitTransaction or
1335                  * AbortTransaction.  We don't want to clear them until we reset
1336                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1337                  * which is probably Not Good.
1338                  */
1339                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1340                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1341                 if (params->is_wraparound)
1342                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1343                 LWLockRelease(ProcArrayLock);
1344         }
1345
1346         /*
1347          * Check for user-requested abort.  Note we want this to be inside a
1348          * transaction, so xact.c doesn't issue useless WARNING.
1349          */
1350         CHECK_FOR_INTERRUPTS();
1351
1352         /*
1353          * Determine the type of lock we want --- hard exclusive lock for a FULL
1354          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1355          * way, we can be sure that no other backend is vacuuming the same table.
1356          */
1357         lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1358
1359         /*
1360          * Open the relation and get the appropriate lock on it.
1361          *
1362          * There's a race condition here: the rel may have gone away since the
1363          * last time we saw it.  If so, we don't need to vacuum it.
1364          *
1365          * If we've been asked not to wait for the relation lock, acquire it first
1366          * in non-blocking mode, before calling try_relation_open().
1367          */
1368         if (!(options & VACOPT_NOWAIT))
1369                 onerel = try_relation_open(relid, lmode);
1370         else if (ConditionalLockRelationOid(relid, lmode))
1371                 onerel = try_relation_open(relid, NoLock);
1372         else
1373         {
1374                 onerel = NULL;
1375                 rel_lock = false;
1376         }
1377
1378         /*
1379          * If we failed to open or lock the relation, emit a log message before
1380          * exiting.
1381          */
1382         if (!onerel)
1383         {
1384                 int                     elevel = 0;
1385
1386                 /*
1387                  * Determine the log level.
1388                  *
1389                  * If the RangeVar is not defined, we do not have enough information
1390                  * to provide a meaningful log statement.  Chances are that
1391                  * vacuum_rel's caller has intentionally not provided this information
1392                  * so that this logging is skipped, anyway.
1393                  *
1394                  * Otherwise, for autovacuum logs, we emit a LOG if
1395                  * log_autovacuum_min_duration is not disabled.  For manual VACUUM, we
1396                  * emit a WARNING to match the log statements in the permission
1397                  * checks.
1398                  */
1399                 if (relation != NULL)
1400                 {
1401                         if (!IsAutoVacuumWorkerProcess())
1402                                 elevel = WARNING;
1403                         else if (params->log_min_duration >= 0)
1404                                 elevel = LOG;
1405                 }
1406
1407                 if (elevel != 0)
1408                 {
1409                         if (!rel_lock)
1410                                 ereport(elevel,
1411                                                 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1412                                                  errmsg("skipping vacuum of \"%s\" --- lock not available",
1413                                                                 relation->relname)));
1414                         else
1415                                 ereport(elevel,
1416                                                 (errcode(ERRCODE_UNDEFINED_TABLE),
1417                                                  errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
1418                                                                 relation->relname)));
1419                 }
1420
1421                 PopActiveSnapshot();
1422                 CommitTransactionCommand();
1423                 return false;
1424         }
1425
1426         /*
1427          * Check permissions.
1428          *
1429          * We allow the user to vacuum a table if he is superuser, the table
1430          * owner, or the database owner (but in the latter case, only if it's not
1431          * a shared relation).  pg_class_ownercheck includes the superuser case.
1432          *
1433          * Note we choose to treat permissions failure as a WARNING and keep
1434          * trying to vacuum the rest of the DB --- is this appropriate?
1435          */
1436         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1437                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1438         {
1439                 if (onerel->rd_rel->relisshared)
1440                         ereport(WARNING,
1441                                         (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1442                                                         RelationGetRelationName(onerel))));
1443                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1444                         ereport(WARNING,
1445                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1446                                                         RelationGetRelationName(onerel))));
1447                 else
1448                         ereport(WARNING,
1449                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1450                                                         RelationGetRelationName(onerel))));
1451                 relation_close(onerel, lmode);
1452                 PopActiveSnapshot();
1453                 CommitTransactionCommand();
1454                 return false;
1455         }
1456
1457         /*
1458          * Check that it's of a vacuumable relkind.
1459          */
1460         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1461                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1462                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1463                 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1464         {
1465                 ereport(WARNING,
1466                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1467                                                 RelationGetRelationName(onerel))));
1468                 relation_close(onerel, lmode);
1469                 PopActiveSnapshot();
1470                 CommitTransactionCommand();
1471                 return false;
1472         }
1473
1474         /*
1475          * Silently ignore tables that are temp tables of other backends ---
1476          * trying to vacuum these will lead to great unhappiness, since their
1477          * contents are probably not up-to-date on disk.  (We don't throw a
1478          * warning here; it would just lead to chatter during a database-wide
1479          * VACUUM.)
1480          */
1481         if (RELATION_IS_OTHER_TEMP(onerel))
1482         {
1483                 relation_close(onerel, lmode);
1484                 PopActiveSnapshot();
1485                 CommitTransactionCommand();
1486                 return false;
1487         }
1488
1489         /*
1490          * Silently ignore partitioned tables as there is no work to be done.  The
1491          * useful work is on their child partitions, which have been queued up for
1492          * us separately.
1493          */
1494         if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1495         {
1496                 relation_close(onerel, lmode);
1497                 PopActiveSnapshot();
1498                 CommitTransactionCommand();
1499                 /* It's OK to proceed with ANALYZE on this table */
1500                 return true;
1501         }
1502
1503         /*
1504          * Get a session-level lock too. This will protect our access to the
1505          * relation across multiple transactions, so that we can vacuum the
1506          * relation's TOAST table (if any) secure in the knowledge that no one is
1507          * deleting the parent relation.
1508          *
1509          * NOTE: this cannot block, even if someone else is waiting for access,
1510          * because the lock manager knows that both lock requests are from the
1511          * same process.
1512          */
1513         onerelid = onerel->rd_lockInfo.lockRelId;
1514         LockRelationIdForSession(&onerelid, lmode);
1515
1516         /*
1517          * Remember the relation's TOAST relation for later, if the caller asked
1518          * us to process it.  In VACUUM FULL, though, the toast table is
1519          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1520          */
1521         if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1522                 toast_relid = onerel->rd_rel->reltoastrelid;
1523         else
1524                 toast_relid = InvalidOid;
1525
1526         /*
1527          * Switch to the table owner's userid, so that any index functions are run
1528          * as that user.  Also lock down security-restricted operations and
1529          * arrange to make GUC variable changes local to this command. (This is
1530          * unnecessary, but harmless, for lazy VACUUM.)
1531          */
1532         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1533         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1534                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1535         save_nestlevel = NewGUCNestLevel();
1536
1537         /*
1538          * Do the actual work --- either FULL or "lazy" vacuum
1539          */
1540         if (options & VACOPT_FULL)
1541         {
1542                 /* close relation before vacuuming, but hold lock until commit */
1543                 relation_close(onerel, NoLock);
1544                 onerel = NULL;
1545
1546                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1547                 cluster_rel(relid, InvalidOid, false,
1548                                         (options & VACOPT_VERBOSE) != 0);
1549         }
1550         else
1551                 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1552
1553         /* Roll back any GUC changes executed by index functions */
1554         AtEOXact_GUC(false, save_nestlevel);
1555
1556         /* Restore userid and security context */
1557         SetUserIdAndSecContext(save_userid, save_sec_context);
1558
1559         /* all done with this class, but hold lock until commit */
1560         if (onerel)
1561                 relation_close(onerel, NoLock);
1562
1563         /*
1564          * Complete the transaction and free all temporary memory used.
1565          */
1566         PopActiveSnapshot();
1567         CommitTransactionCommand();
1568
1569         /*
1570          * If the relation has a secondary toast rel, vacuum that too while we
1571          * still hold the session lock on the master table.  Note however that
1572          * "analyze" will not get done on the toast table.  This is good, because
1573          * the toaster always uses hardcoded index access and statistics are
1574          * totally unimportant for toast relations.
1575          */
1576         if (toast_relid != InvalidOid)
1577                 vacuum_rel(toast_relid, NULL, options, params);
1578
1579         /*
1580          * Now release the session-level lock on the master table.
1581          */
1582         UnlockRelationIdForSession(&onerelid, lmode);
1583
1584         /* Report that we really did it. */
1585         return true;
1586 }
1587
1588
1589 /*
1590  * Open all the vacuumable indexes of the given relation, obtaining the
1591  * specified kind of lock on each.  Return an array of Relation pointers for
1592  * the indexes into *Irel, and the number of indexes into *nindexes.
1593  *
1594  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1595  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1596  * execution, and what we have is too corrupt to be processable.  We will
1597  * vacuum even if the index isn't indisvalid; this is important because in a
1598  * unique index, uniqueness checks will be performed anyway and had better not
1599  * hit dangling index pointers.
1600  */
1601 void
1602 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1603                                  int *nindexes, Relation **Irel)
1604 {
1605         List       *indexoidlist;
1606         ListCell   *indexoidscan;
1607         int                     i;
1608
1609         Assert(lockmode != NoLock);
1610
1611         indexoidlist = RelationGetIndexList(relation);
1612
1613         /* allocate enough memory for all indexes */
1614         i = list_length(indexoidlist);
1615
1616         if (i > 0)
1617                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1618         else
1619                 *Irel = NULL;
1620
1621         /* collect just the ready indexes */
1622         i = 0;
1623         foreach(indexoidscan, indexoidlist)
1624         {
1625                 Oid                     indexoid = lfirst_oid(indexoidscan);
1626                 Relation        indrel;
1627
1628                 indrel = index_open(indexoid, lockmode);
1629                 if (IndexIsReady(indrel->rd_index))
1630                         (*Irel)[i++] = indrel;
1631                 else
1632                         index_close(indrel, lockmode);
1633         }
1634
1635         *nindexes = i;
1636
1637         list_free(indexoidlist);
1638 }
1639
1640 /*
1641  * Release the resources acquired by vac_open_indexes.  Optionally release
1642  * the locks (say NoLock to keep 'em).
1643  */
1644 void
1645 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1646 {
1647         if (Irel == NULL)
1648                 return;
1649
1650         while (nindexes--)
1651         {
1652                 Relation        ind = Irel[nindexes];
1653
1654                 index_close(ind, lockmode);
1655         }
1656         pfree(Irel);
1657 }
1658
1659 /*
1660  * vacuum_delay_point --- check for interrupts and cost-based delay.
1661  *
1662  * This should be called in each major loop of VACUUM processing,
1663  * typically once per page processed.
1664  */
1665 void
1666 vacuum_delay_point(void)
1667 {
1668         /* Always check for interrupts */
1669         CHECK_FOR_INTERRUPTS();
1670
1671         /* Nap if appropriate */
1672         if (VacuumCostActive && !InterruptPending &&
1673                 VacuumCostBalance >= VacuumCostLimit)
1674         {
1675                 int                     msec;
1676
1677                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1678                 if (msec > VacuumCostDelay * 4)
1679                         msec = VacuumCostDelay * 4;
1680
1681                 pg_usleep(msec * 1000L);
1682
1683                 VacuumCostBalance = 0;
1684
1685                 /* update balance values for workers */
1686                 AutoVacuumUpdateDelay();
1687
1688                 /* Might have gotten an interrupt while sleeping */
1689                 CHECK_FOR_INTERRUPTS();
1690         }
1691 }