granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/commit_ts.h"
  27 #include "access/genam.h"
  28 #include "access/heapam.h"
  29 #include "access/htup_details.h"
  30 #include "access/multixact.h"
  31 #include "access/transam.h"
  32 #include "access/xact.h"
  33 #include "catalog/namespace.h"
  34 #include "catalog/pg_database.h"
  35 #include "catalog/pg_namespace.h"
  36 #include "commands/cluster.h"
  37 #include "commands/vacuum.h"
  38 #include "miscadmin.h"
  39 #include "pgstat.h"
  40 #include "postmaster/autovacuum.h"
  41 #include "storage/bufmgr.h"
  42 #include "storage/lmgr.h"
  43 #include "storage/proc.h"
  44 #include "storage/procarray.h"
  45 #include "utils/acl.h"
  46 #include "utils/fmgroids.h"
  47 #include "utils/guc.h"
  48 #include "utils/memutils.h"
  49 #include "utils/snapmgr.h"
  50 #include "utils/syscache.h"
  51 #include "utils/tqual.h"
  52
  53
  54 /*
  55  * GUC parameters
  56  */
  57 int                     vacuum_freeze_min_age;
  58 int                     vacuum_freeze_table_age;
  59 int                     vacuum_multixact_freeze_min_age;
  60 int                     vacuum_multixact_freeze_table_age;
  61
  62
  63 /* A few variables that don't seem worth passing around as parameters */
  64 static MemoryContext vac_context = NULL;
  65 static BufferAccessStrategy vac_strategy;
  66
  67
  68 /* non-export function prototypes */
  69 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
  70 static void vac_truncate_clog(TransactionId frozenXID,
  71                                   MultiXactId minMulti,
  72                                   TransactionId lastSaneFrozenXid,
  73                                   MultiXactId lastSaneMinMulti);
  74 static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
  75                    bool for_wraparound);
  76
  77
  78 /*
  79  * Primary entry point for VACUUM and ANALYZE commands.
  80  *
  81  * relid is normally InvalidOid; if it is not, then it provides the relation
  82  * OID to be processed, and vacstmt->relation is ignored.  (The non-invalid
  83  * case is currently only used by autovacuum.)
  84  *
  85  * do_toast is passed as FALSE by autovacuum, because it processes TOAST
  86  * tables separately.
  87  *
  88  * for_wraparound is used by autovacuum to let us know when it's forcing
  89  * a vacuum for wraparound, which should not be auto-canceled.
  90  *
  91  * bstrategy is normally given as NULL, but in autovacuum it can be passed
  92  * in to use the same buffer strategy object across multiple vacuum() calls.
  93  *
  94  * isTopLevel should be passed down from ProcessUtility.
  95  *
  96  * It is the caller's responsibility that vacstmt and bstrategy
  97  * (if given) be allocated in a memory context that won't disappear
  98  * at transaction commit.
  99  */
 100 void
 101 vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
 102            BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
 103 {
 104         const char *stmttype;
 105         volatile bool in_outer_xact,
 106                                 use_own_xacts;
 107         List       *relations;
 108
 109         /* sanity checks on options */
 110         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
 111         Assert((vacstmt->options & VACOPT_VACUUM) ||
 112                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
 113         Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
 114
 115         stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 116
 117         /*
 118          * We cannot run VACUUM inside a user transaction block; if we were inside
 119          * a transaction, then our commit- and start-transaction-command calls
 120          * would not have the intended effect!  There are numerous other subtle
 121          * dependencies on this, too.
 122          *
 123          * ANALYZE (without VACUUM) can run either way.
 124          */
 125         if (vacstmt->options & VACOPT_VACUUM)
 126         {
 127                 PreventTransactionChain(isTopLevel, stmttype);
 128                 in_outer_xact = false;
 129         }
 130         else
 131                 in_outer_xact = IsInTransactionChain(isTopLevel);
 132
 133         /*
 134          * Send info about dead objects to the statistics collector, unless we are
 135          * in autovacuum --- autovacuum.c does this for itself.
 136          */
 137         if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 138                 pgstat_vacuum_stat();
 139
 140         /*
 141          * Create special memory context for cross-transaction storage.
 142          *
 143          * Since it is a child of PortalContext, it will go away eventually even
 144          * if we suffer an error; there's no need for special abort cleanup logic.
 145          */
 146         vac_context = AllocSetContextCreate(PortalContext,
 147                                                                                 "Vacuum",
 148                                                                                 ALLOCSET_DEFAULT_MINSIZE,
 149                                                                                 ALLOCSET_DEFAULT_INITSIZE,
 150                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
 151
 152         /*
 153          * If caller didn't give us a buffer strategy object, make one in the
 154          * cross-transaction memory context.
 155          */
 156         if (bstrategy == NULL)
 157         {
 158                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 159
 160                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 161                 MemoryContextSwitchTo(old_context);
 162         }
 163         vac_strategy = bstrategy;
 164
 165         /*
 166          * Build list of relations to process, unless caller gave us one. (If we
 167          * build one, we put it in vac_context for safekeeping.)
 168          */
 169         relations = get_rel_oids(relid, vacstmt->relation);
 170
 171         /*
 172          * Decide whether we need to start/commit our own transactions.
 173          *
 174          * For VACUUM (with or without ANALYZE): always do so, so that we can
 175          * release locks as soon as possible.  (We could possibly use the outer
 176          * transaction for a one-table VACUUM, but handling TOAST tables would be
 177          * problematic.)
 178          *
 179          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 180          * start/commit our own transactions.  Also, there's no need to do so if
 181          * only processing one relation.  For multiple relations when not within a
 182          * transaction block, and also in an autovacuum worker, use own
 183          * transactions so we can release locks sooner.
 184          */
 185         if (vacstmt->options & VACOPT_VACUUM)
 186                 use_own_xacts = true;
 187         else
 188         {
 189                 Assert(vacstmt->options & VACOPT_ANALYZE);
 190                 if (IsAutoVacuumWorkerProcess())
 191                         use_own_xacts = true;
 192                 else if (in_outer_xact)
 193                         use_own_xacts = false;
 194                 else if (list_length(relations) > 1)
 195                         use_own_xacts = true;
 196                 else
 197                         use_own_xacts = false;
 198         }
 199
 200         /*
 201          * vacuum_rel expects to be entered with no transaction active; it will
 202          * start and commit its own transaction.  But we are called by an SQL
 203          * command, and so we are executing inside a transaction already. We
 204          * commit the transaction started in PostgresMain() here, and start
 205          * another one before exiting to match the commit waiting for us back in
 206          * PostgresMain().
 207          */
 208         if (use_own_xacts)
 209         {
 210                 Assert(!in_outer_xact);
 211
 212                 /* ActiveSnapshot is not set by autovacuum */
 213                 if (ActiveSnapshotSet())
 214                         PopActiveSnapshot();
 215
 216                 /* matches the StartTransaction in PostgresMain() */
 217                 CommitTransactionCommand();
 218         }
 219
 220         /* Turn vacuum cost accounting on or off */
 221         PG_TRY();
 222         {
 223                 ListCell   *cur;
 224
 225                 VacuumCostActive = (VacuumCostDelay > 0);
 226                 VacuumCostBalance = 0;
 227                 VacuumPageHit = 0;
 228                 VacuumPageMiss = 0;
 229                 VacuumPageDirty = 0;
 230
 231                 /*
 232                  * Loop to process each selected relation.
 233                  */
 234                 foreach(cur, relations)
 235                 {
 236                         Oid                     relid = lfirst_oid(cur);
 237
 238                         if (vacstmt->options & VACOPT_VACUUM)
 239                         {
 240                                 if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
 241                                         continue;
 242                         }
 243
 244                         if (vacstmt->options & VACOPT_ANALYZE)
 245                         {
 246                                 /*
 247                                  * If using separate xacts, start one for analyze. Otherwise,
 248                                  * we can use the outer transaction.
 249                                  */
 250                                 if (use_own_xacts)
 251                                 {
 252                                         StartTransactionCommand();
 253                                         /* functions in indexes may want a snapshot set */
 254                                         PushActiveSnapshot(GetTransactionSnapshot());
 255                                 }
 256
 257                                 analyze_rel(relid, vacstmt, in_outer_xact, vac_strategy);
 258
 259                                 if (use_own_xacts)
 260                                 {
 261                                         PopActiveSnapshot();
 262                                         CommitTransactionCommand();
 263                                 }
 264                         }
 265                 }
 266         }
 267         PG_CATCH();
 268         {
 269                 /* Make sure cost accounting is turned off after error */
 270                 VacuumCostActive = false;
 271                 PG_RE_THROW();
 272         }
 273         PG_END_TRY();
 274
 275         /* Turn off vacuum cost accounting */
 276         VacuumCostActive = false;
 277
 278         /*
 279          * Finish up processing.
 280          */
 281         if (use_own_xacts)
 282         {
 283                 /* here, we are not in a transaction */
 284
 285                 /*
 286                  * This matches the CommitTransaction waiting for us in
 287                  * PostgresMain().
 288                  */
 289                 StartTransactionCommand();
 290         }
 291
 292         if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 293         {
 294                 /*
 295                  * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
 296                  * (autovacuum.c does this for itself.)
 297                  */
 298                 vac_update_datfrozenxid();
 299         }
 300
 301         /*
 302          * Clean up working storage --- note we must do this after
 303          * StartTransactionCommand, else we might be trying to delete the active
 304          * context!
 305          */
 306         MemoryContextDelete(vac_context);
 307         vac_context = NULL;
 308 }
 309
 310 /*
 311  * Build a list of Oids for each relation to be processed
 312  *
 313  * The list is built in vac_context so that it will survive across our
 314  * per-relation transactions.
 315  */
 316 static List *
 317 get_rel_oids(Oid relid, const RangeVar *vacrel)
 318 {
 319         List       *oid_list = NIL;
 320         MemoryContext oldcontext;
 321
 322         /* OID supplied by VACUUM's caller? */
 323         if (OidIsValid(relid))
 324         {
 325                 oldcontext = MemoryContextSwitchTo(vac_context);
 326                 oid_list = lappend_oid(oid_list, relid);
 327                 MemoryContextSwitchTo(oldcontext);
 328         }
 329         else if (vacrel)
 330         {
 331                 /* Process a specific relation */
 332                 Oid                     relid;
 333
 334                 /*
 335                  * Since we don't take a lock here, the relation might be gone, or the
 336                  * RangeVar might no longer refer to the OID we look up here.  In the
 337                  * former case, VACUUM will do nothing; in the latter case, it will
 338                  * process the OID we looked up here, rather than the new one. Neither
 339                  * is ideal, but there's little practical alternative, since we're
 340                  * going to commit this transaction and begin a new one between now
 341                  * and then.
 342                  */
 343                 relid = RangeVarGetRelid(vacrel, NoLock, false);
 344
 345                 /* Make a relation list entry for this guy */
 346                 oldcontext = MemoryContextSwitchTo(vac_context);
 347                 oid_list = lappend_oid(oid_list, relid);
 348                 MemoryContextSwitchTo(oldcontext);
 349         }
 350         else
 351         {
 352                 /*
 353                  * Process all plain relations and materialized views listed in
 354                  * pg_class
 355                  */
 356                 Relation        pgclass;
 357                 HeapScanDesc scan;
 358                 HeapTuple       tuple;
 359
 360                 pgclass = heap_open(RelationRelationId, AccessShareLock);
 361
 362                 scan = heap_beginscan_catalog(pgclass, 0, NULL);
 363
 364                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 365                 {
 366                         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 367
 368                         if (classForm->relkind != RELKIND_RELATION &&
 369                                 classForm->relkind != RELKIND_MATVIEW)
 370                                 continue;
 371
 372                         /* Make a relation list entry for this guy */
 373                         oldcontext = MemoryContextSwitchTo(vac_context);
 374                         oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
 375                         MemoryContextSwitchTo(oldcontext);
 376                 }
 377
 378                 heap_endscan(scan);
 379                 heap_close(pgclass, AccessShareLock);
 380         }
 381
 382         return oid_list;
 383 }
 384
 385 /*
 386  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 387  *
 388  * The output parameters are:
 389  * - oldestXmin is the cutoff value used to distinguish whether tuples are
 390  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 391  * - freezeLimit is the Xid below which all Xids are replaced by
 392  *       FrozenTransactionId during vacuum.
 393  * - xidFullScanLimit (computed from table_freeze_age parameter)
 394  *       represents a minimum Xid value; a table whose relfrozenxid is older than
 395  *       this will have a full-table vacuum applied to it, to freeze tuples across
 396  *       the whole table.  Vacuuming a table younger than this value can use a
 397  *       partial scan.
 398  * - multiXactCutoff is the value below which all MultiXactIds are removed from
 399  *       Xmax.
 400  * - mxactFullScanLimit is a value against which a table's relminmxid value is
 401  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
 402  *
 403  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 404  * not interested.
 405  */
 406 void
 407 vacuum_set_xid_limits(Relation rel,
 408                                           int freeze_min_age,
 409                                           int freeze_table_age,
 410                                           int multixact_freeze_min_age,
 411                                           int multixact_freeze_table_age,
 412                                           TransactionId *oldestXmin,
 413                                           TransactionId *freezeLimit,
 414                                           TransactionId *xidFullScanLimit,
 415                                           MultiXactId *multiXactCutoff,
 416                                           MultiXactId *mxactFullScanLimit)
 417 {
 418         int                     freezemin;
 419         int                     mxid_freezemin;
 420         TransactionId limit;
 421         TransactionId safeLimit;
 422         MultiXactId mxactLimit;
 423         MultiXactId safeMxactLimit;
 424
 425         /*
 426          * We can always ignore processes running lazy vacuum.  This is because we
 427          * use these values only for deciding which tuples we must keep in the
 428          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
 429          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 430          * a full vacuum, but keep in mind that only one vacuum process can be
 431          * working on a particular table at any time, and that each vacuum is
 432          * always an independent transaction.
 433          */
 434         *oldestXmin = GetOldestXmin(rel, true);
 435
 436         Assert(TransactionIdIsNormal(*oldestXmin));
 437
 438         /*
 439          * Determine the minimum freeze age to use: as specified by the caller, or
 440          * vacuum_freeze_min_age, but in any case not more than half
 441          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 442          * wraparound won't occur too frequently.
 443          */
 444         freezemin = freeze_min_age;
 445         if (freezemin < 0)
 446                 freezemin = vacuum_freeze_min_age;
 447         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 448         Assert(freezemin >= 0);
 449
 450         /*
 451          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 452          */
 453         limit = *oldestXmin - freezemin;
 454         if (!TransactionIdIsNormal(limit))
 455                 limit = FirstNormalTransactionId;
 456
 457         /*
 458          * If oldestXmin is very far back (in practice, more than
 459          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 460          * freeze age of zero.
 461          */
 462         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 463         if (!TransactionIdIsNormal(safeLimit))
 464                 safeLimit = FirstNormalTransactionId;
 465
 466         if (TransactionIdPrecedes(limit, safeLimit))
 467         {
 468                 ereport(WARNING,
 469                                 (errmsg("oldest xmin is far in the past"),
 470                                  errhint("Close open transactions soon to avoid wraparound problems.")));
 471                 limit = *oldestXmin;
 472         }
 473
 474         *freezeLimit = limit;
 475
 476         /*
 477          * Determine the minimum multixact freeze age to use: as specified by
 478          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
 479          * than half autovacuum_multixact_freeze_max_age, so that autovacuums to
 480          * prevent MultiXact wraparound won't occur too frequently.
 481          */
 482         mxid_freezemin = multixact_freeze_min_age;
 483         if (mxid_freezemin < 0)
 484                 mxid_freezemin = vacuum_multixact_freeze_min_age;
 485         mxid_freezemin = Min(mxid_freezemin,
 486                                                  autovacuum_multixact_freeze_max_age / 2);
 487         Assert(mxid_freezemin >= 0);
 488
 489         /* compute the cutoff multi, being careful to generate a valid value */
 490         mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
 491         if (mxactLimit < FirstMultiXactId)
 492                 mxactLimit = FirstMultiXactId;
 493
 494         safeMxactLimit =
 495                 ReadNextMultiXactId() - autovacuum_multixact_freeze_max_age;
 496         if (safeMxactLimit < FirstMultiXactId)
 497                 safeMxactLimit = FirstMultiXactId;
 498
 499         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
 500         {
 501                 ereport(WARNING,
 502                                 (errmsg("oldest multixact is far in the past"),
 503                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 504                 mxactLimit = safeMxactLimit;
 505         }
 506
 507         *multiXactCutoff = mxactLimit;
 508
 509         if (xidFullScanLimit != NULL)
 510         {
 511                 int                     freezetable;
 512
 513                 Assert(mxactFullScanLimit != NULL);
 514
 515                 /*
 516                  * Determine the table freeze age to use: as specified by the caller,
 517                  * or vacuum_freeze_table_age, but in any case not more than
 518                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 519                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 520                  * before anti-wraparound autovacuum is launched.
 521                  */
 522                 freezetable = freeze_table_age;
 523                 if (freezetable < 0)
 524                         freezetable = vacuum_freeze_table_age;
 525                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 526                 Assert(freezetable >= 0);
 527
 528                 /*
 529                  * Compute XID limit causing a full-table vacuum, being careful not to
 530                  * generate a "permanent" XID.
 531                  */
 532                 limit = ReadNewTransactionId() - freezetable;
 533                 if (!TransactionIdIsNormal(limit))
 534                         limit = FirstNormalTransactionId;
 535
 536                 *xidFullScanLimit = limit;
 537
 538                 /*
 539                  * Similar to the above, determine the table freeze age to use for
 540                  * multixacts: as specified by the caller, or
 541                  * vacuum_multixact_freeze_table_age, but in any case not more than
 542                  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
 543                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
 544                  * freeze multixacts before anti-wraparound autovacuum is launched.
 545                  */
 546                 freezetable = multixact_freeze_table_age;
 547                 if (freezetable < 0)
 548                         freezetable = vacuum_multixact_freeze_table_age;
 549                 freezetable = Min(freezetable,
 550                                                   autovacuum_multixact_freeze_max_age * 0.95);
 551                 Assert(freezetable >= 0);
 552
 553                 /*
 554                  * Compute MultiXact limit causing a full-table vacuum, being careful
 555                  * to generate a valid MultiXact value.
 556                  */
 557                 mxactLimit = ReadNextMultiXactId() - freezetable;
 558                 if (mxactLimit < FirstMultiXactId)
 559                         mxactLimit = FirstMultiXactId;
 560
 561                 *mxactFullScanLimit = mxactLimit;
 562         }
 563         else
 564         {
 565                 Assert(mxactFullScanLimit == NULL);
 566         }
 567 }
 568
 569 /*
 570  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
 571  *
 572  *              If we scanned the whole relation then we should just use the count of
 573  *              live tuples seen; but if we did not, we should not trust the count
 574  *              unreservedly, especially not in VACUUM, which may have scanned a quite
 575  *              nonrandom subset of the table.  When we have only partial information,
 576  *              we take the old value of pg_class.reltuples as a measurement of the
 577  *              tuple density in the unscanned pages.
 578  *
 579  *              This routine is shared by VACUUM and ANALYZE.
 580  */
 581 double
 582 vac_estimate_reltuples(Relation relation, bool is_analyze,
 583                                            BlockNumber total_pages,
 584                                            BlockNumber scanned_pages,
 585                                            double scanned_tuples)
 586 {
 587         BlockNumber old_rel_pages = relation->rd_rel->relpages;
 588         double          old_rel_tuples = relation->rd_rel->reltuples;
 589         double          old_density;
 590         double          new_density;
 591         double          multiplier;
 592         double          updated_density;
 593
 594         /* If we did scan the whole table, just use the count as-is */
 595         if (scanned_pages >= total_pages)
 596                 return scanned_tuples;
 597
 598         /*
 599          * If scanned_pages is zero but total_pages isn't, keep the existing value
 600          * of reltuples.  (Note: callers should avoid updating the pg_class
 601          * statistics in this situation, since no new information has been
 602          * provided.)
 603          */
 604         if (scanned_pages == 0)
 605                 return old_rel_tuples;
 606
 607         /*
 608          * If old value of relpages is zero, old density is indeterminate; we
 609          * can't do much except scale up scanned_tuples to match total_pages.
 610          */
 611         if (old_rel_pages == 0)
 612                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 613
 614         /*
 615          * Okay, we've covered the corner cases.  The normal calculation is to
 616          * convert the old measurement to a density (tuples per page), then update
 617          * the density using an exponential-moving-average approach, and finally
 618          * compute reltuples as updated_density * total_pages.
 619          *
 620          * For ANALYZE, the moving average multiplier is just the fraction of the
 621          * table's pages we scanned.  This is equivalent to assuming that the
 622          * tuple density in the unscanned pages didn't change.  Of course, it
 623          * probably did, if the new density measurement is different. But over
 624          * repeated cycles, the value of reltuples will converge towards the
 625          * correct value, if repeated measurements show the same new density.
 626          *
 627          * For VACUUM, the situation is a bit different: we have looked at a
 628          * nonrandom sample of pages, but we know for certain that the pages we
 629          * didn't look at are precisely the ones that haven't changed lately.
 630          * Thus, there is a reasonable argument for doing exactly the same thing
 631          * as for the ANALYZE case, that is use the old density measurement as the
 632          * value for the unscanned pages.
 633          *
 634          * This logic could probably use further refinement.
 635          */
 636         old_density = old_rel_tuples / old_rel_pages;
 637         new_density = scanned_tuples / scanned_pages;
 638         multiplier = (double) scanned_pages / (double) total_pages;
 639         updated_density = old_density + (new_density - old_density) * multiplier;
 640         return floor(updated_density * total_pages + 0.5);
 641 }
 642
 643
 644 /*
 645  *      vac_update_relstats() -- update statistics for one relation
 646  *
 647  *              Update the whole-relation statistics that are kept in its pg_class
 648  *              row.  There are additional stats that will be updated if we are
 649  *              doing ANALYZE, but we always update these stats.  This routine works
 650  *              for both index and heap relation entries in pg_class.
 651  *
 652  *              We violate transaction semantics here by overwriting the rel's
 653  *              existing pg_class tuple with the new values.  This is reasonably
 654  *              safe as long as we're sure that the new values are correct whether or
 655  *              not this transaction commits.  The reason for doing this is that if
 656  *              we updated these tuples in the usual way, vacuuming pg_class itself
 657  *              wouldn't work very well --- by the time we got done with a vacuum
 658  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
 659  *              course, this only works for fixed-size not-null columns, but these are.
 660  *
 661  *              Another reason for doing it this way is that when we are in a lazy
 662  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
 663  *              Somebody vacuuming pg_class might think they could delete a tuple
 664  *              marked with xmin = our xid.
 665  *
 666  *              In addition to fundamentally nontransactional statistics such as
 667  *              relpages and relallvisible, we try to maintain certain lazily-updated
 668  *              DDL flags such as relhasindex, by clearing them if no longer correct.
 669  *              It's safe to do this in VACUUM, which can't run in parallel with
 670  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
 671  *              However, it's *not* safe to do it in an ANALYZE that's within an
 672  *              outer transaction, because for example the current transaction might
 673  *              have dropped the last index; then we'd think relhasindex should be
 674  *              cleared, but if the transaction later rolls back this would be wrong.
 675  *              So we refrain from updating the DDL flags if we're inside an outer
 676  *              transaction.  This is OK since postponing the flag maintenance is
 677  *              always allowable.
 678  *
 679  *              This routine is shared by VACUUM and ANALYZE.
 680  */
 681 void
 682 vac_update_relstats(Relation relation,
 683                                         BlockNumber num_pages, double num_tuples,
 684                                         BlockNumber num_all_visible_pages,
 685                                         bool hasindex, TransactionId frozenxid,
 686                                         MultiXactId minmulti,
 687                                         bool in_outer_xact)
 688 {
 689         Oid                     relid = RelationGetRelid(relation);
 690         Relation        rd;
 691         HeapTuple       ctup;
 692         Form_pg_class pgcform;
 693         bool            dirty;
 694
 695         rd = heap_open(RelationRelationId, RowExclusiveLock);
 696
 697         /* Fetch a copy of the tuple to scribble on */
 698         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
 699         if (!HeapTupleIsValid(ctup))
 700                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 701                          relid);
 702         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 703
 704         /* Apply statistical updates, if any, to copied tuple */
 705
 706         dirty = false;
 707         if (pgcform->relpages != (int32) num_pages)
 708         {
 709                 pgcform->relpages = (int32) num_pages;
 710                 dirty = true;
 711         }
 712         if (pgcform->reltuples != (float4) num_tuples)
 713         {
 714                 pgcform->reltuples = (float4) num_tuples;
 715                 dirty = true;
 716         }
 717         if (pgcform->relallvisible != (int32) num_all_visible_pages)
 718         {
 719                 pgcform->relallvisible = (int32) num_all_visible_pages;
 720                 dirty = true;
 721         }
 722
 723         /* Apply DDL updates, but not inside an outer transaction (see above) */
 724
 725         if (!in_outer_xact)
 726         {
 727                 /*
 728                  * If we didn't find any indexes, reset relhasindex.
 729                  */
 730                 if (pgcform->relhasindex && !hasindex)
 731                 {
 732                         pgcform->relhasindex = false;
 733                         dirty = true;
 734                 }
 735
 736                 /*
 737                  * If we have discovered that there are no indexes, then there's no
 738                  * primary key either.  This could be done more thoroughly...
 739                  */
 740                 if (pgcform->relhaspkey && !hasindex)
 741                 {
 742                         pgcform->relhaspkey = false;
 743                         dirty = true;
 744                 }
 745
 746                 /* We also clear relhasrules and relhastriggers if needed */
 747                 if (pgcform->relhasrules && relation->rd_rules == NULL)
 748                 {
 749                         pgcform->relhasrules = false;
 750                         dirty = true;
 751                 }
 752                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
 753                 {
 754                         pgcform->relhastriggers = false;
 755                         dirty = true;
 756                 }
 757         }
 758
 759         /*
 760          * Update relfrozenxid, unless caller passed InvalidTransactionId
 761          * indicating it has no new data.
 762          *
 763          * Ordinarily, we don't let relfrozenxid go backwards: if things are
 764          * working correctly, the only way the new frozenxid could be older would
 765          * be if a previous VACUUM was done with a tighter freeze_min_age, in
 766          * which case we don't want to forget the work it already did.  However,
 767          * if the stored relfrozenxid is "in the future", then it must be corrupt
 768          * and it seems best to overwrite it with the cutoff we used this time.
 769          * This should match vac_update_datfrozenxid() concerning what we consider
 770          * to be "in the future".
 771          */
 772         if (TransactionIdIsNormal(frozenxid) &&
 773                 pgcform->relfrozenxid != frozenxid &&
 774                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
 775                  TransactionIdPrecedes(ReadNewTransactionId(),
 776                                                            pgcform->relfrozenxid)))
 777         {
 778                 pgcform->relfrozenxid = frozenxid;
 779                 dirty = true;
 780         }
 781
 782         /* Similarly for relminmxid */
 783         if (MultiXactIdIsValid(minmulti) &&
 784                 pgcform->relminmxid != minmulti &&
 785                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
 786                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
 787         {
 788                 pgcform->relminmxid = minmulti;
 789                 dirty = true;
 790         }
 791
 792         /* If anything changed, write out the tuple. */
 793         if (dirty)
 794                 heap_inplace_update(rd, ctup);
 795
 796         heap_close(rd, RowExclusiveLock);
 797 }
 798
 799
 800 /*
 801  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 802  *
 803  *              Update pg_database's datfrozenxid entry for our database to be the
 804  *              minimum of the pg_class.relfrozenxid values.
 805  *
 806  *              Similarly, update our datminmxid to be the minimum of the
 807  *              pg_class.relminmxid values.
 808  *
 809  *              If we are able to advance either pg_database value, also try to
 810  *              truncate pg_clog and pg_multixact.
 811  *
 812  *              We violate transaction semantics here by overwriting the database's
 813  *              existing pg_database tuple with the new values.  This is reasonably
 814  *              safe since the new values are correct whether or not this transaction
 815  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
 816  *              behind after a VACUUM.
 817  */
 818 void
 819 vac_update_datfrozenxid(void)
 820 {
 821         HeapTuple       tuple;
 822         Form_pg_database dbform;
 823         Relation        relation;
 824         SysScanDesc scan;
 825         HeapTuple       classTup;
 826         TransactionId newFrozenXid;
 827         MultiXactId newMinMulti;
 828         TransactionId lastSaneFrozenXid;
 829         MultiXactId lastSaneMinMulti;
 830         bool            bogus = false;
 831         bool            dirty = false;
 832
 833         /*
 834          * Initialize the "min" calculation with GetOldestXmin, which is a
 835          * reasonable approximation to the minimum relfrozenxid for not-yet-
 836          * committed pg_class entries for new tables; see AddNewRelationTuple().
 837          * So we cannot produce a wrong minimum by starting with this.
 838          */
 839         newFrozenXid = GetOldestXmin(NULL, true);
 840
 841         /*
 842          * Similarly, initialize the MultiXact "min" with the value that would be
 843          * used on pg_class for new tables.  See AddNewRelationTuple().
 844          */
 845         newMinMulti = GetOldestMultiXactId();
 846
 847         /*
 848          * Identify the latest relfrozenxid and relminmxid values that we could
 849          * validly see during the scan.  These are conservative values, but it's
 850          * not really worth trying to be more exact.
 851          */
 852         lastSaneFrozenXid = ReadNewTransactionId();
 853         lastSaneMinMulti = ReadNextMultiXactId();
 854
 855         /*
 856          * We must seqscan pg_class to find the minimum Xid, because there is no
 857          * index that can help us here.
 858          */
 859         relation = heap_open(RelationRelationId, AccessShareLock);
 860
 861         scan = systable_beginscan(relation, InvalidOid, false,
 862                                                           NULL, 0, NULL);
 863
 864         while ((classTup = systable_getnext(scan)) != NULL)
 865         {
 866                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
 867
 868                 /*
 869                  * Only consider relations able to hold unfrozen XIDs (anything else
 870                  * should have InvalidTransactionId in relfrozenxid anyway.)
 871                  */
 872                 if (classForm->relkind != RELKIND_RELATION &&
 873                         classForm->relkind != RELKIND_MATVIEW &&
 874                         classForm->relkind != RELKIND_TOASTVALUE)
 875                         continue;
 876
 877                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
 878                 Assert(MultiXactIdIsValid(classForm->relminmxid));
 879
 880                 /*
 881                  * If things are working properly, no relation should have a
 882                  * relfrozenxid or relminmxid that is "in the future".  However, such
 883                  * cases have been known to arise due to bugs in pg_upgrade.  If we
 884                  * see any entries that are "in the future", chicken out and don't do
 885                  * anything.  This ensures we won't truncate clog before those
 886                  * relations have been scanned and cleaned up.
 887                  */
 888                 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
 889                         MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
 890                 {
 891                         bogus = true;
 892                         break;
 893                 }
 894
 895                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
 896                         newFrozenXid = classForm->relfrozenxid;
 897
 898                 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
 899                         newMinMulti = classForm->relminmxid;
 900         }
 901
 902         /* we're done with pg_class */
 903         systable_endscan(scan);
 904         heap_close(relation, AccessShareLock);
 905
 906         /* chicken out if bogus data found */
 907         if (bogus)
 908                 return;
 909
 910         Assert(TransactionIdIsNormal(newFrozenXid));
 911         Assert(MultiXactIdIsValid(newMinMulti));
 912
 913         /* Now fetch the pg_database tuple we need to update. */
 914         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
 915
 916         /* Fetch a copy of the tuple to scribble on */
 917         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
 918         if (!HeapTupleIsValid(tuple))
 919                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
 920         dbform = (Form_pg_database) GETSTRUCT(tuple);
 921
 922         /*
 923          * As in vac_update_relstats(), we ordinarily don't want to let
 924          * datfrozenxid go backward; but if it's "in the future" then it must be
 925          * corrupt and it seems best to overwrite it.
 926          */
 927         if (dbform->datfrozenxid != newFrozenXid &&
 928                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
 929                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
 930         {
 931                 dbform->datfrozenxid = newFrozenXid;
 932                 dirty = true;
 933         }
 934         else
 935                 newFrozenXid = dbform->datfrozenxid;
 936
 937         /* Ditto for datminmxid */
 938         if (dbform->datminmxid != newMinMulti &&
 939                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
 940                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
 941         {
 942                 dbform->datminmxid = newMinMulti;
 943                 dirty = true;
 944         }
 945         else
 946                 newMinMulti = dbform->datminmxid;
 947
 948         if (dirty)
 949                 heap_inplace_update(relation, tuple);
 950
 951         heap_freetuple(tuple);
 952         heap_close(relation, RowExclusiveLock);
 953
 954         /*
 955          * If we were able to advance datfrozenxid or datminmxid, see if we can
 956          * truncate pg_clog and/or pg_multixact.  Also do it if the shared
 957          * XID-wrap-limit info is stale, since this action will update that too.
 958          */
 959         if (dirty || ForceTransactionIdLimitUpdate())
 960                 vac_truncate_clog(newFrozenXid, newMinMulti,
 961                                                   lastSaneFrozenXid, lastSaneMinMulti);
 962 }
 963
 964
 965 /*
 966  *      vac_truncate_clog() -- attempt to truncate the commit log
 967  *
 968  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
 969  *              and use it to truncate the transaction commit log (pg_clog).
 970  *              Also update the XID wrap limit info maintained by varsup.c.
 971  *              Likewise for datminmxid.
 972  *
 973  *              The passed frozenXID and minMulti are the updated values for my own
 974  *              pg_database entry. They're used to initialize the "min" calculations.
 975  *              The caller also passes the "last sane" XID and MXID, since it has
 976  *              those at hand already.
 977  *
 978  *              This routine is only invoked when we've managed to change our
 979  *              DB's datfrozenxid/datminmxid values, or we found that the shared
 980  *              XID-wrap-limit info is stale.
 981  */
 982 static void
 983 vac_truncate_clog(TransactionId frozenXID,
 984                                   MultiXactId minMulti,
 985                                   TransactionId lastSaneFrozenXid,
 986                                   MultiXactId lastSaneMinMulti)
 987 {
 988         TransactionId myXID = GetCurrentTransactionId();
 989         Relation        relation;
 990         HeapScanDesc scan;
 991         HeapTuple       tuple;
 992         Oid                     oldestxid_datoid;
 993         Oid                     minmulti_datoid;
 994         bool            bogus = false;
 995         bool            frozenAlreadyWrapped = false;
 996
 997         /* init oldest datoids to sync with my frozenXID/minMulti values */
 998         oldestxid_datoid = MyDatabaseId;
 999         minmulti_datoid = MyDatabaseId;
1000
1001         /*
1002          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1003          *
1004          * Note: we need not worry about a race condition with new entries being
1005          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1006          * existing DB's datfrozenxid, and that source DB cannot be ours because
1007          * of the interlock against copying a DB containing an active backend.
1008          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1009          * concurrently modify the datfrozenxid's of different databases, the
1010          * worst possible outcome is that pg_clog is not truncated as aggressively
1011          * as it could be.
1012          */
1013         relation = heap_open(DatabaseRelationId, AccessShareLock);
1014
1015         scan = heap_beginscan_catalog(relation, 0, NULL);
1016
1017         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1018         {
1019                 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
1020
1021                 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
1022                 Assert(MultiXactIdIsValid(dbform->datminmxid));
1023
1024                 /*
1025                  * If things are working properly, no database should have a
1026                  * datfrozenxid or datminmxid that is "in the future".  However, such
1027                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1028                  * see any entries that are "in the future", chicken out and don't do
1029                  * anything.  This ensures we won't truncate clog before those
1030                  * databases have been scanned and cleaned up.  (We will issue the
1031                  * "already wrapped" warning if appropriate, though.)
1032                  */
1033                 if (TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid) ||
1034                         MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid))
1035                         bogus = true;
1036
1037                 if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
1038                         frozenAlreadyWrapped = true;
1039                 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
1040                 {
1041                         frozenXID = dbform->datfrozenxid;
1042                         oldestxid_datoid = HeapTupleGetOid(tuple);
1043                 }
1044
1045                 if (MultiXactIdPrecedes(dbform->datminmxid, minMulti))
1046                 {
1047                         minMulti = dbform->datminmxid;
1048                         minmulti_datoid = HeapTupleGetOid(tuple);
1049                 }
1050         }
1051
1052         heap_endscan(scan);
1053
1054         heap_close(relation, AccessShareLock);
1055
1056         /*
1057          * Do not truncate CLOG if we seem to have suffered wraparound already;
1058          * the computed minimum XID might be bogus.  This case should now be
1059          * impossible due to the defenses in GetNewTransactionId, but we keep the
1060          * test anyway.
1061          */
1062         if (frozenAlreadyWrapped)
1063         {
1064                 ereport(WARNING,
1065                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1066                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1067                 return;
1068         }
1069
1070         /* chicken out if data is bogus in any other way */
1071         if (bogus)
1072                 return;
1073
1074         /*
1075          * Truncate CLOG and CommitTs to the oldest computed value.
1076          * Note we don't truncate multixacts; that will be done by the next
1077          * checkpoint.
1078          */
1079         TruncateCLOG(frozenXID);
1080         TruncateCommitTs(frozenXID);
1081
1082         /*
1083          * Update the wrap limit for GetNewTransactionId and creation of new
1084          * MultiXactIds.  Note: these functions will also signal the postmaster
1085          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1086          * signalling twice?
1087          */
1088         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1089         SetMultiXactIdLimit(minMulti, minmulti_datoid);
1090         AdvanceOldestCommitTs(frozenXID);
1091 }
1092
1093
1094 /*
1095  *      vacuum_rel() -- vacuum one heap relation
1096  *
1097  *              Doing one heap at a time incurs extra overhead, since we need to
1098  *              check that the heap exists again just before we vacuum it.  The
1099  *              reason that we do this is so that vacuuming can be spread across
1100  *              many small transactions.  Otherwise, two-phase locking would require
1101  *              us to lock the entire database during one pass of the vacuum cleaner.
1102  *
1103  *              At entry and exit, we are not inside a transaction.
1104  */
1105 static bool
1106 vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
1107 {
1108         LOCKMODE        lmode;
1109         Relation        onerel;
1110         LockRelId       onerelid;
1111         Oid                     toast_relid;
1112         Oid                     save_userid;
1113         int                     save_sec_context;
1114         int                     save_nestlevel;
1115
1116         /* Begin a transaction for vacuuming this relation */
1117         StartTransactionCommand();
1118
1119         /*
1120          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1121          * ensures that RecentGlobalXmin is kept truly recent.
1122          */
1123         PushActiveSnapshot(GetTransactionSnapshot());
1124
1125         if (!(vacstmt->options & VACOPT_FULL))
1126         {
1127                 /*
1128                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1129                  * other concurrent VACUUMs know that they can ignore this one while
1130                  * determining their OldestXmin.  (The reason we don't set it during a
1131                  * full VACUUM is exactly that we may have to run user-defined
1132                  * functions for functional indexes, and we want to make sure that if
1133                  * they use the snapshot set above, any tuples it requires can't get
1134                  * removed from other tables.  An index function that depends on the
1135                  * contents of other tables is arguably broken, but we won't break it
1136                  * here by violating transaction semantics.)
1137                  *
1138                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1139                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1140                  * in an emergency.
1141                  *
1142                  * Note: these flags remain set until CommitTransaction or
1143                  * AbortTransaction.  We don't want to clear them until we reset
1144                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1145                  * which is probably Not Good.
1146                  */
1147                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1148                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1149                 if (for_wraparound)
1150                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1151                 LWLockRelease(ProcArrayLock);
1152         }
1153
1154         /*
1155          * Check for user-requested abort.  Note we want this to be inside a
1156          * transaction, so xact.c doesn't issue useless WARNING.
1157          */
1158         CHECK_FOR_INTERRUPTS();
1159
1160         /*
1161          * Determine the type of lock we want --- hard exclusive lock for a FULL
1162          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1163          * way, we can be sure that no other backend is vacuuming the same table.
1164          */
1165         lmode = (vacstmt->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1166
1167         /*
1168          * Open the relation and get the appropriate lock on it.
1169          *
1170          * There's a race condition here: the rel may have gone away since the
1171          * last time we saw it.  If so, we don't need to vacuum it.
1172          *
1173          * If we've been asked not to wait for the relation lock, acquire it first
1174          * in non-blocking mode, before calling try_relation_open().
1175          */
1176         if (!(vacstmt->options & VACOPT_NOWAIT))
1177                 onerel = try_relation_open(relid, lmode);
1178         else if (ConditionalLockRelationOid(relid, lmode))
1179                 onerel = try_relation_open(relid, NoLock);
1180         else
1181         {
1182                 onerel = NULL;
1183                 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
1184                         ereport(LOG,
1185                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1186                                    errmsg("skipping vacuum of \"%s\" --- lock not available",
1187                                                   vacstmt->relation->relname)));
1188         }
1189
1190         if (!onerel)
1191         {
1192                 PopActiveSnapshot();
1193                 CommitTransactionCommand();
1194                 return false;
1195         }
1196
1197         /*
1198          * Check permissions.
1199          *
1200          * We allow the user to vacuum a table if he is superuser, the table
1201          * owner, or the database owner (but in the latter case, only if it's not
1202          * a shared relation).  pg_class_ownercheck includes the superuser case.
1203          *
1204          * Note we choose to treat permissions failure as a WARNING and keep
1205          * trying to vacuum the rest of the DB --- is this appropriate?
1206          */
1207         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1208                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1209         {
1210                 if (onerel->rd_rel->relisshared)
1211                         ereport(WARNING,
1212                                   (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1213                                                   RelationGetRelationName(onerel))));
1214                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1215                         ereport(WARNING,
1216                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1217                                                         RelationGetRelationName(onerel))));
1218                 else
1219                         ereport(WARNING,
1220                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1221                                                         RelationGetRelationName(onerel))));
1222                 relation_close(onerel, lmode);
1223                 PopActiveSnapshot();
1224                 CommitTransactionCommand();
1225                 return false;
1226         }
1227
1228         /*
1229          * Check that it's a vacuumable relation; we used to do this in
1230          * get_rel_oids() but seems safer to check after we've locked the
1231          * relation.
1232          */
1233         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1234                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1235                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
1236         {
1237                 ereport(WARNING,
1238                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1239                                                 RelationGetRelationName(onerel))));
1240                 relation_close(onerel, lmode);
1241                 PopActiveSnapshot();
1242                 CommitTransactionCommand();
1243                 return false;
1244         }
1245
1246         /*
1247          * Silently ignore tables that are temp tables of other backends ---
1248          * trying to vacuum these will lead to great unhappiness, since their
1249          * contents are probably not up-to-date on disk.  (We don't throw a
1250          * warning here; it would just lead to chatter during a database-wide
1251          * VACUUM.)
1252          */
1253         if (RELATION_IS_OTHER_TEMP(onerel))
1254         {
1255                 relation_close(onerel, lmode);
1256                 PopActiveSnapshot();
1257                 CommitTransactionCommand();
1258                 return false;
1259         }
1260
1261         /*
1262          * Get a session-level lock too. This will protect our access to the
1263          * relation across multiple transactions, so that we can vacuum the
1264          * relation's TOAST table (if any) secure in the knowledge that no one is
1265          * deleting the parent relation.
1266          *
1267          * NOTE: this cannot block, even if someone else is waiting for access,
1268          * because the lock manager knows that both lock requests are from the
1269          * same process.
1270          */
1271         onerelid = onerel->rd_lockInfo.lockRelId;
1272         LockRelationIdForSession(&onerelid, lmode);
1273
1274         /*
1275          * Remember the relation's TOAST relation for later, if the caller asked
1276          * us to process it.  In VACUUM FULL, though, the toast table is
1277          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1278          */
1279         if (do_toast && !(vacstmt->options & VACOPT_FULL))
1280                 toast_relid = onerel->rd_rel->reltoastrelid;
1281         else
1282                 toast_relid = InvalidOid;
1283
1284         /*
1285          * Switch to the table owner's userid, so that any index functions are run
1286          * as that user.  Also lock down security-restricted operations and
1287          * arrange to make GUC variable changes local to this command. (This is
1288          * unnecessary, but harmless, for lazy VACUUM.)
1289          */
1290         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1291         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1292                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1293         save_nestlevel = NewGUCNestLevel();
1294
1295         /*
1296          * Do the actual work --- either FULL or "lazy" vacuum
1297          */
1298         if (vacstmt->options & VACOPT_FULL)
1299         {
1300                 /* close relation before vacuuming, but hold lock until commit */
1301                 relation_close(onerel, NoLock);
1302                 onerel = NULL;
1303
1304                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1305                 cluster_rel(relid, InvalidOid, false,
1306                                         (vacstmt->options & VACOPT_VERBOSE) != 0);
1307         }
1308         else
1309                 lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
1310
1311         /* Roll back any GUC changes executed by index functions */
1312         AtEOXact_GUC(false, save_nestlevel);
1313
1314         /* Restore userid and security context */
1315         SetUserIdAndSecContext(save_userid, save_sec_context);
1316
1317         /* all done with this class, but hold lock until commit */
1318         if (onerel)
1319                 relation_close(onerel, NoLock);
1320
1321         /*
1322          * Complete the transaction and free all temporary memory used.
1323          */
1324         PopActiveSnapshot();
1325         CommitTransactionCommand();
1326
1327         /*
1328          * If the relation has a secondary toast rel, vacuum that too while we
1329          * still hold the session lock on the master table.  Note however that
1330          * "analyze" will not get done on the toast table.  This is good, because
1331          * the toaster always uses hardcoded index access and statistics are
1332          * totally unimportant for toast relations.
1333          */
1334         if (toast_relid != InvalidOid)
1335                 vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
1336
1337         /*
1338          * Now release the session-level lock on the master table.
1339          */
1340         UnlockRelationIdForSession(&onerelid, lmode);
1341
1342         /* Report that we really did it. */
1343         return true;
1344 }
1345
1346
1347 /*
1348  * Open all the vacuumable indexes of the given relation, obtaining the
1349  * specified kind of lock on each.  Return an array of Relation pointers for
1350  * the indexes into *Irel, and the number of indexes into *nindexes.
1351  *
1352  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1353  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1354  * execution, and what we have is too corrupt to be processable.  We will
1355  * vacuum even if the index isn't indisvalid; this is important because in a
1356  * unique index, uniqueness checks will be performed anyway and had better not
1357  * hit dangling index pointers.
1358  */
1359 void
1360 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1361                                  int *nindexes, Relation **Irel)
1362 {
1363         List       *indexoidlist;
1364         ListCell   *indexoidscan;
1365         int                     i;
1366
1367         Assert(lockmode != NoLock);
1368
1369         indexoidlist = RelationGetIndexList(relation);
1370
1371         /* allocate enough memory for all indexes */
1372         i = list_length(indexoidlist);
1373
1374         if (i > 0)
1375                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1376         else
1377                 *Irel = NULL;
1378
1379         /* collect just the ready indexes */
1380         i = 0;
1381         foreach(indexoidscan, indexoidlist)
1382         {
1383                 Oid                     indexoid = lfirst_oid(indexoidscan);
1384                 Relation        indrel;
1385
1386                 indrel = index_open(indexoid, lockmode);
1387                 if (IndexIsReady(indrel->rd_index))
1388                         (*Irel)[i++] = indrel;
1389                 else
1390                         index_close(indrel, lockmode);
1391         }
1392
1393         *nindexes = i;
1394
1395         list_free(indexoidlist);
1396 }
1397
1398 /*
1399  * Release the resources acquired by vac_open_indexes.  Optionally release
1400  * the locks (say NoLock to keep 'em).
1401  */
1402 void
1403 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1404 {
1405         if (Irel == NULL)
1406                 return;
1407
1408         while (nindexes--)
1409         {
1410                 Relation        ind = Irel[nindexes];
1411
1412                 index_close(ind, lockmode);
1413         }
1414         pfree(Irel);
1415 }
1416
1417 /*
1418  * vacuum_delay_point --- check for interrupts and cost-based delay.
1419  *
1420  * This should be called in each major loop of VACUUM processing,
1421  * typically once per page processed.
1422  */
1423 void
1424 vacuum_delay_point(void)
1425 {
1426         /* Always check for interrupts */
1427         CHECK_FOR_INTERRUPTS();
1428
1429         /* Nap if appropriate */
1430         if (VacuumCostActive && !InterruptPending &&
1431                 VacuumCostBalance >= VacuumCostLimit)
1432         {
1433                 int                     msec;
1434
1435                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1436                 if (msec > VacuumCostDelay * 4)
1437                         msec = VacuumCostDelay * 4;
1438
1439                 pg_usleep(msec * 1000L);
1440
1441                 VacuumCostBalance = 0;
1442
1443                 /* update balance values for workers */
1444                 AutoVacuumUpdateDelay();
1445
1446                 /* Might have gotten an interrupt while sleeping */
1447                 CHECK_FOR_INTERRUPTS();
1448         }
1449 }