granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/commit_ts.h"
  27 #include "access/genam.h"
  28 #include "access/heapam.h"
  29 #include "access/htup_details.h"
  30 #include "access/multixact.h"
  31 #include "access/transam.h"
  32 #include "access/xact.h"
  33 #include "catalog/namespace.h"
  34 #include "catalog/pg_database.h"
  35 #include "catalog/pg_namespace.h"
  36 #include "commands/cluster.h"
  37 #include "commands/vacuum.h"
  38 #include "miscadmin.h"
  39 #include "pgstat.h"
  40 #include "postmaster/autovacuum.h"
  41 #include "storage/bufmgr.h"
  42 #include "storage/lmgr.h"
  43 #include "storage/proc.h"
  44 #include "storage/procarray.h"
  45 #include "utils/acl.h"
  46 #include "utils/fmgroids.h"
  47 #include "utils/guc.h"
  48 #include "utils/memutils.h"
  49 #include "utils/snapmgr.h"
  50 #include "utils/syscache.h"
  51 #include "utils/tqual.h"
  52
  53
  54 /*
  55  * GUC parameters
  56  *
      * vacuum_set_xid_limits() falls back to these whenever the corresponding
      * caller-supplied age (freeze_min_age, freeze_table_age,
      * multixact_freeze_min_age, multixact_freeze_table_age) is negative.
      */
  57 int                     vacuum_freeze_min_age;
  58 int                     vacuum_freeze_table_age;
  59 int                     vacuum_multixact_freeze_min_age;
  60 int                     vacuum_multixact_freeze_table_age;
  61
  62
  63 /*
      * A few variables that don't seem worth passing around as parameters.
      *
      * vac_context is the memory context vacuum() creates under PortalContext
      * for data that must survive its per-relation transactions; vacuum()
      * deletes it and resets the pointer to NULL when it finishes normally.
      * vac_strategy is the buffer access strategy vacuum() either received
      * from its caller or allocated in vac_context.
      */
  64 static MemoryContext vac_context = NULL;
  65 static BufferAccessStrategy vac_strategy;
  66
  67
  68 /* Prototypes for functions private to this file */
  69 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
  70 static void vac_truncate_clog(TransactionId frozenXID,
  71                                   MultiXactId minMulti,
  72                                   TransactionId lastSaneFrozenXid,
  73                                   MultiXactId lastSaneMinMulti);
  74 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
  75                    VacuumParams *params);
  76
  77 /*
  78  * Primary entry point for manual VACUUM and ANALYZE commands
  79  *
  80  * This is mainly a preparation wrapper for the real operations that will
  81  * happen in vacuum().
  82  */
  83 void
  84 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
  85 {
  86         VacuumParams params;
  87
  88         /* sanity checks on options */
  89         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
  90         Assert((vacstmt->options & VACOPT_VACUUM) ||
  91                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
  92         Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
  93         Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
  94
  95         /*
  96          * All freeze ages are zero if the FREEZE option is given; otherwise pass
  97          * them as -1 which means to use the default values.
  98          */
  99         if (vacstmt->options & VACOPT_FREEZE)
 100         {
 101                 params.freeze_min_age = 0;
 102                 params.freeze_table_age = 0;
 103                 params.multixact_freeze_min_age = 0;
 104                 params.multixact_freeze_table_age = 0;
 105         }
 106         else
 107         {
 108                 params.freeze_min_age = -1;
 109                 params.freeze_table_age = -1;
 110                 params.multixact_freeze_min_age = -1;
 111                 params.multixact_freeze_table_age = -1;
 112         }
 113
 114         /* user-invoked vacuum is never "for wraparound" */
 115         params.is_wraparound = false;
 116
 117         /* user-invoked vacuum never uses this parameter */
 118         params.log_min_duration = -1;
 119
 120         /* Now go through the common routine */
 121         vacuum(vacstmt->options, vacstmt->relation, InvalidOid, &params,
 122                    vacstmt->va_cols, NULL, isTopLevel);
 123 }
 124
 125 /*
 126  * Primary entry point for VACUUM and ANALYZE commands.
 127  *
 128  * options is a bitmask of VacuumOption flags, indicating what to do.
 129  *
 130  * relid, if not InvalidOid, indicate the relation to process; otherwise,
 131  * the RangeVar is used.  (The latter must always be passed, because it's
 132  * used for error messages.)
 133  *
 134  * params contains a set of parameters that can be used to customize the
 135  * behavior.
 136  *
 137  * va_cols is a list of columns to analyze, or NIL to process them all.
 138  *
 139  * bstrategy is normally given as NULL, but in autovacuum it can be passed
 140  * in to use the same buffer strategy object across multiple vacuum() calls.
 141  *
 142  * isTopLevel should be passed down from ProcessUtility.
 143  *
 144  * It is the caller's responsibility that all parameters are allocated in a
 145  * memory context that will not disappear at transaction commit.
 146  */
 147 void
 148 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
 149            List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
 150 {
 151         const char *stmttype;
 152         volatile bool in_outer_xact,
 153                                 use_own_xacts;
 154         List       *relations;
 155         static bool in_vacuum = false;
 156
 157         Assert(params != NULL);
 158
 159         stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 160
 161         /*
 162          * We cannot run VACUUM inside a user transaction block; if we were inside
 163          * a transaction, then our commit- and start-transaction-command calls
 164          * would not have the intended effect!  There are numerous other subtle
 165          * dependencies on this, too.
 166          *
 167          * ANALYZE (without VACUUM) can run either way.
 168          */
 169         if (options & VACOPT_VACUUM)
 170         {
 171                 PreventTransactionChain(isTopLevel, stmttype);
 172                 in_outer_xact = false;
 173         }
 174         else
 175                 in_outer_xact = IsInTransactionChain(isTopLevel);
 176
 177         /*
 178          * Due to static variables vac_context, anl_context and vac_strategy,
 179          * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
 180          * calls a hostile index expression that itself calls ANALYZE.
 181          */
 182         if (in_vacuum)
 183                 ereport(ERROR,
 184                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 185                                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
 186                                                 stmttype)));
 187
 188         /*
 189          * Send info about dead objects to the statistics collector, unless we are
 190          * in autovacuum --- autovacuum.c does this for itself.
 191          */
 192         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 193                 pgstat_vacuum_stat();
 194
 195         /*
 196          * Create special memory context for cross-transaction storage.
 197          *
 198          * Since it is a child of PortalContext, it will go away eventually even
 199          * if we suffer an error; there's no need for special abort cleanup logic.
 200          */
 201         vac_context = AllocSetContextCreate(PortalContext,
 202                                                                                 "Vacuum",
 203                                                                                 ALLOCSET_DEFAULT_MINSIZE,
 204                                                                                 ALLOCSET_DEFAULT_INITSIZE,
 205                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
 206
 207         /*
 208          * If caller didn't give us a buffer strategy object, make one in the
 209          * cross-transaction memory context.
 210          */
 211         if (bstrategy == NULL)
 212         {
 213                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 214
 215                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 216                 MemoryContextSwitchTo(old_context);
 217         }
 218         vac_strategy = bstrategy;
 219
 220         /*
 221          * Build list of relations to process, unless caller gave us one. (If we
 222          * build one, we put it in vac_context for safekeeping.)
 223          */
 224         relations = get_rel_oids(relid, relation);
 225
 226         /*
 227          * Decide whether we need to start/commit our own transactions.
 228          *
 229          * For VACUUM (with or without ANALYZE): always do so, so that we can
 230          * release locks as soon as possible.  (We could possibly use the outer
 231          * transaction for a one-table VACUUM, but handling TOAST tables would be
 232          * problematic.)
 233          *
 234          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 235          * start/commit our own transactions.  Also, there's no need to do so if
 236          * only processing one relation.  For multiple relations when not within a
 237          * transaction block, and also in an autovacuum worker, use own
 238          * transactions so we can release locks sooner.
 239          */
 240         if (options & VACOPT_VACUUM)
 241                 use_own_xacts = true;
 242         else
 243         {
 244                 Assert(options & VACOPT_ANALYZE);
 245                 if (IsAutoVacuumWorkerProcess())
 246                         use_own_xacts = true;
 247                 else if (in_outer_xact)
 248                         use_own_xacts = false;
 249                 else if (list_length(relations) > 1)
 250                         use_own_xacts = true;
 251                 else
 252                         use_own_xacts = false;
 253         }
 254
 255         /*
 256          * vacuum_rel expects to be entered with no transaction active; it will
 257          * start and commit its own transaction.  But we are called by an SQL
 258          * command, and so we are executing inside a transaction already. We
 259          * commit the transaction started in PostgresMain() here, and start
 260          * another one before exiting to match the commit waiting for us back in
 261          * PostgresMain().
 262          */
 263         if (use_own_xacts)
 264         {
 265                 Assert(!in_outer_xact);
 266
 267                 /* ActiveSnapshot is not set by autovacuum */
 268                 if (ActiveSnapshotSet())
 269                         PopActiveSnapshot();
 270
 271                 /* matches the StartTransaction in PostgresMain() */
 272                 CommitTransactionCommand();
 273         }
 274
 275         /* Turn vacuum cost accounting on or off */
 276         PG_TRY();
 277         {
 278                 ListCell   *cur;
 279
 280                 in_vacuum = true;
 281                 VacuumCostActive = (VacuumCostDelay > 0);
 282                 VacuumCostBalance = 0;
 283                 VacuumPageHit = 0;
 284                 VacuumPageMiss = 0;
 285                 VacuumPageDirty = 0;
 286
 287                 /*
 288                  * Loop to process each selected relation.
 289                  */
 290                 foreach(cur, relations)
 291                 {
 292                         Oid                     relid = lfirst_oid(cur);
 293
 294                         if (options & VACOPT_VACUUM)
 295                         {
 296                                 if (!vacuum_rel(relid, relation, options, params))
 297                                         continue;
 298                         }
 299
 300                         if (options & VACOPT_ANALYZE)
 301                         {
 302                                 /*
 303                                  * If using separate xacts, start one for analyze. Otherwise,
 304                                  * we can use the outer transaction.
 305                                  */
 306                                 if (use_own_xacts)
 307                                 {
 308                                         StartTransactionCommand();
 309                                         /* functions in indexes may want a snapshot set */
 310                                         PushActiveSnapshot(GetTransactionSnapshot());
 311                                 }
 312
 313                                 analyze_rel(relid, relation, options, params,
 314                                                         va_cols, in_outer_xact, vac_strategy);
 315
 316                                 if (use_own_xacts)
 317                                 {
 318                                         PopActiveSnapshot();
 319                                         CommitTransactionCommand();
 320                                 }
 321                         }
 322                 }
 323         }
 324         PG_CATCH();
 325         {
 326                 in_vacuum = false;
 327                 VacuumCostActive = false;
 328                 PG_RE_THROW();
 329         }
 330         PG_END_TRY();
 331
 332         in_vacuum = false;
 333         VacuumCostActive = false;
 334
 335         /*
 336          * Finish up processing.
 337          */
 338         if (use_own_xacts)
 339         {
 340                 /* here, we are not in a transaction */
 341
 342                 /*
 343                  * This matches the CommitTransaction waiting for us in
 344                  * PostgresMain().
 345                  */
 346                 StartTransactionCommand();
 347         }
 348
 349         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 350         {
 351                 /*
 352                  * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
 353                  * (autovacuum.c does this for itself.)
 354                  */
 355                 vac_update_datfrozenxid();
 356         }
 357
 358         /*
 359          * Clean up working storage --- note we must do this after
 360          * StartTransactionCommand, else we might be trying to delete the active
 361          * context!
 362          */
 363         MemoryContextDelete(vac_context);
 364         vac_context = NULL;
 365 }
 366
 367 /*
 368  * Build a list of Oids for each relation to be processed
 369  *
 370  * The list is built in vac_context so that it will survive across our
 371  * per-relation transactions.
 372  */
 373 static List *
 374 get_rel_oids(Oid relid, const RangeVar *vacrel)
 375 {
 376         List       *oid_list = NIL;
 377         MemoryContext oldcontext;
 378
 379         /* OID supplied by VACUUM's caller? */
 380         if (OidIsValid(relid))
 381         {
 382                 oldcontext = MemoryContextSwitchTo(vac_context);
 383                 oid_list = lappend_oid(oid_list, relid);
 384                 MemoryContextSwitchTo(oldcontext);
 385         }
 386         else if (vacrel)
 387         {
 388                 /* Process a specific relation */
 389                 Oid                     relid;
 390
 391                 /*
 392                  * Since we don't take a lock here, the relation might be gone, or the
 393                  * RangeVar might no longer refer to the OID we look up here.  In the
 394                  * former case, VACUUM will do nothing; in the latter case, it will
 395                  * process the OID we looked up here, rather than the new one. Neither
 396                  * is ideal, but there's little practical alternative, since we're
 397                  * going to commit this transaction and begin a new one between now
 398                  * and then.
 399                  */
 400                 relid = RangeVarGetRelid(vacrel, NoLock, false);
 401
 402                 /* Make a relation list entry for this guy */
 403                 oldcontext = MemoryContextSwitchTo(vac_context);
 404                 oid_list = lappend_oid(oid_list, relid);
 405                 MemoryContextSwitchTo(oldcontext);
 406         }
 407         else
 408         {
 409                 /*
 410                  * Process all plain relations and materialized views listed in
 411                  * pg_class
 412                  */
 413                 Relation        pgclass;
 414                 HeapScanDesc scan;
 415                 HeapTuple       tuple;
 416
 417                 pgclass = heap_open(RelationRelationId, AccessShareLock);
 418
 419                 scan = heap_beginscan_catalog(pgclass, 0, NULL);
 420
 421                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 422                 {
 423                         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 424
 425                         if (classForm->relkind != RELKIND_RELATION &&
 426                                 classForm->relkind != RELKIND_MATVIEW)
 427                                 continue;
 428
 429                         /* Make a relation list entry for this guy */
 430                         oldcontext = MemoryContextSwitchTo(vac_context);
 431                         oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
 432                         MemoryContextSwitchTo(oldcontext);
 433                 }
 434
 435                 heap_endscan(scan);
 436                 heap_close(pgclass, AccessShareLock);
 437         }
 438
 439         return oid_list;
 440 }
 441
 442 /*
 443  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 444  *
 445  * The output parameters are:
 446  * - oldestXmin is the cutoff value used to distinguish whether tuples are
 447  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 448  * - freezeLimit is the Xid below which all Xids are replaced by
 449  *       FrozenTransactionId during vacuum.
 450  * - xidFullScanLimit (computed from table_freeze_age parameter)
 451  *       represents a minimum Xid value; a table whose relfrozenxid is older than
 452  *       this will have a full-table vacuum applied to it, to freeze tuples across
 453  *       the whole table.  Vacuuming a table younger than this value can use a
 454  *       partial scan.
 455  * - multiXactCutoff is the value below which all MultiXactIds are removed from
 456  *       Xmax.
 457  * - mxactFullScanLimit is a value against which a table's relminmxid value is
 458  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
 459  *
 460  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 461  * not interested.
 462  */
 463 void
 464 vacuum_set_xid_limits(Relation rel,
 465                                           int freeze_min_age,
 466                                           int freeze_table_age,
 467                                           int multixact_freeze_min_age,
 468                                           int multixact_freeze_table_age,
 469                                           TransactionId *oldestXmin,
 470                                           TransactionId *freezeLimit,
 471                                           TransactionId *xidFullScanLimit,
 472                                           MultiXactId *multiXactCutoff,
 473                                           MultiXactId *mxactFullScanLimit)
 474 {
 475         int                     freezemin;
 476         int                     mxid_freezemin;
 477         int                     effective_multixact_freeze_max_age;
 478         TransactionId limit;
 479         TransactionId safeLimit;
 480         MultiXactId mxactLimit;
 481         MultiXactId safeMxactLimit;
 482
 483         /*
 484          * We can always ignore processes running lazy vacuum.  This is because we
 485          * use these values only for deciding which tuples we must keep in the
 486          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
 487          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 488          * a full vacuum, but keep in mind that only one vacuum process can be
 489          * working on a particular table at any time, and that each vacuum is
 490          * always an independent transaction.
 491          */
 492         *oldestXmin =
 493                 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, true), rel);
 494
 495         Assert(TransactionIdIsNormal(*oldestXmin));
 496
 497         /*
 498          * Determine the minimum freeze age to use: as specified by the caller, or
 499          * vacuum_freeze_min_age, but in any case not more than half
 500          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 501          * wraparound won't occur too frequently.
 502          */
 503         freezemin = freeze_min_age;
 504         if (freezemin < 0)
 505                 freezemin = vacuum_freeze_min_age;
 506         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 507         Assert(freezemin >= 0);
 508
 509         /*
 510          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 511          */
 512         limit = *oldestXmin - freezemin;
 513         if (!TransactionIdIsNormal(limit))
 514                 limit = FirstNormalTransactionId;
 515
 516         /*
 517          * If oldestXmin is very far back (in practice, more than
 518          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 519          * freeze age of zero.
 520          */
 521         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 522         if (!TransactionIdIsNormal(safeLimit))
 523                 safeLimit = FirstNormalTransactionId;
 524
 525         if (TransactionIdPrecedes(limit, safeLimit))
 526         {
 527                 ereport(WARNING,
 528                                 (errmsg("oldest xmin is far in the past"),
 529                                  errhint("Close open transactions soon to avoid wraparound problems.")));
 530                 limit = *oldestXmin;
 531         }
 532
 533         *freezeLimit = limit;
 534
 535         /*
 536          * Compute the multixact age for which freezing is urgent.  This is
 537          * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 538          * short of multixact member space.
 539          */
 540         effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 541
 542         /*
 543          * Determine the minimum multixact freeze age to use: as specified by
 544          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
 545          * than half effective_multixact_freeze_max_age, so that autovacuums to
 546          * prevent MultiXact wraparound won't occur too frequently.
 547          */
 548         mxid_freezemin = multixact_freeze_min_age;
 549         if (mxid_freezemin < 0)
 550                 mxid_freezemin = vacuum_multixact_freeze_min_age;
 551         mxid_freezemin = Min(mxid_freezemin,
 552                                                  effective_multixact_freeze_max_age / 2);
 553         Assert(mxid_freezemin >= 0);
 554
 555         /* compute the cutoff multi, being careful to generate a valid value */
 556         mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
 557         if (mxactLimit < FirstMultiXactId)
 558                 mxactLimit = FirstMultiXactId;
 559
 560         safeMxactLimit =
 561                 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
 562         if (safeMxactLimit < FirstMultiXactId)
 563                 safeMxactLimit = FirstMultiXactId;
 564
 565         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
 566         {
 567                 ereport(WARNING,
 568                                 (errmsg("oldest multixact is far in the past"),
 569                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 570                 mxactLimit = safeMxactLimit;
 571         }
 572
 573         *multiXactCutoff = mxactLimit;
 574
 575         if (xidFullScanLimit != NULL)
 576         {
 577                 int                     freezetable;
 578
 579                 Assert(mxactFullScanLimit != NULL);
 580
 581                 /*
 582                  * Determine the table freeze age to use: as specified by the caller,
 583                  * or vacuum_freeze_table_age, but in any case not more than
 584                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 585                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 586                  * before anti-wraparound autovacuum is launched.
 587                  */
 588                 freezetable = freeze_table_age;
 589                 if (freezetable < 0)
 590                         freezetable = vacuum_freeze_table_age;
 591                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 592                 Assert(freezetable >= 0);
 593
 594                 /*
 595                  * Compute XID limit causing a full-table vacuum, being careful not to
 596                  * generate a "permanent" XID.
 597                  */
 598                 limit = ReadNewTransactionId() - freezetable;
 599                 if (!TransactionIdIsNormal(limit))
 600                         limit = FirstNormalTransactionId;
 601
 602                 *xidFullScanLimit = limit;
 603
 604                 /*
 605                  * Similar to the above, determine the table freeze age to use for
 606                  * multixacts: as specified by the caller, or
 607                  * vacuum_multixact_freeze_table_age, but in any case not more than
 608                  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
 609                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
 610                  * freeze multixacts before anti-wraparound autovacuum is launched.
 611                  */
 612                 freezetable = multixact_freeze_table_age;
 613                 if (freezetable < 0)
 614                         freezetable = vacuum_multixact_freeze_table_age;
 615                 freezetable = Min(freezetable,
 616                                                   effective_multixact_freeze_max_age * 0.95);
 617                 Assert(freezetable >= 0);
 618
 619                 /*
 620                  * Compute MultiXact limit causing a full-table vacuum, being careful
 621                  * to generate a valid MultiXact value.
 622                  */
 623                 mxactLimit = ReadNextMultiXactId() - freezetable;
 624                 if (mxactLimit < FirstMultiXactId)
 625                         mxactLimit = FirstMultiXactId;
 626
 627                 *mxactFullScanLimit = mxactLimit;
 628         }
 629         else
 630         {
 631                 Assert(mxactFullScanLimit == NULL);
 632         }
 633 }
 634
 635 /*
 636  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
 637  *
 638  *              If we scanned the whole relation then we should just use the count of
 639  *              live tuples seen; but if we did not, we should not trust the count
 640  *              unreservedly, especially not in VACUUM, which may have scanned a quite
 641  *              nonrandom subset of the table.  When we have only partial information,
 642  *              we take the old value of pg_class.reltuples as a measurement of the
 643  *              tuple density in the unscanned pages.
 644  *
 645  *              This routine is shared by VACUUM and ANALYZE.
 646  */
 647 double
 648 vac_estimate_reltuples(Relation relation, bool is_analyze,
 649                                            BlockNumber total_pages,
 650                                            BlockNumber scanned_pages,
 651                                            double scanned_tuples)
 652 {
 653         BlockNumber old_rel_pages = relation->rd_rel->relpages;
 654         double          old_rel_tuples = relation->rd_rel->reltuples;
 655         double          old_density;
 656         double          new_density;
 657         double          multiplier;
 658         double          updated_density;
 659
 660         /* If we did scan the whole table, just use the count as-is */
 661         if (scanned_pages >= total_pages)
 662                 return scanned_tuples;
 663
 664         /*
 665          * If scanned_pages is zero but total_pages isn't, keep the existing value
 666          * of reltuples.  (Note: callers should avoid updating the pg_class
 667          * statistics in this situation, since no new information has been
 668          * provided.)
 669          */
 670         if (scanned_pages == 0)
 671                 return old_rel_tuples;
 672
 673         /*
 674          * If old value of relpages is zero, old density is indeterminate; we
 675          * can't do much except scale up scanned_tuples to match total_pages.
 676          */
 677         if (old_rel_pages == 0)
 678                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 679
 680         /*
 681          * Okay, we've covered the corner cases.  The normal calculation is to
 682          * convert the old measurement to a density (tuples per page), then update
 683          * the density using an exponential-moving-average approach, and finally
 684          * compute reltuples as updated_density * total_pages.
 685          *
 686          * For ANALYZE, the moving average multiplier is just the fraction of the
 687          * table's pages we scanned.  This is equivalent to assuming that the
 688          * tuple density in the unscanned pages didn't change.  Of course, it
 689          * probably did, if the new density measurement is different. But over
 690          * repeated cycles, the value of reltuples will converge towards the
 691          * correct value, if repeated measurements show the same new density.
 692          *
 693          * For VACUUM, the situation is a bit different: we have looked at a
 694          * nonrandom sample of pages, but we know for certain that the pages we
 695          * didn't look at are precisely the ones that haven't changed lately.
 696          * Thus, there is a reasonable argument for doing exactly the same thing
 697          * as for the ANALYZE case, that is use the old density measurement as the
 698          * value for the unscanned pages.
 699          *
 700          * This logic could probably use further refinement.
 701          */
 702         old_density = old_rel_tuples / old_rel_pages;
 703         new_density = scanned_tuples / scanned_pages;
 704         multiplier = (double) scanned_pages / (double) total_pages;
 705         updated_density = old_density + (new_density - old_density) * multiplier;
 706         return floor(updated_density * total_pages + 0.5);
 707 }
 708
 709
 710 /*
 711  *      vac_update_relstats() -- update statistics for one relation
 712  *
 713  *              Update the whole-relation statistics that are kept in its pg_class
 714  *              row.  There are additional stats that will be updated if we are
 715  *              doing ANALYZE, but we always update these stats.  This routine works
 716  *              for both index and heap relation entries in pg_class.
 717  *
 718  *              We violate transaction semantics here by overwriting the rel's
 719  *              existing pg_class tuple with the new values.  This is reasonably
 720  *              safe as long as we're sure that the new values are correct whether or
 721  *              not this transaction commits.  The reason for doing this is that if
 722  *              we updated these tuples in the usual way, vacuuming pg_class itself
 723  *              wouldn't work very well --- by the time we got done with a vacuum
 724  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
 725  *              course, this only works for fixed-size not-null columns, but these are.
 726  *
 727  *              Another reason for doing it this way is that when we are in a lazy
 728  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
 729  *              Somebody vacuuming pg_class might think they could delete a tuple
 730  *              marked with xmin = our xid.
 731  *
 732  *              In addition to fundamentally nontransactional statistics such as
 733  *              relpages and relallvisible, we try to maintain certain lazily-updated
 734  *              DDL flags such as relhasindex, by clearing them if no longer correct.
 735  *              It's safe to do this in VACUUM, which can't run in parallel with
 736  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
 737  *              However, it's *not* safe to do it in an ANALYZE that's within an
 738  *              outer transaction, because for example the current transaction might
 739  *              have dropped the last index; then we'd think relhasindex should be
 740  *              cleared, but if the transaction later rolls back this would be wrong.
 741  *              So we refrain from updating the DDL flags if we're inside an outer
 742  *              transaction.  This is OK since postponing the flag maintenance is
 743  *              always allowable.
 744  *
 745  *              This routine is shared by VACUUM and ANALYZE.
 746  */
 747 void
 748 vac_update_relstats(Relation relation,
 749                                         BlockNumber num_pages, double num_tuples,
 750                                         BlockNumber num_all_visible_pages,
 751                                         bool hasindex, TransactionId frozenxid,
 752                                         MultiXactId minmulti,
 753                                         bool in_outer_xact)
 754 {
 755         Oid                     relid = RelationGetRelid(relation);
 756         Relation        rd;
 757         HeapTuple       ctup;
 758         Form_pg_class pgcform;
 759         bool            dirty;
 760
 761         rd = heap_open(RelationRelationId, RowExclusiveLock);
 762
 763         /* Fetch a copy of the tuple to scribble on */
 764         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
 765         if (!HeapTupleIsValid(ctup))
 766                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 767                          relid);
 768         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 769
 770         /* Apply statistical updates, if any, to copied tuple */
 771
 772         dirty = false;
 773         if (pgcform->relpages != (int32) num_pages)
 774         {
 775                 pgcform->relpages = (int32) num_pages;
 776                 dirty = true;
 777         }
 778         if (pgcform->reltuples != (float4) num_tuples)
 779         {
 780                 pgcform->reltuples = (float4) num_tuples;
 781                 dirty = true;
 782         }
 783         if (pgcform->relallvisible != (int32) num_all_visible_pages)
 784         {
 785                 pgcform->relallvisible = (int32) num_all_visible_pages;
 786                 dirty = true;
 787         }
 788
 789         /* Apply DDL updates, but not inside an outer transaction (see above) */
 790
 791         if (!in_outer_xact)
 792         {
 793                 /*
 794                  * If we didn't find any indexes, reset relhasindex.
 795                  */
 796                 if (pgcform->relhasindex && !hasindex)
 797                 {
 798                         pgcform->relhasindex = false;
 799                         dirty = true;
 800                 }
 801
 802                 /*
 803                  * If we have discovered that there are no indexes, then there's no
 804                  * primary key either.  This could be done more thoroughly...
 805                  */
 806                 if (pgcform->relhaspkey && !hasindex)
 807                 {
 808                         pgcform->relhaspkey = false;
 809                         dirty = true;
 810                 }
 811
 812                 /* We also clear relhasrules and relhastriggers if needed */
 813                 if (pgcform->relhasrules && relation->rd_rules == NULL)
 814                 {
 815                         pgcform->relhasrules = false;
 816                         dirty = true;
 817                 }
 818                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
 819                 {
 820                         pgcform->relhastriggers = false;
 821                         dirty = true;
 822                 }
 823         }
 824
 825         /*
 826          * Update relfrozenxid, unless caller passed InvalidTransactionId
 827          * indicating it has no new data.
 828          *
 829          * Ordinarily, we don't let relfrozenxid go backwards: if things are
 830          * working correctly, the only way the new frozenxid could be older would
 831          * be if a previous VACUUM was done with a tighter freeze_min_age, in
 832          * which case we don't want to forget the work it already did.  However,
 833          * if the stored relfrozenxid is "in the future", then it must be corrupt
 834          * and it seems best to overwrite it with the cutoff we used this time.
 835          * This should match vac_update_datfrozenxid() concerning what we consider
 836          * to be "in the future".
 837          */
 838         if (TransactionIdIsNormal(frozenxid) &&
 839                 pgcform->relfrozenxid != frozenxid &&
 840                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
 841                  TransactionIdPrecedes(ReadNewTransactionId(),
 842                                                            pgcform->relfrozenxid)))
 843         {
 844                 pgcform->relfrozenxid = frozenxid;
 845                 dirty = true;
 846         }
 847
 848         /* Similarly for relminmxid */
 849         if (MultiXactIdIsValid(minmulti) &&
 850                 pgcform->relminmxid != minmulti &&
 851                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
 852                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
 853         {
 854                 pgcform->relminmxid = minmulti;
 855                 dirty = true;
 856         }
 857
 858         /* If anything changed, write out the tuple. */
 859         if (dirty)
 860                 heap_inplace_update(rd, ctup);
 861
 862         heap_close(rd, RowExclusiveLock);
 863 }
 864
 865
 866 /*
 867  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 868  *
 869  *              Update pg_database's datfrozenxid entry for our database to be the
 870  *              minimum of the pg_class.relfrozenxid values.
 871  *
 872  *              Similarly, update our datminmxid to be the minimum of the
 873  *              pg_class.relminmxid values.
 874  *
 875  *              If we are able to advance either pg_database value, also try to
 876  *              truncate pg_clog and pg_multixact.
 877  *
 878  *              We violate transaction semantics here by overwriting the database's
 879  *              existing pg_database tuple with the new values.  This is reasonably
 880  *              safe since the new values are correct whether or not this transaction
 881  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
 882  *              behind after a VACUUM.
 883  */
 884 void
 885 vac_update_datfrozenxid(void)
 886 {
 887         HeapTuple       tuple;
 888         Form_pg_database dbform;
 889         Relation        relation;
 890         SysScanDesc scan;
 891         HeapTuple       classTup;
 892         TransactionId newFrozenXid;
 893         MultiXactId newMinMulti;
 894         TransactionId lastSaneFrozenXid;
 895         MultiXactId lastSaneMinMulti;
 896         bool            bogus = false;
 897         bool            dirty = false;
 898
 899         /*
 900          * Initialize the "min" calculation with GetOldestXmin, which is a
 901          * reasonable approximation to the minimum relfrozenxid for not-yet-
 902          * committed pg_class entries for new tables; see AddNewRelationTuple().
 903          * So we cannot produce a wrong minimum by starting with this.
 904          */
 905         newFrozenXid = GetOldestXmin(NULL, true);
 906
 907         /*
 908          * Similarly, initialize the MultiXact "min" with the value that would be
 909          * used on pg_class for new tables.  See AddNewRelationTuple().
 910          */
 911         newMinMulti = GetOldestMultiXactId();
 912
 913         /*
 914          * Identify the latest relfrozenxid and relminmxid values that we could
 915          * validly see during the scan.  These are conservative values, but it's
 916          * not really worth trying to be more exact.
 917          */
 918         lastSaneFrozenXid = ReadNewTransactionId();
 919         lastSaneMinMulti = ReadNextMultiXactId();
 920
 921         /*
 922          * We must seqscan pg_class to find the minimum Xid, because there is no
 923          * index that can help us here.
 924          */
 925         relation = heap_open(RelationRelationId, AccessShareLock);
 926
 927         scan = systable_beginscan(relation, InvalidOid, false,
 928                                                           NULL, 0, NULL);
 929
 930         while ((classTup = systable_getnext(scan)) != NULL)
 931         {
 932                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
 933
 934                 /*
 935                  * Only consider relations able to hold unfrozen XIDs (anything else
 936                  * should have InvalidTransactionId in relfrozenxid anyway.)
 937                  */
 938                 if (classForm->relkind != RELKIND_RELATION &&
 939                         classForm->relkind != RELKIND_MATVIEW &&
 940                         classForm->relkind != RELKIND_TOASTVALUE)
 941                         continue;
 942
 943                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
 944                 Assert(MultiXactIdIsValid(classForm->relminmxid));
 945
 946                 /*
 947                  * If things are working properly, no relation should have a
 948                  * relfrozenxid or relminmxid that is "in the future".  However, such
 949                  * cases have been known to arise due to bugs in pg_upgrade.  If we
 950                  * see any entries that are "in the future", chicken out and don't do
 951                  * anything.  This ensures we won't truncate clog before those
 952                  * relations have been scanned and cleaned up.
 953                  */
 954                 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
 955                         MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
 956                 {
 957                         bogus = true;
 958                         break;
 959                 }
 960
 961                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
 962                         newFrozenXid = classForm->relfrozenxid;
 963
 964                 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
 965                         newMinMulti = classForm->relminmxid;
 966         }
 967
 968         /* we're done with pg_class */
 969         systable_endscan(scan);
 970         heap_close(relation, AccessShareLock);
 971
 972         /* chicken out if bogus data found */
 973         if (bogus)
 974                 return;
 975
 976         Assert(TransactionIdIsNormal(newFrozenXid));
 977         Assert(MultiXactIdIsValid(newMinMulti));
 978
 979         /* Now fetch the pg_database tuple we need to update. */
 980         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
 981
 982         /* Fetch a copy of the tuple to scribble on */
 983         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
 984         if (!HeapTupleIsValid(tuple))
 985                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
 986         dbform = (Form_pg_database) GETSTRUCT(tuple);
 987
 988         /*
 989          * As in vac_update_relstats(), we ordinarily don't want to let
 990          * datfrozenxid go backward; but if it's "in the future" then it must be
 991          * corrupt and it seems best to overwrite it.
 992          */
 993         if (dbform->datfrozenxid != newFrozenXid &&
 994                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
 995                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
 996         {
 997                 dbform->datfrozenxid = newFrozenXid;
 998                 dirty = true;
 999         }
1000         else
1001                 newFrozenXid = dbform->datfrozenxid;
1002
1003         /* Ditto for datminmxid */
1004         if (dbform->datminmxid != newMinMulti &&
1005                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1006                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1007         {
1008                 dbform->datminmxid = newMinMulti;
1009                 dirty = true;
1010         }
1011         else
1012                 newMinMulti = dbform->datminmxid;
1013
1014         if (dirty)
1015                 heap_inplace_update(relation, tuple);
1016
1017         heap_freetuple(tuple);
1018         heap_close(relation, RowExclusiveLock);
1019
1020         /*
1021          * If we were able to advance datfrozenxid or datminmxid, see if we can
1022          * truncate pg_clog and/or pg_multixact.  Also do it if the shared
1023          * XID-wrap-limit info is stale, since this action will update that too.
1024          */
1025         if (dirty || ForceTransactionIdLimitUpdate())
1026                 vac_truncate_clog(newFrozenXid, newMinMulti,
1027                                                   lastSaneFrozenXid, lastSaneMinMulti);
1028 }
1029
1030
1031 /*
1032  *      vac_truncate_clog() -- attempt to truncate the commit log
1033  *
1034  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
1035  *              and use it to truncate the transaction commit log (pg_clog).
1036  *              Also update the XID wrap limit info maintained by varsup.c.
1037  *              Likewise for datminmxid.
1038  *
1039  *              The passed frozenXID and minMulti are the updated values for my own
1040  *              pg_database entry. They're used to initialize the "min" calculations.
1041  *              The caller also passes the "last sane" XID and MXID, since it has
1042  *              those at hand already.
1043  *
1044  *              This routine is only invoked when we've managed to change our
1045  *              DB's datfrozenxid/datminmxid values, or we found that the shared
1046  *              XID-wrap-limit info is stale.
1047  */
1048 static void
1049 vac_truncate_clog(TransactionId frozenXID,
1050                                   MultiXactId minMulti,
1051                                   TransactionId lastSaneFrozenXid,
1052                                   MultiXactId lastSaneMinMulti)
1053 {
1054         TransactionId nextXID = ReadNewTransactionId();
1055         Relation        relation;
1056         HeapScanDesc scan;
1057         HeapTuple       tuple;
1058         Oid                     oldestxid_datoid;
1059         Oid                     minmulti_datoid;
1060         bool            bogus = false;
1061         bool            frozenAlreadyWrapped = false;
1062
1063         /* init oldest datoids to sync with my frozenXID/minMulti values */
1064         oldestxid_datoid = MyDatabaseId;
1065         minmulti_datoid = MyDatabaseId;
1066
1067         /*
1068          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1069          *
1070          * Note: we need not worry about a race condition with new entries being
1071          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1072          * existing DB's datfrozenxid, and that source DB cannot be ours because
1073          * of the interlock against copying a DB containing an active backend.
1074          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1075          * concurrently modify the datfrozenxid's of different databases, the
1076          * worst possible outcome is that pg_clog is not truncated as aggressively
1077          * as it could be.
1078          */
1079         relation = heap_open(DatabaseRelationId, AccessShareLock);
1080
1081         scan = heap_beginscan_catalog(relation, 0, NULL);
1082
1083         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1084         {
1085                 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
1086
1087                 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
1088                 Assert(MultiXactIdIsValid(dbform->datminmxid));
1089
1090                 /*
1091                  * If things are working properly, no database should have a
1092                  * datfrozenxid or datminmxid that is "in the future".  However, such
1093                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1094                  * see any entries that are "in the future", chicken out and don't do
1095                  * anything.  This ensures we won't truncate clog before those
1096                  * databases have been scanned and cleaned up.  (We will issue the
1097                  * "already wrapped" warning if appropriate, though.)
1098                  */
1099                 if (TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid) ||
1100                         MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid))
1101                         bogus = true;
1102
1103                 if (TransactionIdPrecedes(nextXID, dbform->datfrozenxid))
1104                         frozenAlreadyWrapped = true;
1105                 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
1106                 {
1107                         frozenXID = dbform->datfrozenxid;
1108                         oldestxid_datoid = HeapTupleGetOid(tuple);
1109                 }
1110
1111                 if (MultiXactIdPrecedes(dbform->datminmxid, minMulti))
1112                 {
1113                         minMulti = dbform->datminmxid;
1114                         minmulti_datoid = HeapTupleGetOid(tuple);
1115                 }
1116         }
1117
1118         heap_endscan(scan);
1119
1120         heap_close(relation, AccessShareLock);
1121
1122         /*
1123          * Do not truncate CLOG if we seem to have suffered wraparound already;
1124          * the computed minimum XID might be bogus.  This case should now be
1125          * impossible due to the defenses in GetNewTransactionId, but we keep the
1126          * test anyway.
1127          */
1128         if (frozenAlreadyWrapped)
1129         {
1130                 ereport(WARNING,
1131                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1132                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1133                 return;
1134         }
1135
1136         /* chicken out if data is bogus in any other way */
1137         if (bogus)
1138                 return;
1139
1140         /*
1141          * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1142          */
1143         TruncateCLOG(frozenXID);
1144         TruncateCommitTs(frozenXID);
1145         TruncateMultiXact(minMulti, minmulti_datoid);
1146
1147         /*
1148          * Update the wrap limit for GetNewTransactionId and creation of new
1149          * MultiXactIds.  Note: these functions will also signal the postmaster
1150          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1151          * signalling twice?
1152          */
1153         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1154         SetMultiXactIdLimit(minMulti, minmulti_datoid);
1155         AdvanceOldestCommitTsXid(frozenXID);
1156 }
1157
1158
1159 /*
1160  *      vacuum_rel() -- vacuum one heap relation
1161  *
1162  *              Doing one heap at a time incurs extra overhead, since we need to
1163  *              check that the heap exists again just before we vacuum it.  The
1164  *              reason that we do this is so that vacuuming can be spread across
1165  *              many small transactions.  Otherwise, two-phase locking would require
1166  *              us to lock the entire database during one pass of the vacuum cleaner.
1167  *
1168  *              At entry and exit, we are not inside a transaction.
1169  */
1170 static bool
1171 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1172 {
1173         LOCKMODE        lmode;
1174         Relation        onerel;
1175         LockRelId       onerelid;
1176         Oid                     toast_relid;
1177         Oid                     save_userid;
1178         int                     save_sec_context;
1179         int                     save_nestlevel;
1180
1181         Assert(params != NULL);
1182
1183         /* Begin a transaction for vacuuming this relation */
1184         StartTransactionCommand();
1185
1186         /*
1187          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1188          * ensures that RecentGlobalXmin is kept truly recent.
1189          */
1190         PushActiveSnapshot(GetTransactionSnapshot());
1191
1192         if (!(options & VACOPT_FULL))
1193         {
1194                 /*
1195                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1196                  * other concurrent VACUUMs know that they can ignore this one while
1197                  * determining their OldestXmin.  (The reason we don't set it during a
1198                  * full VACUUM is exactly that we may have to run user-defined
1199                  * functions for functional indexes, and we want to make sure that if
1200                  * they use the snapshot set above, any tuples it requires can't get
1201                  * removed from other tables.  An index function that depends on the
1202                  * contents of other tables is arguably broken, but we won't break it
1203                  * here by violating transaction semantics.)
1204                  *
1205                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1206                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1207                  * in an emergency.
1208                  *
1209                  * Note: these flags remain set until CommitTransaction or
1210                  * AbortTransaction.  We don't want to clear them until we reset
1211                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1212                  * which is probably Not Good.
1213                  */
1214                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1215                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1216                 if (params->is_wraparound)
1217                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1218                 LWLockRelease(ProcArrayLock);
1219         }
1220
1221         /*
1222          * Check for user-requested abort.  Note we want this to be inside a
1223          * transaction, so xact.c doesn't issue useless WARNING.
1224          */
1225         CHECK_FOR_INTERRUPTS();
1226
1227         /*
1228          * Determine the type of lock we want --- hard exclusive lock for a FULL
1229          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1230          * way, we can be sure that no other backend is vacuuming the same table.
1231          */
1232         lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1233
1234         /*
1235          * Open the relation and get the appropriate lock on it.
1236          *
1237          * There's a race condition here: the rel may have gone away since the
1238          * last time we saw it.  If so, we don't need to vacuum it.
1239          *
1240          * If we've been asked not to wait for the relation lock, acquire it first
1241          * in non-blocking mode, before calling try_relation_open().
1242          */
1243         if (!(options & VACOPT_NOWAIT))
1244                 onerel = try_relation_open(relid, lmode);
1245         else if (ConditionalLockRelationOid(relid, lmode))
1246                 onerel = try_relation_open(relid, NoLock);
1247         else
1248         {
1249                 onerel = NULL;
1250                 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
1251                         ereport(LOG,
1252                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1253                                    errmsg("skipping vacuum of \"%s\" --- lock not available",
1254                                                   relation->relname)));
1255         }
1256
1257         if (!onerel)
1258         {
1259                 PopActiveSnapshot();
1260                 CommitTransactionCommand();
1261                 return false;
1262         }
1263
1264         /*
1265          * Check permissions.
1266          *
1267          * We allow the user to vacuum a table if he is superuser, the table
1268          * owner, or the database owner (but in the latter case, only if it's not
1269          * a shared relation).  pg_class_ownercheck includes the superuser case.
1270          *
1271          * Note we choose to treat permissions failure as a WARNING and keep
1272          * trying to vacuum the rest of the DB --- is this appropriate?
1273          */
1274         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1275                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1276         {
1277                 if (onerel->rd_rel->relisshared)
1278                         ereport(WARNING,
1279                                   (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1280                                                   RelationGetRelationName(onerel))));
1281                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1282                         ereport(WARNING,
1283                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1284                                                         RelationGetRelationName(onerel))));
1285                 else
1286                         ereport(WARNING,
1287                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1288                                                         RelationGetRelationName(onerel))));
1289                 relation_close(onerel, lmode);
1290                 PopActiveSnapshot();
1291                 CommitTransactionCommand();
1292                 return false;
1293         }
1294
1295         /*
1296          * Check that it's a vacuumable relation; we used to do this in
1297          * get_rel_oids() but seems safer to check after we've locked the
1298          * relation.
1299          */
1300         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1301                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1302                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
1303         {
1304                 ereport(WARNING,
1305                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1306                                                 RelationGetRelationName(onerel))));
1307                 relation_close(onerel, lmode);
1308                 PopActiveSnapshot();
1309                 CommitTransactionCommand();
1310                 return false;
1311         }
1312
1313         /*
1314          * Silently ignore tables that are temp tables of other backends ---
1315          * trying to vacuum these will lead to great unhappiness, since their
1316          * contents are probably not up-to-date on disk.  (We don't throw a
1317          * warning here; it would just lead to chatter during a database-wide
1318          * VACUUM.)
1319          */
1320         if (RELATION_IS_OTHER_TEMP(onerel))
1321         {
1322                 relation_close(onerel, lmode);
1323                 PopActiveSnapshot();
1324                 CommitTransactionCommand();
1325                 return false;
1326         }
1327
1328         /*
1329          * Get a session-level lock too. This will protect our access to the
1330          * relation across multiple transactions, so that we can vacuum the
1331          * relation's TOAST table (if any) secure in the knowledge that no one is
1332          * deleting the parent relation.
1333          *
1334          * NOTE: this cannot block, even if someone else is waiting for access,
1335          * because the lock manager knows that both lock requests are from the
1336          * same process.
1337          */
1338         onerelid = onerel->rd_lockInfo.lockRelId;
1339         LockRelationIdForSession(&onerelid, lmode);
1340
1341         /*
1342          * Remember the relation's TOAST relation for later, if the caller asked
1343          * us to process it.  In VACUUM FULL, though, the toast table is
1344          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1345          */
1346         if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1347                 toast_relid = onerel->rd_rel->reltoastrelid;
1348         else
1349                 toast_relid = InvalidOid;
1350
1351         /*
1352          * Switch to the table owner's userid, so that any index functions are run
1353          * as that user.  Also lock down security-restricted operations and
1354          * arrange to make GUC variable changes local to this command. (This is
1355          * unnecessary, but harmless, for lazy VACUUM.)
1356          */
1357         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1358         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1359                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1360         save_nestlevel = NewGUCNestLevel();
1361
1362         /*
1363          * Do the actual work --- either FULL or "lazy" vacuum
1364          */
1365         if (options & VACOPT_FULL)
1366         {
1367                 /* close relation before vacuuming, but hold lock until commit */
1368                 relation_close(onerel, NoLock);
1369                 onerel = NULL;
1370
1371                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1372                 cluster_rel(relid, InvalidOid, false,
1373                                         (options & VACOPT_VERBOSE) != 0);
1374         }
1375         else
1376                 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1377
1378         /* Roll back any GUC changes executed by index functions */
1379         AtEOXact_GUC(false, save_nestlevel);
1380
1381         /* Restore userid and security context */
1382         SetUserIdAndSecContext(save_userid, save_sec_context);
1383
1384         /* all done with this class, but hold lock until commit */
1385         if (onerel)
1386                 relation_close(onerel, NoLock);
1387
1388         /*
1389          * Complete the transaction and free all temporary memory used.
1390          */
1391         PopActiveSnapshot();
1392         CommitTransactionCommand();
1393
1394         /*
1395          * If the relation has a secondary toast rel, vacuum that too while we
1396          * still hold the session lock on the master table.  Note however that
1397          * "analyze" will not get done on the toast table.  This is good, because
1398          * the toaster always uses hardcoded index access and statistics are
1399          * totally unimportant for toast relations.
1400          */
1401         if (toast_relid != InvalidOid)
1402                 vacuum_rel(toast_relid, relation, options, params);
1403
1404         /*
1405          * Now release the session-level lock on the master table.
1406          */
1407         UnlockRelationIdForSession(&onerelid, lmode);
1408
1409         /* Report that we really did it. */
1410         return true;
1411 }
1412
1413
1414 /*
1415  * Open all the vacuumable indexes of the given relation, obtaining the
1416  * specified kind of lock on each.  Return an array of Relation pointers for
1417  * the indexes into *Irel, and the number of indexes into *nindexes.
1418  *
1419  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1420  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1421  * execution, and what we have is too corrupt to be processable.  We will
1422  * vacuum even if the index isn't indisvalid; this is important because in a
1423  * unique index, uniqueness checks will be performed anyway and had better not
1424  * hit dangling index pointers.
1425  */
1426 void
1427 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1428                                  int *nindexes, Relation **Irel)
1429 {
1430         List       *indexoidlist;
1431         ListCell   *indexoidscan;
1432         int                     i;
1433
1434         Assert(lockmode != NoLock);
1435
1436         indexoidlist = RelationGetIndexList(relation);
1437
1438         /* allocate enough memory for all indexes */
1439         i = list_length(indexoidlist);
1440
1441         if (i > 0)
1442                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1443         else
1444                 *Irel = NULL;
1445
1446         /* collect just the ready indexes */
1447         i = 0;
1448         foreach(indexoidscan, indexoidlist)
1449         {
1450                 Oid                     indexoid = lfirst_oid(indexoidscan);
1451                 Relation        indrel;
1452
1453                 indrel = index_open(indexoid, lockmode);
1454                 if (IndexIsReady(indrel->rd_index))
1455                         (*Irel)[i++] = indrel;
1456                 else
1457                         index_close(indrel, lockmode);
1458         }
1459
1460         *nindexes = i;
1461
1462         list_free(indexoidlist);
1463 }
1464
1465 /*
1466  * Release the resources acquired by vac_open_indexes.  Optionally release
1467  * the locks (say NoLock to keep 'em).
1468  */
1469 void
1470 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1471 {
1472         if (Irel == NULL)
1473                 return;
1474
1475         while (nindexes--)
1476         {
1477                 Relation        ind = Irel[nindexes];
1478
1479                 index_close(ind, lockmode);
1480         }
1481         pfree(Irel);
1482 }
1483
1484 /*
1485  * vacuum_delay_point --- check for interrupts and cost-based delay.
1486  *
1487  * This should be called in each major loop of VACUUM processing,
1488  * typically once per page processed.
1489  */
1490 void
1491 vacuum_delay_point(void)
1492 {
1493         /* Always check for interrupts */
1494         CHECK_FOR_INTERRUPTS();
1495
1496         /* Nap if appropriate */
1497         if (VacuumCostActive && !InterruptPending &&
1498                 VacuumCostBalance >= VacuumCostLimit)
1499         {
1500                 int                     msec;
1501
1502                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1503                 if (msec > VacuumCostDelay * 4)
1504                         msec = VacuumCostDelay * 4;
1505
1506                 pg_usleep(msec * 1000L);
1507
1508                 VacuumCostBalance = 0;
1509
1510                 /* update balance values for workers */
1511                 AutoVacuumUpdateDelay();
1512
1513                 /* Might have gotten an interrupt while sleeping */
1514                 CHECK_FOR_INTERRUPTS();
1515         }
1516 }