granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/commit_ts.h"
  27 #include "access/genam.h"
  28 #include "access/heapam.h"
  29 #include "access/htup_details.h"
  30 #include "access/multixact.h"
  31 #include "access/transam.h"
  32 #include "access/xact.h"
  33 #include "catalog/namespace.h"
  34 #include "catalog/pg_database.h"
  35 #include "catalog/pg_inherits_fn.h"
  36 #include "catalog/pg_namespace.h"
  37 #include "commands/cluster.h"
  38 #include "commands/vacuum.h"
  39 #include "miscadmin.h"
  40 #include "pgstat.h"
  41 #include "postmaster/autovacuum.h"
  42 #include "storage/bufmgr.h"
  43 #include "storage/lmgr.h"
  44 #include "storage/proc.h"
  45 #include "storage/procarray.h"
  46 #include "utils/acl.h"
  47 #include "utils/fmgroids.h"
  48 #include "utils/guc.h"
  49 #include "utils/memutils.h"
  50 #include "utils/snapmgr.h"
  51 #include "utils/syscache.h"
  52 #include "utils/tqual.h"
  53
  54
  55 /*
  56  * GUC parameters
  57  */
  58 int                     vacuum_freeze_min_age;
  59 int                     vacuum_freeze_table_age;
  60 int                     vacuum_multixact_freeze_min_age;
  61 int                     vacuum_multixact_freeze_table_age;
  62
  63
  64 /* A few variables that don't seem worth passing around as parameters */
  65 static MemoryContext vac_context = NULL;
  66 static BufferAccessStrategy vac_strategy;
  67
  68
  69 /* non-export function prototypes */
  70 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
  71 static void vac_truncate_clog(TransactionId frozenXID,
  72                                   MultiXactId minMulti,
  73                                   TransactionId lastSaneFrozenXid,
  74                                   MultiXactId lastSaneMinMulti);
  75 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
  76                    VacuumParams *params);
  77
  78 /*
  79  * Primary entry point for manual VACUUM and ANALYZE commands
  80  *
  81  * This is mainly a preparation wrapper for the real operations that will
  82  * happen in vacuum().
  83  */
  84 void
  85 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
  86 {
  87         VacuumParams params;
  88
  89         /* sanity checks on options */
  90         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
  91         Assert((vacstmt->options & VACOPT_VACUUM) ||
  92                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
  93         Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
  94         Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
  95
  96         /*
  97          * All freeze ages are zero if the FREEZE option is given; otherwise pass
  98          * them as -1 which means to use the default values.
  99          */
 100         if (vacstmt->options & VACOPT_FREEZE)
 101         {
 102                 params.freeze_min_age = 0;
 103                 params.freeze_table_age = 0;
 104                 params.multixact_freeze_min_age = 0;
 105                 params.multixact_freeze_table_age = 0;
 106         }
 107         else
 108         {
 109                 params.freeze_min_age = -1;
 110                 params.freeze_table_age = -1;
 111                 params.multixact_freeze_min_age = -1;
 112                 params.multixact_freeze_table_age = -1;
 113         }
 114
 115         /* user-invoked vacuum is never "for wraparound" */
 116         params.is_wraparound = false;
 117
 118         /* user-invoked vacuum never uses this parameter */
 119         params.log_min_duration = -1;
 120
 121         /* Now go through the common routine */
 122         vacuum(vacstmt->options, vacstmt->relation, InvalidOid, &params,
 123                    vacstmt->va_cols, NULL, isTopLevel);
 124 }
 125
 126 /*
 127  * Primary entry point for VACUUM and ANALYZE commands.
 128  *
 129  * options is a bitmask of VacuumOption flags, indicating what to do.
 130  *
 131  * relid, if not InvalidOid, indicate the relation to process; otherwise,
 132  * the RangeVar is used.  (The latter must always be passed, because it's
 133  * used for error messages.)
 134  *
 135  * params contains a set of parameters that can be used to customize the
 136  * behavior.
 137  *
 138  * va_cols is a list of columns to analyze, or NIL to process them all.
 139  *
 140  * bstrategy is normally given as NULL, but in autovacuum it can be passed
 141  * in to use the same buffer strategy object across multiple vacuum() calls.
 142  *
 143  * isTopLevel should be passed down from ProcessUtility.
 144  *
 145  * It is the caller's responsibility that all parameters are allocated in a
 146  * memory context that will not disappear at transaction commit.
 147  */
 148 void
 149 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
 150            List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
 151 {
 152         const char *stmttype;
 153         volatile bool in_outer_xact,
 154                                 use_own_xacts;
 155         List       *relations;
 156         static bool in_vacuum = false;
 157
 158         Assert(params != NULL);
 159
 160         stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 161
 162         /*
 163          * We cannot run VACUUM inside a user transaction block; if we were inside
 164          * a transaction, then our commit- and start-transaction-command calls
 165          * would not have the intended effect!  There are numerous other subtle
 166          * dependencies on this, too.
 167          *
 168          * ANALYZE (without VACUUM) can run either way.
 169          */
 170         if (options & VACOPT_VACUUM)
 171         {
 172                 PreventTransactionChain(isTopLevel, stmttype);
 173                 in_outer_xact = false;
 174         }
 175         else
 176                 in_outer_xact = IsInTransactionChain(isTopLevel);
 177
 178         /*
 179          * Due to static variables vac_context, anl_context and vac_strategy,
 180          * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
 181          * calls a hostile index expression that itself calls ANALYZE.
 182          */
 183         if (in_vacuum)
 184                 ereport(ERROR,
 185                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 186                                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
 187                                                 stmttype)));
 188
 189         /*
 190          * Sanity check DISABLE_PAGE_SKIPPING option.
 191          */
 192         if ((options & VACOPT_FULL) != 0 &&
 193                 (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
 194                 ereport(ERROR,
 195                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 196                                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
 197
 198         /*
 199          * Send info about dead objects to the statistics collector, unless we are
 200          * in autovacuum --- autovacuum.c does this for itself.
 201          */
 202         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 203                 pgstat_vacuum_stat();
 204
 205         /*
 206          * Create special memory context for cross-transaction storage.
 207          *
 208          * Since it is a child of PortalContext, it will go away eventually even
 209          * if we suffer an error; there's no need for special abort cleanup logic.
 210          */
 211         vac_context = AllocSetContextCreate(PortalContext,
 212                                                                                 "Vacuum",
 213                                                                                 ALLOCSET_DEFAULT_SIZES);
 214
 215         /*
 216          * If caller didn't give us a buffer strategy object, make one in the
 217          * cross-transaction memory context.
 218          */
 219         if (bstrategy == NULL)
 220         {
 221                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 222
 223                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 224                 MemoryContextSwitchTo(old_context);
 225         }
 226         vac_strategy = bstrategy;
 227
 228         /*
 229          * Build list of relations to process, unless caller gave us one. (If we
 230          * build one, we put it in vac_context for safekeeping.)
 231          */
 232         relations = get_rel_oids(relid, relation);
 233
 234         /*
 235          * Decide whether we need to start/commit our own transactions.
 236          *
 237          * For VACUUM (with or without ANALYZE): always do so, so that we can
 238          * release locks as soon as possible.  (We could possibly use the outer
 239          * transaction for a one-table VACUUM, but handling TOAST tables would be
 240          * problematic.)
 241          *
 242          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 243          * start/commit our own transactions.  Also, there's no need to do so if
 244          * only processing one relation.  For multiple relations when not within a
 245          * transaction block, and also in an autovacuum worker, use own
 246          * transactions so we can release locks sooner.
 247          */
 248         if (options & VACOPT_VACUUM)
 249                 use_own_xacts = true;
 250         else
 251         {
 252                 Assert(options & VACOPT_ANALYZE);
 253                 if (IsAutoVacuumWorkerProcess())
 254                         use_own_xacts = true;
 255                 else if (in_outer_xact)
 256                         use_own_xacts = false;
 257                 else if (list_length(relations) > 1)
 258                         use_own_xacts = true;
 259                 else
 260                         use_own_xacts = false;
 261         }
 262
 263         /*
 264          * vacuum_rel expects to be entered with no transaction active; it will
 265          * start and commit its own transaction.  But we are called by an SQL
 266          * command, and so we are executing inside a transaction already. We
 267          * commit the transaction started in PostgresMain() here, and start
 268          * another one before exiting to match the commit waiting for us back in
 269          * PostgresMain().
 270          */
 271         if (use_own_xacts)
 272         {
 273                 Assert(!in_outer_xact);
 274
 275                 /* ActiveSnapshot is not set by autovacuum */
 276                 if (ActiveSnapshotSet())
 277                         PopActiveSnapshot();
 278
 279                 /* matches the StartTransaction in PostgresMain() */
 280                 CommitTransactionCommand();
 281         }
 282
 283         /* Turn vacuum cost accounting on or off */
 284         PG_TRY();
 285         {
 286                 ListCell   *cur;
 287
 288                 in_vacuum = true;
 289                 VacuumCostActive = (VacuumCostDelay > 0);
 290                 VacuumCostBalance = 0;
 291                 VacuumPageHit = 0;
 292                 VacuumPageMiss = 0;
 293                 VacuumPageDirty = 0;
 294
 295                 /*
 296                  * Loop to process each selected relation.
 297                  */
 298                 foreach(cur, relations)
 299                 {
 300                         Oid                     relid = lfirst_oid(cur);
 301
 302                         if (options & VACOPT_VACUUM)
 303                         {
 304                                 if (!vacuum_rel(relid, relation, options, params))
 305                                         continue;
 306                         }
 307
 308                         if (options & VACOPT_ANALYZE)
 309                         {
 310                                 /*
 311                                  * If using separate xacts, start one for analyze. Otherwise,
 312                                  * we can use the outer transaction.
 313                                  */
 314                                 if (use_own_xacts)
 315                                 {
 316                                         StartTransactionCommand();
 317                                         /* functions in indexes may want a snapshot set */
 318                                         PushActiveSnapshot(GetTransactionSnapshot());
 319                                 }
 320
 321                                 analyze_rel(relid, relation, options, params,
 322                                                         va_cols, in_outer_xact, vac_strategy);
 323
 324                                 if (use_own_xacts)
 325                                 {
 326                                         PopActiveSnapshot();
 327                                         CommitTransactionCommand();
 328                                 }
 329                         }
 330                 }
 331         }
 332         PG_CATCH();
 333         {
 334                 in_vacuum = false;
 335                 VacuumCostActive = false;
 336                 PG_RE_THROW();
 337         }
 338         PG_END_TRY();
 339
 340         in_vacuum = false;
 341         VacuumCostActive = false;
 342
 343         /*
 344          * Finish up processing.
 345          */
 346         if (use_own_xacts)
 347         {
 348                 /* here, we are not in a transaction */
 349
 350                 /*
 351                  * This matches the CommitTransaction waiting for us in
 352                  * PostgresMain().
 353                  */
 354                 StartTransactionCommand();
 355         }
 356
 357         if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 358         {
 359                 /*
 360                  * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
 361                  * (autovacuum.c does this for itself.)
 362                  */
 363                 vac_update_datfrozenxid();
 364         }
 365
 366         /*
 367          * Clean up working storage --- note we must do this after
 368          * StartTransactionCommand, else we might be trying to delete the active
 369          * context!
 370          */
 371         MemoryContextDelete(vac_context);
 372         vac_context = NULL;
 373 }
 374
 375 /*
 376  * Build a list of Oids for each relation to be processed
 377  *
 378  * The list is built in vac_context so that it will survive across our
 379  * per-relation transactions.
 380  */
 381 static List *
 382 get_rel_oids(Oid relid, const RangeVar *vacrel)
 383 {
 384         List       *oid_list = NIL;
 385         MemoryContext oldcontext;
 386
 387         /* OID supplied by VACUUM's caller? */
 388         if (OidIsValid(relid))
 389         {
 390                 oldcontext = MemoryContextSwitchTo(vac_context);
 391                 oid_list = lappend_oid(oid_list, relid);
 392                 MemoryContextSwitchTo(oldcontext);
 393         }
 394         else if (vacrel)
 395         {
 396                 /* Process a specific relation */
 397                 Oid                     relid;
 398                 HeapTuple       tuple;
 399                 Form_pg_class classForm;
 400                 bool            include_parts;
 401
 402                 /*
 403                  * Since we don't take a lock here, the relation might be gone, or the
 404                  * RangeVar might no longer refer to the OID we look up here.  In the
 405                  * former case, VACUUM will do nothing; in the latter case, it will
 406                  * process the OID we looked up here, rather than the new one. Neither
 407                  * is ideal, but there's little practical alternative, since we're
 408                  * going to commit this transaction and begin a new one between now
 409                  * and then.
 410                  */
 411                 relid = RangeVarGetRelid(vacrel, NoLock, false);
 412
 413                 /*
 414                  * To check whether the relation is a partitioned table, fetch its
 415                  * syscache entry.
 416                  */
 417                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
 418                 if (!HeapTupleIsValid(tuple))
 419                         elog(ERROR, "cache lookup failed for relation %u", relid);
 420                 classForm = (Form_pg_class) GETSTRUCT(tuple);
 421                 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
 422                 ReleaseSysCache(tuple);
 423
 424                 /*
 425                  * Make relation list entries for this guy and its partitions, if any.
 426                  * Note that the list returned by find_all_inheritors() include the
 427                  * passed-in OID at its head.  Also note that we did not request a
 428                  * lock to be taken to match what would be done otherwise.
 429                  */
 430                 oldcontext = MemoryContextSwitchTo(vac_context);
 431                 if (include_parts)
 432                         oid_list = list_concat(oid_list,
 433                                                                    find_all_inheritors(relid, NoLock, NULL));
 434                 else
 435                         oid_list = lappend_oid(oid_list, relid);
 436                 MemoryContextSwitchTo(oldcontext);
 437         }
 438         else
 439         {
 440                 /*
 441                  * Process all plain relations and materialized views listed in
 442                  * pg_class
 443                  */
 444                 Relation        pgclass;
 445                 HeapScanDesc scan;
 446                 HeapTuple       tuple;
 447
 448                 pgclass = heap_open(RelationRelationId, AccessShareLock);
 449
 450                 scan = heap_beginscan_catalog(pgclass, 0, NULL);
 451
 452                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 453                 {
 454                         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 455
 456                         /*
 457                          * We include partitioned tables here; depending on which
 458                          * operation is to be performed, caller will decide whether to
 459                          * process or ignore them.
 460                          */
 461                         if (classForm->relkind != RELKIND_RELATION &&
 462                                 classForm->relkind != RELKIND_MATVIEW &&
 463                                 classForm->relkind != RELKIND_PARTITIONED_TABLE)
 464                                 continue;
 465
 466                         /* Make a relation list entry for this guy */
 467                         oldcontext = MemoryContextSwitchTo(vac_context);
 468                         oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
 469                         MemoryContextSwitchTo(oldcontext);
 470                 }
 471
 472                 heap_endscan(scan);
 473                 heap_close(pgclass, AccessShareLock);
 474         }
 475
 476         return oid_list;
 477 }
 478
 479 /*
 480  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 481  *
 482  * The output parameters are:
 483  * - oldestXmin is the cutoff value used to distinguish whether tuples are
 484  *       DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 485  * - freezeLimit is the Xid below which all Xids are replaced by
 486  *       FrozenTransactionId during vacuum.
 487  * - xidFullScanLimit (computed from table_freeze_age parameter)
 488  *       represents a minimum Xid value; a table whose relfrozenxid is older than
 489  *       this will have a full-table vacuum applied to it, to freeze tuples across
 490  *       the whole table.  Vacuuming a table younger than this value can use a
 491  *       partial scan.
 492  * - multiXactCutoff is the value below which all MultiXactIds are removed from
 493  *       Xmax.
 494  * - mxactFullScanLimit is a value against which a table's relminmxid value is
 495  *       compared to produce a full-table vacuum, as with xidFullScanLimit.
 496  *
 497  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 498  * not interested.
 499  */
 500 void
 501 vacuum_set_xid_limits(Relation rel,
 502                                           int freeze_min_age,
 503                                           int freeze_table_age,
 504                                           int multixact_freeze_min_age,
 505                                           int multixact_freeze_table_age,
 506                                           TransactionId *oldestXmin,
 507                                           TransactionId *freezeLimit,
 508                                           TransactionId *xidFullScanLimit,
 509                                           MultiXactId *multiXactCutoff,
 510                                           MultiXactId *mxactFullScanLimit)
 511 {
 512         int                     freezemin;
 513         int                     mxid_freezemin;
 514         int                     effective_multixact_freeze_max_age;
 515         TransactionId limit;
 516         TransactionId safeLimit;
 517         MultiXactId mxactLimit;
 518         MultiXactId safeMxactLimit;
 519
 520         /*
 521          * We can always ignore processes running lazy vacuum.  This is because we
 522          * use these values only for deciding which tuples we must keep in the
 523          * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
 524          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 525          * a full vacuum, but keep in mind that only one vacuum process can be
 526          * working on a particular table at any time, and that each vacuum is
 527          * always an independent transaction.
 528          */
 529         *oldestXmin =
 530                 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
 531
 532         Assert(TransactionIdIsNormal(*oldestXmin));
 533
 534         /*
 535          * Determine the minimum freeze age to use: as specified by the caller, or
 536          * vacuum_freeze_min_age, but in any case not more than half
 537          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 538          * wraparound won't occur too frequently.
 539          */
 540         freezemin = freeze_min_age;
 541         if (freezemin < 0)
 542                 freezemin = vacuum_freeze_min_age;
 543         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 544         Assert(freezemin >= 0);
 545
 546         /*
 547          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 548          */
 549         limit = *oldestXmin - freezemin;
 550         if (!TransactionIdIsNormal(limit))
 551                 limit = FirstNormalTransactionId;
 552
 553         /*
 554          * If oldestXmin is very far back (in practice, more than
 555          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 556          * freeze age of zero.
 557          */
 558         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 559         if (!TransactionIdIsNormal(safeLimit))
 560                 safeLimit = FirstNormalTransactionId;
 561
 562         if (TransactionIdPrecedes(limit, safeLimit))
 563         {
 564                 ereport(WARNING,
 565                                 (errmsg("oldest xmin is far in the past"),
 566                                  errhint("Close open transactions soon to avoid wraparound problems.")));
 567                 limit = *oldestXmin;
 568         }
 569
 570         *freezeLimit = limit;
 571
 572         /*
 573          * Compute the multixact age for which freezing is urgent.  This is
 574          * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 575          * short of multixact member space.
 576          */
 577         effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 578
 579         /*
 580          * Determine the minimum multixact freeze age to use: as specified by
 581          * caller, or vacuum_multixact_freeze_min_age, but in any case not more
 582          * than half effective_multixact_freeze_max_age, so that autovacuums to
 583          * prevent MultiXact wraparound won't occur too frequently.
 584          */
 585         mxid_freezemin = multixact_freeze_min_age;
 586         if (mxid_freezemin < 0)
 587                 mxid_freezemin = vacuum_multixact_freeze_min_age;
 588         mxid_freezemin = Min(mxid_freezemin,
 589                                                  effective_multixact_freeze_max_age / 2);
 590         Assert(mxid_freezemin >= 0);
 591
 592         /* compute the cutoff multi, being careful to generate a valid value */
 593         mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
 594         if (mxactLimit < FirstMultiXactId)
 595                 mxactLimit = FirstMultiXactId;
 596
 597         safeMxactLimit =
 598                 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
 599         if (safeMxactLimit < FirstMultiXactId)
 600                 safeMxactLimit = FirstMultiXactId;
 601
 602         if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
 603         {
 604                 ereport(WARNING,
 605                                 (errmsg("oldest multixact is far in the past"),
 606                                  errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 607                 mxactLimit = safeMxactLimit;
 608         }
 609
 610         *multiXactCutoff = mxactLimit;
 611
 612         if (xidFullScanLimit != NULL)
 613         {
 614                 int                     freezetable;
 615
 616                 Assert(mxactFullScanLimit != NULL);
 617
 618                 /*
 619                  * Determine the table freeze age to use: as specified by the caller,
 620                  * or vacuum_freeze_table_age, but in any case not more than
 621                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 622                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 623                  * before anti-wraparound autovacuum is launched.
 624                  */
 625                 freezetable = freeze_table_age;
 626                 if (freezetable < 0)
 627                         freezetable = vacuum_freeze_table_age;
 628                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 629                 Assert(freezetable >= 0);
 630
 631                 /*
 632                  * Compute XID limit causing a full-table vacuum, being careful not to
 633                  * generate a "permanent" XID.
 634                  */
 635                 limit = ReadNewTransactionId() - freezetable;
 636                 if (!TransactionIdIsNormal(limit))
 637                         limit = FirstNormalTransactionId;
 638
 639                 *xidFullScanLimit = limit;
 640
 641                 /*
 642                  * Similar to the above, determine the table freeze age to use for
 643                  * multixacts: as specified by the caller, or
 644                  * vacuum_multixact_freeze_table_age, but in any case not more than
 645                  * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
 646                  * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
 647                  * freeze multixacts before anti-wraparound autovacuum is launched.
 648                  */
 649                 freezetable = multixact_freeze_table_age;
 650                 if (freezetable < 0)
 651                         freezetable = vacuum_multixact_freeze_table_age;
 652                 freezetable = Min(freezetable,
 653                                                   effective_multixact_freeze_max_age * 0.95);
 654                 Assert(freezetable >= 0);
 655
 656                 /*
 657                  * Compute MultiXact limit causing a full-table vacuum, being careful
 658                  * to generate a valid MultiXact value.
 659                  */
 660                 mxactLimit = ReadNextMultiXactId() - freezetable;
 661                 if (mxactLimit < FirstMultiXactId)
 662                         mxactLimit = FirstMultiXactId;
 663
 664                 *mxactFullScanLimit = mxactLimit;
 665         }
 666         else
 667         {
 668                 Assert(mxactFullScanLimit == NULL);
 669         }
 670 }
 671
 672 /*
 673  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
 674  *
 675  *              If we scanned the whole relation then we should just use the count of
 676  *              live tuples seen; but if we did not, we should not trust the count
 677  *              unreservedly, especially not in VACUUM, which may have scanned a quite
 678  *              nonrandom subset of the table.  When we have only partial information,
 679  *              we take the old value of pg_class.reltuples as a measurement of the
 680  *              tuple density in the unscanned pages.
 681  *
 682  *              This routine is shared by VACUUM and ANALYZE.
 683  */
 684 double
 685 vac_estimate_reltuples(Relation relation, bool is_analyze,
 686                                            BlockNumber total_pages,
 687                                            BlockNumber scanned_pages,
 688                                            double scanned_tuples)
 689 {
 690         BlockNumber old_rel_pages = relation->rd_rel->relpages;
 691         double          old_rel_tuples = relation->rd_rel->reltuples;
 692         double          old_density;
 693         double          new_density;
 694         double          multiplier;
 695         double          updated_density;
 696
 697         /* If we did scan the whole table, just use the count as-is */
 698         if (scanned_pages >= total_pages)
 699                 return scanned_tuples;
 700
 701         /*
 702          * If scanned_pages is zero but total_pages isn't, keep the existing value
 703          * of reltuples.  (Note: callers should avoid updating the pg_class
 704          * statistics in this situation, since no new information has been
 705          * provided.)
 706          */
 707         if (scanned_pages == 0)
 708                 return old_rel_tuples;
 709
 710         /*
 711          * If old value of relpages is zero, old density is indeterminate; we
 712          * can't do much except scale up scanned_tuples to match total_pages.
 713          */
 714         if (old_rel_pages == 0)
 715                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 716
 717         /*
 718          * Okay, we've covered the corner cases.  The normal calculation is to
 719          * convert the old measurement to a density (tuples per page), then update
 720          * the density using an exponential-moving-average approach, and finally
 721          * compute reltuples as updated_density * total_pages.
 722          *
 723          * For ANALYZE, the moving average multiplier is just the fraction of the
 724          * table's pages we scanned.  This is equivalent to assuming that the
 725          * tuple density in the unscanned pages didn't change.  Of course, it
 726          * probably did, if the new density measurement is different. But over
 727          * repeated cycles, the value of reltuples will converge towards the
 728          * correct value, if repeated measurements show the same new density.
 729          *
 730          * For VACUUM, the situation is a bit different: we have looked at a
 731          * nonrandom sample of pages, but we know for certain that the pages we
 732          * didn't look at are precisely the ones that haven't changed lately.
 733          * Thus, there is a reasonable argument for doing exactly the same thing
 734          * as for the ANALYZE case, that is use the old density measurement as the
 735          * value for the unscanned pages.
 736          *
 737          * This logic could probably use further refinement.
 738          */
 739         old_density = old_rel_tuples / old_rel_pages;
 740         new_density = scanned_tuples / scanned_pages;
 741         multiplier = (double) scanned_pages / (double) total_pages;
 742         updated_density = old_density + (new_density - old_density) * multiplier;
 743         return floor(updated_density * total_pages + 0.5);
 744 }
 745
 746
 747 /*
 748  *      vac_update_relstats() -- update statistics for one relation
 749  *
 750  *              Update the whole-relation statistics that are kept in its pg_class
 751  *              row.  There are additional stats that will be updated if we are
 752  *              doing ANALYZE, but we always update these stats.  This routine works
 753  *              for both index and heap relation entries in pg_class.
 754  *
 755  *              We violate transaction semantics here by overwriting the rel's
 756  *              existing pg_class tuple with the new values.  This is reasonably
 757  *              safe as long as we're sure that the new values are correct whether or
 758  *              not this transaction commits.  The reason for doing this is that if
 759  *              we updated these tuples in the usual way, vacuuming pg_class itself
 760  *              wouldn't work very well --- by the time we got done with a vacuum
 761  *              cycle, most of the tuples in pg_class would've been obsoleted.  Of
 762  *              course, this only works for fixed-size not-null columns, but these are.
 763  *
 764  *              Another reason for doing it this way is that when we are in a lazy
 765  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
 766  *              Somebody vacuuming pg_class might think they could delete a tuple
 767  *              marked with xmin = our xid.
 768  *
 769  *              In addition to fundamentally nontransactional statistics such as
 770  *              relpages and relallvisible, we try to maintain certain lazily-updated
 771  *              DDL flags such as relhasindex, by clearing them if no longer correct.
 772  *              It's safe to do this in VACUUM, which can't run in parallel with
 773  *              CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
 774  *              However, it's *not* safe to do it in an ANALYZE that's within an
 775  *              outer transaction, because for example the current transaction might
 776  *              have dropped the last index; then we'd think relhasindex should be
 777  *              cleared, but if the transaction later rolls back this would be wrong.
 778  *              So we refrain from updating the DDL flags if we're inside an outer
 779  *              transaction.  This is OK since postponing the flag maintenance is
 780  *              always allowable.
 781  *
 782  *              This routine is shared by VACUUM and ANALYZE.
 783  */
 784 void
 785 vac_update_relstats(Relation relation,
 786                                         BlockNumber num_pages, double num_tuples,
 787                                         BlockNumber num_all_visible_pages,
 788                                         bool hasindex, TransactionId frozenxid,
 789                                         MultiXactId minmulti,
 790                                         bool in_outer_xact)
 791 {
 792         Oid                     relid = RelationGetRelid(relation);
 793         Relation        rd;
 794         HeapTuple       ctup;
 795         Form_pg_class pgcform;
 796         bool            dirty;
 797
 798         rd = heap_open(RelationRelationId, RowExclusiveLock);
 799
 800         /* Fetch a copy of the tuple to scribble on */
 801         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
 802         if (!HeapTupleIsValid(ctup))
 803                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 804                          relid);
 805         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 806
 807         /* Apply statistical updates, if any, to copied tuple */
 808
 809         dirty = false;
 810         if (pgcform->relpages != (int32) num_pages)
 811         {
 812                 pgcform->relpages = (int32) num_pages;
 813                 dirty = true;
 814         }
 815         if (pgcform->reltuples != (float4) num_tuples)
 816         {
 817                 pgcform->reltuples = (float4) num_tuples;
 818                 dirty = true;
 819         }
 820         if (pgcform->relallvisible != (int32) num_all_visible_pages)
 821         {
 822                 pgcform->relallvisible = (int32) num_all_visible_pages;
 823                 dirty = true;
 824         }
 825
 826         /* Apply DDL updates, but not inside an outer transaction (see above) */
 827
 828         if (!in_outer_xact)
 829         {
 830                 /*
 831                  * If we didn't find any indexes, reset relhasindex.
 832                  */
 833                 if (pgcform->relhasindex && !hasindex)
 834                 {
 835                         pgcform->relhasindex = false;
 836                         dirty = true;
 837                 }
 838
 839                 /*
 840                  * If we have discovered that there are no indexes, then there's no
 841                  * primary key either.  This could be done more thoroughly...
 842                  */
 843                 if (pgcform->relhaspkey && !hasindex)
 844                 {
 845                         pgcform->relhaspkey = false;
 846                         dirty = true;
 847                 }
 848
 849                 /* We also clear relhasrules and relhastriggers if needed */
 850                 if (pgcform->relhasrules && relation->rd_rules == NULL)
 851                 {
 852                         pgcform->relhasrules = false;
 853                         dirty = true;
 854                 }
 855                 if (pgcform->relhastriggers && relation->trigdesc == NULL)
 856                 {
 857                         pgcform->relhastriggers = false;
 858                         dirty = true;
 859                 }
 860         }
 861
 862         /*
 863          * Update relfrozenxid, unless caller passed InvalidTransactionId
 864          * indicating it has no new data.
 865          *
 866          * Ordinarily, we don't let relfrozenxid go backwards: if things are
 867          * working correctly, the only way the new frozenxid could be older would
 868          * be if a previous VACUUM was done with a tighter freeze_min_age, in
 869          * which case we don't want to forget the work it already did.  However,
 870          * if the stored relfrozenxid is "in the future", then it must be corrupt
 871          * and it seems best to overwrite it with the cutoff we used this time.
 872          * This should match vac_update_datfrozenxid() concerning what we consider
 873          * to be "in the future".
 874          */
 875         if (TransactionIdIsNormal(frozenxid) &&
 876                 pgcform->relfrozenxid != frozenxid &&
 877                 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
 878                  TransactionIdPrecedes(ReadNewTransactionId(),
 879                                                            pgcform->relfrozenxid)))
 880         {
 881                 pgcform->relfrozenxid = frozenxid;
 882                 dirty = true;
 883         }
 884
 885         /* Similarly for relminmxid */
 886         if (MultiXactIdIsValid(minmulti) &&
 887                 pgcform->relminmxid != minmulti &&
 888                 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
 889                  MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
 890         {
 891                 pgcform->relminmxid = minmulti;
 892                 dirty = true;
 893         }
 894
 895         /* If anything changed, write out the tuple. */
 896         if (dirty)
 897                 heap_inplace_update(rd, ctup);
 898
 899         heap_close(rd, RowExclusiveLock);
 900 }
 901
 902
 903 /*
 904  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 905  *
 906  *              Update pg_database's datfrozenxid entry for our database to be the
 907  *              minimum of the pg_class.relfrozenxid values.
 908  *
 909  *              Similarly, update our datminmxid to be the minimum of the
 910  *              pg_class.relminmxid values.
 911  *
 912  *              If we are able to advance either pg_database value, also try to
 913  *              truncate pg_xact and pg_multixact.
 914  *
 915  *              We violate transaction semantics here by overwriting the database's
 916  *              existing pg_database tuple with the new values.  This is reasonably
 917  *              safe since the new values are correct whether or not this transaction
 918  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
 919  *              behind after a VACUUM.
 920  */
 921 void
 922 vac_update_datfrozenxid(void)
 923 {
 924         HeapTuple       tuple;
 925         Form_pg_database dbform;
 926         Relation        relation;
 927         SysScanDesc scan;
 928         HeapTuple       classTup;
 929         TransactionId newFrozenXid;
 930         MultiXactId newMinMulti;
 931         TransactionId lastSaneFrozenXid;
 932         MultiXactId lastSaneMinMulti;
 933         bool            bogus = false;
 934         bool            dirty = false;
 935
 936         /*
 937          * Initialize the "min" calculation with GetOldestXmin, which is a
 938          * reasonable approximation to the minimum relfrozenxid for not-yet-
 939          * committed pg_class entries for new tables; see AddNewRelationTuple().
 940          * So we cannot produce a wrong minimum by starting with this.
 941          */
 942         newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
 943
 944         /*
 945          * Similarly, initialize the MultiXact "min" with the value that would be
 946          * used on pg_class for new tables.  See AddNewRelationTuple().
 947          */
 948         newMinMulti = GetOldestMultiXactId();
 949
 950         /*
 951          * Identify the latest relfrozenxid and relminmxid values that we could
 952          * validly see during the scan.  These are conservative values, but it's
 953          * not really worth trying to be more exact.
 954          */
 955         lastSaneFrozenXid = ReadNewTransactionId();
 956         lastSaneMinMulti = ReadNextMultiXactId();
 957
 958         /*
 959          * We must seqscan pg_class to find the minimum Xid, because there is no
 960          * index that can help us here.
 961          */
 962         relation = heap_open(RelationRelationId, AccessShareLock);
 963
 964         scan = systable_beginscan(relation, InvalidOid, false,
 965                                                           NULL, 0, NULL);
 966
 967         while ((classTup = systable_getnext(scan)) != NULL)
 968         {
 969                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
 970
 971                 /*
 972                  * Only consider relations able to hold unfrozen XIDs (anything else
 973                  * should have InvalidTransactionId in relfrozenxid anyway.)
 974                  */
 975                 if (classForm->relkind != RELKIND_RELATION &&
 976                         classForm->relkind != RELKIND_MATVIEW &&
 977                         classForm->relkind != RELKIND_TOASTVALUE)
 978                         continue;
 979
 980                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
 981                 Assert(MultiXactIdIsValid(classForm->relminmxid));
 982
 983                 /*
 984                  * If things are working properly, no relation should have a
 985                  * relfrozenxid or relminmxid that is "in the future".  However, such
 986                  * cases have been known to arise due to bugs in pg_upgrade.  If we
 987                  * see any entries that are "in the future", chicken out and don't do
 988                  * anything.  This ensures we won't truncate clog before those
 989                  * relations have been scanned and cleaned up.
 990                  */
 991                 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
 992                         MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
 993                 {
 994                         bogus = true;
 995                         break;
 996                 }
 997
 998                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
 999                         newFrozenXid = classForm->relfrozenxid;
1000
1001                 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1002                         newMinMulti = classForm->relminmxid;
1003         }
1004
1005         /* we're done with pg_class */
1006         systable_endscan(scan);
1007         heap_close(relation, AccessShareLock);
1008
1009         /* chicken out if bogus data found */
1010         if (bogus)
1011                 return;
1012
1013         Assert(TransactionIdIsNormal(newFrozenXid));
1014         Assert(MultiXactIdIsValid(newMinMulti));
1015
1016         /* Now fetch the pg_database tuple we need to update. */
1017         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1018
1019         /* Fetch a copy of the tuple to scribble on */
1020         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1021         if (!HeapTupleIsValid(tuple))
1022                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1023         dbform = (Form_pg_database) GETSTRUCT(tuple);
1024
1025         /*
1026          * As in vac_update_relstats(), we ordinarily don't want to let
1027          * datfrozenxid go backward; but if it's "in the future" then it must be
1028          * corrupt and it seems best to overwrite it.
1029          */
1030         if (dbform->datfrozenxid != newFrozenXid &&
1031                 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1032                  TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1033         {
1034                 dbform->datfrozenxid = newFrozenXid;
1035                 dirty = true;
1036         }
1037         else
1038                 newFrozenXid = dbform->datfrozenxid;
1039
1040         /* Ditto for datminmxid */
1041         if (dbform->datminmxid != newMinMulti &&
1042                 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1043                  MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1044         {
1045                 dbform->datminmxid = newMinMulti;
1046                 dirty = true;
1047         }
1048         else
1049                 newMinMulti = dbform->datminmxid;
1050
1051         if (dirty)
1052                 heap_inplace_update(relation, tuple);
1053
1054         heap_freetuple(tuple);
1055         heap_close(relation, RowExclusiveLock);
1056
1057         /*
1058          * If we were able to advance datfrozenxid or datminmxid, see if we can
1059          * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1060          * XID-wrap-limit info is stale, since this action will update that too.
1061          */
1062         if (dirty || ForceTransactionIdLimitUpdate())
1063                 vac_truncate_clog(newFrozenXid, newMinMulti,
1064                                                   lastSaneFrozenXid, lastSaneMinMulti);
1065 }
1066
1067
1068 /*
1069  *      vac_truncate_clog() -- attempt to truncate the commit log
1070  *
1071  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
1072  *              and use it to truncate the transaction commit log (pg_xact).
1073  *              Also update the XID wrap limit info maintained by varsup.c.
1074  *              Likewise for datminmxid.
1075  *
1076  *              The passed frozenXID and minMulti are the updated values for my own
1077  *              pg_database entry. They're used to initialize the "min" calculations.
1078  *              The caller also passes the "last sane" XID and MXID, since it has
1079  *              those at hand already.
1080  *
1081  *              This routine is only invoked when we've managed to change our
1082  *              DB's datfrozenxid/datminmxid values, or we found that the shared
1083  *              XID-wrap-limit info is stale.
1084  */
1085 static void
1086 vac_truncate_clog(TransactionId frozenXID,
1087                                   MultiXactId minMulti,
1088                                   TransactionId lastSaneFrozenXid,
1089                                   MultiXactId lastSaneMinMulti)
1090 {
1091         TransactionId nextXID = ReadNewTransactionId();
1092         Relation        relation;
1093         HeapScanDesc scan;
1094         HeapTuple       tuple;
1095         Oid                     oldestxid_datoid;
1096         Oid                     minmulti_datoid;
1097         bool            bogus = false;
1098         bool            frozenAlreadyWrapped = false;
1099
1100         /* init oldest datoids to sync with my frozenXID/minMulti values */
1101         oldestxid_datoid = MyDatabaseId;
1102         minmulti_datoid = MyDatabaseId;
1103
1104         /*
1105          * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1106          *
1107          * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1108          * the values could change while we look at them.  Fetch each one just
1109          * once to ensure sane behavior of the comparison logic.  (Here, as in
1110          * many other places, we assume that fetching or updating an XID in shared
1111          * storage is atomic.)
1112          *
1113          * Note: we need not worry about a race condition with new entries being
1114          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1115          * existing DB's datfrozenxid, and that source DB cannot be ours because
1116          * of the interlock against copying a DB containing an active backend.
1117          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1118          * concurrently modify the datfrozenxid's of different databases, the
1119          * worst possible outcome is that pg_xact is not truncated as aggressively
1120          * as it could be.
1121          */
1122         relation = heap_open(DatabaseRelationId, AccessShareLock);
1123
1124         scan = heap_beginscan_catalog(relation, 0, NULL);
1125
1126         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1127         {
1128                 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1129                 TransactionId datfrozenxid = dbform->datfrozenxid;
1130                 TransactionId datminmxid = dbform->datminmxid;
1131
1132                 Assert(TransactionIdIsNormal(datfrozenxid));
1133                 Assert(MultiXactIdIsValid(datminmxid));
1134
1135                 /*
1136                  * If things are working properly, no database should have a
1137                  * datfrozenxid or datminmxid that is "in the future".  However, such
1138                  * cases have been known to arise due to bugs in pg_upgrade.  If we
1139                  * see any entries that are "in the future", chicken out and don't do
1140                  * anything.  This ensures we won't truncate clog before those
1141                  * databases have been scanned and cleaned up.  (We will issue the
1142                  * "already wrapped" warning if appropriate, though.)
1143                  */
1144                 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1145                         MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1146                         bogus = true;
1147
1148                 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1149                         frozenAlreadyWrapped = true;
1150                 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1151                 {
1152                         frozenXID = datfrozenxid;
1153                         oldestxid_datoid = HeapTupleGetOid(tuple);
1154                 }
1155
1156                 if (MultiXactIdPrecedes(datminmxid, minMulti))
1157                 {
1158                         minMulti = datminmxid;
1159                         minmulti_datoid = HeapTupleGetOid(tuple);
1160                 }
1161         }
1162
1163         heap_endscan(scan);
1164
1165         heap_close(relation, AccessShareLock);
1166
1167         /*
1168          * Do not truncate CLOG if we seem to have suffered wraparound already;
1169          * the computed minimum XID might be bogus.  This case should now be
1170          * impossible due to the defenses in GetNewTransactionId, but we keep the
1171          * test anyway.
1172          */
1173         if (frozenAlreadyWrapped)
1174         {
1175                 ereport(WARNING,
1176                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1177                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
1178                 return;
1179         }
1180
1181         /* chicken out if data is bogus in any other way */
1182         if (bogus)
1183                 return;
1184
1185         /*
1186          * Advance the oldest value for commit timestamps before truncating, so
1187          * that if a user requests a timestamp for a transaction we're truncating
1188          * away right after this point, they get NULL instead of an ugly "file not
1189          * found" error from slru.c.  This doesn't matter for xact/multixact
1190          * because they are not subject to arbitrary lookups from users.
1191          */
1192         AdvanceOldestCommitTsXid(frozenXID);
1193
1194         /*
1195          * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1196          */
1197         TruncateCLOG(frozenXID, oldestxid_datoid);
1198         TruncateCommitTs(frozenXID);
1199         TruncateMultiXact(minMulti, minmulti_datoid);
1200
1201         /*
1202          * Update the wrap limit for GetNewTransactionId and creation of new
1203          * MultiXactIds.  Note: these functions will also signal the postmaster
1204          * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1205          * signalling twice?
1206          */
1207         SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1208         SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1209 }
1210
1211
1212 /*
1213  *      vacuum_rel() -- vacuum one heap relation
1214  *
1215  *              Doing one heap at a time incurs extra overhead, since we need to
1216  *              check that the heap exists again just before we vacuum it.  The
1217  *              reason that we do this is so that vacuuming can be spread across
1218  *              many small transactions.  Otherwise, two-phase locking would require
1219  *              us to lock the entire database during one pass of the vacuum cleaner.
1220  *
1221  *              At entry and exit, we are not inside a transaction.
1222  */
1223 static bool
1224 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1225 {
1226         LOCKMODE        lmode;
1227         Relation        onerel;
1228         LockRelId       onerelid;
1229         Oid                     toast_relid;
1230         Oid                     save_userid;
1231         int                     save_sec_context;
1232         int                     save_nestlevel;
1233
1234         Assert(params != NULL);
1235
1236         /* Begin a transaction for vacuuming this relation */
1237         StartTransactionCommand();
1238
1239         /*
1240          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1241          * ensures that RecentGlobalXmin is kept truly recent.
1242          */
1243         PushActiveSnapshot(GetTransactionSnapshot());
1244
1245         if (!(options & VACOPT_FULL))
1246         {
1247                 /*
1248                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1249                  * other concurrent VACUUMs know that they can ignore this one while
1250                  * determining their OldestXmin.  (The reason we don't set it during a
1251                  * full VACUUM is exactly that we may have to run user-defined
1252                  * functions for functional indexes, and we want to make sure that if
1253                  * they use the snapshot set above, any tuples it requires can't get
1254                  * removed from other tables.  An index function that depends on the
1255                  * contents of other tables is arguably broken, but we won't break it
1256                  * here by violating transaction semantics.)
1257                  *
1258                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1259                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
1260                  * in an emergency.
1261                  *
1262                  * Note: these flags remain set until CommitTransaction or
1263                  * AbortTransaction.  We don't want to clear them until we reset
1264                  * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1265                  * which is probably Not Good.
1266                  */
1267                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1268                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1269                 if (params->is_wraparound)
1270                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1271                 LWLockRelease(ProcArrayLock);
1272         }
1273
1274         /*
1275          * Check for user-requested abort.  Note we want this to be inside a
1276          * transaction, so xact.c doesn't issue useless WARNING.
1277          */
1278         CHECK_FOR_INTERRUPTS();
1279
1280         /*
1281          * Determine the type of lock we want --- hard exclusive lock for a FULL
1282          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1283          * way, we can be sure that no other backend is vacuuming the same table.
1284          */
1285         lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1286
1287         /*
1288          * Open the relation and get the appropriate lock on it.
1289          *
1290          * There's a race condition here: the rel may have gone away since the
1291          * last time we saw it.  If so, we don't need to vacuum it.
1292          *
1293          * If we've been asked not to wait for the relation lock, acquire it first
1294          * in non-blocking mode, before calling try_relation_open().
1295          */
1296         if (!(options & VACOPT_NOWAIT))
1297                 onerel = try_relation_open(relid, lmode);
1298         else if (ConditionalLockRelationOid(relid, lmode))
1299                 onerel = try_relation_open(relid, NoLock);
1300         else
1301         {
1302                 onerel = NULL;
1303                 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
1304                         ereport(LOG,
1305                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1306                                          errmsg("skipping vacuum of \"%s\" --- lock not available",
1307                                                         relation->relname)));
1308         }
1309
1310         if (!onerel)
1311         {
1312                 PopActiveSnapshot();
1313                 CommitTransactionCommand();
1314                 return false;
1315         }
1316
1317         /*
1318          * Check permissions.
1319          *
1320          * We allow the user to vacuum a table if he is superuser, the table
1321          * owner, or the database owner (but in the latter case, only if it's not
1322          * a shared relation).  pg_class_ownercheck includes the superuser case.
1323          *
1324          * Note we choose to treat permissions failure as a WARNING and keep
1325          * trying to vacuum the rest of the DB --- is this appropriate?
1326          */
1327         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1328                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1329         {
1330                 if (onerel->rd_rel->relisshared)
1331                         ereport(WARNING,
1332                                         (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1333                                                         RelationGetRelationName(onerel))));
1334                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1335                         ereport(WARNING,
1336                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1337                                                         RelationGetRelationName(onerel))));
1338                 else
1339                         ereport(WARNING,
1340                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1341                                                         RelationGetRelationName(onerel))));
1342                 relation_close(onerel, lmode);
1343                 PopActiveSnapshot();
1344                 CommitTransactionCommand();
1345                 return false;
1346         }
1347
1348         /*
1349          * Check that it's a vacuumable relation; we used to do this in
1350          * get_rel_oids() but seems safer to check after we've locked the
1351          * relation.
1352          */
1353         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1354                 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1355                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1356                 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1357         {
1358                 ereport(WARNING,
1359                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1360                                                 RelationGetRelationName(onerel))));
1361                 relation_close(onerel, lmode);
1362                 PopActiveSnapshot();
1363                 CommitTransactionCommand();
1364                 return false;
1365         }
1366
1367         /*
1368          * Silently ignore tables that are temp tables of other backends ---
1369          * trying to vacuum these will lead to great unhappiness, since their
1370          * contents are probably not up-to-date on disk.  (We don't throw a
1371          * warning here; it would just lead to chatter during a database-wide
1372          * VACUUM.)
1373          */
1374         if (RELATION_IS_OTHER_TEMP(onerel))
1375         {
1376                 relation_close(onerel, lmode);
1377                 PopActiveSnapshot();
1378                 CommitTransactionCommand();
1379                 return false;
1380         }
1381
1382         /*
1383          * Ignore partitioned tables as there is no work to be done.  Since we
1384          * release the lock here, it's possible that any partitions added from
1385          * this point on will not get processed, but that seems harmless.
1386          */
1387         if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1388         {
1389                 relation_close(onerel, lmode);
1390                 PopActiveSnapshot();
1391                 CommitTransactionCommand();
1392
1393                 /* It's OK for other commands to look at this table */
1394                 return true;
1395         }
1396
1397         /*
1398          * Get a session-level lock too. This will protect our access to the
1399          * relation across multiple transactions, so that we can vacuum the
1400          * relation's TOAST table (if any) secure in the knowledge that no one is
1401          * deleting the parent relation.
1402          *
1403          * NOTE: this cannot block, even if someone else is waiting for access,
1404          * because the lock manager knows that both lock requests are from the
1405          * same process.
1406          */
1407         onerelid = onerel->rd_lockInfo.lockRelId;
1408         LockRelationIdForSession(&onerelid, lmode);
1409
1410         /*
1411          * Remember the relation's TOAST relation for later, if the caller asked
1412          * us to process it.  In VACUUM FULL, though, the toast table is
1413          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1414          */
1415         if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1416                 toast_relid = onerel->rd_rel->reltoastrelid;
1417         else
1418                 toast_relid = InvalidOid;
1419
1420         /*
1421          * Switch to the table owner's userid, so that any index functions are run
1422          * as that user.  Also lock down security-restricted operations and
1423          * arrange to make GUC variable changes local to this command. (This is
1424          * unnecessary, but harmless, for lazy VACUUM.)
1425          */
1426         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1427         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1428                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1429         save_nestlevel = NewGUCNestLevel();
1430
1431         /*
1432          * Do the actual work --- either FULL or "lazy" vacuum
1433          */
1434         if (options & VACOPT_FULL)
1435         {
1436                 /* close relation before vacuuming, but hold lock until commit */
1437                 relation_close(onerel, NoLock);
1438                 onerel = NULL;
1439
1440                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1441                 cluster_rel(relid, InvalidOid, false,
1442                                         (options & VACOPT_VERBOSE) != 0);
1443         }
1444         else
1445                 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1446
1447         /* Roll back any GUC changes executed by index functions */
1448         AtEOXact_GUC(false, save_nestlevel);
1449
1450         /* Restore userid and security context */
1451         SetUserIdAndSecContext(save_userid, save_sec_context);
1452
1453         /* all done with this class, but hold lock until commit */
1454         if (onerel)
1455                 relation_close(onerel, NoLock);
1456
1457         /*
1458          * Complete the transaction and free all temporary memory used.
1459          */
1460         PopActiveSnapshot();
1461         CommitTransactionCommand();
1462
1463         /*
1464          * If the relation has a secondary toast rel, vacuum that too while we
1465          * still hold the session lock on the master table.  Note however that
1466          * "analyze" will not get done on the toast table.  This is good, because
1467          * the toaster always uses hardcoded index access and statistics are
1468          * totally unimportant for toast relations.
1469          */
1470         if (toast_relid != InvalidOid)
1471                 vacuum_rel(toast_relid, relation, options, params);
1472
1473         /*
1474          * Now release the session-level lock on the master table.
1475          */
1476         UnlockRelationIdForSession(&onerelid, lmode);
1477
1478         /* Report that we really did it. */
1479         return true;
1480 }
1481
1482
1483 /*
1484  * Open all the vacuumable indexes of the given relation, obtaining the
1485  * specified kind of lock on each.  Return an array of Relation pointers for
1486  * the indexes into *Irel, and the number of indexes into *nindexes.
1487  *
1488  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1489  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1490  * execution, and what we have is too corrupt to be processable.  We will
1491  * vacuum even if the index isn't indisvalid; this is important because in a
1492  * unique index, uniqueness checks will be performed anyway and had better not
1493  * hit dangling index pointers.
1494  */
1495 void
1496 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1497                                  int *nindexes, Relation **Irel)
1498 {
1499         List       *indexoidlist;
1500         ListCell   *indexoidscan;
1501         int                     i;
1502
1503         Assert(lockmode != NoLock);
1504
1505         indexoidlist = RelationGetIndexList(relation);
1506
1507         /* allocate enough memory for all indexes */
1508         i = list_length(indexoidlist);
1509
1510         if (i > 0)
1511                 *Irel = (Relation *) palloc(i * sizeof(Relation));
1512         else
1513                 *Irel = NULL;
1514
1515         /* collect just the ready indexes */
1516         i = 0;
1517         foreach(indexoidscan, indexoidlist)
1518         {
1519                 Oid                     indexoid = lfirst_oid(indexoidscan);
1520                 Relation        indrel;
1521
1522                 indrel = index_open(indexoid, lockmode);
1523                 if (IndexIsReady(indrel->rd_index))
1524                         (*Irel)[i++] = indrel;
1525                 else
1526                         index_close(indrel, lockmode);
1527         }
1528
1529         *nindexes = i;
1530
1531         list_free(indexoidlist);
1532 }
1533
1534 /*
1535  * Release the resources acquired by vac_open_indexes.  Optionally release
1536  * the locks (say NoLock to keep 'em).
1537  */
1538 void
1539 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1540 {
1541         if (Irel == NULL)
1542                 return;
1543
1544         while (nindexes--)
1545         {
1546                 Relation        ind = Irel[nindexes];
1547
1548                 index_close(ind, lockmode);
1549         }
1550         pfree(Irel);
1551 }
1552
1553 /*
1554  * vacuum_delay_point --- check for interrupts and cost-based delay.
1555  *
1556  * This should be called in each major loop of VACUUM processing,
1557  * typically once per page processed.
1558  */
1559 void
1560 vacuum_delay_point(void)
1561 {
1562         /* Always check for interrupts */
1563         CHECK_FOR_INTERRUPTS();
1564
1565         /* Nap if appropriate */
1566         if (VacuumCostActive && !InterruptPending &&
1567                 VacuumCostBalance >= VacuumCostLimit)
1568         {
1569                 int                     msec;
1570
1571                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1572                 if (msec > VacuumCostDelay * 4)
1573                         msec = VacuumCostDelay * 4;
1574
1575                 pg_usleep(msec * 1000L);
1576
1577                 VacuumCostBalance = 0;
1578
1579                 /* update balance values for workers */
1580                 AutoVacuumUpdateDelay();
1581
1582                 /* Might have gotten an interrupt while sleeping */
1583                 CHECK_FOR_INTERRUPTS();
1584         }
1585 }