granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.405 2010/02/08 04:33:53 tgl Exp $
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include "access/clog.h"
  24 #include "access/genam.h"
  25 #include "access/heapam.h"
  26 #include "access/transam.h"
  27 #include "access/xact.h"
  28 #include "catalog/namespace.h"
  29 #include "catalog/pg_database.h"
  30 #include "catalog/pg_namespace.h"
  31 #include "commands/cluster.h"
  32 #include "commands/vacuum.h"
  33 #include "miscadmin.h"
  34 #include "pgstat.h"
  35 #include "postmaster/autovacuum.h"
  36 #include "storage/bufmgr.h"
  37 #include "storage/lmgr.h"
  38 #include "storage/proc.h"
  39 #include "storage/procarray.h"
  40 #include "utils/acl.h"
  41 #include "utils/fmgroids.h"
  42 #include "utils/guc.h"
  43 #include "utils/memutils.h"
  44 #include "utils/snapmgr.h"
  45 #include "utils/syscache.h"
  46 #include "utils/tqual.h"
  47
  48
/*
 * GUC parameters
 *
 * vacuum_set_xid_limits() falls back to these when its caller passes a
 * negative freeze_min_age / freeze_table_age.
 */
int                     vacuum_freeze_min_age;
int                     vacuum_freeze_table_age;


/*
 * A few variables that don't seem worth passing around as parameters
 *
 * vac_context is the cross-transaction memory context created (as a child
 * of PortalContext) and deleted by vacuum(); it is NULL outside that call.
 * vac_strategy is the buffer access strategy chosen by vacuum(), either the
 * caller-supplied one or one allocated in vac_context.
 */
static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;


/* non-export function prototypes (definitions appear later in this file) */
static List *get_rel_oids(Oid relid, const RangeVar *vacrel,
                         const char *stmttype);
static void vac_truncate_clog(TransactionId frozenXID);
static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
                   bool for_wraparound, bool *scanned_all);
  67
  68
  69 /*
  70  * Primary entry point for VACUUM and ANALYZE commands.
  71  *
  72  * relid is normally InvalidOid; if it is not, then it provides the relation
  73  * OID to be processed, and vacstmt->relation is ignored.  (The non-invalid
  74  * case is currently only used by autovacuum.)
  75  *
  76  * do_toast is passed as FALSE by autovacuum, because it processes TOAST
  77  * tables separately.
  78  *
  79  * for_wraparound is used by autovacuum to let us know when it's forcing
  80  * a vacuum for wraparound, which should not be auto-cancelled.
  81  *
  82  * bstrategy is normally given as NULL, but in autovacuum it can be passed
  83  * in to use the same buffer strategy object across multiple vacuum() calls.
  84  *
  85  * isTopLevel should be passed down from ProcessUtility.
  86  *
  87  * It is the caller's responsibility that vacstmt and bstrategy
  88  * (if given) be allocated in a memory context that won't disappear
  89  * at transaction commit.
  90  */
  91 void
  92 vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
  93            BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
  94 {
  95         const char *stmttype;
  96         volatile bool all_rels,
  97                                 in_outer_xact,
  98                                 use_own_xacts;
  99         List       *relations;
 100
 101         /* sanity checks on options */
 102         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
 103         Assert((vacstmt->options & VACOPT_VACUUM) ||
 104                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
 105         Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
 106
 107         stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 108
 109         /*
 110          * We cannot run VACUUM inside a user transaction block; if we were inside
 111          * a transaction, then our commit- and start-transaction-command calls
 112          * would not have the intended effect!  There are numerous other subtle
 113          * dependencies on this, too.
 114          *
 115          * ANALYZE (without VACUUM) can run either way.
 116          */
 117         if (vacstmt->options & VACOPT_VACUUM)
 118         {
 119                 PreventTransactionChain(isTopLevel, stmttype);
 120                 in_outer_xact = false;
 121         }
 122         else
 123                 in_outer_xact = IsInTransactionChain(isTopLevel);
 124
 125         /*
 126          * Send info about dead objects to the statistics collector, unless we are
 127          * in autovacuum --- autovacuum.c does this for itself.
 128          */
 129         if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 130                 pgstat_vacuum_stat();
 131
 132         /*
 133          * Create special memory context for cross-transaction storage.
 134          *
 135          * Since it is a child of PortalContext, it will go away eventually even
 136          * if we suffer an error; there's no need for special abort cleanup logic.
 137          */
 138         vac_context = AllocSetContextCreate(PortalContext,
 139                                                                                 "Vacuum",
 140                                                                                 ALLOCSET_DEFAULT_MINSIZE,
 141                                                                                 ALLOCSET_DEFAULT_INITSIZE,
 142                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
 143
 144         /*
 145          * If caller didn't give us a buffer strategy object, make one in the
 146          * cross-transaction memory context.
 147          */
 148         if (bstrategy == NULL)
 149         {
 150                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 151
 152                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 153                 MemoryContextSwitchTo(old_context);
 154         }
 155         vac_strategy = bstrategy;
 156
 157         /* Remember whether we are processing everything in the DB */
 158         all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL);
 159
 160         /*
 161          * Build list of relations to process, unless caller gave us one. (If we
 162          * build one, we put it in vac_context for safekeeping.)
 163          */
 164         relations = get_rel_oids(relid, vacstmt->relation, stmttype);
 165
 166         /*
 167          * Decide whether we need to start/commit our own transactions.
 168          *
 169          * For VACUUM (with or without ANALYZE): always do so, so that we can
 170          * release locks as soon as possible.  (We could possibly use the outer
 171          * transaction for a one-table VACUUM, but handling TOAST tables would be
 172          * problematic.)
 173          *
 174          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 175          * start/commit our own transactions.  Also, there's no need to do so if
 176          * only processing one relation.  For multiple relations when not within a
 177          * transaction block, and also in an autovacuum worker, use own
 178          * transactions so we can release locks sooner.
 179          */
 180         if (vacstmt->options & VACOPT_VACUUM)
 181                 use_own_xacts = true;
 182         else
 183         {
 184                 Assert(vacstmt->options & VACOPT_ANALYZE);
 185                 if (IsAutoVacuumWorkerProcess())
 186                         use_own_xacts = true;
 187                 else if (in_outer_xact)
 188                         use_own_xacts = false;
 189                 else if (list_length(relations) > 1)
 190                         use_own_xacts = true;
 191                 else
 192                         use_own_xacts = false;
 193         }
 194
 195         /*
 196          * vacuum_rel expects to be entered with no transaction active; it will
 197          * start and commit its own transaction.  But we are called by an SQL
 198          * command, and so we are executing inside a transaction already. We
 199          * commit the transaction started in PostgresMain() here, and start
 200          * another one before exiting to match the commit waiting for us back in
 201          * PostgresMain().
 202          */
 203         if (use_own_xacts)
 204         {
 205                 /* ActiveSnapshot is not set by autovacuum */
 206                 if (ActiveSnapshotSet())
 207                         PopActiveSnapshot();
 208
 209                 /* matches the StartTransaction in PostgresMain() */
 210                 CommitTransactionCommand();
 211         }
 212
 213         /* Turn vacuum cost accounting on or off */
 214         PG_TRY();
 215         {
 216                 ListCell   *cur;
 217
 218                 VacuumCostActive = (VacuumCostDelay > 0);
 219                 VacuumCostBalance = 0;
 220
 221                 /*
 222                  * Loop to process each selected relation.
 223                  */
 224                 foreach(cur, relations)
 225                 {
 226                         Oid                     relid = lfirst_oid(cur);
 227                         bool            scanned_all = false;
 228
 229                         if (vacstmt->options & VACOPT_VACUUM)
 230                                 vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
 231                                                    &scanned_all);
 232
 233                         if (vacstmt->options & VACOPT_ANALYZE)
 234                         {
 235                                 /*
 236                                  * If using separate xacts, start one for analyze. Otherwise,
 237                                  * we can use the outer transaction.
 238                                  */
 239                                 if (use_own_xacts)
 240                                 {
 241                                         StartTransactionCommand();
 242                                         /* functions in indexes may want a snapshot set */
 243                                         PushActiveSnapshot(GetTransactionSnapshot());
 244                                 }
 245
 246                                 analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
 247
 248                                 if (use_own_xacts)
 249                                 {
 250                                         PopActiveSnapshot();
 251                                         CommitTransactionCommand();
 252                                 }
 253                         }
 254                 }
 255         }
 256         PG_CATCH();
 257         {
 258                 /* Make sure cost accounting is turned off after error */
 259                 VacuumCostActive = false;
 260                 PG_RE_THROW();
 261         }
 262         PG_END_TRY();
 263
 264         /* Turn off vacuum cost accounting */
 265         VacuumCostActive = false;
 266
 267         /*
 268          * Finish up processing.
 269          */
 270         if (use_own_xacts)
 271         {
 272                 /* here, we are not in a transaction */
 273
 274                 /*
 275                  * This matches the CommitTransaction waiting for us in
 276                  * PostgresMain().
 277                  */
 278                 StartTransactionCommand();
 279         }
 280
 281         if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 282         {
 283                 /*
 284                  * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
 285                  * (autovacuum.c does this for itself.)
 286                  */
 287                 vac_update_datfrozenxid();
 288         }
 289
 290         /*
 291          * Clean up working storage --- note we must do this after
 292          * StartTransactionCommand, else we might be trying to delete the active
 293          * context!
 294          */
 295         MemoryContextDelete(vac_context);
 296         vac_context = NULL;
 297 }
 298
 299 /*
 300  * Build a list of Oids for each relation to be processed
 301  *
 302  * The list is built in vac_context so that it will survive across our
 303  * per-relation transactions.
 304  */
 305 static List *
 306 get_rel_oids(Oid relid, const RangeVar *vacrel, const char *stmttype)
 307 {
 308         List       *oid_list = NIL;
 309         MemoryContext oldcontext;
 310
 311         /* OID supplied by VACUUM's caller? */
 312         if (OidIsValid(relid))
 313         {
 314                 oldcontext = MemoryContextSwitchTo(vac_context);
 315                 oid_list = lappend_oid(oid_list, relid);
 316                 MemoryContextSwitchTo(oldcontext);
 317         }
 318         else if (vacrel)
 319         {
 320                 /* Process a specific relation */
 321                 Oid                     relid;
 322
 323                 relid = RangeVarGetRelid(vacrel, false);
 324
 325                 /* Make a relation list entry for this guy */
 326                 oldcontext = MemoryContextSwitchTo(vac_context);
 327                 oid_list = lappend_oid(oid_list, relid);
 328                 MemoryContextSwitchTo(oldcontext);
 329         }
 330         else
 331         {
 332                 /* Process all plain relations listed in pg_class */
 333                 Relation        pgclass;
 334                 HeapScanDesc scan;
 335                 HeapTuple       tuple;
 336                 ScanKeyData key;
 337
 338                 ScanKeyInit(&key,
 339                                         Anum_pg_class_relkind,
 340                                         BTEqualStrategyNumber, F_CHAREQ,
 341                                         CharGetDatum(RELKIND_RELATION));
 342
 343                 pgclass = heap_open(RelationRelationId, AccessShareLock);
 344
 345                 scan = heap_beginscan(pgclass, SnapshotNow, 1, &key);
 346
 347                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 348                 {
 349                         /* Make a relation list entry for this guy */
 350                         oldcontext = MemoryContextSwitchTo(vac_context);
 351                         oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
 352                         MemoryContextSwitchTo(oldcontext);
 353                 }
 354
 355                 heap_endscan(scan);
 356                 heap_close(pgclass, AccessShareLock);
 357         }
 358
 359         return oid_list;
 360 }
 361
 362 /*
 363  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 364  */
 365 void
 366 vacuum_set_xid_limits(int freeze_min_age,
 367                                           int freeze_table_age,
 368                                           bool sharedRel,
 369                                           TransactionId *oldestXmin,
 370                                           TransactionId *freezeLimit,
 371                                           TransactionId *freezeTableLimit)
 372 {
 373         int                     freezemin;
 374         TransactionId limit;
 375         TransactionId safeLimit;
 376
 377         /*
 378          * We can always ignore processes running lazy vacuum.  This is because we
 379          * use these values only for deciding which tuples we must keep in the
 380          * tables.      Since lazy vacuum doesn't write its XID anywhere, it's safe to
 381          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 382          * a full vacuum, but keep in mind that only one vacuum process can be
 383          * working on a particular table at any time, and that each vacuum is
 384          * always an independent transaction.
 385          */
 386         *oldestXmin = GetOldestXmin(sharedRel, true);
 387
 388         Assert(TransactionIdIsNormal(*oldestXmin));
 389
 390         /*
 391          * Determine the minimum freeze age to use: as specified by the caller, or
 392          * vacuum_freeze_min_age, but in any case not more than half
 393          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 394          * wraparound won't occur too frequently.
 395          */
 396         freezemin = freeze_min_age;
 397         if (freezemin < 0)
 398                 freezemin = vacuum_freeze_min_age;
 399         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 400         Assert(freezemin >= 0);
 401
 402         /*
 403          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 404          */
 405         limit = *oldestXmin - freezemin;
 406         if (!TransactionIdIsNormal(limit))
 407                 limit = FirstNormalTransactionId;
 408
 409         /*
 410          * If oldestXmin is very far back (in practice, more than
 411          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 412          * freeze age of zero.
 413          */
 414         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 415         if (!TransactionIdIsNormal(safeLimit))
 416                 safeLimit = FirstNormalTransactionId;
 417
 418         if (TransactionIdPrecedes(limit, safeLimit))
 419         {
 420                 ereport(WARNING,
 421                                 (errmsg("oldest xmin is far in the past"),
 422                                  errhint("Close open transactions soon to avoid wraparound problems.")));
 423                 limit = *oldestXmin;
 424         }
 425
 426         *freezeLimit = limit;
 427
 428         if (freezeTableLimit != NULL)
 429         {
 430                 int                     freezetable;
 431
 432                 /*
 433                  * Determine the table freeze age to use: as specified by the caller,
 434                  * or vacuum_freeze_table_age, but in any case not more than
 435                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 436                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 437                  * before anti-wraparound autovacuum is launched.
 438                  */
 439                 freezetable = freeze_min_age;
 440                 if (freezetable < 0)
 441                         freezetable = vacuum_freeze_table_age;
 442                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 443                 Assert(freezetable >= 0);
 444
 445                 /*
 446                  * Compute the cutoff XID, being careful not to generate a "permanent"
 447                  * XID.
 448                  */
 449                 limit = ReadNewTransactionId() - freezetable;
 450                 if (!TransactionIdIsNormal(limit))
 451                         limit = FirstNormalTransactionId;
 452
 453                 *freezeTableLimit = limit;
 454         }
 455 }
 456
 457
 458 /*
 459  *      vac_update_relstats() -- update statistics for one relation
 460  *
 461  *              Update the whole-relation statistics that are kept in its pg_class
 462  *              row.  There are additional stats that will be updated if we are
 463  *              doing ANALYZE, but we always update these stats.  This routine works
 464  *              for both index and heap relation entries in pg_class.
 465  *
 466  *              We violate transaction semantics here by overwriting the rel's
 467  *              existing pg_class tuple with the new values.  This is reasonably
 468  *              safe since the new values are correct whether or not this transaction
 469  *              commits.  The reason for this is that if we updated these tuples in
 470  *              the usual way, vacuuming pg_class itself wouldn't work very well ---
 471  *              by the time we got done with a vacuum cycle, most of the tuples in
 472  *              pg_class would've been obsoleted.  Of course, this only works for
 473  *              fixed-size never-null columns, but these are.
 474  *
 475  *              Note another assumption: that two VACUUMs/ANALYZEs on a table can't
 476  *              run in parallel, nor can VACUUM/ANALYZE run in parallel with a
 477  *              schema alteration such as adding an index, rule, or trigger.  Otherwise
 478  *              our updates of relhasindex etc might overwrite uncommitted updates.
 479  *
 480  *              Another reason for doing it this way is that when we are in a lazy
 481  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any updates ---
 482  *              somebody vacuuming pg_class might think they could delete a tuple
 483  *              marked with xmin = our xid.
 484  *
 485  *              This routine is shared by VACUUM and stand-alone ANALYZE.
 486  */
 487 void
 488 vac_update_relstats(Relation relation,
 489                                         BlockNumber num_pages, double num_tuples,
 490                                         bool hasindex, TransactionId frozenxid)
 491 {
 492         Oid                     relid = RelationGetRelid(relation);
 493         Relation        rd;
 494         HeapTuple       ctup;
 495         Form_pg_class pgcform;
 496         bool            dirty;
 497
 498         rd = heap_open(RelationRelationId, RowExclusiveLock);
 499
 500         /* Fetch a copy of the tuple to scribble on */
 501         ctup = SearchSysCacheCopy(RELOID,
 502                                                           ObjectIdGetDatum(relid),
 503                                                           0, 0, 0);
 504         if (!HeapTupleIsValid(ctup))
 505                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 506                          relid);
 507         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 508
 509         /* Apply required updates, if any, to copied tuple */
 510
 511         dirty = false;
 512         if (pgcform->relpages != (int32) num_pages)
 513         {
 514                 pgcform->relpages = (int32) num_pages;
 515                 dirty = true;
 516         }
 517         if (pgcform->reltuples != (float4) num_tuples)
 518         {
 519                 pgcform->reltuples = (float4) num_tuples;
 520                 dirty = true;
 521         }
 522         if (pgcform->relhasindex != hasindex)
 523         {
 524                 pgcform->relhasindex = hasindex;
 525                 dirty = true;
 526         }
 527
 528         /*
 529          * If we have discovered that there are no indexes, then there's no
 530          * primary key either, nor any exclusion constraints.  This could be done
 531          * more thoroughly...
 532          */
 533         if (!hasindex)
 534         {
 535                 if (pgcform->relhaspkey)
 536                 {
 537                         pgcform->relhaspkey = false;
 538                         dirty = true;
 539                 }
 540                 if (pgcform->relhasexclusion && pgcform->relkind != RELKIND_INDEX)
 541                 {
 542                         pgcform->relhasexclusion = false;
 543                         dirty = true;
 544                 }
 545         }
 546
 547         /* We also clear relhasrules and relhastriggers if needed */
 548         if (pgcform->relhasrules && relation->rd_rules == NULL)
 549         {
 550                 pgcform->relhasrules = false;
 551                 dirty = true;
 552         }
 553         if (pgcform->relhastriggers && relation->trigdesc == NULL)
 554         {
 555                 pgcform->relhastriggers = false;
 556                 dirty = true;
 557         }
 558
 559         /*
 560          * relfrozenxid should never go backward.  Caller can pass
 561          * InvalidTransactionId if it has no new data.
 562          */
 563         if (TransactionIdIsNormal(frozenxid) &&
 564                 TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid))
 565         {
 566                 pgcform->relfrozenxid = frozenxid;
 567                 dirty = true;
 568         }
 569
 570         /* If anything changed, write out the tuple. */
 571         if (dirty)
 572                 heap_inplace_update(rd, ctup);
 573
 574         heap_close(rd, RowExclusiveLock);
 575 }
 576
 577
 578 /*
 579  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 580  *
 581  *              Update pg_database's datfrozenxid entry for our database to be the
 582  *              minimum of the pg_class.relfrozenxid values.  If we are able to
 583  *              advance pg_database.datfrozenxid, also try to truncate pg_clog.
 584  *
 585  *              We violate transaction semantics here by overwriting the database's
 586  *              existing pg_database tuple with the new value.  This is reasonably
 587  *              safe since the new value is correct whether or not this transaction
 588  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
 589  *              behind after a VACUUM.
 590  */
 591 void
 592 vac_update_datfrozenxid(void)
 593 {
 594         HeapTuple       tuple;
 595         Form_pg_database dbform;
 596         Relation        relation;
 597         SysScanDesc scan;
 598         HeapTuple       classTup;
 599         TransactionId newFrozenXid;
 600         bool            dirty = false;
 601
 602         /*
 603          * Initialize the "min" calculation with GetOldestXmin, which is a
 604          * reasonable approximation to the minimum relfrozenxid for not-yet-
 605          * committed pg_class entries for new tables; see AddNewRelationTuple().
 606          * Se we cannot produce a wrong minimum by starting with this.
 607          */
 608         newFrozenXid = GetOldestXmin(true, true);
 609
 610         /*
 611          * We must seqscan pg_class to find the minimum Xid, because there is no
 612          * index that can help us here.
 613          */
 614         relation = heap_open(RelationRelationId, AccessShareLock);
 615
 616         scan = systable_beginscan(relation, InvalidOid, false,
 617                                                           SnapshotNow, 0, NULL);
 618
 619         while ((classTup = systable_getnext(scan)) != NULL)
 620         {
 621                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
 622
 623                 /*
 624                  * Only consider heap and TOAST tables (anything else should have
 625                  * InvalidTransactionId in relfrozenxid anyway.)
 626                  */
 627                 if (classForm->relkind != RELKIND_RELATION &&
 628                         classForm->relkind != RELKIND_TOASTVALUE)
 629                         continue;
 630
 631                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
 632
 633                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
 634                         newFrozenXid = classForm->relfrozenxid;
 635         }
 636
 637         /* we're done with pg_class */
 638         systable_endscan(scan);
 639         heap_close(relation, AccessShareLock);
 640
 641         Assert(TransactionIdIsNormal(newFrozenXid));
 642
 643         /* Now fetch the pg_database tuple we need to update. */
 644         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
 645
 646         /* Fetch a copy of the tuple to scribble on */
 647         tuple = SearchSysCacheCopy(DATABASEOID,
 648                                                            ObjectIdGetDatum(MyDatabaseId),
 649                                                            0, 0, 0);
 650         if (!HeapTupleIsValid(tuple))
 651                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
 652         dbform = (Form_pg_database) GETSTRUCT(tuple);
 653
 654         /*
 655          * Don't allow datfrozenxid to go backward (probably can't happen anyway);
 656          * and detect the common case where it doesn't go forward either.
 657          */
 658         if (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid))
 659         {
 660                 dbform->datfrozenxid = newFrozenXid;
 661                 dirty = true;
 662         }
 663
 664         if (dirty)
 665                 heap_inplace_update(relation, tuple);
 666
 667         heap_freetuple(tuple);
 668         heap_close(relation, RowExclusiveLock);
 669
 670         /*
 671          * If we were able to advance datfrozenxid, see if we can truncate pg_clog.
 672          * Also do it if the shared XID-wrap-limit info is stale, since this
 673          * action will update that too.
 674          */
 675         if (dirty || ForceTransactionIdLimitUpdate())
 676                 vac_truncate_clog(newFrozenXid);
 677 }
 678
 679
 680 /*
 681  *      vac_truncate_clog() -- attempt to truncate the commit log
 682  *
 683  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
 684  *              and use it to truncate the transaction commit log (pg_clog).
 685  *              Also update the XID wrap limit info maintained by varsup.c.
 686  *
 687  *              The passed XID is simply the one I just wrote into my pg_database
 688  *              entry.  It's used to initialize the "min" calculation.
 689  *
 690  *              This routine is only only invoked when we've managed to change our
 691  *              DB's datfrozenxid entry, or we found that the shared XID-wrap-limit
 692  *              info is stale.
 693  */
 694 static void
 695 vac_truncate_clog(TransactionId frozenXID)
 696 {
 697         TransactionId myXID = GetCurrentTransactionId();
 698         Relation        relation;
 699         HeapScanDesc scan;
 700         HeapTuple       tuple;
 701         Oid                     oldest_datoid;
 702         bool            frozenAlreadyWrapped = false;
 703
 704         /* init oldest_datoid to sync with my frozenXID */
 705         oldest_datoid = MyDatabaseId;
 706
 707         /*
 708          * Scan pg_database to compute the minimum datfrozenxid
 709          *
 710          * Note: we need not worry about a race condition with new entries being
 711          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
 712          * existing DB's datfrozenxid, and that source DB cannot be ours because
 713          * of the interlock against copying a DB containing an active backend.
 714          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
 715          * concurrently modify the datfrozenxid's of different databases, the
 716          * worst possible outcome is that pg_clog is not truncated as aggressively
 717          * as it could be.
 718          */
 719         relation = heap_open(DatabaseRelationId, AccessShareLock);
 720
 721         scan = heap_beginscan(relation, SnapshotNow, 0, NULL);
 722
 723         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 724         {
 725                 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
 726
 727                 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
 728
 729                 if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
 730                         frozenAlreadyWrapped = true;
 731                 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
 732                 {
 733                         frozenXID = dbform->datfrozenxid;
 734                         oldest_datoid = HeapTupleGetOid(tuple);
 735                 }
 736         }
 737
 738         heap_endscan(scan);
 739
 740         heap_close(relation, AccessShareLock);
 741
 742         /*
 743          * Do not truncate CLOG if we seem to have suffered wraparound already;
 744          * the computed minimum XID might be bogus.  This case should now be
 745          * impossible due to the defenses in GetNewTransactionId, but we keep the
 746          * test anyway.
 747          */
 748         if (frozenAlreadyWrapped)
 749         {
 750                 ereport(WARNING,
 751                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
 752                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
 753                 return;
 754         }
 755
 756         /* Truncate CLOG to the oldest frozenxid */
 757         TruncateCLOG(frozenXID);
 758
 759         /*
 760          * Update the wrap limit for GetNewTransactionId.  Note: this function
 761          * will also signal the postmaster for an(other) autovac cycle if needed.
 762          */
 763         SetTransactionIdLimit(frozenXID, oldest_datoid);
 764 }
 765
 766
 767 /*
 768  *      vacuum_rel() -- vacuum one heap relation
 769  *
 770  *              Doing one heap at a time incurs extra overhead, since we need to
 771  *              check that the heap exists again just before we vacuum it.      The
 772  *              reason that we do this is so that vacuuming can be spread across
 773  *              many small transactions.  Otherwise, two-phase locking would require
 774  *              us to lock the entire database during one pass of the vacuum cleaner.
 775  *
 776  *              We'll return true in *scanned_all if the vacuum scanned all heap
 777  *              pages, and updated pg_class.
 778  *
 779  *              At entry and exit, we are not inside a transaction.
 780  */
 781 static void
 782 vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
 783                    bool *scanned_all)
 784 {
 785         LOCKMODE        lmode;
 786         Relation        onerel;
 787         LockRelId       onerelid;
 788         Oid                     toast_relid;
 789         Oid                     save_userid;
 790         int                     save_sec_context;
 791         int                     save_nestlevel;
 792         bool            heldoff;
 793
 794         if (scanned_all)
 795                 *scanned_all = false;
 796
 797         /* Begin a transaction for vacuuming this relation */
 798         StartTransactionCommand();
 799
 800         /*
 801          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
 802          * ensures that RecentGlobalXmin is kept truly recent.
 803          */
 804         PushActiveSnapshot(GetTransactionSnapshot());
 805
 806         if (!(vacstmt->options & VACOPT_FULL))
 807         {
 808                 /*
 809                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
 810                  * other concurrent VACUUMs know that they can ignore this one while
 811                  * determining their OldestXmin.  (The reason we don't set it during a
 812                  * full VACUUM is exactly that we may have to run user-defined
 813                  * functions for functional indexes, and we want to make sure that if
 814                  * they use the snapshot set above, any tuples it requires can't get
 815                  * removed from other tables.  An index function that depends on the
 816                  * contents of other tables is arguably broken, but we won't break it
 817                  * here by violating transaction semantics.)
 818                  *
 819                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
 820                  * autovacuum; it's used to avoid cancelling a vacuum that was invoked
 821                  * in an emergency.
 822                  *
 823                  * Note: these flags remain set until CommitTransaction or
 824                  * AbortTransaction.  We don't want to clear them until we reset
 825                  * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
 826                  * which is probably Not Good.
 827                  */
 828                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 829                 MyProc->vacuumFlags |= PROC_IN_VACUUM;
 830                 if (for_wraparound)
 831                         MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
 832                 LWLockRelease(ProcArrayLock);
 833         }
 834
 835         /*
 836          * Check for user-requested abort.      Note we want this to be inside a
 837          * transaction, so xact.c doesn't issue useless WARNING.
 838          */
 839         CHECK_FOR_INTERRUPTS();
 840
 841         /*
 842          * Determine the type of lock we want --- hard exclusive lock for a FULL
 843          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
 844          * way, we can be sure that no other backend is vacuuming the same table.
 845          */
 846         lmode = (vacstmt->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
 847
 848         /*
 849          * Open the relation and get the appropriate lock on it.
 850          *
 851          * There's a race condition here: the rel may have gone away since the
 852          * last time we saw it.  If so, we don't need to vacuum it.
 853          */
 854         onerel = try_relation_open(relid, lmode);
 855
 856         if (!onerel)
 857         {
 858                 PopActiveSnapshot();
 859                 CommitTransactionCommand();
 860                 return;
 861         }
 862
 863         /*
 864          * Check permissions.
 865          *
 866          * We allow the user to vacuum a table if he is superuser, the table
 867          * owner, or the database owner (but in the latter case, only if it's not
 868          * a shared relation).  pg_class_ownercheck includes the superuser case.
 869          *
 870          * Note we choose to treat permissions failure as a WARNING and keep
 871          * trying to vacuum the rest of the DB --- is this appropriate?
 872          */
 873         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
 874                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
 875         {
 876                 if (onerel->rd_rel->relisshared)
 877                         ereport(WARNING,
 878                                   (errmsg("skipping \"%s\" --- only superuser can vacuum it",
 879                                                   RelationGetRelationName(onerel))));
 880                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
 881                         ereport(WARNING,
 882                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
 883                                                         RelationGetRelationName(onerel))));
 884                 else
 885                         ereport(WARNING,
 886                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
 887                                                         RelationGetRelationName(onerel))));
 888                 relation_close(onerel, lmode);
 889                 PopActiveSnapshot();
 890                 CommitTransactionCommand();
 891                 return;
 892         }
 893
 894         /*
 895          * Check that it's a vacuumable table; we used to do this in
 896          * get_rel_oids() but seems safer to check after we've locked the
 897          * relation.
 898          */
 899         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
 900                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
 901         {
 902                 ereport(WARNING,
 903                                 (errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
 904                                                 RelationGetRelationName(onerel))));
 905                 relation_close(onerel, lmode);
 906                 PopActiveSnapshot();
 907                 CommitTransactionCommand();
 908                 return;
 909         }
 910
 911         /*
 912          * Silently ignore tables that are temp tables of other backends ---
 913          * trying to vacuum these will lead to great unhappiness, since their
 914          * contents are probably not up-to-date on disk.  (We don't throw a
 915          * warning here; it would just lead to chatter during a database-wide
 916          * VACUUM.)
 917          */
 918         if (RELATION_IS_OTHER_TEMP(onerel))
 919         {
 920                 relation_close(onerel, lmode);
 921                 PopActiveSnapshot();
 922                 CommitTransactionCommand();
 923                 return;
 924         }
 925
 926         /*
 927          * Get a session-level lock too. This will protect our access to the
 928          * relation across multiple transactions, so that we can vacuum the
 929          * relation's TOAST table (if any) secure in the knowledge that no one is
 930          * deleting the parent relation.
 931          *
 932          * NOTE: this cannot block, even if someone else is waiting for access,
 933          * because the lock manager knows that both lock requests are from the
 934          * same process.
 935          */
 936         onerelid = onerel->rd_lockInfo.lockRelId;
 937         LockRelationIdForSession(&onerelid, lmode);
 938
 939         /*
 940          * Remember the relation's TOAST relation for later, if the caller asked
 941          * us to process it.
 942          */
 943         if (do_toast)
 944                 toast_relid = onerel->rd_rel->reltoastrelid;
 945         else
 946                 toast_relid = InvalidOid;
 947
 948         /*
 949          * Switch to the table owner's userid, so that any index functions are run
 950          * as that user.  Also lock down security-restricted operations and
 951          * arrange to make GUC variable changes local to this command.
 952          * (This is unnecessary, but harmless, for lazy VACUUM.)
 953          */
 954         GetUserIdAndSecContext(&save_userid, &save_sec_context);
 955         SetUserIdAndSecContext(onerel->rd_rel->relowner,
 956                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
 957         save_nestlevel = NewGUCNestLevel();
 958
 959         /*
 960          * Do the actual work --- either FULL or "lazy" vacuum
 961          */
 962         if (vacstmt->options & VACOPT_FULL)
 963         {
 964                 /* close relation before vacuuming, but hold lock until commit */
 965                 relation_close(onerel, NoLock);
 966                 onerel = NULL;
 967
 968                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
 969                 cluster_rel(relid, InvalidOid, false,
 970                                         (vacstmt->options & VACOPT_VERBOSE) != 0,
 971                                         vacstmt->freeze_min_age, vacstmt->freeze_table_age);
 972                 heldoff = false;
 973         }
 974         else
 975                 heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
 976
 977         /* Roll back any GUC changes executed by index functions */
 978         AtEOXact_GUC(false, save_nestlevel);
 979
 980         /* Restore userid and security context */
 981         SetUserIdAndSecContext(save_userid, save_sec_context);
 982
 983         /* all done with this class, but hold lock until commit */
 984         if (onerel)
 985                 relation_close(onerel, NoLock);
 986
 987         /*
 988          * Complete the transaction and free all temporary memory used.
 989          */
 990         PopActiveSnapshot();
 991         CommitTransactionCommand();
 992
 993         /* now we can allow interrupts again, if disabled */
 994         if (heldoff)
 995                 RESUME_INTERRUPTS();
 996
 997         /*
 998          * If the relation has a secondary toast rel, vacuum that too while we
 999          * still hold the session lock on the master table.  Note however that
1000          * "analyze" will not get done on the toast table.      This is good, because
1001          * the toaster always uses hardcoded index access and statistics are
1002          * totally unimportant for toast relations.
1003          */
1004         if (toast_relid != InvalidOid)
1005                 vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
1006
1007         /*
1008          * Now release the session-level lock on the master table.
1009          */
1010         UnlockRelationIdForSession(&onerelid, lmode);
1011 }
1012
1013
1014 /*
1015  * Open all the indexes of the given relation, obtaining the specified kind
1016  * of lock on each.  Return an array of Relation pointers for the indexes
1017  * into *Irel, and the number of indexes into *nindexes.
1018  */
1019 void
1020 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1021                                  int *nindexes, Relation **Irel)
1022 {
1023         List       *indexoidlist;
1024         ListCell   *indexoidscan;
1025         int                     i;
1026
1027         Assert(lockmode != NoLock);
1028
1029         indexoidlist = RelationGetIndexList(relation);
1030
1031         *nindexes = list_length(indexoidlist);
1032
1033         if (*nindexes > 0)
1034                 *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
1035         else
1036                 *Irel = NULL;
1037
1038         i = 0;
1039         foreach(indexoidscan, indexoidlist)
1040         {
1041                 Oid                     indexoid = lfirst_oid(indexoidscan);
1042
1043                 (*Irel)[i++] = index_open(indexoid, lockmode);
1044         }
1045
1046         list_free(indexoidlist);
1047 }
1048
1049 /*
1050  * Release the resources acquired by vac_open_indexes.  Optionally release
1051  * the locks (say NoLock to keep 'em).
1052  */
1053 void
1054 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1055 {
1056         if (Irel == NULL)
1057                 return;
1058
1059         while (nindexes--)
1060         {
1061                 Relation        ind = Irel[nindexes];
1062
1063                 index_close(ind, lockmode);
1064         }
1065         pfree(Irel);
1066 }
1067
1068 /*
1069  * vacuum_delay_point --- check for interrupts and cost-based delay.
1070  *
1071  * This should be called in each major loop of VACUUM processing,
1072  * typically once per page processed.
1073  */
1074 void
1075 vacuum_delay_point(void)
1076 {
1077         /* Always check for interrupts */
1078         CHECK_FOR_INTERRUPTS();
1079
1080         /* Nap if appropriate */
1081         if (VacuumCostActive && !InterruptPending &&
1082                 VacuumCostBalance >= VacuumCostLimit)
1083         {
1084                 int                     msec;
1085
1086                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1087                 if (msec > VacuumCostDelay * 4)
1088                         msec = VacuumCostDelay * 4;
1089
1090                 pg_usleep(msec * 1000L);
1091
1092                 VacuumCostBalance = 0;
1093
1094                 /* update balance values for workers */
1095                 AutoVacuumUpdateDelay();
1096
1097                 /* Might have gotten an interrupt while sleeping */
1098                 CHECK_FOR_INTERRUPTS();
1099         }
1100 }