granicus.if.org Git - postgresql/blob - src/backend/commands/vacuum.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * vacuum.c
   4  *        The postgres vacuum cleaner.
   5  *
   6  * This file now includes only control and dispatch code for VACUUM and
   7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
   8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
   9  * in cluster.c.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  *
  16  * IDENTIFICATION
  17  *        src/backend/commands/vacuum.c
  18  *
  19  *-------------------------------------------------------------------------
  20  */
  21 #include "postgres.h"
  22
  23 #include <math.h>
  24
  25 #include "access/clog.h"
  26 #include "access/genam.h"
  27 #include "access/heapam.h"
  28 #include "access/transam.h"
  29 #include "access/xact.h"
  30 #include "catalog/namespace.h"
  31 #include "catalog/pg_database.h"
  32 #include "catalog/pg_namespace.h"
  33 #include "commands/cluster.h"
  34 #include "commands/vacuum.h"
  35 #include "miscadmin.h"
  36 #include "pgstat.h"
  37 #include "postmaster/autovacuum.h"
  38 #include "storage/bufmgr.h"
  39 #include "storage/lmgr.h"
  40 #include "storage/proc.h"
  41 #include "storage/procarray.h"
  42 #include "utils/acl.h"
  43 #include "utils/fmgroids.h"
  44 #include "utils/guc.h"
  45 #include "utils/memutils.h"
  46 #include "utils/snapmgr.h"
  47 #include "utils/syscache.h"
  48 #include "utils/tqual.h"
  49
  50
/*
 * GUC parameters
 *
 * vacuum_set_xid_limits() falls back to these settings when its caller
 * passes a negative value for the corresponding argument.
 */
int			vacuum_freeze_min_age;
int			vacuum_freeze_table_age;


/*
 * A few variables that don't seem worth passing around as parameters.
 *
 * vac_context is created by vacuum() as a child of PortalContext, holds the
 * storage that must survive across the per-relation transactions, and is
 * deleted (and reset to NULL) at the end of vacuum().
 *
 * vac_strategy is the buffer access strategy chosen in vacuum(): either the
 * one supplied by the caller or a BAS_VACUUM strategy allocated in
 * vac_context.
 */
static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;


/*
 * non-export function prototypes
 *
 * These routines are private to this file; get_rel_oids() builds the list
 * of relations that vacuum() loops over, and vacuum_rel() is invoked by
 * vacuum() once per relation when VACUUM was requested.
 */
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
static void vac_truncate_clog(TransactionId frozenXID);
static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
		   bool for_wraparound);
  68
  69
  70 /*
  71  * Primary entry point for VACUUM and ANALYZE commands.
  72  *
  73  * relid is normally InvalidOid; if it is not, then it provides the relation
  74  * OID to be processed, and vacstmt->relation is ignored.  (The non-invalid
  75  * case is currently only used by autovacuum.)
  76  *
  77  * do_toast is passed as FALSE by autovacuum, because it processes TOAST
  78  * tables separately.
  79  *
  80  * for_wraparound is used by autovacuum to let us know when it's forcing
  81  * a vacuum for wraparound, which should not be auto-canceled.
  82  *
  83  * bstrategy is normally given as NULL, but in autovacuum it can be passed
  84  * in to use the same buffer strategy object across multiple vacuum() calls.
  85  *
  86  * isTopLevel should be passed down from ProcessUtility.
  87  *
  88  * It is the caller's responsibility that vacstmt and bstrategy
  89  * (if given) be allocated in a memory context that won't disappear
  90  * at transaction commit.
  91  */
  92 void
  93 vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
  94            BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
  95 {
  96         const char *stmttype;
  97         volatile bool in_outer_xact,
  98                                 use_own_xacts;
  99         List       *relations;
 100
 101         /* sanity checks on options */
 102         Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
 103         Assert((vacstmt->options & VACOPT_VACUUM) ||
 104                    !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
 105         Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
 106
 107         stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 108
 109         /*
 110          * We cannot run VACUUM inside a user transaction block; if we were inside
 111          * a transaction, then our commit- and start-transaction-command calls
 112          * would not have the intended effect!  There are numerous other subtle
 113          * dependencies on this, too.
 114          *
 115          * ANALYZE (without VACUUM) can run either way.
 116          */
 117         if (vacstmt->options & VACOPT_VACUUM)
 118         {
 119                 PreventTransactionChain(isTopLevel, stmttype);
 120                 in_outer_xact = false;
 121         }
 122         else
 123                 in_outer_xact = IsInTransactionChain(isTopLevel);
 124
 125         /*
 126          * Send info about dead objects to the statistics collector, unless we are
 127          * in autovacuum --- autovacuum.c does this for itself.
 128          */
 129         if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 130                 pgstat_vacuum_stat();
 131
 132         /*
 133          * Create special memory context for cross-transaction storage.
 134          *
 135          * Since it is a child of PortalContext, it will go away eventually even
 136          * if we suffer an error; there's no need for special abort cleanup logic.
 137          */
 138         vac_context = AllocSetContextCreate(PortalContext,
 139                                                                                 "Vacuum",
 140                                                                                 ALLOCSET_DEFAULT_MINSIZE,
 141                                                                                 ALLOCSET_DEFAULT_INITSIZE,
 142                                                                                 ALLOCSET_DEFAULT_MAXSIZE);
 143
 144         /*
 145          * If caller didn't give us a buffer strategy object, make one in the
 146          * cross-transaction memory context.
 147          */
 148         if (bstrategy == NULL)
 149         {
 150                 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
 151
 152                 bstrategy = GetAccessStrategy(BAS_VACUUM);
 153                 MemoryContextSwitchTo(old_context);
 154         }
 155         vac_strategy = bstrategy;
 156
 157         /*
 158          * Build list of relations to process, unless caller gave us one. (If we
 159          * build one, we put it in vac_context for safekeeping.)
 160          */
 161         relations = get_rel_oids(relid, vacstmt->relation);
 162
 163         /*
 164          * Decide whether we need to start/commit our own transactions.
 165          *
 166          * For VACUUM (with or without ANALYZE): always do so, so that we can
 167          * release locks as soon as possible.  (We could possibly use the outer
 168          * transaction for a one-table VACUUM, but handling TOAST tables would be
 169          * problematic.)
 170          *
 171          * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
 172          * start/commit our own transactions.  Also, there's no need to do so if
 173          * only processing one relation.  For multiple relations when not within a
 174          * transaction block, and also in an autovacuum worker, use own
 175          * transactions so we can release locks sooner.
 176          */
 177         if (vacstmt->options & VACOPT_VACUUM)
 178                 use_own_xacts = true;
 179         else
 180         {
 181                 Assert(vacstmt->options & VACOPT_ANALYZE);
 182                 if (IsAutoVacuumWorkerProcess())
 183                         use_own_xacts = true;
 184                 else if (in_outer_xact)
 185                         use_own_xacts = false;
 186                 else if (list_length(relations) > 1)
 187                         use_own_xacts = true;
 188                 else
 189                         use_own_xacts = false;
 190         }
 191
 192         /*
 193          * vacuum_rel expects to be entered with no transaction active; it will
 194          * start and commit its own transaction.  But we are called by an SQL
 195          * command, and so we are executing inside a transaction already. We
 196          * commit the transaction started in PostgresMain() here, and start
 197          * another one before exiting to match the commit waiting for us back in
 198          * PostgresMain().
 199          */
 200         if (use_own_xacts)
 201         {
 202                 /* ActiveSnapshot is not set by autovacuum */
 203                 if (ActiveSnapshotSet())
 204                         PopActiveSnapshot();
 205
 206                 /* matches the StartTransaction in PostgresMain() */
 207                 CommitTransactionCommand();
 208         }
 209
 210         /* Turn vacuum cost accounting on or off */
 211         PG_TRY();
 212         {
 213                 ListCell   *cur;
 214
 215                 VacuumCostActive = (VacuumCostDelay > 0);
 216                 VacuumCostBalance = 0;
 217                 VacuumPageHit = 0;
 218                 VacuumPageMiss = 0;
 219                 VacuumPageDirty = 0;
 220
 221                 /*
 222                  * Loop to process each selected relation.
 223                  */
 224                 foreach(cur, relations)
 225                 {
 226                         Oid                     relid = lfirst_oid(cur);
 227
 228                         if (vacstmt->options & VACOPT_VACUUM)
 229                         {
 230                                 if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
 231                                         continue;
 232                         }
 233
 234                         if (vacstmt->options & VACOPT_ANALYZE)
 235                         {
 236                                 /*
 237                                  * If using separate xacts, start one for analyze. Otherwise,
 238                                  * we can use the outer transaction.
 239                                  */
 240                                 if (use_own_xacts)
 241                                 {
 242                                         StartTransactionCommand();
 243                                         /* functions in indexes may want a snapshot set */
 244                                         PushActiveSnapshot(GetTransactionSnapshot());
 245                                 }
 246
 247                                 analyze_rel(relid, vacstmt, vac_strategy);
 248
 249                                 if (use_own_xacts)
 250                                 {
 251                                         PopActiveSnapshot();
 252                                         CommitTransactionCommand();
 253                                 }
 254                         }
 255                 }
 256         }
 257         PG_CATCH();
 258         {
 259                 /* Make sure cost accounting is turned off after error */
 260                 VacuumCostActive = false;
 261                 PG_RE_THROW();
 262         }
 263         PG_END_TRY();
 264
 265         /* Turn off vacuum cost accounting */
 266         VacuumCostActive = false;
 267
 268         /*
 269          * Finish up processing.
 270          */
 271         if (use_own_xacts)
 272         {
 273                 /* here, we are not in a transaction */
 274
 275                 /*
 276                  * This matches the CommitTransaction waiting for us in
 277                  * PostgresMain().
 278                  */
 279                 StartTransactionCommand();
 280         }
 281
 282         if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
 283         {
 284                 /*
 285                  * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
 286                  * (autovacuum.c does this for itself.)
 287                  */
 288                 vac_update_datfrozenxid();
 289         }
 290
 291         /*
 292          * Clean up working storage --- note we must do this after
 293          * StartTransactionCommand, else we might be trying to delete the active
 294          * context!
 295          */
 296         MemoryContextDelete(vac_context);
 297         vac_context = NULL;
 298 }
 299
 300 /*
 301  * Build a list of Oids for each relation to be processed
 302  *
 303  * The list is built in vac_context so that it will survive across our
 304  * per-relation transactions.
 305  */
 306 static List *
 307 get_rel_oids(Oid relid, const RangeVar *vacrel)
 308 {
 309         List       *oid_list = NIL;
 310         MemoryContext oldcontext;
 311
 312         /* OID supplied by VACUUM's caller? */
 313         if (OidIsValid(relid))
 314         {
 315                 oldcontext = MemoryContextSwitchTo(vac_context);
 316                 oid_list = lappend_oid(oid_list, relid);
 317                 MemoryContextSwitchTo(oldcontext);
 318         }
 319         else if (vacrel)
 320         {
 321                 /* Process a specific relation */
 322                 Oid                     relid;
 323
 324                 /*
 325                  * Since we don't take a lock here, the relation might be gone,
 326                  * or the RangeVar might no longer refer to the OID we look up
 327                  * here.  In the former case, VACUUM will do nothing; in the
 328                  * latter case, it will process the OID we looked up here, rather
 329                  * than the new one.  Neither is ideal, but there's little practical
 330                  * alternative, since we're going to commit this transaction and
 331                  * begin a new one between now and then.
 332                  */
 333                 relid = RangeVarGetRelid(vacrel, NoLock, false);
 334
 335                 /* Make a relation list entry for this guy */
 336                 oldcontext = MemoryContextSwitchTo(vac_context);
 337                 oid_list = lappend_oid(oid_list, relid);
 338                 MemoryContextSwitchTo(oldcontext);
 339         }
 340         else
 341         {
 342                 /* Process all plain relations listed in pg_class */
 343                 Relation        pgclass;
 344                 HeapScanDesc scan;
 345                 HeapTuple       tuple;
 346                 ScanKeyData key;
 347
 348                 ScanKeyInit(&key,
 349                                         Anum_pg_class_relkind,
 350                                         BTEqualStrategyNumber, F_CHAREQ,
 351                                         CharGetDatum(RELKIND_RELATION));
 352
 353                 pgclass = heap_open(RelationRelationId, AccessShareLock);
 354
 355                 scan = heap_beginscan(pgclass, SnapshotNow, 1, &key);
 356
 357                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 358                 {
 359                         /* Make a relation list entry for this guy */
 360                         oldcontext = MemoryContextSwitchTo(vac_context);
 361                         oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
 362                         MemoryContextSwitchTo(oldcontext);
 363                 }
 364
 365                 heap_endscan(scan);
 366                 heap_close(pgclass, AccessShareLock);
 367         }
 368
 369         return oid_list;
 370 }
 371
 372 /*
 373  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 374  */
 375 void
 376 vacuum_set_xid_limits(int freeze_min_age,
 377                                           int freeze_table_age,
 378                                           bool sharedRel,
 379                                           TransactionId *oldestXmin,
 380                                           TransactionId *freezeLimit,
 381                                           TransactionId *freezeTableLimit)
 382 {
 383         int                     freezemin;
 384         TransactionId limit;
 385         TransactionId safeLimit;
 386
 387         /*
 388          * We can always ignore processes running lazy vacuum.  This is because we
 389          * use these values only for deciding which tuples we must keep in the
 390          * tables.      Since lazy vacuum doesn't write its XID anywhere, it's safe to
 391          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
 392          * a full vacuum, but keep in mind that only one vacuum process can be
 393          * working on a particular table at any time, and that each vacuum is
 394          * always an independent transaction.
 395          */
 396         *oldestXmin = GetOldestXmin(sharedRel, true);
 397
 398         Assert(TransactionIdIsNormal(*oldestXmin));
 399
 400         /*
 401          * Determine the minimum freeze age to use: as specified by the caller, or
 402          * vacuum_freeze_min_age, but in any case not more than half
 403          * autovacuum_freeze_max_age, so that autovacuums to prevent XID
 404          * wraparound won't occur too frequently.
 405          */
 406         freezemin = freeze_min_age;
 407         if (freezemin < 0)
 408                 freezemin = vacuum_freeze_min_age;
 409         freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
 410         Assert(freezemin >= 0);
 411
 412         /*
 413          * Compute the cutoff XID, being careful not to generate a "permanent" XID
 414          */
 415         limit = *oldestXmin - freezemin;
 416         if (!TransactionIdIsNormal(limit))
 417                 limit = FirstNormalTransactionId;
 418
 419         /*
 420          * If oldestXmin is very far back (in practice, more than
 421          * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
 422          * freeze age of zero.
 423          */
 424         safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
 425         if (!TransactionIdIsNormal(safeLimit))
 426                 safeLimit = FirstNormalTransactionId;
 427
 428         if (TransactionIdPrecedes(limit, safeLimit))
 429         {
 430                 ereport(WARNING,
 431                                 (errmsg("oldest xmin is far in the past"),
 432                                  errhint("Close open transactions soon to avoid wraparound problems.")));
 433                 limit = *oldestXmin;
 434         }
 435
 436         *freezeLimit = limit;
 437
 438         if (freezeTableLimit != NULL)
 439         {
 440                 int                     freezetable;
 441
 442                 /*
 443                  * Determine the table freeze age to use: as specified by the caller,
 444                  * or vacuum_freeze_table_age, but in any case not more than
 445                  * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
 446                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
 447                  * before anti-wraparound autovacuum is launched.
 448                  */
 449                 freezetable = freeze_min_age;
 450                 if (freezetable < 0)
 451                         freezetable = vacuum_freeze_table_age;
 452                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
 453                 Assert(freezetable >= 0);
 454
 455                 /*
 456                  * Compute the cutoff XID, being careful not to generate a "permanent"
 457                  * XID.
 458                  */
 459                 limit = ReadNewTransactionId() - freezetable;
 460                 if (!TransactionIdIsNormal(limit))
 461                         limit = FirstNormalTransactionId;
 462
 463                 *freezeTableLimit = limit;
 464         }
 465 }
 466
 467
 468 /*
 469  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
 470  *
 471  *              If we scanned the whole relation then we should just use the count of
 472  *              live tuples seen; but if we did not, we should not trust the count
 473  *              unreservedly, especially not in VACUUM, which may have scanned a quite
 474  *              nonrandom subset of the table.  When we have only partial information,
 475  *              we take the old value of pg_class.reltuples as a measurement of the
 476  *              tuple density in the unscanned pages.
 477  *
 478  *              This routine is shared by VACUUM and ANALYZE.
 479  */
 480 double
 481 vac_estimate_reltuples(Relation relation, bool is_analyze,
 482                                            BlockNumber total_pages,
 483                                            BlockNumber scanned_pages,
 484                                            double scanned_tuples)
 485 {
 486         BlockNumber old_rel_pages = relation->rd_rel->relpages;
 487         double          old_rel_tuples = relation->rd_rel->reltuples;
 488         double          old_density;
 489         double          new_density;
 490         double          multiplier;
 491         double          updated_density;
 492
 493         /* If we did scan the whole table, just use the count as-is */
 494         if (scanned_pages >= total_pages)
 495                 return scanned_tuples;
 496
 497         /*
 498          * If scanned_pages is zero but total_pages isn't, keep the existing value
 499          * of reltuples.  (Note: callers should avoid updating the pg_class
 500          * statistics in this situation, since no new information has been
 501          * provided.)
 502          */
 503         if (scanned_pages == 0)
 504                 return old_rel_tuples;
 505
 506         /*
 507          * If old value of relpages is zero, old density is indeterminate; we
 508          * can't do much except scale up scanned_tuples to match total_pages.
 509          */
 510         if (old_rel_pages == 0)
 511                 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 512
 513         /*
 514          * Okay, we've covered the corner cases.  The normal calculation is to
 515          * convert the old measurement to a density (tuples per page), then update
 516          * the density using an exponential-moving-average approach, and finally
 517          * compute reltuples as updated_density * total_pages.
 518          *
 519          * For ANALYZE, the moving average multiplier is just the fraction of the
 520          * table's pages we scanned.  This is equivalent to assuming that the
 521          * tuple density in the unscanned pages didn't change.  Of course, it
 522          * probably did, if the new density measurement is different. But over
 523          * repeated cycles, the value of reltuples will converge towards the
 524          * correct value, if repeated measurements show the same new density.
 525          *
 526          * For VACUUM, the situation is a bit different: we have looked at a
 527          * nonrandom sample of pages, but we know for certain that the pages we
 528          * didn't look at are precisely the ones that haven't changed lately.
 529          * Thus, there is a reasonable argument for doing exactly the same thing
 530          * as for the ANALYZE case, that is use the old density measurement as the
 531          * value for the unscanned pages.
 532          *
 533          * This logic could probably use further refinement.
 534          */
 535         old_density = old_rel_tuples / old_rel_pages;
 536         new_density = scanned_tuples / scanned_pages;
 537         multiplier = (double) scanned_pages / (double) total_pages;
 538         updated_density = old_density + (new_density - old_density) * multiplier;
 539         return floor(updated_density * total_pages + 0.5);
 540 }
 541
 542
 543 /*
 544  *      vac_update_relstats() -- update statistics for one relation
 545  *
 546  *              Update the whole-relation statistics that are kept in its pg_class
 547  *              row.  There are additional stats that will be updated if we are
 548  *              doing ANALYZE, but we always update these stats.  This routine works
 549  *              for both index and heap relation entries in pg_class.
 550  *
 551  *              We violate transaction semantics here by overwriting the rel's
 552  *              existing pg_class tuple with the new values.  This is reasonably
 553  *              safe since the new values are correct whether or not this transaction
 554  *              commits.  The reason for this is that if we updated these tuples in
 555  *              the usual way, vacuuming pg_class itself wouldn't work very well ---
 556  *              by the time we got done with a vacuum cycle, most of the tuples in
 557  *              pg_class would've been obsoleted.  Of course, this only works for
 558  *              fixed-size never-null columns, but these are.
 559  *
 560  *              Note another assumption: that two VACUUMs/ANALYZEs on a table can't
 561  *              run in parallel, nor can VACUUM/ANALYZE run in parallel with a
 562  *              schema alteration such as adding an index, rule, or trigger.  Otherwise
 563  *              our updates of relhasindex etc might overwrite uncommitted updates.
 564  *
 565  *              Another reason for doing it this way is that when we are in a lazy
 566  *              VACUUM and have PROC_IN_VACUUM set, we mustn't do any updates ---
 567  *              somebody vacuuming pg_class might think they could delete a tuple
 568  *              marked with xmin = our xid.
 569  *
 570  *              This routine is shared by VACUUM and ANALYZE.
 571  */
 572 void
 573 vac_update_relstats(Relation relation,
 574                                         BlockNumber num_pages, double num_tuples,
 575                                         BlockNumber num_all_visible_pages,
 576                                         bool hasindex, TransactionId frozenxid)
 577 {
 578         Oid                     relid = RelationGetRelid(relation);
 579         Relation        rd;
 580         HeapTuple       ctup;
 581         Form_pg_class pgcform;
 582         bool            dirty;
 583
 584         rd = heap_open(RelationRelationId, RowExclusiveLock);
 585
 586         /* Fetch a copy of the tuple to scribble on */
 587         ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
 588         if (!HeapTupleIsValid(ctup))
 589                 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
 590                          relid);
 591         pgcform = (Form_pg_class) GETSTRUCT(ctup);
 592
 593         /* Apply required updates, if any, to copied tuple */
 594
 595         dirty = false;
 596         if (pgcform->relpages != (int32) num_pages)
 597         {
 598                 pgcform->relpages = (int32) num_pages;
 599                 dirty = true;
 600         }
 601         if (pgcform->reltuples != (float4) num_tuples)
 602         {
 603                 pgcform->reltuples = (float4) num_tuples;
 604                 dirty = true;
 605         }
 606         if (pgcform->relallvisible != (int32) num_all_visible_pages)
 607         {
 608                 pgcform->relallvisible = (int32) num_all_visible_pages;
 609                 dirty = true;
 610         }
 611         if (pgcform->relhasindex != hasindex)
 612         {
 613                 pgcform->relhasindex = hasindex;
 614                 dirty = true;
 615         }
 616
 617         /*
 618          * If we have discovered that there are no indexes, then there's no
 619          * primary key either.  This could be done more thoroughly...
 620          */
 621         if (pgcform->relhaspkey && !hasindex)
 622         {
 623                 pgcform->relhaspkey = false;
 624                 dirty = true;
 625         }
 626
 627         /* We also clear relhasrules and relhastriggers if needed */
 628         if (pgcform->relhasrules && relation->rd_rules == NULL)
 629         {
 630                 pgcform->relhasrules = false;
 631                 dirty = true;
 632         }
 633         if (pgcform->relhastriggers && relation->trigdesc == NULL)
 634         {
 635                 pgcform->relhastriggers = false;
 636                 dirty = true;
 637         }
 638
 639         /*
 640          * relfrozenxid should never go backward.  Caller can pass
 641          * InvalidTransactionId if it has no new data.
 642          */
 643         if (TransactionIdIsNormal(frozenxid) &&
 644                 TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid))
 645         {
 646                 pgcform->relfrozenxid = frozenxid;
 647                 dirty = true;
 648         }
 649
 650         /* If anything changed, write out the tuple. */
 651         if (dirty)
 652                 heap_inplace_update(rd, ctup);
 653
 654         heap_close(rd, RowExclusiveLock);
 655 }
 656
 657
 658 /*
 659  *      vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
 660  *
 661  *              Update pg_database's datfrozenxid entry for our database to be the
 662  *              minimum of the pg_class.relfrozenxid values.  If we are able to
 663  *              advance pg_database.datfrozenxid, also try to truncate pg_clog.
 664  *
 665  *              We violate transaction semantics here by overwriting the database's
 666  *              existing pg_database tuple with the new value.  This is reasonably
 667  *              safe since the new value is correct whether or not this transaction
 668  *              commits.  As with vac_update_relstats, this avoids leaving dead tuples
 669  *              behind after a VACUUM.
 670  */
 671 void
 672 vac_update_datfrozenxid(void)
 673 {
 674         HeapTuple       tuple;
 675         Form_pg_database dbform;
 676         Relation        relation;
 677         SysScanDesc scan;
 678         HeapTuple       classTup;
 679         TransactionId newFrozenXid;
 680         bool            dirty = false;
 681
 682         /*
 683          * Initialize the "min" calculation with GetOldestXmin, which is a
 684          * reasonable approximation to the minimum relfrozenxid for not-yet-
 685          * committed pg_class entries for new tables; see AddNewRelationTuple().
 686          * Se we cannot produce a wrong minimum by starting with this.
 687          */
 688         newFrozenXid = GetOldestXmin(true, true);
 689
 690         /*
 691          * We must seqscan pg_class to find the minimum Xid, because there is no
 692          * index that can help us here.
 693          */
 694         relation = heap_open(RelationRelationId, AccessShareLock);
 695
 696         scan = systable_beginscan(relation, InvalidOid, false,
 697                                                           SnapshotNow, 0, NULL);
 698
 699         while ((classTup = systable_getnext(scan)) != NULL)
 700         {
 701                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
 702
 703                 /*
 704                  * Only consider heap and TOAST tables (anything else should have
 705                  * InvalidTransactionId in relfrozenxid anyway.)
 706                  */
 707                 if (classForm->relkind != RELKIND_RELATION &&
 708                         classForm->relkind != RELKIND_TOASTVALUE)
 709                         continue;
 710
 711                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
 712
 713                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
 714                         newFrozenXid = classForm->relfrozenxid;
 715         }
 716
 717         /* we're done with pg_class */
 718         systable_endscan(scan);
 719         heap_close(relation, AccessShareLock);
 720
 721         Assert(TransactionIdIsNormal(newFrozenXid));
 722
 723         /* Now fetch the pg_database tuple we need to update. */
 724         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
 725
 726         /* Fetch a copy of the tuple to scribble on */
 727         tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
 728         if (!HeapTupleIsValid(tuple))
 729                 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
 730         dbform = (Form_pg_database) GETSTRUCT(tuple);
 731
 732         /*
 733          * Don't allow datfrozenxid to go backward (probably can't happen anyway);
 734          * and detect the common case where it doesn't go forward either.
 735          */
 736         if (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid))
 737         {
 738                 dbform->datfrozenxid = newFrozenXid;
 739                 dirty = true;
 740         }
 741
 742         if (dirty)
 743                 heap_inplace_update(relation, tuple);
 744
 745         heap_freetuple(tuple);
 746         heap_close(relation, RowExclusiveLock);
 747
 748         /*
 749          * If we were able to advance datfrozenxid, see if we can truncate
 750          * pg_clog. Also do it if the shared XID-wrap-limit info is stale, since
 751          * this action will update that too.
 752          */
 753         if (dirty || ForceTransactionIdLimitUpdate())
 754                 vac_truncate_clog(newFrozenXid);
 755 }
 756
 757
 758 /*
 759  *      vac_truncate_clog() -- attempt to truncate the commit log
 760  *
 761  *              Scan pg_database to determine the system-wide oldest datfrozenxid,
 762  *              and use it to truncate the transaction commit log (pg_clog).
 763  *              Also update the XID wrap limit info maintained by varsup.c.
 764  *
 765  *              The passed XID is simply the one I just wrote into my pg_database
 766  *              entry.  It's used to initialize the "min" calculation.
 767  *
 768  *              This routine is only invoked when we've managed to change our
 769  *              DB's datfrozenxid entry, or we found that the shared XID-wrap-limit
 770  *              info is stale.
 771  */
 772 static void
 773 vac_truncate_clog(TransactionId frozenXID)
 774 {
 775         TransactionId myXID = GetCurrentTransactionId();
 776         Relation        relation;
 777         HeapScanDesc scan;
 778         HeapTuple       tuple;
 779         Oid                     oldest_datoid;
 780         bool            frozenAlreadyWrapped = false;
 781
 782         /* init oldest_datoid to sync with my frozenXID */
 783         oldest_datoid = MyDatabaseId;
 784
 785         /*
 786          * Scan pg_database to compute the minimum datfrozenxid
 787          *
 788          * Note: we need not worry about a race condition with new entries being
 789          * inserted by CREATE DATABASE.  Any such entry will have a copy of some
 790          * existing DB's datfrozenxid, and that source DB cannot be ours because
 791          * of the interlock against copying a DB containing an active backend.
 792          * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
 793          * concurrently modify the datfrozenxid's of different databases, the
 794          * worst possible outcome is that pg_clog is not truncated as aggressively
 795          * as it could be.
 796          */
 797         relation = heap_open(DatabaseRelationId, AccessShareLock);
 798
 799         scan = heap_beginscan(relation, SnapshotNow, 0, NULL);
 800
 801         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
 802         {
 803                 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
 804
 805                 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
 806
 807                 if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
 808                         frozenAlreadyWrapped = true;
 809                 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
 810                 {
 811                         frozenXID = dbform->datfrozenxid;
 812                         oldest_datoid = HeapTupleGetOid(tuple);
 813                 }
 814         }
 815
 816         heap_endscan(scan);
 817
 818         heap_close(relation, AccessShareLock);
 819
 820         /*
 821          * Do not truncate CLOG if we seem to have suffered wraparound already;
 822          * the computed minimum XID might be bogus.  This case should now be
 823          * impossible due to the defenses in GetNewTransactionId, but we keep the
 824          * test anyway.
 825          */
 826         if (frozenAlreadyWrapped)
 827         {
 828                 ereport(WARNING,
 829                                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
 830                                  errdetail("You might have already suffered transaction-wraparound data loss.")));
 831                 return;
 832         }
 833
 834         /* Truncate CLOG to the oldest frozenxid */
 835         TruncateCLOG(frozenXID);
 836
 837         /*
 838          * Update the wrap limit for GetNewTransactionId.  Note: this function
 839          * will also signal the postmaster for an(other) autovac cycle if needed.
 840          */
 841         SetTransactionIdLimit(frozenXID, oldest_datoid);
 842 }
 843
 844
 845 /*
 846  *      vacuum_rel() -- vacuum one heap relation
 847  *
 848  *              Doing one heap at a time incurs extra overhead, since we need to
 849  *              check that the heap exists again just before we vacuum it.      The
 850  *              reason that we do this is so that vacuuming can be spread across
 851  *              many small transactions.  Otherwise, two-phase locking would require
 852  *              us to lock the entire database during one pass of the vacuum cleaner.
 853  *
 854  *              At entry and exit, we are not inside a transaction.
 855  */
 856 static bool
 857 vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
 858 {
 859         LOCKMODE        lmode;
 860         Relation        onerel;
 861         LockRelId       onerelid;
 862         Oid                     toast_relid;
 863         Oid                     save_userid;
 864         int                     save_sec_context;
 865         int                     save_nestlevel;
 866
 867         /* Begin a transaction for vacuuming this relation */
 868         StartTransactionCommand();
 869
 870         /*
 871          * Functions in indexes may want a snapshot set.  Also, setting a snapshot
 872          * ensures that RecentGlobalXmin is kept truly recent.
 873          */
 874         PushActiveSnapshot(GetTransactionSnapshot());
 875
 876         if (!(vacstmt->options & VACOPT_FULL))
 877         {
 878                 /*
 879                  * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
 880                  * other concurrent VACUUMs know that they can ignore this one while
 881                  * determining their OldestXmin.  (The reason we don't set it during a
 882                  * full VACUUM is exactly that we may have to run user-defined
 883                  * functions for functional indexes, and we want to make sure that if
 884                  * they use the snapshot set above, any tuples it requires can't get
 885                  * removed from other tables.  An index function that depends on the
 886                  * contents of other tables is arguably broken, but we won't break it
 887                  * here by violating transaction semantics.)
 888                  *
 889                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
 890                  * autovacuum; it's used to avoid canceling a vacuum that was invoked
 891                  * in an emergency.
 892                  *
 893                  * Note: these flags remain set until CommitTransaction or
 894                  * AbortTransaction.  We don't want to clear them until we reset
 895                  * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
 896                  * which is probably Not Good.
 897                  */
 898                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 899                 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
 900                 if (for_wraparound)
 901                         MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
 902                 LWLockRelease(ProcArrayLock);
 903         }
 904
 905         /*
 906          * Check for user-requested abort.      Note we want this to be inside a
 907          * transaction, so xact.c doesn't issue useless WARNING.
 908          */
 909         CHECK_FOR_INTERRUPTS();
 910
 911         /*
 912          * Determine the type of lock we want --- hard exclusive lock for a FULL
 913          * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
 914          * way, we can be sure that no other backend is vacuuming the same table.
 915          */
 916         lmode = (vacstmt->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
 917
 918         /*
 919          * Open the relation and get the appropriate lock on it.
 920          *
 921          * There's a race condition here: the rel may have gone away since the
 922          * last time we saw it.  If so, we don't need to vacuum it.
 923          *
 924          * If we've been asked not to wait for the relation lock, acquire it first
 925          * in non-blocking mode, before calling try_relation_open().
 926          */
 927         if (!(vacstmt->options & VACOPT_NOWAIT))
 928                 onerel = try_relation_open(relid, lmode);
 929         else if (ConditionalLockRelationOid(relid, lmode))
 930                 onerel = try_relation_open(relid, NoLock);
 931         else
 932         {
 933                 onerel = NULL;
 934                 if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
 935                         ereport(LOG,
 936                                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 937                                    errmsg("skipping vacuum of \"%s\" --- lock not available",
 938                                                   vacstmt->relation->relname)));
 939         }
 940
 941         if (!onerel)
 942         {
 943                 PopActiveSnapshot();
 944                 CommitTransactionCommand();
 945                 return false;
 946         }
 947
 948         /*
 949          * Check permissions.
 950          *
 951          * We allow the user to vacuum a table if he is superuser, the table
 952          * owner, or the database owner (but in the latter case, only if it's not
 953          * a shared relation).  pg_class_ownercheck includes the superuser case.
 954          *
 955          * Note we choose to treat permissions failure as a WARNING and keep
 956          * trying to vacuum the rest of the DB --- is this appropriate?
 957          */
 958         if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
 959                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
 960         {
 961                 if (onerel->rd_rel->relisshared)
 962                         ereport(WARNING,
 963                                   (errmsg("skipping \"%s\" --- only superuser can vacuum it",
 964                                                   RelationGetRelationName(onerel))));
 965                 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
 966                         ereport(WARNING,
 967                                         (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
 968                                                         RelationGetRelationName(onerel))));
 969                 else
 970                         ereport(WARNING,
 971                                         (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
 972                                                         RelationGetRelationName(onerel))));
 973                 relation_close(onerel, lmode);
 974                 PopActiveSnapshot();
 975                 CommitTransactionCommand();
 976                 return false;
 977         }
 978
 979         /*
 980          * Check that it's a vacuumable table; we used to do this in
 981          * get_rel_oids() but seems safer to check after we've locked the
 982          * relation.
 983          */
 984         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
 985                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
 986         {
 987                 ereport(WARNING,
 988                                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
 989                                                 RelationGetRelationName(onerel))));
 990                 relation_close(onerel, lmode);
 991                 PopActiveSnapshot();
 992                 CommitTransactionCommand();
 993                 return false;
 994         }
 995
 996         /*
 997          * Silently ignore tables that are temp tables of other backends ---
 998          * trying to vacuum these will lead to great unhappiness, since their
 999          * contents are probably not up-to-date on disk.  (We don't throw a
1000          * warning here; it would just lead to chatter during a database-wide
1001          * VACUUM.)
1002          */
1003         if (RELATION_IS_OTHER_TEMP(onerel))
1004         {
1005                 relation_close(onerel, lmode);
1006                 PopActiveSnapshot();
1007                 CommitTransactionCommand();
1008                 return false;
1009         }
1010
1011         /*
1012          * Get a session-level lock too. This will protect our access to the
1013          * relation across multiple transactions, so that we can vacuum the
1014          * relation's TOAST table (if any) secure in the knowledge that no one is
1015          * deleting the parent relation.
1016          *
1017          * NOTE: this cannot block, even if someone else is waiting for access,
1018          * because the lock manager knows that both lock requests are from the
1019          * same process.
1020          */
1021         onerelid = onerel->rd_lockInfo.lockRelId;
1022         LockRelationIdForSession(&onerelid, lmode);
1023
1024         /*
1025          * Remember the relation's TOAST relation for later, if the caller asked
1026          * us to process it.  In VACUUM FULL, though, the toast table is
1027          * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1028          */
1029         if (do_toast && !(vacstmt->options & VACOPT_FULL))
1030                 toast_relid = onerel->rd_rel->reltoastrelid;
1031         else
1032                 toast_relid = InvalidOid;
1033
1034         /*
1035          * Switch to the table owner's userid, so that any index functions are run
1036          * as that user.  Also lock down security-restricted operations and
1037          * arrange to make GUC variable changes local to this command. (This is
1038          * unnecessary, but harmless, for lazy VACUUM.)
1039          */
1040         GetUserIdAndSecContext(&save_userid, &save_sec_context);
1041         SetUserIdAndSecContext(onerel->rd_rel->relowner,
1042                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
1043         save_nestlevel = NewGUCNestLevel();
1044
1045         /*
1046          * Do the actual work --- either FULL or "lazy" vacuum
1047          */
1048         if (vacstmt->options & VACOPT_FULL)
1049         {
1050                 /* close relation before vacuuming, but hold lock until commit */
1051                 relation_close(onerel, NoLock);
1052                 onerel = NULL;
1053
1054                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1055                 cluster_rel(relid, InvalidOid, false,
1056                                         (vacstmt->options & VACOPT_VERBOSE) != 0,
1057                                         vacstmt->freeze_min_age, vacstmt->freeze_table_age);
1058         }
1059         else
1060                 lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
1061
1062         /* Roll back any GUC changes executed by index functions */
1063         AtEOXact_GUC(false, save_nestlevel);
1064
1065         /* Restore userid and security context */
1066         SetUserIdAndSecContext(save_userid, save_sec_context);
1067
1068         /* all done with this class, but hold lock until commit */
1069         if (onerel)
1070                 relation_close(onerel, NoLock);
1071
1072         /*
1073          * Complete the transaction and free all temporary memory used.
1074          */
1075         PopActiveSnapshot();
1076         CommitTransactionCommand();
1077
1078         /*
1079          * If the relation has a secondary toast rel, vacuum that too while we
1080          * still hold the session lock on the master table.  Note however that
1081          * "analyze" will not get done on the toast table.      This is good, because
1082          * the toaster always uses hardcoded index access and statistics are
1083          * totally unimportant for toast relations.
1084          */
1085         if (toast_relid != InvalidOid)
1086                 vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
1087
1088         /*
1089          * Now release the session-level lock on the master table.
1090          */
1091         UnlockRelationIdForSession(&onerelid, lmode);
1092
1093         /* Report that we really did it. */
1094         return true;
1095 }
1096
1097
1098 /*
1099  * Open all the indexes of the given relation, obtaining the specified kind
1100  * of lock on each.  Return an array of Relation pointers for the indexes
1101  * into *Irel, and the number of indexes into *nindexes.
1102  */
1103 void
1104 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1105                                  int *nindexes, Relation **Irel)
1106 {
1107         List       *indexoidlist;
1108         ListCell   *indexoidscan;
1109         int                     i;
1110
1111         Assert(lockmode != NoLock);
1112
1113         indexoidlist = RelationGetIndexList(relation);
1114
1115         *nindexes = list_length(indexoidlist);
1116
1117         if (*nindexes > 0)
1118                 *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
1119         else
1120                 *Irel = NULL;
1121
1122         i = 0;
1123         foreach(indexoidscan, indexoidlist)
1124         {
1125                 Oid                     indexoid = lfirst_oid(indexoidscan);
1126
1127                 (*Irel)[i++] = index_open(indexoid, lockmode);
1128         }
1129
1130         list_free(indexoidlist);
1131 }
1132
1133 /*
1134  * Release the resources acquired by vac_open_indexes.  Optionally release
1135  * the locks (say NoLock to keep 'em).
1136  */
1137 void
1138 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1139 {
1140         if (Irel == NULL)
1141                 return;
1142
1143         while (nindexes--)
1144         {
1145                 Relation        ind = Irel[nindexes];
1146
1147                 index_close(ind, lockmode);
1148         }
1149         pfree(Irel);
1150 }
1151
1152 /*
1153  * vacuum_delay_point --- check for interrupts and cost-based delay.
1154  *
1155  * This should be called in each major loop of VACUUM processing,
1156  * typically once per page processed.
1157  */
1158 void
1159 vacuum_delay_point(void)
1160 {
1161         /* Always check for interrupts */
1162         CHECK_FOR_INTERRUPTS();
1163
1164         /* Nap if appropriate */
1165         if (VacuumCostActive && !InterruptPending &&
1166                 VacuumCostBalance >= VacuumCostLimit)
1167         {
1168                 int                     msec;
1169
1170                 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1171                 if (msec > VacuumCostDelay * 4)
1172                         msec = VacuumCostDelay * 4;
1173
1174                 pg_usleep(msec * 1000L);
1175
1176                 VacuumCostBalance = 0;
1177
1178                 /* update balance values for workers */
1179                 AutoVacuumUpdateDelay();
1180
1181                 /* Might have gotten an interrupt while sleeping */
1182                 CHECK_FOR_INTERRUPTS();
1183         }
1184 }