Have multixact be truncated by checkpoint, not vacuum

[postgresql] / src / backend / commands / vacuum.c
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 90c413a9880f671981c8e45196029189a826f408..8822a154dccee7552403b1b6bb847c741219b9a4 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -9,7 +9,7 @@
   * in cluster.c.
   *
   *
- * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
@@ -20,9 +20,13 @@
   */
  #include "postgres.h"
  
+#include <math.h>
+
  #include "access/clog.h"
  #include "access/genam.h"
  #include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
  #include "access/transam.h"
  #include "access/xact.h"
  #include "catalog/namespace.h"
@@ -51,6 +55,8 @@
   */
  int                    vacuum_freeze_min_age;
  int                    vacuum_freeze_table_age;
+int                    vacuum_multixact_freeze_min_age;
+int                    vacuum_multixact_freeze_table_age;
  
  
  /* A few variables that don't seem worth passing around as parameters */
@@ -60,9 +66,9 @@ static BufferAccessStrategy vac_strategy;
  
  /* non-export function prototypes */
  static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
-static void vac_truncate_clog(TransactionId frozenXID);
+static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti);
  static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
-                  bool for_wraparound, bool *scanned_all);
+                  bool for_wraparound);
  
  
  /*
@@ -76,7 +82,7 @@ static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
   * tables separately.
   *
   * for_wraparound is used by autovacuum to let us know when it's forcing
- * a vacuum for wraparound, which should not be auto-cancelled.
+ * a vacuum for wraparound, which should not be auto-canceled.
   *
   * bstrategy is normally given as NULL, but in autovacuum it can be passed
   * in to use the same buffer strategy object across multiple vacuum() calls.
@@ -92,8 +98,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
            BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
  {
         const char *stmttype;
-       volatile bool all_rels,
-                               in_outer_xact,
+       volatile bool in_outer_xact,
                                 use_own_xacts;
         List       *relations;
  
@@ -153,9 +158,6 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
         }
         vac_strategy = bstrategy;
  
-       /* Remember whether we are processing everything in the DB */
-       all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL);
-
         /*
          * Build list of relations to process, unless caller gave us one. (If we
          * build one, we put it in vac_context for safekeeping.)
@@ -216,6 +218,9 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
  
                 VacuumCostActive = (VacuumCostDelay > 0);
                 VacuumCostBalance = 0;
+               VacuumPageHit = 0;
+               VacuumPageMiss = 0;
+               VacuumPageDirty = 0;
  
                 /*
                  * Loop to process each selected relation.
@@ -223,12 +228,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
                 foreach(cur, relations)
                 {
                         Oid                     relid = lfirst_oid(cur);
-                       bool            scanned_all = false;
  
                         if (vacstmt->options & VACOPT_VACUUM)
                         {
-                               if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
-                                                               &scanned_all))
+                               if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
                                         continue;
                         }
  
@@ -245,7 +248,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
                                         PushActiveSnapshot(GetTransactionSnapshot());
                                 }
  
-                               analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
+                               analyze_rel(relid, vacstmt, vac_strategy);
  
                                 if (use_own_xacts)
                                 {
@@ -322,7 +325,16 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
                 /* Process a specific relation */
                 Oid                     relid;
  
-               relid = RangeVarGetRelid(vacrel, false);
+               /*
+                * Since we don't take a lock here, the relation might be gone, or the
+                * RangeVar might no longer refer to the OID we look up here.  In the
+                * former case, VACUUM will do nothing; in the latter case, it will
+                * process the OID we looked up here, rather than the new one. Neither
+                * is ideal, but there's little practical alternative, since we're
+                * going to commit this transaction and begin a new one between now
+                * and then.
+                */
+               relid = RangeVarGetRelid(vacrel, NoLock, false);
  
                 /* Make a relation list entry for this guy */
                 oldcontext = MemoryContextSwitchTo(vac_context);
@@ -331,23 +343,26 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
         }
         else
         {
-               /* Process all plain relations listed in pg_class */
+               /*
+                * Process all plain relations and materialized views listed in
+                * pg_class
+                */
                 Relation        pgclass;
                 HeapScanDesc scan;
                 HeapTuple       tuple;
-               ScanKeyData key;
-
-               ScanKeyInit(&key,
-                                       Anum_pg_class_relkind,
-                                       BTEqualStrategyNumber, F_CHAREQ,
-                                       CharGetDatum(RELKIND_RELATION));
  
                 pgclass = heap_open(RelationRelationId, AccessShareLock);
  
-               scan = heap_beginscan(pgclass, SnapshotNow, 1, &key);
+               scan = heap_beginscan_catalog(pgclass, 0, NULL);
  
                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
                 {
+                       Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+                       if (classForm->relkind != RELKIND_RELATION &&
+                               classForm->relkind != RELKIND_MATVIEW)
+                               continue;
+
                         /* Make a relation list entry for this guy */
                         oldcontext = MemoryContextSwitchTo(vac_context);
                         oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
@@ -363,29 +378,54 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
  
  /*
   * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
+ *
+ * The output parameters are:
+ * - oldestXmin is the cutoff value used to distinguish whether tuples are
+ *      DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
+ * - freezeLimit is the Xid below which all Xids are replaced by
+ *      FrozenTransactionId during vacuum.
+ * - xidFullScanLimit (computed from freeze_table_age parameter)
+ *      represents a minimum Xid value; a table whose relfrozenxid is older than
+ *      this will have a full-table vacuum applied to it, to freeze tuples across
+ *      the whole table.  Vacuuming a table younger than this value can use a
+ *      partial scan.
+ * - multiXactCutoff is the value below which all MultiXactIds are removed from
+ *      Xmax.
+ * - mxactFullScanLimit is a value against which a table's relminmxid value is
+ *      compared to produce a full-table vacuum, as with xidFullScanLimit.
+ *
+ * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
+ * not interested; they must be either both NULL or both non-NULL.
   */
  void
-vacuum_set_xid_limits(int freeze_min_age,
+vacuum_set_xid_limits(Relation rel,
+                                         int freeze_min_age,
                                           int freeze_table_age,
-                                         bool sharedRel,
+                                         int multixact_freeze_min_age,
+                                         int multixact_freeze_table_age,
                                           TransactionId *oldestXmin,
                                           TransactionId *freezeLimit,
-                                         TransactionId *freezeTableLimit)
+                                         TransactionId *xidFullScanLimit,
+                                         MultiXactId *multiXactCutoff,
+                                         MultiXactId *mxactFullScanLimit)
  {
         int                     freezemin;
+       int                     mxid_freezemin;
         TransactionId limit;
         TransactionId safeLimit;
+       MultiXactId mxactLimit;
+       MultiXactId safeMxactLimit;
  
         /*
-        * We can always ignore processes running lazy vacuum.  This is because we
+        * We can always ignore processes running lazy vacuum.  This is because we
          * use these values only for deciding which tuples we must keep in the
-        * tables.      Since lazy vacuum doesn't write its XID anywhere, it's safe to
+        * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
          * ignore it.  In theory it could be problematic to ignore lazy vacuums in
          * a full vacuum, but keep in mind that only one vacuum process can be
          * working on a particular table at any time, and that each vacuum is
          * always an independent transaction.
          */
-       *oldestXmin = GetOldestXmin(sharedRel, true);
+       *oldestXmin = GetOldestXmin(rel, true);
  
         Assert(TransactionIdIsNormal(*oldestXmin));
  
@@ -427,10 +467,45 @@ vacuum_set_xid_limits(int freeze_min_age,
  
         *freezeLimit = limit;
  
-       if (freezeTableLimit != NULL)
+       /*
+        * Determine the minimum multixact freeze age to use: as specified by
+        * caller, or vacuum_multixact_freeze_min_age, but in any case not more
+        * than half autovacuum_multixact_freeze_max_age, so that autovacuums to
+        * prevent MultiXact wraparound won't occur too frequently.
+        */
+       mxid_freezemin = multixact_freeze_min_age;
+       if (mxid_freezemin < 0)
+               mxid_freezemin = vacuum_multixact_freeze_min_age;
+       mxid_freezemin = Min(mxid_freezemin,
+                                                autovacuum_multixact_freeze_max_age / 2);
+       Assert(mxid_freezemin >= 0);
+
+       /* compute the cutoff multi, being careful to generate a valid value */
+       mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
+       if (mxactLimit < FirstMultiXactId)
+               mxactLimit = FirstMultiXactId;
+
+       safeMxactLimit =
+               ReadNextMultiXactId() - autovacuum_multixact_freeze_max_age;
+       if (safeMxactLimit < FirstMultiXactId)
+               safeMxactLimit = FirstMultiXactId;
+
+       if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
+       {
+               ereport(WARNING,
+                               (errmsg("oldest multixact is far in the past"),
+                                errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
+               mxactLimit = safeMxactLimit;
+       }
+
+       *multiXactCutoff = mxactLimit;
+
+       if (xidFullScanLimit != NULL)
         {
                 int                     freezetable;
  
+               Assert(mxactFullScanLimit != NULL);
+
                 /*
                  * Determine the table freeze age to use: as specified by the caller,
                  * or vacuum_freeze_table_age, but in any case not more than
@@ -438,22 +513,125 @@ vacuum_set_xid_limits(int freeze_min_age,
                  * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
                  * before anti-wraparound autovacuum is launched.
                  */
-               freezetable = freeze_min_age;
+               freezetable = freeze_table_age;
                 if (freezetable < 0)
                         freezetable = vacuum_freeze_table_age;
                 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
                 Assert(freezetable >= 0);
  
                 /*
-                * Compute the cutoff XID, being careful not to generate a "permanent"
-                * XID.
+                * Compute XID limit causing a full-table vacuum, being careful not to
+                * generate a "permanent" XID.
                  */
                 limit = ReadNewTransactionId() - freezetable;
                 if (!TransactionIdIsNormal(limit))
                         limit = FirstNormalTransactionId;
  
-               *freezeTableLimit = limit;
+               *xidFullScanLimit = limit;
+
+               /*
+                * Similar to the above, determine the table freeze age to use for
+                * multixacts: as specified by the caller, or
+                * vacuum_multixact_freeze_table_age, but in any case not more than
+                * autovacuum_multixact_freeze_max_age * 0.95, so that if you have
+                * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
+                * freeze multixacts before anti-wraparound autovacuum is launched.
+                */
+               freezetable = multixact_freeze_table_age;
+               if (freezetable < 0)
+                       freezetable = vacuum_multixact_freeze_table_age;
+               freezetable = Min(freezetable,
+                                                 autovacuum_multixact_freeze_max_age * 0.95);
+               Assert(freezetable >= 0);
+
+               /*
+                * Compute MultiXact limit causing a full-table vacuum, being careful
+                * to generate a valid MultiXact value.
+                */
+               mxactLimit = ReadNextMultiXactId() - freezetable;
+               if (mxactLimit < FirstMultiXactId)
+                       mxactLimit = FirstMultiXactId;
+
+               *mxactFullScanLimit = mxactLimit;
         }
+       else
+       {
+               Assert(mxactFullScanLimit == NULL);
+       }
+}
+
+/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ *             If we scanned the whole relation then we should just use the count of
+ *             live tuples seen; but if we did not, we should not trust the count
+ *             unreservedly, especially not in VACUUM, which may have scanned a quite
+ *             nonrandom subset of the table.  When we have only partial information,
+ *             we take the old value of pg_class.reltuples as a measurement of the
+ *             tuple density in the unscanned pages.
+ *
+ *             This routine is shared by VACUUM and ANALYZE.
+ */
+double
+vac_estimate_reltuples(Relation relation, bool is_analyze,
+                                          BlockNumber total_pages,
+                                          BlockNumber scanned_pages,
+                                          double scanned_tuples)
+{
+       BlockNumber old_rel_pages = relation->rd_rel->relpages;
+       double          old_rel_tuples = relation->rd_rel->reltuples;
+       double          old_density;
+       double          new_density;
+       double          multiplier;
+       double          updated_density;
+
+       /* If we did scan the whole table, just use the count as-is */
+       if (scanned_pages >= total_pages)
+               return scanned_tuples;
+
+       /*
+        * If scanned_pages is zero but total_pages isn't, keep the existing value
+        * of reltuples.  (Note: callers should avoid updating the pg_class
+        * statistics in this situation, since no new information has been
+        * provided.)
+        */
+       if (scanned_pages == 0)
+               return old_rel_tuples;
+
+       /*
+        * If old value of relpages is zero, old density is indeterminate; we
+        * can't do much except scale up scanned_tuples to match total_pages.
+        */
+       if (old_rel_pages == 0)
+               return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+
+       /*
+        * Okay, we've covered the corner cases.  The normal calculation is to
+        * convert the old measurement to a density (tuples per page), then update
+        * the density using an exponential-moving-average approach, and finally
+        * compute reltuples as updated_density * total_pages.
+        *
+        * For ANALYZE, the moving average multiplier is just the fraction of the
+        * table's pages we scanned.  This is equivalent to assuming that the
+        * tuple density in the unscanned pages didn't change.  Of course, it
+        * probably did, if the new density measurement is different. But over
+        * repeated cycles, the value of reltuples will converge towards the
+        * correct value, if repeated measurements show the same new density.
+        *
+        * For VACUUM, the situation is a bit different: we have looked at a
+        * nonrandom sample of pages, but we know for certain that the pages we
+        * didn't look at are precisely the ones that haven't changed lately.
+        * Thus, there is a reasonable argument for doing exactly the same thing
+        * as for the ANALYZE case, that is use the old density measurement as the
+        * value for the unscanned pages.
+        *
+        * This logic could probably use further refinement.
+        */
+       old_density = old_rel_tuples / old_rel_pages;
+       new_density = scanned_tuples / scanned_pages;
+       multiplier = (double) scanned_pages / (double) total_pages;
+       updated_density = old_density + (new_density - old_density) * multiplier;
+       return floor(updated_density * total_pages + 0.5);
  }
  
  
@@ -484,12 +662,14 @@ vacuum_set_xid_limits(int freeze_min_age,
   *             somebody vacuuming pg_class might think they could delete a tuple
   *             marked with xmin = our xid.
   *
- *             This routine is shared by VACUUM and stand-alone ANALYZE.
+ *             This routine is shared by VACUUM and ANALYZE.
   */
  void
  vac_update_relstats(Relation relation,
                                         BlockNumber num_pages, double num_tuples,
-                                       bool hasindex, TransactionId frozenxid)
+                                       BlockNumber num_all_visible_pages,
+                                       bool hasindex, TransactionId frozenxid,
+                                       MultiXactId minmulti)
  {
         Oid                     relid = RelationGetRelid(relation);
         Relation        rd;
@@ -519,6 +699,11 @@ vac_update_relstats(Relation relation,
                 pgcform->reltuples = (float4) num_tuples;
                 dirty = true;
         }
+       if (pgcform->relallvisible != (int32) num_all_visible_pages)
+       {
+               pgcform->relallvisible = (int32) num_all_visible_pages;
+               dirty = true;
+       }
         if (pgcform->relhasindex != hasindex)
         {
                 pgcform->relhasindex = hasindex;
@@ -527,7 +712,7 @@ vac_update_relstats(Relation relation,
  
         /*
          * If we have discovered that there are no indexes, then there's no
-        * primary key either.  This could be done more thoroughly...
+        * primary key either.  This could be done more thoroughly...
          */
         if (pgcform->relhaspkey && !hasindex)
         {
@@ -558,6 +743,14 @@ vac_update_relstats(Relation relation,
                 dirty = true;
         }
  
+       /* relminmxid must never go backward, either */
+       if (MultiXactIdIsValid(minmulti) &&
+               MultiXactIdPrecedes(pgcform->relminmxid, minmulti))
+       {
+               pgcform->relminmxid = minmulti;
+               dirty = true;
+       }
+
         /* If anything changed, write out the tuple. */
         if (dirty)
                 heap_inplace_update(rd, ctup);
@@ -570,11 +763,16 @@ vac_update_relstats(Relation relation,
   *     vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
   *
   *             Update pg_database's datfrozenxid entry for our database to be the
- *             minimum of the pg_class.relfrozenxid values.  If we are able to
- *             advance pg_database.datfrozenxid, also try to truncate pg_clog.
+ *             minimum of the pg_class.relfrozenxid values.
+ *
+ *             Similarly, update our datminmxid to be the minimum of the
+ *             pg_class.relminmxid values.
+ *
+ *             If we are able to advance either pg_database value, also try to
+ *             truncate pg_clog; pg_multixact is left to the next checkpoint.
   *
   *             We violate transaction semantics here by overwriting the database's
- *             existing pg_database tuple with the new value.  This is reasonably
+ *             existing pg_database tuple with the new value.  This is reasonably
   *             safe since the new value is correct whether or not this transaction
   *             commits.  As with vac_update_relstats, this avoids leaving dead tuples
   *             behind after a VACUUM.
@@ -588,15 +786,22 @@ vac_update_datfrozenxid(void)
         SysScanDesc scan;
         HeapTuple       classTup;
         TransactionId newFrozenXid;
+       MultiXactId newMinMulti;
         bool            dirty = false;
  
         /*
          * Initialize the "min" calculation with GetOldestXmin, which is a
          * reasonable approximation to the minimum relfrozenxid for not-yet-
          * committed pg_class entries for new tables; see AddNewRelationTuple().
-        * Se we cannot produce a wrong minimum by starting with this.
+        * So we cannot produce a wrong minimum by starting with this.
          */
-       newFrozenXid = GetOldestXmin(true, true);
+       newFrozenXid = GetOldestXmin(NULL, true);
+
+       /*
+        * Similarly, initialize the MultiXact "min" with the value that would be
+        * used on pg_class for new tables.  See AddNewRelationTuple().
+        */
+       newMinMulti = GetOldestMultiXactId();
  
         /*
          * We must seqscan pg_class to find the minimum Xid, because there is no
@@ -605,24 +810,29 @@ vac_update_datfrozenxid(void)
         relation = heap_open(RelationRelationId, AccessShareLock);
  
         scan = systable_beginscan(relation, InvalidOid, false,
-                                                         SnapshotNow, 0, NULL);
+                                                         NULL, 0, NULL);
  
         while ((classTup = systable_getnext(scan)) != NULL)
         {
                 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
  
                 /*
-                * Only consider heap and TOAST tables (anything else should have
-                * InvalidTransactionId in relfrozenxid anyway.)
+                * Only consider relations able to hold unfrozen XIDs (anything else
+                * should have InvalidTransactionId in relfrozenxid anyway.)
                  */
                 if (classForm->relkind != RELKIND_RELATION &&
+                       classForm->relkind != RELKIND_MATVIEW &&
                         classForm->relkind != RELKIND_TOASTVALUE)
                         continue;
  
                 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+               Assert(MultiXactIdIsValid(classForm->relminmxid));
  
                 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
                         newFrozenXid = classForm->relfrozenxid;
+
+               if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
+                       newMinMulti = classForm->relminmxid;
         }
  
         /* we're done with pg_class */
@@ -630,6 +840,7 @@ vac_update_datfrozenxid(void)
         heap_close(relation, AccessShareLock);
  
         Assert(TransactionIdIsNormal(newFrozenXid));
+       Assert(MultiXactIdIsValid(newMinMulti));
  
         /* Now fetch the pg_database tuple we need to update. */
         relation = heap_open(DatabaseRelationId, RowExclusiveLock);
@@ -650,6 +861,13 @@ vac_update_datfrozenxid(void)
                 dirty = true;
         }
  
+       /* ditto */
+       if (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti))
+       {
+               dbform->datminmxid = newMinMulti;
+               dirty = true;
+       }
+
         if (dirty)
                 heap_inplace_update(relation, tuple);
  
@@ -662,7 +880,7 @@ vac_update_datfrozenxid(void)
          * this action will update that too.
          */
         if (dirty || ForceTransactionIdLimitUpdate())
-               vac_truncate_clog(newFrozenXid);
+               vac_truncate_clog(newFrozenXid, newMinMulti);
  }
  
  
@@ -674,24 +892,26 @@ vac_update_datfrozenxid(void)
   *             Also update the XID wrap limit info maintained by varsup.c.
   *
   *             The passed XID is simply the one I just wrote into my pg_database
- *             entry.  It's used to initialize the "min" calculation.
+ *             entry.  It's used to initialize the "min" calculation.
   *
   *             This routine is only invoked when we've managed to change our
   *             DB's datfrozenxid entry, or we found that the shared XID-wrap-limit
   *             info is stale.
   */
  static void
-vac_truncate_clog(TransactionId frozenXID)
+vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti)
  {
         TransactionId myXID = GetCurrentTransactionId();
         Relation        relation;
         HeapScanDesc scan;
         HeapTuple       tuple;
-       Oid                     oldest_datoid;
+       Oid                     oldestxid_datoid;
+       Oid                     minmulti_datoid;
         bool            frozenAlreadyWrapped = false;
  
-       /* init oldest_datoid to sync with my frozenXID */
-       oldest_datoid = MyDatabaseId;
+       /* init oldest datoids to sync with my frozen values */
+       oldestxid_datoid = MyDatabaseId;
+       minmulti_datoid = MyDatabaseId;
  
         /*
          * Scan pg_database to compute the minimum datfrozenxid
@@ -707,20 +927,27 @@ vac_truncate_clog(TransactionId frozenXID)
          */
         relation = heap_open(DatabaseRelationId, AccessShareLock);
  
-       scan = heap_beginscan(relation, SnapshotNow, 0, NULL);
+       scan = heap_beginscan_catalog(relation, 0, NULL);
  
         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
         {
                 Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
  
                 Assert(TransactionIdIsNormal(dbform->datfrozenxid));
+               Assert(MultiXactIdIsValid(dbform->datminmxid));
  
                 if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
                         frozenAlreadyWrapped = true;
                 else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
                 {
                         frozenXID = dbform->datfrozenxid;
-                       oldest_datoid = HeapTupleGetOid(tuple);
+                       oldestxid_datoid = HeapTupleGetOid(tuple);
+               }
+
+               if (MultiXactIdPrecedes(dbform->datminmxid, minMulti))
+               {
+                       minMulti = dbform->datminmxid;
+                       minmulti_datoid = HeapTupleGetOid(tuple);
                 }
         }
  
@@ -742,14 +969,20 @@ vac_truncate_clog(TransactionId frozenXID)
                 return;
         }
  
-       /* Truncate CLOG to the oldest frozenxid */
+       /*
+        * Truncate CLOG to the oldest computed value.  Note we don't truncate
+        * multixacts; that will be done by the next checkpoint.
+        */
         TruncateCLOG(frozenXID);
  
         /*
-        * Update the wrap limit for GetNewTransactionId.  Note: this function
-        * will also signal the postmaster for an(other) autovac cycle if needed.
+        * Update the wrap limit for GetNewTransactionId and creation of new
+        * MultiXactIds.  Note: these functions will also signal the postmaster
+        * for an(other) autovac cycle if needed.   XXX should we avoid possibly
+        * signalling twice?
          */
-       SetTransactionIdLimit(frozenXID, oldest_datoid);
+       SetTransactionIdLimit(frozenXID, oldestxid_datoid);
+       SetMultiXactIdLimit(minMulti, minmulti_datoid);
  }
  
  
@@ -757,19 +990,15 @@ vac_truncate_clog(TransactionId frozenXID)
   *     vacuum_rel() -- vacuum one heap relation
   *
   *             Doing one heap at a time incurs extra overhead, since we need to
- *             check that the heap exists again just before we vacuum it.      The
+ *             check that the heap exists again just before we vacuum it.  The
   *             reason that we do this is so that vacuuming can be spread across
   *             many small transactions.  Otherwise, two-phase locking would require
   *             us to lock the entire database during one pass of the vacuum cleaner.
   *
- *             We'll return true in *scanned_all if the vacuum scanned all heap
- *             pages, and updated pg_class.
- *
   *             At entry and exit, we are not inside a transaction.
   */
  static bool
-vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
-                  bool *scanned_all)
+vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
  {
         LOCKMODE        lmode;
         Relation        onerel;
@@ -779,9 +1008,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
         int                     save_sec_context;
         int                     save_nestlevel;
  
-       if (scanned_all)
-               *scanned_all = false;
-
         /* Begin a transaction for vacuuming this relation */
         StartTransactionCommand();
  
@@ -805,23 +1031,23 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
                  * here by violating transaction semantics.)
                  *
                  * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
-                * autovacuum; it's used to avoid cancelling a vacuum that was invoked
+                * autovacuum; it's used to avoid canceling a vacuum that was invoked
                  * in an emergency.
                  *
                  * Note: these flags remain set until CommitTransaction or
                  * AbortTransaction.  We don't want to clear them until we reset
-                * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
+                * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
                  * which is probably Not Good.
                  */
                 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-               MyProc->vacuumFlags |= PROC_IN_VACUUM;
+               MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
                 if (for_wraparound)
-                       MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
+                       MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
                 LWLockRelease(ProcArrayLock);
         }
  
         /*
-        * Check for user-requested abort.      Note we want this to be inside a
+        * Check for user-requested abort.  Note we want this to be inside a
          * transaction, so xact.c doesn't issue useless WARNING.
          */
         CHECK_FOR_INTERRUPTS();
@@ -868,7 +1094,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
          *
          * We allow the user to vacuum a table if he is superuser, the table
          * owner, or the database owner (but in the latter case, only if it's not
-        * a shared relation).  pg_class_ownercheck includes the superuser case.
+        * a shared relation).  pg_class_ownercheck includes the superuser case.
          *
          * Note we choose to treat permissions failure as a WARNING and keep
          * trying to vacuum the rest of the DB --- is this appropriate?
@@ -895,11 +1121,12 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
         }
  
         /*
-        * Check that it's a vacuumable table; we used to do this in
+        * Check that it's a vacuumable relation; we used to do this in
          * get_rel_oids() but seems safer to check after we've locked the
          * relation.
          */
         if (onerel->rd_rel->relkind != RELKIND_RELATION &&
+               onerel->rd_rel->relkind != RELKIND_MATVIEW &&
                 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
         {
                 ereport(WARNING,
@@ -971,11 +1198,10 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
  
                 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
                 cluster_rel(relid, InvalidOid, false,
-                                       (vacstmt->options & VACOPT_VERBOSE) != 0,
-                                       vacstmt->freeze_min_age, vacstmt->freeze_table_age);
+                                       (vacstmt->options & VACOPT_VERBOSE) != 0);
         }
         else
-               lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
+               lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
  
         /* Roll back any GUC changes executed by index functions */
         AtEOXact_GUC(false, save_nestlevel);
@@ -996,12 +1222,12 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
         /*
          * If the relation has a secondary toast rel, vacuum that too while we
          * still hold the session lock on the master table.  Note however that
-        * "analyze" will not get done on the toast table.      This is good, because
+        * "analyze" will not get done on the toast table.  This is good, because
          * the toaster always uses hardcoded index access and statistics are
          * totally unimportant for toast relations.
          */
         if (toast_relid != InvalidOid)
-               vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
+               vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
  
         /*
          * Now release the session-level lock on the master table.
@@ -1014,9 +1240,16 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
  
  
  /*
- * Open all the indexes of the given relation, obtaining the specified kind
- * of lock on each.  Return an array of Relation pointers for the indexes
- * into *Irel, and the number of indexes into *nindexes.
+ * Open all the vacuumable indexes of the given relation, obtaining the
+ * specified kind of lock on each.  Return an array of Relation pointers for
+ * the indexes into *Irel, and the number of indexes into *nindexes.
+ *
+ * We consider an index vacuumable if it is marked insertable (IndexIsReady).
+ * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
+ * execution, and what we have is too corrupt to be processable.  We will
+ * vacuum even if the index isn't indisvalid; this is important because in a
+ * unique index, uniqueness checks will be performed anyway and had better not
+ * hit dangling index pointers.
   */
  void
  vac_open_indexes(Relation relation, LOCKMODE lockmode,
@@ -1030,26 +1263,35 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode,
  
         indexoidlist = RelationGetIndexList(relation);
  
-       *nindexes = list_length(indexoidlist);
+       /* allocate enough memory for all indexes */
+       i = list_length(indexoidlist);
  
-       if (*nindexes > 0)
-               *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
+       if (i > 0)
+               *Irel = (Relation *) palloc(i * sizeof(Relation));
         else
                 *Irel = NULL;
  
+       /* collect just the ready indexes */
         i = 0;
         foreach(indexoidscan, indexoidlist)
         {
                 Oid                     indexoid = lfirst_oid(indexoidscan);
+               Relation        indrel;
  
-               (*Irel)[i++] = index_open(indexoid, lockmode);
+               indrel = index_open(indexoid, lockmode);
+               if (IndexIsReady(indrel->rd_index))
+                       (*Irel)[i++] = indrel;
+               else
+                       index_close(indrel, lockmode);
         }
  
+       *nindexes = i;
+
         list_free(indexoidlist);
  }
  
  /*
- * Release the resources acquired by vac_open_indexes. Optionally release
+ * Release the resources acquired by vac_open_indexes.  Optionally release
   * the locks (say NoLock to keep 'em).
   */
  void