granicus.if.org Git - postgresql/blobdiff - src/backend/commands/vacuum.c
Have multixact be truncated by checkpoint, not vacuum
[postgresql] / src / backend / commands / vacuum.c
index 90c413a9880f671981c8e45196029189a826f408..8822a154dccee7552403b1b6bb847c741219b9a4 100644 (file)
@@ -9,7 +9,7 @@
  * in cluster.c.
  *
  *
- * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  */
 #include "postgres.h"
 
+#include <math.h>
+
 #include "access/clog.h"
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
@@ -51,6 +55,8 @@
  */
 int                    vacuum_freeze_min_age;
 int                    vacuum_freeze_table_age;
+int                    vacuum_multixact_freeze_min_age;
+int                    vacuum_multixact_freeze_table_age;
 
 
 /* A few variables that don't seem worth passing around as parameters */
@@ -60,9 +66,9 @@ static BufferAccessStrategy vac_strategy;
 
 /* non-export function prototypes */
 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
-static void vac_truncate_clog(TransactionId frozenXID);
+static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti);
 static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
-                  bool for_wraparound, bool *scanned_all);
+                  bool for_wraparound);
 
 
 /*
@@ -76,7 +82,7 @@ static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
  * tables separately.
  *
  * for_wraparound is used by autovacuum to let us know when it's forcing
- * a vacuum for wraparound, which should not be auto-cancelled.
+ * a vacuum for wraparound, which should not be auto-canceled.
  *
  * bstrategy is normally given as NULL, but in autovacuum it can be passed
  * in to use the same buffer strategy object across multiple vacuum() calls.
@@ -92,8 +98,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
           BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
 {
        const char *stmttype;
-       volatile bool all_rels,
-                               in_outer_xact,
+       volatile bool in_outer_xact,
                                use_own_xacts;
        List       *relations;
 
@@ -153,9 +158,6 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
        }
        vac_strategy = bstrategy;
 
-       /* Remember whether we are processing everything in the DB */
-       all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL);
-
        /*
         * Build list of relations to process, unless caller gave us one. (If we
         * build one, we put it in vac_context for safekeeping.)
@@ -216,6 +218,9 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
 
                VacuumCostActive = (VacuumCostDelay > 0);
                VacuumCostBalance = 0;
+               VacuumPageHit = 0;
+               VacuumPageMiss = 0;
+               VacuumPageDirty = 0;
 
                /*
                 * Loop to process each selected relation.
@@ -223,12 +228,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
                foreach(cur, relations)
                {
                        Oid                     relid = lfirst_oid(cur);
-                       bool            scanned_all = false;
 
                        if (vacstmt->options & VACOPT_VACUUM)
                        {
-                               if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
-                                                               &scanned_all))
+                               if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
                                        continue;
                        }
 
@@ -245,7 +248,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
                                        PushActiveSnapshot(GetTransactionSnapshot());
                                }
 
-                               analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
+                               analyze_rel(relid, vacstmt, vac_strategy);
 
                                if (use_own_xacts)
                                {
@@ -322,7 +325,16 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
                /* Process a specific relation */
                Oid                     relid;
 
-               relid = RangeVarGetRelid(vacrel, false);
+               /*
+                * Since we don't take a lock here, the relation might be gone, or the
+                * RangeVar might no longer refer to the OID we look up here.  In the
+                * former case, VACUUM will do nothing; in the latter case, it will
+                * process the OID we looked up here, rather than the new one. Neither
+                * is ideal, but there's little practical alternative, since we're
+                * going to commit this transaction and begin a new one between now
+                * and then.
+                */
+               relid = RangeVarGetRelid(vacrel, NoLock, false);
 
                /* Make a relation list entry for this guy */
                oldcontext = MemoryContextSwitchTo(vac_context);
@@ -331,23 +343,26 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
        }
        else
        {
-               /* Process all plain relations listed in pg_class */
+               /*
+                * Process all plain relations and materialized views listed in
+                * pg_class
+                */
                Relation        pgclass;
                HeapScanDesc scan;
                HeapTuple       tuple;
-               ScanKeyData key;
-
-               ScanKeyInit(&key,
-                                       Anum_pg_class_relkind,
-                                       BTEqualStrategyNumber, F_CHAREQ,
-                                       CharGetDatum(RELKIND_RELATION));
 
                pgclass = heap_open(RelationRelationId, AccessShareLock);
 
-               scan = heap_beginscan(pgclass, SnapshotNow, 1, &key);
+               scan = heap_beginscan_catalog(pgclass, 0, NULL);
 
                while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
                {
+                       Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+
+                       if (classForm->relkind != RELKIND_RELATION &&
+                               classForm->relkind != RELKIND_MATVIEW)
+                               continue;
+
                        /* Make a relation list entry for this guy */
                        oldcontext = MemoryContextSwitchTo(vac_context);
                        oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
@@ -363,29 +378,54 @@ get_rel_oids(Oid relid, const RangeVar *vacrel)
 
 /*
  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
+ *
+ * The output parameters are:
+ * - oldestXmin is the cutoff value used to distinguish whether tuples are
+ *      DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
+ * - freezeLimit is the Xid below which all Xids are replaced by
+ *      FrozenTransactionId during vacuum.
+ * - xidFullScanLimit (computed from freeze_table_age parameter)
+ *      represents a minimum Xid value; a table whose relfrozenxid is older than
+ *      this will have a full-table vacuum applied to it, to freeze tuples across
+ *      the whole table.  Vacuuming a table younger than this value can use a
+ *      partial scan.
+ * - multiXactCutoff is the value below which all MultiXactIds are removed from
+ *      Xmax.
+ * - mxactFullScanLimit is a value against which a table's relminmxid value is
+ *      compared to produce a full-table vacuum, as with xidFullScanLimit.
+ *
+ * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
+ * not interested.
  */
 void
-vacuum_set_xid_limits(int freeze_min_age,
+vacuum_set_xid_limits(Relation rel,
+                                         int freeze_min_age,
                                          int freeze_table_age,
-                                         bool sharedRel,
+                                         int multixact_freeze_min_age,
+                                         int multixact_freeze_table_age,
                                          TransactionId *oldestXmin,
                                          TransactionId *freezeLimit,
-                                         TransactionId *freezeTableLimit)
+                                         TransactionId *xidFullScanLimit,
+                                         MultiXactId *multiXactCutoff,
+                                         MultiXactId *mxactFullScanLimit)
 {
        int                     freezemin;
+       int                     mxid_freezemin;
        TransactionId limit;
        TransactionId safeLimit;
+       MultiXactId mxactLimit;
+       MultiXactId safeMxactLimit;
 
        /*
-        * We can always ignore processes running lazy vacuum.  This is because we
+        * We can always ignore processes running lazy vacuum.  This is because we
         * use these values only for deciding which tuples we must keep in the
-        * tables.      Since lazy vacuum doesn't write its XID anywhere, it's safe to
+        * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
         * ignore it.  In theory it could be problematic to ignore lazy vacuums in
         * a full vacuum, but keep in mind that only one vacuum process can be
         * working on a particular table at any time, and that each vacuum is
         * always an independent transaction.
         */
-       *oldestXmin = GetOldestXmin(sharedRel, true);
+       *oldestXmin = GetOldestXmin(rel, true);
 
        Assert(TransactionIdIsNormal(*oldestXmin));
 
@@ -427,10 +467,45 @@ vacuum_set_xid_limits(int freeze_min_age,
 
        *freezeLimit = limit;
 
-       if (freezeTableLimit != NULL)
+       /*
+        * Determine the minimum multixact freeze age to use: as specified by
+        * caller, or vacuum_multixact_freeze_min_age, but in any case not more
+        * than half autovacuum_multixact_freeze_max_age, so that autovacuums to
+        * prevent MultiXact wraparound won't occur too frequently.
+        */
+       mxid_freezemin = multixact_freeze_min_age;
+       if (mxid_freezemin < 0)
+               mxid_freezemin = vacuum_multixact_freeze_min_age;
+       mxid_freezemin = Min(mxid_freezemin,
+                                                autovacuum_multixact_freeze_max_age / 2);
+       Assert(mxid_freezemin >= 0);
+
+       /* compute the cutoff multi, being careful to generate a valid value */
+       mxactLimit = GetOldestMultiXactId() - mxid_freezemin;
+       if (mxactLimit < FirstMultiXactId)
+               mxactLimit = FirstMultiXactId;
+
+       safeMxactLimit =
+               ReadNextMultiXactId() - autovacuum_multixact_freeze_max_age;
+       if (safeMxactLimit < FirstMultiXactId)
+               safeMxactLimit = FirstMultiXactId;
+
+       if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
+       {
+               ereport(WARNING,
+                               (errmsg("oldest multixact is far in the past"),
+                                errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
+               mxactLimit = safeMxactLimit;
+       }
+
+       *multiXactCutoff = mxactLimit;
+
+       if (xidFullScanLimit != NULL)
        {
                int                     freezetable;
 
+               Assert(mxactFullScanLimit != NULL);
+
                /*
                 * Determine the table freeze age to use: as specified by the caller,
                 * or vacuum_freeze_table_age, but in any case not more than
@@ -438,22 +513,125 @@ vacuum_set_xid_limits(int freeze_min_age,
                 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
                 * before anti-wraparound autovacuum is launched.
                 */
-               freezetable = freeze_min_age;
+               freezetable = freeze_table_age;
                if (freezetable < 0)
                        freezetable = vacuum_freeze_table_age;
                freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
                Assert(freezetable >= 0);
 
                /*
-                * Compute the cutoff XID, being careful not to generate a "permanent"
-                * XID.
+                * Compute XID limit causing a full-table vacuum, being careful not to
+                * generate a "permanent" XID.
                 */
                limit = ReadNewTransactionId() - freezetable;
                if (!TransactionIdIsNormal(limit))
                        limit = FirstNormalTransactionId;
 
-               *freezeTableLimit = limit;
+               *xidFullScanLimit = limit;
+
+               /*
+                * Similar to the above, determine the table freeze age to use for
+                * multixacts: as specified by the caller, or
+                * vacuum_multixact_freeze_table_age, but in any case not more than
+               * autovacuum_multixact_freeze_max_age * 0.95, so that if you have
+                * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
+                * freeze multixacts before anti-wraparound autovacuum is launched.
+                */
+               freezetable = multixact_freeze_table_age;
+               if (freezetable < 0)
+                       freezetable = vacuum_multixact_freeze_table_age;
+               freezetable = Min(freezetable,
+                                                 autovacuum_multixact_freeze_max_age * 0.95);
+               Assert(freezetable >= 0);
+
+               /*
+                * Compute MultiXact limit causing a full-table vacuum, being careful
+                * to generate a valid MultiXact value.
+                */
+               mxactLimit = ReadNextMultiXactId() - freezetable;
+               if (mxactLimit < FirstMultiXactId)
+                       mxactLimit = FirstMultiXactId;
+
+               *mxactFullScanLimit = mxactLimit;
        }
+       else
+       {
+               Assert(mxactFullScanLimit == NULL);
+       }
+}
+
+/*
+ * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
+ *
+ *             If we scanned the whole relation then we should just use the count of
+ *             live tuples seen; but if we did not, we should not trust the count
+ *             unreservedly, especially not in VACUUM, which may have scanned a quite
+ *             nonrandom subset of the table.  When we have only partial information,
+ *             we take the old value of pg_class.reltuples as a measurement of the
+ *             tuple density in the unscanned pages.
+ *
+ *             This routine is shared by VACUUM and ANALYZE.
+ */
+double
+vac_estimate_reltuples(Relation relation, bool is_analyze,
+                                          BlockNumber total_pages,
+                                          BlockNumber scanned_pages,
+                                          double scanned_tuples)
+{
+       BlockNumber old_rel_pages = relation->rd_rel->relpages;
+       double          old_rel_tuples = relation->rd_rel->reltuples;
+       double          old_density;
+       double          new_density;
+       double          multiplier;
+       double          updated_density;
+
+       /* If we did scan the whole table, just use the count as-is */
+       if (scanned_pages >= total_pages)
+               return scanned_tuples;
+
+       /*
+        * If scanned_pages is zero but total_pages isn't, keep the existing value
+        * of reltuples.  (Note: callers should avoid updating the pg_class
+        * statistics in this situation, since no new information has been
+        * provided.)
+        */
+       if (scanned_pages == 0)
+               return old_rel_tuples;
+
+       /*
+        * If old value of relpages is zero, old density is indeterminate; we
+        * can't do much except scale up scanned_tuples to match total_pages.
+        */
+       if (old_rel_pages == 0)
+               return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
+
+       /*
+        * Okay, we've covered the corner cases.  The normal calculation is to
+        * convert the old measurement to a density (tuples per page), then update
+        * the density using an exponential-moving-average approach, and finally
+        * compute reltuples as updated_density * total_pages.
+        *
+        * For ANALYZE, the moving average multiplier is just the fraction of the
+        * table's pages we scanned.  This is equivalent to assuming that the
+        * tuple density in the unscanned pages didn't change.  Of course, it
+        * probably did, if the new density measurement is different. But over
+        * repeated cycles, the value of reltuples will converge towards the
+        * correct value, if repeated measurements show the same new density.
+        *
+        * For VACUUM, the situation is a bit different: we have looked at a
+        * nonrandom sample of pages, but we know for certain that the pages we
+        * didn't look at are precisely the ones that haven't changed lately.
+        * Thus, there is a reasonable argument for doing exactly the same thing
+        * as for the ANALYZE case, that is use the old density measurement as the
+        * value for the unscanned pages.
+        *
+        * This logic could probably use further refinement.
+        */
+       old_density = old_rel_tuples / old_rel_pages;
+       new_density = scanned_tuples / scanned_pages;
+       multiplier = (double) scanned_pages / (double) total_pages;
+       updated_density = old_density + (new_density - old_density) * multiplier;
+       return floor(updated_density * total_pages + 0.5);
 }
 
 
@@ -484,12 +662,14 @@ vacuum_set_xid_limits(int freeze_min_age,
  *             somebody vacuuming pg_class might think they could delete a tuple
  *             marked with xmin = our xid.
  *
- *             This routine is shared by VACUUM and stand-alone ANALYZE.
+ *             This routine is shared by VACUUM and ANALYZE.
  */
 void
 vac_update_relstats(Relation relation,
                                        BlockNumber num_pages, double num_tuples,
-                                       bool hasindex, TransactionId frozenxid)
+                                       BlockNumber num_all_visible_pages,
+                                       bool hasindex, TransactionId frozenxid,
+                                       MultiXactId minmulti)
 {
        Oid                     relid = RelationGetRelid(relation);
        Relation        rd;
@@ -519,6 +699,11 @@ vac_update_relstats(Relation relation,
                pgcform->reltuples = (float4) num_tuples;
                dirty = true;
        }
+       if (pgcform->relallvisible != (int32) num_all_visible_pages)
+       {
+               pgcform->relallvisible = (int32) num_all_visible_pages;
+               dirty = true;
+       }
        if (pgcform->relhasindex != hasindex)
        {
                pgcform->relhasindex = hasindex;
@@ -527,7 +712,7 @@ vac_update_relstats(Relation relation,
 
        /*
         * If we have discovered that there are no indexes, then there's no
-        * primary key either.  This could be done more thoroughly...
+        * primary key either.  This could be done more thoroughly...
         */
        if (pgcform->relhaspkey && !hasindex)
        {
@@ -558,6 +743,14 @@ vac_update_relstats(Relation relation,
                dirty = true;
        }
 
+       /* relminmxid must never go backward, either */
+       if (MultiXactIdIsValid(minmulti) &&
+               MultiXactIdPrecedes(pgcform->relminmxid, minmulti))
+       {
+               pgcform->relminmxid = minmulti;
+               dirty = true;
+       }
+
        /* If anything changed, write out the tuple. */
        if (dirty)
                heap_inplace_update(rd, ctup);
@@ -570,11 +763,16 @@ vac_update_relstats(Relation relation,
  *     vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
  *
  *             Update pg_database's datfrozenxid entry for our database to be the
- *             minimum of the pg_class.relfrozenxid values.  If we are able to
- *             advance pg_database.datfrozenxid, also try to truncate pg_clog.
+ *             minimum of the pg_class.relfrozenxid values.
+ *
+ *             Similarly, update our datminmxid to be the minimum of the
+ *             pg_class.relminmxid values.
+ *
+ *             If we are able to advance either pg_database value, also try to
+ *             truncate pg_clog and pg_multixact.
  *
  *             We violate transaction semantics here by overwriting the database's
- *             existing pg_database tuple with the new value.  This is reasonably
+ *             existing pg_database tuple with the new value.  This is reasonably
  *             safe since the new value is correct whether or not this transaction
  *             commits.  As with vac_update_relstats, this avoids leaving dead tuples
  *             behind after a VACUUM.
@@ -588,15 +786,22 @@ vac_update_datfrozenxid(void)
        SysScanDesc scan;
        HeapTuple       classTup;
        TransactionId newFrozenXid;
+       MultiXactId newMinMulti;
        bool            dirty = false;
 
        /*
         * Initialize the "min" calculation with GetOldestXmin, which is a
         * reasonable approximation to the minimum relfrozenxid for not-yet-
         * committed pg_class entries for new tables; see AddNewRelationTuple().
-        * Se we cannot produce a wrong minimum by starting with this.
+        * So we cannot produce a wrong minimum by starting with this.
         */
-       newFrozenXid = GetOldestXmin(true, true);
+       newFrozenXid = GetOldestXmin(NULL, true);
+
+       /*
+        * Similarly, initialize the MultiXact "min" with the value that would be
+        * used on pg_class for new tables.  See AddNewRelationTuple().
+        */
+       newMinMulti = GetOldestMultiXactId();
 
        /*
         * We must seqscan pg_class to find the minimum Xid, because there is no
@@ -605,24 +810,29 @@ vac_update_datfrozenxid(void)
        relation = heap_open(RelationRelationId, AccessShareLock);
 
        scan = systable_beginscan(relation, InvalidOid, false,
-                                                         SnapshotNow, 0, NULL);
+                                                         NULL, 0, NULL);
 
        while ((classTup = systable_getnext(scan)) != NULL)
        {
                Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
 
                /*
-                * Only consider heap and TOAST tables (anything else should have
-                * InvalidTransactionId in relfrozenxid anyway.)
+                * Only consider relations able to hold unfrozen XIDs (anything else
+                * should have InvalidTransactionId in relfrozenxid anyway.)
                 */
                if (classForm->relkind != RELKIND_RELATION &&
+                       classForm->relkind != RELKIND_MATVIEW &&
                        classForm->relkind != RELKIND_TOASTVALUE)
                        continue;
 
                Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+               Assert(MultiXactIdIsValid(classForm->relminmxid));
 
                if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
                        newFrozenXid = classForm->relfrozenxid;
+
+               if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
+                       newMinMulti = classForm->relminmxid;
        }
 
        /* we're done with pg_class */
@@ -630,6 +840,7 @@ vac_update_datfrozenxid(void)
        heap_close(relation, AccessShareLock);
 
        Assert(TransactionIdIsNormal(newFrozenXid));
+       Assert(MultiXactIdIsValid(newMinMulti));
 
        /* Now fetch the pg_database tuple we need to update. */
        relation = heap_open(DatabaseRelationId, RowExclusiveLock);
@@ -650,6 +861,13 @@ vac_update_datfrozenxid(void)
                dirty = true;
        }
 
+       /* ditto */
+       if (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti))
+       {
+               dbform->datminmxid = newMinMulti;
+               dirty = true;
+       }
+
        if (dirty)
                heap_inplace_update(relation, tuple);
 
@@ -662,7 +880,7 @@ vac_update_datfrozenxid(void)
         * this action will update that too.
         */
        if (dirty || ForceTransactionIdLimitUpdate())
-               vac_truncate_clog(newFrozenXid);
+               vac_truncate_clog(newFrozenXid, newMinMulti);
 }
 
 
@@ -674,24 +892,26 @@ vac_update_datfrozenxid(void)
  *             Also update the XID wrap limit info maintained by varsup.c.
  *
  *             The passed XID is simply the one I just wrote into my pg_database
- *             entry.  It's used to initialize the "min" calculation.
+ *             entry.  It's used to initialize the "min" calculation.
  *
  *             This routine is only invoked when we've managed to change our
  *             DB's datfrozenxid entry, or we found that the shared XID-wrap-limit
  *             info is stale.
  */
 static void
-vac_truncate_clog(TransactionId frozenXID)
+vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti)
 {
        TransactionId myXID = GetCurrentTransactionId();
        Relation        relation;
        HeapScanDesc scan;
        HeapTuple       tuple;
-       Oid                     oldest_datoid;
+       Oid                     oldestxid_datoid;
+       Oid                     minmulti_datoid;
        bool            frozenAlreadyWrapped = false;
 
-       /* init oldest_datoid to sync with my frozenXID */
-       oldest_datoid = MyDatabaseId;
+       /* init oldest datoids to sync with my frozen values */
+       oldestxid_datoid = MyDatabaseId;
+       minmulti_datoid = MyDatabaseId;
 
        /*
         * Scan pg_database to compute the minimum datfrozenxid
@@ -707,20 +927,27 @@ vac_truncate_clog(TransactionId frozenXID)
         */
        relation = heap_open(DatabaseRelationId, AccessShareLock);
 
-       scan = heap_beginscan(relation, SnapshotNow, 0, NULL);
+       scan = heap_beginscan_catalog(relation, 0, NULL);
 
        while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        {
                Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
 
                Assert(TransactionIdIsNormal(dbform->datfrozenxid));
+               Assert(MultiXactIdIsValid(dbform->datminmxid));
 
                if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
                        frozenAlreadyWrapped = true;
                else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
                {
                        frozenXID = dbform->datfrozenxid;
-                       oldest_datoid = HeapTupleGetOid(tuple);
+                       oldestxid_datoid = HeapTupleGetOid(tuple);
+               }
+
+               if (MultiXactIdPrecedes(dbform->datminmxid, minMulti))
+               {
+                       minMulti = dbform->datminmxid;
+                       minmulti_datoid = HeapTupleGetOid(tuple);
                }
        }
 
@@ -742,14 +969,20 @@ vac_truncate_clog(TransactionId frozenXID)
                return;
        }
 
-       /* Truncate CLOG to the oldest frozenxid */
+       /*
+        * Truncate CLOG to the oldest computed value.  Note we don't truncate
+        * multixacts; that will be done by the next checkpoint.
+        */
        TruncateCLOG(frozenXID);
 
        /*
-        * Update the wrap limit for GetNewTransactionId.  Note: this function
-        * will also signal the postmaster for an(other) autovac cycle if needed.
+        * Update the wrap limit for GetNewTransactionId and creation of new
+        * MultiXactIds.  Note: these functions will also signal the postmaster
+        * for an(other) autovac cycle if needed.   XXX should we avoid possibly
+        * signalling twice?
         */
-       SetTransactionIdLimit(frozenXID, oldest_datoid);
+       SetTransactionIdLimit(frozenXID, oldestxid_datoid);
+       SetMultiXactIdLimit(minMulti, minmulti_datoid);
 }
 
 
@@ -757,19 +990,15 @@ vac_truncate_clog(TransactionId frozenXID)
  *     vacuum_rel() -- vacuum one heap relation
  *
  *             Doing one heap at a time incurs extra overhead, since we need to
- *             check that the heap exists again just before we vacuum it.      The
+ *             check that the heap exists again just before we vacuum it.  The
  *             reason that we do this is so that vacuuming can be spread across
  *             many small transactions.  Otherwise, two-phase locking would require
  *             us to lock the entire database during one pass of the vacuum cleaner.
  *
- *             We'll return true in *scanned_all if the vacuum scanned all heap
- *             pages, and updated pg_class.
- *
  *             At entry and exit, we are not inside a transaction.
  */
 static bool
-vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
-                  bool *scanned_all)
+vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
 {
        LOCKMODE        lmode;
        Relation        onerel;
@@ -779,9 +1008,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
        int                     save_sec_context;
        int                     save_nestlevel;
 
-       if (scanned_all)
-               *scanned_all = false;
-
        /* Begin a transaction for vacuuming this relation */
        StartTransactionCommand();
 
@@ -805,23 +1031,23 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
                 * here by violating transaction semantics.)
                 *
                 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
-                * autovacuum; it's used to avoid cancelling a vacuum that was invoked
+                * autovacuum; it's used to avoid canceling a vacuum that was invoked
                 * in an emergency.
                 *
                 * Note: these flags remain set until CommitTransaction or
                 * AbortTransaction.  We don't want to clear them until we reset
-                * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
+                * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
                 * which is probably Not Good.
                 */
                LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-               MyProc->vacuumFlags |= PROC_IN_VACUUM;
+               MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
                if (for_wraparound)
-                       MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
+                       MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
                LWLockRelease(ProcArrayLock);
        }
 
        /*
-        * Check for user-requested abort.      Note we want this to be inside a
+        * Check for user-requested abort.  Note we want this to be inside a
         * transaction, so xact.c doesn't issue useless WARNING.
         */
        CHECK_FOR_INTERRUPTS();
@@ -868,7 +1094,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
         *
         * We allow the user to vacuum a table if he is superuser, the table
         * owner, or the database owner (but in the latter case, only if it's not
-        * a shared relation).  pg_class_ownercheck includes the superuser case.
+        * a shared relation).  pg_class_ownercheck includes the superuser case.
         *
         * Note we choose to treat permissions failure as a WARNING and keep
         * trying to vacuum the rest of the DB --- is this appropriate?
@@ -895,11 +1121,12 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
        }
 
        /*
-        * Check that it's a vacuumable table; we used to do this in
+        * Check that it's a vacuumable relation; we used to do this in
         * get_rel_oids() but seems safer to check after we've locked the
         * relation.
         */
        if (onerel->rd_rel->relkind != RELKIND_RELATION &&
+               onerel->rd_rel->relkind != RELKIND_MATVIEW &&
                onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
        {
                ereport(WARNING,
@@ -971,11 +1198,10 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
 
                /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
                cluster_rel(relid, InvalidOid, false,
-                                       (vacstmt->options & VACOPT_VERBOSE) != 0,
-                                       vacstmt->freeze_min_age, vacstmt->freeze_table_age);
+                                       (vacstmt->options & VACOPT_VERBOSE) != 0);
        }
        else
-               lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
+               lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
 
        /* Roll back any GUC changes executed by index functions */
        AtEOXact_GUC(false, save_nestlevel);
@@ -996,12 +1222,12 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
        /*
         * If the relation has a secondary toast rel, vacuum that too while we
         * still hold the session lock on the master table.  Note however that
-        * "analyze" will not get done on the toast table.      This is good, because
+        * "analyze" will not get done on the toast table.  This is good, because
         * the toaster always uses hardcoded index access and statistics are
         * totally unimportant for toast relations.
         */
        if (toast_relid != InvalidOid)
-               vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
+               vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
 
        /*
         * Now release the session-level lock on the master table.
@@ -1014,9 +1240,16 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
 
 
 /*
- * Open all the indexes of the given relation, obtaining the specified kind
- * of lock on each.  Return an array of Relation pointers for the indexes
- * into *Irel, and the number of indexes into *nindexes.
+ * Open all the vacuumable indexes of the given relation, obtaining the
+ * specified kind of lock on each.  Return an array of Relation pointers for
+ * the indexes into *Irel, and the number of indexes into *nindexes.
+ *
+ * We consider an index vacuumable if it is marked insertable (IndexIsReady).
+ * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
+ * execution, and what we have is too corrupt to be processable.  We will
+ * vacuum even if the index isn't indisvalid; this is important because in a
+ * unique index, uniqueness checks will be performed anyway and had better not
+ * hit dangling index pointers.
  */
 void
 vac_open_indexes(Relation relation, LOCKMODE lockmode,
@@ -1030,26 +1263,35 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode,
 
        indexoidlist = RelationGetIndexList(relation);
 
-       *nindexes = list_length(indexoidlist);
+       /* allocate enough memory for all indexes */
+       i = list_length(indexoidlist);
 
-       if (*nindexes > 0)
-               *Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
+       if (i > 0)
+               *Irel = (Relation *) palloc(i * sizeof(Relation));
        else
                *Irel = NULL;
 
+       /* collect just the ready indexes */
        i = 0;
        foreach(indexoidscan, indexoidlist)
        {
                Oid                     indexoid = lfirst_oid(indexoidscan);
+               Relation        indrel;
 
-               (*Irel)[i++] = index_open(indexoid, lockmode);
+               indrel = index_open(indexoid, lockmode);
+               if (IndexIsReady(indrel->rd_index))
+                       (*Irel)[i++] = indrel;
+               else
+                       index_close(indrel, lockmode);
        }
 
+       *nindexes = i;
+
        list_free(indexoidlist);
 }
 
 /*
- * Release the resources acquired by vac_open_indexes. Optionally release
+ * Release the resources acquired by vac_open_indexes.  Optionally release
  * the locks (say NoLock to keep 'em).
  */
 void