TABLESAMPLE, SQL Standard and extensible

[postgresql] / src / backend / commands / analyze.c
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index d549fa3bb9f9aaaa1e39d5dd263411bc2854288b..65e329eab0795f9d96cc42050cedec014bc16648 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -3,12 +3,12 @@
   * analyze.c
   *       the Postgres statistics generator
   *
- * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.138 2009/06/06 22:13:51 tgl Exp $
+ *       src/backend/commands/analyze.c
   *
   *-------------------------------------------------------------------------
   */
@@ -16,17 +16,23 @@
  
  #include <math.h>
  
-#include "access/heapam.h"
+#include "access/multixact.h"
  #include "access/transam.h"
+#include "access/tupconvert.h"
  #include "access/tuptoaster.h"
+#include "access/visibilitymap.h"
  #include "access/xact.h"
+#include "catalog/catalog.h"
  #include "catalog/index.h"
  #include "catalog/indexing.h"
-#include "catalog/namespace.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_inherits_fn.h"
  #include "catalog/pg_namespace.h"
  #include "commands/dbcommands.h"
+#include "commands/tablecmds.h"
  #include "commands/vacuum.h"
  #include "executor/executor.h"
+#include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/nodeFuncs.h"
  #include "parser/parse_oper.h"
@@ -34,28 +40,23 @@
  #include "pgstat.h"
  #include "postmaster/autovacuum.h"
  #include "storage/bufmgr.h"
+#include "storage/lmgr.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
  #include "utils/acl.h"
+#include "utils/attoptcache.h"
  #include "utils/datum.h"
+#include "utils/guc.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/pg_rusage.h"
+#include "utils/sampling.h"
+#include "utils/sortsupport.h"
  #include "utils/syscache.h"
-#include "utils/tuplesort.h"
+#include "utils/timestamp.h"
  #include "utils/tqual.h"
  
  
-/* Data structure for Algorithm S from Knuth 3.4.2 */
-typedef struct
-{
-       BlockNumber N;                          /* number of blocks, known in advance */
-       int                     n;                              /* desired sample size */
-       BlockNumber t;                          /* current block number */
-       int                     m;                              /* blocks selected so far */
-} BlockSamplerData;
-typedef BlockSamplerData *BlockSampler;
-
  /* Per-index data for ANALYZE */
  typedef struct AnlIndexData
  {
@@ -70,88 +71,57 @@ typedef struct AnlIndexData
  int                    default_statistics_target = 100;
  
  /* A few variables that don't seem worth passing around as parameters */
-static int     elevel = -1;
-
  static MemoryContext anl_context = NULL;
-
  static BufferAccessStrategy vac_strategy;
  
  
-static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
-                                 int samplesize);
-static bool BlockSampler_HasMore(BlockSampler bs);
-static BlockNumber BlockSampler_Next(BlockSampler bs);
+static void do_analyze_rel(Relation onerel, int options,
+                          VacuumParams *params, List *va_cols,
+                          AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
+                          bool inh, bool in_outer_xact, int elevel);
  static void compute_index_stats(Relation onerel, double totalrows,
                                         AnlIndexData *indexdata, int nindexes,
                                         HeapTuple *rows, int numrows,
                                         MemoryContext col_context);
-static VacAttrStats *examine_attribute(Relation onerel, int attnum);
-static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
-                                       int targrows, double *totalrows, double *totaldeadrows);
-static double random_fract(void);
-static double init_selection_state(int n);
-static double get_next_S(double t, int n, double *stateptr);
+static VacAttrStats *examine_attribute(Relation onerel, int attnum,
+                                 Node *index_expr);
+static int acquire_sample_rows(Relation onerel, int elevel,
+                                       HeapTuple *rows, int targrows,
+                                       double *totalrows, double *totaldeadrows);
  static int     compare_rows(const void *a, const void *b);
-static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
+static int acquire_inherited_sample_rows(Relation onerel, int elevel,
+                                                         HeapTuple *rows, int targrows,
+                                                         double *totalrows, double *totaldeadrows);
+static void update_attstats(Oid relid, bool inh,
+                               int natts, VacAttrStats **vacattrstats);
  static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  
-static bool std_typanalyze(VacAttrStats *stats);
-
  
  /*
   *     analyze_rel() -- analyze one relation
- *
- * If update_reltuples is true, we update reltuples and relpages columns
- * in pg_class.  Caller should pass false if we're part of VACUUM ANALYZE,
- * and the VACUUM didn't skip any pages.  We only have an approximate count,
- * so we don't want to overwrite the accurate values already inserted by the
- * VACUUM in that case.  VACUUM always scans all indexes, however, so the
- * pg_class entries for indexes are never updated if we're part of VACUUM
- * ANALYZE.
   */
  void
-analyze_rel(Oid relid, VacuumStmt *vacstmt,
-                       BufferAccessStrategy bstrategy, bool update_reltuples)
+analyze_rel(Oid relid, RangeVar *relation, int options,
+                       VacuumParams *params, List *va_cols, bool in_outer_xact,
+                       BufferAccessStrategy bstrategy)
  {
         Relation        onerel;
-       int                     attr_cnt,
-                               tcnt,
-                               i,
-                               ind;
-       Relation   *Irel;
-       int                     nindexes;
-       bool            hasindex;
-       bool            analyzableindex;
-       VacAttrStats **vacattrstats;
-       AnlIndexData *indexdata;
-       int                     targrows,
-                               numrows;
-       double          totalrows,
-                               totaldeadrows;
-       HeapTuple  *rows;
-       PGRUsage        ru0;
-       TimestampTz starttime = 0;
-       Oid                     save_userid;
-       bool            save_secdefcxt;
+       int                     elevel;
+       AcquireSampleRowsFunc acquirefunc = NULL;
+       BlockNumber relpages = 0;
  
-       if (vacstmt->verbose)
+       /* Select logging level */
+       if (options & VACOPT_VERBOSE)
                 elevel = INFO;
         else
                 elevel = DEBUG2;
  
+       /* Set up static variables */
         vac_strategy = bstrategy;
  
         /*
-        * Use the current context for storing analysis info.  vacuum.c ensures
-        * that this context will be cleared when I return, thus releasing the
-        * memory allocated here.
-        */
-       anl_context = CurrentMemoryContext;
-
-       /*
-        * Check for user-requested abort.      Note we want this to be inside a
-        * transaction, so xact.c doesn't issue useless WARNING.
+        * Check for user-requested abort.
          */
         CHECK_FOR_INTERRUPTS();
  
@@ -162,7 +132,19 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
          * matter if we ever try to accumulate stats on dead tuples.) If the rel
          * has been dropped since we last saw it, we don't need to process it.
          */
-       onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
+       if (!(options & VACOPT_NOWAIT))
+               onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
+       else if (ConditionalLockRelationOid(relid, ShareUpdateExclusiveLock))
+               onerel = try_relation_open(relid, NoLock);
+       else
+       {
+               onerel = NULL;
+               if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
+                       ereport(LOG,
+                                       (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                                 errmsg("skipping analyze of \"%s\" --- lock not available",
+                                                relation->relname)));
+       }
         if (!onerel)
                 return;
  
@@ -173,12 +155,12 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                   (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
         {
                 /* No need for a WARNING if we already complained during VACUUM */
-               if (!vacstmt->vacuum)
+               if (!(options & VACOPT_VACUUM))
                 {
                         if (onerel->rd_rel->relisshared)
                                 ereport(WARNING,
-                                               (errmsg("skipping \"%s\" --- only superuser can analyze it",
-                                                               RelationGetRelationName(onerel))));
+                                (errmsg("skipping \"%s\" --- only superuser can analyze it",
+                                                RelationGetRelationName(onerel))));
                         else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
                                 ereport(WARNING,
                                                 (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
@@ -192,21 +174,6 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                 return;
         }
  
-       /*
-        * Check that it's a plain table; we used to do this in get_rel_oids() but
-        * seems safer to check after we've locked the relation.
-        */
-       if (onerel->rd_rel->relkind != RELKIND_RELATION)
-       {
-               /* No need for a WARNING if we already complained during VACUUM */
-               if (!vacstmt->vacuum)
-                       ereport(WARNING,
-                                       (errmsg("skipping \"%s\" --- cannot analyze indexes, views, or special system tables",
-                                                       RelationGetRelationName(onerel))));
-               relation_close(onerel, ShareUpdateExclusiveLock);
-               return;
-       }
-
         /*
          * Silently ignore tables that are temp tables of other backends ---
          * trying to analyze these is rather pointless, since their contents are
@@ -228,28 +195,163 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                 return;
         }
  
-       ereport(elevel,
-                       (errmsg("analyzing \"%s.%s\"",
-                                       get_namespace_name(RelationGetNamespace(onerel)),
-                                       RelationGetRelationName(onerel))));
+       /*
+        * Check that it's a plain table, materialized view, or foreign table; we
+        * used to do this in get_rel_oids() but seems safer to check after we've
+        * locked the relation.
+        */
+       if (onerel->rd_rel->relkind == RELKIND_RELATION ||
+               onerel->rd_rel->relkind == RELKIND_MATVIEW)
+       {
+               /* Regular table, so we'll use the regular row acquisition function */
+               acquirefunc = acquire_sample_rows;
+               /* Also get regular table's size */
+               relpages = RelationGetNumberOfBlocks(onerel);
+       }
+       else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+       {
+               /*
+                * For a foreign table, call the FDW's hook function to see whether it
+                * supports analysis.
+                */
+               FdwRoutine *fdwroutine;
+               bool            ok = false;
+
+               fdwroutine = GetFdwRoutineForRelation(onerel, false);
+
+               if (fdwroutine->AnalyzeForeignTable != NULL)
+                       ok = fdwroutine->AnalyzeForeignTable(onerel,
+                                                                                                &acquirefunc,
+                                                                                                &relpages);
+
+               if (!ok)
+               {
+                       ereport(WARNING,
+                        (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
+                                        RelationGetRelationName(onerel))));
+                       relation_close(onerel, ShareUpdateExclusiveLock);
+                       return;
+               }
+       }
+       else
+       {
+               /* No need for a WARNING if we already complained during VACUUM */
+               if (!(options & VACOPT_VACUUM))
+                       ereport(WARNING,
+                                       (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
+                                                       RelationGetRelationName(onerel))));
+               relation_close(onerel, ShareUpdateExclusiveLock);
+               return;
+       }
+
+       /*
+        * OK, let's do it.  First let other backends know I'm in ANALYZE.
+        */
+       LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+       MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
+       LWLockRelease(ProcArrayLock);
+
+       /*
+        * Do the normal non-recursive ANALYZE.
+        */
+       do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages,
+                                  false, in_outer_xact, elevel);
  
         /*
-        * Switch to the table owner's userid, so that any index functions are
-        * run as that user.
+        * If there are child tables, do recursive ANALYZE.
+        */
+       if (onerel->rd_rel->relhassubclass)
+               do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages,
+                                          true, in_outer_xact, elevel);
+
+       /*
+        * Close source relation now, but keep lock so that no one deletes it
+        * before we commit.  (If someone did, they'd fail to clean up the entries
+        * we made in pg_statistic.  Also, releasing the lock before commit would
+        * expose us to concurrent-update failures in update_attstats.)
          */
-       GetUserIdAndContext(&save_userid, &save_secdefcxt);
-       SetUserIdAndContext(onerel->rd_rel->relowner, true);
+       relation_close(onerel, NoLock);
  
-       /* let others know what I'm doing */
+       /*
+        * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
+        * because the vacuum flag is cleared by the end-of-xact code.
+        */
         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-       MyProc->vacuumFlags |= PROC_IN_ANALYZE;
+       MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
         LWLockRelease(ProcArrayLock);
+}
+
+/*
+ *     do_analyze_rel() -- analyze one relation, recursively or not
+ *
+ * Note that "acquirefunc" is only relevant for the non-inherited case.
+ * For the inherited case, acquire_inherited_sample_rows() determines the
+ * appropriate acquirefunc for each child table.
+ */
+static void
+do_analyze_rel(Relation onerel, int options, VacuumParams *params,
+                          List *va_cols, AcquireSampleRowsFunc acquirefunc,
+                          BlockNumber relpages, bool inh, bool in_outer_xact,
+                          int elevel)
+{
+       int                     attr_cnt,
+                               tcnt,
+                               i,
+                               ind;
+       Relation   *Irel;
+       int                     nindexes;
+       bool            hasindex;
+       VacAttrStats **vacattrstats;
+       AnlIndexData *indexdata;
+       int                     targrows,
+                               numrows;
+       double          totalrows,
+                               totaldeadrows;
+       HeapTuple  *rows;
+       PGRUsage        ru0;
+       TimestampTz starttime = 0;
+       MemoryContext caller_context;
+       Oid                     save_userid;
+       int                     save_sec_context;
+       int                     save_nestlevel;
+
+       if (inh)
+               ereport(elevel,
+                               (errmsg("analyzing \"%s.%s\" inheritance tree",
+                                               get_namespace_name(RelationGetNamespace(onerel)),
+                                               RelationGetRelationName(onerel))));
+       else
+               ereport(elevel,
+                               (errmsg("analyzing \"%s.%s\"",
+                                               get_namespace_name(RelationGetNamespace(onerel)),
+                                               RelationGetRelationName(onerel))));
+
+       /*
+        * Set up a working context so that we can easily free whatever junk gets
+        * created.
+        */
+       anl_context = AllocSetContextCreate(CurrentMemoryContext,
+                                                                               "Analyze",
+                                                                               ALLOCSET_DEFAULT_MINSIZE,
+                                                                               ALLOCSET_DEFAULT_INITSIZE,
+                                                                               ALLOCSET_DEFAULT_MAXSIZE);
+       caller_context = MemoryContextSwitchTo(anl_context);
+
+       /*
+        * Switch to the table owner's userid, so that any index functions are run
+        * as that user.  Also lock down security-restricted operations and
+        * arrange to make GUC variable changes local to this command.
+        */
+       GetUserIdAndSecContext(&save_userid, &save_sec_context);
+       SetUserIdAndSecContext(onerel->rd_rel->relowner,
+                                                  save_sec_context | SECURITY_RESTRICTED_OPERATION);
+       save_nestlevel = NewGUCNestLevel();
  
         /* measure elapsed time iff autovacuum logging requires it */
-       if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
+       if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
         {
                 pg_rusage_init(&ru0);
-               if (Log_autovacuum_min_duration > 0)
+               if (params->log_min_duration > 0)
                         starttime = GetCurrentTimestamp();
         }
  
@@ -258,14 +360,14 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
          *
          * Note that system attributes are never analyzed.
          */
-       if (vacstmt->va_cols != NIL)
+       if (va_cols != NIL)
         {
                 ListCell   *le;
  
-               vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
+               vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
                                                                                                 sizeof(VacAttrStats *));
                 tcnt = 0;
-               foreach(le, vacstmt->va_cols)
+               foreach(le, va_cols)
                 {
                         char       *col = strVal(lfirst(le));
  
@@ -275,7 +377,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
                                         errmsg("column \"%s\" of relation \"%s\" does not exist",
                                                    col, RelationGetRelationName(onerel))));
-                       vacattrstats[tcnt] = examine_attribute(onerel, i);
+                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
                         if (vacattrstats[tcnt] != NULL)
                                 tcnt++;
                 }
@@ -289,7 +391,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                 tcnt = 0;
                 for (i = 1; i <= attr_cnt; i++)
                 {
-                       vacattrstats[tcnt] = examine_attribute(onerel, i);
+                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
                         if (vacattrstats[tcnt] != NULL)
                                 tcnt++;
                 }
@@ -298,13 +400,20 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
  
         /*
          * Open all indexes of the relation, and see if there are any analyzable
-        * columns in the indexes.      We do not analyze index columns if there was
-        * an explicit column list in the ANALYZE command, however.
+        * columns in the indexes.  We do not analyze index columns if there was
+        * an explicit column list in the ANALYZE command, however.  If we are
+        * doing a recursive scan, we don't want to touch the parent's indexes at
+        * all.
          */
-       vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+       if (!inh)
+               vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+       else
+       {
+               Irel = NULL;
+               nindexes = 0;
+       }
         hasindex = (nindexes > 0);
         indexdata = NULL;
-       analyzableindex = false;
         if (hasindex)
         {
                 indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
@@ -315,7 +424,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
  
                         thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
                         thisdata->tupleFract = 1.0; /* fix later if partial */
-                       if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL)
+                       if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
                         {
                                 ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
  
@@ -335,26 +444,10 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                                                         elog(ERROR, "too few entries in indexprs list");
                                                 indexkey = (Node *) lfirst(indexpr_item);
                                                 indexpr_item = lnext(indexpr_item);
-
-                                               /*
-                                                * Can't analyze if the opclass uses a storage type
-                                                * different from the expression result type. We'd get
-                                                * confused because the type shown in pg_attribute for
-                                                * the index column doesn't match what we are getting
-                                                * from the expression. Perhaps this can be fixed
-                                                * someday, but for now, punt.
-                                                */
-                                               if (exprType(indexkey) !=
-                                                       Irel[ind]->rd_att->attrs[i]->atttypid)
-                                                       continue;
-
                                                 thisdata->vacattrstats[tcnt] =
-                                                       examine_attribute(Irel[ind], i + 1);
+                                                       examine_attribute(Irel[ind], i + 1, indexkey);
                                                 if (thisdata->vacattrstats[tcnt] != NULL)
-                                               {
                                                         tcnt++;
-                                                       analyzableindex = true;
-                                               }
                                         }
                                 }
                                 thisdata->attr_cnt = tcnt;
@@ -362,16 +455,11 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                 }
         }
  
-       /*
-        * Quit if no analyzable columns and no pg_class update needed.
-        */
-       if (attr_cnt <= 0 && !analyzableindex && !update_reltuples)
-               goto cleanup;
-
         /*
          * Determine how many rows we need to sample, using the worst case from
-        * all analyzable columns.      We use a lower bound of 100 rows to avoid
-        * possible overflow in Vitter's algorithm.
+        * all analyzable columns.  We use a lower bound of 100 rows to avoid
+        * possible overflow in Vitter's algorithm.  (Note: that will also be the
+        * target in the corner case where there are no analyzable columns.)
          */
         targrows = 100;
         for (i = 0; i < attr_cnt; i++)
@@ -394,11 +482,17 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
          * Acquire the sample rows
          */
         rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
-       numrows = acquire_sample_rows(onerel, rows, targrows,
+       if (inh)
+               numrows = acquire_inherited_sample_rows(onerel, elevel,
+                                                                                               rows, targrows,
+                                                                                               &totalrows, &totaldeadrows);
+       else
+               numrows = (*acquirefunc) (onerel, elevel,
+                                                                 rows, targrows,
                                                                   &totalrows, &totaldeadrows);
  
         /*
-        * Compute the statistics.      Temporary results during the calculations for
+        * Compute the statistics.  Temporary results during the calculations for
          * each column are stored in a child context.  The calc routines are
          * responsible to make sure that whatever they store into the VacAttrStats
          * structure is allocated in anl_context.
@@ -418,6 +512,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                 for (i = 0; i < attr_cnt; i++)
                 {
                         VacAttrStats *stats = vacattrstats[i];
+                       AttributeOpts *aopt;
  
                         stats->rows = rows;
                         stats->tupDesc = onerel->rd_att;
@@ -425,6 +520,21 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                                                                          std_fetch_func,
                                                                          numrows,
                                                                          totalrows);
+
+                       /*
+                        * If the appropriate flavor of the n_distinct option is
+                        * specified, override with the corresponding value.
+                        */
+                       aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
+                       if (aopt != NULL)
+                       {
+                               float8          n_distinct;
+
+                               n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
+                               if (n_distinct != 0.0)
+                                       stats->stadistinct = n_distinct;
+                       }
+
                         MemoryContextResetAndDeleteChildren(col_context);
                 }
  
@@ -439,38 +549,41 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
  
                 /*
                  * Emit the completed stats rows into pg_statistic, replacing any
-                * previous statistics for the target columns.  (If there are stats in
+                * previous statistics for the target columns.  (If there are stats in
                  * pg_statistic for columns we didn't process, we leave them alone.)
                  */
-               update_attstats(relid, attr_cnt, vacattrstats);
+               update_attstats(RelationGetRelid(onerel), inh,
+                                               attr_cnt, vacattrstats);
  
                 for (ind = 0; ind < nindexes; ind++)
                 {
                         AnlIndexData *thisdata = &indexdata[ind];
  
-                       update_attstats(RelationGetRelid(Irel[ind]),
+                       update_attstats(RelationGetRelid(Irel[ind]), false,
                                                         thisdata->attr_cnt, thisdata->vacattrstats);
                 }
         }
  
         /*
-        * Update pages/tuples stats in pg_class.
+        * Update pages/tuples stats in pg_class ... but not if we're doing
+        * inherited stats.
          */
-       if (update_reltuples)
-       {
+       if (!inh)
                 vac_update_relstats(onerel,
-                                                       RelationGetNumberOfBlocks(onerel),
-                                                       totalrows, hasindex, InvalidTransactionId);
-               /* report results to the stats collector, too */
-               pgstat_report_analyze(onerel, totalrows, totaldeadrows);
-       }
+                                                       relpages,
+                                                       totalrows,
+                                                       visibilitymap_count(onerel),
+                                                       hasindex,
+                                                       InvalidTransactionId,
+                                                       InvalidMultiXactId,
+                                                       in_outer_xact);
  
         /*
          * Same for indexes. Vacuum always scans all indexes, so if we're part of
-        * VACUUM ANALYZE, don't overwrite the accurate count already inserted by 
+        * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
          * VACUUM.
          */
-       if (!vacstmt->vacuum)
+       if (!inh && !(options & VACOPT_VACUUM))
         {
                 for (ind = 0; ind < nindexes; ind++)
                 {
@@ -480,15 +593,25 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
                         totalindexrows = ceil(thisdata->tupleFract * totalrows);
                         vac_update_relstats(Irel[ind],
                                                                 RelationGetNumberOfBlocks(Irel[ind]),
-                                                               totalindexrows, false, InvalidTransactionId);
+                                                               totalindexrows,
+                                                               0,
+                                                               false,
+                                                               InvalidTransactionId,
+                                                               InvalidMultiXactId,
+                                                               in_outer_xact);
                 }
         }
  
-       /* We skip to here if there were no analyzable columns */
-cleanup:
+       /*
+        * Report ANALYZE to the stats collector, too.  However, if doing
+        * inherited stats we shouldn't report, because the stats collector only
+        * tracks per-table stats.
+        */
+       if (!inh)
+               pgstat_report_analyze(onerel, totalrows, totaldeadrows);
  
         /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
-       if (!vacstmt->vacuum)
+       if (!(options & VACOPT_VACUUM))
         {
                 for (ind = 0; ind < nindexes; ind++)
                 {
@@ -496,7 +619,6 @@ cleanup:
                         IndexVacuumInfo ivinfo;
  
                         ivinfo.index = Irel[ind];
-                       ivinfo.vacuum_full = false;
                         ivinfo.analyze_only = true;
                         ivinfo.estimated_count = true;
                         ivinfo.message_level = elevel;
@@ -513,20 +635,12 @@ cleanup:
         /* Done with indexes */
         vac_close_indexes(nindexes, Irel, NoLock);
  
-       /*
-        * Close source relation now, but keep lock so that no one deletes it
-        * before we commit.  (If someone did, they'd fail to clean up the entries
-        * we made in pg_statistic.  Also, releasing the lock before commit would
-        * expose us to concurrent-update failures in update_attstats.)
-        */
-       relation_close(onerel, NoLock);
-
         /* Log the action if appropriate */
-       if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
+       if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
         {
-               if (Log_autovacuum_min_duration == 0 ||
+               if (params->log_min_duration == 0 ||
                         TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
-                                                                          Log_autovacuum_min_duration))
+                                                                          params->log_min_duration))
                         ereport(LOG,
                                         (errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
                                                         get_database_name(MyDatabaseId),
@@ -535,16 +649,16 @@ cleanup:
                                                         pg_rusage_show(&ru0))));
         }
  
-       /*
-        * Reset my PGPROC flag.  Note: we need this here, and not in vacuum_rel,
-        * because the vacuum flag is cleared by the end-of-xact code.
-        */
-       LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-       MyProc->vacuumFlags &= ~PROC_IN_ANALYZE;
-       LWLockRelease(ProcArrayLock);
+       /* Roll back any GUC changes executed by index functions */
+       AtEOXact_GUC(false, save_nestlevel);
  
-       /* Restore userid */
-       SetUserIdAndContext(save_userid, save_secdefcxt);
+       /* Restore userid and security context */
+       SetUserIdAndSecContext(save_userid, save_sec_context);
+
+       /* Restore current context and release memory */
+       MemoryContextSwitchTo(caller_context);
+       MemoryContextDelete(anl_context);
+       anl_context = NULL;
  }
  
  /*
@@ -617,6 +731,14 @@ compute_index_stats(Relation onerel, double totalrows,
                 {
                         HeapTuple       heapTuple = rows[rowno];
  
+                       vacuum_delay_point();
+
+                       /*
+                        * Reset the per-tuple context each time, to reclaim any cruft
+                        * left behind by evaluating the predicate or index expressions.
+                        */
+                       ResetExprContext(econtext);
+
                         /* Set up for predicate or expression evaluation */
                         ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
  
@@ -641,15 +763,26 @@ compute_index_stats(Relation onerel, double totalrows,
                                                            isnull);
  
                                 /*
-                                * Save just the columns we care about.
+                                * Save just the columns we care about.  We copy the values
+                                * into ind_context from the estate's per-tuple context.
                                  */
                                 for (i = 0; i < attr_cnt; i++)
                                 {
                                         VacAttrStats *stats = thisdata->vacattrstats[i];
                                         int                     attnum = stats->attr->attnum;
  
-                                       exprvals[tcnt] = values[attnum - 1];
-                                       exprnulls[tcnt] = isnull[attnum - 1];
+                                       if (isnull[attnum - 1])
+                                       {
+                                               exprvals[tcnt] = (Datum) 0;
+                                               exprnulls[tcnt] = true;
+                                       }
+                                       else
+                                       {
+                                               exprvals[tcnt] = datumCopy(values[attnum - 1],
+                                                                                                  stats->attrtype->typbyval,
+                                                                                                  stats->attrtype->typlen);
+                                               exprnulls[tcnt] = false;
+                                       }
                                         tcnt++;
                                 }
                         }
@@ -671,6 +804,9 @@ compute_index_stats(Relation onerel, double totalrows,
                         for (i = 0; i < attr_cnt; i++)
                         {
                                 VacAttrStats *stats = thisdata->vacattrstats[i];
+                               AttributeOpts *aopt =
+                               get_attribute_options(stats->attr->attrelid,
+                                                                         stats->attr->attnum);
  
                                 stats->exprvals = exprvals + i;
                                 stats->exprnulls = exprnulls + i;
@@ -679,6 +815,15 @@ compute_index_stats(Relation onerel, double totalrows,
                                                                                  ind_fetch_func,
                                                                                  numindexrows,
                                                                                  totalindexrows);
+
+                               /*
+                                * If the n_distinct option is specified, it overrides the
+                                * above computation.  For indices, we always use just
+                                * n_distinct, not n_distinct_inherited.
+                                */
+                               if (aopt != NULL && aopt->n_distinct != 0.0)
+                                       stats->stadistinct = aopt->n_distinct;
+
                                 MemoryContextResetAndDeleteChildren(col_context);
                         }
                 }
@@ -700,9 +845,12 @@ compute_index_stats(Relation onerel, double totalrows,
   *
   * Determine whether the column is analyzable; if so, create and initialize
   * a VacAttrStats struct for it.  If not, return NULL.
+ *
+ * If index_expr isn't NULL, then we're trying to analyze an expression index,
+ * and index_expr is the expression tree representing the column's data.
   */
  static VacAttrStats *
-examine_attribute(Relation onerel, int attnum)
+examine_attribute(Relation onerel, int attnum, Node *index_expr)
  {
         Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
         HeapTuple       typtuple;
@@ -719,39 +867,56 @@ examine_attribute(Relation onerel, int attnum)
                 return NULL;
  
         /*
-        * Create the VacAttrStats struct.  Note that we only have a copy of
-        * the fixed fields of the pg_attribute tuple.
+        * Create the VacAttrStats struct.  Note that we only have a copy of the
+        * fixed fields of the pg_attribute tuple.
          */
         stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
         stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
         memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
-       typtuple = SearchSysCache(TYPEOID,
-                                                         ObjectIdGetDatum(attr->atttypid),
-                                                         0, 0, 0);
+
+       /*
+        * When analyzing an expression index, believe the expression tree's type
+        * not the column datatype --- the latter might be the opckeytype storage
+        * type of the opclass, which is not interesting for our purposes.  (Note:
+        * if we did anything with non-expression index columns, we'd need to
+        * figure out where to get the correct type info from, but for now that's
+        * not a problem.)      It's not clear whether anyone will care about the
+        * typmod, but we store that too just in case.
+        */
+       if (index_expr)
+       {
+               stats->attrtypid = exprType(index_expr);
+               stats->attrtypmod = exprTypmod(index_expr);
+       }
+       else
+       {
+               stats->attrtypid = attr->atttypid;
+               stats->attrtypmod = attr->atttypmod;
+       }
+
+       typtuple = SearchSysCacheCopy1(TYPEOID,
+                                                                  ObjectIdGetDatum(stats->attrtypid));
         if (!HeapTupleIsValid(typtuple))
-               elog(ERROR, "cache lookup failed for type %u", attr->atttypid);
-       stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
-       memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
-       ReleaseSysCache(typtuple);
+               elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+       stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
         stats->anl_context = anl_context;
         stats->tupattnum = attnum;
  
         /*
-        * The fields describing the stats->stavalues[n] element types default
-        * to the type of the field being analyzed, but the type-specific
-        * typanalyze function can change them if it wants to store something
-        * else.
+        * The fields describing the stats->stavalues[n] element types default to
+        * the type of the data being analyzed, but the type-specific typanalyze
+        * function can change them if it wants to store something else.
          */
         for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
         {
-               stats->statypid[i] = stats->attr->atttypid;
+               stats->statypid[i] = stats->attrtypid;
                 stats->statyplen[i] = stats->attrtype->typlen;
                 stats->statypbyval[i] = stats->attrtype->typbyval;
                 stats->statypalign[i] = stats->attrtype->typalign;
         }
  
         /*
-        * Call the type-specific typanalyze function.  If none is specified, use
+        * Call the type-specific typanalyze function.  If none is specified, use
          * std_typanalyze().
          */
         if (OidIsValid(stats->attrtype->typanalyze))
@@ -762,7 +927,7 @@ examine_attribute(Relation onerel, int attnum)
  
         if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
         {
-               pfree(stats->attrtype);
+               heap_freetuple(typtuple);
                 pfree(stats->attr);
                 pfree(stats);
                 return NULL;
@@ -772,95 +937,16 @@ examine_attribute(Relation onerel, int attnum)
  }
  
  /*
- * BlockSampler_Init -- prepare for random sampling of blocknumbers
+ * acquire_sample_rows -- acquire a random sample of rows from the table
   *
- * BlockSampler is used for stage one of our new two-stage tuple
- * sampling mechanism as discussed on pgsql-hackers 2004-04-02 (subject
- * "Large DB").  It selects a random sample of samplesize blocks out of
- * the nblocks blocks in the table.  If the table has less than
- * samplesize blocks, all blocks are selected.
+ * Selected rows are returned in the caller-allocated array rows[], which
+ * must have at least targrows entries.
+ * The actual number of rows selected is returned as the function result.
+ * We also estimate the total numbers of live and dead rows in the table,
+ * and return them into *totalrows and *totaldeadrows, respectively.
   *
- * Since we know the total number of blocks in advance, we can use the
- * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
- * algorithm.
- */
-static void
-BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize)
-{
-       bs->N = nblocks;                        /* measured table size */
-
-       /*
-        * If we decide to reduce samplesize for tables that have less or not much
-        * more than samplesize blocks, here is the place to do it.
-        */
-       bs->n = samplesize;
-       bs->t = 0;                                      /* blocks scanned so far */
-       bs->m = 0;                                      /* blocks selected so far */
-}
-
-static bool
-BlockSampler_HasMore(BlockSampler bs)
-{
-       return (bs->t < bs->N) && (bs->m < bs->n);
-}
-
-static BlockNumber
-BlockSampler_Next(BlockSampler bs)
-{
-       BlockNumber K = bs->N - bs->t;          /* remaining blocks */
-       int                     k = bs->n - bs->m;              /* blocks still to sample */
-       double          p;                              /* probability to skip block */
-       double          V;                              /* random */
-
-       Assert(BlockSampler_HasMore(bs));       /* hence K > 0 and k > 0 */
-
-       if ((BlockNumber) k >= K)
-       {
-               /* need all the rest */
-               bs->m++;
-               return bs->t++;
-       }
-
-       /*----------
-        * It is not obvious that this code matches Knuth's Algorithm S.
-        * Knuth says to skip the current block with probability 1 - k/K.
-        * If we are to skip, we should advance t (hence decrease K), and
-        * repeat the same probabilistic test for the next block.  The naive
-        * implementation thus requires a random_fract() call for each block
-        * number.      But we can reduce this to one random_fract() call per
-        * selected block, by noting that each time the while-test succeeds,
-        * we can reinterpret V as a uniform random number in the range 0 to p.
-        * Therefore, instead of choosing a new V, we just adjust p to be
-        * the appropriate fraction of its former value, and our next loop
-        * makes the appropriate probabilistic test.
-        *
-        * We have initially K > k > 0.  If the loop reduces K to equal k,
-        * the next while-test must fail since p will become exactly zero
-        * (we assume there will not be roundoff error in the division).
-        * (Note: Knuth suggests a "<=" loop condition, but we use "<" just
-        * to be doubly sure about roundoff error.)  Therefore K cannot become
-        * less than k, which means that we cannot fail to select enough blocks.
-        *----------
-        */
-       V = random_fract();
-       p = 1.0 - (double) k / (double) K;
-       while (V < p)
-       {
-               /* skip */
-               bs->t++;
-               K--;                                    /* keep K == N - t */
-
-               /* adjust p to be new cutoff point in reduced range */
-               p *= 1.0 - (double) k / (double) K;
-       }
-
-       /* select */
-       bs->m++;
-       return bs->t++;
-}
-
-/*
- * acquire_sample_rows -- acquire a random sample of rows from the table
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
   *
   * As of May 2004 we use a new two-stage method:  Stage one selects up
   * to targrows random blocks (or all blocks, if there aren't so many).
@@ -877,43 +963,38 @@ BlockSampler_Next(BlockSampler bs)
   * the number of different blocks represented by the sample tends to be
   * too small.  We can live with that for now.  Improvements are welcome.
   *
- * We also estimate the total numbers of live and dead rows in the table,
- * and return them into *totalrows and *totaldeadrows, respectively.
- *
   * An important property of this sampling method is that because we do
   * look at a statistically unbiased set of blocks, we should get
   * unbiased estimates of the average numbers of live and dead rows per
   * block.  The previous sampling method put too much credence in the row
   * density near the start of the table.
- *
- * The returned list of tuples is in order by physical position in the table.
- * (We will rely on this later to derive correlation estimates.)
   */
  static int
-acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+acquire_sample_rows(Relation onerel, int elevel,
+                                       HeapTuple *rows, int targrows,
                                         double *totalrows, double *totaldeadrows)
  {
         int                     numrows = 0;    /* # rows now in reservoir */
-       double          samplerows = 0; /* total # rows collected */
+       double          samplerows = 0; /* total # rows collected */
         double          liverows = 0;   /* # live rows seen */
         double          deadrows = 0;   /* # dead rows seen */
         double          rowstoskip = -1;        /* -1 means not set yet */
         BlockNumber totalblocks;
         TransactionId OldestXmin;
         BlockSamplerData bs;
-       double          rstate;
+       ReservoirStateData rstate;
  
-       Assert(targrows > 1);
+       Assert(targrows > 0);
  
         totalblocks = RelationGetNumberOfBlocks(onerel);
  
         /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
-       OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
+       OldestXmin = GetOldestXmin(onerel, true);
  
         /* Prepare for sampling block numbers */
-       BlockSampler_Init(&bs, totalblocks, targrows);
+       BlockSampler_Init(&bs, totalblocks, targrows, random());
         /* Prepare for sampling rows */
-       rstate = init_selection_state(targrows);
+       reservoir_init_selection_state(&rstate, targrows);
  
         /* Outer loop over blocks to sample */
         while (BlockSampler_HasMore(&bs))
@@ -931,9 +1012,9 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                  * the maxoffset value stays good (else concurrent VACUUM might delete
                  * tuples out from under us).  Hence, pin the page until we are done
                  * looking at it.  We also choose to hold sharelock on the buffer
-                * throughout --- we could release and re-acquire sharelock for
-                * each tuple, but since we aren't doing much work per tuple, the
-                * extra lock traffic is probably better avoided.
+                * throughout --- we could release and re-acquire sharelock for each
+                * tuple, but since we aren't doing much work per tuple, the extra
+                * lock traffic is probably better avoided.
                  */
                 targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
                                                                                 RBM_NORMAL, vac_strategy);
@@ -952,9 +1033,9 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
  
                         /*
                          * We ignore unused and redirect line pointers.  DEAD line
-                        * pointers should be counted as dead, because we need vacuum
-                        * to run to get rid of them.  Note that this rule agrees with
-                        * the way that heap_page_prune() counts things.
+                        * pointers should be counted as dead, because we need vacuum to
+                        * run to get rid of them.  Note that this rule agrees with the
+                        * way that heap_page_prune() counts things.
                          */
                         if (!ItemIdIsNormal(itemid))
                         {
@@ -965,10 +1046,11 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
  
                         ItemPointerSet(&targtuple.t_self, targblock, targoffset);
  
+                       targtuple.t_tableOid = RelationGetRelid(onerel);
                         targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
                         targtuple.t_len = ItemIdGetLength(itemid);
  
-                       switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
+                       switch (HeapTupleSatisfiesVacuum(&targtuple,
                                                                                          OldestXmin,
                                                                                          targbuffer))
                         {
@@ -984,6 +1066,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                         break;
  
                                 case HEAPTUPLE_INSERT_IN_PROGRESS:
+
                                         /*
                                          * Insert-in-progress rows are not counted.  We assume
                                          * that when the inserting transaction commits or aborts,
@@ -991,17 +1074,17 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                          * count.  This works right only if that transaction ends
                                          * after we finish analyzing the table; if things happen
                                          * in the other order, its stats update will be
-                                        * overwritten by ours.  However, the error will be
-                                        * large only if the other transaction runs long enough
-                                        * to insert many tuples, so assuming it will finish
-                                        * after us is the safer option.
+                                        * overwritten by ours.  However, the error will be large
+                                        * only if the other transaction runs long enough to
+                                        * insert many tuples, so assuming it will finish after us
+                                        * is the safer option.
                                          *
                                          * A special case is that the inserting transaction might
                                          * be our own.  In this case we should count and sample
                                          * the row, to accommodate users who load a table and
                                          * analyze it in one transaction.  (pgstat_report_analyze
-                                        * has to adjust the numbers we send to the stats collector
-                                        * to make this come out right.)
+                                        * has to adjust the numbers we send to the stats
+                                        * collector to make this come out right.)
                                          */
                                         if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
                                         {
@@ -1011,6 +1094,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                         break;
  
                                 case HEAPTUPLE_DELETE_IN_PROGRESS:
+
                                         /*
                                          * We count delete-in-progress rows as still live, using
                                          * the same reasoning given above; but we don't bother to
@@ -1019,10 +1103,10 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                          * If the delete was done by our own transaction, however,
                                          * we must count the row as dead to make
                                          * pgstat_report_analyze's stats adjustments come out
-                                        * right.  (Note: this works out properly when the row
-                                        * was both inserted and deleted in our xact.)
+                                        * right.  (Note: this works out properly when the row was
+                                        * both inserted and deleted in our xact.)
                                          */
-                                       if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(targtuple.t_data)))
+                                       if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
                                                 deadrows += 1;
                                         else
                                                 liverows += 1;
@@ -1038,7 +1122,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                 /*
                                  * The first targrows sample rows are simply copied into the
                                  * reservoir. Then we start replacing tuples in the sample
-                                * until we reach the end of the relation.      This algorithm is
+                                * until we reach the end of the relation.  This algorithm is
                                  * from Jeff Vitter's paper (see full citation below). It
                                  * works by repeatedly computing the number of tuples to skip
                                  * before selecting a tuple, which replaces a randomly chosen
@@ -1054,11 +1138,11 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                         /*
                                          * t in Vitter's paper is the number of records already
                                          * processed.  If we need to compute a new S value, we
-                                        * must use the not-yet-incremented value of samplerows
-                                        * as t.
+                                        * must use the not-yet-incremented value of samplerows as
+                                        * t.
                                          */
                                         if (rowstoskip < 0)
-                                               rowstoskip = get_next_S(samplerows, targrows, &rstate);
+                                               rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
  
                                         if (rowstoskip <= 0)
                                         {
@@ -1066,7 +1150,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                                                  * Found a suitable tuple, so save it, replacing one
                                                  * old tuple at random
                                                  */
-                                               int                     k = (int) (targrows * random_fract());
+                                               int                     k = (int) (targrows * sampler_random_fract(rstate.randstate));
  
                                                 Assert(k >= 0 && k < targrows);
                                                 heap_freetuple(rows[k]);
@@ -1096,18 +1180,19 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                 qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
  
         /*
-        * Estimate total numbers of rows in relation.
+        * Estimate total numbers of rows in relation.  For live rows, use
+        * vac_estimate_reltuples; for dead rows, we have no source of old
+        * information, so we have to assume the density is the same in unseen
+        * pages as in the pages we scanned.
          */
+       *totalrows = vac_estimate_reltuples(onerel, true,
+                                                                               totalblocks,
+                                                                               bs.m,
+                                                                               liverows);
         if (bs.m > 0)
-       {
-               *totalrows = floor((liverows * totalblocks) / bs.m + 0.5);
-               *totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5);
-       }
+               *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
         else
-       {
-               *totalrows = 0.0;
                 *totaldeadrows = 0.0;
-       }
  
         /*
          * Emit some interesting relation info
@@ -1124,138 +1209,243 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
         return numrows;
  }
  
-/* Select a random value R uniformly distributed in (0 - 1) */
-static double
-random_fract(void)
+/*
+ * qsort comparator for sorting rows[] array into (block, offset) order
+ */
+static int
+compare_rows(const void *a, const void *b)
  {
-       return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+       HeapTuple       ha = *(const HeapTuple *) a;
+       HeapTuple       hb = *(const HeapTuple *) b;
+       BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
+       OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
+       BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
+       OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
+
+       if (ba < bb)
+               return -1;
+       if (ba > bb)
+               return 1;
+       if (oa < ob)
+               return -1;
+       if (oa > ob)
+               return 1;
+       return 0;
  }
  
+
  /*
- * These two routines embody Algorithm Z from "Random sampling with a
- * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
- * (Mar. 1985), Pages 37-57.  Vitter describes his algorithm in terms
- * of the count S of records to skip before processing another record.
- * It is computed primarily based on t, the number of records already read.
- * The only extra state needed between calls is W, a random state variable.
+ * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
   *
- * init_selection_state computes the initial W value.
- *
- * Given that we've already read t records (t >= n), get_next_S
- * determines the number of records to skip before the next record is
- * processed.
+ * This has the same API as acquire_sample_rows, except that rows are
+ * collected from all inheritance children as well as the specified table.
+ * We fail and return zero if there are no inheritance children, or if none
+ * of the children is analyzable (e.g. foreign tables not supporting ANALYZE).
   */
-static double
-init_selection_state(int n)
+static int
+acquire_inherited_sample_rows(Relation onerel, int elevel,
+                                                         HeapTuple *rows, int targrows,
+                                                         double *totalrows, double *totaldeadrows)
  {
-       /* Initial value of W (for use when Algorithm Z is first applied) */
-       return exp(-log(random_fract()) / n);
-}
+       List       *tableOIDs;
+       Relation   *rels;
+       AcquireSampleRowsFunc *acquirefuncs;
+       double     *relblocks;
+       double          totalblocks;
+       int                     numrows,
+                               nrels,
+                               i;
+       ListCell   *lc;
  
-static double
-get_next_S(double t, int n, double *stateptr)
-{
-       double          S;
+       /*
+        * Find all members of inheritance set.  We only need AccessShareLock on
+        * the children.
+        */
+       tableOIDs =
+               find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);
  
-       /* The magic constant here is T from Vitter's paper */
-       if (t <= (22.0 * n))
+       /*
+        * Check that there's at least one descendant, else fail.  This could
+        * happen despite analyze_rel's relhassubclass check, if table once had a
+        * child but no longer does.  In that case, we can clear the
+        * relhassubclass field so as not to make the same mistake again later.
+        * (This is safe because we hold ShareUpdateExclusiveLock.)
+        */
+       if (list_length(tableOIDs) < 2)
         {
-               /* Process records using Algorithm X until t is large enough */
-               double          V,
-                                       quot;
-
-               V = random_fract();             /* Generate V */
-               S = 0;
-               t += 1;
-               /* Note: "num" in Vitter's code is always equal to t - n */
-               quot = (t - (double) n) / t;
-               /* Find min S satisfying (4.1) */
-               while (quot > V)
-               {
-                       S += 1;
-                       t += 1;
-                       quot *= (t - (double) n) / t;
-               }
+               /* CCI because we already updated the pg_class row in this command */
+               CommandCounterIncrement();
+               SetRelationHasSubclass(RelationGetRelid(onerel), false);
+               ereport(elevel,
+                               (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
+                                               get_namespace_name(RelationGetNamespace(onerel)),
+                                               RelationGetRelationName(onerel))));
+               return 0;
         }
-       else
+
+       /*
+        * Identify acquirefuncs to use, and count blocks in all the relations.
+        * The result could overflow BlockNumber, so we use double arithmetic.
+        */
+       rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
+       acquirefuncs = (AcquireSampleRowsFunc *)
+               palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
+       relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
+       totalblocks = 0;
+       nrels = 0;
+       foreach(lc, tableOIDs)
         {
-               /* Now apply Algorithm Z */
-               double          W = *stateptr;
-               double          term = t - (double) n + 1;
+               Oid                     childOID = lfirst_oid(lc);
+               Relation        childrel;
+               AcquireSampleRowsFunc acquirefunc = NULL;
+               BlockNumber relpages = 0;
+
+               /* We already got the needed lock */
+               childrel = heap_open(childOID, NoLock);
  
-               for (;;)
+               /* Ignore if temp table of another backend */
+               if (RELATION_IS_OTHER_TEMP(childrel))
                 {
-                       double          numer,
-                                               numer_lim,
-                                               denom;
-                       double          U,
-                                               X,
-                                               lhs,
-                                               rhs,
-                                               y,
-                                               tmp;
-
-                       /* Generate U and X */
-                       U = random_fract();
-                       X = t * (W - 1.0);
-                       S = floor(X);           /* S is tentatively set to floor(X) */
-                       /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
-                       tmp = (t + 1) / term;
-                       lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);
-                       rhs = (((t + X) / (term + S)) * term) / t;
-                       if (lhs <= rhs)
-                       {
-                               W = rhs / lhs;
-                               break;
-                       }
-                       /* Test if U <= f(S)/cg(X) */
-                       y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);
-                       if ((double) n < S)
-                       {
-                               denom = t;
-                               numer_lim = term + S;
-                       }
-                       else
+                       /* ... but release the lock on it */
+                       Assert(childrel != onerel);
+                       heap_close(childrel, AccessShareLock);
+                       continue;
+               }
+
+               /* Check table type (MATVIEW can't happen, but might as well allow) */
+               if (childrel->rd_rel->relkind == RELKIND_RELATION ||
+                       childrel->rd_rel->relkind == RELKIND_MATVIEW)
+               {
+                       /* Regular table, so use the regular row acquisition function */
+                       acquirefunc = acquire_sample_rows;
+                       relpages = RelationGetNumberOfBlocks(childrel);
+               }
+               else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+               {
+                       /*
+                        * For a foreign table, call the FDW's hook function to see
+                        * whether it supports analysis.
+                        */
+                       FdwRoutine *fdwroutine;
+                       bool            ok = false;
+
+                       fdwroutine = GetFdwRoutineForRelation(childrel, false);
+
+                       if (fdwroutine->AnalyzeForeignTable != NULL)
+                               ok = fdwroutine->AnalyzeForeignTable(childrel,
+                                                                                                        &acquirefunc,
+                                                                                                        &relpages);
+
+                       if (!ok)
                         {
-                               denom = t - (double) n + S;
-                               numer_lim = t + 1;
+                               /* ignore, but release the lock on it */
+                               Assert(childrel != onerel);
+                               heap_close(childrel, AccessShareLock);
+                               continue;
                         }
-                       for (numer = t + S; numer >= numer_lim; numer -= 1)
+               }
+               else
+               {
+                       /* ignore, but release the lock on it */
+                       Assert(childrel != onerel);
+                       heap_close(childrel, AccessShareLock);
+                       continue;
+               }
+
+               /* OK, we'll process this child */
+               rels[nrels] = childrel;
+               acquirefuncs[nrels] = acquirefunc;
+               relblocks[nrels] = (double) relpages;
+               totalblocks += (double) relpages;
+               nrels++;
+       }
+
+       /*
+        * If we don't have at least two tables to consider, fail.
+        */
+       if (nrels < 2)
+       {
+               ereport(elevel,
+                               (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
+                                               get_namespace_name(RelationGetNamespace(onerel)),
+                                               RelationGetRelationName(onerel))));
+               return 0;
+       }
+
+       /*
+        * Now sample rows from each relation, proportionally to its fraction of
+        * the total block count.  (This might be less than desirable if the child
+        * rels have radically different free-space percentages, but it's not
+        * clear that it's worth working harder.)
+        */
+       numrows = 0;
+       *totalrows = 0;
+       *totaldeadrows = 0;
+       for (i = 0; i < nrels; i++)
+       {
+               Relation        childrel = rels[i];
+               AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
+               double          childblocks = relblocks[i];
+
+               if (childblocks > 0)
+               {
+                       int                     childtargrows;
+
+                       childtargrows = (int) rint(targrows * childblocks / totalblocks);
+                       /* Make sure we don't overrun due to roundoff error */
+                       childtargrows = Min(childtargrows, targrows - numrows);
+                       if (childtargrows > 0)
                         {
-                               y *= numer / denom;
-                               denom -= 1;
+                               int                     childrows;
+                               double          trows,
+                                                       tdrows;
+
+                               /* Fetch a random sample of the child's rows */
+                               childrows = (*acquirefunc) (childrel, elevel,
+                                                                                       rows + numrows, childtargrows,
+                                                                                       &trows, &tdrows);
+
+                               /* We may need to convert from child's rowtype to parent's */
+                               if (childrows > 0 &&
+                                       !equalTupleDescs(RelationGetDescr(childrel),
+                                                                        RelationGetDescr(onerel)))
+                               {
+                                       TupleConversionMap *map;
+
+                                       map = convert_tuples_by_name(RelationGetDescr(childrel),
+                                                                                                RelationGetDescr(onerel),
+                                                                gettext_noop("could not convert row type"));
+                                       if (map != NULL)
+                                       {
+                                               int                     j;
+
+                                               for (j = 0; j < childrows; j++)
+                                               {
+                                                       HeapTuple       newtup;
+
+                                                       newtup = do_convert_tuple(rows[numrows + j], map);
+                                                       heap_freetuple(rows[numrows + j]);
+                                                       rows[numrows + j] = newtup;
+                                               }
+                                               free_conversion_map(map);
+                                       }
+                               }
+
+                               /* And add to counts */
+                               numrows += childrows;
+                               *totalrows += trows;
+                               *totaldeadrows += tdrows;
                         }
-                       W = exp(-log(random_fract()) / n);      /* Generate W in advance */
-                       if (exp(log(y) / n) <= (t + X) / t)
-                               break;
                 }
-               *stateptr = W;
-       }
-       return S;
-}
  
-/*
- * qsort comparator for sorting rows[] array
- */
-static int
-compare_rows(const void *a, const void *b)
-{
-       HeapTuple       ha = *(HeapTuple *) a;
-       HeapTuple       hb = *(HeapTuple *) b;
-       BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
-       OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
-       BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
-       OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
+               /*
+                * Note: we cannot release the child-table locks, since we may have
+                * pointers to their TOAST tables in the sampled rows.
+                */
+               heap_close(childrel, NoLock);
+       }
  
-       if (ba < bb)
-               return -1;
-       if (ba > bb)
-               return 1;
-       if (oa < ob)
-               return -1;
-       if (oa > ob)
-               return 1;
-       return 0;
+       return numrows;
  }
  
  
@@ -1265,7 +1455,7 @@ compare_rows(const void *a, const void *b)
   *             Statistics are stored in several places: the pg_class row for the
   *             relation has stats about the whole relation, and there is a
   *             pg_statistic row for each (non-system) attribute that has ever
- *             been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *             been analyzed.  The pg_class values are updated by VACUUM, not here.
   *
   *             pg_statistic rows are just added or updated normally.  This means
   *             that pg_statistic will probably contain some deleted rows at the
@@ -1282,7 +1472,7 @@ compare_rows(const void *a, const void *b)
   *             by taking a self-exclusive lock on the relation in analyze_rel().
   */
  static void
-update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
  {
         Relation        sd;
         int                     attno;
@@ -1317,20 +1507,23 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
                         replaces[i] = true;
                 }
  
-               i = 0;
-               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-               values[i++] = Int16GetDatum(stats->attr->attnum);               /* staattnum */
-               values[i++] = Float4GetDatum(stats->stanullfrac);               /* stanullfrac */
-               values[i++] = Int32GetDatum(stats->stawidth);   /* stawidth */
-               values[i++] = Float4GetDatum(stats->stadistinct);               /* stadistinct */
+               values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
+               values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
+               values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
+               values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
+               values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
+               values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
+               i = Anum_pg_statistic_stakind1 - 1;
                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
                 {
                         values[i++] = Int16GetDatum(stats->stakind[k]);         /* stakindN */
                 }
+               i = Anum_pg_statistic_staop1 - 1;
                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
                 {
                         values[i++] = ObjectIdGetDatum(stats->staop[k]);        /* staopN */
                 }
+               i = Anum_pg_statistic_stanumbers1 - 1;
                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
                 {
                         int                     nnum = stats->numnumbers[k];
@@ -1354,6 +1547,7 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
                                 values[i++] = (Datum) 0;
                         }
                 }
+               i = Anum_pg_statistic_stavalues1 - 1;
                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
                 {
                         if (stats->numvalues[k] > 0)
@@ -1376,19 +1570,19 @@ update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
                 }
  
                 /* Is there already a pg_statistic tuple for this attribute? */
-               oldtup = SearchSysCache(STATRELATT,
-                                                               ObjectIdGetDatum(relid),
-                                                               Int16GetDatum(stats->attr->attnum),
-                                                               0, 0);
+               oldtup = SearchSysCache3(STATRELATTINH,
+                                                                ObjectIdGetDatum(relid),
+                                                                Int16GetDatum(stats->attr->attnum),
+                                                                BoolGetDatum(inh));
  
                 if (HeapTupleIsValid(oldtup))
                 {
                         /* Yes, replace it */
                         stup = heap_modify_tuple(oldtup,
-                                                                       RelationGetDescr(sd),
-                                                                       values,
-                                                                       nulls,
-                                                                       replaces);
+                                                                        RelationGetDescr(sd),
+                                                                        values,
+                                                                        nulls,
+                                                                        replaces);
                         ReleaseSysCache(oldtup);
                         simple_heap_update(sd, &stup->t_self, stup);
                 }
@@ -1490,8 +1684,7 @@ typedef struct
  
  typedef struct
  {
-       FmgrInfo   *cmpFn;
-       int                     cmpFlags;
+       SortSupport ssup;
         int                *tupnoLink;
  } CompareScalarsContext;
  
@@ -1511,7 +1704,7 @@ static int        compare_mcvs(const void *a, const void *b);
  /*
   * std_typanalyze -- the default type-specific typanalyze function
   */
-static bool
+bool
  std_typanalyze(VacAttrStats *stats)
  {
         Form_pg_attribute attr = stats->attr;
@@ -1525,9 +1718,10 @@ std_typanalyze(VacAttrStats *stats)
                 attr->attstattarget = default_statistics_target;
  
         /* Look for default "<" and "=" operators for column's type */
-       get_sort_group_operators(attr->atttypid,
+       get_sort_group_operators(stats->attrtypid,
                                                          false, false, false,
-                                                        &ltopr, &eqopr, NULL);
+                                                        &ltopr, &eqopr, NULL,
+                                                        NULL);
  
         /* If column has no "=" operator, we can't do much of anything */
         if (!OidIsValid(eqopr))
@@ -1605,10 +1799,10 @@ compute_minimal_stats(VacAttrStatsP stats,
         int                     nonnull_cnt = 0;
         int                     toowide_cnt = 0;
         double          total_width = 0;
-       bool            is_varlena = (!stats->attr->attbyval &&
-                                                         stats->attr->attlen == -1);
-       bool            is_varwidth = (!stats->attr->attbyval &&
-                                                          stats->attr->attlen < 0);
+       bool            is_varlena = (!stats->attrtype->typbyval &&
+                                                         stats->attrtype->typlen == -1);
+       bool            is_varwidth = (!stats->attrtype->typbyval &&
+                                                          stats->attrtype->typlen < 0);
         FmgrInfo        f_cmpeq;
         typedef struct
         {
@@ -1665,7 +1859,7 @@ compute_minimal_stats(VacAttrStatsP stats,
                         /*
                          * If the value is toasted, we want to detoast it just once to
                          * avoid repeated detoastings and resultant excess memory usage
-                        * during the comparisons.      Also, check to see if the value is
+                        * during the comparisons.  Also, check to see if the value is
                          * excessively wide, and if so don't detoast at all --- just
                          * ignore the value.
                          */
@@ -1689,7 +1883,10 @@ compute_minimal_stats(VacAttrStatsP stats,
                 firstcount1 = track_cnt;
                 for (j = 0; j < track_cnt; j++)
                 {
-                       if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
+                       /* We always use the default collation for statistics */
+                       if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
+                                                                                          DEFAULT_COLLATION_OID,
+                                                                                          value, track[j].value)))
                         {
                                 match = true;
                                 break;
@@ -1782,7 +1979,7 @@ compute_minimal_stats(VacAttrStatsP stats,
                          * We assume (not very reliably!) that all the multiply-occurring
                          * values are reflected in the final track[] list, and the other
                          * nonnull values all appeared but once.  (XXX this usually
-                        * results in a drastic overestimate of ndistinct.      Can we do
+                        * results in a drastic overestimate of ndistinct.  Can we do
                          * any better?)
                          *----------
                          */
@@ -1819,7 +2016,7 @@ compute_minimal_stats(VacAttrStatsP stats,
                  * Decide how many values are worth storing as most-common values. If
                  * we are able to generate a complete MCV list (all the values in the
                  * sample will fit, and we think these are all the ones in the table),
-                * then do so.  Otherwise, store only those values that are
+                * then do so.  Otherwise, store only those values that are
                  * significantly more common than the (estimated) average. We set the
                  * threshold rather arbitrarily at 25% more than average, with at
                  * least 2 instances in the sample.
@@ -1871,8 +2068,8 @@ compute_minimal_stats(VacAttrStatsP stats,
                         for (i = 0; i < num_mcv; i++)
                         {
                                 mcv_values[i] = datumCopy(track[i].value,
-                                                                                 stats->attr->attbyval,
-                                                                                 stats->attr->attlen);
+                                                                                 stats->attrtype->typbyval,
+                                                                                 stats->attrtype->typlen);
                                 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
                         }
                         MemoryContextSwitchTo(old_context);
@@ -1883,9 +2080,10 @@ compute_minimal_stats(VacAttrStatsP stats,
                         stats->numnumbers[0] = num_mcv;
                         stats->stavalues[0] = mcv_values;
                         stats->numvalues[0] = num_mcv;
+
                         /*
-                        * Accept the defaults for stats->statypid and others.
-                        * They have been set before we were called (see vacuum.h)
+                        * Accept the defaults for stats->statypid and others. They have
+                        * been set before we were called (see vacuum.h)
                          */
                 }
         }
@@ -1928,14 +2126,12 @@ compute_scalar_stats(VacAttrStatsP stats,
         int                     nonnull_cnt = 0;
         int                     toowide_cnt = 0;
         double          total_width = 0;
-       bool            is_varlena = (!stats->attr->attbyval &&
-                                                         stats->attr->attlen == -1);
-       bool            is_varwidth = (!stats->attr->attbyval &&
-                                                          stats->attr->attlen < 0);
+       bool            is_varlena = (!stats->attrtype->typbyval &&
+                                                         stats->attrtype->typlen == -1);
+       bool            is_varwidth = (!stats->attrtype->typbyval &&
+                                                          stats->attrtype->typlen < 0);
         double          corr_xysum;
-       Oid                     cmpFn;
-       int                     cmpFlags;
-       FmgrInfo        f_cmpfn;
+       SortSupportData ssup;
         ScalarItem *values;
         int                     values_cnt = 0;
         int                *tupnoLink;
@@ -1949,8 +2145,19 @@ compute_scalar_stats(VacAttrStatsP stats,
         tupnoLink = (int *) palloc(samplerows * sizeof(int));
         track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
  
-       SelectSortFunction(mystats->ltopr, false, &cmpFn, &cmpFlags);
-       fmgr_info(cmpFn, &f_cmpfn);
+       memset(&ssup, 0, sizeof(ssup));
+       ssup.ssup_cxt = CurrentMemoryContext;
+       /* We always use the default collation for statistics */
+       ssup.ssup_collation = DEFAULT_COLLATION_OID;
+       ssup.ssup_nulls_first = false;
+       /*
+        * For now, don't perform abbreviated key conversion, because full values
+        * are required for MCV slot generation.  Supporting that optimization
+        * would necessitate teaching compare_scalars() to call a tie-breaker.
+        */
+       ssup.abbreviate = false;
+
+       PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
  
         /* Initial scan to find sortable values */
         for (i = 0; i < samplerows; i++)
@@ -1983,7 +2190,7 @@ compute_scalar_stats(VacAttrStatsP stats,
                         /*
                          * If the value is toasted, we want to detoast it just once to
                          * avoid repeated detoastings and resultant excess memory usage
-                        * during the comparisons.      Also, check to see if the value is
+                        * during the comparisons.  Also, check to see if the value is
                          * excessively wide, and if so don't detoast at all --- just
                          * ignore the value.
                          */
@@ -2018,8 +2225,7 @@ compute_scalar_stats(VacAttrStatsP stats,
                 CompareScalarsContext cxt;
  
                 /* Sort the collected values */
-               cxt.cmpFn = &f_cmpfn;
-               cxt.cmpFlags = cmpFlags;
+               cxt.ssup = &ssup;
                 cxt.tupnoLink = tupnoLink;
                 qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
                                   compare_scalars, (void *) &cxt);
@@ -2029,7 +2235,7 @@ compute_scalar_stats(VacAttrStatsP stats,
                  * accumulate ordering-correlation statistics.
                  *
                  * To determine which are most common, we first have to count the
-                * number of duplicates of each value.  The duplicates are adjacent in
+                * number of duplicates of each value.  The duplicates are adjacent in
                  * the sorted list, so a brute-force approach is to compare successive
                  * datum values until we find two that are not equal. However, that
                  * requires N-1 invocations of the datum comparison routine, which are
@@ -2038,7 +2244,7 @@ compute_scalar_stats(VacAttrStatsP stats,
                  * that are adjacent in the sorted order; otherwise it could not know
                  * that it's ordered the pair correctly.) We exploit this by having
                  * compare_scalars remember the highest tupno index that each
-                * ScalarItem has been found equal to.  At the end of the sort, a
+                * ScalarItem has been found equal to.  At the end of the sort, a
                  * ScalarItem's tupnoLink will still point to itself if and only if it
                  * is the last item of its group of duplicates (since the group will
                  * be ordered by tupno).
@@ -2158,7 +2364,7 @@ compute_scalar_stats(VacAttrStatsP stats,
                  * Decide how many values are worth storing as most-common values. If
                  * we are able to generate a complete MCV list (all the values in the
                  * sample will fit, and we think these are all the ones in the table),
-                * then do so.  Otherwise, store only those values that are
+                * then do so.  Otherwise, store only those values that are
                  * significantly more common than the (estimated) average. We set the
                  * threshold rather arbitrarily at 25% more than average, with at
                  * least 2 instances in the sample.  Also, we won't suppress values
@@ -2220,8 +2426,8 @@ compute_scalar_stats(VacAttrStatsP stats,
                         for (i = 0; i < num_mcv; i++)
                         {
                                 mcv_values[i] = datumCopy(values[track[i].first].value,
-                                                                                 stats->attr->attbyval,
-                                                                                 stats->attr->attlen);
+                                                                                 stats->attrtype->typbyval,
+                                                                                 stats->attrtype->typlen);
                                 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
                         }
                         MemoryContextSwitchTo(old_context);
@@ -2232,9 +2438,10 @@ compute_scalar_stats(VacAttrStatsP stats,
                         stats->numnumbers[slot_idx] = num_mcv;
                         stats->stavalues[slot_idx] = mcv_values;
                         stats->numvalues[slot_idx] = num_mcv;
+
                         /*
-                        * Accept the defaults for stats->statypid and others.
-                        * They have been set before we were called (see vacuum.h)
+                        * Accept the defaults for stats->statypid and others. They have
+                        * been set before we were called (see vacuum.h)
                          */
                         slot_idx++;
                 }
@@ -2326,8 +2533,8 @@ compute_scalar_stats(VacAttrStatsP stats,
                         for (i = 0; i < num_hist; i++)
                         {
                                 hist_values[i] = datumCopy(values[pos].value,
-                                                                                  stats->attr->attbyval,
-                                                                                  stats->attr->attlen);
+                                                                                  stats->attrtype->typbyval,
+                                                                                  stats->attrtype->typlen);
                                 pos += delta;
                                 posfrac += deltafrac;
                                 if (posfrac >= (num_hist - 1))
@@ -2344,9 +2551,10 @@ compute_scalar_stats(VacAttrStatsP stats,
                         stats->staop[slot_idx] = mystats->ltopr;
                         stats->stavalues[slot_idx] = hist_values;
                         stats->numvalues[slot_idx] = num_hist;
+
                         /*
-                        * Accept the defaults for stats->statypid and others.
-                        * They have been set before we were called (see vacuum.h)
+                        * Accept the defaults for stats->statypid and others. They have
+                        * been set before we were called (see vacuum.h)
                          */
                         slot_idx++;
                 }
@@ -2389,7 +2597,21 @@ compute_scalar_stats(VacAttrStatsP stats,
                         slot_idx++;
                 }
         }
-       else if (nonnull_cnt == 0 && null_cnt > 0)
+       else if (nonnull_cnt > 0)
+       {
+               /* We found some non-null values, but they were all too wide */
+               Assert(nonnull_cnt == toowide_cnt);
+               stats->stats_valid = true;
+               /* Do the simple null-frac and width stats */
+               stats->stanullfrac = (double) null_cnt / (double) samplerows;
+               if (is_varwidth)
+                       stats->stawidth = total_width / (double) nonnull_cnt;
+               else
+                       stats->stawidth = stats->attrtype->typlen;
+               /* Assume all too-wide values are distinct, so it's a unique column */
+               stats->stadistinct = -1.0;
+       }
+       else if (null_cnt > 0)
         {
                 /* We found only nulls; assume the column is entirely null */
                 stats->stats_valid = true;
@@ -2408,7 +2630,7 @@ compute_scalar_stats(VacAttrStatsP stats,
   * qsort_arg comparator for sorting ScalarItems
   *
   * Aside from sorting the items, we update the tupnoLink[] array
- * whenever two ScalarItems are found to contain equal datums. The array
+ * whenever two ScalarItems are found to contain equal datums.  The array
   * is indexed by tupno; for each ScalarItem, it contains the highest
   * tupno that that item's datum has been found to be equal to.  This allows
   * us to avoid additional comparisons in compute_scalar_stats().
@@ -2416,15 +2638,14 @@ compute_scalar_stats(VacAttrStatsP stats,
  static int
  compare_scalars(const void *a, const void *b, void *arg)
  {
-       Datum           da = ((ScalarItem *) a)->value;
-       int                     ta = ((ScalarItem *) a)->tupno;
-       Datum           db = ((ScalarItem *) b)->value;
-       int                     tb = ((ScalarItem *) b)->tupno;
+       Datum           da = ((const ScalarItem *) a)->value;
+       int                     ta = ((const ScalarItem *) a)->tupno;
+       Datum           db = ((const ScalarItem *) b)->value;
+       int                     tb = ((const ScalarItem *) b)->tupno;
         CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
-       int32           compare;
+       int                     compare;
  
-       compare = ApplySortFunction(cxt->cmpFn, cxt->cmpFlags,
-                                                               da, false, db, false);
+       compare = ApplySortComparator(da, false, db, false, cxt->ssup);
         if (compare != 0)
                 return compare;
  
@@ -2448,8 +2669,8 @@ compare_scalars(const void *a, const void *b, void *arg)
  static int
  compare_mcvs(const void *a, const void *b)
  {
-       int                     da = ((ScalarMCVItem *) a)->first;
-       int                     db = ((ScalarMCVItem *) b)->first;
+       int                     da = ((const ScalarMCVItem *) a)->first;
+       int                     db = ((const ScalarMCVItem *) b)->first;
  
         return da - db;
  }