Post-PG 10 beta1 pgindent run

[postgresql] / src / backend / utils / adt / selfuncs.c
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 37fad8692a8b6cdfc5baa242d01375e1a6e5e148..6e491bbc21ec9660dc45949455c03f1a17a70597 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -7,10 +7,10 @@
   *       Selectivity routines are registered in the pg_operator catalog
   *       in the "oprrest" and "oprjoin" attributes.
   *
- *       Index cost functions are registered in the pg_am catalog
- *       in the "amcostestimate" attribute.
+ *       Index cost functions are located via the index AM's API struct,
+ *       which is obtained from the handler function registered in pg_am.
   *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
@@ -98,19 +98,24 @@
  #include "postgres.h"
  
  #include <ctype.h>
+#include <float.h>
  #include <math.h>
  
+#include "access/brin.h"
  #include "access/gin.h"
  #include "access/htup_details.h"
  #include "access/sysattr.h"
  #include "catalog/index.h"
+#include "catalog/pg_am.h"
  #include "catalog/pg_collation.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_type.h"
  #include "executor/executor.h"
  #include "mb/pg_wchar.h"
+#include "miscadmin.h"
  #include "nodes/makefuncs.h"
  #include "nodes/nodeFuncs.h"
  #include "optimizer/clauses.h"
@@ -124,11 +129,14 @@
  #include "parser/parse_clause.h"
  #include "parser/parse_coerce.h"
  #include "parser/parsetree.h"
+#include "statistics/statistics.h"
+#include "utils/acl.h"
  #include "utils/builtins.h"
  #include "utils/bytea.h"
  #include "utils/date.h"
  #include "utils/datum.h"
  #include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
  #include "utils/lsyscache.h"
  #include "utils/nabstime.h"
  #include "utils/pg_locale.h"
@@ -139,6 +147,7 @@
  #include "utils/timestamp.h"
  #include "utils/tqual.h"
  #include "utils/typcache.h"
+#include "utils/varlena.h"
  
  
  /* Hooks for plugins to get control when we ask for stats */
@@ -160,6 +169,8 @@ static double eqjoinsel_inner(Oid operator,
  static double eqjoinsel_semi(Oid operator,
                            VariableStatData *vardata1, VariableStatData *vardata2,
                            RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+                                               RelOptInfo *rel, List **varinfos, double *ndistinct);
  static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                   Datum lobound, Datum hibound, Oid boundstypid,
                                   double *scaledlobound, double *scaledhibound);
@@ -264,6 +275,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
  {
         double          selec;
         bool            isdefault;
+       Oid                     opfuncoid;
  
         /*
          * If the constant is NULL, assume operator is strict and return zero, ie,
@@ -282,13 +294,12 @@ var_eq_const(VariableStatData *vardata, Oid operator,
         if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
                 return 1.0 / vardata->rel->tuples;
  
-       if (HeapTupleIsValid(vardata->statsTuple))
+       if (HeapTupleIsValid(vardata->statsTuple) &&
+               statistic_proc_security_check(vardata,
+                                                                         (opfuncoid = get_opcode(operator))))
         {
                 Form_pg_statistic stats;
-               Datum      *values;
-               int                     nvalues;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
                 bool            match = false;
                 int                     i;
  
@@ -301,30 +312,27 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                  * don't like this, maybe you shouldn't be using eqsel for your
                  * operator...)
                  */
-               if (get_attstatsslot(vardata->statsTuple,
-                                                        vardata->atttype, vardata->atttypmod,
+               if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                          STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        &values, &nvalues,
-                                                        &numbers, &nnumbers))
+                                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
                 {
                         FmgrInfo        eqproc;
  
-                       fmgr_info(get_opcode(operator), &eqproc);
+                       fmgr_info(opfuncoid, &eqproc);
  
-                       for (i = 0; i < nvalues; i++)
+                       for (i = 0; i < sslot.nvalues; i++)
                         {
                                 /* be careful to apply operator right way 'round */
                                 if (varonleft)
                                         match = DatumGetBool(FunctionCall2Coll(&eqproc,
                                                                                                            DEFAULT_COLLATION_OID,
-                                                                                                                  values[i],
+                                                                                                                  sslot.values[i],
                                                                                                                    constval));
                                 else
                                         match = DatumGetBool(FunctionCall2Coll(&eqproc,
                                                                                                            DEFAULT_COLLATION_OID,
                                                                                                                    constval,
-                                                                                                                  values[i]));
+                                                                                                                  sslot.values[i]));
                                 if (match)
                                         break;
                         }
@@ -332,9 +340,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                 else
                 {
                         /* no most-common-value info available */
-                       values = NULL;
-                       numbers = NULL;
-                       i = nvalues = nnumbers = 0;
+                       i = 0;                          /* keep compiler quiet */
                 }
  
                 if (match)
@@ -343,7 +349,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                          * Constant is "=" to this common value.  We know selectivity
                          * exactly (or as exactly as ANALYZE could calculate it, anyway).
                          */
-                       selec = numbers[i];
+                       selec = sslot.numbers[i];
                 }
                 else
                 {
@@ -355,8 +361,8 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                         double          sumcommon = 0.0;
                         double          otherdistinct;
  
-                       for (i = 0; i < nnumbers; i++)
-                               sumcommon += numbers[i];
+                       for (i = 0; i < sslot.nnumbers; i++)
+                               sumcommon += sslot.numbers[i];
                         selec = 1.0 - sumcommon - stats->stanullfrac;
                         CLAMP_PROBABILITY(selec);
  
@@ -365,7 +371,8 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                          * all the not-common values share this remaining fraction
                          * equally, so we divide by the number of other distinct values.
                          */
-                       otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
+                       otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
+                               sslot.nnumbers;
                         if (otherdistinct > 1)
                                 selec /= otherdistinct;
  
@@ -373,12 +380,11 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                          * Another cross-check: selectivity shouldn't be estimated as more
                          * than the least common "most common value".
                          */
-                       if (nnumbers > 0 && selec > numbers[nnumbers - 1])
-                               selec = numbers[nnumbers - 1];
+                       if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
+                               selec = sslot.numbers[sslot.nnumbers - 1];
                 }
  
-               free_attstatsslot(vardata->atttype, values, nvalues,
-                                                 numbers, nnumbers);
+               free_attstatsslot(&sslot);
         }
         else
         {
@@ -421,8 +427,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
         {
                 Form_pg_statistic stats;
                 double          ndistinct;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
  
                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
  
@@ -445,16 +450,13 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
                  * Cross-check: selectivity should never be estimated as more than the
                  * most common value's.
                  */
-               if (get_attstatsslot(vardata->statsTuple,
-                                                        vardata->atttype, vardata->atttypmod,
+               if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                          STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        NULL, NULL,
-                                                        &numbers, &nnumbers))
+                                                        ATTSTATSSLOT_NUMBERS))
                 {
-                       if (nnumbers > 0 && selec > numbers[0])
-                               selec = numbers[0];
-                       free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
+                       if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
+                               selec = sslot.numbers[0];
+                       free_attstatsslot(&sslot);
                 }
         }
         else
@@ -606,39 +608,33 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
  {
         double          mcv_selec,
                                 sumcommon;
-       Datum      *values;
-       int                     nvalues;
-       float4     *numbers;
-       int                     nnumbers;
+       AttStatsSlot sslot;
         int                     i;
  
         mcv_selec = 0.0;
         sumcommon = 0.0;
  
         if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+               statistic_proc_security_check(vardata, opproc->fn_oid) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
                                                  STATISTIC_KIND_MCV, InvalidOid,
-                                                NULL,
-                                                &values, &nvalues,
-                                                &numbers, &nnumbers))
+                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
         {
-               for (i = 0; i < nvalues; i++)
+               for (i = 0; i < sslot.nvalues; i++)
                 {
                         if (varonleft ?
                                 DatumGetBool(FunctionCall2Coll(opproc,
                                                                                            DEFAULT_COLLATION_OID,
-                                                                                          values[i],
+                                                                                          sslot.values[i],
                                                                                            constval)) :
                                 DatumGetBool(FunctionCall2Coll(opproc,
                                                                                            DEFAULT_COLLATION_OID,
                                                                                            constval,
-                                                                                          values[i])))
-                               mcv_selec += numbers[i];
-                       sumcommon += numbers[i];
+                                                                                          sslot.values[i])))
+                               mcv_selec += sslot.numbers[i];
+                       sumcommon += sslot.numbers[i];
                 }
-               free_attstatsslot(vardata->atttype, values, nvalues,
-                                                 numbers, nnumbers);
+               free_attstatsslot(&sslot);
         }
  
         *sumcommonp = sumcommon;
@@ -684,45 +680,42 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                                           int *hist_size)
  {
         double          result;
-       Datum      *values;
-       int                     nvalues;
+       AttStatsSlot sslot;
  
         /* check sanity of parameters */
         Assert(n_skip >= 0);
         Assert(min_hist_size > 2 * n_skip);
  
         if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+               statistic_proc_security_check(vardata, opproc->fn_oid) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                                NULL,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
         {
-               *hist_size = nvalues;
-               if (nvalues >= min_hist_size)
+               *hist_size = sslot.nvalues;
+               if (sslot.nvalues >= min_hist_size)
                 {
                         int                     nmatch = 0;
                         int                     i;
  
-                       for (i = n_skip; i < nvalues - n_skip; i++)
+                       for (i = n_skip; i < sslot.nvalues - n_skip; i++)
                         {
                                 if (varonleft ?
                                         DatumGetBool(FunctionCall2Coll(opproc,
                                                                                                    DEFAULT_COLLATION_OID,
-                                                                                                  values[i],
+                                                                                                  sslot.values[i],
                                                                                                    constval)) :
                                         DatumGetBool(FunctionCall2Coll(opproc,
                                                                                                    DEFAULT_COLLATION_OID,
                                                                                                    constval,
-                                                                                                  values[i])))
+                                                                                                  sslot.values[i])))
                                         nmatch++;
                         }
-                       result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
+                       result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
                 }
                 else
                         result = -1;
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
         }
         else
         {
@@ -752,9 +745,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                                    Datum constval, Oid consttype)
  {
         double          hist_selec;
-       Oid                     hist_op;
-       Datum      *values;
-       int                     nvalues;
+       AttStatsSlot sslot;
  
         hist_selec = -1.0;
  
@@ -769,14 +760,12 @@ ineq_histogram_selectivity(PlannerInfo *root,
          * the reverse way if isgt is TRUE.
          */
         if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+               statistic_proc_security_check(vardata, opproc->fn_oid) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
                                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                                &hist_op,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
         {
-               if (nvalues > 1)
+               if (sslot.nvalues > 1)
                 {
                         /*
                          * Use binary search to find proper location, ie, the first slot
@@ -795,7 +784,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                          */
                         double          histfrac;
                         int                     lobound = 0;    /* first possible slot to search */
-                       int                     hibound = nvalues;              /* last+1 slot to search */
+                       int                     hibound = sslot.nvalues;                /* last+1 slot to search */
                         bool            have_end = false;
  
                         /*
@@ -804,12 +793,12 @@ ineq_histogram_selectivity(PlannerInfo *root,
                          * one of them to be updated, so we deal with that within the
                          * loop.)
                          */
-                       if (nvalues == 2)
+                       if (sslot.nvalues == 2)
                                 have_end = get_actual_variable_range(root,
                                                                                                          vardata,
-                                                                                                        hist_op,
-                                                                                                        &values[0],
-                                                                                                        &values[1]);
+                                                                                                        sslot.staop,
+                                                                                                        &sslot.values[0],
+                                                                                                        &sslot.values[1]);
  
                         while (lobound < hibound)
                         {
@@ -821,22 +810,22 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                  * histogram entry, first try to replace it with the actual
                                  * current min or max (unless we already did so above).
                                  */
-                               if (probe == 0 && nvalues > 2)
+                               if (probe == 0 && sslot.nvalues > 2)
                                         have_end = get_actual_variable_range(root,
                                                                                                                  vardata,
-                                                                                                                hist_op,
-                                                                                                                &values[0],
+                                                                                                                sslot.staop,
+                                                                                                                &sslot.values[0],
                                                                                                                  NULL);
-                               else if (probe == nvalues - 1 && nvalues > 2)
+                               else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
                                         have_end = get_actual_variable_range(root,
                                                                                                                  vardata,
-                                                                                                                hist_op,
+                                                                                                                sslot.staop,
                                                                                                                  NULL,
-                                                                                                                &values[probe]);
+                                                                                                          &sslot.values[probe]);
  
                                 ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
                                                                                                            DEFAULT_COLLATION_OID,
-                                                                                                          values[probe],
+                                                                                                          sslot.values[probe],
                                                                                                            constval));
                                 if (isgt)
                                         ltcmp = !ltcmp;
@@ -851,7 +840,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                 /* Constant is below lower histogram boundary. */
                                 histfrac = 0.0;
                         }
-                       else if (lobound >= nvalues)
+                       else if (lobound >= sslot.nvalues)
                         {
                                 /* Constant is above upper histogram boundary. */
                                 histfrac = 1.0;
@@ -872,7 +861,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                  * interpolation within this bin.
                                  */
                                 if (convert_to_scalar(constval, consttype, &val,
-                                                                         values[i - 1], values[i],
+                                                                         sslot.values[i - 1], sslot.values[i],
                                                                           vardata->vartype,
                                                                           &low, &high))
                                 {
@@ -919,7 +908,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                  * binfrac partial bin below the constant.
                                  */
                                 histfrac = (double) (i - 1) + binfrac;
-                               histfrac /= (double) (nvalues - 1);
+                               histfrac /= (double) (sslot.nvalues - 1);
                         }
  
                         /*
@@ -947,7 +936,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                         }
                 }
  
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
         }
  
         return hist_selec;
@@ -1500,21 +1489,15 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
         {
                 Form_pg_statistic stats;
                 double          freq_null;
-               Datum      *values;
-               int                     nvalues;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
  
                 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
                 freq_null = stats->stanullfrac;
  
-               if (get_attstatsslot(vardata.statsTuple,
-                                                        vardata.atttype, vardata.atttypmod,
+               if (get_attstatsslot(&sslot, vardata.statsTuple,
                                                          STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        &values, &nvalues,
-                                                        &numbers, &nnumbers)
-                       && nnumbers > 0)
+                                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
+                       && sslot.nnumbers > 0)
                 {
                         double          freq_true;
                         double          freq_false;
@@ -1522,10 +1505,10 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
                         /*
                          * Get first MCV frequency and derive frequency for true.
                          */
-                       if (DatumGetBool(values[0]))
-                               freq_true = numbers[0];
+                       if (DatumGetBool(sslot.values[0]))
+                               freq_true = sslot.numbers[0];
                         else
-                               freq_true = 1.0 - numbers[0] - freq_null;
+                               freq_true = 1.0 - sslot.numbers[0] - freq_null;
  
                         /*
                          * Next derive frequency for false. Then use these as appropriate
@@ -1566,8 +1549,7 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
                                         break;
                         }
  
-                       free_attstatsslot(vardata.atttype, values, nvalues,
-                                                         numbers, nnumbers);
+                       free_attstatsslot(&sslot);
                 }
                 else
                 {
@@ -2253,46 +2235,40 @@ eqjoinsel_inner(Oid operator,
         double          nd2;
         bool            isdefault1;
         bool            isdefault2;
+       Oid                     opfuncoid;
         Form_pg_statistic stats1 = NULL;
         Form_pg_statistic stats2 = NULL;
         bool            have_mcvs1 = false;
-       Datum      *values1 = NULL;
-       int                     nvalues1 = 0;
-       float4     *numbers1 = NULL;
-       int                     nnumbers1 = 0;
         bool            have_mcvs2 = false;
-       Datum      *values2 = NULL;
-       int                     nvalues2 = 0;
-       float4     *numbers2 = NULL;
-       int                     nnumbers2 = 0;
+       AttStatsSlot sslot1;
+       AttStatsSlot sslot2;
  
         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
  
+       opfuncoid = get_opcode(operator);
+
+       memset(&sslot1, 0, sizeof(sslot1));
+       memset(&sslot2, 0, sizeof(sslot2));
+
         if (HeapTupleIsValid(vardata1->statsTuple))
         {
+               /* note we allow use of nullfrac regardless of security check */
                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
-               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
-                                                                         vardata1->atttype,
-                                                                         vardata1->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values1, &nvalues1,
-                                                                         &numbers1, &nnumbers1);
+               if (statistic_proc_security_check(vardata1, opfuncoid))
+                       have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
+                                                                                 STATISTIC_KIND_MCV, InvalidOid,
+                                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
         }
  
         if (HeapTupleIsValid(vardata2->statsTuple))
         {
+               /* note we allow use of nullfrac regardless of security check */
                 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
-               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
-                                                                         vardata2->atttype,
-                                                                         vardata2->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values2, &nvalues2,
-                                                                         &numbers2, &nnumbers2);
+               if (statistic_proc_security_check(vardata2, opfuncoid))
+                       have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
+                                                                                 STATISTIC_KIND_MCV, InvalidOid,
+                                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
         }
  
         if (have_mcvs1 && have_mcvs2)
@@ -2326,9 +2302,9 @@ eqjoinsel_inner(Oid operator,
                 int                     i,
                                         nmatches;
  
-               fmgr_info(get_opcode(operator), &eqproc);
-               hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
-               hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+               fmgr_info(opfuncoid, &eqproc);
+               hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
+               hasmatch2 = (bool *) palloc0(sslot2.nvalues * sizeof(bool));
  
                 /*
                  * Note we assume that each MCV will match at most one member of the
@@ -2338,21 +2314,21 @@ eqjoinsel_inner(Oid operator,
                  */
                 matchprodfreq = 0.0;
                 nmatches = 0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                 {
                         int                     j;
  
-                       for (j = 0; j < nvalues2; j++)
+                       for (j = 0; j < sslot2.nvalues; j++)
                         {
                                 if (hasmatch2[j])
                                         continue;
                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
                                                                                                    DEFAULT_COLLATION_OID,
-                                                                                                  values1[i],
-                                                                                                  values2[j])))
+                                                                                                  sslot1.values[i],
+                                                                                                  sslot2.values[j])))
                                 {
                                         hasmatch1[i] = hasmatch2[j] = true;
-                                       matchprodfreq += numbers1[i] * numbers2[j];
+                                       matchprodfreq += sslot1.numbers[i] * sslot2.numbers[j];
                                         nmatches++;
                                         break;
                                 }
@@ -2361,22 +2337,22 @@ eqjoinsel_inner(Oid operator,
                 CLAMP_PROBABILITY(matchprodfreq);
                 /* Sum up frequencies of matched and unmatched MCVs */
                 matchfreq1 = unmatchfreq1 = 0.0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                 {
                         if (hasmatch1[i])
-                               matchfreq1 += numbers1[i];
+                               matchfreq1 += sslot1.numbers[i];
                         else
-                               unmatchfreq1 += numbers1[i];
+                               unmatchfreq1 += sslot1.numbers[i];
                 }
                 CLAMP_PROBABILITY(matchfreq1);
                 CLAMP_PROBABILITY(unmatchfreq1);
                 matchfreq2 = unmatchfreq2 = 0.0;
-               for (i = 0; i < nvalues2; i++)
+               for (i = 0; i < sslot2.nvalues; i++)
                 {
                         if (hasmatch2[i])
-                               matchfreq2 += numbers2[i];
+                               matchfreq2 += sslot2.numbers[i];
                         else
-                               unmatchfreq2 += numbers2[i];
+                               unmatchfreq2 += sslot2.numbers[i];
                 }
                 CLAMP_PROBABILITY(matchfreq2);
                 CLAMP_PROBABILITY(unmatchfreq2);
@@ -2401,15 +2377,15 @@ eqjoinsel_inner(Oid operator,
                  * MCVs plus non-MCV values.
                  */
                 totalsel1 = matchprodfreq;
-               if (nd2 > nvalues2)
-                       totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
+               if (nd2 > sslot2.nvalues)
+                       totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2.nvalues);
                 if (nd2 > nmatches)
                         totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
                                 (nd2 - nmatches);
                 /* Same estimate from the point of view of relation 2. */
                 totalsel2 = matchprodfreq;
-               if (nd1 > nvalues1)
-                       totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
+               if (nd1 > sslot1.nvalues)
+                       totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1.nvalues);
                 if (nd1 > nmatches)
                         totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
                                 (nd1 - nmatches);
@@ -2454,12 +2430,8 @@ eqjoinsel_inner(Oid operator,
                         selec /= nd2;
         }
  
-       if (have_mcvs1)
-               free_attstatsslot(vardata1->atttype, values1, nvalues1,
-                                                 numbers1, nnumbers1);
-       if (have_mcvs2)
-               free_attstatsslot(vardata2->atttype, values2, nvalues2,
-                                                 numbers2, nnumbers2);
+       free_attstatsslot(&sslot1);
+       free_attstatsslot(&sslot2);
  
         return selec;
  }
@@ -2469,6 +2441,7 @@ eqjoinsel_inner(Oid operator,
   *
   * (Also used for anti join, which we are supposed to estimate the same way.)
   * Caller has ensured that vardata1 is the LHS variable.
+ * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
   */
  static double
  eqjoinsel_semi(Oid operator,
@@ -2480,21 +2453,21 @@ eqjoinsel_semi(Oid operator,
         double          nd2;
         bool            isdefault1;
         bool            isdefault2;
+       Oid                     opfuncoid;
         Form_pg_statistic stats1 = NULL;
         bool            have_mcvs1 = false;
-       Datum      *values1 = NULL;
-       int                     nvalues1 = 0;
-       float4     *numbers1 = NULL;
-       int                     nnumbers1 = 0;
         bool            have_mcvs2 = false;
-       Datum      *values2 = NULL;
-       int                     nvalues2 = 0;
-       float4     *numbers2 = NULL;
-       int                     nnumbers2 = 0;
+       AttStatsSlot sslot1;
+       AttStatsSlot sslot2;
  
         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
  
+       opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
+
+       memset(&sslot1, 0, sizeof(sslot1));
+       memset(&sslot2, 0, sizeof(sslot2));
+
         /*
          * We clamp nd2 to be not more than what we estimate the inner relation's
          * size to be.  This is intuitively somewhat reasonable since obviously
@@ -2509,34 +2482,42 @@ eqjoinsel_semi(Oid operator,
          * We can apply this clamping both with respect to the base relation from
          * which the join variable comes (if there is just one), and to the
          * immediate inner input relation of the current join.
+        *
+        * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+        * great, maybe, but it didn't come out of nowhere either.  This is most
+        * helpful when the inner relation is empty and consequently has no stats.
          */
         if (vardata2->rel)
-               nd2 = Min(nd2, vardata2->rel->rows);
-       nd2 = Min(nd2, inner_rel->rows);
+       {
+               if (nd2 >= vardata2->rel->rows)
+               {
+                       nd2 = vardata2->rel->rows;
+                       isdefault2 = false;
+               }
+       }
+       if (nd2 >= inner_rel->rows)
+       {
+               nd2 = inner_rel->rows;
+               isdefault2 = false;
+       }
  
         if (HeapTupleIsValid(vardata1->statsTuple))
         {
+               /* note we allow use of nullfrac regardless of security check */
                 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
-               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
-                                                                         vardata1->atttype,
-                                                                         vardata1->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values1, &nvalues1,
-                                                                         &numbers1, &nnumbers1);
+               if (statistic_proc_security_check(vardata1, opfuncoid))
+                       have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
+                                                                                 STATISTIC_KIND_MCV, InvalidOid,
+                                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
         }
  
-       if (HeapTupleIsValid(vardata2->statsTuple))
+       if (HeapTupleIsValid(vardata2->statsTuple) &&
+               statistic_proc_security_check(vardata2, opfuncoid))
         {
-               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
-                                                                         vardata2->atttype,
-                                                                         vardata2->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values2, &nvalues2,
-                                                                         &numbers2, &nnumbers2);
+               have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
+                                                                         STATISTIC_KIND_MCV, InvalidOid,
+                                                                         ATTSTATSSLOT_VALUES);
+               /* note: currently don't need stanumbers from RHS */
         }
  
         if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
@@ -2562,15 +2543,15 @@ eqjoinsel_semi(Oid operator,
  
                 /*
                  * The clamping above could have resulted in nd2 being less than
-                * nvalues2; in which case, we assume that precisely the nd2 most
-                * common values in the relation will appear in the join input, and so
-                * compare to only the first nd2 members of the MCV list.  Of course
-                * this is frequently wrong, but it's the best bet we can make.
+                * sslot2.nvalues; in which case, we assume that precisely the nd2
+                * most common values in the relation will appear in the join input,
+                * and so compare to only the first nd2 members of the MCV list.  Of
+                * course this is frequently wrong, but it's the best bet we can make.
                  */
-               clamped_nvalues2 = Min(nvalues2, nd2);
+               clamped_nvalues2 = Min(sslot2.nvalues, nd2);
  
-               fmgr_info(get_opcode(operator), &eqproc);
-               hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+               fmgr_info(opfuncoid, &eqproc);
+               hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
                 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
  
                 /*
@@ -2580,7 +2561,7 @@ eqjoinsel_semi(Oid operator,
                  * and because the math wouldn't add up...
                  */
                 nmatches = 0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                 {
                         int                     j;
  
@@ -2590,8 +2571,8 @@ eqjoinsel_semi(Oid operator,
                                         continue;
                                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
                                                                                                    DEFAULT_COLLATION_OID,
-                                                                                                  values1[i],
-                                                                                                  values2[j])))
+                                                                                                  sslot1.values[i],
+                                                                                                  sslot2.values[j])))
                                 {
                                         hasmatch1[i] = hasmatch2[j] = true;
                                         nmatches++;
@@ -2601,10 +2582,10 @@ eqjoinsel_semi(Oid operator,
                 }
                 /* Sum up frequencies of matched MCVs */
                 matchfreq1 = 0.0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                 {
                         if (hasmatch1[i])
-                               matchfreq1 += numbers1[i];
+                               matchfreq1 += sslot1.numbers[i];
                 }
                 CLAMP_PROBABILITY(matchfreq1);
                 pfree(hasmatch1);
@@ -2659,12 +2640,8 @@ eqjoinsel_semi(Oid operator,
                         selec = 0.5 * (1.0 - nullfrac1);
         }
  
-       if (have_mcvs1)
-               free_attstatsslot(vardata1->atttype, values1, nvalues1,
-                                                 numbers1, nnumbers1);
-       if (have_mcvs2)
-               free_attstatsslot(vardata2->atttype, values2, nvalues2,
-                                                 numbers2, nnumbers2);
+       free_attstatsslot(&sslot1);
+       free_attstatsslot(&sslot2);
  
         return selec;
  }
@@ -3235,15 +3212,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
   *             restriction selectivity of the equality in the next step.
   *     4.  For Vars within a single source rel, we multiply together the numbers
   *             of values, clamp to the number of rows in the rel (divided by 10 if
- *             more than one Var), and then multiply by the selectivity of the
- *             restriction clauses for that rel.  When there's more than one Var,
- *             the initial product is probably too high (it's the worst case) but
- *             clamping to a fraction of the rel's rows seems to be a helpful
- *             heuristic for not letting the estimate get out of hand.  (The factor
- *             of 10 is derived from pre-Postgres-7.4 practice.)  Multiplying
- *             by the restriction selectivity is effectively assuming that the
- *             restriction clauses are independent of the grouping, which is a crummy
- *             assumption, but it's hard to do better.
+ *             more than one Var), and then multiply by a factor based on the
+ *             selectivity of the restriction clauses for that rel.  When there's
+ *             more than one Var, the initial product is probably too high (it's the
+ *             worst case) but clamping to a fraction of the rel's rows seems to be a
+ *             helpful heuristic for not letting the estimate get out of hand.  (The
+ *             factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
+ *             we multiply by to adjust for the restriction selectivity assumes that
+ *             the restriction clauses are independent of the grouping, which may not
+ *             be a valid assumption, but it's hard to do better.
   *     5.  If there are Vars from multiple rels, we repeat step 4 for each such
   *             rel, and multiply the results together.
   * Note that rels not containing grouped Vars are ignored completely, as are
@@ -3326,7 +3303,8 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                  * down to ignoring the possible addition of nulls to the result set).
                  */
                 varshere = pull_var_clause(groupexpr,
-                                                                  PVC_RECURSE_AGGREGATES,
+                                                                  PVC_RECURSE_AGGREGATES |
+                                                                  PVC_RECURSE_WINDOWFUNCS |
                                                                    PVC_RECURSE_PLACEHOLDERS);
  
                 /*
@@ -3379,25 +3357,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
         {
                 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
                 RelOptInfo *rel = varinfo1->rel;
-               double          reldistinct = varinfo1->ndistinct;
+               double          reldistinct = 1;
                 double          relmaxndistinct = reldistinct;
-               int                     relvarcount = 1;
+               int                     relvarcount = 0;
                 List       *newvarinfos = NIL;
+               List       *relvarinfos = NIL;
  
                 /*
-                * Get the product of numdistinct estimates of the Vars for this rel.
-                * Also, construct new varinfos list of remaining Vars.
+                * Split the list of varinfos in two - one for the current rel, one
+                * for remaining Vars on other rels.
                  */
+               relvarinfos = lcons(varinfo1, relvarinfos);
                 for_each_cell(l, lnext(list_head(varinfos)))
                 {
                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
  
                         if (varinfo2->rel == varinfo1->rel)
                         {
-                               reldistinct *= varinfo2->ndistinct;
-                               if (relmaxndistinct < varinfo2->ndistinct)
-                                       relmaxndistinct = varinfo2->ndistinct;
-                               relvarcount++;
+                               /* varinfos on current rel */
+                               relvarinfos = lcons(varinfo2, relvarinfos);
                         }
                         else
                         {
@@ -3406,10 +3384,51 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                         }
                 }
  
+               /*
+                * Get the numdistinct estimate for the Vars of this rel.  We
+                * iteratively search for multivariate n-distinct with maximum number
+                * of vars; assuming that each var group is independent of the others,
+                * we multiply them together.  Any remaining relvarinfos after no more
+                * multivariate matches are found are assumed independent too, so
+                * their individual ndistinct estimates are multiplied also.
+                *
+                * While iterating, count how many separate numdistinct values we
+                * apply.  We apply a fudge factor below, but only if we multiplied
+                * more than one such value.
+                */
+               while (relvarinfos)
+               {
+                       double          mvndistinct;
+
+                       if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+                                                                                               &mvndistinct))
+                       {
+                               reldistinct *= mvndistinct;
+                               if (relmaxndistinct < mvndistinct)
+                                       relmaxndistinct = mvndistinct;
+                               relvarcount++;
+                       }
+                       else
+                       {
+                               foreach(l, relvarinfos)
+                               {
+                                       GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+                                       reldistinct *= varinfo2->ndistinct;
+                                       if (relmaxndistinct < varinfo2->ndistinct)
+                                               relmaxndistinct = varinfo2->ndistinct;
+                                       relvarcount++;
+                               }
+
+                               /* we're done with this relation */
+                               relvarinfos = NIL;
+                       }
+               }
+
                 /*
                  * Sanity check --- don't divide by zero if empty relation.
                  */
-               Assert(rel->reloptkind == RELOPT_BASEREL);
+               Assert(IS_SIMPLE_REL(rel));
                 if (rel->tuples > 0)
                 {
                         /*
@@ -3436,9 +3455,51 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                                 reldistinct = clamp;
  
                         /*
-                        * Multiply by restriction selectivity.
+                        * Update the estimate based on the restriction selectivity,
+                        * guarding against division by zero when reldistinct is zero.
+                        * Also skip this if we know that we are returning all rows.
                          */
-                       reldistinct *= rel->rows / rel->tuples;
+                       if (reldistinct > 0 && rel->rows < rel->tuples)
+                       {
+                               /*
+                                * Given a table containing N rows with n distinct values in a
+                                * uniform distribution, if we select p rows at random then
+                                * the expected number of distinct values selected is
+                                *
+                                * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+                                *
+                                * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+                                *
+                                * See "Approximating block accesses in database
+                                * organizations", S. B. Yao, Communications of the ACM,
+                                * Volume 20 Issue 4, April 1977 Pages 260-261.
+                                *
+                                * Alternatively, re-arranging the terms from the factorials,
+                                * this may be written as
+                                *
+                                * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+                                *
+                                * This form of the formula is more efficient to compute in
+                                * the common case where p is larger than N/n.  Additionally,
+                                * as pointed out by Dell'Era, if i << N for all terms in the
+                                * product, it can be approximated by
+                                *
+                                * n * (1 - ((N-p)/N)^(N/n))
+                                *
+                                * See "Expected distinct values when selecting from a bag
+                                * without replacement", Alberto Dell'Era,
+                                * http://www.adellera.it/investigations/distinct_balls/.
+                                *
+                                * The condition i << N is equivalent to n >> 1, so this is a
+                                * good approximation when the number of distinct values in
+                                * the table is large.  It turns out that this formula also
+                                * works well even when n is small.
+                                */
+                               reldistinct *=
+                                       (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+                                                        rel->tuples / reldistinct));
+                       }
+                       reldistinct = clamp_row_est(reldistinct);
  
                         /*
                          * Update estimate of total distinct groups.
@@ -3500,8 +3561,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
                                 mcvfreq,
                                 avgfreq;
         bool            isdefault;
-       float4     *numbers;
-       int                     nnumbers;
+       AttStatsSlot sslot;
  
         examine_variable(root, hashkey, 0, &vardata);
  
@@ -3537,8 +3597,11 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
          * XXX Possibly better way, but much more expensive: multiply by
          * selectivity of rel's restriction clauses that mention the target Var.
          */
-       if (vardata.rel)
+       if (vardata.rel && vardata.rel->tuples > 0)
+       {
                 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+               ndistinct = clamp_row_est(ndistinct);
+       }
  
         /*
          * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
@@ -3557,20 +3620,16 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
  
         if (HeapTupleIsValid(vardata.statsTuple))
         {
-               if (get_attstatsslot(vardata.statsTuple,
-                                                        vardata.atttype, vardata.atttypmod,
+               if (get_attstatsslot(&sslot, vardata.statsTuple,
                                                          STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        NULL, NULL,
-                                                        &numbers, &nnumbers))
+                                                        ATTSTATSSLOT_NUMBERS))
                 {
                         /*
                          * The first MCV stat is for the most common value.
                          */
-                       if (nnumbers > 0)
-                               mcvfreq = numbers[0];
-                       free_attstatsslot(vardata.atttype, NULL, 0,
-                                                         numbers, nnumbers);
+                       if (sslot.nnumbers > 0)
+                               mcvfreq = sslot.numbers[0];
+                       free_attstatsslot(&sslot);
                 }
         }
  
@@ -3603,6 +3662,134 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
   *-------------------------------------------------------------------------
   */
  
+/*
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and update *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
+ * updated to remove the list of matched varinfos.
+ *
+ * Only varinfos that are simple Vars referencing user-defined attributes
+ * (attnum > 0) take part in the matching.  Everything else -- expressions,
+ * system columns, whole-row references -- is ignored here and left in
+ * *varinfos, so the caller still accounts for it individually.  System
+ * columns have to be skipped explicitly: their attribute numbers are
+ * negative, and the bitmapset routines reject negative members.
+ *
+ * root is currently unused.
+ *
+ * Return true if we're able to find a match, false otherwise.  On a false
+ * result neither *varinfos nor *ndistinct is modified.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+                                                               List **varinfos, double *ndistinct)
+{
+       ListCell   *lc;
+       Bitmapset  *attnums = NULL;
+       int                     nmatches;
+       Oid                     statOid = InvalidOid;
+       MVNDistinct *stats;
+       Bitmapset  *matched = NULL;
+       int                     i;
+       List       *newlist = NIL;
+       MVNDistinctItem *item = NULL;
+
+       /* bail out immediately if the table has no extended statistics */
+       if (!rel->statlist)
+               return false;
+
+       /* Determine the attnums we're looking for */
+       foreach(lc, *varinfos)
+       {
+               GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+               AttrNumber      attnum;
+
+               Assert(varinfo->rel == rel);
+
+               /* expressions can't be matched against statistics keys */
+               if (!IsA(varinfo->var, Var))
+                       continue;
+
+               /*
+                * Extended statistics only cover user-defined attributes.  Skip
+                * system columns and whole-row Vars; a negative attnum must never
+                * reach bms_add_member().
+                */
+               attnum = ((Var *) varinfo->var)->varattno;
+               if (!AttrNumberIsForUserDefinedAttr(attnum))
+                       continue;
+
+               attnums = bms_add_member(attnums, attnum);
+       }
+
+       /* look for the ndistinct statistics matching the most vars */
+       nmatches = 1;                           /* we require at least two matches */
+       foreach(lc, rel->statlist)
+       {
+               StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+               Bitmapset  *shared;
+               int                     nshared;
+
+               /* skip statistics of other kinds */
+               if (info->kind != STATS_EXT_NDISTINCT)
+                       continue;
+
+               /* compute attnums shared by the vars and the statistics object */
+               shared = bms_intersect(info->keys, attnums);
+               nshared = bms_num_members(shared);
+
+               /*
+                * Does this statistics object match more columns than the currently
+                * best object?  If so, use this one instead.
+                *
+                * XXX This should break ties using name of the object, or something
+                * like that, to make the outcome stable.
+                */
+               if (nshared > nmatches)
+               {
+                       statOid = info->statOid;
+                       nmatches = nshared;
+                       matched = shared;
+               }
+       }
+
+       /* No match? */
+       if (statOid == InvalidOid)
+               return false;
+       Assert(nmatches > 1 && matched != NULL);
+
+       /* Nothing usable stored for the chosen statistics object? */
+       stats = statext_ndistinct_load(statOid);
+       if (!stats)
+               return false;
+
+       /*
+        * Find the specific item that exactly matches the combination of
+        * attributes we settled on (there must be one).
+        */
+       for (i = 0; i < stats->nitems; i++)
+       {
+               MVNDistinctItem *tmpitem = &stats->items[i];
+
+               if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+               {
+                       item = tmpitem;
+                       break;
+               }
+       }
+
+       /* make sure we found an item */
+       if (!item)
+               elog(ERROR, "corrupt MVNDistinct entry");
+
+       /* Form the output varinfo list, keeping only unmatched ones */
+       foreach(lc, *varinfos)
+       {
+               GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+               AttrNumber      attnum;
+
+               /* expressions never matched, so they always stay */
+               if (!IsA(varinfo->var, Var))
+               {
+                       newlist = lappend(newlist, varinfo);
+                       continue;
+               }
+
+               /*
+                * Likewise for system columns and whole-row Vars: they were not
+                * part of the match, and a negative attnum must not be handed to
+                * bms_is_member().
+                */
+               attnum = ((Var *) varinfo->var)->varattno;
+               if (!AttrNumberIsForUserDefinedAttr(attnum) ||
+                       !bms_is_member(attnum, matched))
+                       newlist = lappend(newlist, varinfo);
+       }
+
+       *varinfos = newlist;
+       *ndistinct = item->ndistinct;
+       return true;
+}
+
  /*
   * convert_to_scalar
   *       Convert non-NULL values of the indicated types to the comparison
@@ -3736,6 +3923,7 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                 case INETOID:
                 case CIDROID:
                 case MACADDROID:
+               case MACADDR8OID:
                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
@@ -4149,31 +4337,17 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                  * average month length of 365.25/12.0 days.  Not too
                                  * accurate, but plenty good enough for our purposes.
                                  */
-#ifdef HAVE_INT64_TIMESTAMP
                                 return interval->time + interval->day * (double) USECS_PER_DAY +
                                         interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
-                               return interval->time + interval->day * SECS_PER_DAY +
-                                       interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
                         }
                 case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
                         return (DatumGetRelativeTime(value) * 1000000.0);
-#else
-                       return DatumGetRelativeTime(value);
-#endif
                 case TINTERVALOID:
                         {
                                 TimeInterval tinterval = DatumGetTimeInterval(value);
  
-#ifdef HAVE_INT64_TIMESTAMP
                                 if (tinterval->status != 0)
                                         return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
-                               if (tinterval->status != 0)
-                                       return tinterval->data[1] - tinterval->data[0];
-#endif
                                 return 0;               /* for lack of a better idea */
                         }
                 case TIMEOID:
@@ -4183,11 +4357,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
  
                                 /* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
                                 return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
-                               return (double) (timetz->time + timetz->zone);
-#endif
                         }
         }
  
@@ -4266,7 +4436,7 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
                 return true;
         }
  
-       /* Ooops, clause has wrong structure (probably var op var) */
+       /* Oops, clause has wrong structure (probably var op var) */
         ReleaseVariableStats(*vardata);
         ReleaseVariableStats(rdata);
  
@@ -4331,7 +4501,7 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
   *     freefunc: pointer to a function to release statsTuple with.
   *     vartype: exposed type of the expression; this should always match
   *             the declared input type of the operator we are estimating for.
- *     atttype, atttypmod: type data to pass to get_attstatsslot().  This is
+ *     atttype, atttypmod: actual type/typmod of the "var" expression.  This is
   *             commonly the same as the exposed type of the variable argument,
   *             but can be different in binary-compatible-type cases.
   *     isunique: TRUE if we were able to match the var to a unique index or a
@@ -4339,6 +4509,9 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
   *             this query.  (Caution: this should be trusted for statistical
   *             purposes only, since we do not check indimmediate nor verify that
   *             the exact same definition of equality applies.)
+ *     acl_ok: TRUE if current user has permission to read the column(s)
+ *             underlying the pg_statistic entry.  This is consulted by
+ *             statistic_proc_security_check().
   *
   * Caller is responsible for doing ReleaseVariableStats() before exiting.
   */
@@ -4507,6 +4680,30 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                                                                                                 Int16GetDatum(pos + 1),
                                                                                                 BoolGetDatum(false));
                                                         vardata->freefunc = ReleaseSysCache;
+
+                                                       if (HeapTupleIsValid(vardata->statsTuple))
+                                                       {
+                                                               /* Get index's table for permission check */
+                                                               RangeTblEntry *rte;
+
+                                                               rte = planner_rt_fetch(index->rel->relid, root);
+                                                               Assert(rte->rtekind == RTE_RELATION);
+
+                                                               /*
+                                                                * For simplicity, we insist on the whole
+                                                                * table being selectable, rather than trying
+                                                                * to identify which column(s) the index
+                                                                * depends on.
+                                                                */
+                                                               vardata->acl_ok =
+                                                                       (pg_class_aclcheck(rte->relid, GetUserId(),
+                                                                                                ACL_SELECT) == ACLCHECK_OK);
+                                                       }
+                                                       else
+                                                       {
+                                                               /* suppress leakproofness checks later */
+                                                               vardata->acl_ok = true;
+                                                       }
                                                 }
                                                 if (vardata->statsTuple)
                                                         break;
@@ -4559,6 +4756,21 @@ examine_simple_variable(PlannerInfo *root, Var *var,
                                                                                           Int16GetDatum(var->varattno),
                                                                                           BoolGetDatum(rte->inh));
                 vardata->freefunc = ReleaseSysCache;
+
+               if (HeapTupleIsValid(vardata->statsTuple))
+               {
+                       /* check if user has permission to read this column */
+                       vardata->acl_ok =
+                               (pg_class_aclcheck(rte->relid, GetUserId(),
+                                                                  ACL_SELECT) == ACLCHECK_OK) ||
+                               (pg_attribute_aclcheck(rte->relid, var->varattno, GetUserId(),
+                                                                          ACL_SELECT) == ACLCHECK_OK);
+               }
+               else
+               {
+                       /* suppress any possible leakproofness checks later */
+                       vardata->acl_ok = true;
+               }
         }
         else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
         {
@@ -4675,6 +4887,30 @@ examine_simple_variable(PlannerInfo *root, Var *var,
         }
  }
  
+/*
+ * Check whether func_oid may be called on the pg_statistic data in vardata.
+ * Returns true if vardata->acl_ok is set (the user has SELECT privilege on the
+ * underlying table or column) or if func_oid is marked leak-proof.  Returns
+ * false for an invalid func_oid, or after a DEBUG2 message if not leak-proof.
+ */
+bool
+statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
+{
+       if (vardata->acl_ok)
+               return true;
+
+       if (!OidIsValid(func_oid))
+               return false;
+
+       if (get_func_leakproof(func_oid))
+               return true;
+
+       ereport(DEBUG2,
+                       (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
+                                                        get_func_name(func_oid))));
+       return false;
+}
+
  /*
   * get_variable_numdistinct
   *       Estimate the number of distinct values of a variable.
@@ -4690,6 +4926,7 @@ double
  get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
  {
         double          stadistinct;
+       double          stanullfrac = 0.0;
         double          ntuples;
  
         *isdefault = false;
@@ -4697,7 +4934,8 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
         /*
          * Determine the stadistinct value to use.  There are cases where we can
          * get an estimate even without a pg_statistic entry, or can get a better
-        * value than is in pg_statistic.
+        * value than is in pg_statistic.  Grab stanullfrac too if we can find it
+        * (otherwise, assume no nulls, for lack of any better idea).
          */
         if (HeapTupleIsValid(vardata->statsTuple))
         {
@@ -4706,6 +4944,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
  
                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
                 stadistinct = stats->stadistinct;
+               stanullfrac = stats->stanullfrac;
         }
         else if (vardata->vartype == BOOLOID)
         {
@@ -4729,7 +4968,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
                         {
                                 case ObjectIdAttributeNumber:
                                 case SelfItemPointerAttributeNumber:
-                                       stadistinct = -1.0; /* unique */
+                                       stadistinct = -1.0; /* unique (and all non null) */
                                         break;
                                 case TableOidAttributeNumber:
                                         stadistinct = 1.0;      /* only 1 value */
@@ -4751,10 +4990,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
          * If there is a unique index or DISTINCT clause for the variable, assume
          * it is unique no matter what pg_statistic says; the statistics could be
          * out of date, or we might have found a partial unique index that proves
-        * the var is unique for this query.
+        * the var is unique for this query.  However, we'd better still believe
+        * the null-fraction statistic.
          */
         if (vardata->isunique)
-               stadistinct = -1.0;
+               stadistinct = -1.0 * (1.0 - stanullfrac);
  
         /*
          * If we had an absolute estimate, use that.
@@ -4813,8 +5053,8 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
         bool            have_data = false;
         int16           typLen;
         bool            typByVal;
-       Datum      *values;
-       int                     nvalues;
+       Oid                     opfuncoid;
+       AttStatsSlot sslot;
         int                     i;
  
         /*
@@ -4835,6 +5075,17 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
                 return false;
         }
  
+       /*
+        * If we can't apply the sortop to the stats data, just fail.  In
+        * principle, if there's a histogram and no MCVs, we could return the
+        * histogram endpoints without ever applying the sortop ... but it's
+        * probably not worth trying, because whatever the caller wants to do with
+        * the endpoints would likely fail the security check too.
+        */
+       if (!statistic_proc_security_check(vardata,
+                                                                          (opfuncoid = get_opcode(sortop))))
+               return false;
+
         get_typlenbyval(vardata->atttype, &typLen, &typByVal);
  
         /*
@@ -4844,29 +5095,23 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
          * the one we want, fail --- this suggests that there is data we can't
          * use.
          */
-       if (get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+       if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                  STATISTIC_KIND_HISTOGRAM, sortop,
-                                                NULL,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
         {
-               if (nvalues > 0)
+               if (sslot.nvalues > 0)
                 {
-                       tmin = datumCopy(values[0], typByVal, typLen);
-                       tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
+                       tmin = datumCopy(sslot.values[0], typByVal, typLen);
+                       tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
                         have_data = true;
                 }
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
         }
-       else if (get_attstatsslot(vardata->statsTuple,
-                                                         vardata->atttype, vardata->atttypmod,
+       else if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                           STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                                         NULL,
-                                                         &values, &nvalues,
-                                                         NULL, NULL))
+                                                         0))
         {
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
                 return false;
         }
  
@@ -4876,39 +5121,36 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
          * the MCVs.  However, usually the MCVs will not be the extreme values, so
          * avoid unnecessary data copying.
          */
-       if (get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+       if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                  STATISTIC_KIND_MCV, InvalidOid,
-                                                NULL,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
         {
                 bool            tmin_is_mcv = false;
                 bool            tmax_is_mcv = false;
                 FmgrInfo        opproc;
  
-               fmgr_info(get_opcode(sortop), &opproc);
+               fmgr_info(opfuncoid, &opproc);
  
-               for (i = 0; i < nvalues; i++)
+               for (i = 0; i < sslot.nvalues; i++)
                 {
                         if (!have_data)
                         {
-                               tmin = tmax = values[i];
+                               tmin = tmax = sslot.values[i];
                                 tmin_is_mcv = tmax_is_mcv = have_data = true;
                                 continue;
                         }
                         if (DatumGetBool(FunctionCall2Coll(&opproc,
                                                                                            DEFAULT_COLLATION_OID,
-                                                                                          values[i], tmin)))
+                                                                                          sslot.values[i], tmin)))
                         {
-                               tmin = values[i];
+                               tmin = sslot.values[i];
                                 tmin_is_mcv = true;
                         }
                         if (DatumGetBool(FunctionCall2Coll(&opproc,
                                                                                            DEFAULT_COLLATION_OID,
-                                                                                          tmax, values[i])))
+                                                                                          tmax, sslot.values[i])))
                         {
-                               tmax = values[i];
+                               tmax = sslot.values[i];
                                 tmax_is_mcv = true;
                         }
                 }
@@ -4916,7 +5158,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
                         tmin = datumCopy(tmin, typByVal, typLen);
                 if (tmax_is_mcv)
                         tmax = datumCopy(tmax, typByVal, typLen);
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
         }
  
         *min = tmin;
@@ -5208,7 +5450,7 @@ find_join_input_rel(PlannerInfo *root, Relids relids)
  /*
   * Check whether char is a letter (and, hence, subject to case-folding)
   *
- * In multibyte character sets, we can't use isalpha, and it does not seem
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
   * worth trying to convert to wchar_t to use iswalpha.  Instead, just assume
   * any multibyte char is potentially case-varying.
   */
@@ -5220,9 +5462,11 @@ pattern_char_isalpha(char c, bool is_multibyte,
                 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
         else if (is_multibyte && IS_HIGHBIT_SET(c))
                 return true;
+       else if (locale && locale->provider == COLLPROVIDER_ICU)
+               return IS_HIGHBIT_SET(c) ? true : false;
  #ifdef HAVE_LOCALE_T
-       else if (locale)
-               return isalpha_l((unsigned char) c, locale);
+       else if (locale && locale->provider == COLLPROVIDER_LIBC)
+               return isalpha_l((unsigned char) c, locale->info.lt);
  #endif
         else
                 return isalpha((unsigned char) c);
@@ -5292,13 +5536,12 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
         }
         else
         {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
  
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
+               pattlen = VARSIZE_ANY_EXHDR(bstr);
                 patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
+               memcpy(patt, VARDATA_ANY(bstr), pattlen);
+               Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
         }
  
         match = palloc(pattlen + 1);
@@ -5808,13 +6051,12 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
         }
         else if (datatype == BYTEAOID)
         {
-               bytea      *bstr = DatumGetByteaP(str_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
  
-               len = VARSIZE(bstr) - VARHDRSZ;
+               len = VARSIZE_ANY_EXHDR(bstr);
                 workstr = (char *) palloc(len);
-               memcpy(workstr, VARDATA(bstr), len);
-               if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
-                       pfree(bstr);
+               memcpy(workstr, VARDATA_ANY(bstr), len);
+               Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
                 cmpstr = str_const->constvalue;
         }
         else
@@ -6010,21 +6252,7 @@ string_to_bytea_const(const char *str, size_t str_len)
   *-------------------------------------------------------------------------
   */
  
-/*
- * deconstruct_indexquals is a simple function to examine the indexquals
- * attached to a proposed IndexPath.  It returns a list of IndexQualInfo
- * structs, one per qual expression.
- */
-typedef struct
-{
-       RestrictInfo *rinfo;            /* the indexqual itself */
-       int                     indexcol;               /* zero-based index column number */
-       bool            varonleft;              /* true if index column is on left of qual */
-       Oid                     clause_op;              /* qual's operator OID, if relevant */
-       Node       *other_operand;      /* non-index operand of qual's operator */
-} IndexQualInfo;
-
-static List *
+List *
  deconstruct_indexquals(IndexPath *path)
  {
         List       *result = NIL;
@@ -6034,14 +6262,13 @@ deconstruct_indexquals(IndexPath *path)
  
         forboth(lcc, path->indexquals, lci, path->indexqualcols)
         {
-               RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
+               RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc);
                 int                     indexcol = lfirst_int(lci);
                 Expr       *clause;
                 Node       *leftop,
                                    *rightop;
                 IndexQualInfo *qinfo;
  
-               Assert(IsA(rinfo, RestrictInfo));
                 clause = rinfo->clause;
  
                 qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
@@ -6174,35 +6401,7 @@ orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
         return qual_arg_cost;
  }
  
-/*
- * genericcostestimate is a general-purpose estimator that can be used for
- * most index types.  In some cases we use genericcostestimate as the base
- * code and then incorporate additional index-type-specific knowledge in
- * the type-specific calling function.  To avoid code duplication, we make
- * genericcostestimate return a number of intermediate values as well as
- * its preliminary estimates of the output cost values.  The GenericCosts
- * struct includes all these values.
- *
- * Callers should initialize all fields of GenericCosts to zero.  In addition,
- * they can set numIndexTuples to some positive value if they have a better
- * than default way of estimating the number of leaf index tuples visited.
- */
-typedef struct
-{
-       /* These are the values the cost estimator must return to the planner */
-       Cost            indexStartupCost;               /* index-related startup cost */
-       Cost            indexTotalCost; /* total index-related scan cost */
-       Selectivity indexSelectivity;           /* selectivity of index */
-       double          indexCorrelation;               /* order correlation of index */
-
-       /* Intermediate values we obtain along the way */
-       double          numIndexPages;  /* number of leaf pages visited */
-       double          numIndexTuples; /* number of leaf tuples visited */
-       double          spc_random_page_cost;   /* relevant random_page_cost value */
-       double          num_sa_scans;   /* # indexscans from ScalarArrayOps */
-} GenericCosts;
-
-static void
+void
  genericcostestimate(PlannerInfo *root,
                                         IndexPath *path,
                                         double loop_count,
@@ -6443,16 +6642,12 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
  }
  
  
-Datum
-btcostestimate(PG_FUNCTION_ARGS)
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                          Cost *indexStartupCost, Cost *indexTotalCost,
+                          Selectivity *indexSelectivity, double *indexCorrelation,
+                          double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *qinfos;
         GenericCosts costs;
@@ -6703,25 +6898,21 @@ btcostestimate(PG_FUNCTION_ARGS)
         if (HeapTupleIsValid(vardata.statsTuple))
         {
                 Oid                     sortop;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
  
                 sortop = get_opfamily_member(index->opfamily[0],
                                                                          index->opcintype[0],
                                                                          index->opcintype[0],
                                                                          BTLessStrategyNumber);
                 if (OidIsValid(sortop) &&
-                       get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
-                                                        STATISTIC_KIND_CORRELATION,
-                                                        sortop,
-                                                        NULL,
-                                                        NULL, NULL,
-                                                        &numbers, &nnumbers))
+                       get_attstatsslot(&sslot, vardata.statsTuple,
+                                                        STATISTIC_KIND_CORRELATION, sortop,
+                                                        ATTSTATSSLOT_NUMBERS))
                 {
                         double          varCorrelation;
  
-                       Assert(nnumbers == 1);
-                       varCorrelation = numbers[0];
+                       Assert(sslot.nnumbers == 1);
+                       varCorrelation = sslot.numbers[0];
  
                         if (index->reverse_sort[0])
                                 varCorrelation = -varCorrelation;
@@ -6731,7 +6922,7 @@ btcostestimate(PG_FUNCTION_ARGS)
                         else
                                 costs.indexCorrelation = varCorrelation;
  
-                       free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
+                       free_attstatsslot(&sslot);
                 }
         }
  
@@ -6741,20 +6932,15 @@ btcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
-Datum
-hashcostestimate(PG_FUNCTION_ARGS)
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         List       *qinfos;
         GenericCosts costs;
  
@@ -6794,20 +6980,15 @@ hashcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
-Datum
-gistcostestimate(PG_FUNCTION_ARGS)
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *qinfos;
         GenericCosts costs;
@@ -6860,20 +7041,15 @@ gistcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
-Datum
-spgcostestimate(PG_FUNCTION_ARGS)
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                               Cost *indexStartupCost, Cost *indexTotalCost,
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *qinfos;
         GenericCosts costs;
@@ -6926,8 +7102,7 @@ spgcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
  
@@ -7222,16 +7397,12 @@ gincost_scalararrayopexpr(PlannerInfo *root,
  /*
   * GIN has search behavior completely different from other index types
   */
-Datum
-gincostestimate(PG_FUNCTION_ARGS)
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                               Cost *indexStartupCost, Cost *indexTotalCost,
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *indexQuals = path->indexquals;
         List       *indexOrderBys = path->indexorderbys;
@@ -7246,6 +7417,7 @@ gincostestimate(PG_FUNCTION_ARGS)
                                 numEntries;
         GinQualCounts counts;
         bool            matchPossible;
+       double          partialScale;
         double          entryPagesFetched,
                                 dataPagesFetched,
                                 dataPagesFetchedBySel;
@@ -7260,39 +7432,74 @@ gincostestimate(PG_FUNCTION_ARGS)
         qinfos = deconstruct_indexquals(path);
  
         /*
-        * Obtain statistic information from the meta page
+        * Obtain statistical information from the meta page, if possible.  Else
+        * set ginStats to zeroes, and we'll cope below.
          */
-       indexRel = index_open(index->indexoid, AccessShareLock);
-       ginGetStats(indexRel, &ginStats);
-       index_close(indexRel, AccessShareLock);
-
-       numEntryPages = ginStats.nEntryPages;
-       numDataPages = ginStats.nDataPages;
-       numPendingPages = ginStats.nPendingPages;
-       numEntries = ginStats.nEntries;
+       if (!index->hypothetical)
+       {
+               indexRel = index_open(index->indexoid, AccessShareLock);
+               ginGetStats(indexRel, &ginStats);
+               index_close(indexRel, AccessShareLock);
+       }
+       else
+       {
+               memset(&ginStats, 0, sizeof(ginStats));
+       }
  
         /*
-        * nPendingPages can be trusted, but the other fields are as of the last
-        * VACUUM.  Scale them by the ratio numPages / nTotalPages to account for
-        * growth since then.  If the fields are zero (implying no VACUUM at all,
-        * and an index created pre-9.1), assume all pages are entry pages.
+        * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+        * trusted, but the other fields are data as of the last VACUUM.  We can
+        * scale them up to account for growth since then, but that method only
+        * goes so far; in the worst case, the stats might be for a completely
+        * empty index, and scaling them will produce pretty bogus numbers.
+        * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+        * it's grown more than that, fall back to estimating things only from the
+        * assumed-accurate index size.  But we'll trust nPendingPages in any case
+        * so long as it's not clearly insane, ie, more than the index size.
          */
-       if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
-       {
-               numEntryPages = numPages;
-               numDataPages = 0;
-               numEntries = numTuples; /* bogus, but no other info available */
-       }
+       if (ginStats.nPendingPages < numPages)
+               numPendingPages = ginStats.nPendingPages;
         else
+               numPendingPages = 0;
+
+       if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+               ginStats.nTotalPages > numPages / 4 &&
+               ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
         {
+               /*
+                * OK, the stats seem close enough to sane to be trusted.  But we
+                * still need to scale them by the ratio numPages / nTotalPages to
+                * account for growth since the last VACUUM.
+                */
                 double          scale = numPages / ginStats.nTotalPages;
  
-               numEntryPages = ceil(numEntryPages * scale);
-               numDataPages = ceil(numDataPages * scale);
-               numEntries = ceil(numEntries * scale);
+               numEntryPages = ceil(ginStats.nEntryPages * scale);
+               numDataPages = ceil(ginStats.nDataPages * scale);
+               numEntries = ceil(ginStats.nEntries * scale);
                 /* ensure we didn't round up too much */
-               numEntryPages = Min(numEntryPages, numPages);
-               numDataPages = Min(numDataPages, numPages - numEntryPages);
+               numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+               numDataPages = Min(numDataPages,
+                                                  numPages - numPendingPages - numEntryPages);
+       }
+       else
+       {
+               /*
+                * We might get here because it's a hypothetical index, or an index
+                * created pre-9.1 and never vacuumed since upgrading (in which case
+                * its stats would read as zeroes), or just because it's grown too
+                * much since the last VACUUM for us to put our faith in scaling.
+                *
+                * Invent some plausible internal statistics based on the index page
+                * count (and clamp that to at least 10 pages, just in case).  We
+                * estimate that 90% of the index is entry pages, and the rest is data
+                * pages.  Estimate 100 entries per entry page; this is rather bogus
+                * since it'll depend on the size of the keys, but it's more robust
+                * than trying to predict the number of entries per heap tuple.
+                */
+               numPages = Max(numPages, 10);
+               numEntryPages = floor((numPages - numPendingPages) * 0.90);
+               numDataPages = numPages - numPendingPages - numEntryPages;
+               numEntries = floor(numEntryPages * 100);
         }
  
         /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
@@ -7382,7 +7589,7 @@ gincostestimate(PG_FUNCTION_ARGS)
                 *indexStartupCost = 0;
                 *indexTotalCost = 0;
                 *indexSelectivity = 0;
-               PG_RETURN_VOID();
+               return;
         }
  
         if (counts.haveFullScan || indexQuals == NIL)
@@ -7417,16 +7624,21 @@ gincostestimate(PG_FUNCTION_ARGS)
         /*
          * Add an estimate of entry pages read by partial match algorithm. It's a
          * scan over leaf pages in entry tree.  We haven't any useful stats here,
-        * so estimate it as proportion.
+        * so estimate it as a proportion.  Because counts.partialEntries is really
+        * pretty bogus (see code above), it's possible that it is more than
+        * numEntries; clamp the proportion to ensure sanity.
          */
-       entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
+       partialScale = counts.partialEntries / numEntries;
+       partialScale = Min(partialScale, 1.0);
+
+       entryPagesFetched += ceil(numEntryPages * partialScale);
  
         /*
          * Partial match algorithm reads all data pages before doing actual scan,
-        * so it's a startup cost. Again, we haven't any useful stats here, so,
-        * estimate it as proportion
+        * so it's a startup cost.  Again, we haven't any useful stats here, so
+        * estimate it as a proportion.
          */
-       dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
+       dataPagesFetched = ceil(numDataPages * partialScale);
  
         /*
          * Calculate cache effects if more than one scan due to nestloops or array
@@ -7504,75 +7716,209 @@ gincostestimate(PG_FUNCTION_ARGS)
         *indexStartupCost += qual_arg_cost;
         *indexTotalCost += qual_arg_cost;
         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
-
-       PG_RETURN_VOID();
+       *indexPages = dataPagesFetched;
  }
  
  /*
   * BRIN has search behavior completely different from other index types
   */
-Datum
-brincostestimate(PG_FUNCTION_ARGS)
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *indexQuals = path->indexquals;
-       List       *indexOrderBys = path->indexorderbys;
         double          numPages = index->pages;
-       double          numTuples = index->tuples;
+       RelOptInfo *baserel = index->rel;
+       RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
         List       *qinfos;
         Cost            spc_seq_page_cost;
         Cost            spc_random_page_cost;
-       double          qual_op_cost;
         double          qual_arg_cost;
+       double          qualSelectivity;
+       BrinStatsData statsData;
+       double          indexRanges;
+       double          minimalRanges;
+       double          estimatedRanges;
+       double          selec;
+       Relation        indexRel;
+       ListCell   *l;
+       VariableStatData vardata;
  
-       /* Do preliminary analysis of indexquals */
-       qinfos = deconstruct_indexquals(path);
+       Assert(rte->rtekind == RTE_RELATION);
  
-       /* fetch estimated page cost for tablespace containing index */
+       /* fetch estimated page cost for the tablespace containing the index */
         get_tablespace_page_costs(index->reltablespace,
                                                           &spc_random_page_cost,
                                                           &spc_seq_page_cost);
  
         /*
-        * BRIN indexes are always read in full; use that as startup cost.
+        * Obtain some data from the index itself.
+        */
+       indexRel = index_open(index->indexoid, AccessShareLock);
+       brinGetStats(indexRel, &statsData);
+       index_close(indexRel, AccessShareLock);
+
+       /*
+        * Compute index correlation
          *
-        * XXX maybe only include revmap pages here?
+        * Because we can use all index quals equally when scanning, we can use
+        * the largest correlation (in absolute value) among columns used by the
+        * query.  Start at zero, the worst possible case.  If we cannot find any
+        * correlation statistics, we will keep it as 0.
+        */
+       *indexCorrelation = 0;
+
+       qinfos = deconstruct_indexquals(path);
+       foreach(l, qinfos)
+       {
+               IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+               AttrNumber      attnum = index->indexkeys[qinfo->indexcol];
+
+               /* attempt to look up stats in relation for this index column */
+               if (attnum != 0)
+               {
+                       /* Simple variable -- look to stats for the underlying table */
+                       if (get_relation_stats_hook &&
+                               (*get_relation_stats_hook) (root, rte, attnum, &vardata))
+                       {
+                               /*
+                                * The hook took control of acquiring a stats tuple.  If it
+                                * did supply a tuple, it'd better have supplied a freefunc.
+                                */
+                               if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
+                                       elog(ERROR,
+                                         "no function provided to release variable stats with");
+                       }
+                       else
+                       {
+                               vardata.statsTuple =
+                                       SearchSysCache3(STATRELATTINH,
+                                                                       ObjectIdGetDatum(rte->relid),
+                                                                       Int16GetDatum(attnum),
+                                                                       BoolGetDatum(false));
+                               vardata.freefunc = ReleaseSysCache;
+                       }
+               }
+               else
+               {
+                       /*
+                        * Looks like we've found an expression column in the index. Let's
+                        * see if there are any stats for it.
+                        */
+
+                       /* get the attnum from the 0-based index. */
+                       attnum = qinfo->indexcol + 1;
+
+                       if (get_index_stats_hook &&
+                       (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
+                       {
+                               /*
+                                * The hook took control of acquiring a stats tuple.  If it
+                                * did supply a tuple, it'd better have supplied a freefunc.
+                                */
+                               if (HeapTupleIsValid(vardata.statsTuple) &&
+                                       !vardata.freefunc)
+                                       elog(ERROR, "no function provided to release variable stats with");
+                       }
+                       else
+                       {
+                               vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+                                                                                  ObjectIdGetDatum(index->indexoid),
+                                                                                                        Int16GetDatum(attnum),
+                                                                                                        BoolGetDatum(false));
+                               vardata.freefunc = ReleaseSysCache;
+                       }
+               }
+
+               if (HeapTupleIsValid(vardata.statsTuple))
+               {
+                       AttStatsSlot sslot;
+
+                       if (get_attstatsslot(&sslot, vardata.statsTuple,
+                                                                STATISTIC_KIND_CORRELATION, InvalidOid,
+                                                                ATTSTATSSLOT_NUMBERS))
+                       {
+                               double          varCorrelation = 0.0;
+
+                               if (sslot.nnumbers > 0)
+                                       varCorrelation = Abs(sslot.numbers[0]);
+
+                               if (varCorrelation > *indexCorrelation)
+                                       *indexCorrelation = varCorrelation;
+
+                               free_attstatsslot(&sslot);
+                       }
+               }
+
+               ReleaseVariableStats(vardata);
+       }
+
+       qualSelectivity = clauselist_selectivity(root, indexQuals,
+                                                                                        baserel->relid,
+                                                                                        JOIN_INNER, NULL);
+
+       /* work out the actual number of ranges in the index */
+       indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
+                                         1.0);
+
+       /*
+        * Now calculate the minimum number of ranges we could match, which is the
+        * case where all of the rows are in perfect order in the table's heap.
          */
-       *indexStartupCost = spc_seq_page_cost * numPages * loop_count;
+       minimalRanges = ceil(indexRanges * qualSelectivity);
  
         /*
-        * To read a BRIN index there might be a bit of back and forth over
-        * regular pages, as revmap might point to them out of sequential order;
-        * calculate this as reading the whole index in random order.
+        * Now estimate the number of ranges that we'll touch by using the
+        * indexCorrelation from the stats. Careful not to divide by zero (note
+        * we're using the absolute value of the correlation).
          */
-       *indexTotalCost = spc_random_page_cost * numPages * loop_count;
+       if (*indexCorrelation < 1.0e-10)
+               estimatedRanges = indexRanges;
+       else
+               estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
  
-       *indexSelectivity =
-               clauselist_selectivity(root, indexQuals,
-                                                          path->indexinfo->rel->relid,
-                                                          JOIN_INNER, NULL);
-       *indexCorrelation = 1;
+       /* we expect to visit this portion of the table */
+       selec = estimatedRanges / indexRanges;
+
+       CLAMP_PROBABILITY(selec);
+
+       *indexSelectivity = selec;
  
         /*
-        * Add on index qual eval costs, much as in genericcostestimate.
+        * Compute the index qual costs, much as in genericcostestimate, to add to
+        * the index costs.
          */
         qual_arg_cost = other_operands_eval_cost(root, qinfos) +
                 orderby_operands_eval_cost(root, path);
-       qual_op_cost = cpu_operator_cost *
-               (list_length(indexQuals) + list_length(indexOrderBys));
  
+       /*
+        * Compute the startup cost as the cost to read the whole revmap
+        * sequentially, including the cost to execute the index quals.
+        */
+       *indexStartupCost =
+               spc_seq_page_cost * statsData.revmapNumPages * loop_count;
         *indexStartupCost += qual_arg_cost;
-       *indexTotalCost += qual_arg_cost;
-       *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
  
-       /* XXX what about pages_per_range? */
+       /*
+        * To read a BRIN index there might be a bit of back and forth over
+        * regular pages, as revmap might point to them out of sequential order;
+        * calculate the total cost as reading the whole index in random order.
+        */
+       *indexTotalCost = *indexStartupCost +
+               spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
+
+       /*
+        * Charge a small amount per range tuple that we expect to match.  This
+        * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
+        * will set a bit for each page in the range when we find a matching
+        * range, so we must multiply the charge by the number of pages in the
+        * range.
+        */
+       *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
+               statsData.pagesPerRange;
  
-       PG_RETURN_VOID();
+       *indexPages = index->pages;
  }