Post-PG 10 beta1 pgindent run
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fa8cecafcbe5b4bef44d12549a35f7e27c838c3c..6e491bbc21ec9660dc45949455c03f1a17a70597 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -7,10 +7,10 @@
  *       Selectivity routines are registered in the pg_operator catalog
  *       in the "oprrest" and "oprjoin" attributes.
  *
- *       Index cost functions are registered in the pg_am catalog
- *       in the "amcostestimate" attribute.
+ *       Index cost functions are located via the index AM's API struct,
+ *       which is obtained from the handler function registered in pg_am.
  *
- * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
@@ -72,7 +72,7 @@
  *             float8 oprjoin (internal, oid, internal, int2, internal);
  *
  * (Before Postgres 8.4, join estimators had only the first four of these
- * parameters. That signature is still allowed, but deprecated.)  The
+ * parameters.  That signature is still allowed, but deprecated.)  The
  * relationship between jointype and sjinfo is explained in the comments for
  * clause_selectivity() --- the short version is that jointype is usually
  * best ignored in favor of examining sjinfo.
  * joins, however, the selectivity is defined as the fraction of the left-hand
  * side relation's rows that are expected to have a match (ie, at least one
  * row with a TRUE result) in the right-hand side.
+ *
+ * For both oprrest and oprjoin functions, the operator's input collation OID
+ * (if any) is passed using the standard fmgr mechanism, so that the estimator
+ * function can fetch it with PG_GET_COLLATION().  Note, however, that all
+ * statistics in pg_statistic are currently built using the database's default
+ * collation.  Thus, in most cases where we are looking at statistics, we
+ * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
+ * We expect that the error induced by doing this is usually not large enough
+ * to justify complicating matters.
  *----------
  */
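The comment above captures the estimator calling convention: the operator's input collation arrives through the standard fmgr mechanism, while pg_statistic contents are built under the database's default collation. A minimal sketch of a restriction estimator observing both points, for illustration only (the name example_restriction_sel and its body are hypothetical and not part of this file; it assumes the usual PostgreSQL includes such as postgres.h and fmgr.h):

	Datum
	example_restriction_sel(PG_FUNCTION_ARGS)
	{
		PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
		Oid			operator = PG_GETARG_OID(1);
		List	   *args = (List *) PG_GETARG_POINTER(2);
		int			varRelid = PG_GETARG_INT32(3);
		Oid			collation = PG_GET_COLLATION();	/* operator's input collation, if any */

		/*
		 * The collation is available when the estimator truly needs it (say,
		 * to compile a regex under the right cache key), but comparisons
		 * against values pulled from pg_statistic would normally pass
		 * DEFAULT_COLLATION_OID, since ANALYZE built those statistics with
		 * the database default collation.
		 */
		(void) root;
		(void) operator;
		(void) args;
		(void) varRelid;
		(void) collation;

		PG_RETURN_FLOAT8(0.5);	/* placeholder default estimate */
	}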
 
 #include "postgres.h"
 
 #include <ctype.h>
+#include <float.h>
 #include <math.h>
 
+#include "access/brin.h"
 #include "access/gin.h"
+#include "access/htup_details.h"
 #include "access/sysattr.h"
 #include "catalog/index.h"
+#include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
 #include "mb/pg_wchar.h"
+#include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/predtest.h"
 #include "optimizer/restrictinfo.h"
 #include "optimizer/var.h"
+#include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
 #include "parser/parsetree.h"
+#include "statistics/statistics.h"
+#include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
 #include "utils/date.h"
 #include "utils/datum.h"
 #include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
 #include "utils/lsyscache.h"
 #include "utils/nabstime.h"
 #include "utils/pg_locale.h"
+#include "utils/rel.h"
 #include "utils/selfuncs.h"
 #include "utils/spccache.h"
 #include "utils/syscache.h"
+#include "utils/timestamp.h"
 #include "utils/tqual.h"
+#include "utils/typcache.h"
+#include "utils/varlena.h"
 
 
 /* Hooks for plugins to get control when we ask for stats */
@@ -143,7 +167,10 @@ static double ineq_histogram_selectivity(PlannerInfo *root,
 static double eqjoinsel_inner(Oid operator,
                                VariableStatData *vardata1, VariableStatData *vardata2);
 static double eqjoinsel_semi(Oid operator,
-                          VariableStatData *vardata1, VariableStatData *vardata2);
+                          VariableStatData *vardata1, VariableStatData *vardata2,
+                          RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+                                               RelOptInfo *rel, List **varinfos, double *ndistinct);
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
@@ -166,19 +193,27 @@ static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                                                        int rangelo, int rangehi);
 static char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
+static void examine_simple_variable(PlannerInfo *root, Var *var,
+                                               VariableStatData *vardata);
 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
                                   Oid sortop, Datum *min, Datum *max);
 static bool get_actual_variable_range(PlannerInfo *root,
                                                  VariableStatData *vardata,
                                                  Oid sortop,
                                                  Datum *min, Datum *max);
+static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
 static Selectivity prefix_selectivity(PlannerInfo *root,
                                   VariableStatData *vardata,
                                   Oid vartype, Oid opfamily, Const *prefixcon);
-static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+                                bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+                                 bool case_insensitive,
+                                 int fixed_prefix_len);
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
 static Const *string_to_bytea_const(const char *str, size_t str_len);
+static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
 
 
 /*
@@ -186,7 +221,7 @@ static Const *string_to_bytea_const(const char *str, size_t str_len);
  *
  * Note: this routine is also used to estimate selectivity for some
  * operators that are not "=" but have comparable selectivity behavior,
- * such as "~=" (geometric approximate-match). Even for "=", we must
+ * such as "~=" (geometric approximate-match).  Even for "=", we must
  * keep in mind that the left and right datatypes may differ.
  */
 Datum
@@ -239,6 +274,8 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                         bool varonleft)
 {
        double          selec;
+       bool            isdefault;
+       Oid                     opfuncoid;
 
        /*
         * If the constant is NULL, assume operator is strict and return zero, ie,
@@ -248,21 +285,21 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                return 0.0;
 
        /*
-        * If we matched the var to a unique index, assume there is exactly one
-        * match regardless of anything else.  (This is slightly bogus, since the
-        * index's equality operator might be different from ours, but it's more
-        * likely to be right than ignoring the information.)
+        * If we matched the var to a unique index or DISTINCT clause, assume
+        * there is exactly one match regardless of anything else.  (This is
+        * slightly bogus, since the index or clause's equality operator might be
+        * different from ours, but it's much more likely to be right than
+        * ignoring the information.)
         */
        if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
                return 1.0 / vardata->rel->tuples;
 
-       if (HeapTupleIsValid(vardata->statsTuple))
+       if (HeapTupleIsValid(vardata->statsTuple) &&
+               statistic_proc_security_check(vardata,
+                                                                         (opfuncoid = get_opcode(operator))))
        {
                Form_pg_statistic stats;
-               Datum      *values;
-               int                     nvalues;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
                bool            match = false;
                int                     i;
 
@@ -271,34 +308,31 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                /*
                 * Is the constant "=" to any of the column's most common values?
                 * (Although the given operator may not really be "=", we will assume
-                * that seeing whether it returns TRUE is an appropriate test.  If you
+                * that seeing whether it returns TRUE is an appropriate test.  If you
                 * don't like this, maybe you shouldn't be using eqsel for your
                 * operator...)
                 */
-               if (get_attstatsslot(vardata->statsTuple,
-                                                        vardata->atttype, vardata->atttypmod,
+               if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                         STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        &values, &nvalues,
-                                                        &numbers, &nnumbers))
+                                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
                {
                        FmgrInfo        eqproc;
 
-                       fmgr_info(get_opcode(operator), &eqproc);
+                       fmgr_info(opfuncoid, &eqproc);
 
-                       for (i = 0; i < nvalues; i++)
+                       for (i = 0; i < sslot.nvalues; i++)
                        {
                                /* be careful to apply operator right way 'round */
                                if (varonleft)
                                        match = DatumGetBool(FunctionCall2Coll(&eqproc,
-                                                                                                                  DEFAULT_COLLATION_OID,
-                                                                                                                  values[i],
+                                                                                                          DEFAULT_COLLATION_OID,
+                                                                                                                  sslot.values[i],
                                                                                                                   constval));
                                else
                                        match = DatumGetBool(FunctionCall2Coll(&eqproc,
-                                                                                                                  DEFAULT_COLLATION_OID,
+                                                                                                          DEFAULT_COLLATION_OID,
                                                                                                                   constval,
-                                                                                                                  values[i]));
+                                                                                                                  sslot.values[i]));
                                if (match)
                                        break;
                        }
@@ -306,9 +340,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                else
                {
                        /* no most-common-value info available */
-                       values = NULL;
-                       numbers = NULL;
-                       i = nvalues = nnumbers = 0;
+                       i = 0;                          /* keep compiler quiet */
                }
 
                if (match)
@@ -317,7 +349,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                         * Constant is "=" to this common value.  We know selectivity
                         * exactly (or as exactly as ANALYZE could calculate it, anyway).
                         */
-                       selec = numbers[i];
+                       selec = sslot.numbers[i];
                }
                else
                {
@@ -329,8 +361,8 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                        double          sumcommon = 0.0;
                        double          otherdistinct;
 
-                       for (i = 0; i < nnumbers; i++)
-                               sumcommon += numbers[i];
+                       for (i = 0; i < sslot.nnumbers; i++)
+                               sumcommon += sslot.numbers[i];
                        selec = 1.0 - sumcommon - stats->stanullfrac;
                        CLAMP_PROBABILITY(selec);
 
@@ -339,7 +371,8 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                         * all the not-common values share this remaining fraction
                         * equally, so we divide by the number of other distinct values.
                         */
-                       otherdistinct = get_variable_numdistinct(vardata) - nnumbers;
+                       otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
+                               sslot.nnumbers;
                        if (otherdistinct > 1)
                                selec /= otherdistinct;
 
@@ -347,12 +380,11 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                         * Another cross-check: selectivity shouldn't be estimated as more
                         * than the least common "most common value".
                         */
-                       if (nnumbers > 0 && selec > numbers[nnumbers - 1])
-                               selec = numbers[nnumbers - 1];
+                       if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
+                               selec = sslot.numbers[sslot.nnumbers - 1];
                }
 
-               free_attstatsslot(vardata->atttype, values, nvalues,
-                                                 numbers, nnumbers);
+               free_attstatsslot(&sslot);
        }
        else
        {
@@ -361,7 +393,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                 * of distinct values and assuming they are equally common. (The guess
                 * is unlikely to be very good, but we do know a few special cases.)
                 */
-               selec = 1.0 / get_variable_numdistinct(vardata);
+               selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
        }
 
        /* result should be in range, but make sure... */
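Most of the hunks that follow repeat one mechanical conversion: the old eight-argument get_attstatsslot() call, with its separate values/numbers output arrays, becomes a call that fills an AttStatsSlot struct, and cleanup collapses to free_attstatsslot(&sslot). A sketch of the new pattern, assembled from the usages visible in this diff (variable names are illustrative):

	AttStatsSlot sslot;
	double		sumcommon = 0.0;

	if (HeapTupleIsValid(vardata->statsTuple) &&
		get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_MCV, InvalidOid,
						 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
	{
		int			i;

		/*
		 * sslot.values/nvalues and sslot.numbers/nnumbers are populated per
		 * the flags requested; sslot.staop reports the operator the slot
		 * was collected with.
		 */
		for (i = 0; i < sslot.nnumbers; i++)
			sumcommon += sslot.numbers[i];

		free_attstatsslot(&sslot);	/* frees only what the flags loaded */
	}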
@@ -379,12 +411,14 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
                                 bool varonleft)
 {
        double          selec;
+       bool            isdefault;
 
        /*
-        * If we matched the var to a unique index, assume there is exactly one
-        * match regardless of anything else.  (This is slightly bogus, since the
-        * index's equality operator might be different from ours, but it's more
-        * likely to be right than ignoring the information.)
+        * If we matched the var to a unique index or DISTINCT clause, assume
+        * there is exactly one match regardless of anything else.  (This is
+        * slightly bogus, since the index or clause's equality operator might be
+        * different from ours, but it's much more likely to be right than
+        * ignoring the information.)
         */
        if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
                return 1.0 / vardata->rel->tuples;
@@ -393,8 +427,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
        {
                Form_pg_statistic stats;
                double          ndistinct;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
 
                stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
 
@@ -405,11 +438,11 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
                 * result averaged over all possible values whether common or
                 * uncommon.  (Essentially, we are assuming that the not-yet-known
                 * comparison value is equally likely to be any of the possible
-                * values, regardless of their frequency in the table.  Is that a good
+                * values, regardless of their frequency in the table.  Is that a good
                 * idea?)
                 */
                selec = 1.0 - stats->stanullfrac;
-               ndistinct = get_variable_numdistinct(vardata);
+               ndistinct = get_variable_numdistinct(vardata, &isdefault);
                if (ndistinct > 1)
                        selec /= ndistinct;
 
@@ -417,16 +450,13 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
                 * Cross-check: selectivity should never be estimated as more than the
                 * most common value's.
                 */
-               if (get_attstatsslot(vardata->statsTuple,
-                                                        vardata->atttype, vardata->atttypmod,
+               if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                         STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        NULL, NULL,
-                                                        &numbers, &nnumbers))
+                                                        ATTSTATSSLOT_NUMBERS))
                {
-                       if (nnumbers > 0 && selec > numbers[0])
-                               selec = numbers[0];
-                       free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
+                       if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
+                               selec = sslot.numbers[0];
+                       free_attstatsslot(&sslot);
                }
        }
        else
@@ -436,7 +466,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
                 * of distinct values and assuming they are equally common. (The guess
                 * is unlikely to be very good, but we do know a few special cases.)
                 */
-               selec = 1.0 / get_variable_numdistinct(vardata);
+               selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
        }
 
        /* result should be in range, but make sure... */
@@ -578,39 +608,33 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
 {
        double          mcv_selec,
                                sumcommon;
-       Datum      *values;
-       int                     nvalues;
-       float4     *numbers;
-       int                     nnumbers;
+       AttStatsSlot sslot;
        int                     i;
 
        mcv_selec = 0.0;
        sumcommon = 0.0;
 
        if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+               statistic_proc_security_check(vardata, opproc->fn_oid) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
                                                 STATISTIC_KIND_MCV, InvalidOid,
-                                                NULL,
-                                                &values, &nvalues,
-                                                &numbers, &nnumbers))
+                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
        {
-               for (i = 0; i < nvalues; i++)
+               for (i = 0; i < sslot.nvalues; i++)
                {
                        if (varonleft ?
                                DatumGetBool(FunctionCall2Coll(opproc,
                                                                                           DEFAULT_COLLATION_OID,
-                                                                                          values[i],
+                                                                                          sslot.values[i],
                                                                                           constval)) :
                                DatumGetBool(FunctionCall2Coll(opproc,
                                                                                           DEFAULT_COLLATION_OID,
                                                                                           constval,
-                                                                                          values[i])))
-                               mcv_selec += numbers[i];
-                       sumcommon += numbers[i];
+                                                                                          sslot.values[i])))
+                               mcv_selec += sslot.numbers[i];
+                       sumcommon += sslot.numbers[i];
                }
-               free_attstatsslot(vardata->atttype, values, nvalues,
-                                                 numbers, nnumbers);
+               free_attstatsslot(&sslot);
        }
 
        *sumcommonp = sumcommon;
@@ -628,7 +652,7 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
  * essentially using the histogram just as a representative sample.  However,
  * small histograms are unlikely to be all that representative, so the caller
  * should be prepared to fall back on some other estimation approach when the
- * histogram is missing or very small. It may also be prudent to combine this
+ * histogram is missing or very small.  It may also be prudent to combine this
  * approach with another one when the histogram is small.
  *
  * If the actual histogram size is not at least min_hist_size, we won't bother
@@ -646,7 +670,7 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
  *
  * Note that the result disregards both the most-common-values (if any) and
  * null entries.  The caller is expected to combine this result with
- * statistics for those portions of the column population.     It may also be
+ * statistics for those portions of the column population.  It may also be
  * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
  */
 double
@@ -656,45 +680,42 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                                          int *hist_size)
 {
        double          result;
-       Datum      *values;
-       int                     nvalues;
+       AttStatsSlot sslot;
 
        /* check sanity of parameters */
        Assert(n_skip >= 0);
        Assert(min_hist_size > 2 * n_skip);
 
        if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+               statistic_proc_security_check(vardata, opproc->fn_oid) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
                                                 STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                                NULL,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
        {
-               *hist_size = nvalues;
-               if (nvalues >= min_hist_size)
+               *hist_size = sslot.nvalues;
+               if (sslot.nvalues >= min_hist_size)
                {
                        int                     nmatch = 0;
                        int                     i;
 
-                       for (i = n_skip; i < nvalues - n_skip; i++)
+                       for (i = n_skip; i < sslot.nvalues - n_skip; i++)
                        {
                                if (varonleft ?
                                        DatumGetBool(FunctionCall2Coll(opproc,
                                                                                                   DEFAULT_COLLATION_OID,
-                                                                                                  values[i],
+                                                                                                  sslot.values[i],
                                                                                                   constval)) :
                                        DatumGetBool(FunctionCall2Coll(opproc,
                                                                                                   DEFAULT_COLLATION_OID,
                                                                                                   constval,
-                                                                                                  values[i])))
+                                                                                                  sslot.values[i])))
                                        nmatch++;
                        }
-                       result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
+                       result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
                }
                else
                        result = -1;
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
        }
        else
        {
@@ -724,9 +745,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                                   Datum constval, Oid consttype)
 {
        double          hist_selec;
-       Oid                     hist_op;
-       Datum      *values;
-       int                     nvalues;
+       AttStatsSlot sslot;
 
        hist_selec = -1.0;
 
@@ -741,14 +760,12 @@ ineq_histogram_selectivity(PlannerInfo *root,
         * the reverse way if isgt is TRUE.
         */
        if (HeapTupleIsValid(vardata->statsTuple) &&
-               get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+               statistic_proc_security_check(vardata, opproc->fn_oid) &&
+               get_attstatsslot(&sslot, vardata->statsTuple,
                                                 STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                                &hist_op,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
        {
-               if (nvalues > 1)
+               if (sslot.nvalues > 1)
                {
                        /*
                         * Use binary search to find proper location, ie, the first slot
@@ -759,7 +776,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                         *
                         * If the binary search accesses the first or last histogram
                         * entry, we try to replace that endpoint with the true column min
-                        * or max as found by get_actual_variable_range().      This
+                        * or max as found by get_actual_variable_range().  This
                         * ameliorates misestimates when the min or max is moving as a
                         * result of changes since the last ANALYZE.  Note that this could
                         * result in effectively including MCVs into the histogram that
@@ -767,7 +784,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                         */
                        double          histfrac;
                        int                     lobound = 0;    /* first possible slot to search */
-                       int                     hibound = nvalues;              /* last+1 slot to search */
+                       int                     hibound = sslot.nvalues;                /* last+1 slot to search */
                        bool            have_end = false;
 
                        /*
@@ -776,12 +793,12 @@ ineq_histogram_selectivity(PlannerInfo *root,
                         * one of them to be updated, so we deal with that within the
                         * loop.)
                         */
-                       if (nvalues == 2)
+                       if (sslot.nvalues == 2)
                                have_end = get_actual_variable_range(root,
                                                                                                         vardata,
-                                                                                                        hist_op,
-                                                                                                        &values[0],
-                                                                                                        &values[1]);
+                                                                                                        sslot.staop,
+                                                                                                        &sslot.values[0],
+                                                                                                        &sslot.values[1]);
 
                        while (lobound < hibound)
                        {
@@ -793,22 +810,22 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                 * histogram entry, first try to replace it with the actual
                                 * current min or max (unless we already did so above).
                                 */
-                               if (probe == 0 && nvalues > 2)
+                               if (probe == 0 && sslot.nvalues > 2)
                                        have_end = get_actual_variable_range(root,
                                                                                                                 vardata,
-                                                                                                                hist_op,
-                                                                                                                &values[0],
+                                                                                                                sslot.staop,
+                                                                                                                &sslot.values[0],
                                                                                                                 NULL);
-                               else if (probe == nvalues - 1 && nvalues > 2)
+                               else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
                                        have_end = get_actual_variable_range(root,
                                                                                                                 vardata,
-                                                                                                                hist_op,
+                                                                                                                sslot.staop,
                                                                                                                 NULL,
-                                                                                                                &values[probe]);
+                                                                                                          &sslot.values[probe]);
 
                                ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
                                                                                                           DEFAULT_COLLATION_OID,
-                                                                                                          values[probe],
+                                                                                                          sslot.values[probe],
                                                                                                           constval));
                                if (isgt)
                                        ltcmp = !ltcmp;
@@ -823,7 +840,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                /* Constant is below lower histogram boundary. */
                                histfrac = 0.0;
                        }
-                       else if (lobound >= nvalues)
+                       else if (lobound >= sslot.nvalues)
                        {
                                /* Constant is above upper histogram boundary. */
                                histfrac = 1.0;
@@ -844,7 +861,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                 * interpolation within this bin.
                                 */
                                if (convert_to_scalar(constval, consttype, &val,
-                                                                         values[i - 1], values[i],
+                                                                         sslot.values[i - 1], sslot.values[i],
                                                                          vardata->vartype,
                                                                          &low, &high))
                                {
@@ -863,7 +880,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
 
                                                /*
                                                 * Watch out for the possibility that we got a NaN or
-                                                * Infinity from the division.  This can happen
+                                                * Infinity from the division.  This can happen
                                                 * despite the previous checks, if for example "low"
                                                 * is -Infinity.
                                                 */
@@ -878,7 +895,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                         * Ideally we'd produce an error here, on the grounds that
                                         * the given operator shouldn't have scalarXXsel
                                         * registered as its selectivity func unless we can deal
-                                        * with its operand types.      But currently, all manner of
+                                        * with its operand types.  But currently, all manner of
                                         * stuff is invoking scalarXXsel, so give a default
                                         * estimate until that can be fixed.
                                         */
@@ -891,7 +908,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                 * binfrac partial bin below the constant.
                                 */
                                histfrac = (double) (i - 1) + binfrac;
-                               histfrac /= (double) (nvalues - 1);
+                               histfrac /= (double) (sslot.nvalues - 1);
                        }
 
                        /*
@@ -904,7 +921,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
 
                        /*
                         * The histogram boundaries are only approximate to begin with,
-                        * and may well be out of date anyway.  Therefore, don't believe
+                        * and may well be out of date anyway.  Therefore, don't believe
                         * extremely small or large selectivity estimates --- unless we
                         * got actual current endpoint values from the table.
                         */
@@ -919,7 +936,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                        }
                }
 
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
        }
 
        return hist_selec;
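To make the interpolation arithmetic in the hunk above concrete, here is a purely illustrative calculation (the numbers are invented, not taken from the patch):

	int			i = 3;			/* constant falls between values[2] and values[3] */
	double		binfrac = 0.4;	/* 40% of the way through that bin, per convert_to_scalar */
	int			nvalues = 10;	/* 10 histogram entries, hence 9 bins */
	double		histfrac = ((double) (i - 1) + binfrac) / (double) (nvalues - 1);

	/*
	 * histfrac = 2.4 / 9 ≈ 0.267: about 27% of the histogrammed (non-MCV,
	 * non-null) rows are estimated to satisfy the "<" comparison; the isgt
	 * case uses the complement.
	 */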
@@ -1085,6 +1102,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
+       Oid                     collation = PG_GET_COLLATION();
        VariableStatData vardata;
        Node       *other;
        bool            varonleft;
@@ -1093,14 +1111,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        Oid                     vartype;
        Oid                     opfamily;
        Pattern_Prefix_Status pstatus;
-       Const      *patt = NULL;
+       Const      *patt;
        Const      *prefix = NULL;
-       Const      *rest = NULL;
+       Selectivity rest_selec = 0;
        double          result;
 
        /*
         * If this is for a NOT LIKE or similar operator, get the corresponding
-        * positive-match operator and work with that.  Set result to the correct
+        * positive-match operator and work with that.  Set result to the correct
         * default estimate, too.
         */
        if (negate)
@@ -1185,17 +1203,20 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        }
 
        /*
-        * Divide pattern into fixed prefix and remainder.  XXX we have to assume
-        * default collation here, because we don't have access to the actual
-        * input collation for the operator.  FIXME ...
+        * Pull out any fixed prefix implied by the pattern, and estimate the
+        * fractional selectivity of the remainder of the pattern.  Unlike many of
+        * the other functions in this file, we use the pattern operator's actual
+        * collation for this step.  This is not because we expect the collation
+        * to make a big difference in the selectivity estimate (it seldom would),
+        * but because we want to be sure we cache compiled regexps under the
+        * right cache key, so that they can be re-used at runtime.
         */
        patt = (Const *) other;
-       pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
-                                                                  &prefix, &rest);
+       pstatus = pattern_fixed_prefix(patt, ptype, collation,
+                                                                  &prefix, &rest_selec);
 
        /*
-        * If necessary, coerce the prefix constant to the right type. (The "rest"
-        * constant need not be changed.)
+        * If necessary, coerce the prefix constant to the right type.
         */
        if (prefix && prefix->consttype != vartype)
        {
@@ -1269,15 +1290,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
                {
                        Selectivity heursel;
                        Selectivity prefixsel;
-                       Selectivity restsel;
 
                        if (pstatus == Pattern_Prefix_Partial)
                                prefixsel = prefix_selectivity(root, &vardata, vartype,
                                                                                           opfamily, prefix);
                        else
                                prefixsel = 1.0;
-                       restsel = pattern_selectivity(rest, ptype);
-                       heursel = prefixsel * restsel;
+                       heursel = prefixsel * rest_selec;
 
                        if (selec < 0)          /* fewer than 10 histogram entries? */
                                selec = heursel;
@@ -1303,7 +1322,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
                /*
                 * If we have most-common-values info, add up the fractions of the MCV
                 * entries that satisfy MCV OP PATTERN.  These fractions contribute
-                * directly to the result selectivity.  Also add up the total fraction
+                * directly to the result selectivity.  Also add up the total fraction
                 * represented by MCV entries.
                 */
                mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
@@ -1410,6 +1429,50 @@ icnlikesel(PG_FUNCTION_ARGS)
        PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
 }
 
+/*
+ *             boolvarsel              - Selectivity of Boolean variable.
+ *
+ * This can actually be called on any boolean-valued expression.  If it
+ * involves only Vars of the specified relation, and if there are statistics
+ * about the Var or expression (the latter is possible if it's indexed) then
+ * we'll produce a real estimate; otherwise it's just a default.
+ */
+Selectivity
+boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+{
+       VariableStatData vardata;
+       double          selec;
+
+       examine_variable(root, arg, varRelid, &vardata);
+       if (HeapTupleIsValid(vardata.statsTuple))
+       {
+               /*
+                * A boolean variable V is equivalent to the clause V = 't', so we
+                * compute the selectivity as if that is what we have.
+                */
+               selec = var_eq_const(&vardata, BooleanEqualOperator,
+                                                        BoolGetDatum(true), false, true);
+       }
+       else if (is_funcclause(arg))
+       {
+               /*
+                * If we have no stats and it's a function call, estimate 0.3333333.
+                * This seems a pretty unprincipled choice, but Postgres has been
+                * using that estimate for function calls since 1992.  The hoariness
+                * of this behavior suggests that we should not be in too much hurry
+                * to use another value.
+                */
+               selec = 0.3333333;
+       }
+       else
+       {
+               /* Otherwise, the default estimate is 0.5 */
+               selec = 0.5;
+       }
+       ReleaseVariableStats(vardata);
+       return selec;
+}
+
 /*
  *             booltestsel             - Selectivity of BooleanTest Node.
  */
@@ -1426,21 +1489,15 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
        {
                Form_pg_statistic stats;
                double          freq_null;
-               Datum      *values;
-               int                     nvalues;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
 
                stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
                freq_null = stats->stanullfrac;
 
-               if (get_attstatsslot(vardata.statsTuple,
-                                                        vardata.atttype, vardata.atttypmod,
+               if (get_attstatsslot(&sslot, vardata.statsTuple,
                                                         STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        &values, &nvalues,
-                                                        &numbers, &nnumbers)
-                       && nnumbers > 0)
+                                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
+                       && sslot.nnumbers > 0)
                {
                        double          freq_true;
                        double          freq_false;
@@ -1448,10 +1505,10 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
                        /*
                         * Get first MCV frequency and derive frequency for true.
                         */
-                       if (DatumGetBool(values[0]))
-                               freq_true = numbers[0];
+                       if (DatumGetBool(sslot.values[0]))
+                               freq_true = sslot.numbers[0];
                        else
-                               freq_true = 1.0 - numbers[0] - freq_null;
+                               freq_true = 1.0 - sslot.numbers[0] - freq_null;
 
                        /*
                         * Next derive frequency for false. Then use these as appropriate
@@ -1492,39 +1549,36 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
                                        break;
                        }
 
-                       free_attstatsslot(vardata.atttype, values, nvalues,
-                                                         numbers, nnumbers);
+                       free_attstatsslot(&sslot);
                }
                else
                {
                        /*
                         * No most-common-value info available. Still have null fraction
                         * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
-                        * for null fraction and assume an even split for boolean tests.
+                        * for null fraction and assume a 50-50 split of TRUE and FALSE.
                         */
                        switch (booltesttype)
                        {
                                case IS_UNKNOWN:
-
-                                       /*
-                                        * Use freq_null directly.
-                                        */
+                                       /* select only NULL values */
                                        selec = freq_null;
                                        break;
                                case IS_NOT_UNKNOWN:
-
-                                       /*
-                                        * Select not unknown (not null) values. Calculate from
-                                        * freq_null.
-                                        */
+                                       /* select non-NULL values */
                                        selec = 1.0 - freq_null;
                                        break;
                                case IS_TRUE:
-                               case IS_NOT_TRUE:
                                case IS_FALSE:
-                               case IS_NOT_FALSE:
+                                       /* Assume we select half of the non-NULL values */
                                        selec = (1.0 - freq_null) / 2.0;
                                        break;
+                               case IS_NOT_TRUE:
+                               case IS_NOT_FALSE:
+                                       /* Assume we select NULLs plus half of the non-NULLs */
+                                       /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
+                                       selec = (freq_null + 1.0) / 2.0;
+                                       break;
                                default:
                                        elog(ERROR, "unrecognized booltesttype: %d",
                                                 (int) booltesttype);
@@ -1690,31 +1744,27 @@ scalararraysel(PlannerInfo *root,
 {
        Oid                     operator = clause->opno;
        bool            useOr = clause->useOr;
+       bool            isEquality = false;
+       bool            isInequality = false;
        Node       *leftop;
        Node       *rightop;
        Oid                     nominal_element_type;
        Oid                     nominal_element_collation;
+       TypeCacheEntry *typentry;
        RegProcedure oprsel;
        FmgrInfo        oprselproc;
        Selectivity s1;
+       Selectivity s1disjoint;
 
-       /*
-        * First, look up the underlying operator's selectivity estimator. Punt if
-        * it hasn't got one.
-        */
-       if (is_join_clause)
-               oprsel = get_oprjoin(operator);
-       else
-               oprsel = get_oprrest(operator);
-       if (!oprsel)
-               return (Selectivity) 0.5;
-       fmgr_info(oprsel, &oprselproc);
-
-       /* deconstruct the expression */
+       /* First, deconstruct the expression */
        Assert(list_length(clause->args) == 2);
        leftop = (Node *) linitial(clause->args);
        rightop = (Node *) lsecond(clause->args);
 
+       /* aggressively reduce both sides to constants */
+       leftop = estimate_expression_value(root, leftop);
+       rightop = estimate_expression_value(root, rightop);
+
        /* get nominal (after relabeling) element type of rightop */
        nominal_element_type = get_base_element_type(exprType(rightop));
        if (!OidIsValid(nominal_element_type))
@@ -1725,6 +1775,59 @@ scalararraysel(PlannerInfo *root,
        /* look through any binary-compatible relabeling of rightop */
        rightop = strip_array_coercion(rightop);
 
+       /*
+        * Detect whether the operator is the default equality or inequality
+        * operator of the array element type.
+        */
+       typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
+       if (OidIsValid(typentry->eq_opr))
+       {
+               if (operator == typentry->eq_opr)
+                       isEquality = true;
+               else if (get_negator(operator) == typentry->eq_opr)
+                       isInequality = true;
+       }
+
+       /*
+        * If it is equality or inequality, we might be able to estimate this as a
+        * form of array containment; for instance "const = ANY(column)" can be
+        * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
+        * that, and returns the selectivity estimate if successful, or -1 if not.
+        */
+       if ((isEquality || isInequality) && !is_join_clause)
+       {
+               s1 = scalararraysel_containment(root, leftop, rightop,
+                                                                               nominal_element_type,
+                                                                               isEquality, useOr, varRelid);
+               if (s1 >= 0.0)
+                       return s1;
+       }
+
+       /*
+        * Look up the underlying operator's selectivity estimator. Punt if it
+        * hasn't got one.
+        */
+       if (is_join_clause)
+               oprsel = get_oprjoin(operator);
+       else
+               oprsel = get_oprrest(operator);
+       if (!oprsel)
+               return (Selectivity) 0.5;
+       fmgr_info(oprsel, &oprselproc);
+
+       /*
+        * In the array-containment check above, we must only believe that an
+        * operator is equality or inequality if it is the default btree equality
+        * operator (or its negator) for the element type, since those are the
+        * operators that array containment will use.  But in what follows, we can
+        * be a little laxer, and also believe that any operators using eqsel() or
+        * neqsel() as selectivity estimator act like equality or inequality.
+        */
+       if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
+               isEquality = true;
+       else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
+               isInequality = true;
+
        /*
         * We consider three cases:
         *
@@ -1759,7 +1862,23 @@ scalararraysel(PlannerInfo *root,
                                                  ARR_ELEMTYPE(arrayval),
                                                  elmlen, elmbyval, elmalign,
                                                  &elem_values, &elem_nulls, &num_elems);
-               s1 = useOr ? 0.0 : 1.0;
+
+               /*
+                * For generic operators, we assume the probability of success is
+                * independent for each array element.  But for "= ANY" or "<> ALL",
+                * if the array elements are distinct (which'd typically be the case)
+                * then the probabilities are disjoint, and we should just sum them.
+                *
+                * If we were being really tense we would try to confirm that the
+                * elements are all distinct, but that would be expensive and it
+                * doesn't seem to be worth the cycles; it would amount to penalizing
+                * well-written queries in favor of poorly-written ones.  However, we
+                * do protect ourselves a little bit by checking whether the
+                * disjointness assumption leads to an impossible (out of range)
+                * probability; if so, we fall back to the normal calculation.
+                */
+               s1 = s1disjoint = (useOr ? 0.0 : 1.0);
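/*
 * Illustrative numbers, not part of the patch: for "x = ANY (ARRAY[a,b,c])"
 * with each element's estimated selectivity s2 = 0.01, the independent
 * combination in the loop below yields 1 - (1 - 0.01)^3 ≈ 0.0297, while the
 * disjoint sum used for equality yields 0.01 + 0.01 + 0.01 = 0.03.  The two
 * diverge mainly when per-element selectivities are large, which is also
 * when the disjoint sum can exceed 1.0 and is discarded by the range check
 * at the end of the loop.
 */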
+
                for (i = 0; i < num_elems; i++)
                {
                        List       *args;
@@ -1774,23 +1893,39 @@ scalararraysel(PlannerInfo *root,
                                                                                elem_nulls[i],
                                                                                elmbyval));
                        if (is_join_clause)
-                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
-                                                                                                 PointerGetDatum(root),
+                               s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
                                                                                                  ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int16GetDatum(jointype),
-                                                                                                 PointerGetDatum(sjinfo)));
+                                                                                                         PointerGetDatum(args),
+                                                                                                         Int16GetDatum(jointype),
+                                                                                                  PointerGetDatum(sjinfo)));
                        else
-                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                                 PointerGetDatum(root),
+                               s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
                                                                                                  ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int32GetDatum(varRelid)));
+                                                                                                         PointerGetDatum(args),
+                                                                                                  Int32GetDatum(varRelid)));
+
                        if (useOr)
+                       {
                                s1 = s1 + s2 - s1 * s2;
+                               if (isEquality)
+                                       s1disjoint += s2;
+                       }
                        else
+                       {
                                s1 = s1 * s2;
+                               if (isInequality)
+                                       s1disjoint += s2 - 1.0;
+                       }
                }
+
+               /* accept disjoint-probability estimate if in range */
+               if ((useOr ? isEquality : isInequality) &&
+                       s1disjoint >= 0.0 && s1disjoint <= 1.0)
+                       s1 = s1disjoint;
        }
        else if (rightop && IsA(rightop, ArrayExpr) &&
                         !((ArrayExpr *) rightop)->multidims)
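
The effect of the disjoint-probability estimate added in this hunk can be reproduced with a small standalone calculation; the per-element selectivities below are invented, not taken from any statistics. For an equality operator ORed over distinct constants the per-element matches are mutually exclusive, so summing the selectivities (and keeping the sum only while it is still a valid probability) tracks the truth better than the independence combination used as the fallback.

#include <stdio.h>

int
main(void)
{
	/* assumed per-element selectivities for "x = ANY ('{...}')" -- illustrative only */
	double		elem_sel[] = {0.20, 0.20, 0.20, 0.20};
	int			nelems = 4;
	double		s1 = 0.0;			/* independent-OR combination */
	double		s1disjoint = 0.0;	/* sum, exact if the per-element matches are disjoint */
	int			i;

	for (i = 0; i < nelems; i++)
	{
		double		s2 = elem_sel[i];

		s1 = s1 + s2 - s1 * s2;
		s1disjoint += s2;
	}

	/* accept the disjoint-sum estimate only while it is a valid probability */
	if (s1disjoint >= 0.0 && s1disjoint <= 1.0)
		s1 = s1disjoint;

	printf("selectivity estimate: %g\n", s1);	/* prints 0.8, not 0.5904 */
	return 0;
}
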
@@ -1802,7 +1937,16 @@ scalararraysel(PlannerInfo *root,
 
                get_typlenbyval(arrayexpr->element_typeid,
                                                &elmlen, &elmbyval);
-               s1 = useOr ? 0.0 : 1.0;
+
+               /*
+                * We use the assumption of disjoint probabilities here too, although
+                * the odds of equal array elements are rather higher if the elements
+                * are not all constants (which they won't be, else constant folding
+                * would have reduced the ArrayExpr to a Const).  In this path it's
+                * critical to have the sanity check on the s1disjoint estimate.
+                */
+               s1 = s1disjoint = (useOr ? 0.0 : 1.0);
+
                foreach(l, arrayexpr->elements)
                {
                        Node       *elem = (Node *) lfirst(l);
@@ -1816,23 +1960,39 @@ scalararraysel(PlannerInfo *root,
                         */
                        args = list_make2(leftop, elem);
                        if (is_join_clause)
-                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
-                                                                                                 PointerGetDatum(root),
+                               s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
                                                                                                  ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int16GetDatum(jointype),
-                                                                                                 PointerGetDatum(sjinfo)));
+                                                                                                         PointerGetDatum(args),
+                                                                                                         Int16GetDatum(jointype),
+                                                                                                  PointerGetDatum(sjinfo)));
                        else
-                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                                 PointerGetDatum(root),
+                               s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
                                                                                                  ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int32GetDatum(varRelid)));
+                                                                                                         PointerGetDatum(args),
+                                                                                                  Int32GetDatum(varRelid)));
+
                        if (useOr)
+                       {
                                s1 = s1 + s2 - s1 * s2;
+                               if (isEquality)
+                                       s1disjoint += s2;
+                       }
                        else
+                       {
                                s1 = s1 * s2;
+                               if (isInequality)
+                                       s1disjoint += s2 - 1.0;
+                       }
                }
+
+               /* accept disjoint-probability estimate if in range */
+               if ((useOr ? isEquality : isInequality) &&
+                       s1disjoint >= 0.0 && s1disjoint <= 1.0)
+                       s1 = s1disjoint;
        }
        else
        {
@@ -1852,23 +2012,26 @@ scalararraysel(PlannerInfo *root,
                dummyexpr->collation = clause->inputcollid;
                args = list_make2(leftop, dummyexpr);
                if (is_join_clause)
-                       s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         Int16GetDatum(jointype),
-                                                                                         PointerGetDatum(sjinfo)));
+                       s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+                                                                                                 clause->inputcollid,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int16GetDatum(jointype),
+                                                                                                 PointerGetDatum(sjinfo)));
                else
-                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         Int32GetDatum(varRelid)));
+                       s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+                                                                                                 clause->inputcollid,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int32GetDatum(varRelid)));
                s1 = useOr ? 0.0 : 1.0;
 
                /*
                 * Arbitrarily assume 10 elements in the eventual array value (see
-                * also estimate_array_length)
+                * also estimate_array_length).  We don't risk an assumption of
+                * disjoint probabilities here.
                 */
                for (i = 0; i < 10; i++)
                {
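
For the unknown-array case handled just above, the estimator is run once against a dummy element and the result is combined with itself ten times under an independence assumption. A quick standalone calculation with an invented per-element selectivity shows what that produces.

#include <stdio.h>

int
main(void)
{
	double		s2 = 0.005;		/* assumed per-element selectivity (illustrative) */
	double		s1 = 0.0;		/* the OR case starts at 0.0 */
	int			i;

	/* combine the same estimate ten times, assuming independence */
	for (i = 0; i < 10; i++)
		s1 = s1 + s2 - s1 * s2;

	printf("ANY estimate: %g\n", s1);	/* about 0.0489, a bit under 10 * 0.005 */
	return 0;
}
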
@@ -1935,6 +2098,7 @@ rowcomparesel(PlannerInfo *root,
 {
        Selectivity s1;
        Oid                     opno = linitial_oid(clause->opnos);
+       Oid                     inputcollid = linitial_oid(clause->inputcollids);
        List       *opargs;
        bool            is_join_clause;
 
@@ -1975,6 +2139,7 @@ rowcomparesel(PlannerInfo *root,
                /* Estimate selectivity for a join clause. */
                s1 = join_selectivity(root, opno,
                                                          opargs,
+                                                         inputcollid,
                                                          jointype,
                                                          sjinfo);
        }
@@ -1983,6 +2148,7 @@ rowcomparesel(PlannerInfo *root,
                /* Estimate selectivity for a restriction clause. */
                s1 = restriction_selectivity(root, opno,
                                                                         opargs,
+                                                                        inputcollid,
                                                                         varRelid);
        }
 
@@ -2007,6 +2173,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
        VariableStatData vardata1;
        VariableStatData vardata2;
        bool            join_is_reversed;
+       RelOptInfo *inner_rel;
 
        get_join_variables(root, args, sjinfo,
                                           &vardata1, &vardata2, &join_is_reversed);
@@ -2020,11 +2187,22 @@ eqjoinsel(PG_FUNCTION_ARGS)
                        break;
                case JOIN_SEMI:
                case JOIN_ANTI:
+
+                       /*
+                        * Look up the join's inner relation.  min_righthand is sufficient
+                        * information because neither SEMI nor ANTI joins permit any
+                        * reassociation into or out of their RHS, so the righthand will
+                        * always be exactly that set of rels.
+                        */
+                       inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
+
                        if (!join_is_reversed)
-                               selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
+                               selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
+                                                                          inner_rel);
                        else
                                selec = eqjoinsel_semi(get_commutator(operator),
-                                                                          &vardata2, &vardata1);
+                                                                          &vardata2, &vardata1,
+                                                                          inner_rel);
                        break;
                default:
                        /* other values not expected here */
@@ -2055,54 +2233,50 @@ eqjoinsel_inner(Oid operator,
        double          selec;
        double          nd1;
        double          nd2;
+       bool            isdefault1;
+       bool            isdefault2;
+       Oid                     opfuncoid;
        Form_pg_statistic stats1 = NULL;
        Form_pg_statistic stats2 = NULL;
        bool            have_mcvs1 = false;
-       Datum      *values1 = NULL;
-       int                     nvalues1 = 0;
-       float4     *numbers1 = NULL;
-       int                     nnumbers1 = 0;
        bool            have_mcvs2 = false;
-       Datum      *values2 = NULL;
-       int                     nvalues2 = 0;
-       float4     *numbers2 = NULL;
-       int                     nnumbers2 = 0;
+       AttStatsSlot sslot1;
+       AttStatsSlot sslot2;
+
+       nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+       nd2 = get_variable_numdistinct(vardata2, &isdefault2);
 
-       nd1 = get_variable_numdistinct(vardata1);
-       nd2 = get_variable_numdistinct(vardata2);
+       opfuncoid = get_opcode(operator);
+
+       memset(&sslot1, 0, sizeof(sslot1));
+       memset(&sslot2, 0, sizeof(sslot2));
 
        if (HeapTupleIsValid(vardata1->statsTuple))
        {
+               /* note we allow use of nullfrac regardless of security check */
                stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
-               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
-                                                                         vardata1->atttype,
-                                                                         vardata1->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values1, &nvalues1,
-                                                                         &numbers1, &nnumbers1);
+               if (statistic_proc_security_check(vardata1, opfuncoid))
+                       have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
+                                                                                 STATISTIC_KIND_MCV, InvalidOid,
+                                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
        }
 
        if (HeapTupleIsValid(vardata2->statsTuple))
        {
+               /* note we allow use of nullfrac regardless of security check */
                stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
-               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
-                                                                         vardata2->atttype,
-                                                                         vardata2->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values2, &nvalues2,
-                                                                         &numbers2, &nnumbers2);
+               if (statistic_proc_security_check(vardata2, opfuncoid))
+                       have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
+                                                                                 STATISTIC_KIND_MCV, InvalidOid,
+                                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
        }
 
        if (have_mcvs1 && have_mcvs2)
        {
                /*
-                * We have most-common-value lists for both relations.  Run through
+                * We have most-common-value lists for both relations.  Run through
                 * the lists to see which MCVs actually join to each other with the
-                * given operator.      This allows us to determine the exact join
+                * given operator.  This allows us to determine the exact join
                 * selectivity for the portion of the relations represented by the MCV
                 * lists.  We still have to estimate for the remaining population, but
                 * in a skewed distribution this gives us a big leg up in accuracy.
@@ -2128,33 +2302,33 @@ eqjoinsel_inner(Oid operator,
                int                     i,
                                        nmatches;
 
-               fmgr_info(get_opcode(operator), &eqproc);
-               hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
-               hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+               fmgr_info(opfuncoid, &eqproc);
+               hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
+               hasmatch2 = (bool *) palloc0(sslot2.nvalues * sizeof(bool));
 
                /*
                 * Note we assume that each MCV will match at most one member of the
-                * other MCV list.      If the operator isn't really equality, there could
+                * other MCV list.  If the operator isn't really equality, there could
                 * be multiple matches --- but we don't look for them, both for speed
                 * and because the math wouldn't add up...
                 */
                matchprodfreq = 0.0;
                nmatches = 0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                {
                        int                     j;
 
-                       for (j = 0; j < nvalues2; j++)
+                       for (j = 0; j < sslot2.nvalues; j++)
                        {
                                if (hasmatch2[j])
                                        continue;
                                if (DatumGetBool(FunctionCall2Coll(&eqproc,
                                                                                                   DEFAULT_COLLATION_OID,
-                                                                                                  values1[i],
-                                                                                                  values2[j])))
+                                                                                                  sslot1.values[i],
+                                                                                                  sslot2.values[j])))
                                {
                                        hasmatch1[i] = hasmatch2[j] = true;
-                                       matchprodfreq += numbers1[i] * numbers2[j];
+                                       matchprodfreq += sslot1.numbers[i] * sslot2.numbers[j];
                                        nmatches++;
                                        break;
                                }
@@ -2163,22 +2337,22 @@ eqjoinsel_inner(Oid operator,
                CLAMP_PROBABILITY(matchprodfreq);
                /* Sum up frequencies of matched and unmatched MCVs */
                matchfreq1 = unmatchfreq1 = 0.0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                {
                        if (hasmatch1[i])
-                               matchfreq1 += numbers1[i];
+                               matchfreq1 += sslot1.numbers[i];
                        else
-                               unmatchfreq1 += numbers1[i];
+                               unmatchfreq1 += sslot1.numbers[i];
                }
                CLAMP_PROBABILITY(matchfreq1);
                CLAMP_PROBABILITY(unmatchfreq1);
                matchfreq2 = unmatchfreq2 = 0.0;
-               for (i = 0; i < nvalues2; i++)
+               for (i = 0; i < sslot2.nvalues; i++)
                {
                        if (hasmatch2[i])
-                               matchfreq2 += numbers2[i];
+                               matchfreq2 += sslot2.numbers[i];
                        else
-                               unmatchfreq2 += numbers2[i];
+                               unmatchfreq2 += sslot2.numbers[i];
                }
                CLAMP_PROBABILITY(matchfreq2);
                CLAMP_PROBABILITY(unmatchfreq2);
@@ -2203,15 +2377,15 @@ eqjoinsel_inner(Oid operator,
                 * MCVs plus non-MCV values.
                 */
                totalsel1 = matchprodfreq;
-               if (nd2 > nvalues2)
-                       totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
+               if (nd2 > sslot2.nvalues)
+                       totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2.nvalues);
                if (nd2 > nmatches)
                        totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
                                (nd2 - nmatches);
                /* Same estimate from the point of view of relation 2. */
                totalsel2 = matchprodfreq;
-               if (nd1 > nvalues1)
-                       totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
+               if (nd1 > sslot1.nvalues)
+                       totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1.nvalues);
                if (nd1 > nmatches)
                        totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
                                (nd1 - nmatches);
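
To see how the matched and unmatched MCV frequencies feed the final estimate, the totalsel1/totalsel2 expressions above can be evaluated with invented statistics; otherfreq1 and otherfreq2 here stand for each side's non-null frequency not covered by its MCV list, as in the surrounding code.

#include <stdio.h>

int
main(void)
{
	/* all of the following statistics are invented for illustration */
	double		matchprodfreq = 0.30;	/* summed products of matched MCV freqs */
	double		unmatchfreq1 = 0.10;	/* MCV frequency on side 1 with no partner */
	double		unmatchfreq2 = 0.15;
	double		otherfreq1 = 0.55;		/* non-null frequency outside side 1's MCV list */
	double		otherfreq2 = 0.50;
	double		nd1 = 200.0;			/* distinct values on each side */
	double		nd2 = 150.0;
	int			nvalues1 = 20;			/* MCV list lengths */
	int			nvalues2 = 25;
	int			nmatches = 10;			/* MCV pairs that joined */
	double		totalsel1 = matchprodfreq;
	double		totalsel2 = matchprodfreq;

	/* unmatched MCVs on side 1 are assumed to hit side 2's non-MCV values */
	if (nd2 > nvalues2)
		totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
	/* non-MCV values on side 1 can hit anything on side 2 except matched MCVs */
	if (nd2 > nmatches)
		totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) / (nd2 - nmatches);

	/* and symmetrically from relation 2's point of view */
	if (nd1 > nvalues1)
		totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
	if (nd1 > nmatches)
		totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) / (nd1 - nmatches);

	printf("totalsel1 = %g, totalsel2 = %g\n", totalsel1, totalsel2);
	return 0;
}
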
@@ -2245,22 +2419,10 @@ eqjoinsel_inner(Oid operator,
                 * XXX Can we be smarter if we have an MCV list for just one side? It
                 * seems that if we assume equal distribution for the other side, we
                 * end up with the same answer anyway.
-                *
-                * An additional hack we use here is to clamp the nd1 and nd2 values
-                * to not more than what we are estimating the input relation sizes to
-                * be, providing a crude correction for the selectivity of restriction
-                * clauses on those relations.  (We don't do that in the other path
-                * since there we are comparing the nd values to stats for the whole
-                * relations.)
                 */
                double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
                double          nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
 
-               if (vardata1->rel)
-                       nd1 = Min(nd1, vardata1->rel->rows);
-               if (vardata2->rel)
-                       nd2 = Min(nd2, vardata2->rel->rows);
-
                selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
                if (nd1 > nd2)
                        selec /= nd1;
@@ -2268,12 +2430,8 @@ eqjoinsel_inner(Oid operator,
                        selec /= nd2;
        }
 
-       if (have_mcvs1)
-               free_attstatsslot(vardata1->atttype, values1, nvalues1,
-                                                 numbers1, nnumbers1);
-       if (have_mcvs2)
-               free_attstatsslot(vardata2->atttype, values2, nvalues2,
-                                                 numbers2, nnumbers2);
+       free_attstatsslot(&sslot1);
+       free_attstatsslot(&sslot2);
 
        return selec;
 }
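
When neither side contributes a usable MCV list, the function falls back to dividing the non-null fraction by the larger ndistinct, as in the hunk above. A minimal sketch with invented statistics:

#include <stdio.h>

int
main(void)
{
	/* invented statistics for a join on a.x = b.y */
	double		nd1 = 1000.0;		/* distinct values of a.x */
	double		nd2 = 50.0;			/* distinct values of b.y */
	double		nullfrac1 = 0.10;	/* fraction of NULLs in a.x */
	double		nullfrac2 = 0.0;
	double		selec;

	/* only non-null rows can match, and each match covers 1/max(nd1, nd2) */
	selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
	if (nd1 > nd2)
		selec /= nd1;
	else
		selec /= nd2;

	printf("join selectivity: %g\n", selec);	/* 0.9 / 1000 = 0.0009 */
	return 0;
}
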
@@ -2283,60 +2441,91 @@ eqjoinsel_inner(Oid operator,
  *
  * (Also used for anti join, which we are supposed to estimate the same way.)
  * Caller has ensured that vardata1 is the LHS variable.
+ * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
  */
 static double
 eqjoinsel_semi(Oid operator,
-                          VariableStatData *vardata1, VariableStatData *vardata2)
+                          VariableStatData *vardata1, VariableStatData *vardata2,
+                          RelOptInfo *inner_rel)
 {
        double          selec;
        double          nd1;
        double          nd2;
+       bool            isdefault1;
+       bool            isdefault2;
+       Oid                     opfuncoid;
        Form_pg_statistic stats1 = NULL;
        bool            have_mcvs1 = false;
-       Datum      *values1 = NULL;
-       int                     nvalues1 = 0;
-       float4     *numbers1 = NULL;
-       int                     nnumbers1 = 0;
        bool            have_mcvs2 = false;
-       Datum      *values2 = NULL;
-       int                     nvalues2 = 0;
-       float4     *numbers2 = NULL;
-       int                     nnumbers2 = 0;
+       AttStatsSlot sslot1;
+       AttStatsSlot sslot2;
+
+       nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+       nd2 = get_variable_numdistinct(vardata2, &isdefault2);
 
-       nd1 = get_variable_numdistinct(vardata1);
-       nd2 = get_variable_numdistinct(vardata2);
+       opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
+
+       memset(&sslot1, 0, sizeof(sslot1));
+       memset(&sslot2, 0, sizeof(sslot2));
+
+       /*
+        * We clamp nd2 to be not more than what we estimate the inner relation's
+        * size to be.  This is intuitively somewhat reasonable since obviously
+        * there can't be more than that many distinct values coming from the
+        * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
+        * likewise) is that this is the only pathway by which restriction clauses
+        * applied to the inner rel will affect the join result size estimate,
+        * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
+        * only the outer rel's size.  If we clamped nd1 we'd be double-counting
+        * the selectivity of outer-rel restrictions.
+        *
+        * We can apply this clamping both with respect to the base relation from
+        * which the join variable comes (if there is just one), and to the
+        * immediate inner input relation of the current join.
+        *
+        * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+        * great, maybe, but it didn't come out of nowhere either.  This is most
+        * helpful when the inner relation is empty and consequently has no stats.
+        */
+       if (vardata2->rel)
+       {
+               if (nd2 >= vardata2->rel->rows)
+               {
+                       nd2 = vardata2->rel->rows;
+                       isdefault2 = false;
+               }
+       }
+       if (nd2 >= inner_rel->rows)
+       {
+               nd2 = inner_rel->rows;
+               isdefault2 = false;
+       }
 
        if (HeapTupleIsValid(vardata1->statsTuple))
        {
+               /* note we allow use of nullfrac regardless of security check */
                stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
-               have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
-                                                                         vardata1->atttype,
-                                                                         vardata1->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values1, &nvalues1,
-                                                                         &numbers1, &nnumbers1);
+               if (statistic_proc_security_check(vardata1, opfuncoid))
+                       have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
+                                                                                 STATISTIC_KIND_MCV, InvalidOid,
+                                                                ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
        }
 
-       if (HeapTupleIsValid(vardata2->statsTuple))
+       if (HeapTupleIsValid(vardata2->statsTuple) &&
+               statistic_proc_security_check(vardata2, opfuncoid))
        {
-               have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
-                                                                         vardata2->atttype,
-                                                                         vardata2->atttypmod,
-                                                                         STATISTIC_KIND_MCV,
-                                                                         InvalidOid,
-                                                                         NULL,
-                                                                         &values2, &nvalues2,
-                                                                         &numbers2, &nnumbers2);
+               have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
+                                                                         STATISTIC_KIND_MCV, InvalidOid,
+                                                                         ATTSTATSSLOT_VALUES);
+               /* note: currently don't need stanumbers from RHS */
        }
 
        if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
        {
                /*
-                * We have most-common-value lists for both relations.  Run through
+                * We have most-common-value lists for both relations.  Run through
                 * the lists to see which MCVs actually join to each other with the
-                * given operator.      This allows us to determine the exact join
+                * given operator.  This allows us to determine the exact join
                 * selectivity for the portion of the relations represented by the MCV
                 * lists.  We still have to estimate for the remaining population, but
                 * in a skewed distribution this gives us a big leg up in accuracy.
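
The clamping described in the comment above can be illustrated with invented numbers: a column's whole-table ndistinct may be far larger than the rows surviving the inner side's restrictions, and the clamp is the only way those restrictions influence the SEMI/ANTI estimate.

#include <stdio.h>

int
main(void)
{
	/* invented inputs */
	double		nd2 = 10000.0;			/* ndistinct of the inner join column, whole table */
	int			isdefault2 = 1;			/* pretend it was only a default guess */
	double		base_rel_rows = 400.0;	/* inner column's base rel after its restrictions */
	double		inner_rel_rows = 50.0;	/* immediate inner input of this join */

	/* clamp to the base rel's row estimate, when the variable comes from one rel */
	if (nd2 >= base_rel_rows)
	{
		nd2 = base_rel_rows;
		isdefault2 = 0;
	}
	/* and clamp again to the join's inner input size */
	if (nd2 >= inner_rel_rows)
	{
		nd2 = inner_rel_rows;
		isdefault2 = 0;
	}

	printf("clamped nd2 = %g (default? %d)\n", nd2, isdefault2);	/* 50, no longer default */
	return 0;
}
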
@@ -2349,31 +2538,41 @@ eqjoinsel_semi(Oid operator,
                                        uncertainfrac,
                                        uncertain;
                int                     i,
-                                       nmatches;
+                                       nmatches,
+                                       clamped_nvalues2;
+
+               /*
+                * The clamping above could have resulted in nd2 being less than
+                * sslot2.nvalues, in which case we assume that precisely the nd2
+                * most common values in the relation will appear in the join input,
+                * and so compare to only the first nd2 members of the MCV list.  Of
+                * course this is frequently wrong, but it's the best bet we can make.
+                */
+               clamped_nvalues2 = Min(sslot2.nvalues, nd2);
 
-               fmgr_info(get_opcode(operator), &eqproc);
-               hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
-               hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+               fmgr_info(opfuncoid, &eqproc);
+               hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
+               hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
 
                /*
                 * Note we assume that each MCV will match at most one member of the
-                * other MCV list.      If the operator isn't really equality, there could
+                * other MCV list.  If the operator isn't really equality, there could
                 * be multiple matches --- but we don't look for them, both for speed
                 * and because the math wouldn't add up...
                 */
                nmatches = 0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                {
                        int                     j;
 
-                       for (j = 0; j < nvalues2; j++)
+                       for (j = 0; j < clamped_nvalues2; j++)
                        {
                                if (hasmatch2[j])
                                        continue;
                                if (DatumGetBool(FunctionCall2Coll(&eqproc,
                                                                                                   DEFAULT_COLLATION_OID,
-                                                                                                  values1[i],
-                                                                                                  values2[j])))
+                                                                                                  sslot1.values[i],
+                                                                                                  sslot2.values[j])))
                                {
                                        hasmatch1[i] = hasmatch2[j] = true;
                                        nmatches++;
@@ -2383,10 +2582,10 @@ eqjoinsel_semi(Oid operator,
                }
                /* Sum up frequencies of matched MCVs */
                matchfreq1 = 0.0;
-               for (i = 0; i < nvalues1; i++)
+               for (i = 0; i < sslot1.nvalues; i++)
                {
                        if (hasmatch1[i])
-                               matchfreq1 += numbers1[i];
+                               matchfreq1 += sslot1.numbers[i];
                }
                CLAMP_PROBABILITY(matchfreq1);
                pfree(hasmatch1);
@@ -2394,7 +2593,7 @@ eqjoinsel_semi(Oid operator,
 
                /*
                 * Now we need to estimate the fraction of relation 1 that has at
-                * least one join partner.      We know for certain that the matched MCVs
+                * least one join partner.  We know for certain that the matched MCVs
                 * do, so that gives us a lower bound, but we're really in the dark
                 * about everything else.  Our crude approach is: if nd1 <= nd2 then
                 * assume all non-null rel1 rows have join partners, else assume for
@@ -2403,15 +2602,15 @@ eqjoinsel_semi(Oid operator,
                 * before doing the division.
                 *
                 * Crude as the above is, it's completely useless if we don't have
-                * reliable ndistinct values for both sides.  Hence, if either nd1
-                * or nd2 is default, punt and assume half of the uncertain rows
-                * have join partners.
+                * reliable ndistinct values for both sides.  Hence, if either nd1 or
+                * nd2 is default, punt and assume half of the uncertain rows have
+                * join partners.
                 */
-               if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+               if (!isdefault1 && !isdefault2)
                {
                        nd1 -= nmatches;
                        nd2 -= nmatches;
-                       if (nd1 <= nd2 || nd2 <= 0)
+                       if (nd1 <= nd2 || nd2 < 0)
                                uncertainfrac = 1.0;
                        else
                                uncertainfrac = nd2 / nd1;
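
With both ndistinct estimates considered reliable, the uncertain (non-MCV) part of the outer side is assumed to find partners in proportion nd2/nd1 once the already-matched MCVs are set aside. A worked example with invented values:

#include <stdio.h>

int
main(void)
{
	/* invented inputs */
	double		nd1 = 1000.0;	/* distinct values on the outer side */
	double		nd2 = 200.0;	/* distinct values on the (clamped) inner side */
	int			nmatches = 50;	/* MCV values already known to have partners */
	double		uncertainfrac;

	/* remove the values already accounted for by the MCV matching */
	nd1 -= nmatches;
	nd2 -= nmatches;

	if (nd1 <= nd2 || nd2 < 0)
		uncertainfrac = 1.0;	/* assume every remaining outer value has a partner */
	else
		uncertainfrac = nd2 / nd1;

	printf("uncertain fraction with partners: %g\n", uncertainfrac);	/* 150/950, about 0.158 */
	return 0;
}
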
@@ -2430,14 +2629,9 @@ eqjoinsel_semi(Oid operator,
                 */
                double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
 
-               if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+               if (!isdefault1 && !isdefault2)
                {
-                       if (vardata1->rel)
-                               nd1 = Min(nd1, vardata1->rel->rows);
-                       if (vardata2->rel)
-                               nd2 = Min(nd2, vardata2->rel->rows);
-
-                       if (nd1 <= nd2 || nd2 <= 0)
+                       if (nd1 <= nd2 || nd2 < 0)
                                selec = 1.0 - nullfrac1;
                        else
                                selec = (nd2 / nd1) * (1.0 - nullfrac1);
@@ -2446,12 +2640,8 @@ eqjoinsel_semi(Oid operator,
                        selec = 0.5 * (1.0 - nullfrac1);
        }
 
-       if (have_mcvs1)
-               free_attstatsslot(vardata1->atttype, values1, nvalues1,
-                                                 numbers1, nnumbers1);
-       if (have_mcvs2)
-               free_attstatsslot(vardata2->atttype, values2, nvalues2,
-                                                 numbers2, nnumbers2);
+       free_attstatsslot(&sslot1);
+       free_attstatsslot(&sslot2);
 
        return selec;
 }
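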
@@ -2926,9 +3116,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 {
        GroupVarInfo *varinfo;
        double          ndistinct;
+       bool            isdefault;
        ListCell   *lc;
 
-       ndistinct = get_variable_numdistinct(vardata);
+       ndistinct = get_variable_numdistinct(vardata, &isdefault);
 
        /* cannot use foreach here because of possible list_delete */
        lc = list_head(varinfos);
@@ -2989,6 +3180,8 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  *     groupExprs - list of expressions being grouped by
  *     input_rows - number of rows estimated to arrive at the group/unique
  *             filter step
+ *     pgset - NULL, or a List** pointing to a grouping set to filter the
+ *             groupExprs against
  *
  * Given the lack of any cross-correlation statistics in the system, it's
  * impossible to do anything really trustworthy with GROUP BY conditions
@@ -2996,11 +3189,11 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  * case (all possible cross-product terms actually appear as groups) since
  * very often the grouped-by Vars are highly correlated.  Our current approach
  * is as follows:
- *     1.      Expressions yielding boolean are assumed to contribute two groups,
+ *     1.  Expressions yielding boolean are assumed to contribute two groups,
  *             independently of their content, and are ignored in the subsequent
- *             steps.  This is mainly because tests like "col IS NULL" break the
+ *             steps.  This is mainly because tests like "col IS NULL" break the
  *             heuristic used in step 2 especially badly.
- *     2.      Reduce the given expressions to a list of unique Vars used.  For
+ *     2.  Reduce the given expressions to a list of unique Vars used.  For
  *             example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
  *             It is clearly correct not to count the same Var more than once.
  *             It is also reasonable to treat f(x) the same as x: f() cannot
@@ -3010,25 +3203,25 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  *             As a special case, if a GROUP BY expression can be matched to an
  *             expressional index for which we have statistics, then we treat the
  *             whole expression as though it were just a Var.
- *     3.      If the list contains Vars of different relations that are known equal
+ *     3.  If the list contains Vars of different relations that are known equal
  *             due to equivalence classes, then drop all but one of the Vars from each
  *             known-equal set, keeping the one with smallest estimated # of values
  *             (since the extra values of the others can't appear in joined rows).
  *             Note the reason we only consider Vars of different relations is that
  *             if we considered ones of the same rel, we'd be double-counting the
  *             restriction selectivity of the equality in the next step.
- *     4.      For Vars within a single source rel, we multiply together the numbers
+ *     4.  For Vars within a single source rel, we multiply together the numbers
  *             of values, clamp to the number of rows in the rel (divided by 10 if
- *             more than one Var), and then multiply by the selectivity of the
- *             restriction clauses for that rel.  When there's more than one Var,
- *             the initial product is probably too high (it's the worst case) but
- *             clamping to a fraction of the rel's rows seems to be a helpful
- *             heuristic for not letting the estimate get out of hand.  (The factor
- *             of 10 is derived from pre-Postgres-7.4 practice.)  Multiplying
- *             by the restriction selectivity is effectively assuming that the
- *             restriction clauses are independent of the grouping, which is a crummy
- *             assumption, but it's hard to do better.
- *     5.      If there are Vars from multiple rels, we repeat step 4 for each such
+ *             more than one Var), and then multiply by a factor based on the
+ *             selectivity of the restriction clauses for that rel.  When there's
+ *             more than one Var, the initial product is probably too high (it's the
+ *             worst case) but clamping to a fraction of the rel's rows seems to be a
+ *             helpful heuristic for not letting the estimate get out of hand.  (The
+ *             factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
+ *             we multiply by to adjust for the restriction selectivity assumes that
+ *             the restriction clauses are independent of the grouping, which may not
+ *             be a valid assumption, but it's hard to do better.
+ *     5.  If there are Vars from multiple rels, we repeat step 4 for each such
  *             rel, and multiply the results together.
  * Note that rels not containing grouped Vars are ignored completely, as are
  * join clauses.  Such rels cannot increase the number of groups, and we
@@ -3036,17 +3229,32 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  * but we don't have the info to do better).
  */
 double
-estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
+                                       List **pgset)
 {
        List       *varinfos = NIL;
        double          numdistinct;
        ListCell   *l;
+       int                     i;
+
+       /*
+        * We don't ever want to return an estimate of zero groups, as that tends
+        * to lead to division-by-zero and other unpleasantness.  The input_rows
+        * estimate is usually already at least 1, but clamp it just in case it
+        * isn't.
+        */
+       input_rows = clamp_row_est(input_rows);
 
-       /* We should not be called unless query has GROUP BY (or DISTINCT) */
-       Assert(groupExprs != NIL);
+       /*
+        * If no grouping columns, there's exactly one group.  (This can't happen
+        * for normal cases with GROUP BY or DISTINCT, but it is possible for
+        * corner cases with set operations.)
+        */
+       if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
+               return 1.0;
 
        /*
-        * Count groups derived from boolean grouping expressions.      For other
+        * Count groups derived from boolean grouping expressions.  For other
         * expressions, find the unique Vars used, treating an expression as a Var
         * if we can find stats for it.  For each one, record the statistical
         * estimate of number of distinct values (total in its table, without
@@ -3054,6 +3262,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
         */
        numdistinct = 1.0;
 
+       i = 0;
        foreach(l, groupExprs)
        {
                Node       *groupexpr = (Node *) lfirst(l);
@@ -3061,6 +3270,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                List       *varshere;
                ListCell   *l2;
 
+               /* is expression in this grouping set? */
+               if (pgset && !list_member_int(*pgset, i++))
+                       continue;
+
                /* Short-circuit for expressions returning boolean */
                if (exprType(groupexpr) == BOOLOID)
                {
@@ -3089,7 +3302,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                 * PlaceHolderVar doesn't change the number of groups, which boils
                 * down to ignoring the possible addition of nulls to the result set).
                 */
-               varshere = pull_var_clause(groupexpr, PVC_RECURSE_PLACEHOLDERS);
+               varshere = pull_var_clause(groupexpr,
+                                                                  PVC_RECURSE_AGGREGATES |
+                                                                  PVC_RECURSE_WINDOWFUNCS |
+                                                                  PVC_RECURSE_PLACEHOLDERS);
 
                /*
                 * If we find any variable-free GROUP BY item, then either it is a
@@ -3133,7 +3349,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
         * Group Vars by relation and estimate total numdistinct.
         *
         * For each iteration of the outer loop, we process the frontmost Var in
-        * varinfos, plus all other Vars in the same relation.  We remove these
+        * varinfos, plus all other Vars in the same relation.  We remove these
         * Vars from the newvarinfos list for the next iteration. This is the
         * easiest way to group Vars of same rel together.
         */
@@ -3141,25 +3357,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
        {
                GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
                RelOptInfo *rel = varinfo1->rel;
-               double          reldistinct = varinfo1->ndistinct;
+               double          reldistinct = 1;
                double          relmaxndistinct = reldistinct;
-               int                     relvarcount = 1;
+               int                     relvarcount = 0;
                List       *newvarinfos = NIL;
+               List       *relvarinfos = NIL;
 
                /*
-                * Get the product of numdistinct estimates of the Vars for this rel.
-                * Also, construct new varinfos list of remaining Vars.
+                * Split the list of varinfos in two - one for the current rel, one
+                * for remaining Vars on other rels.
                 */
+               relvarinfos = lcons(varinfo1, relvarinfos);
                for_each_cell(l, lnext(list_head(varinfos)))
                {
                        GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 
                        if (varinfo2->rel == varinfo1->rel)
                        {
-                               reldistinct *= varinfo2->ndistinct;
-                               if (relmaxndistinct < varinfo2->ndistinct)
-                                       relmaxndistinct = varinfo2->ndistinct;
-                               relvarcount++;
+                               /* varinfos on current rel */
+                               relvarinfos = lcons(varinfo2, relvarinfos);
                        }
                        else
                        {
@@ -3168,10 +3384,51 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                        }
                }
 
+               /*
+                * Get the numdistinct estimate for the Vars of this rel.  We
+                * iteratively search for multivariate n-distinct with the maximum number
+                * of vars; assuming that each var group is independent of the others,
+                * we multiply them together.  Any remaining relvarinfos after no more
+                * multivariate matches are found are assumed independent too, so
+                * their individual ndistinct estimates are multiplied also.
+                *
+                * While iterating, count how many separate numdistinct values we
+                * apply.  We apply a fudge factor below, but only if we multiplied
+                * more than one such value.
+                */
+               while (relvarinfos)
+               {
+                       double          mvndistinct;
+
+                       if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+                                                                                               &mvndistinct))
+                       {
+                               reldistinct *= mvndistinct;
+                               if (relmaxndistinct < mvndistinct)
+                                       relmaxndistinct = mvndistinct;
+                               relvarcount++;
+                       }
+                       else
+                       {
+                               foreach(l, relvarinfos)
+                               {
+                                       GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+                                       reldistinct *= varinfo2->ndistinct;
+                                       if (relmaxndistinct < varinfo2->ndistinct)
+                                               relmaxndistinct = varinfo2->ndistinct;
+                                       relvarcount++;
+                               }
+
+                               /* we're done with this relation */
+                               relvarinfos = NIL;
+                       }
+               }
+
                /*
                 * Sanity check --- don't divide by zero if empty relation.
                 */
-               Assert(rel->reloptkind == RELOPT_BASEREL);
+               Assert(IS_SIMPLE_REL(rel));
                if (rel->tuples > 0)
                {
                        /*
@@ -3198,9 +3455,51 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                                reldistinct = clamp;
 
                        /*
-                        * Multiply by restriction selectivity.
+                        * Update the estimate based on the restriction selectivity,
+                        * guarding against division by zero when reldistinct is zero.
+                        * Also skip this if we know that we are returning all rows.
                         */
-                       reldistinct *= rel->rows / rel->tuples;
+                       if (reldistinct > 0 && rel->rows < rel->tuples)
+                       {
+                               /*
+                                * Given a table containing N rows with n distinct values in a
+                                * uniform distribution, if we select p rows at random then
+                                * the expected number of distinct values selected is
+                                *
+                                * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+                                *
+                                * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+                                *
+                                * See "Approximating block accesses in database
+                                * organizations", S. B. Yao, Communications of the ACM,
+                                * Volume 20 Issue 4, April 1977, Pages 260-261.
+                                *
+                                * Alternatively, re-arranging the terms from the factorials,
+                                * this may be written as
+                                *
+                                * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+                                *
+                                * This form of the formula is more efficient to compute in
+                                * the common case where p is larger than N/n.  Additionally,
+                                * as pointed out by Dell'Era, if i << N for all terms in the
+                                * product, it can be approximated by
+                                *
+                                * n * (1 - ((N-p)/N)^(N/n))
+                                *
+                                * See "Expected distinct values when selecting from a bag
+                                * without replacement", Alberto Dell'Era,
+                                * http://www.adellera.it/investigations/distinct_balls/.
+                                *
+                                * The condition i << N is equivalent to n >> 1, so this is a
+                                * good approximation when the number of distinct values in
+                                * the table is large.  It turns out that this formula also
+                                * works well even when n is small.
+                                */
+                               reldistinct *=
+                                       (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+                                                        rel->tuples / reldistinct));
+                       }
+                       reldistinct = clamp_row_est(reldistinct);
 
                        /*
                         * Update estimate of total distinct groups.
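
The formula cited in the comment above can be checked numerically. The sketch below evaluates both the exact product form and the Dell'Era approximation for one invented case (N = 10000 rows, n = 100 distinct values, p = 50 selected rows; in the function these correspond to rel->tuples, reldistinct and rel->rows). The two agree closely, and both fall well below the naive answer min(p, n).

#include <math.h>
#include <stdio.h>

int
main(void)
{
	/* invented table: N rows, n distinct values (uniform), p rows selected */
	double		N = 10000.0;
	double		n = 100.0;
	double		p = 50.0;
	double		product = 1.0;
	double		exact;
	double		approx;
	int			i;

	/* exact form: n * (1 - product((N-p-i)/(N-i), i = 0 .. N/n - 1)) */
	for (i = 0; i < (int) (N / n); i++)
		product *= (N - p - i) / (N - i);
	exact = n * (1.0 - product);

	/* Dell'Era approximation: n * (1 - ((N-p)/N)^(N/n)) */
	approx = n * (1.0 - pow((N - p) / N, N / n));

	printf("expected distinct: exact %.2f, approx %.2f\n", exact, approx);
	/* both come out near 39.5, versus the naive min(p, n) = 50 */
	return 0;
}
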
@@ -3234,11 +3533,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
  * distribution, so this will have to do for now.
  *
  * We are passed the number of buckets the executor will use for the given
- * input relation.     If the data were perfectly distributed, with the same
+ * input relation.  If the data were perfectly distributed, with the same
  * number of tuples going into each available bucket, then the bucketsize
  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
  * only if (a) there are at least nbuckets distinct data values, and (b)
- * we have a not-too-skewed data distribution. Otherwise the buckets will
+ * we have a not-too-skewed data distribution.  Otherwise the buckets will
  * be nonuniformly occupied.  If the other relation in the join has a key
  * distribution similar to this one's, then the most-loaded buckets are
  * exactly those that will be probed most often.  Therefore, the "average"
@@ -3261,14 +3560,22 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
                                stanullfrac,
                                mcvfreq,
                                avgfreq;
-       float4     *numbers;
-       int                     nnumbers;
+       bool            isdefault;
+       AttStatsSlot sslot;
 
        examine_variable(root, hashkey, 0, &vardata);
 
-       /* Get number of distinct values and fraction that are null */
-       ndistinct = get_variable_numdistinct(&vardata);
+       /* Get number of distinct values */
+       ndistinct = get_variable_numdistinct(&vardata, &isdefault);
+
+       /* If ndistinct isn't real, punt and return 0.1, per comments above */
+       if (isdefault)
+       {
+               ReleaseVariableStats(vardata);
+               return (Selectivity) 0.1;
+       }
 
+       /* Get fraction that are null */
        if (HeapTupleIsValid(vardata.statsTuple))
        {
                Form_pg_statistic stats;
@@ -3277,19 +3584,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
                stanullfrac = stats->stanullfrac;
        }
        else
-       {
-               /*
-                * Believe a default ndistinct only if it came from stats. Otherwise
-                * punt and return 0.1, per comments above.
-                */
-               if (ndistinct == DEFAULT_NUM_DISTINCT)
-               {
-                       ReleaseVariableStats(vardata);
-                       return (Selectivity) 0.1;
-               }
-
                stanullfrac = 0.0;
-       }
 
        /* Compute avg freq of all distinct data values in raw relation */
        avgfreq = (1.0 - stanullfrac) / ndistinct;
@@ -3302,8 +3597,11 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
         * XXX Possibly better way, but much more expensive: multiply by
         * selectivity of rel's restriction clauses that mention the target Var.
         */
-       if (vardata.rel)
+       if (vardata.rel && vardata.rel->tuples > 0)
+       {
                ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+               ndistinct = clamp_row_est(ndistinct);
+       }
 
        /*
         * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
@@ -3322,20 +3620,16 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 
        if (HeapTupleIsValid(vardata.statsTuple))
        {
-               if (get_attstatsslot(vardata.statsTuple,
-                                                        vardata.atttype, vardata.atttypmod,
+               if (get_attstatsslot(&sslot, vardata.statsTuple,
                                                         STATISTIC_KIND_MCV, InvalidOid,
-                                                        NULL,
-                                                        NULL, NULL,
-                                                        &numbers, &nnumbers))
+                                                        ATTSTATSSLOT_NUMBERS))
                {
                        /*
                         * The first MCV stat is for the most common value.
                         */
-                       if (nnumbers > 0)
-                               mcvfreq = numbers[0];
-                       free_attstatsslot(vardata.atttype, NULL, 0,
-                                                         numbers, nnumbers);
+                       if (sslot.nnumbers > 0)
+                               mcvfreq = sslot.numbers[0];
+                       free_attstatsslot(&sslot);
                }
        }
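/*
 * Editorial sketch, not part of this patch: the arithmetic that the updated
 * estimate_hash_bucketsize() performs on the values gathered above, restated
 * with plain doubles so it can be read in isolation.  The function name, the
 * 1.0 floor standing in for clamp_row_est(), and the final skew adjustment
 * (which lies below the hunks shown here) are assumptions, not code copied
 * from PostgreSQL.
 */
#include <stdbool.h>

static double
sketch_hash_bucketsize(double ndistinct, bool ndistinct_is_default,
					   double stanullfrac, double mcvfreq,
					   double rows, double tuples, double nbuckets)
{
	double		avgfreq;
	double		estfract;

	/* With only a default ndistinct, punt and return 0.1, as above */
	if (ndistinct_is_default)
		return 0.1;

	/* Average frequency of non-null distinct values in the raw relation */
	avgfreq = (1.0 - stanullfrac) / ndistinct;

	/* Scale ndistinct down to the rows expected after restriction clauses */
	if (tuples > 0.0)
	{
		ndistinct *= rows / tuples;
		if (ndistinct < 1.0)
			ndistinct = 1.0;	/* crude stand-in for clamp_row_est() */
	}

	/* One value per bucket unless distinct values are scarcer than buckets */
	estfract = (ndistinct > nbuckets) ? 1.0 / nbuckets : 1.0 / ndistinct;

	/* Inflate the estimate when the most common value is unusually frequent */
	if (avgfreq > 0.0 && mcvfreq > avgfreq)
		estfract *= mcvfreq / avgfreq;

	return estfract;
}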
 
@@ -3369,63 +3663,191 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
  */
 
 /*
- * convert_to_scalar
- *       Convert non-NULL values of the indicated types to the comparison
- *       scale needed by scalarineqsel().
- *       Returns "true" if successful.
- *
- * XXX this routine is a hack: ideally we should look up the conversion
- * subroutines in pg_type.
- *
- * All numeric datatypes are simply converted to their equivalent
- * "double" values.  (NUMERIC values that are outside the range of "double"
- * are clamped to +/- HUGE_VAL.)
- *
- * String datatypes are converted by convert_string_to_scalar(),
- * which is explained below.  The reason why this routine deals with
- * three values at a time, not just one, is that we need it for strings.
- *
- * The bytea datatype is just enough different from strings that it has
- * to be treated separately.
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and update *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
+ * updated to remove the matched varinfos.
  *
- * The several datatypes representing absolute times are all converted
- * to Timestamp, which is actually a double, and then we just use that
- * double value.  Note this will give correct results even for the "special"
- * values of Timestamp, since those are chosen to compare correctly;
- * see timestamp_cmp.
+ * Varinfos that aren't for simple Vars are ignored.
  *
- * The several datatypes representing relative times (intervals) are all
- * converted to measurements expressed in seconds.
+ * Return TRUE if we're able to find a match, FALSE otherwise.
  */
 static bool
-convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
-                                 Datum lobound, Datum hibound, Oid boundstypid,
-                                 double *scaledlobound, double *scaledhibound)
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+                                                               List **varinfos, double *ndistinct)
 {
-       /*
-        * Both the valuetypid and the boundstypid should exactly match the
-        * declared input type(s) of the operator we are invoked for, so we just
-        * error out if either is not recognized.
-        *
-        * XXX The histogram we are interpolating between points of could belong
-        * to a column that's only binary-compatible with the declared type. In
-        * essence we are assuming that the semantics of binary-compatible types
-        * are enough alike that we can use a histogram generated with one type's
-        * operators to estimate selectivity for the other's.  This is outright
-        * wrong in some cases --- in particular signed versus unsigned
-        * interpretation could trip us up.  But it's useful enough in the
-        * majority of cases that we do it anyway.      Should think about more
-        * rigorous ways to do it.
-        */
-       switch (valuetypid)
+       ListCell   *lc;
+       Bitmapset  *attnums = NULL;
+       int                     nmatches;
+       Oid                     statOid = InvalidOid;
+       MVNDistinct *stats;
+       Bitmapset  *matched = NULL;
+
+       /* bail out immediately if the table has no extended statistics */
+       if (!rel->statlist)
+               return false;
+
+       /* Determine the attnums we're looking for */
+       foreach(lc, *varinfos)
        {
-                       /*
-                        * Built-in numeric types
-                        */
-               case BOOLOID:
-               case INT2OID:
-               case INT4OID:
-               case INT8OID:
+               GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+
+               Assert(varinfo->rel == rel);
+
+               if (IsA(varinfo->var, Var))
+               {
+                       attnums = bms_add_member(attnums,
+                                                                        ((Var *) varinfo->var)->varattno);
+               }
+       }
+
+       /* look for the ndistinct statistics matching the most vars */
+       nmatches = 1;                           /* we require at least two matches */
+       foreach(lc, rel->statlist)
+       {
+               StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+               Bitmapset  *shared;
+               int                     nshared;
+
+               /* skip statistics of other kinds */
+               if (info->kind != STATS_EXT_NDISTINCT)
+                       continue;
+
+               /* compute attnums shared by the vars and the statistics object */
+               shared = bms_intersect(info->keys, attnums);
+               nshared = bms_num_members(shared);
+
+               /*
+                * Does this statistics object match more columns than the currently
+                * best object?  If so, use this one instead.
+                *
+                * XXX This should break ties using the name of the object, or
+                * something like that, to make the outcome stable.
+                */
+               if (nshared > nmatches)
+               {
+                       statOid = info->statOid;
+                       nmatches = nshared;
+                       matched = shared;
+               }
+       }
+
+       /* No match? */
+       if (statOid == InvalidOid)
+               return false;
+       Assert(nmatches > 1 && matched != NULL);
+
+       stats = statext_ndistinct_load(statOid);
+
+       /*
+        * If we have a match, search it for the specific item that matches (there
+        * must be one), and construct the output values.
+        */
+       if (stats)
+       {
+               int                     i;
+               List       *newlist = NIL;
+               MVNDistinctItem *item = NULL;
+
+               /* Find the specific item that exactly matches the combination */
+               for (i = 0; i < stats->nitems; i++)
+               {
+                       MVNDistinctItem *tmpitem = &stats->items[i];
+
+                       if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+                       {
+                               item = tmpitem;
+                               break;
+                       }
+               }
+
+               /* make sure we found an item */
+               if (!item)
+                       elog(ERROR, "corrupt MVNDistinct entry");
+
+               /* Form the output varinfo list, keeping only unmatched ones */
+               foreach(lc, *varinfos)
+               {
+                       GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+                       AttrNumber      attnum;
+
+                       if (!IsA(varinfo->var, Var))
+                       {
+                               newlist = lappend(newlist, varinfo);
+                               continue;
+                       }
+
+                       attnum = ((Var *) varinfo->var)->varattno;
+                       if (!bms_is_member(attnum, matched))
+                               newlist = lappend(newlist, varinfo);
+               }
+
+               *varinfos = newlist;
+               *ndistinct = item->ndistinct;
+               return true;
+       }
+
+       return false;
+}
+
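/*
 * Editorial sketch, not part of this patch: the object-selection rule used by
 * estimate_multivariate_ndistinct() above, restated with plain unsigned-int
 * bitmasks instead of Bitmapsets.  Given the bitmask of grouped column
 * numbers and the column bitmask of each ndistinct statistics object, choose
 * the object sharing the most columns, requiring at least two shared columns.
 * Returns the index of the chosen object, or -1 if none qualifies; all names
 * here are illustrative.
 */
static int
sketch_choose_ndistinct_stats(unsigned int grouped_cols,
							  const unsigned int *stat_cols, int nstats,
							  unsigned int *matched_cols)
{
	int			best = -1;
	int			best_nshared = 1;	/* require at least two matches */
	int			i;

	for (i = 0; i < nstats; i++)
	{
		unsigned int shared = stat_cols[i] & grouped_cols;
		unsigned int bits = shared;
		int			nshared = 0;

		while (bits != 0)
		{
			nshared += (int) (bits & 1);
			bits >>= 1;
		}

		if (nshared > best_nshared)
		{
			best = i;
			best_nshared = nshared;
			*matched_cols = shared;
		}
	}
	return best;
}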
+/*
+ * convert_to_scalar
+ *       Convert non-NULL values of the indicated types to the comparison
+ *       scale needed by scalarineqsel().
+ *       Returns "true" if successful.
+ *
+ * XXX this routine is a hack: ideally we should look up the conversion
+ * subroutines in pg_type.
+ *
+ * All numeric datatypes are simply converted to their equivalent
+ * "double" values.  (NUMERIC values that are outside the range of "double"
+ * are clamped to +/- HUGE_VAL.)
+ *
+ * String datatypes are converted by convert_string_to_scalar(),
+ * which is explained below.  The reason why this routine deals with
+ * three values at a time, not just one, is that we need it for strings.
+ *
+ * The bytea datatype is just enough different from strings that it has
+ * to be treated separately.
+ *
+ * The several datatypes representing absolute times are all converted
+ * to Timestamp, which is actually a double, and then we just use that
+ * double value.  Note this will give correct results even for the "special"
+ * values of Timestamp, since those are chosen to compare correctly;
+ * see timestamp_cmp.
+ *
+ * The several datatypes representing relative times (intervals) are all
+ * converted to measurements expressed in seconds.
+ */
+static bool
+convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
+                                 Datum lobound, Datum hibound, Oid boundstypid,
+                                 double *scaledlobound, double *scaledhibound)
+{
+       /*
+        * Both the valuetypid and the boundstypid should exactly match the
+        * declared input type(s) of the operator we are invoked for, so we just
+        * error out if either is not recognized.
+        *
+        * XXX The histogram we are interpolating between points of could belong
+        * to a column that's only binary-compatible with the declared type. In
+        * essence we are assuming that the semantics of binary-compatible types
+        * are enough alike that we can use a histogram generated with one type's
+        * operators to estimate selectivity for the other's.  This is outright
+        * wrong in some cases --- in particular signed versus unsigned
+        * interpretation could trip us up.  But it's useful enough in the
+        * majority of cases that we do it anyway.  Should think about more
+        * rigorous ways to do it.
+        */
+       switch (valuetypid)
+       {
+                       /*
+                        * Built-in numeric types
+                        */
+               case BOOLOID:
+               case INT2OID:
+               case INT4OID:
+               case INT8OID:
                case FLOAT4OID:
                case FLOAT8OID:
                case NUMERICOID:
@@ -3438,6 +3860,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                case REGTYPEOID:
                case REGCONFIGOID:
                case REGDICTIONARYOID:
+               case REGROLEOID:
+               case REGNAMESPACEOID:
                        *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
                        *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
                        *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
@@ -3499,6 +3923,7 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                case INETOID:
                case CIDROID:
                case MACADDROID:
+               case MACADDR8OID:
                        *scaledvalue = convert_network_to_scalar(value, valuetypid);
                        *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
                        *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
@@ -3543,6 +3968,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
                case REGTYPEOID:
                case REGCONFIGOID:
                case REGDICTIONARYOID:
+               case REGROLEOID:
+               case REGNAMESPACEOID:
                        /* we can treat OIDs as integers... */
                        return (double) DatumGetObjectId(value);
        }
@@ -3667,10 +4094,16 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
                return 0.0;                             /* empty string has scalar value 0 */
 
        /*
-        * Since base is at least 10, need not consider more than about 20 chars
+        * There seems little point in considering more than a dozen bytes from
+        * the string.  Since base is at least 10, that will give us nominal
+        * resolution of at least 12 decimal digits, which is surely far more
+        * precision than this estimation technique has got anyway (especially in
+        * non-C locales).  Also, even with the maximum possible base of 256, this
+        * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
+        * overflow on any known machine.
         */
-       if (slen > 20)
-               slen = 20;
+       if (slen > 12)
+               slen = 12;
 
        /* Convert initial characters to fraction */
        base = rangehi - rangelo + 1;
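/*
 * Editorial sketch, not part of this patch: the base-N positional conversion
 * that the comment above describes.  Each byte is treated as one "digit"
 * between rangelo and rangehi, and only the first 12 bytes contribute, so the
 * running denominator can never exceed 256^13.  The function name is
 * illustrative; the real code continues below this hunk.
 */
static double
sketch_string_to_scalar(const unsigned char *value, int slen,
						int rangelo, int rangehi)
{
	double		base = (double) (rangehi - rangelo + 1);
	double		num = 0.0;
	double		denom = base;

	if (slen > 12)
		slen = 12;

	while (slen-- > 0)
	{
		int			ch = *value++;

		/* map out-of-range bytes to just outside the declared range */
		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;

		num += ((double) (ch - rangelo)) / denom;
		denom *= base;
	}
	return num;					/* roughly a fraction in [0, 1) */
}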
@@ -3735,19 +4168,11 @@ convert_string_datum(Datum value, Oid typid)
        {
                char       *xfrmstr;
                size_t          xfrmlen;
-               size_t          xfrmlen2;
+               size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
 
                /*
-                * Note: originally we guessed at a suitable output buffer size, and
-                * only needed to call strxfrm twice if our guess was too small.
-                * However, it seems that some versions of Solaris have buggy strxfrm
-                * that can write past the specified buffer length in that scenario.
-                * So, do it the dumb way for portability.
-                *
-                * Yet other systems (e.g., glibc) sometimes return a smaller value
-                * from the second call than the first; thus the Assert must be <= not
-                * == as you'd expect.  Can't any of these people program their way
-                * out of a paper bag?
+                * XXX: We could guess at a suitable output buffer size and only call
+                * strxfrm twice if our guess is too small.
                 *
                 * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
                 * bogus data or set an error. This is not really a problem unless it
@@ -3780,6 +4205,11 @@ convert_string_datum(Datum value, Oid typid)
 #endif
                xfrmstr = (char *) palloc(xfrmlen + 1);
                xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+
+               /*
+                * Some systems (e.g., glibc) can return a smaller value from the
+                * second call than the first; thus the Assert must be <= not ==.
+                */
                Assert(xfrmlen2 <= xfrmlen);
                pfree(val);
                val = xfrmstr;
@@ -3907,31 +4337,17 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                 * average month length of 365.25/12.0 days.  Not too
                                 * accurate, but plenty good enough for our purposes.
                                 */
-#ifdef HAVE_INT64_TIMESTAMP
                                return interval->time + interval->day * (double) USECS_PER_DAY +
                                        interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
-                               return interval->time + interval->day * SECS_PER_DAY +
-                                       interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
                        }
                case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
                        return (DatumGetRelativeTime(value) * 1000000.0);
-#else
-                       return DatumGetRelativeTime(value);
-#endif
                case TINTERVALOID:
                        {
                                TimeInterval tinterval = DatumGetTimeInterval(value);
 
-#ifdef HAVE_INT64_TIMESTAMP
                                if (tinterval->status != 0)
                                        return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
-                               if (tinterval->status != 0)
-                                       return tinterval->data[1] - tinterval->data[0];
-#endif
                                return 0;               /* for lack of a better idea */
                        }
                case TIMEOID:
@@ -3941,11 +4357,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
 
                                /* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
                                return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
-                               return (double) (timetz->time + timetz->zone);
-#endif
                        }
        }
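/*
 * Editorial sketch, not part of this patch: the Interval flattening used
 * above, with the constants written out.  With USECS_PER_DAY = 86400e6 and an
 * average month of 365.25/12 = 30.4375 days, "1 month 2 days 03:00:00"
 * flattens to about 2.8134e12 microseconds.  The function name is
 * illustrative.
 */
static double
sketch_interval_to_scalar(double time_usecs, double days, double months)
{
	const double usecs_per_day = 86400.0 * 1000000.0;	/* USECS_PER_DAY */
	const double days_per_month = 365.25 / 12.0;	/* DAYS_PER_YEAR / MONTHS_PER_YEAR */

	return time_usecs +
		days * usecs_per_day +
		months * days_per_month * usecs_per_day;
}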
 
@@ -3998,7 +4410,7 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
        right = (Node *) lsecond(args);
 
        /*
-        * Examine both sides.  Note that when varRelid is nonzero, Vars of other
+        * Examine both sides.  Note that when varRelid is nonzero, Vars of other
         * relations will be treated as pseudoconstants.
         */
        examine_variable(root, left, varRelid, vardata);
@@ -4024,7 +4436,7 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
                return true;
        }
 
-       /* Ooops, clause has wrong structure (probably var op var) */
+       /* Oops, clause has wrong structure (probably var op var) */
        ReleaseVariableStats(*vardata);
        ReleaseVariableStats(rdata);
 
@@ -4089,11 +4501,17 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
  *     freefunc: pointer to a function to release statsTuple with.
  *     vartype: exposed type of the expression; this should always match
  *             the declared input type of the operator we are estimating for.
- *     atttype, atttypmod: type data to pass to get_attstatsslot().  This is
+ *     atttype, atttypmod: actual type/typmod of the "var" expression.  This is
  *             commonly the same as the exposed type of the variable argument,
  *             but can be different in binary-compatible-type cases.
- *     isunique: TRUE if we were able to match the var to a unique index,
- *             implying its values are unique for this query.
+ *     isunique: TRUE if we were able to match the var to a unique index or a
+ *             single-column DISTINCT clause, implying its values are unique for
+ *             this query.  (Caution: this should be trusted for statistical
+ *             purposes only, since we do not check indimmediate nor verify that
+ *             the exact same definition of equality applies.)
+ *     acl_ok: TRUE if current user has permission to read the column(s)
+ *             underlying the pg_statistic entry.  This is consulted by
+ *             statistic_proc_security_check().
  *
  * Caller is responsible for doing ReleaseVariableStats() before exiting.
  */
@@ -4124,53 +4542,23 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
        {
                Var                *var = (Var *) basenode;
-               RangeTblEntry *rte;
 
+               /* Set up result fields other than the stats tuple */
                vardata->var = basenode;        /* return Var without relabeling */
                vardata->rel = find_base_rel(root, var->varno);
                vardata->atttype = var->vartype;
                vardata->atttypmod = var->vartypmod;
                vardata->isunique = has_unique_index(vardata->rel, var->varattno);
 
-               rte = root->simple_rte_array[var->varno];
-
-               if (get_relation_stats_hook &&
-                       (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
-               {
-                       /*
-                        * The hook took control of acquiring a stats tuple.  If it did
-                        * supply a tuple, it'd better have supplied a freefunc.
-                        */
-                       if (HeapTupleIsValid(vardata->statsTuple) &&
-                               !vardata->freefunc)
-                               elog(ERROR, "no function provided to release variable stats with");
-               }
-               else if (rte->rtekind == RTE_RELATION)
-               {
-                       vardata->statsTuple = SearchSysCache3(STATRELATTINH,
-                                                                                               ObjectIdGetDatum(rte->relid),
-                                                                                               Int16GetDatum(var->varattno),
-                                                                                                 BoolGetDatum(rte->inh));
-                       vardata->freefunc = ReleaseSysCache;
-               }
-               else
-               {
-                       /*
-                        * XXX This means the Var comes from a JOIN or sub-SELECT. Later
-                        * add code to dig down into the join etc and see if we can trace
-                        * the variable to something with stats.  (But beware of
-                        * sub-SELECTs with DISTINCT/GROUP BY/etc.      Perhaps there are no
-                        * cases where this would really be useful, because we'd have
-                        * flattened the subselect if it is??)
-                        */
-               }
+               /* Try to locate some stats */
+               examine_simple_variable(root, var, vardata);
 
                return;
        }
 
        /*
         * Okay, it's a more complicated expression.  Determine variable
-        * membership.  Note that when varRelid isn't zero, only vars of that
+        * membership.  Note that when varRelid isn't zero, only vars of that
         * relation are considered "real" vars.
         */
        varnos = pull_varnos(basenode);
@@ -4219,13 +4607,13 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
        if (onerel)
        {
                /*
-                * We have an expression in vars of a single relation.  Try to match
+                * We have an expression in vars of a single relation.  Try to match
                 * it to expressional index columns, in hopes of finding some
                 * statistics.
                 *
                 * XXX it's conceivable that there are multiple matches with different
                 * index opfamilies; if so, we need to pick one that matches the
-                * operator we are estimating for.      FIXME later.
+                * operator we are estimating for.  FIXME later.
                 */
                ListCell   *ilist;
 
@@ -4292,6 +4680,30 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                                                                                                Int16GetDatum(pos + 1),
                                                                                                BoolGetDatum(false));
                                                        vardata->freefunc = ReleaseSysCache;
+
+                                                       if (HeapTupleIsValid(vardata->statsTuple))
+                                                       {
+                                                               /* Get index's table for permission check */
+                                                               RangeTblEntry *rte;
+
+                                                               rte = planner_rt_fetch(index->rel->relid, root);
+                                                               Assert(rte->rtekind == RTE_RELATION);
+
+                                                               /*
+                                                                * For simplicity, we insist on the whole
+                                                                * table being selectable, rather than trying
+                                                                * to identify which column(s) the index
+                                                                * depends on.
+                                                                */
+                                                               vardata->acl_ok =
+                                                                       (pg_class_aclcheck(rte->relid, GetUserId(),
+                                                                                                ACL_SELECT) == ACLCHECK_OK);
+                                                       }
+                                                       else
+                                                       {
+                                                               /* suppress leakproofness checks later */
+                                                               vardata->acl_ok = true;
+                                                       }
                                                }
                                                if (vardata->statsTuple)
                                                        break;
@@ -4305,25 +4717,225 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
        }
 }
 
+/*
+ * examine_simple_variable
+ *             Handle a simple Var for examine_variable
+ *
+ * This is split out as a subroutine so that we can recurse to deal with
+ * Vars referencing subqueries.
+ *
+ * We already filled in all the fields of *vardata except for the stats tuple.
+ */
+static void
+examine_simple_variable(PlannerInfo *root, Var *var,
+                                               VariableStatData *vardata)
+{
+       RangeTblEntry *rte = root->simple_rte_array[var->varno];
+
+       Assert(IsA(rte, RangeTblEntry));
+
+       if (get_relation_stats_hook &&
+               (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
+       {
+               /*
+                * The hook took control of acquiring a stats tuple.  If it did supply
+                * a tuple, it'd better have supplied a freefunc.
+                */
+               if (HeapTupleIsValid(vardata->statsTuple) &&
+                       !vardata->freefunc)
+                       elog(ERROR, "no function provided to release variable stats with");
+       }
+       else if (rte->rtekind == RTE_RELATION)
+       {
+               /*
+                * Plain table or parent of an inheritance appendrel, so look up the
+                * column in pg_statistic
+                */
+               vardata->statsTuple = SearchSysCache3(STATRELATTINH,
+                                                                                         ObjectIdGetDatum(rte->relid),
+                                                                                         Int16GetDatum(var->varattno),
+                                                                                         BoolGetDatum(rte->inh));
+               vardata->freefunc = ReleaseSysCache;
+
+               if (HeapTupleIsValid(vardata->statsTuple))
+               {
+                       /* check if user has permission to read this column */
+                       vardata->acl_ok =
+                               (pg_class_aclcheck(rte->relid, GetUserId(),
+                                                                  ACL_SELECT) == ACLCHECK_OK) ||
+                               (pg_attribute_aclcheck(rte->relid, var->varattno, GetUserId(),
+                                                                          ACL_SELECT) == ACLCHECK_OK);
+               }
+               else
+               {
+                       /* suppress any possible leakproofness checks later */
+                       vardata->acl_ok = true;
+               }
+       }
+       else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
+       {
+               /*
+                * Plain subquery (not one that was converted to an appendrel).
+                */
+               Query      *subquery = rte->subquery;
+               RelOptInfo *rel;
+               TargetEntry *ste;
+
+               /*
+                * Punt if it's a whole-row var rather than a plain column reference.
+                */
+               if (var->varattno == InvalidAttrNumber)
+                       return;
+
+               /*
+                * Punt if subquery uses set operations or GROUP BY, as these will
+                * mash underlying columns' stats beyond recognition.  (Set ops are
+                * particularly nasty; if we forged ahead, we would return stats
+                * relevant to only the leftmost subselect...)  DISTINCT is also
+                * problematic, but we check that later because there is a possibility
+                * of learning something even with it.
+                */
+               if (subquery->setOperations ||
+                       subquery->groupClause)
+                       return;
+
+               /*
+                * OK, fetch RelOptInfo for subquery.  Note that we don't change the
+                * rel returned in vardata, since caller expects it to be a rel of the
+                * caller's query level.  Because we might already be recursing, we
+                * can't use that rel pointer either, but have to look up the Var's
+                * rel afresh.
+                */
+               rel = find_base_rel(root, var->varno);
+
+               /* If the subquery hasn't been planned yet, we have to punt */
+               if (rel->subroot == NULL)
+                       return;
+               Assert(IsA(rel->subroot, PlannerInfo));
+
+               /*
+                * Switch our attention to the subquery as mangled by the planner. It
+                * was okay to look at the pre-planning version for the tests above,
+                * but now we need a Var that will refer to the subroot's live
+                * RelOptInfos.  For instance, if any subquery pullup happened during
+                * planning, Vars in the targetlist might have gotten replaced, and we
+                * need to see the replacement expressions.
+                */
+               subquery = rel->subroot->parse;
+               Assert(IsA(subquery, Query));
+
+               /* Get the subquery output expression referenced by the upper Var */
+               ste = get_tle_by_resno(subquery->targetList, var->varattno);
+               if (ste == NULL || ste->resjunk)
+                       elog(ERROR, "subquery %s does not have attribute %d",
+                                rte->eref->aliasname, var->varattno);
+               var = (Var *) ste->expr;
+
+               /*
+                * If subquery uses DISTINCT, we can't make use of any stats for the
+                * variable ... but, if it's the only DISTINCT column, we are entitled
+                * to consider it unique.  We do the test this way so that it works
+                * for cases involving DISTINCT ON.
+                */
+               if (subquery->distinctClause)
+               {
+                       if (list_length(subquery->distinctClause) == 1 &&
+                               targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
+                               vardata->isunique = true;
+                       /* cannot go further */
+                       return;
+               }
+
+               /*
+                * If the sub-query originated from a view with the security_barrier
+                * attribute, we must not look at the variable's statistics, though it
+                * seems all right to notice the existence of a DISTINCT clause. So
+                * stop here.
+                *
+                * This is probably a harsher restriction than necessary; it's
+                * certainly OK for the selectivity estimator (which is a C function,
+                * and therefore omnipotent anyway) to look at the statistics.  But
+                * many selectivity estimators will happily *invoke the operator
+                * function* to try to work out a good estimate - and that's not OK.
+                * So for now, don't dig down for stats.
+                */
+               if (rte->security_barrier)
+                       return;
+
+               /* Can only handle a simple Var of subquery's query level */
+               if (var && IsA(var, Var) &&
+                       var->varlevelsup == 0)
+               {
+                       /*
+                        * OK, recurse into the subquery.  Note that the original setting
+                        * of vardata->isunique (which will surely be false) is left
+                        * unchanged in this situation.  That's what we want, since even
+                        * if the underlying column is unique, the subquery may have
+                        * joined to other tables in a way that creates duplicates.
+                        */
+                       examine_simple_variable(rel->subroot, var, vardata);
+               }
+       }
+       else
+       {
+               /*
+                * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.  (We
+                * won't see RTE_JOIN here because join alias Vars have already been
+                * flattened.)  There's not much we can do with function outputs, but
+                * maybe someday try to be smarter about VALUES and/or CTEs.
+                */
+       }
+}
+
+/*
+ * Check whether it is permitted to call func_oid passing some of the
+ * pg_statistic data in vardata.  We allow this either if the user has SELECT
+ * privileges on the table or column underlying the pg_statistic data or if
+ * the function is marked leak-proof.
+ */
+bool
+statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
+{
+       if (vardata->acl_ok)
+               return true;
+
+       if (!OidIsValid(func_oid))
+               return false;
+
+       if (get_func_leakproof(func_oid))
+               return true;
+
+       ereport(DEBUG2,
+                       (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
+                                                        get_func_name(func_oid))));
+       return false;
+}
+
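/*
 * Editorial sketch, not part of this patch: the intended calling pattern for
 * statistic_proc_security_check().  Before an estimator passes pg_statistic
 * values to an operator's support function, it checks whether doing so could
 * leak data the current user cannot read; get_variable_range() below follows
 * exactly this pattern.  The wrapper name is illustrative, and this fragment
 * presumes the usual backend headers rather than being standalone.
 */
static bool
sketch_can_use_stats(VariableStatData *vardata, Oid oproid)
{
	Oid			opfuncoid = get_opcode(oproid);

	return statistic_proc_security_check(vardata, opfuncoid);
}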
 /*
  * get_variable_numdistinct
  *       Estimate the number of distinct values of a variable.
  *
  * vardata: results of examine_variable
+ * *isdefault: set to TRUE if the result is a default rather than based on
+ * anything meaningful.
  *
- * NB: be careful to produce an integral result, since callers may compare
- * the result to exact integer counts.
+ * NB: be careful to produce a positive integral result, since callers may
+ * compare the result to exact integer counts, or might divide by it.
  */
 double
-get_variable_numdistinct(VariableStatData *vardata)
+get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 {
        double          stadistinct;
+       double          stanullfrac = 0.0;
        double          ntuples;
 
+       *isdefault = false;
+
        /*
-        * Determine the stadistinct value to use.      There are cases where we can
+        * Determine the stadistinct value to use.  There are cases where we can
         * get an estimate even without a pg_statistic entry, or can get a better
-        * value than is in pg_statistic.
+        * value than is in pg_statistic.  Grab stanullfrac too if we can find it
+        * (otherwise, assume no nulls, for lack of any better idea).
         */
        if (HeapTupleIsValid(vardata->statsTuple))
        {
@@ -4332,6 +4944,7 @@ get_variable_numdistinct(VariableStatData *vardata)
 
                stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
                stadistinct = stats->stadistinct;
+               stanullfrac = stats->stanullfrac;
        }
        else if (vardata->vartype == BOOLOID)
        {
@@ -4355,7 +4968,7 @@ get_variable_numdistinct(VariableStatData *vardata)
                        {
                                case ObjectIdAttributeNumber:
                                case SelfItemPointerAttributeNumber:
-                                       stadistinct = -1.0; /* unique */
+                                       stadistinct = -1.0; /* unique (and all non null) */
                                        break;
                                case TableOidAttributeNumber:
                                        stadistinct = 1.0;      /* only 1 value */
@@ -4374,42 +4987,51 @@ get_variable_numdistinct(VariableStatData *vardata)
        }
 
        /*
-        * If there is a unique index for the variable, assume it is unique no
-        * matter what pg_statistic says; the statistics could be out of date, or
-        * we might have found a partial unique index that proves the var is
-        * unique for this query.
+        * If there is a unique index or DISTINCT clause for the variable, assume
+        * it is unique no matter what pg_statistic says; the statistics could be
+        * out of date, or we might have found a partial unique index that proves
+        * the var is unique for this query.  However, we'd better still believe
+        * the null-fraction statistic.
         */
        if (vardata->isunique)
-               stadistinct = -1.0;
+               stadistinct = -1.0 * (1.0 - stanullfrac);
 
        /*
         * If we had an absolute estimate, use that.
         */
        if (stadistinct > 0.0)
-               return stadistinct;
+               return clamp_row_est(stadistinct);
 
        /*
         * Otherwise we need to get the relation size; punt if not available.
         */
        if (vardata->rel == NULL)
+       {
+               *isdefault = true;
                return DEFAULT_NUM_DISTINCT;
+       }
        ntuples = vardata->rel->tuples;
        if (ntuples <= 0.0)
+       {
+               *isdefault = true;
                return DEFAULT_NUM_DISTINCT;
+       }
 
        /*
         * If we had a relative estimate, use that.
         */
        if (stadistinct < 0.0)
-               return floor((-stadistinct * ntuples) + 0.5);
+               return clamp_row_est(-stadistinct * ntuples);
 
        /*
         * With no data, estimate ndistinct = ntuples if the table is small, else
-        * use default.
+        * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
+        * that the behavior isn't discontinuous.
         */
        if (ntuples < DEFAULT_NUM_DISTINCT)
-               return ntuples;
+               return clamp_row_est(ntuples);
 
+       *isdefault = true;
        return DEFAULT_NUM_DISTINCT;
 }
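/*
 * Editorial sketch, not part of this patch: the decision ladder above with
 * plain doubles.  stadistinct > 0 is an absolute count, stadistinct < 0 is
 * minus a fraction of the table, and 0 means "unknown".  The 200 default
 * (DEFAULT_NUM_DISTINCT) comes from selfuncs.h; fmax(1.0, rint(x)) stands in
 * for clamp_row_est(), and the function name is illustrative.
 */
#include <stdbool.h>
#include <math.h>

static double
sketch_numdistinct(double stadistinct, double stanullfrac, bool isunique,
				   double ntuples, bool *isdefault)
{
	const double default_num_distinct = 200.0;

	*isdefault = false;

	/* A unique index or DISTINCT clause wins, but nulls still don't count */
	if (isunique)
		stadistinct = -1.0 * (1.0 - stanullfrac);

	/* Absolute estimate available */
	if (stadistinct > 0.0)
		return fmax(1.0, rint(stadistinct));

	/* Without a table size we can go no further */
	if (ntuples <= 0.0)
	{
		*isdefault = true;
		return default_num_distinct;
	}

	/* Relative estimate available */
	if (stadistinct < 0.0)
		return fmax(1.0, rint(-stadistinct * ntuples));

	/* No data: assume small tables are all-distinct, else fall back */
	if (ntuples < default_num_distinct)
		return fmax(1.0, rint(ntuples));

	*isdefault = true;
	return default_num_distinct;
}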
 
@@ -4431,13 +5053,13 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
        bool            have_data = false;
        int16           typLen;
        bool            typByVal;
-       Datum      *values;
-       int                     nvalues;
+       Oid                     opfuncoid;
+       AttStatsSlot sslot;
        int                     i;
 
        /*
         * XXX It's very tempting to try to use the actual column min and max, if
-        * we can get them relatively-cheaply with an index probe.      However, since
+        * we can get them relatively-cheaply with an index probe.  However, since
         * this function is called many times during join planning, that could
         * have unpleasant effects on planning speed.  Need more investigation
         * before enabling this.
@@ -4453,6 +5075,17 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
                return false;
        }
 
+       /*
+        * If we can't apply the sortop to the stats data, just fail.  In
+        * principle, if there's a histogram and no MCVs, we could return the
+        * histogram endpoints without ever applying the sortop ... but it's
+        * probably not worth trying, because whatever the caller wants to do with
+        * the endpoints would likely fail the security check too.
+        */
+       if (!statistic_proc_security_check(vardata,
+                                                                          (opfuncoid = get_opcode(sortop))))
+               return false;
+
        get_typlenbyval(vardata->atttype, &typLen, &typByVal);
 
        /*
@@ -4462,29 +5095,23 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
         * the one we want, fail --- this suggests that there is data we can't
         * use.
         */
-       if (get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+       if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                 STATISTIC_KIND_HISTOGRAM, sortop,
-                                                NULL,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
        {
-               if (nvalues > 0)
+               if (sslot.nvalues > 0)
                {
-                       tmin = datumCopy(values[0], typByVal, typLen);
-                       tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
+                       tmin = datumCopy(sslot.values[0], typByVal, typLen);
+                       tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
                        have_data = true;
                }
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
        }
-       else if (get_attstatsslot(vardata->statsTuple,
-                                                         vardata->atttype, vardata->atttypmod,
+       else if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                          STATISTIC_KIND_HISTOGRAM, InvalidOid,
-                                                         NULL,
-                                                         &values, &nvalues,
-                                                         NULL, NULL))
+                                                         0))
        {
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
                return false;
        }
 
@@ -4494,39 +5121,36 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
         * the MCVs.  However, usually the MCVs will not be the extreme values, so
         * avoid unnecessary data copying.
         */
-       if (get_attstatsslot(vardata->statsTuple,
-                                                vardata->atttype, vardata->atttypmod,
+       if (get_attstatsslot(&sslot, vardata->statsTuple,
                                                 STATISTIC_KIND_MCV, InvalidOid,
-                                                NULL,
-                                                &values, &nvalues,
-                                                NULL, NULL))
+                                                ATTSTATSSLOT_VALUES))
        {
                bool            tmin_is_mcv = false;
                bool            tmax_is_mcv = false;
                FmgrInfo        opproc;
 
-               fmgr_info(get_opcode(sortop), &opproc);
+               fmgr_info(opfuncoid, &opproc);
 
-               for (i = 0; i < nvalues; i++)
+               for (i = 0; i < sslot.nvalues; i++)
                {
                        if (!have_data)
                        {
-                               tmin = tmax = values[i];
+                               tmin = tmax = sslot.values[i];
                                tmin_is_mcv = tmax_is_mcv = have_data = true;
                                continue;
                        }
                        if (DatumGetBool(FunctionCall2Coll(&opproc,
                                                                                           DEFAULT_COLLATION_OID,
-                                                                                          values[i], tmin)))
+                                                                                          sslot.values[i], tmin)))
                        {
-                               tmin = values[i];
+                               tmin = sslot.values[i];
                                tmin_is_mcv = true;
                        }
                        if (DatumGetBool(FunctionCall2Coll(&opproc,
                                                                                           DEFAULT_COLLATION_OID,
-                                                                                          tmax, values[i])))
+                                                                                          tmax, sslot.values[i])))
                        {
-                               tmax = values[i];
+                               tmax = sslot.values[i];
                                tmax_is_mcv = true;
                        }
                }
@@ -4534,7 +5158,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
                        tmin = datumCopy(tmin, typByVal, typLen);
                if (tmax_is_mcv)
                        tmax = datumCopy(tmax, typByVal, typLen);
-               free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+               free_attstatsslot(&sslot);
        }
 
        *min = tmin;
@@ -4640,6 +5264,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        HeapTuple       tup;
                        Datum           values[INDEX_MAX_KEYS];
                        bool            isnull[INDEX_MAX_KEYS];
+                       SnapshotData SnapshotDirty;
 
                        estate = CreateExecutorState();
                        econtext = GetPerTupleExprContext(estate);
@@ -4662,6 +5287,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
                        econtext->ecxt_scantuple = slot;
                        get_typlenbyval(vardata->atttype, &typLen, &typByVal);
+                       InitDirtySnapshot(SnapshotDirty);
 
                        /* set up an IS NOT NULL scan key so that we ignore nulls */
                        ScanKeyEntryInitialize(&scankeys[0],
@@ -4678,7 +5304,22 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        /* If min is requested ... */
                        if (min)
                        {
-                               index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
+                               /*
+                                * In principle, we should scan the index with our current
+                                * active snapshot, which is the best approximation we've got
+                                * to what the query will see when executed.  But that won't
+                                * be exact if a new snap is taken before running the query,
+                                * and it can be very expensive if a lot of uncommitted rows
+                                * exist at the end of the index (because we'll laboriously
+                                * fetch each one and reject it).  What seems like a good
+                                * compromise is to use SnapshotDirty.  That will accept
+                                * uncommitted rows, and thus avoid fetching multiple heap
+                                * tuples in this scenario.  On the other hand, it will reject
+                                * known-dead rows, and thus not give a bogus answer when the
+                                * extreme value has been deleted; that case motivates not
+                                * using SnapshotAny here.
+                                */
+                               index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
                                                                                         1, 0);
                                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
@@ -4710,7 +5351,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        /* If max is requested, and we didn't find the index is empty */
                        if (max && have_data)
                        {
-                               index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
+                               index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
                                                                                         1, 0);
                                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
@@ -4756,6 +5397,37 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
        return have_data;
 }
 
+/*
+ * find_join_input_rel
+ *             Look up the input relation for a join.
+ *
+ * We assume that the input relation's RelOptInfo must have been constructed
+ * already.
+ */
+static RelOptInfo *
+find_join_input_rel(PlannerInfo *root, Relids relids)
+{
+       RelOptInfo *rel = NULL;
+
+       switch (bms_membership(relids))
+       {
+               case BMS_EMPTY_SET:
+                       /* should not happen */
+                       break;
+               case BMS_SINGLETON:
+                       rel = find_base_rel(root, bms_singleton_member(relids));
+                       break;
+               case BMS_MULTIPLE:
+                       rel = find_join_rel(root, relids);
+                       break;
+       }
+
+       if (rel == NULL)
+               elog(ERROR, "could not find RelOptInfo for given relids");
+
+       return rel;
+}
+
 
 /*-------------------------------------------------------------------------
  *
@@ -4778,7 +5450,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 /*
  * Check whether char is a letter (and, hence, subject to case-folding)
  *
- * In multibyte character sets, we can't use isalpha, and it does not seem
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
  * worth trying to convert to wchar_t to use iswalpha.  Instead, just assume
  * any multibyte char is potentially case-varying.
  */
@@ -4790,9 +5462,11 @@ pattern_char_isalpha(char c, bool is_multibyte,
                return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        else if (is_multibyte && IS_HIGHBIT_SET(c))
                return true;
+       else if (locale && locale->provider == COLLPROVIDER_ICU)
+               return IS_HIGHBIT_SET(c) ? true : false;
 #ifdef HAVE_LOCALE_T
-       else if (locale)
-               return isalpha_l((unsigned char) c, locale);
+       else if (locale && locale->provider == COLLPROVIDER_LIBC)
+               return isalpha_l((unsigned char) c, locale->info.lt);
 #endif
        else
                return isalpha((unsigned char) c);
@@ -4803,9 +5477,9 @@ pattern_char_isalpha(char c, bool is_multibyte,
  *
  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
  *     or to NULL if no fixed prefix exists for the pattern.
- * *rest is set to a palloc'd Const representing the remainder of the pattern
- *     after the portion describing the fixed prefix.
- * Each of these has the same type (TEXT or BYTEA) as the given pattern Const.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ *     selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
  *
  * The return value distinguishes no fixed prefix, a partial prefix,
  * or an exact-match-only pattern.
@@ -4813,17 +5487,16 @@ pattern_char_isalpha(char c, bool is_multibyte,
 
 static Pattern_Prefix_Status
 like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
-                                 Const **prefix_const, Const **rest_const)
+                                 Const **prefix_const, Selectivity *rest_selec)
 {
        char       *match;
        char       *patt;
        int                     pattlen;
-       char       *rest;
        Oid                     typeid = patt_const->consttype;
        int                     pos,
                                match_pos;
        bool            is_multibyte = (pg_database_encoding_max_length() > 1);
-       pg_locale_t     locale = 0;
+       pg_locale_t locale = 0;
        bool            locale_is_c = false;
 
        /* the right-hand const is type text or bytea */
@@ -4834,7 +5507,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
                if (typeid == BYTEAOID)
                        ereport(ERROR,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                  errmsg("case insensitive matching not supported on type bytea")));
+                       errmsg("case insensitive matching not supported on type bytea")));
 
                /* If case-insensitive, we need locale info */
                if (lc_ctype_is_c(collation))
@@ -4863,13 +5536,12 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
        }
        else
        {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
 
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
+               pattlen = VARSIZE_ANY_EXHDR(bstr);
                patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
+               memcpy(patt, VARDATA_ANY(bstr), pattlen);
+               Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
        }
 
        match = palloc(pattlen + 1);
@@ -4891,25 +5563,22 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 
                /* Stop if case-varying character (it's sort of a wildcard) */
                if (case_insensitive &&
-                       pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+                 pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
                        break;
 
                match[match_pos++] = patt[pos];
        }
 
        match[match_pos] = '\0';
-       rest = &patt[pos];
 
        if (typeid != BYTEAOID)
-       {
                *prefix_const = string_to_const(match, typeid);
-               *rest_const = string_to_const(rest, typeid);
-       }
        else
-       {
                *prefix_const = string_to_bytea_const(match, match_pos);
-               *rest_const = string_to_bytea_const(rest, pattlen - pos);
-       }
+
+       if (rest_selec != NULL)
+               *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+                                                                          case_insensitive);
 
        pfree(patt);
        pfree(match);
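/*
 * Editorial sketch, not part of this patch: the prefix-splitting idea in
 * isolation.  Copy literal leading characters of a LIKE pattern into *prefix
 * until a wildcard ('%' or '_') appears; a backslash escapes the following
 * character.  The caller would then estimate the remainder's selectivity
 * separately, as the new rest_selec output does via like_selectivity().
 * Case-insensitive matching, bytea patterns, and multibyte concerns are
 * ignored here, and the function name is illustrative.
 */
static int
sketch_like_prefix(const char *patt, char *prefix)
{
	int			pos = 0;
	int			plen = 0;

	for (;;)
	{
		char		c = patt[pos];

		if (c == '\0' || c == '%' || c == '_')
			break;				/* end of the fixed prefix */
		if (c == '\\')
		{
			pos++;				/* escape: take the next character literally */
			if (patt[pos] == '\0')
				break;
			c = patt[pos];
		}
		prefix[plen++] = c;
		pos++;
	}
	prefix[plen] = '\0';
	return pos;					/* the pattern remainder starts at patt[pos] */
}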
@@ -4926,20 +5595,11 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
 
 static Pattern_Prefix_Status
 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
-                                  Const **prefix_const, Const **rest_const)
+                                  Const **prefix_const, Selectivity *rest_selec)
 {
-       char       *match;
-       int                     pos,
-                               match_pos,
-                               prev_pos,
-                               prev_match_pos;
-       bool            have_leading_paren;
-       char       *patt;
-       char       *rest;
        Oid                     typeid = patt_const->consttype;
-       bool            is_multibyte = (pg_database_encoding_max_length() > 1);
-       pg_locale_t     locale = 0;
-       bool            locale_is_c = false;
+       char       *prefix;
+       bool            exact;
 
        /*
         * Should be unnecessary, there are no bytea regex operators defined. As
@@ -4951,201 +5611,79 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("regular-expression matching not supported on type bytea")));
 
-       if (case_insensitive)
+       /* Use the regexp machinery to extract the prefix, if any */
+       prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
+                                                                case_insensitive, collation,
+                                                                &exact);
+
+       if (prefix == NULL)
        {
-               /* If case-insensitive, we need locale info */
-               if (lc_ctype_is_c(collation))
-                       locale_is_c = true;
-               else if (collation != DEFAULT_COLLATION_OID)
+               *prefix_const = NULL;
+
+               if (rest_selec != NULL)
                {
-                       if (!OidIsValid(collation))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for regular expression"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-                       locale = pg_newlocale_from_collation(collation);
-               }
-       }
-
-       /* the right-hand const is type text for all of these */
-       patt = TextDatumGetCString(patt_const->constvalue);
-
-       /*
-        * Check for ARE director prefix.  It's worth our trouble to recognize
-        * this because similar_escape() used to use it, and some other code might
-        * still use it, to force ARE mode.
-        */
-       pos = 0;
-       if (strncmp(patt, "***:", 4) == 0)
-               pos = 4;
-
-       /* Pattern must be anchored left */
-       if (patt[pos] != '^')
-       {
-               rest = patt;
-
-               *prefix_const = NULL;
-               *rest_const = string_to_const(rest, typeid);
-
-               return Pattern_Prefix_None;
-       }
-       pos++;
+                       char       *patt = TextDatumGetCString(patt_const->constvalue);
 
-       /*
-        * If '|' is present in pattern, then there may be multiple alternatives
-        * for the start of the string.  (There are cases where this isn't so, for
-        * instance if the '|' is inside parens, but detecting that reliably is
-        * too hard.)
-        */
-       if (strchr(patt + pos, '|') != NULL)
-       {
-               rest = patt;
-
-               *prefix_const = NULL;
-               *rest_const = string_to_const(rest, typeid);
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive,
+                                                                                       0);
+                       pfree(patt);
+               }
 
                return Pattern_Prefix_None;
        }
 
-       /* OK, allocate space for pattern */
-       match = palloc(strlen(patt) + 1);
-       prev_match_pos = match_pos = 0;
-
-       /*
-        * We special-case the syntax '^(...)$' because psql uses it.  But beware:
-        * sequences beginning "(?" are not what they seem, unless they're "(?:".
-        * (We must recognize that because of similar_escape().)
-        */
-       have_leading_paren = false;
-       if (patt[pos] == '(' &&
-               (patt[pos + 1] != '?' || patt[pos + 2] == ':'))
-       {
-               have_leading_paren = true;
-               pos += (patt[pos + 1] != '?' ? 1 : 3);
-       }
+       *prefix_const = string_to_const(prefix, typeid);
 
-       /* Scan remainder of pattern */
-       prev_pos = pos;
-       while (patt[pos])
+       if (rest_selec != NULL)
        {
-               int                     len;
-
-               /*
-                * Check for characters that indicate multiple possible matches here.
-                * Also, drop out at ')' or '$' so the termination test works right.
-                */
-               if (patt[pos] == '.' ||
-                       patt[pos] == '(' ||
-                       patt[pos] == ')' ||
-                       patt[pos] == '[' ||
-                       patt[pos] == '^' ||
-                       patt[pos] == '$')
-                       break;
-
-               /* Stop if case-varying character (it's sort of a wildcard) */
-               if (case_insensitive &&
-                       pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
-                       break;
-
-               /*
-                * Check for quantifiers.  Except for +, this means the preceding
-                * character is optional, so we must remove it from the prefix too!
-                */
-               if (patt[pos] == '*' ||
-                       patt[pos] == '?' ||
-                       patt[pos] == '{')
+               if (exact)
                {
-                       match_pos = prev_match_pos;
-                       pos = prev_pos;
-                       break;
+                       /* Exact match, so there's no additional selectivity */
+                       *rest_selec = 1.0;
                }
-               if (patt[pos] == '+')
+               else
                {
-                       pos = prev_pos;
-                       break;
-               }
+                       char       *patt = TextDatumGetCString(patt_const->constvalue);
 
-               /*
-                * Normally, backslash quotes the next character.  But in AREs,
-                * backslash followed by alphanumeric is an escape, not a quoted
-                * character.  Must treat it as having multiple possible matches.
-                * Note: since only ASCII alphanumerics are escapes, we don't have to
-                * be paranoid about multibyte or collations here.
-                */
-               if (patt[pos] == '\\')
-               {
-                       if (isalnum((unsigned char) patt[pos + 1]))
-                               break;
-                       pos++;
-                       if (patt[pos] == '\0')
-                               break;
+                       *rest_selec = regex_selectivity(patt, strlen(patt),
+                                                                                       case_insensitive,
+                                                                                       strlen(prefix));
+                       pfree(patt);
                }
-               /* save position in case we need to back up on next loop cycle */
-               prev_match_pos = match_pos;
-               prev_pos = pos;
-               /* must use encoding-aware processing here */
-               len = pg_mblen(&patt[pos]);
-               memcpy(&match[match_pos], &patt[pos], len);
-               match_pos += len;
-               pos += len;
        }
 
-       match[match_pos] = '\0';
-       rest = &patt[pos];
-
-       if (have_leading_paren && patt[pos] == ')')
-               pos++;
-
-       if (patt[pos] == '$' && patt[pos + 1] == '\0')
-       {
-               rest = &patt[pos + 1];
-
-               *prefix_const = string_to_const(match, typeid);
-               *rest_const = string_to_const(rest, typeid);
-
-               pfree(patt);
-               pfree(match);
+       pfree(prefix);
 
+       if (exact)
                return Pattern_Prefix_Exact;    /* pattern specifies exact match */
-       }
-
-       *prefix_const = string_to_const(match, typeid);
-       *rest_const = string_to_const(rest, typeid);
-
-       pfree(patt);
-       pfree(match);
-
-       if (match_pos > 0)
+       else
                return Pattern_Prefix_Partial;
-
-       return Pattern_Prefix_None;
 }
 
 Pattern_Prefix_Status
 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
-                                        Const **prefix, Const **rest)
+                                        Const **prefix, Selectivity *rest_selec)
 {
        Pattern_Prefix_Status result;
 
        switch (ptype)
        {
                case Pattern_Type_Like:
-                       result = like_fixed_prefix(patt, false, collation, prefix, rest);
+                       result = like_fixed_prefix(patt, false, collation,
+                                                                          prefix, rest_selec);
                        break;
                case Pattern_Type_Like_IC:
-                       result = like_fixed_prefix(patt, true, collation, prefix, rest);
+                       result = like_fixed_prefix(patt, true, collation,
+                                                                          prefix, rest_selec);
                        break;
                case Pattern_Type_Regex:
-                       result = regex_fixed_prefix(patt, false, collation, prefix, rest);
+                       result = regex_fixed_prefix(patt, false, collation,
+                                                                               prefix, rest_selec);
                        break;
                case Pattern_Type_Regex_IC:
-                       result = regex_fixed_prefix(patt, true, collation, prefix, rest);
+                       result = regex_fixed_prefix(patt, true, collation,
+                                                                               prefix, rest_selec);
                        break;
                default:
                        elog(ERROR, "unrecognized ptype: %d", (int) ptype);
@@ -5166,7 +5704,7 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
  * together with info about MCVs and NULLs.
  *
  * We use the >= and < operators from the specified btree opfamily to do the
- * estimation. The given variable and Const must be of the associated
+ * estimation.  The given variable and Const must be of the associated
  * datatype.
  *
  * XXX Note: we make use of the upper bound to estimate operator selectivity
@@ -5225,7 +5763,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
 
                /*
                 * Merge the two selectivities in the same way as for a range query
-                * (see clauselist_selectivity()).      Note that we don't need to worry
+                * (see clauselist_selectivity()).  Note that we don't need to worry
                 * about double-exclusion of nulls, since ineq_histogram_selectivity
                 * doesn't count those anyway.
                 */
@@ -5260,7 +5798,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
 
 /*
  * Estimate the selectivity of a pattern of the specified type.
- * Note that any fixed prefix of the pattern will have been removed already.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
  *
  * For now, we use a very simplistic approach: fixed characters reduce the
  * selectivity a good deal, character ranges reduce it a little,
@@ -5274,37 +5813,10 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
 #define PARTIAL_WILDCARD_SEL 2.0
 
 static Selectivity
-like_selectivity(Const *patt_const, bool case_insensitive)
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel = 1.0;
        int                     pos;
-       Oid                     typeid = patt_const->consttype;
-       char       *patt;
-       int                     pattlen;
-
-       /* the right-hand const is type text or bytea */
-       Assert(typeid == BYTEAOID || typeid == TEXTOID);
-
-       if (typeid == BYTEAOID && case_insensitive)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                  errmsg("case insensitive matching not supported on type bytea")));
-
-       if (typeid != BYTEAOID)
-       {
-               patt = TextDatumGetCString(patt_const->constvalue);
-               pattlen = strlen(patt);
-       }
-       else
-       {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
-
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
-               patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
-       }
 
        /* Skip any leading wildcard; it's already factored into initial sel */
        for (pos = 0; pos < pattlen; pos++)
@@ -5334,13 +5846,11 @@ like_selectivity(Const *patt_const, bool case_insensitive)
        /* Could get sel > 1 if multiple wildcards */
        if (sel > 1.0)
                sel = 1.0;
-
-       pfree(patt);
        return sel;
 }
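
Because like_selectivity() now works directly on a character-string fragment, its arithmetic is easy to reproduce outside the server.  The stand-alone sketch below is a simplified model (no escape-character or case-insensitivity handling, single-byte characters only); the selectivity constants are restated from the definitions above this hunk and should be checked against the file if they change.

#include <stdio.h>

/* Restated from selfuncs.c; verify against the current definitions. */
#define FIXED_CHAR_SEL		0.20
#define ANY_CHAR_SEL		0.9
#define FULL_WILDCARD_SEL	5.0

/* Simplified model of like_selectivity(): no escapes, no case folding. */
static double
toy_like_selectivity(const char *patt, int pattlen)
{
	double		sel = 1.0;
	int			pos;

	/* Skip any leading wildcard; it's already factored into initial sel */
	for (pos = 0; pos < pattlen; pos++)
	{
		if (patt[pos] != '%' && patt[pos] != '_')
			break;
	}

	for (; pos < pattlen; pos++)
	{
		if (patt[pos] == '%')
			sel *= FULL_WILDCARD_SEL;
		else if (patt[pos] == '_')
			sel *= ANY_CHAR_SEL;
		else
			sel *= FIXED_CHAR_SEL;
	}

	/* Could get sel > 1 if multiple wildcards */
	if (sel > 1.0)
		sel = 1.0;
	return sel;
}

int
main(void)
{
	/* remainder of LIKE 'abc%def' once the fixed prefix "abc" is removed */
	printf("%g\n", toy_like_selectivity("%def", 4));	/* prints 0.008 */
	return 0;
}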
 
 static Selectivity
-regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
 {
        Selectivity sel = 1.0;
        int                     paren_depth = 0;
@@ -5433,26 +5943,10 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
 }
 
 static Selectivity
-regex_selectivity(Const *patt_const, bool case_insensitive)
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
+                                 int fixed_prefix_len)
 {
        Selectivity sel;
-       char       *patt;
-       int                     pattlen;
-       Oid                     typeid = patt_const->consttype;
-
-       /*
-        * Should be unnecessary, there are no bytea regex operators defined. As
-        * such, it should be noted that the rest of this function has *not* been
-        * made safe for binary (possibly NULL containing) strings.
-        */
-       if (typeid == BYTEAOID)
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("regular-expression matching not supported on type bytea")));
-
-       /* the right-hand const is type text for all of these */
-       patt = TextDatumGetCString(patt_const->constvalue);
-       pattlen = strlen(patt);
 
        /* If patt doesn't end with $, consider it to have a trailing wildcard */
        if (pattlen > 0 && patt[pattlen - 1] == '$' &&
@@ -5466,40 +5960,31 @@ regex_selectivity(Const *patt_const, bool case_insensitive)
                /* no trailing $ */
                sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
                sel *= FULL_WILDCARD_SEL;
-               if (sel > 1.0)
-                       sel = 1.0;
        }
+
+       /* If there's a fixed prefix, discount its selectivity */
+       if (fixed_prefix_len > 0)
+               sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
+
+       /* Make sure result stays in range */
+       CLAMP_PROBABILITY(sel);
        return sel;
 }
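
The new fixed_prefix_len argument lets regex_fixed_prefix() hand the whole original pattern to this function while backing out the part already covered by the extracted prefix.  A worked example (illustrative only; FIXED_CHAR_SEL is assumed to be 0.20, as defined above):

/*
 * Suppose the extracted prefix is "abc" (3 characters).  The caller passes
 * the full pattern plus fixed_prefix_len = 3.  regex_selectivity_sub() has
 * already charged FIXED_CHAR_SEL for each of those three characters, so the
 * result is divided by FIXED_CHAR_SEL^3 = 0.008 (i.e. multiplied by 125) to
 * avoid double-counting the prefix; CLAMP_PROBABILITY then keeps the final
 * value inside [0, 1] if the division overshoots.
 */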
 
-static Selectivity
-pattern_selectivity(Const *patt, Pattern_Type ptype)
-{
-       Selectivity result;
 
-       switch (ptype)
-       {
-               case Pattern_Type_Like:
-                       result = like_selectivity(patt, false);
-                       break;
-               case Pattern_Type_Like_IC:
-                       result = like_selectivity(patt, true);
-                       break;
-               case Pattern_Type_Regex:
-                       result = regex_selectivity(patt, false);
-                       break;
-               case Pattern_Type_Regex_IC:
-                       result = regex_selectivity(patt, true);
-                       break;
-               default:
-                       elog(ERROR, "unrecognized ptype: %d", (int) ptype);
-                       result = 1.0;           /* keep compiler quiet */
-                       break;
-       }
-       return result;
+/*
+ * For bytea, the increment function need only increment the current byte
+ * (there are no multibyte characters to worry about).
+ */
+static bool
+byte_increment(unsigned char *ptr, int len)
+{
+       if (*ptr >= 255)
+               return false;
+       (*ptr)++;
+       return true;
 }
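
The contract byte_increment() implements -- advance the character in place and return true, or return false when no larger character exists -- is what the search loop in make_greater_string() below relies on.  The stand-alone toy below models that loop with plain ASCII bytes, treating 'z' as the top of the sort order so the truncate-and-retry step fires; the suffix-character trick and the collation-aware comparison of the real code are omitted, with strcmp() standing in for the latter.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Toy incrementer: 'z' is the largest character in this pretend sort order */
static bool
toy_increment(unsigned char *ptr)
{
	if (*ptr >= 'z')
		return false;
	(*ptr)++;
	return true;
}

/*
 * Toy model of make_greater_string()'s search: increment the last character;
 * if that cannot produce a larger string, truncate it and try the next one.
 */
static bool
toy_greater_string(char *workstr)
{
	char		orig[64];
	int			len = strlen(workstr);

	strcpy(orig, workstr);
	while (len > 0)
	{
		unsigned char *lastchar = (unsigned char *) (workstr + len - 1);

		while (toy_increment(lastchar))
		{
			if (strcmp(orig, workstr) < 0)
				return true;	/* found a greater string */
		}
		/* no luck at this position; truncate and move one character left */
		workstr[--len] = '\0';
	}
	return false;				/* e.g. an all-'z' input has no successor */
}

int
main(void)
{
	char		buf[64] = "fonz";

	if (toy_greater_string(buf))
		printf("greater than \"fonz\": \"%s\"\n", buf);		/* "foo" */
	return 0;
}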
 
-
 /*
  * Try to generate a string greater than the given string or any
  * string it is a prefix of.  If successful, return a palloc'd string
@@ -5515,7 +6000,7 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
  * that is not a bulletproof guarantee that an extension of the string might
  * not sort after it; an example is that "foo " is less than "foo!", but it
  * is not clear that a "dictionary" sort ordering will consider "foo!" less
- * than "foo bar".     CAUTION: Therefore, this function should be used only for
+ * than "foo bar".  CAUTION: Therefore, this function should be used only for
  * estimation purposes when working in a non-C collation.
  *
  * To try to catch most cases where an extended string might otherwise sort
@@ -5523,13 +6008,23 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
  * and "9" is seen as largest by the collation, and append that to the given
  * prefix before trying to find a string that compares as larger.
  *
- * If we max out the righthand byte, truncate off the last character
- * and start incrementing the next.  For example, if "z" were the last
- * character in the sort order, then we could produce "foo" as a
- * string greater than "fonz".
+ * To search for a greater string, we repeatedly "increment" the rightmost
+ * character, using an encoding-specific character incrementer function.
+ * When it's no longer possible to increment the last character, we truncate
+ * off that character and start incrementing the next-to-rightmost.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
  *
  * This could be rather slow in the worst case, but in most cases we
  * won't have to try more than one or two strings before succeeding.
+ *
+ * Note that it's important for the character incrementer not to be exhaustive
+ * about producing every possible character code, since in some cases the only
+ * way to get a larger string is to increment a previous character position.
+ * So we don't want to spend too much time trying every possible character
+ * code at the last position.  A good rule of thumb is to be sure that we
+ * don't try more than 256*K values for a K-byte character (and definitely
+ * not 256^K, which is what an exhaustive search would approach).
  */
 Const *
 make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
@@ -5539,6 +6034,7 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
        int                     len;
        Datum           cmpstr;
        text       *cmptxt = NULL;
+       mbcharacter_incrementer charinc;
 
        /*
         * Get a modifiable copy of the prefix string in C-string format, and set
@@ -5555,13 +6051,12 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
        }
        else if (datatype == BYTEAOID)
        {
-               bytea      *bstr = DatumGetByteaP(str_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
 
-               len = VARSIZE(bstr) - VARHDRSZ;
+               len = VARSIZE_ANY_EXHDR(bstr);
                workstr = (char *) palloc(len);
-               memcpy(workstr, VARDATA(bstr), len);
-               if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
-                       pfree(bstr);
+               memcpy(workstr, VARDATA_ANY(bstr), len);
+               Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
                cmpstr = str_const->constvalue;
        }
        else
@@ -5600,29 +6095,41 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
                }
        }
 
+       /* Select appropriate character-incrementer function */
+       if (datatype == BYTEAOID)
+               charinc = byte_increment;
+       else
+               charinc = pg_database_encoding_character_incrementer();
+
+       /* And search ... */
        while (len > 0)
        {
-               unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
-               unsigned char savelastchar = *lastchar;
+               int                     charlen;
+               unsigned char *lastchar;
+
+               /* Identify the last character --- for bytea, just the last byte */
+               if (datatype == BYTEAOID)
+                       charlen = 1;
+               else
+                       charlen = len - pg_mbcliplen(workstr, len, len - 1);
+               lastchar = (unsigned char *) (workstr + len - charlen);
 
                /*
-                * Try to generate a larger string by incrementing the last byte.
+                * Try to generate a larger string by incrementing the last character
+                * (for BYTEA, we treat each byte as a character).
+                *
+                * Note: the incrementer function is expected to return true if it's
+                * generated a valid-per-the-encoding new character, otherwise false.
+                * The contents of the character on false return are unspecified.
                 */
-               while (*lastchar < (unsigned char) 255)
+               while (charinc(lastchar, charlen))
                {
                        Const      *workstr_const;
 
-                       (*lastchar)++;
-
-                       if (datatype != BYTEAOID)
-                       {
-                               /* do not generate invalid encoding sequences */
-                               if (!pg_verifymbstr(workstr, len, true))
-                                       continue;
-                               workstr_const = string_to_const(workstr, datatype);
-                       }
-                       else
+                       if (datatype == BYTEAOID)
                                workstr_const = string_to_bytea_const(workstr, len);
+                       else
+                               workstr_const = string_to_const(workstr, datatype);
 
                        if (DatumGetBool(FunctionCall2Coll(ltproc,
                                                                                           collation,
@@ -5641,20 +6148,12 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
                        pfree(workstr_const);
                }
 
-               /* restore last byte so we don't confuse pg_mbcliplen */
-               *lastchar = savelastchar;
-
                /*
-                * Truncate off the last character, which might be more than 1 byte,
-                * depending on the character encoding.
+                * No luck here, so truncate off the last character and try to
+                * increment the next one.
                 */
-               if (datatype != BYTEAOID && pg_database_encoding_max_length() > 1)
-                       len = pg_mbcliplen(workstr, len, len - 1);
-               else
-                       len -= 1;
-
-               if (datatype != BYTEAOID)
-                       workstr[len] = '\0';
+               len -= charlen;
+               workstr[len] = '\0';
        }
 
        /* Failed... */
@@ -5750,77 +6249,189 @@ string_to_bytea_const(const char *str, size_t str_len)
  *
  * Index cost estimation functions
  *
- * genericcostestimate is a general-purpose estimator for use when we
- * don't have any better idea about how to estimate.  Index-type-specific
- * knowledge can be incorporated in the type-specific routines.
- *
- * One bit of index-type-specific knowledge we can relatively easily use
- * in genericcostestimate is the estimate of the number of index tuples
- * visited.  If numIndexTuples is not 0 then it is used as the estimate,
- * otherwise we compute a generic estimate.
- *
  *-------------------------------------------------------------------------
  */
 
-static void
+List *
+deconstruct_indexquals(IndexPath *path)
+{
+       List       *result = NIL;
+       IndexOptInfo *index = path->indexinfo;
+       ListCell   *lcc,
+                          *lci;
+
+       forboth(lcc, path->indexquals, lci, path->indexqualcols)
+       {
+               RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc);
+               int                     indexcol = lfirst_int(lci);
+               Expr       *clause;
+               Node       *leftop,
+                                  *rightop;
+               IndexQualInfo *qinfo;
+
+               clause = rinfo->clause;
+
+               qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
+               qinfo->rinfo = rinfo;
+               qinfo->indexcol = indexcol;
+
+               if (IsA(clause, OpExpr))
+               {
+                       qinfo->clause_op = ((OpExpr *) clause)->opno;
+                       leftop = get_leftop(clause);
+                       rightop = get_rightop(clause);
+                       if (match_index_to_operand(leftop, indexcol, index))
+                       {
+                               qinfo->varonleft = true;
+                               qinfo->other_operand = rightop;
+                       }
+                       else
+                       {
+                               Assert(match_index_to_operand(rightop, indexcol, index));
+                               qinfo->varonleft = false;
+                               qinfo->other_operand = leftop;
+                       }
+               }
+               else if (IsA(clause, RowCompareExpr))
+               {
+                       RowCompareExpr *rc = (RowCompareExpr *) clause;
+
+                       qinfo->clause_op = linitial_oid(rc->opnos);
+                       /* Examine only first columns to determine left/right sides */
+                       if (match_index_to_operand((Node *) linitial(rc->largs),
+                                                                          indexcol, index))
+                       {
+                               qinfo->varonleft = true;
+                               qinfo->other_operand = (Node *) rc->rargs;
+                       }
+                       else
+                       {
+                               Assert(match_index_to_operand((Node *) linitial(rc->rargs),
+                                                                                         indexcol, index));
+                               qinfo->varonleft = false;
+                               qinfo->other_operand = (Node *) rc->largs;
+                       }
+               }
+               else if (IsA(clause, ScalarArrayOpExpr))
+               {
+                       ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+
+                       qinfo->clause_op = saop->opno;
+                       /* index column is always on the left in this case */
+                       Assert(match_index_to_operand((Node *) linitial(saop->args),
+                                                                                 indexcol, index));
+                       qinfo->varonleft = true;
+                       qinfo->other_operand = (Node *) lsecond(saop->args);
+               }
+               else if (IsA(clause, NullTest))
+               {
+                       qinfo->clause_op = InvalidOid;
+                       Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
+                                                                                 indexcol, index));
+                       qinfo->varonleft = true;
+                       qinfo->other_operand = NULL;
+               }
+               else
+               {
+                       elog(ERROR, "unsupported indexqual type: %d",
+                                (int) nodeTag(clause));
+               }
+
+               result = lappend(result, qinfo);
+       }
+       return result;
+}
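
To make the shape of the output concrete, here is what one list entry would contain for a simple operator clause (an illustration, not code from the patch):

/*
 * For an indexqual "indcol < 42", with indcol being the first index column
 * and appearing on the left of the operator, the IndexQualInfo comes out as
 *
 *   qinfo->rinfo         = the RestrictInfo wrapping the OpExpr
 *   qinfo->indexcol      = 0
 *   qinfo->varonleft     = true
 *   qinfo->clause_op     = OID of "<" for the column's datatype
 *   qinfo->other_operand = (Node *) the Const 42
 *
 * The helpers below and the per-AM estimators consume the quals in this
 * pre-digested form instead of re-deconstructing each clause themselves.
 */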
+
+/*
+ * Simple function to compute the total eval cost of the "other operands"
+ * in an IndexQualInfo list.  Since we know these will be evaluated just
+ * once per scan, there's no need to distinguish startup from per-row cost.
+ */
+static Cost
+other_operands_eval_cost(PlannerInfo *root, List *qinfos)
+{
+       Cost            qual_arg_cost = 0;
+       ListCell   *lc;
+
+       foreach(lc, qinfos)
+       {
+               IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
+               QualCost        index_qual_cost;
+
+               cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
+               qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+       }
+       return qual_arg_cost;
+}
+
+/*
+ * Get other-operand eval cost for an index orderby list.
+ *
+ * Index orderby expressions aren't represented as RestrictInfos (since they
+ * aren't boolean, usually).  So we can't apply deconstruct_indexquals to
+ * them.  However, they are much simpler to deal with since they are always
+ * OpExprs and the index column is always on the left.
+ */
+static Cost
+orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
+{
+       Cost            qual_arg_cost = 0;
+       ListCell   *lc;
+
+       foreach(lc, path->indexorderbys)
+       {
+               Expr       *clause = (Expr *) lfirst(lc);
+               Node       *other_operand;
+               QualCost        index_qual_cost;
+
+               if (IsA(clause, OpExpr))
+               {
+                       other_operand = get_rightop(clause);
+               }
+               else
+               {
+                       elog(ERROR, "unsupported indexorderby type: %d",
+                                (int) nodeTag(clause));
+                       other_operand = NULL;           /* keep compiler quiet */
+               }
+
+               cost_qual_eval_node(&index_qual_cost, other_operand, root);
+               qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+       }
+       return qual_arg_cost;
+}
+
+void
 genericcostestimate(PlannerInfo *root,
-                                       IndexOptInfo *index,
-                                       List *indexQuals,
-                                       List *indexOrderBys,
-                                       RelOptInfo *outer_rel,
-                                       double numIndexTuples,
-                                       Cost *indexStartupCost,
-                                       Cost *indexTotalCost,
-                                       Selectivity *indexSelectivity,
-                                       double *indexCorrelation)
+                                       IndexPath *path,
+                                       double loop_count,
+                                       List *qinfos,
+                                       GenericCosts *costs)
 {
+       IndexOptInfo *index = path->indexinfo;
+       List       *indexQuals = path->indexquals;
+       List       *indexOrderBys = path->indexorderbys;
+       Cost            indexStartupCost;
+       Cost            indexTotalCost;
+       Selectivity indexSelectivity;
+       double          indexCorrelation;
        double          numIndexPages;
+       double          numIndexTuples;
+       double          spc_random_page_cost;
        double          num_sa_scans;
        double          num_outer_scans;
        double          num_scans;
-       QualCost        index_qual_cost;
        double          qual_op_cost;
        double          qual_arg_cost;
-       double          spc_random_page_cost;
        List       *selectivityQuals;
        ListCell   *l;
 
-       /*----------
+       /*
         * If the index is partial, AND the index predicate with the explicitly
         * given indexquals to produce a more accurate idea of the index
-        * selectivity.  However, we need to be careful not to insert redundant
-        * clauses, because clauselist_selectivity() is easily fooled into
-        * computing a too-low selectivity estimate.  Our approach is to add
-        * only the index predicate clause(s) that cannot be proven to be implied
-        * by the given indexquals.  This successfully handles cases such as a
-        * qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
-        * There are many other cases where we won't detect redundancy, leading
-        * to a too-low selectivity estimate, which will bias the system in favor
-        * of using partial indexes where possible.  That is not necessarily bad
-        * though.
-        *
-        * Note that indexQuals contains RestrictInfo nodes while the indpred
-        * does not.  This is OK for both predicate_implied_by() and
-        * clauselist_selectivity().
-        *----------
+        * selectivity.
         */
-       if (index->indpred != NIL)
-       {
-               List       *predExtraQuals = NIL;
-
-               foreach(l, index->indpred)
-               {
-                       Node       *predQual = (Node *) lfirst(l);
-                       List       *oneQual = list_make1(predQual);
-
-                       if (!predicate_implied_by(oneQual, indexQuals))
-                               predExtraQuals = list_concat(predExtraQuals, oneQual);
-               }
-               /* list_concat avoids modifying the passed-in indexQuals list */
-               selectivityQuals = list_concat(predExtraQuals, indexQuals);
-       }
-       else
-               selectivityQuals = indexQuals;
+       selectivityQuals = add_predicate_to_quals(index, indexQuals);
 
        /*
         * Check for ScalarArrayOpExpr index quals, and estimate the number of
@@ -5842,19 +6453,20 @@ genericcostestimate(PlannerInfo *root,
        }
 
        /* Estimate the fraction of main-table tuples that will be visited */
-       *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
-                                                                                          index->rel->relid,
-                                                                                          JOIN_INNER,
-                                                                                          NULL);
+       indexSelectivity = clauselist_selectivity(root, selectivityQuals,
+                                                                                         index->rel->relid,
+                                                                                         JOIN_INNER,
+                                                                                         NULL);
 
        /*
         * If caller didn't give us an estimate, estimate the number of index
         * tuples that will be visited.  We do it in this rather peculiar-looking
         * way in order to get the right answer for partial indexes.
         */
+       numIndexTuples = costs->numIndexTuples;
        if (numIndexTuples <= 0.0)
        {
-               numIndexTuples = *indexSelectivity * index->rel->tuples;
+               numIndexTuples = indexSelectivity * index->rel->tuples;
 
                /*
                 * The above calculation counts all the tuples visited across all
@@ -5881,16 +6493,19 @@ genericcostestimate(PlannerInfo *root,
         *
         * We use the simplistic method of taking a pro-rata fraction of the total
         * number of index pages.  In effect, this counts only leaf pages and not
-        * any overhead such as index metapage or upper tree levels. In practice
-        * this seems a better approximation than charging for access to the upper
-        * levels, perhaps because those tend to stay in cache under load.
+        * any overhead such as index metapage or upper tree levels.
+        *
+        * In practice access to upper index levels is often nearly free because
+        * those tend to stay in cache under load; moreover, the cost involved is
+        * highly dependent on index type.  We therefore ignore such costs here
+        * and leave it to the caller to add a suitable charge if needed.
         */
        if (index->pages > 1 && index->tuples > 1)
                numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
        else
                numIndexPages = 1.0;
 
-       /* fetch estimated page cost for schema containing index */
+       /* fetch estimated page cost for tablespace containing index */
        get_tablespace_page_costs(index->reltablespace,
                                                          &spc_random_page_cost,
                                                          NULL);
@@ -5901,9 +6516,9 @@ genericcostestimate(PlannerInfo *root,
         * The above calculations are all per-index-scan.  However, if we are in a
         * nestloop inner scan, we can expect the scan to be repeated (with
         * different search keys) for each row of the outer relation.  Likewise,
-        * ScalarArrayOpExpr quals result in multiple index scans.      This creates
+        * ScalarArrayOpExpr quals result in multiple index scans.  This creates
         * the potential for cache effects to reduce the number of disk page
-        * fetches needed.      We want to estimate the average per-scan I/O cost in
+        * fetches needed.  We want to estimate the average per-scan I/O cost in
         * the presence of caching.
         *
         * We use the Mackert-Lohman formula (see costsize.c for details) to
@@ -5912,16 +6527,8 @@ genericcostestimate(PlannerInfo *root,
         * Note that we are counting pages not tuples anymore, so we take N = T =
         * index size, as if there were one "tuple" per page.
         */
-       if (outer_rel != NULL && outer_rel->rows > 1)
-       {
-               num_outer_scans = outer_rel->rows;
-               num_scans = num_sa_scans * num_outer_scans;
-       }
-       else
-       {
-               num_outer_scans = 1;
-               num_scans = num_sa_scans;
-       }
+       num_outer_scans = loop_count;
+       num_scans = num_sa_scans * num_outer_scans;
 
        if (num_scans > 1)
        {
@@ -5941,7 +6548,7 @@ genericcostestimate(PlannerInfo *root,
                 * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
                 * since that's internal to the indexscan.)
                 */
-               *indexTotalCost = (pages_fetched * spc_random_page_cost)
+               indexTotalCost = (pages_fetched * spc_random_page_cost)
                        / num_outer_scans;
        }
        else
@@ -5950,101 +6557,115 @@ genericcostestimate(PlannerInfo *root,
                 * For a single index scan, we just charge spc_random_page_cost per
                 * page touched.
                 */
-               *indexTotalCost = numIndexPages * spc_random_page_cost;
+               indexTotalCost = numIndexPages * spc_random_page_cost;
        }
 
-       /*
-        * A difficulty with the leaf-pages-only cost approach is that for small
-        * selectivities (eg, single index tuple fetched) all indexes will look
-        * equally attractive because we will estimate exactly 1 leaf page to be
-        * fetched.  All else being equal, we should prefer physically smaller
-        * indexes over larger ones.  (An index might be smaller because it is
-        * partial or because it contains fewer columns; presumably the other
-        * columns in the larger index aren't useful to the query, or the larger
-        * index would have better selectivity.)
-        *
-        * We can deal with this by adding a very small "fudge factor" that
-        * depends on the index size.  The fudge factor used here is one
-        * spc_random_page_cost per 100000 index pages, which should be small
-        * enough to not alter index-vs-seqscan decisions, but will prevent
-        * indexes of different sizes from looking exactly equally attractive.
-        */
-       *indexTotalCost += index->pages * spc_random_page_cost / 100000.0;
-
        /*
         * CPU cost: any complex expressions in the indexquals will need to be
         * evaluated once at the start of the scan to reduce them to runtime keys
         * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
         * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
-        * indexqual operator.  Because we have numIndexTuples as a per-scan
+        * indexqual operator.  Because we have numIndexTuples as a per-scan
         * number, we have to multiply by num_sa_scans to get the correct result
         * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
         * ORDER BY expressions.
         *
-        * Note: this neglects the possible costs of rechecking lossy operators
-        * and OR-clause expressions.  Detecting that that might be needed seems
-        * more expensive than it's worth, though, considering all the other
-        * inaccuracies here ...
-        */
-       cost_qual_eval(&index_qual_cost, indexQuals, root);
-       qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
-       cost_qual_eval(&index_qual_cost, indexOrderBys, root);
-       qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+        * Note: this neglects the possible costs of rechecking lossy operators.
+        * Detecting that that might be needed seems more expensive than it's
+        * worth, though, considering all the other inaccuracies here ...
+        */
+       qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+               orderby_operands_eval_cost(root, path);
        qual_op_cost = cpu_operator_cost *
                (list_length(indexQuals) + list_length(indexOrderBys));
-       qual_arg_cost -= qual_op_cost;
-       if (qual_arg_cost < 0)          /* just in case... */
-               qual_arg_cost = 0;
 
-       *indexStartupCost = qual_arg_cost;
-       *indexTotalCost += qual_arg_cost;
-       *indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
+       indexStartupCost = qual_arg_cost;
+       indexTotalCost += qual_arg_cost;
+       indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
 
        /*
-        * We also add a CPU-cost component to represent the general costs of
-        * starting an indexscan, such as analysis of btree index keys and initial
-        * tree descent.  This is estimated at 100x cpu_operator_cost, which is a
-        * bit arbitrary but seems the right order of magnitude. (As noted above,
-        * we don't charge any I/O for touching upper tree levels, but charging
-        * nothing at all has been found too optimistic.)
-        *
-        * Although this is startup cost with respect to any one scan, we add it
-        * to the "total" cost component because it's only very interesting in the
-        * many-ScalarArrayOpExpr-scan case, and there it will be paid over the
-        * life of the scan node.
+        * Generic assumption about index correlation: there isn't any.
         */
-       *indexTotalCost += num_sa_scans * 100.0 * cpu_operator_cost;
+       indexCorrelation = 0.0;
 
        /*
-        * Generic assumption about index correlation: there isn't any.
+        * Return everything to caller.
         */
-       *indexCorrelation = 0.0;
+       costs->indexStartupCost = indexStartupCost;
+       costs->indexTotalCost = indexTotalCost;
+       costs->indexSelectivity = indexSelectivity;
+       costs->indexCorrelation = indexCorrelation;
+       costs->numIndexPages = numIndexPages;
+       costs->numIndexTuples = numIndexTuples;
+       costs->spc_random_page_cost = spc_random_page_cost;
+       costs->num_sa_scans = num_sa_scans;
 }
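
With the struct-based interface, an index AM's cost estimator builds the qinfos list, optionally seeds costs.numIndexTuples, lets genericcostestimate() do the generic work, and then adds any AM-specific charges before copying the results out.  The sketch below shows that calling pattern for a hypothetical AM (toycostestimate is a made-up name; btcostestimate below follows the same shape with btree-specific refinements):

static void
toycostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				Cost *indexStartupCost, Cost *indexTotalCost,
				Selectivity *indexSelectivity, double *indexCorrelation,
				double *indexPages)
{
	List	   *qinfos = deconstruct_indexquals(path);
	GenericCosts costs;

	MemSet(&costs, 0, sizeof(costs));
	/* leave costs.numIndexTuples at 0 so genericcostestimate() estimates it */
	genericcostestimate(root, path, loop_count, qinfos, &costs);

	/* an AM-specific estimator would add its own descent/fudge charges here */

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}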
 
+/*
+ * If the index is partial, add its predicate to the given qual list.
+ *
+ * ANDing the index predicate with the explicitly given indexquals produces
+ * a more accurate idea of the index's selectivity.  However, we need to be
+ * careful not to insert redundant clauses, because clauselist_selectivity()
+ * is easily fooled into computing a too-low selectivity estimate.  Our
+ * approach is to add only the predicate clause(s) that cannot be proven to
+ * be implied by the given indexquals.  This successfully handles cases such
+ * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
+ * There are many other cases where we won't detect redundancy, leading to a
+ * too-low selectivity estimate, which will bias the system in favor of using
+ * partial indexes where possible.  That is not necessarily bad though.
+ *
+ * Note that indexQuals contains RestrictInfo nodes while the indpred
+ * does not, so the output list will be mixed.  This is OK for both
+ * predicate_implied_by() and clauselist_selectivity(), but might be
+ * problematic if the result were passed to other things.
+ */
+static List *
+add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
+{
+       List       *predExtraQuals = NIL;
+       ListCell   *lc;
 
-Datum
-btcostestimate(PG_FUNCTION_ARGS)
+       if (index->indpred == NIL)
+               return indexQuals;
+
+       foreach(lc, index->indpred)
+       {
+               Node       *predQual = (Node *) lfirst(lc);
+               List       *oneQual = list_make1(predQual);
+
+               if (!predicate_implied_by(oneQual, indexQuals))
+                       predExtraQuals = list_concat(predExtraQuals, oneQual);
+       }
+       /* list_concat avoids modifying the passed-in indexQuals list */
+       return list_concat(predExtraQuals, indexQuals);
+}
+
+
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                          Cost *indexStartupCost, Cost *indexTotalCost,
+                          Selectivity *indexSelectivity, double *indexCorrelation,
+                          double *indexPages)
 {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+       IndexOptInfo *index = path->indexinfo;
+       List       *qinfos;
+       GenericCosts costs;
        Oid                     relid;
        AttrNumber      colnum;
        VariableStatData vardata;
        double          numIndexTuples;
+       Cost            descentCost;
        List       *indexBoundQuals;
        int                     indexcol;
        bool            eqQualHere;
        bool            found_saop;
        bool            found_is_null_op;
        double          num_sa_scans;
-       ListCell   *l;
+       ListCell   *lc;
+
+       /* Do preliminary analysis of indexquals */
+       qinfos = deconstruct_indexquals(path);
 
        /*
         * For a btree scan, only leading '=' quals plus inequality quals for the
@@ -6052,8 +6673,8 @@ btcostestimate(PG_FUNCTION_ARGS)
         * the "boundary quals" that determine the starting and stopping points of
         * the index scan).  Additional quals can suppress visits to the heap, so
         * it's OK to count them in indexSelectivity, but they should not count
-        * for estimating numIndexTuples.  So we must examine the given indexQuals
-        * to find out which ones count as boundary quals.      We rely on the
+        * for estimating numIndexTuples.  So we must examine the given indexquals
+        * to find out which ones count as boundary quals.  We rely on the
         * knowledge that they are given in index column order.
         *
         * For a RowCompareExpr, we consider only the first column, just as
@@ -6069,91 +6690,53 @@ btcostestimate(PG_FUNCTION_ARGS)
        found_saop = false;
        found_is_null_op = false;
        num_sa_scans = 1;
-       foreach(l, indexQuals)
+       foreach(lc, qinfos)
        {
-               RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-               Expr       *clause;
-               Node       *leftop,
-                                  *rightop;
+               IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
+               RestrictInfo *rinfo = qinfo->rinfo;
+               Expr       *clause = rinfo->clause;
                Oid                     clause_op;
                int                     op_strategy;
-               bool            is_null_op = false;
 
-               Assert(IsA(rinfo, RestrictInfo));
-               clause = rinfo->clause;
-               if (IsA(clause, OpExpr))
+               if (indexcol != qinfo->indexcol)
                {
-                       leftop = get_leftop(clause);
-                       rightop = get_rightop(clause);
-                       clause_op = ((OpExpr *) clause)->opno;
+                       /* Beginning of a new column's quals */
+                       if (!eqQualHere)
+                               break;                  /* done if no '=' qual for indexcol */
+                       eqQualHere = false;
+                       indexcol++;
+                       if (indexcol != qinfo->indexcol)
+                               break;                  /* no quals at all for indexcol */
                }
-               else if (IsA(clause, RowCompareExpr))
-               {
-                       RowCompareExpr *rc = (RowCompareExpr *) clause;
 
-                       leftop = (Node *) linitial(rc->largs);
-                       rightop = (Node *) linitial(rc->rargs);
-                       clause_op = linitial_oid(rc->opnos);
-               }
-               else if (IsA(clause, ScalarArrayOpExpr))
+               if (IsA(clause, ScalarArrayOpExpr))
                {
-                       ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+                       int                     alength = estimate_array_length(qinfo->other_operand);
 
-                       leftop = (Node *) linitial(saop->args);
-                       rightop = (Node *) lsecond(saop->args);
-                       clause_op = saop->opno;
                        found_saop = true;
+                       /* count up number of SA scans induced by indexBoundQuals only */
+                       if (alength > 1)
+                               num_sa_scans *= alength;
                }
                else if (IsA(clause, NullTest))
                {
                        NullTest   *nt = (NullTest *) clause;
 
-                       leftop = (Node *) nt->arg;
-                       rightop = NULL;
-                       clause_op = InvalidOid;
                        if (nt->nulltesttype == IS_NULL)
                        {
                                found_is_null_op = true;
-                               is_null_op = true;
-                       }
-               }
-               else
-               {
-                       elog(ERROR, "unsupported indexqual type: %d",
-                                (int) nodeTag(clause));
-                       continue;                       /* keep compiler quiet */
-               }
-               if (match_index_to_operand(leftop, indexcol, index))
-               {
-                       /* clause_op is correct */
-               }
-               else if (match_index_to_operand(rightop, indexcol, index))
-               {
-                       /* Must flip operator to get the opfamily member */
-                       clause_op = get_commutator(clause_op);
-               }
-               else
-               {
-                       /* Must be past the end of quals for indexcol, try next */
-                       if (!eqQualHere)
-                               break;                  /* done if no '=' qual for indexcol */
-                       indexcol++;
-                       eqQualHere = false;
-                       if (match_index_to_operand(leftop, indexcol, index))
-                       {
-                               /* clause_op is correct */
-                       }
-                       else if (match_index_to_operand(rightop, indexcol, index))
-                       {
-                               /* Must flip operator to get the opfamily member */
-                               clause_op = get_commutator(clause_op);
-                       }
-                       else
-                       {
-                               /* No quals for new indexcol, so we are done */
-                               break;
+                               /* IS NULL is like = for selectivity determination purposes */
+                               eqQualHere = true;
                        }
                }
+
+               /*
+                * We would need to commute the clause_op if not varonleft, except
+                * that we only care if it's equality or not, so that refinement is
+                * unnecessary.
+                */
+               clause_op = qinfo->clause_op;
+
                /* check for equality operator */
                if (OidIsValid(clause_op))
                {
@@ -6163,20 +6746,7 @@ btcostestimate(PG_FUNCTION_ARGS)
                        if (op_strategy == BTEqualStrategyNumber)
                                eqQualHere = true;
                }
-               else if (is_null_op)
-               {
-                       /* IS NULL is like = for purposes of selectivity determination */
-                       eqQualHere = true;
-               }
-               /* count up number of SA scans induced by indexBoundQuals only */
-               if (IsA(clause, ScalarArrayOpExpr))
-               {
-                       ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
-                       int                     alength = estimate_array_length(lsecond(saop->args));
 
-                       if (alength > 1)
-                               num_sa_scans *= alength;
-               }
                indexBoundQuals = lappend(indexBoundQuals, rinfo);
        }
 
@@ -6194,9 +6764,17 @@ btcostestimate(PG_FUNCTION_ARGS)
                numIndexTuples = 1.0;
        else
        {
+               List       *selectivityQuals;
                Selectivity btreeSelectivity;
 
-               btreeSelectivity = clauselist_selectivity(root, indexBoundQuals,
+               /*
+                * If the index is partial, AND the index predicate with the
+                * index-bound quals to produce a more accurate idea of the number of
+                * rows covered by the bound conditions.
+                */
+               selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
+
+               btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
                                                                                                  index->rel->relid,
                                                                                                  JOIN_INNER,
                                                                                                  NULL);
@@ -6210,10 +6788,45 @@ btcostestimate(PG_FUNCTION_ARGS)
                numIndexTuples = rint(numIndexTuples / num_sa_scans);
        }
 
-       genericcostestimate(root, index, indexQuals, indexOrderBys,
-                                               outer_rel, numIndexTuples,
-                                               indexStartupCost, indexTotalCost,
-                                               indexSelectivity, indexCorrelation);
+       /*
+        * Now do generic index cost estimation.
+        */
+       MemSet(&costs, 0, sizeof(costs));
+       costs.numIndexTuples = numIndexTuples;
+
+       genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+       /*
+        * Add a CPU-cost component to represent the costs of initial btree
+        * descent.  We don't charge any I/O cost for touching upper btree levels,
+        * since they tend to stay in cache, but we still have to do about log2(N)
+        * comparisons to descend a btree of N leaf tuples.  We charge one
+        * cpu_operator_cost per comparison.
+        *
+        * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
+        * ones after the first one are not startup cost so far as the overall
+        * plan is concerned, so add them only to "total" cost.
+        */
+       if (index->tuples > 1)          /* avoid computing log(0) */
+       {
+               descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
+               costs.indexStartupCost += descentCost;
+               costs.indexTotalCost += costs.num_sa_scans * descentCost;
+       }
+
+       /*
+        * Even though we're not charging I/O cost for touching upper btree pages,
+        * it's still reasonable to charge some CPU cost per page descended
+        * through.  Moreover, if we had no such charge at all, bloated indexes
+        * would appear to have the same search cost as unbloated ones, at least
+        * in cases where only a single leaf page is expected to be visited.  This
+        * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
+        * touched.  The number of such pages is btree tree height plus one (ie,
+        * we charge for the leaf page too).  As above, charge once per SA scan.
+        */
+       descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+       costs.indexStartupCost += descentCost;
+       costs.indexTotalCost += costs.num_sa_scans * descentCost;
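
As a rough standalone sketch of the two descent charges above (all inputs here are invented, and 0.0025 is assumed as the default cpu_operator_cost):

    #include <math.h>
    #include <stdio.h>

    static const double cpu_operator_cost = 0.0025;    /* assumed GUC default */

    int
    main(void)
    {
        double  ntuples = 1000000.0;    /* hypothetical leaf tuple count */
        int     tree_height = 3;        /* hypothetical btree height */
        double  num_sa_scans = 1.0;     /* ScalarArrayOpExpr repetitions */
        double  startup = 0.0, total = 0.0, descent;

        /* about log2(N) comparisons to descend to the right leaf page */
        if (ntuples > 1)
        {
            descent = ceil(log(ntuples) / log(2.0)) * cpu_operator_cost;
            startup += descent;
            total += num_sa_scans * descent;
        }

        /* 50 * cpu_operator_cost per page descended, leaf page included */
        descent = (tree_height + 1) * 50.0 * cpu_operator_cost;
        startup += descent;
        total += num_sa_scans * descent;

        printf("descent charges: startup=%g, total=%g\n", startup, total);
        return 0;
    }

With these inputs the startup charge comes to 0.05 + 0.5 = 0.55, and with a single SA scan the total charge is the same.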
 
        /*
         * If we can get an estimate of the first column's ordering correlation C
@@ -6222,14 +6835,7 @@ btcostestimate(PG_FUNCTION_ARGS)
         * is that multiple columns dilute the importance of the first column's
         * ordering, but don't negate it entirely.  Before 8.0 we divided the
         * correlation by the number of columns, but that seems too strong.)
-        *
-        * We can skip all this if we found a ScalarArrayOpExpr, because then the
-        * call must be for a bitmap index scan, and the caller isn't going to
-        * care what the index correlation is.
         */
-       if (found_saop)
-               PG_RETURN_VOID();
-
        MemSet(&vardata, 0, sizeof(vardata));
 
        if (index->indexkeys[0] != 0)
@@ -6292,113 +6898,515 @@ btcostestimate(PG_FUNCTION_ARGS)
        if (HeapTupleIsValid(vardata.statsTuple))
        {
                Oid                     sortop;
-               float4     *numbers;
-               int                     nnumbers;
+               AttStatsSlot sslot;
 
                sortop = get_opfamily_member(index->opfamily[0],
                                                                         index->opcintype[0],
                                                                         index->opcintype[0],
                                                                         BTLessStrategyNumber);
                if (OidIsValid(sortop) &&
-                       get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
-                                                        STATISTIC_KIND_CORRELATION,
-                                                        sortop,
-                                                        NULL,
-                                                        NULL, NULL,
-                                                        &numbers, &nnumbers))
+                       get_attstatsslot(&sslot, vardata.statsTuple,
+                                                        STATISTIC_KIND_CORRELATION, sortop,
+                                                        ATTSTATSSLOT_NUMBERS))
                {
                        double          varCorrelation;
 
-                       Assert(nnumbers == 1);
-                       varCorrelation = numbers[0];
+                       Assert(sslot.nnumbers == 1);
+                       varCorrelation = sslot.numbers[0];
 
                        if (index->reverse_sort[0])
                                varCorrelation = -varCorrelation;
 
                        if (index->ncolumns > 1)
-                               *indexCorrelation = varCorrelation * 0.75;
+                               costs.indexCorrelation = varCorrelation * 0.75;
                        else
-                               *indexCorrelation = varCorrelation;
+                               costs.indexCorrelation = varCorrelation;
 
-                       free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
+                       free_attstatsslot(&sslot);
                }
        }
 
        ReleaseVariableStats(vardata);
 
-       PG_RETURN_VOID();
+       *indexStartupCost = costs.indexStartupCost;
+       *indexTotalCost = costs.indexTotalCost;
+       *indexSelectivity = costs.indexSelectivity;
+       *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
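
The multicolumn damping applied to indexCorrelation above reduces to a one-line rule; a minimal sketch with hypothetical inputs follows (the 0.75 factor and the sign flip for DESC columns are the values used in the code above; the helper name is made up for illustration):

    #include <stdio.h>

    /* Damp the leading column's correlation when the index is multicolumn. */
    static double
    btree_index_correlation(double var_correlation, int reverse_sort, int ncolumns)
    {
        if (reverse_sort)
            var_correlation = -var_correlation;
        return (ncolumns > 1) ? var_correlation * 0.75 : var_correlation;
    }

    int
    main(void)
    {
        printf("%g\n", btree_index_correlation(0.9, 0, 3));    /* 0.675 */
        printf("%g\n", btree_index_correlation(0.9, 1, 1));    /* -0.9 */
        return 0;
    }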
 
-Datum
-hashcostestimate(PG_FUNCTION_ARGS)
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
-
-       genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
-                                               indexStartupCost, indexTotalCost,
-                                               indexSelectivity, indexCorrelation);
-
-       PG_RETURN_VOID();
+       List       *qinfos;
+       GenericCosts costs;
+
+       /* Do preliminary analysis of indexquals */
+       qinfos = deconstruct_indexquals(path);
+
+       MemSet(&costs, 0, sizeof(costs));
+
+       genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+       /*
+        * A hash index has no descent costs as such, since the index AM can go
+        * directly to the target bucket after computing the hash value.  There
+        * are a couple of other hash-specific costs that we could conceivably add
+        * here, though:
+        *
+        * Ideally we'd charge spc_random_page_cost for each page in the target
+        * bucket, not just the numIndexPages pages that genericcostestimate
+        * thought we'd visit.  However in most cases we don't know which bucket
+        * that will be.  There's no point in considering the average bucket size
+        * because the hash AM makes sure that's always one page.
+        *
+        * Likewise, we could consider charging some CPU for each index tuple in
+        * the bucket, if we knew how many there were.  But the per-tuple cost is
+        * just a hash value comparison, not a general datatype-dependent
+        * comparison, so any such charge ought to be quite a bit less than
+        * cpu_operator_cost; which makes it probably not worth worrying about.
+        *
+        * A bigger issue is that chance hash-value collisions will result in
+        * wasted probes into the heap.  We don't currently attempt to model this
+        * cost on the grounds that it's rare, but maybe it's not rare enough.
+        * (Any fix for this ought to consider the generic lossy-operator problem,
+        * though; it's not entirely hash-specific.)
+        */
+
+       *indexStartupCost = costs.indexStartupCost;
+       *indexTotalCost = costs.indexTotalCost;
+       *indexSelectivity = costs.indexSelectivity;
+       *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
-Datum
-gistcostestimate(PG_FUNCTION_ARGS)
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
-
-       genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
-                                               indexStartupCost, indexTotalCost,
-                                               indexSelectivity, indexCorrelation);
-
-       PG_RETURN_VOID();
+       IndexOptInfo *index = path->indexinfo;
+       List       *qinfos;
+       GenericCosts costs;
+       Cost            descentCost;
+
+       /* Do preliminary analysis of indexquals */
+       qinfos = deconstruct_indexquals(path);
+
+       MemSet(&costs, 0, sizeof(costs));
+
+       genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+       /*
+        * We model index descent costs similarly to those for btree, but to do
+        * that we first need an idea of the tree height.  We somewhat arbitrarily
+        * assume that the fanout is 100, meaning the tree height is at most
+        * log100(index->pages).
+        *
+        * Although this computation isn't really expensive enough to require
+        * caching, we might as well use index->tree_height to cache it.
+        */
+       if (index->tree_height < 0) /* unknown? */
+       {
+               if (index->pages > 1)   /* avoid computing log(0) */
+                       index->tree_height = (int) (log(index->pages) / log(100.0));
+               else
+                       index->tree_height = 0;
+       }
+
+       /*
+        * Add a CPU-cost component to represent the costs of initial descent. We
+        * just use log(N) here not log2(N) since the branching factor isn't
+        * necessarily two anyway.  As for btree, charge once per SA scan.
+        */
+       if (index->tuples > 1)          /* avoid computing log(0) */
+       {
+               descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+               costs.indexStartupCost += descentCost;
+               costs.indexTotalCost += costs.num_sa_scans * descentCost;
+       }
+
+       /*
+        * Likewise add a per-page charge, calculated the same as for btrees.
+        */
+       descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+       costs.indexStartupCost += descentCost;
+       costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+       *indexStartupCost = costs.indexStartupCost;
+       *indexTotalCost = costs.indexTotalCost;
+       *indexSelectivity = costs.indexSelectivity;
+       *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
-/* Find the index column matching "op"; return its index, or -1 if no match */
-static int
-find_index_column(Node *op, IndexOptInfo *index)
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                               Cost *indexStartupCost, Cost *indexTotalCost,
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
+{
+       IndexOptInfo *index = path->indexinfo;
+       List       *qinfos;
+       GenericCosts costs;
+       Cost            descentCost;
+
+       /* Do preliminary analysis of indexquals */
+       qinfos = deconstruct_indexquals(path);
+
+       MemSet(&costs, 0, sizeof(costs));
+
+       genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+       /*
+        * We model index descent costs similarly to those for btree, but to do
+        * that we first need an idea of the tree height.  We somewhat arbitrarily
+        * assume that the fanout is 100, meaning the tree height is at most
+        * log100(index->pages).
+        *
+        * Although this computation isn't really expensive enough to require
+        * caching, we might as well use index->tree_height to cache it.
+        */
+       if (index->tree_height < 0) /* unknown? */
+       {
+               if (index->pages > 1)   /* avoid computing log(0) */
+                       index->tree_height = (int) (log(index->pages) / log(100.0));
+               else
+                       index->tree_height = 0;
+       }
+
+       /*
+        * Add a CPU-cost component to represent the costs of initial descent. We
+        * just use log(N) here not log2(N) since the branching factor isn't
+        * necessarily two anyway.  As for btree, charge once per SA scan.
+        */
+       if (index->tuples > 1)          /* avoid computing log(0) */
+       {
+               descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+               costs.indexStartupCost += descentCost;
+               costs.indexTotalCost += costs.num_sa_scans * descentCost;
+       }
+
+       /*
+        * Likewise add a per-page charge, calculated the same as for btrees.
+        */
+       descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+       costs.indexStartupCost += descentCost;
+       costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+       *indexStartupCost = costs.indexStartupCost;
+       *indexTotalCost = costs.indexTotalCost;
+       *indexSelectivity = costs.indexSelectivity;
+       *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
+}
+
+
+/*
+ * Support routines for gincostestimate
+ */
+
+typedef struct
+{
+       bool            haveFullScan;
+       double          partialEntries;
+       double          exactEntries;
+       double          searchEntries;
+       double          arrayScans;
+} GinQualCounts;
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN query, and increment the counts in *counts
+ * appropriately.  If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_pattern(IndexOptInfo *index, int indexcol,
+                               Oid clause_op, Datum query,
+                               GinQualCounts *counts)
 {
+       Oid                     extractProcOid;
+       Oid                     collation;
+       int                     strategy_op;
+       Oid                     lefttype,
+                               righttype;
+       int32           nentries = 0;
+       bool       *partial_matches = NULL;
+       Pointer    *extra_data = NULL;
+       bool       *nullFlags = NULL;
+       int32           searchMode = GIN_SEARCH_MODE_DEFAULT;
+       int32           i;
+
+       /*
+        * Get the operator's strategy number and declared input data types within
+        * the index opfamily.  (We don't need the latter, but we use
+        * get_op_opfamily_properties because it will throw error if it fails to
+        * get_op_opfamily_properties because it will throw an error if it fails to
+        */
+       get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
+                                                          &strategy_op, &lefttype, &righttype);
+
+       /*
+        * GIN always uses the "default" support functions, which are those with
+        * lefttype == righttype == the opclass' opcintype (see
+        * IndexSupportInitialize in relcache.c).
+        */
+       extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
+                                                                          index->opcintype[indexcol],
+                                                                          index->opcintype[indexcol],
+                                                                          GIN_EXTRACTQUERY_PROC);
+
+       if (!OidIsValid(extractProcOid))
+       {
+               /* should not happen; throw same error as index_getprocinfo */
+               elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
+                        GIN_EXTRACTQUERY_PROC, indexcol + 1,
+                        get_rel_name(index->indexoid));
+       }
+
+       /*
+        * Choose collation to pass to extractProc (should match initGinState).
+        */
+       if (OidIsValid(index->indexcollations[indexcol]))
+               collation = index->indexcollations[indexcol];
+       else
+               collation = DEFAULT_COLLATION_OID;
+
+       OidFunctionCall7Coll(extractProcOid,
+                                                collation,
+                                                query,
+                                                PointerGetDatum(&nentries),
+                                                UInt16GetDatum(strategy_op),
+                                                PointerGetDatum(&partial_matches),
+                                                PointerGetDatum(&extra_data),
+                                                PointerGetDatum(&nullFlags),
+                                                PointerGetDatum(&searchMode));
+
+       if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+       {
+               /* No match is possible */
+               return false;
+       }
+
+       for (i = 0; i < nentries; i++)
+       {
+               /*
+                * For a partial match we have no information with which to estimate
+                * the number of matching index entries, so just estimate it as 100
+                */
+               if (partial_matches && partial_matches[i])
+                       counts->partialEntries += 100;
+               else
+                       counts->exactEntries++;
+
+               counts->searchEntries++;
+       }
+
+       if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
+       {
+               /* Treat "include empty" like an exact-match item */
+               counts->exactEntries++;
+               counts->searchEntries++;
+       }
+       else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
+       {
+               /* It's GIN_SEARCH_MODE_ALL */
+               counts->haveFullScan = true;
+       }
+
+       return true;
+}
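
A minimal standalone sketch of those counting rules (the struct mirrors only the fields used here, the names are illustration-only, and the three-entry extractQuery result is made up):

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the GinQualCounts fields exercised by this sketch. */
    typedef struct
    {
        bool    haveFullScan;
        double  partialEntries;
        double  exactEntries;
        double  searchEntries;
    } QualCounts;

    /*
     * Each partial-match entry is guessed to touch 100 index entries, each
     * exact entry touches one, and every entry is one search of the entry tree.
     */
    static void
    accumulate(QualCounts *counts, int nentries, const bool *partial_matches)
    {
        for (int i = 0; i < nentries; i++)
        {
            if (partial_matches && partial_matches[i])
                counts->partialEntries += 100;
            else
                counts->exactEntries++;
            counts->searchEntries++;
        }
    }

    int
    main(void)
    {
        QualCounts  counts = {false, 0, 0, 0};
        bool        partial[3] = {false, true, false};  /* hypothetical result */

        accumulate(&counts, 3, partial);
        printf("partial=%g exact=%g search=%g\n",
               counts.partialEntries, counts.exactEntries, counts.searchEntries);
        return 0;
    }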
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately.  If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_opexpr(PlannerInfo *root,
+                          IndexOptInfo *index,
+                          IndexQualInfo *qinfo,
+                          GinQualCounts *counts)
+{
+       int                     indexcol = qinfo->indexcol;
+       Oid                     clause_op = qinfo->clause_op;
+       Node       *operand = qinfo->other_operand;
+
+       if (!qinfo->varonleft)
+       {
+               /* must commute the operator */
+               clause_op = get_commutator(clause_op);
+       }
+
+       /* aggressively reduce to a constant, and look through relabeling */
+       operand = estimate_expression_value(root, operand);
+
+       if (IsA(operand, RelabelType))
+               operand = (Node *) ((RelabelType *) operand)->arg;
+
+       /*
+        * We cannot call the extractQuery method with an unknown operand, so
+        * unless the operand is a Const we can't do much; just assume there will
+        * be one ordinary search entry from the operand at runtime.
+        */
+       if (!IsA(operand, Const))
+       {
+               counts->exactEntries++;
+               counts->searchEntries++;
+               return true;
+       }
+
+       /* If Const is null, there can be no matches */
+       if (((Const *) operand)->constisnull)
+               return false;
+
+       /* Otherwise, apply extractQuery and get the actual term counts */
+       return gincost_pattern(index, indexcol, clause_op,
+                                                  ((Const *) operand)->constvalue,
+                                                  counts);
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately.  If the query is unsatisfiable, return false.
+ *
+ * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
+ * each of which involves one value from the RHS array, plus all the
+ * non-array quals (if any).  To model this, we average the counts across
+ * the RHS elements, and add the averages to the counts in *counts (which
+ * correspond to per-indexscan costs).  We also multiply counts->arrayScans
+ * by N, causing gincostestimate to scale up its estimates accordingly.
+ */
+static bool
+gincost_scalararrayopexpr(PlannerInfo *root,
+                                                 IndexOptInfo *index,
+                                                 IndexQualInfo *qinfo,
+                                                 double numIndexEntries,
+                                                 GinQualCounts *counts)
+{
+       int                     indexcol = qinfo->indexcol;
+       Oid                     clause_op = qinfo->clause_op;
+       Node       *rightop = qinfo->other_operand;
+       ArrayType  *arrayval;
+       int16           elmlen;
+       bool            elmbyval;
+       char            elmalign;
+       int                     numElems;
+       Datum      *elemValues;
+       bool       *elemNulls;
+       GinQualCounts arraycounts;
+       int                     numPossible = 0;
        int                     i;
 
-       for (i = 0; i < index->ncolumns; i++)
+       Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);
+
+       /* aggressively reduce to a constant, and look through relabeling */
+       rightop = estimate_expression_value(root, rightop);
+
+       if (IsA(rightop, RelabelType))
+               rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+       /*
+        * We cannot call the extractQuery method with an unknown operand, so
+        * unless the operand is a Const we can't do much; just assume there will
+        * be one ordinary search entry from each array element at runtime, and
+        * fall back on a probably-bad estimate of the number of array elements.
+        */
+       if (!IsA(rightop, Const))
        {
-               if (match_index_to_operand(op, i, index))
-                       return i;
+               counts->exactEntries++;
+               counts->searchEntries++;
+               counts->arrayScans *= estimate_array_length(rightop);
+               return true;
        }
 
-       return -1;
+       /* If Const is null, there can be no matches */
+       if (((Const *) rightop)->constisnull)
+               return false;
+
+       /* Otherwise, extract the array elements and iterate over them */
+       arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
+       get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+                                                &elmlen, &elmbyval, &elmalign);
+       deconstruct_array(arrayval,
+                                         ARR_ELEMTYPE(arrayval),
+                                         elmlen, elmbyval, elmalign,
+                                         &elemValues, &elemNulls, &numElems);
+
+       memset(&arraycounts, 0, sizeof(arraycounts));
+
+       for (i = 0; i < numElems; i++)
+       {
+               GinQualCounts elemcounts;
+
+               /* NULL can't match anything, so ignore, as the executor will */
+               if (elemNulls[i])
+                       continue;
+
+               /* Otherwise, apply extractQuery and get the actual term counts */
+               memset(&elemcounts, 0, sizeof(elemcounts));
+
+               if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
+                                                       &elemcounts))
+               {
+                       /* We ignore array elements that are unsatisfiable patterns */
+                       numPossible++;
+
+                       if (elemcounts.haveFullScan)
+                       {
+                               /*
+                                * Full index scan will be required.  We treat this as if
+                                * every key in the index had been listed in the query; is
+                                * that reasonable?
+                                */
+                               elemcounts.partialEntries = 0;
+                               elemcounts.exactEntries = numIndexEntries;
+                               elemcounts.searchEntries = numIndexEntries;
+                       }
+                       arraycounts.partialEntries += elemcounts.partialEntries;
+                       arraycounts.exactEntries += elemcounts.exactEntries;
+                       arraycounts.searchEntries += elemcounts.searchEntries;
+               }
+       }
+
+       if (numPossible == 0)
+       {
+               /* No satisfiable patterns in the array */
+               return false;
+       }
+
+       /*
+        * Now add the averages to the global counts.  This will give us an
+        * estimate of the average number of terms searched for in each indexscan,
+        * including contributions from both array and non-array quals.
+        */
+       counts->partialEntries += arraycounts.partialEntries / numPossible;
+       counts->exactEntries += arraycounts.exactEntries / numPossible;
+       counts->searchEntries += arraycounts.searchEntries / numPossible;
+
+       counts->arrayScans *= numPossible;
+
+       return true;
 }
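
The averaging described in the header comment can be sketched in isolation; the per-element counts, the non-array baseline, and the variable names below are all invented for illustration:

    #include <stdio.h>

    /*
     * Per-element search-entry counts are summed over the satisfiable array
     * elements, their average is added to the per-indexscan totals, and
     * arrayScans is scaled by the number of satisfiable elements.
     */
    int
    main(void)
    {
        /* hypothetical counts for a 3-element array, one element unsatisfiable */
        double  elem_search[] = {2.0, 4.0};
        int     numPossible = 2;
        double  searchEntries = 1.0;    /* from non-array quals */
        double  arrayScans = 1.0;
        double  sum = 0.0;

        for (int i = 0; i < numPossible; i++)
            sum += elem_search[i];

        searchEntries += sum / numPossible;     /* average per indexscan */
        arrayScans *= numPossible;              /* number of separate scans */

        printf("searchEntries=%g arrayScans=%g\n", searchEntries, arrayScans);
        return 0;
    }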
 
 /*
  * GIN has search behavior completely different from other index types
  */
-Datum
-gincostestimate(PG_FUNCTION_ARGS)
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                               Cost *indexStartupCost, Cost *indexTotalCost,
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
 {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+       IndexOptInfo *index = path->indexinfo;
+       List       *indexQuals = path->indexquals;
+       List       *indexOrderBys = path->indexorderbys;
+       List       *qinfos;
        ListCell   *l;
        List       *selectivityQuals;
        double          numPages = index->pages,
@@ -6407,55 +7415,91 @@ gincostestimate(PG_FUNCTION_ARGS)
                                numDataPages,
                                numPendingPages,
                                numEntries;
-       bool            haveFullScan = false;
-       double          partialEntriesInQuals = 0.0;
-       double          searchEntriesInQuals = 0.0;
-       double          exactEntriesInQuals = 0.0;
+       GinQualCounts counts;
+       bool            matchPossible;
+       double          partialScale;
        double          entryPagesFetched,
                                dataPagesFetched,
                                dataPagesFetchedBySel;
        double          qual_op_cost,
                                qual_arg_cost,
                                spc_random_page_cost,
-                               num_scans;
-       QualCost        index_qual_cost;
+                               outer_scans;
        Relation        indexRel;
        GinStatsData ginStats;
 
-       /*
-        * Obtain statistic information from the meta page
-        */
-       indexRel = index_open(index->indexoid, AccessShareLock);
-       ginGetStats(indexRel, &ginStats);
-       index_close(indexRel, AccessShareLock);
-
-       numEntryPages = ginStats.nEntryPages;
-       numDataPages = ginStats.nDataPages;
-       numPendingPages = ginStats.nPendingPages;
-       numEntries = ginStats.nEntries;
+       /* Do preliminary analysis of indexquals */
+       qinfos = deconstruct_indexquals(path);
 
        /*
-        * nPendingPages can be trusted, but the other fields are as of the last
-        * VACUUM.      Scale them by the ratio numPages / nTotalPages to account for
-        * growth since then.  If the fields are zero (implying no VACUUM at all,
-        * and an index created pre-9.1), assume all pages are entry pages.
+        * Obtain statistical information from the meta page, if possible.  Else
+        * set ginStats to zeroes, and we'll cope below.
         */
-       if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
+       if (!index->hypothetical)
+       {
+               indexRel = index_open(index->indexoid, AccessShareLock);
+               ginGetStats(indexRel, &ginStats);
+               index_close(indexRel, AccessShareLock);
+       }
+       else
        {
-               numEntryPages = numPages;
-               numDataPages = 0;
-               numEntries = numTuples; /* bogus, but no other info available */
+               memset(&ginStats, 0, sizeof(ginStats));
        }
+
+       /*
+        * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+        * trusted, but the other fields are data as of the last VACUUM.  We can
+        * scale them up to account for growth since then, but that method only
+        * goes so far; in the worst case, the stats might be for a completely
+        * empty index, and scaling them will produce pretty bogus numbers.
+        * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+        * it's grown more than that, fall back to estimating things only from the
+        * assumed-accurate index size.  But we'll trust nPendingPages in any case
+        * so long as it's not clearly insane, ie, more than the index size.
+        */
+       if (ginStats.nPendingPages < numPages)
+               numPendingPages = ginStats.nPendingPages;
        else
+               numPendingPages = 0;
+
+       if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+               ginStats.nTotalPages > numPages / 4 &&
+               ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
        {
+               /*
+                * OK, the stats seem close enough to sane to be trusted.  But we
+                * still need to scale them by the ratio numPages / nTotalPages to
+                * account for growth since the last VACUUM.
+                */
                double          scale = numPages / ginStats.nTotalPages;
 
-               numEntryPages = ceil(numEntryPages * scale);
-               numDataPages = ceil(numDataPages * scale);
-               numEntries = ceil(numEntries * scale);
+               numEntryPages = ceil(ginStats.nEntryPages * scale);
+               numDataPages = ceil(ginStats.nDataPages * scale);
+               numEntries = ceil(ginStats.nEntries * scale);
                /* ensure we didn't round up too much */
-               numEntryPages = Min(numEntryPages, numPages);
-               numDataPages = Min(numDataPages, numPages - numEntryPages);
+               numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+               numDataPages = Min(numDataPages,
+                                                  numPages - numPendingPages - numEntryPages);
+       }
+       else
+       {
+               /*
+                * We might get here because it's a hypothetical index, or an index
+                * created pre-9.1 and never vacuumed since upgrading (in which case
+                * its stats would read as zeroes), or just because it's grown too
+                * much since the last VACUUM for us to put our faith in scaling.
+                *
+                * Invent some plausible internal statistics based on the index page
+                * count (and clamp that to at least 10 pages, just in case).  We
+                * estimate that 90% of the index is entry pages, and the rest is data
+                * pages.  Estimate 100 entries per entry page; this is rather bogus
+                * since it'll depend on the size of the keys, but it's more robust
+                * than trying to predict the number of entries per heap tuple.
+                */
+               numPages = Max(numPages, 10);
+               numEntryPages = floor((numPages - numPendingPages) * 0.90);
+               numDataPages = numPages - numPendingPages - numEntryPages;
+               numEntries = floor(numEntryPages * 100);
        }
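
A self-contained sketch of that scale-or-invent decision, using invented metapage numbers (a 1000-page index whose stats were taken at 400 pages); the Min/Max macros stand in for the PostgreSQL ones:

    #include <math.h>
    #include <stdio.h>

    #define Max(a,b) ((a) > (b) ? (a) : (b))
    #define Min(a,b) ((a) < (b) ? (a) : (b))

    /*
     * Trust the metapage stats only if the index has grown less than 4X since
     * the last VACUUM; otherwise invent stats (90% entry pages, 100 entries
     * per entry page).
     */
    int
    main(void)
    {
        double  numPages = 1000.0;      /* current index size */
        /* hypothetical ginStats values from the metapage */
        double  nTotalPages = 400.0, nEntryPages = 300.0,
                nDataPages = 90.0, nEntries = 25000.0, nPendingPages = 5.0;
        double  numPendingPages, numEntryPages, numDataPages, numEntries;

        numPendingPages = (nPendingPages < numPages) ? nPendingPages : 0;

        if (numPages > 0 && nTotalPages <= numPages &&
            nTotalPages > numPages / 4 && nEntryPages > 0 && nEntries > 0)
        {
            double  scale = numPages / nTotalPages;

            numEntryPages = Min(ceil(nEntryPages * scale),
                                numPages - numPendingPages);
            numDataPages = Min(ceil(nDataPages * scale),
                               numPages - numPendingPages - numEntryPages);
            numEntries = ceil(nEntries * scale);
        }
        else
        {
            numPages = Max(numPages, 10);
            numEntryPages = floor((numPages - numPendingPages) * 0.90);
            numDataPages = numPages - numPendingPages - numEntryPages;
            numEntries = floor(numEntryPages * 100);
        }

        printf("entryPages=%g dataPages=%g entries=%g\n",
               numEntryPages, numDataPages, numEntries);
        return 0;
    }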
 
        /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
@@ -6490,7 +7534,7 @@ gincostestimate(PG_FUNCTION_ARGS)
                                                                                           JOIN_INNER,
                                                                                           NULL);
 
-       /* fetch estimated page cost for schema containing index */
+       /* fetch estimated page cost for tablespace containing index */
        get_tablespace_page_costs(index->reltablespace,
                                                          &spc_random_page_cost,
                                                          NULL);
@@ -6503,199 +7547,116 @@ gincostestimate(PG_FUNCTION_ARGS)
        /*
         * Examine quals to estimate number of search entries & partial matches
         */
-       foreach(l, indexQuals)
-       {
-               RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-               Expr       *clause;
-               Node       *leftop,
-                                  *rightop,
-                                  *operand;
-               Oid                     extractProcOid;
-               Oid                     clause_op;
-               int                     strategy_op;
-               Oid                     lefttype,
-                                       righttype;
-               int32           nentries = 0;
-               bool       *partial_matches = NULL;
-               Pointer    *extra_data = NULL;
-               bool       *nullFlags = NULL;
-               int32           searchMode = GIN_SEARCH_MODE_DEFAULT;
-               int                     indexcol;
-
-               Assert(IsA(rinfo, RestrictInfo));
-               clause = rinfo->clause;
-               Assert(IsA(clause, OpExpr));
-               leftop = get_leftop(clause);
-               rightop = get_rightop(clause);
-               clause_op = ((OpExpr *) clause)->opno;
+       memset(&counts, 0, sizeof(counts));
+       counts.arrayScans = 1;
+       matchPossible = true;
 
-               if ((indexcol = find_index_column(leftop, index)) >= 0)
-               {
-                       operand = rightop;
-               }
-               else if ((indexcol = find_index_column(rightop, index)) >= 0)
-               {
-                       operand = leftop;
-                       clause_op = get_commutator(clause_op);
-               }
-               else
-               {
-                       elog(ERROR, "could not match index to operand");
-                       operand = NULL;         /* keep compiler quiet */
-               }
-
-               if (IsA(operand, RelabelType))
-                       operand = (Node *) ((RelabelType *) operand)->arg;
-
-               /*
-                * It's impossible to call extractQuery method for unknown operand. So
-                * unless operand is a Const we can't do much; just assume there will
-                * be one ordinary search entry from the operand at runtime.
-                */
-               if (!IsA(operand, Const))
-               {
-                       searchEntriesInQuals++;
-                       continue;
-               }
-
-               /* If Const is null, there can be no matches */
-               if (((Const *) operand)->constisnull)
-               {
-                       *indexStartupCost = 0;
-                       *indexTotalCost = 0;
-                       *indexSelectivity = 0;
-                       PG_RETURN_VOID();
-               }
-
-               /*
-                * Get the operator's strategy number and declared input data types
-                * within the index opfamily.  (We don't need the latter, but we use
-                * get_op_opfamily_properties because it will throw error if it fails
-                * to find a matching pg_amop entry.)
-                */
-               get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
-                                                                  &strategy_op, &lefttype, &righttype);
-
-               /*
-                * GIN always uses the "default" support functions, which are those
-                * with lefttype == righttype == the opclass' opcintype (see
-                * IndexSupportInitialize in relcache.c).
-                */
-               extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
-                                                                                  index->opcintype[indexcol],
-                                                                                  index->opcintype[indexcol],
-                                                                                  GIN_EXTRACTQUERY_PROC);
+       foreach(l, qinfos)
+       {
+               IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+               Expr       *clause = qinfo->rinfo->clause;
 
-               if (!OidIsValid(extractProcOid))
+               if (IsA(clause, OpExpr))
                {
-                       /* should not happen; throw same error as index_getprocinfo */
-                       elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
-                                GIN_EXTRACTQUERY_PROC, indexcol + 1,
-                                get_rel_name(index->indexoid));
+                       matchPossible = gincost_opexpr(root,
+                                                                                  index,
+                                                                                  qinfo,
+                                                                                  &counts);
+                       if (!matchPossible)
+                               break;
                }
-
-               OidFunctionCall7(extractProcOid,
-                                                ((Const *) operand)->constvalue,
-                                                PointerGetDatum(&nentries),
-                                                UInt16GetDatum(strategy_op),
-                                                PointerGetDatum(&partial_matches),
-                                                PointerGetDatum(&extra_data),
-                                                PointerGetDatum(&nullFlags),
-                                                PointerGetDatum(&searchMode));
-
-               if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+               else if (IsA(clause, ScalarArrayOpExpr))
                {
-                       /* No match is possible */
-                       *indexStartupCost = 0;
-                       *indexTotalCost = 0;
-                       *indexSelectivity = 0;
-                       PG_RETURN_VOID();
+                       matchPossible = gincost_scalararrayopexpr(root,
+                                                                                                         index,
+                                                                                                         qinfo,
+                                                                                                         numEntries,
+                                                                                                         &counts);
+                       if (!matchPossible)
+                               break;
                }
                else
                {
-                       int32           i;
-
-                       for (i = 0; i < nentries; i++)
-                       {
-                               /*
-                                * For partial match we haven't any information to estimate
-                                * number of matched entries in index, so, we just estimate it
-                                * as 100
-                                */
-                               if (partial_matches && partial_matches[i])
-                                       partialEntriesInQuals += 100;
-                               else
-                                       exactEntriesInQuals++;
-
-                               searchEntriesInQuals++;
-                       }
+                       /* shouldn't be anything else for a GIN index */
+                       elog(ERROR, "unsupported GIN indexqual type: %d",
+                                (int) nodeTag(clause));
                }
+       }
 
-               if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
-               {
-                       /* Treat "include empty" like an exact-match item */
-                       exactEntriesInQuals++;
-                       searchEntriesInQuals++;
-               }
-               else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
-               {
-                       /* It's GIN_SEARCH_MODE_ALL */
-                       haveFullScan = true;
-               }
+       /* Fall out if there were any provably-unsatisfiable quals */
+       if (!matchPossible)
+       {
+               *indexStartupCost = 0;
+               *indexTotalCost = 0;
+               *indexSelectivity = 0;
+               return;
        }
 
-       if (haveFullScan || indexQuals == NIL)
+       if (counts.haveFullScan || indexQuals == NIL)
        {
                /*
                 * Full index scan will be required.  We treat this as if every key in
                 * the index had been listed in the query; is that reasonable?
                 */
-               searchEntriesInQuals = numEntries;
+               counts.partialEntries = 0;
+               counts.exactEntries = numEntries;
+               counts.searchEntries = numEntries;
        }
 
        /* Will we have more than one iteration of a nestloop scan? */
-       if (outer_rel != NULL && outer_rel->rows > 1)
-               num_scans = outer_rel->rows;
-       else
-               num_scans = 1;
+       outer_scans = loop_count;
 
        /*
-        * cost to begin scan, first of all, pay attention to pending list.
+        * Compute the cost to begin the scan; first of all, account for the
+        * pending list.
         */
        entryPagesFetched = numPendingPages;
 
        /*
         * Estimate number of entry pages read.  We need to do
-        * searchEntriesInQuals searches.  Use a power function as it should be,
+        * counts.searchEntries searches.  Use a power function as it should be,
         * but the number of tuples on leaf pages is usually much greater.  Here
         * we include all searches in the entry tree, including the search for the
         * first entry in the partial match algorithm.
         */
-       entryPagesFetched += ceil(searchEntriesInQuals * rint(pow(numEntryPages, 0.15)));
+       entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
 
        /*
         * Add an estimate of entry pages read by partial match algorithm. It's a
-        * scan over leaf pages in entry tree.  We haven't any useful stats here,
-        * so estimate it as proportion.
+        * scan over the leaf pages of the entry tree.  We have no useful stats
+        * here, so estimate it as a proportion.  Because counts.partialEntries is
+        * really pretty bogus (see code above), it can exceed numEntries; clamp
+        * the proportion to ensure sanity.
         */
-       entryPagesFetched += ceil(numEntryPages * partialEntriesInQuals / numEntries);
+       partialScale = counts.partialEntries / numEntries;
+       partialScale = Min(partialScale, 1.0);
+
+       entryPagesFetched += ceil(numEntryPages * partialScale);
 
        /*
         * Partial match algorithm reads all data pages before doing actual scan,
-        * so it's a startup cost. Again, we havn't any useful stats here, so,
-        * estimate it as proportion
+        * so it's a startup cost.  Again, we have no useful stats here, so
+        * estimate it as a proportion.
         */
-       dataPagesFetched = ceil(numDataPages * partialEntriesInQuals / numEntries);
+       dataPagesFetched = ceil(numDataPages * partialScale);
 
-       /* calculate cache effects */
-       if (num_scans > 1 || searchEntriesInQuals > 1)
+       /*
+        * Calculate cache effects if more than one scan due to nestloops or array
+        * quals.  The result is pro-rated per nestloop scan, but the array qual
+        * factor shouldn't be pro-rated (compare genericcostestimate).
+        */
+       if (outer_scans > 1 || counts.arrayScans > 1)
        {
+               entryPagesFetched *= outer_scans * counts.arrayScans;
                entryPagesFetched = index_pages_fetched(entryPagesFetched,
                                                                                                (BlockNumber) numEntryPages,
                                                                                                numEntryPages, root);
+               entryPagesFetched /= outer_scans;
+               dataPagesFetched *= outer_scans * counts.arrayScans;
                dataPagesFetched = index_pages_fetched(dataPagesFetched,
                                                                                           (BlockNumber) numDataPages,
                                                                                           numDataPages, root);
+               dataPagesFetched /= outer_scans;
        }
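
Putting the entry-page pieces together, a minimal sketch follows; the statistics are invented, pages_fetched() is only a clamping stand-in for the real index_pages_fetched() (which applies the Mackert-Lohman formula), and the pending-list term is omitted:

    #include <math.h>
    #include <stdio.h>

    #define Min(a,b) ((a) < (b) ? (a) : (b))

    /* Simplified stand-in for index_pages_fetched(): just clamp to the index size. */
    static double
    pages_fetched(double pages_wanted, double pages_in_index)
    {
        return Min(pages_wanted, pages_in_index);
    }

    int
    main(void)
    {
        double  numEntryPages = 750.0, numEntries = 62500.0;    /* hypothetical */
        double  searchEntries = 3.0, partialEntries = 100.0;
        double  outer_scans = 4.0, arrayScans = 2.0;
        double  partialScale, entryPagesFetched;

        /* power-function estimate of entry-tree descents */
        entryPagesFetched = ceil(searchEntries * rint(pow(numEntryPages, 0.15)));

        /* partial-match leaf scan, clamped to at most the whole entry tree */
        partialScale = Min(partialEntries / numEntries, 1.0);
        entryPagesFetched += ceil(numEntryPages * partialScale);

        /* pro-rate cache effects per nestloop scan, but not per array scan */
        if (outer_scans > 1 || arrayScans > 1)
        {
            entryPagesFetched *= outer_scans * arrayScans;
            entryPagesFetched = pages_fetched(entryPagesFetched, numEntryPages);
            entryPagesFetched /= outer_scans;
        }

        printf("entryPagesFetched=%g\n", entryPagesFetched);
        return 0;
    }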
 
        /*
@@ -6704,51 +7665,260 @@ gincostestimate(PG_FUNCTION_ARGS)
         */
        *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
 
-       /* cost to scan data pages for each exact (non-partial) matched entry */
-       dataPagesFetched = ceil(numDataPages * exactEntriesInQuals / numEntries);
+       /*
+        * Now compute the number of data pages fetched during the scan.
+        *
+        * We assume that every entry has the same number of items, and that there
+        * is no overlap between them. (XXX: tsvector and array opclasses collect
+        * statistics on the frequency of individual keys; it would be nice to use
+        * those here.)
+        */
+       dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
 
        /*
-        * Estimate number of data pages read, using selectivity estimation and
-        * capacity of data page.
+        * If there is a lot of overlap among the entries, in particular if one of
+        * the entries is very frequent, the above calculation can grossly
+        * under-estimate.  As a simple cross-check, calculate a lower bound based
+        * on the overall selectivity of the quals.  At a minimum, we must read
+        * one item pointer for each matching entry.
+        *
+        * The width of each item pointer varies, based on the level of
+        * compression.  We don't have statistics on that, but an average of
+        * around 3 bytes per item is fairly typical.
         */
        dataPagesFetchedBySel = ceil(*indexSelectivity *
-                                                                (numTuples / (BLCKSZ / SizeOfIptrData)));
-
+                                                                (numTuples / (BLCKSZ / 3)));
        if (dataPagesFetchedBySel > dataPagesFetched)
-       {
-               /*
-                * At least one of entries is very frequent and, unfortunately, we
-                * couldn't get statistic about entries (only tsvector has such
-                * statistics). So, we obviously have too small estimation of pages
-                * fetched from data tree. Re-estimate it from known capacity of data
-                * pages
-                */
                dataPagesFetched = dataPagesFetchedBySel;
-       }
 
-       if (num_scans > 1)
+       /* Account for cache effects, the same as above */
+       if (outer_scans > 1 || counts.arrayScans > 1)
+       {
+               dataPagesFetched *= outer_scans * counts.arrayScans;
                dataPagesFetched = index_pages_fetched(dataPagesFetched,
                                                                                           (BlockNumber) numDataPages,
                                                                                           numDataPages, root);
+               dataPagesFetched /= outer_scans;
+       }
+
+       /* And apply random_page_cost as the cost per page */
        *indexTotalCost = *indexStartupCost +
                dataPagesFetched * spc_random_page_cost;
 
        /*
         * Add on index qual eval costs, much as in genericcostestimate
         */
-       cost_qual_eval(&index_qual_cost, indexQuals, root);
-       qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
-       cost_qual_eval(&index_qual_cost, indexOrderBys, root);
-       qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+       qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+               orderby_operands_eval_cost(root, path);
        qual_op_cost = cpu_operator_cost *
                (list_length(indexQuals) + list_length(indexOrderBys));
-       qual_arg_cost -= qual_op_cost;
-       if (qual_arg_cost < 0)          /* just in case... */
-               qual_arg_cost = 0;
 
        *indexStartupCost += qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
        *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexPages = dataPagesFetched;
+}
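
The item-pointer cross-check above amounts to the following; the block size is hard-coded at the standard 8192 bytes, the 3-byte average is the one stated in the comment, and the statistics are assumed values:

    #include <math.h>
    #include <stdio.h>

    #define BLCKSZ 8192     /* standard PostgreSQL block size */

    /*
     * At a minimum, one item pointer (about 3 bytes after compression) must be
     * read per matching heap tuple, so the data-page estimate derived from
     * entry counts can't be allowed to fall below that floor.
     */
    int
    main(void)
    {
        double  numDataPages = 225.0, numEntries = 62500.0;     /* hypothetical */
        double  numTuples = 1000000.0, indexSelectivity = 0.01;
        double  exactEntries = 2.0;
        double  dataPagesFetched, dataPagesFetchedBySel;

        dataPagesFetched = ceil(numDataPages * exactEntries / numEntries);
        dataPagesFetchedBySel = ceil(indexSelectivity *
                                     (numTuples / (BLCKSZ / 3)));

        if (dataPagesFetchedBySel > dataPagesFetched)
            dataPagesFetched = dataPagesFetchedBySel;

        printf("dataPagesFetched=%g\n", dataPagesFetched);
        return 0;
    }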
+
+/*
+ * BRIN has search behavior completely different from other index types
+ */
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
+{
+       IndexOptInfo *index = path->indexinfo;
+       List       *indexQuals = path->indexquals;
+       double          numPages = index->pages;
+       RelOptInfo *baserel = index->rel;
+       RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+       List       *qinfos;
+       Cost            spc_seq_page_cost;
+       Cost            spc_random_page_cost;
+       double          qual_arg_cost;
+       double          qualSelectivity;
+       BrinStatsData statsData;
+       double          indexRanges;
+       double          minimalRanges;
+       double          estimatedRanges;
+       double          selec;
+       Relation        indexRel;
+       ListCell   *l;
+       VariableStatData vardata;
+
+       Assert(rte->rtekind == RTE_RELATION);
+
+       /* fetch estimated page cost for the tablespace containing the index */
+       get_tablespace_page_costs(index->reltablespace,
+                                                         &spc_random_page_cost,
+                                                         &spc_seq_page_cost);
+
+       /*
+        * Obtain some data from the index itself.
+        */
+       indexRel = index_open(index->indexoid, AccessShareLock);
+       brinGetStats(indexRel, &statsData);
+       index_close(indexRel, AccessShareLock);
+
+       /*
+        * Compute index correlation
+        *
+        * Because we can use all index quals equally when scanning, we can use
+        * the largest correlation (in absolute value) among columns used by the
+        * query.  Start at zero, the worst possible case.  If we cannot find any
+        * correlation statistics, we will keep it as 0.
+        */
+       *indexCorrelation = 0;
+
+       qinfos = deconstruct_indexquals(path);
+       foreach(l, qinfos)
+       {
+               IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+               AttrNumber      attnum = index->indexkeys[qinfo->indexcol];
+
+               /* attempt to lookup stats in relation for this index column */
+               if (attnum != 0)
+               {
+                       /* Simple variable -- look to stats for the underlying table */
+                       if (get_relation_stats_hook &&
+                               (*get_relation_stats_hook) (root, rte, attnum, &vardata))
+                       {
+                               /*
+                                * The hook took control of acquiring a stats tuple.  If it
+                                * did supply a tuple, it'd better have supplied a freefunc.
+                                */
+                               if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
+                                       elog(ERROR,
+                                         "no function provided to release variable stats with");
+                       }
+                       else
+                       {
+                               vardata.statsTuple =
+                                       SearchSysCache3(STATRELATTINH,
+                                                                       ObjectIdGetDatum(rte->relid),
+                                                                       Int16GetDatum(attnum),
+                                                                       BoolGetDatum(false));
+                               vardata.freefunc = ReleaseSysCache;
+                       }
+               }
+               else
+               {
+                       /*
+                        * Looks like we've found an expression column in the index.
+                        * Let's see whether there are any stats for it.
+                        */
+
+                       /* get the attnum from the 0-based index. */
+                       attnum = qinfo->indexcol + 1;
+
+                       if (get_index_stats_hook &&
+                       (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
+                       {
+                               /*
+                                * The hook took control of acquiring a stats tuple.  If it
+                                * did supply a tuple, it'd better have supplied a freefunc.
+                                */
+                               if (HeapTupleIsValid(vardata.statsTuple) &&
+                                       !vardata.freefunc)
+                                       elog(ERROR, "no function provided to release variable stats with");
+                       }
+                       else
+                       {
+                               vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+                                                                                  ObjectIdGetDatum(index->indexoid),
+                                                                                                        Int16GetDatum(attnum),
+                                                                                                        BoolGetDatum(false));
+                               vardata.freefunc = ReleaseSysCache;
+                       }
+               }
+
+               if (HeapTupleIsValid(vardata.statsTuple))
+               {
+                       AttStatsSlot sslot;
+
+                       if (get_attstatsslot(&sslot, vardata.statsTuple,
+                                                                STATISTIC_KIND_CORRELATION, InvalidOid,
+                                                                ATTSTATSSLOT_NUMBERS))
+                       {
+                               double          varCorrelation = 0.0;
+
+                               if (sslot.nnumbers > 0)
+                                       varCorrelation = Abs(sslot.numbers[0]);
+
+                               if (varCorrelation > *indexCorrelation)
+                                       *indexCorrelation = varCorrelation;
+
+                               free_attstatsslot(&sslot);
+                       }
+               }
+
+               ReleaseVariableStats(vardata);
+       }
+
+       qualSelectivity = clauselist_selectivity(root, indexQuals,
+                                                                                        baserel->relid,
+                                                                                        JOIN_INNER, NULL);
+
+       /* work out the actual number of ranges in the index */
+       indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
+                                         1.0);
+
+       /*
+        * Now calculate the minimum possible number of ranges we could match if
+        * all of the rows were in perfect order in the table's heap.
+        */
+       minimalRanges = ceil(indexRanges * qualSelectivity);
+
+       /*
+        * Now estimate the number of ranges that we'll touch by using the
+        * indexCorrelation from the stats. Careful not to divide by zero (note
+        * we're using the absolute value of the correlation).
+        */
+       if (*indexCorrelation < 1.0e-10)
+               estimatedRanges = indexRanges;
+       else
+               estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
+
+       /* we expect to visit this portion of the table */
+       selec = estimatedRanges / indexRanges;
+
+       CLAMP_PROBABILITY(selec);
+
+       *indexSelectivity = selec;
+
+       /*
+        * Compute the index qual costs, much as in genericcostestimate, to add to
+        * the index costs.
+        */
+       qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+               orderby_operands_eval_cost(root, path);
+
+       /*
+        * Compute the startup cost as the cost to read the whole revmap
+        * sequentially, including the cost to execute the index quals.
+        */
+       *indexStartupCost =
+               spc_seq_page_cost * statsData.revmapNumPages * loop_count;
+       *indexStartupCost += qual_arg_cost;
+
+       /*
+        * To read a BRIN index there might be a bit of back and forth over
+        * regular pages, as revmap might point to them out of sequential order;
+        * calculate the total cost as reading the whole index in random order.
+        */
+       *indexTotalCost = *indexStartupCost +
+               spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
+
+       /*
+        * Charge a small amount per range tuple that we expect to match.  This
+        * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
+        * will set a bit for each page in the range when we find a matching
+        * range, so we must multiply the charge by the number of pages in the
+        * range.
+        */
+       *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
+               statsData.pagesPerRange;
 
-       PG_RETURN_VOID();
+       *indexPages = index->pages;
 }