]> granicus.if.org Git - postgresql/blobdiff - src/backend/utils/adt/selfuncs.c
Clean up possibly-uninitialized-variable warnings reported by gcc 4.x.
[postgresql] / src / backend / utils / adt / selfuncs.c
index cd9a6444d41d90272e82f545e8b3091c8f6e7065..e987a66a1cf09e2f68aa5a4da5657357e58885d9 100644 (file)
  *       Index cost functions are registered in the pg_am catalog
  *       in the "amcostestimate" attribute.
  *
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.159 2004/05/26 04:41:39 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.189 2005/09/24 22:54:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,7 @@
  *
  * The call convention for a restriction estimator (oprrest function) is
  *
- *             Selectivity oprrest (Query *root,
+ *             Selectivity oprrest (PlannerInfo *root,
  *                                                      Oid operator,
  *                                                      List *args,
  *                                                      int varRelid);
@@ -59,7 +59,7 @@
  * except that varRelid is not needed, and instead the join type is
  * supplied:
  *
- *             Selectivity oprjoin (Query *root,
+ *             Selectivity oprjoin (PlannerInfo *root,
  *                                                      Oid operator,
  *                                                      List *args,
  *                                                      JoinType jointype);
@@ -79,7 +79,6 @@
 #include "access/heapam.h"
 #include "access/nbtree.h"
 #include "access/tuptoaster.h"
-#include "catalog/catname.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
 #include "utils/datum.h"
 #include "utils/int8.h"
 #include "utils/lsyscache.h"
+#include "utils/nabstime.h"
 #include "utils/pg_locale.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
 
 
-/*
- * Note: the default selectivity estimates are not chosen entirely at random.
- * We want them to be small enough to ensure that indexscans will be used if
- * available, for typical table densities of ~100 tuples/page. Thus, for
- * example, 0.01 is not quite small enough, since that makes it appear that
- * nearly all pages will be hit anyway.  Also, since we sometimes estimate
- * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
- * 1/DEFAULT_EQ_SEL.
- */
-
-/* default selectivity estimate for equalities such as "A = b" */
-#define DEFAULT_EQ_SEL 0.005
-
-/* default selectivity estimate for inequalities such as "A < b" */
-#define DEFAULT_INEQ_SEL  (1.0 / 3.0)
-
-/* default selectivity estimate for pattern-match operators such as LIKE */
-#define DEFAULT_MATCH_SEL      0.005
-
-/* default number of distinct values in a table */
-#define DEFAULT_NUM_DISTINCT  200
-
-/* default selectivity estimate for boolean and null test nodes */
-#define DEFAULT_UNK_SEL                        0.005
-#define DEFAULT_NOT_UNK_SEL            (1.0 - DEFAULT_UNK_SEL)
-
-/*
- * Clamp a computed probability estimate (which may suffer from roundoff or
- * estimation errors) to valid range.  Argument must be a float variable.
- */
-#define CLAMP_PROBABILITY(p) \
-       do { \
-               if (p < 0.0) \
-                       p = 0.0; \
-               else if (p > 1.0) \
-                       p = 1.0; \
-       } while (0)
-
-
 /* Return data from examine_variable and friends */
 typedef struct
 {
@@ -157,6 +118,7 @@ typedef struct
        RelOptInfo *rel;                        /* Relation, or NULL if not identifiable */
        HeapTuple       statsTuple;             /* pg_statistic tuple, or NULL if none */
        /* NB: if statsTuple!=NULL, it must be freed when caller is done */
+       Oid                     vartype;                /* exposed type of expression */
        Oid                     atttype;                /* type to pass to get_attstatsslot */
        int32           atttypmod;              /* typmod to pass to get_attstatsslot */
        bool            isunique;               /* true if matched to a unique index */
@@ -173,11 +135,11 @@ static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
 static double convert_numeric_to_scalar(Datum value, Oid typid);
-static void convert_string_to_scalar(unsigned char *value,
+static void convert_string_to_scalar(char *value,
                                                 double *scaledvalue,
-                                                unsigned char *lobound,
+                                                char *lobound,
                                                 double *scaledlobound,
-                                                unsigned char *hibound,
+                                                char *hibound,
                                                 double *scaledhibound);
 static void convert_bytea_to_scalar(Datum value,
                                                double *scaledvalue,
@@ -185,24 +147,24 @@ static void convert_bytea_to_scalar(Datum value,
                                                double *scaledlobound,
                                                Datum hibound,
                                                double *scaledhibound);
-static double convert_one_string_to_scalar(unsigned char *value,
+static double convert_one_string_to_scalar(char *value,
                                                         int rangelo, int rangehi);
 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                                                        int rangelo, int rangehi);
-static unsigned char *convert_string_datum(Datum value, Oid typid);
+static char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
-static bool get_restriction_variable(Query *root, List *args, int varRelid,
-                                       VariableStatData *vardata, Node **other,
-                                       bool *varonleft);
-static void get_join_variables(Query *root, List *args,
-                                                          VariableStatData *vardata1,
-                                                          VariableStatData *vardata2);
-static void examine_variable(Query *root, Node *node, int varRelid,
-                                                        VariableStatData *vardata);
+static bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
+                                                VariableStatData *vardata, Node **other,
+                                                bool *varonleft);
+static void get_join_variables(PlannerInfo *root, List *args,
+                                  VariableStatData *vardata1,
+                                  VariableStatData *vardata2);
+static void examine_variable(PlannerInfo *root, Node *node, int varRelid,
+                                VariableStatData *vardata);
 static double get_variable_numdistinct(VariableStatData *vardata);
-static bool get_variable_maximum(Query *root, VariableStatData *vardata,
-                                                                Oid sortop, Datum *max);
-static Selectivity prefix_selectivity(Query *root, VariableStatData *vardata,
+static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
+                                        Oid sortop, Datum *max);
+static Selectivity prefix_selectivity(PlannerInfo *root, Node *variable,
                                   Oid opclass, Const *prefix);
 static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
 static Datum string_to_datum(const char *str, Oid datatype);
@@ -221,7 +183,7 @@ static Const *string_to_bytea_const(const char *str, size_t str_len);
 Datum
 eqsel(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
@@ -416,7 +378,7 @@ eqsel(PG_FUNCTION_ARGS)
 Datum
 neqsel(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
@@ -459,7 +421,7 @@ neqsel(PG_FUNCTION_ARGS)
  * it will return a default estimate.
  */
 static double
-scalarineqsel(Query *root, Oid operator, bool isgt,
+scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
                          VariableStatData *vardata, Datum constval, Oid consttype)
 {
        Form_pg_statistic stats;
@@ -585,7 +547,7 @@ scalarineqsel(Query *root, Oid operator, bool isgt,
                                         */
                                        if (convert_to_scalar(constval, consttype, &val,
                                                                                  values[i - 1], values[i],
-                                                                                 vardata->atttype,
+                                                                                 vardata->vartype,
                                                                                  &low, &high))
                                        {
                                                if (high <= low)
@@ -691,7 +653,7 @@ scalarineqsel(Query *root, Oid operator, bool isgt,
 Datum
 scalarltsel(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
@@ -704,8 +666,8 @@ scalarltsel(PG_FUNCTION_ARGS)
        double          selec;
 
        /*
-        * If expression is not variable op something or something op variable,
-        * then punt and return a default estimate.
+        * If expression is not variable op something or something op
+        * variable, then punt and return a default estimate.
         */
        if (!get_restriction_variable(root, args, varRelid,
                                                                  &vardata, &other, &varonleft))
@@ -767,7 +729,7 @@ scalarltsel(PG_FUNCTION_ARGS)
 Datum
 scalargtsel(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
@@ -780,8 +742,8 @@ scalargtsel(PG_FUNCTION_ARGS)
        double          selec;
 
        /*
-        * If expression is not variable op something or something op variable,
-        * then punt and return a default estimate.
+        * If expression is not variable op something or something op
+        * variable, then punt and return a default estimate.
         */
        if (!get_restriction_variable(root, args, varRelid,
                                                                  &vardata, &other, &varonleft))
@@ -843,7 +805,7 @@ scalargtsel(PG_FUNCTION_ARGS)
 static double
 patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 
 #ifdef NOT_USED
        Oid                     operator = PG_GETARG_OID(1);
@@ -851,6 +813,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
        VariableStatData vardata;
+       Node       *variable;
        Node       *other;
        bool            varonleft;
        Datum           constval;
@@ -875,6 +838,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
                ReleaseVariableStats(vardata);
                return DEFAULT_MATCH_SEL;
        }
+       variable = (Node *) linitial(args);
 
        /*
         * If the constant is NULL, assume operator is strict and return zero,
@@ -901,23 +865,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
        }
 
        /*
-        * The var, on the other hand, might be a binary-compatible type;
-        * particularly a domain.  Try to fold it if it's not recognized
-        * immediately.
-        */
-       vartype = vardata.atttype;
-       if (vartype != consttype)
-               vartype = getBaseType(vartype);
-
-       /*
-        * We should now be able to recognize the var's datatype.  Choose the
-        * index opclass from which we must draw the comparison operators.
+        * Similarly, the exposed type of the left-hand side should be one
+        * of those we know.  (Do not look at vardata.atttype, which might be
+        * something binary-compatible but different.)  We can use it to choose
+        * the index opclass from which we must draw the comparison operators.
         *
         * NOTE: It would be more correct to use the PATTERN opclasses than the
         * simple ones, but at the moment ANALYZE will not generate statistics
         * for the PATTERN operators.  But our results are so approximate
         * anyway that it probably hardly matters.
         */
+       vartype = vardata.vartype;
+
        switch (vartype)
        {
                case TEXTOID:
@@ -983,7 +942,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
 
                if (eqopr == InvalidOid)
                        elog(ERROR, "no = operator for opclass %u", opclass);
-               eqargs = makeList2(vardata.var, prefix);
+               eqargs = list_make2(variable, prefix);
                result = DatumGetFloat8(DirectFunctionCall4(eqsel,
                                                                                                        PointerGetDatum(root),
                                                                                                 ObjectIdGetDatum(eqopr),
@@ -1002,7 +961,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
                Selectivity selec;
 
                if (pstatus == Pattern_Prefix_Partial)
-                       prefixsel = prefix_selectivity(root, &vardata, opclass, prefix);
+                       prefixsel = prefix_selectivity(root, variable, opclass, prefix);
                else
                        prefixsel = 1.0;
                restsel = pattern_selectivity(rest, ptype);
@@ -1115,7 +1074,7 @@ icnlikesel(PG_FUNCTION_ARGS)
  *             booltestsel             - Selectivity of BooleanTest Node.
  */
 Selectivity
-booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
+booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
                        int varRelid, JoinType jointype)
 {
        VariableStatData vardata;
@@ -1238,9 +1197,9 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
        {
                /*
                 * If we can't get variable statistics for the argument, perhaps
-                * clause_selectivity can do something with it.  We ignore
-                * the possibility of a NULL value when using clause_selectivity,
-                * and just assume the value is either TRUE or FALSE.
+                * clause_selectivity can do something with it.  We ignore the
+                * possibility of a NULL value when using clause_selectivity, and
+                * just assume the value is either TRUE or FALSE.
                 */
                switch (booltesttype)
                {
@@ -1258,7 +1217,7 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
                        case IS_FALSE:
                        case IS_NOT_TRUE:
                                selec = 1.0 - (double) clause_selectivity(root, arg,
-                                                                                                                 varRelid, jointype);
+                                                                                                        varRelid, jointype);
                                break;
                        default:
                                elog(ERROR, "unrecognized booltesttype: %d",
@@ -1280,7 +1239,8 @@ booltestsel(Query *root, BoolTestType booltesttype, Node *arg,
  *             nulltestsel             - Selectivity of NullTest Node.
  */
 Selectivity
-nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid)
+nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
+                       Node *arg, int varRelid)
 {
        VariableStatData vardata;
        double          selec;
@@ -1334,7 +1294,7 @@ nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid)
                        default:
                                elog(ERROR, "unrecognized nulltesttype: %d",
                                         (int) nulltesttype);
-                               return (Selectivity) 0;         /* keep compiler quiet */
+                               return (Selectivity) 0; /* keep compiler quiet */
                }
        }
 
@@ -1352,7 +1312,7 @@ nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid)
 Datum
 eqjoinsel(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
@@ -1407,17 +1367,16 @@ eqjoinsel(PG_FUNCTION_ARGS)
        {
                /*
                 * We have most-common-value lists for both relations.  Run
-                * through the lists to see which MCVs actually join to each
-                * other with the given operator.  This allows us to determine
-                * the exact join selectivity for the portion of the relations
-                * represented by the MCV lists.  We still have to estimate
-                * for the remaining population, but in a skewed distribution
-                * this gives us a big leg up in accuracy.      For motivation see
-                * the analysis in Y. Ioannidis and S. Christodoulakis, "On
-                * the propagation of errors in the size of join results",
-                * Technical Report 1018, Computer Science Dept., University
-                * of Wisconsin, Madison, March 1991 (available from
-                * ftp.cs.wisc.edu).
+                * through the lists to see which MCVs actually join to each other
+                * with the given operator.  This allows us to determine the exact
+                * join selectivity for the portion of the relations represented
+                * by the MCV lists.  We still have to estimate for the remaining
+                * population, but in a skewed distribution this gives us a big
+                * leg up in accuracy.  For motivation see the analysis in Y.
+                * Ioannidis and S. Christodoulakis, "On the propagation of errors
+                * in the size of join results", Technical Report 1018, Computer
+                * Science Dept., University of Wisconsin, Madison, March 1991
+                * (available from ftp.cs.wisc.edu).
                 */
                FmgrInfo        eqproc;
                bool       *hasmatch1;
@@ -1441,22 +1400,20 @@ eqjoinsel(PG_FUNCTION_ARGS)
                hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
 
                /*
-                * If we are doing any variant of JOIN_IN, pretend all the
-                * values of the righthand relation are unique (ie, act as if
-                * it's been DISTINCT'd).
+                * If we are doing any variant of JOIN_IN, pretend all the values
+                * of the righthand relation are unique (ie, act as if it's been
+                * DISTINCT'd).
                 *
-                * NOTE: it might seem that we should unique-ify the lefthand
-                * input when considering JOIN_REVERSE_IN.      But this is not
-                * so, because the join clause we've been handed has not been
-                * commuted from the way the parser originally wrote it.  We
-                * know that the unique side of the IN clause is *always* on
-                * the right.
+                * NOTE: it might seem that we should unique-ify the lefthand input
+                * when considering JOIN_REVERSE_IN.  But this is not so, because
+                * the join clause we've been handed has not been commuted from
+                * the way the parser originally wrote it.      We know that the
+                * unique side of the IN clause is *always* on the right.
                 *
-                * NOTE: it would be dangerous to try to be smart about JOIN_LEFT
-                * or JOIN_RIGHT here, because we do not have enough
-                * information to determine which var is really on which side
-                * of the join. Perhaps someday we should pass in more
-                * information.
+                * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or
+                * JOIN_RIGHT here, because we do not have enough information to
+                * determine which var is really on which side of the join.
+                * Perhaps someday we should pass in more information.
                 */
                if (jointype == JOIN_IN ||
                        jointype == JOIN_REVERSE_IN ||
@@ -1471,11 +1428,10 @@ eqjoinsel(PG_FUNCTION_ARGS)
                }
 
                /*
-                * Note we assume that each MCV will match at most one member
-                * of the other MCV list.  If the operator isn't really
-                * equality, there could be multiple matches --- but we don't
-                * look for them, both for speed and because the math wouldn't
-                * add up...
+                * Note we assume that each MCV will match at most one member of
+                * the other MCV list.  If the operator isn't really equality,
+                * there could be multiple matches --- but we don't look for them,
+                * both for speed and because the math wouldn't add up...
                 */
                matchprodfreq = 0.0;
                nmatches = 0;
@@ -1524,8 +1480,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
                pfree(hasmatch2);
 
                /*
-                * Compute total frequency of non-null values that are not in
-                * the MCV lists.
+                * Compute total frequency of non-null values that are not in the
+                * MCV lists.
                 */
                otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
                otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
@@ -1533,12 +1489,12 @@ eqjoinsel(PG_FUNCTION_ARGS)
                CLAMP_PROBABILITY(otherfreq2);
 
                /*
-                * We can estimate the total selectivity from the point of
-                * view of relation 1 as: the known selectivity for matched
-                * MCVs, plus unmatched MCVs that are assumed to match against
-                * random members of relation 2's non-MCV population, plus
-                * non-MCV values that are assumed to match against random
-                * members of relation 2's unmatched MCVs plus non-MCV values.
+                * We can estimate the total selectivity from the point of view of
+                * relation 1 as: the known selectivity for matched MCVs, plus
+                * unmatched MCVs that are assumed to match against random members
+                * of relation 2's non-MCV population, plus non-MCV values that
+                * are assumed to match against random members of relation 2's
+                * unmatched MCVs plus non-MCV values.
                 */
                totalsel1 = matchprodfreq;
                if (nd2 > nvalues2)
@@ -1555,11 +1511,10 @@ eqjoinsel(PG_FUNCTION_ARGS)
                                (nd1 - nmatches);
 
                /*
-                * Use the smaller of the two estimates.  This can be
-                * justified in essentially the same terms as given below for
-                * the no-stats case: to a first approximation, we are
-                * estimating from the point of view of the relation with
-                * smaller nd.
+                * Use the smaller of the two estimates.  This can be justified in
+                * essentially the same terms as given below for the no-stats
+                * case: to a first approximation, we are estimating from the
+                * point of view of the relation with smaller nd.
                 */
                selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
        }
@@ -1567,26 +1522,24 @@ eqjoinsel(PG_FUNCTION_ARGS)
        {
                /*
                 * We do not have MCV lists for both sides.  Estimate the join
-                * selectivity as
-                * MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This is
-                * plausible if we assume that the join operator is strict and
-                * the non-null values are about equally distributed: a given
+                * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2).
+                * This is plausible if we assume that the join operator is strict
+                * and the non-null values are about equally distributed: a given
                 * non-null tuple of rel1 will join to either zero or
-                * N2*(1-nullfrac2)/nd2 rows of rel2, so total join rows are
-                * at most N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join
-                * selectivity of not more than
-                * (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it is
-                * not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the
-                * expression with MIN() is an upper bound.  Using the MIN()
-                * means we estimate from the point of view of the relation
-                * with smaller nd (since the larger nd is determining the
-                * MIN).  It is reasonable to assume that most tuples in this
-                * rel will have join partners, so the bound is probably
-                * reasonably tight and should be taken as-is.
+                * N2*(1-nullfrac2)/nd2 rows of rel2, so total join rows are at
+                * most N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join
+                * selectivity of not more than (1-nullfrac1)*(1-nullfrac2)/nd2.
+                * By the same logic it is not more than
+                * (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression with MIN()
+                * is an upper bound.  Using the MIN() means we estimate from the
+                * point of view of the relation with smaller nd (since the larger
+                * nd is determining the MIN).  It is reasonable to assume that
+                * most tuples in this rel will have join partners, so the bound
+                * is probably reasonably tight and should be taken as-is.
                 *
-                * XXX Can we be smarter if we have an MCV list for just one
-                * side? It seems that if we assume equal distribution for the
-                * other side, we end up with the same answer anyway.
+                * XXX Can we be smarter if we have an MCV list for just one side? It
+                * seems that if we assume equal distribution for the other side,
+                * we end up with the same answer anyway.
                 */
                double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
                double          nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
@@ -1619,7 +1572,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
 Datum
 neqjoinsel(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        JoinType        jointype = (JoinType) PG_GETARG_INT16(3);
@@ -1769,7 +1722,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  * variable.
  */
 void
-mergejoinscansel(Query *root, Node *clause,
+mergejoinscansel(PlannerInfo *root, Node *clause,
                                 Selectivity *leftscan,
                                 Selectivity *rightscan)
 {
@@ -1876,6 +1829,71 @@ fail:
        ReleaseVariableStats(rightvar);
 }
 
+
+/*
+ * Helper routine for estimate_num_groups: add an item to a list of
+ * GroupVarInfos, but only if it's not known equal to any of the existing
+ * entries.
+ */
+typedef struct
+{
+       Node       *var;                /* might be an expression, not just a Var */
+       RelOptInfo *rel;                /* relation it belongs to */
+       double          ndistinct;      /* # distinct values */
+} GroupVarInfo;
+
+static List *
+add_unique_group_var(PlannerInfo *root, List *varinfos,
+                                        Node *var, VariableStatData *vardata)
+{
+       GroupVarInfo *varinfo;
+       double          ndistinct;
+       ListCell   *lc;
+
+       ndistinct = get_variable_numdistinct(vardata);
+
+       /* cannot use foreach here because of possible list_delete */
+       lc = list_head(varinfos);
+       while (lc)
+       {
+               varinfo = (GroupVarInfo *) lfirst(lc);
+
+               /* must advance lc before list_delete possibly pfree's it */
+               lc = lnext(lc);
+
+               /* Drop exact duplicates */
+               if (equal(var, varinfo->var))
+                       return varinfos;
+
+               /*
+                * Drop known-equal vars, but only if they belong to different
+                * relations (see comments for estimate_num_groups)
+                */
+               if (vardata->rel != varinfo->rel &&
+                       exprs_known_equal(root, var, varinfo->var))
+               {
+                       if (varinfo->ndistinct <= ndistinct)
+                       {
+                               /* Keep older item, forget new one */
+                               return varinfos;
+                       }
+                       else
+                       {
+                               /* Delete the older item */
+                               varinfos = list_delete_ptr(varinfos, varinfo);
+                       }
+               }
+       }
+
+       varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
+
+       varinfo->var = var;
+       varinfo->rel = vardata->rel;
+       varinfo->ndistinct = ndistinct;
+       varinfos = lappend(varinfos, varinfo);
+       return varinfos;
+}
+
 /*
  * estimate_num_groups         - Estimate number of groups in a grouped query
  *
@@ -1907,6 +1925,9 @@ fail:
  *             increase the number of distinct values (unless it is volatile,
  *             which we consider unlikely for grouping), but it probably won't
  *             reduce the number of distinct values much either.
+ *             As a special case, if a GROUP BY expression can be matched to an
+ *             expressional index for which we have statistics, then we treat the
+ *             whole expression as though it were just a Var.
  *     2.      If the list contains Vars of different relations that are known equal
  *             due to equijoin clauses, then drop all but one of the Vars from each
  *             known-equal set, keeping the one with smallest estimated # of values
@@ -1915,10 +1936,13 @@ fail:
  *             if we considered ones of the same rel, we'd be double-counting the
  *             restriction selectivity of the equality in the next step.
  *     3.      For Vars within a single source rel, we multiply together the numbers
- *             of values, clamp to the number of rows in the rel, and then multiply
- *             by the selectivity of the restriction clauses for that rel.  The
- *             initial product is probably too high (it's the worst case) but since
- *             we can clamp to the rel's rows it won't be hugely bad.  Multiplying
+ *             of values, clamp to the number of rows in the rel (divided by 10 if
+ *             more than one Var), and then multiply by the selectivity of the
+ *             restriction clauses for that rel.  When there's more than one Var,
+ *             the initial product is probably too high (it's the worst case) but
+ *             clamping to a fraction of the rel's rows seems to be a helpful
+ *             heuristic for not letting the estimate get out of hand.  (The factor
+ *             of 10 is derived from pre-Postgres-7.4 practice.)  Multiplying
  *             by the restriction selectivity is effectively assuming that the
  *             restriction clauses are independent of the grouping, which is a crummy
  *             assumption, but it's hard to do better.
@@ -1931,27 +1955,46 @@ fail:
  * do better).
  */
 double
-estimate_num_groups(Query *root, List *groupExprs, double input_rows)
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
 {
-       List       *allvars = NIL;
        List       *varinfos = NIL;
        double          numdistinct;
        ListCell   *l;
-       typedef struct
-       {                                                       /* varinfos is a List of these */
-               Var                *var;
-               double          ndistinct;
-       } MyVarInfo;
 
        /* We should not be called unless query has GROUP BY (or DISTINCT) */
        Assert(groupExprs != NIL);
 
-       /* Step 1: get the unique Vars used */
+       /*
+        * Steps 1/2: find the unique Vars used, treating an expression as a Var
+        * if we can find stats for it.  For each one, record the statistical
+        * estimate of number of distinct values (total in its table, without
+        * regard for filtering).
+        */
        foreach(l, groupExprs)
        {
                Node       *groupexpr = (Node *) lfirst(l);
+               VariableStatData vardata;
                List       *varshere;
+               ListCell   *l2;
+
+               /*
+                * If examine_variable is able to deduce anything about the GROUP BY
+                * expression, treat it as a single variable even if it's really more
+                * complicated.
+                */
+               examine_variable(root, groupexpr, 0, &vardata);
+               if (vardata.statsTuple != NULL || vardata.isunique)
+               {
+                       varinfos = add_unique_group_var(root, varinfos,
+                                                                                       groupexpr, &vardata);
+                       ReleaseVariableStats(vardata);
+                       continue;
+               }
+               ReleaseVariableStats(vardata);
 
+               /*
+                * Else pull out the component Vars
+                */
                varshere = pull_var_clause(groupexpr, false);
 
                /*
@@ -1966,70 +2009,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
                                return input_rows;
                        continue;
                }
-               allvars = nconc(allvars, varshere);
-       }
-
-       /* If now no Vars, we must have an all-constant GROUP BY list. */
-       if (allvars == NIL)
-               return 1.0;
 
-       /* Use set_union() to discard duplicates */
-       allvars = set_union(NIL, allvars);
-
-       /*
-        * Step 2: acquire statistical estimate of number of distinct values
-        * of each Var (total in its table, without regard for filtering).
-        * Also, detect known-equal Vars and discard the ones we don't want.
-        */
-       foreach(l, allvars)
-       {
-               Var                *var = (Var *) lfirst(l);
-               VariableStatData vardata;
-               double          ndistinct;
-               bool            keep = true;
-               ListCell   *l2;
-
-               examine_variable(root, (Node *) var, 0, &vardata);
-               ndistinct = get_variable_numdistinct(&vardata);
-               ReleaseVariableStats(vardata);
-
-               /* cannot use foreach here because of possible lremove */
-               l2 = list_head(varinfos);
-               while (l2)
-               {
-                       MyVarInfo  *varinfo = (MyVarInfo *) lfirst(l2);
-
-                       /* must advance l2 before lremove possibly pfree's it */
-                       l2 = lnext(l2);
-
-                       if (var->varno != varinfo->var->varno &&
-                       exprs_known_equal(root, (Node *) var, (Node *) varinfo->var))
-                       {
-                               /* Found a match */
-                               if (varinfo->ndistinct <= ndistinct)
-                               {
-                                       /* Keep older item, forget new one */
-                                       keep = false;
-                                       break;
-                               }
-                               else
-                               {
-                                       /* Delete the older item */
-                                       varinfos = lremove(varinfo, varinfos);
-                               }
-                       }
-               }
-
-               if (keep)
+               /*
+                * Else add variables to varinfos list
+                */
+               foreach(l2, varshere)
                {
-                       MyVarInfo  *varinfo = (MyVarInfo *) palloc(sizeof(MyVarInfo));
+                       Node       *var = (Node *) lfirst(l2);
 
-                       varinfo->var = var;
-                       varinfo->ndistinct = ndistinct;
-                       varinfos = lcons(varinfo, varinfos);
+                       examine_variable(root, var, 0, &vardata);
+                       varinfos = add_unique_group_var(root, varinfos, var, &vardata);
+                       ReleaseVariableStats(vardata);
                }
        }
 
+       /* If now no Vars, we must have an all-constant GROUP BY list. */
+       if (varinfos == NIL)
+               return 1.0;
+
        /*
         * Steps 3/4: group Vars by relation and estimate total numdistinct.
         *
@@ -2038,26 +2035,32 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
         * these Vars from the newvarinfos list for the next iteration. This
         * is the easiest way to group Vars of same rel together.
         */
-       Assert(varinfos != NIL);
        numdistinct = 1.0;
 
        do
        {
-               MyVarInfo  *varinfo1 = (MyVarInfo *) linitial(varinfos);
-               RelOptInfo *rel = find_base_rel(root, varinfo1->var->varno);
+               GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
+               RelOptInfo *rel = varinfo1->rel;
                double          reldistinct = varinfo1->ndistinct;
+               double          relmaxndistinct = reldistinct;
+               int                     relvarcount = 1;
                List       *newvarinfos = NIL;
 
                /*
-                * Get the largest numdistinct estimate of the Vars for this rel.
+                * Get the product of numdistinct estimates of the Vars for this rel.
                 * Also, construct new varinfos list of remaining Vars.
                 */
                for_each_cell(l, lnext(list_head(varinfos)))
                {
-                       MyVarInfo  *varinfo2 = (MyVarInfo *) lfirst(l);
+                       GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 
-                       if (varinfo2->var->varno == varinfo1->var->varno)
+                       if (varinfo2->rel == varinfo1->rel)
+                       {
                                reldistinct *= varinfo2->ndistinct;
+                               if (relmaxndistinct < varinfo2->ndistinct)
+                                       relmaxndistinct = varinfo2->ndistinct;
+                               relvarcount++;
+                       }
                        else
                        {
                                /* not time to process varinfo2 yet */
@@ -2072,10 +2075,31 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
                if (rel->tuples > 0)
                {
                        /*
-                        * Clamp to size of rel, multiply by restriction selectivity.
+                        * Clamp to size of rel, or size of rel / 10 if multiple Vars.
+                        * The fudge factor is because the Vars are probably correlated
+                        * but we don't know by how much.  We should never clamp to less
+                        * than the largest ndistinct value for any of the Vars, though,
+                        * since there will surely be at least that many groups.
+                        */
+                       double          clamp = rel->tuples;
+
+                       if (relvarcount > 1)
+                       {
+                               clamp *= 0.1;
+                               if (clamp < relmaxndistinct)
+                               {
+                                       clamp = relmaxndistinct;
+                                       /* for sanity in case some ndistinct is too large: */
+                                       if (clamp > rel->tuples)
+                                               clamp = rel->tuples;
+                               }
+                       }
+                       if (reldistinct > clamp)
+                               reldistinct = clamp;
+
+                       /*
+                        * Multiply by restriction selectivity.
                         */
-                       if (reldistinct > rel->tuples)
-                               reldistinct = rel->tuples;
                        reldistinct *= rel->rows / rel->tuples;
 
                        /*
@@ -2129,7 +2153,7 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
  * inner rel is well-dispersed (or the alternatives seem much worse).
  */
 Selectivity
-estimate_hash_bucketsize(Query *root, Node *hashkey, int nbuckets)
+estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
 {
        VariableStatData vardata;
        double          estfract,
@@ -2187,8 +2211,8 @@ estimate_hash_bucketsize(Query *root, Node *hashkey, int nbuckets)
         * the number of buckets is less than the expected number of distinct
         * values; otherwise it is 1/ndistinct.
         */
-       if (ndistinct > (double) nbuckets)
-               estfract = 1.0 / (double) nbuckets;
+       if (ndistinct > nbuckets)
+               estfract = 1.0 / nbuckets;
        else
                estfract = 1.0 / ndistinct;
 
@@ -2279,19 +2303,20 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  double *scaledlobound, double *scaledhibound)
 {
        /*
-        * In present usage, we can assume that the valuetypid exactly matches
-        * the declared input type of the operator we are invoked for (because
-        * constant-folding will ensure that any Const passed to the operator
-        * has been reduced to the correct type).  However, the boundstypid is
-        * the type of some variable that might be only binary-compatible with
-        * the declared type; in particular it might be a domain type.  Must
-        * fold the variable type down to base type so we can recognize it.
-        * (But we can skip that lookup if the variable type matches the
-        * const.)
+        * Both the valuetypid and the boundstypid should exactly match
+        * the declared input type(s) of the operator we are invoked for,
+        * so we just error out if either is not recognized.
+        *
+ * XXX The histogram whose points we are interpolating between could
+ * belong to a column that's only binary-compatible with the declared type.
+        * In essence we are assuming that the semantics of binary-compatible
+        * types are enough alike that we can use a histogram generated with one
+        * type's operators to estimate selectivity for the other's.  This is
+        * outright wrong in some cases --- in particular signed versus unsigned
+        * interpretation could trip us up.  But it's useful enough in the
+        * majority of cases that we do it anyway.  Should think about more
+        * rigorous ways to do it.
         */
-       if (boundstypid != valuetypid)
-               boundstypid = getBaseType(boundstypid);
-
        switch (valuetypid)
        {
                        /*
@@ -2325,9 +2350,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                case TEXTOID:
                case NAMEOID:
                        {
-                               unsigned char *valstr = convert_string_datum(value, valuetypid);
-                               unsigned char *lostr = convert_string_datum(lobound, boundstypid);
-                               unsigned char *histr = convert_string_datum(hibound, boundstypid);
+                               char *valstr = convert_string_datum(value, valuetypid);
+                               char *lostr = convert_string_datum(lobound, boundstypid);
+                               char *histr = convert_string_datum(hibound, boundstypid);
 
                                convert_string_to_scalar(valstr, scaledvalue,
                                                                                 lostr, scaledlobound,
@@ -2378,6 +2403,7 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                        return true;
        }
        /* Don't know how to convert */
+       *scaledvalue = *scaledlobound = *scaledhibound = 0;
        return false;
 }
 
@@ -2446,31 +2472,31 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * so this is more likely to happen than you might think.)
  */
 static void
-convert_string_to_scalar(unsigned char *value,
+convert_string_to_scalar(char *value,
                                                 double *scaledvalue,
-                                                unsigned char *lobound,
+                                                char *lobound,
                                                 double *scaledlobound,
-                                                unsigned char *hibound,
+                                                char *hibound,
                                                 double *scaledhibound)
 {
        int                     rangelo,
                                rangehi;
-       unsigned char *sptr;
+       char       *sptr;
 
-       rangelo = rangehi = hibound[0];
+       rangelo = rangehi = (unsigned char) hibound[0];
        for (sptr = lobound; *sptr; sptr++)
        {
-               if (rangelo > *sptr)
-                       rangelo = *sptr;
-               if (rangehi < *sptr)
-                       rangehi = *sptr;
+               if (rangelo > (unsigned char) *sptr)
+                       rangelo = (unsigned char) *sptr;
+               if (rangehi < (unsigned char) *sptr)
+                       rangehi = (unsigned char) *sptr;
        }
        for (sptr = hibound; *sptr; sptr++)
        {
-               if (rangelo > *sptr)
-                       rangelo = *sptr;
-               if (rangehi < *sptr)
-                       rangehi = *sptr;
+               if (rangelo > (unsigned char) *sptr)
+                       rangelo = (unsigned char) *sptr;
+               if (rangehi < (unsigned char) *sptr)
+                       rangehi = (unsigned char) *sptr;
        }
        /* If range includes any upper-case ASCII chars, make it include all */
        if (rangelo <= 'Z' && rangehi >= 'A')
@@ -2526,9 +2552,9 @@ convert_string_to_scalar(unsigned char *value,
 }
 
 static double
-convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
+convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
 {
-       int                     slen = strlen((char *) value);
+       int                     slen = strlen(value);
        double          num,
                                denom,
                                base;
@@ -2549,7 +2575,7 @@ convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
        denom = base;
        while (slen-- > 0)
        {
-               int                     ch = *value++;
+               int                     ch = (unsigned char) *value++;
 
                if (ch < rangelo)
                        ch = rangelo - 1;
@@ -2568,7 +2594,7 @@ convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
  * When using a non-C locale, we must pass the string through strxfrm()
  * before continuing, so as to generate correct locale-specific results.
  */
-static unsigned char *
+static char *
 convert_string_datum(Datum value, Oid typid)
 {
        char       *val;
@@ -2635,7 +2661,7 @@ convert_string_datum(Datum value, Oid typid)
                val = xfrmstr;
        }
 
-       return (unsigned char *) val;
+       return val;
 }
 
 /*
@@ -2759,10 +2785,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                 * too accurate, but plenty good enough for our purposes.
                                 */
 #ifdef HAVE_INT64_TIMESTAMP
-                               return (interval->time + (interval->month * ((365.25 / 12.0) * 86400000000.0)));
+                               return interval->time + interval->day * (double)USECS_PER_DAY +
+                                          interval->month * ((DAYS_PER_YEAR / (double)MONTHS_PER_YEAR) * USECS_PER_DAY);
 #else
-                               return interval->time +
-                                       interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
+                               return interval->time + interval->day * SECS_PER_DAY +
+                                               interval->month * ((DAYS_PER_YEAR / (double)MONTHS_PER_YEAR) * (double)SECS_PER_DAY);
 #endif
                        }
                case RELTIMEOID:
@@ -2773,14 +2800,14 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 #endif
                case TINTERVALOID:
                        {
-                               TimeInterval interval = DatumGetTimeInterval(value);
+                               TimeInterval tinterval = DatumGetTimeInterval(value);
 
 #ifdef HAVE_INT64_TIMESTAMP
-                               if (interval->status != 0)
-                                       return ((interval->data[1] - interval->data[0]) * 1000000.0);
+                               if (tinterval->status != 0)
+                                       return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
 #else
-                               if (interval->status != 0)
-                                       return interval->data[1] - interval->data[0];
+                               if (tinterval->status != 0)
+                                       return tinterval->data[1] - tinterval->data[0];
 #endif
                                return 0;               /* for lack of a better idea */
                        }
@@ -2817,13 +2844,13 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
  *             and also indicate which side it was on and the other argument.
  *
  * Inputs:
- *     root: the Query
+ *     root: the planner info
  *     args: clause argument list
  *     varRelid: see specs for restriction selectivity functions
  *
  * Outputs: (these are valid only if TRUE is returned)
  *     *vardata: gets information about variable (see examine_variable)
- *     *other: gets other clause argument, stripped of binary relabeling
+ *     *other: gets other clause argument, aggressively reduced to a constant
  *     *varonleft: set TRUE if variable is on the left, FALSE if on the right
  *
  * Returns TRUE if a variable is identified, otherwise FALSE.
@@ -2832,7 +2859,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
  * callers are expecting that the other side will act like a pseudoconstant.
  */
 static bool
-get_restriction_variable(Query *root, List *args, int varRelid,
+get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
                                                 VariableStatData *vardata, Node **other,
                                                 bool *varonleft)
 {
@@ -2841,14 +2868,14 @@ get_restriction_variable(Query *root, List *args, int varRelid,
        VariableStatData rdata;
 
        /* Fail if not a binary opclause (probably shouldn't happen) */
-       if (length(args) != 2)
+       if (list_length(args) != 2)
                return false;
 
        left = (Node *) linitial(args);
        right = (Node *) lsecond(args);
 
        /*
-        * Examine both sides.  Note that when varRelid is nonzero, Vars of
+        * Examine both sides.  Note that when varRelid is nonzero, Vars of
         * other relations will be treated as pseudoconstants.
         */
        examine_variable(root, left, varRelid, vardata);
@@ -2860,7 +2887,7 @@ get_restriction_variable(Query *root, List *args, int varRelid,
        if (vardata->rel && rdata.rel == NULL)
        {
                *varonleft = true;
-               *other = rdata.var;
+               *other = estimate_expression_value(rdata.var);
                /* Assume we need no ReleaseVariableStats(rdata) here */
                return true;
        }
@@ -2868,7 +2895,7 @@ get_restriction_variable(Query *root, List *args, int varRelid,
        if (vardata->rel == NULL && rdata.rel)
        {
                *varonleft = false;
-               *other = vardata->var;
+               *other = estimate_expression_value(vardata->var);
                /* Assume we need no ReleaseVariableStats(*vardata) here */
                *vardata = rdata;
                return true;
@@ -2886,13 +2913,13 @@ get_restriction_variable(Query *root, List *args, int varRelid,
  *             Apply examine_variable() to each side of a join clause.
  */
 static void
-get_join_variables(Query *root, List *args,
+get_join_variables(PlannerInfo *root, List *args,
                                   VariableStatData *vardata1, VariableStatData *vardata2)
 {
        Node       *left,
                           *right;
 
-       if (length(args) != 2)
+       if (list_length(args) != 2)
                elog(ERROR, "join operator should take two arguments");
 
        left = (Node *) linitial(args);
@@ -2908,17 +2935,20 @@ get_join_variables(Query *root, List *args,
  *             Fill in a VariableStatData struct to describe the expression.
  *
  * Inputs:
- *     root: the Query
+ *     root: the planner info
  *     node: the expression tree to examine
  *     varRelid: see specs for restriction selectivity functions
  *
  * Outputs: *vardata is filled as follows:
- *     var: the input expression (with any binary relabeling stripped)
+ *     var: the input expression (binary relabeling is stripped if it is or
+ *             contains a variable; otherwise it is kept to preserve the type)
  *     rel: RelOptInfo for relation containing variable; NULL if expression
  *             contains no Vars (NOTE this could point to a RelOptInfo of a
  *             subquery, not one in the current query).
  *     statsTuple: the pg_statistic entry for the variable, if one exists;
  *             otherwise NULL.
+ *     vartype: exposed type of the expression; this should always match
+ *             the declared input type of the operator we are estimating for.
  *     atttype, atttypmod: type data to pass to get_attstatsslot().  This is
  *             commonly the same as the exposed type of the variable argument,
  *             but can be different in binary-compatible-type cases.
@@ -2926,52 +2956,57 @@ get_join_variables(Query *root, List *args,
  * Caller is responsible for doing ReleaseVariableStats() before exiting.
  */
 static void
-examine_variable(Query *root, Node *node, int varRelid,
+examine_variable(PlannerInfo *root, Node *node, int varRelid,
                                 VariableStatData *vardata)
 {
+       Node       *basenode;
        Relids          varnos;
        RelOptInfo *onerel;
 
        /* Make sure we don't return dangling pointers in vardata */
        MemSet(vardata, 0, sizeof(VariableStatData));
 
-       /* Ignore any binary-compatible relabeling */
+       /* Save the exposed type of the expression */
+       vardata->vartype = exprType(node);
 
-       if (IsA(node, RelabelType))
-               node = (Node *) ((RelabelType *) node)->arg;
+       /* Look inside any binary-compatible relabeling */
 
-       vardata->var = node;
+       if (IsA(node, RelabelType))
+               basenode = (Node *) ((RelabelType *) node)->arg;
+       else
+               basenode = node;
 
        /* Fast path for a simple Var */
 
-       if (IsA(node, Var) &&
-               (varRelid == 0 || varRelid == ((Var *) node)->varno))
+       if (IsA(basenode, Var) &&
+               (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
        {
-               Var                *var = (Var *) node;
+               Var                *var = (Var *) basenode;
                Oid                     relid;
 
+               vardata->var = basenode;        /* return Var without relabeling */
                vardata->rel = find_base_rel(root, var->varno);
                vardata->atttype = var->vartype;
                vardata->atttypmod = var->vartypmod;
 
-               relid = getrelid(var->varno, root->rtable);
+               relid = getrelid(var->varno, root->parse->rtable);
 
                if (OidIsValid(relid))
                {
                        vardata->statsTuple = SearchSysCache(STATRELATT,
                                                                                                 ObjectIdGetDatum(relid),
-                                                                                                Int16GetDatum(var->varattno),
+                                                                                       Int16GetDatum(var->varattno),
                                                                                                 0, 0);
                }
                else
                {
                        /*
-                        * XXX This means the Var comes from a JOIN or sub-SELECT.  Later
-                        * add code to dig down into the join etc and see if we can trace
-                        * the variable to something with stats.  (But beware of
-                        * sub-SELECTs with DISTINCT/GROUP BY/etc.  Perhaps there are
-                        * no cases where this would really be useful, because we'd have
-                        * flattened the subselect if it is??)
+                        * XXX This means the Var comes from a JOIN or sub-SELECT.
+                        * Later add code to dig down into the join etc and see if we
+                        * can trace the variable to something with stats.      (But
+                        * beware of sub-SELECTs with DISTINCT/GROUP BY/etc.  Perhaps
+                        * there are no cases where this would really be useful,
+                        * because we'd have flattened the subselect if it is??)
                         */
                }
 
@@ -2980,10 +3015,10 @@ examine_variable(Query *root, Node *node, int varRelid,
 
        /*
         * Okay, it's a more complicated expression.  Determine variable
-        * membership.  Note that when varRelid isn't zero, only vars of
-        * that relation are considered "real" vars.
+        * membership.  Note that when varRelid isn't zero, only vars of that
+        * relation are considered "real" vars.
         */
-       varnos = pull_varnos(node);
+       varnos = pull_varnos(basenode);
 
        onerel = NULL;
 
@@ -2996,8 +3031,9 @@ examine_variable(Query *root, Node *node, int varRelid,
                        if (varRelid == 0 || bms_is_member(varRelid, varnos))
                        {
                                onerel = find_base_rel(root,
-                                                (varRelid ? varRelid : bms_singleton_member(varnos)));
+                                  (varRelid ? varRelid : bms_singleton_member(varnos)));
                                vardata->rel = onerel;
+                               node = basenode; /* strip any relabeling */
                        }
                        /* else treat it as a constant */
                        break;
@@ -3006,11 +3042,13 @@ examine_variable(Query *root, Node *node, int varRelid,
                        {
                                /* treat it as a variable of a join relation */
                                vardata->rel = find_join_rel(root, varnos);
+                               node = basenode; /* strip any relabeling */
                        }
                        else if (bms_is_member(varRelid, varnos))
                        {
                                /* ignore the vars belonging to other relations */
                                vardata->rel = find_base_rel(root, varRelid);
+                               node = basenode; /* strip any relabeling */
                                /* note: no point in expressional-index search here */
                        }
                        /* else treat it as a constant */
@@ -3019,19 +3057,20 @@ examine_variable(Query *root, Node *node, int varRelid,
 
        bms_free(varnos);
 
+       vardata->var = node;
        vardata->atttype = exprType(node);
        vardata->atttypmod = exprTypmod(node);
 
        if (onerel)
        {
                /*
-                * We have an expression in vars of a single relation.  Try to
+                * We have an expression in vars of a single relation.  Try to
                 * match it to expressional index columns, in hopes of finding
                 * some statistics.
                 *
                 * XXX it's conceivable that there are multiple matches with
                 * different index opclasses; if so, we need to pick one that
-                * matches the operator we are estimating for.  FIXME later.
+                * matches the operator we are estimating for.  FIXME later.
                 */
                ListCell   *ilist;
 
@@ -3066,8 +3105,8 @@ examine_variable(Query *root, Node *node, int varRelid,
                                        if (equal(node, indexkey))
                                        {
                                                /*
-                                                * Found a match ... is it a unique index?
-                                                * Tests here should match has_unique_index().
+                                                * Found a match ... is it a unique index? Tests
+                                                * here should match has_unique_index().
                                                 */
                                                if (index->unique &&
                                                        index->ncolumns == 1 &&
@@ -3075,8 +3114,8 @@ examine_variable(Query *root, Node *node, int varRelid,
                                                        vardata->isunique = true;
                                                /* Has it got stats? */
                                                vardata->statsTuple = SearchSysCache(STATRELATT,
-                                                                                                                        ObjectIdGetDatum(index->indexoid),
-                                                                                                                        Int16GetDatum(pos + 1),
+                                                                          ObjectIdGetDatum(index->indexoid),
+                                                                                                 Int16GetDatum(pos + 1),
                                                                                                                         0, 0);
                                                if (vardata->statsTuple)
                                                        break;
@@ -3106,9 +3145,9 @@ get_variable_numdistinct(VariableStatData *vardata)
        double          ntuples;
 
        /*
-        * Determine the stadistinct value to use.  There are cases where
-        * we can get an estimate even without a pg_statistic entry, or
-        * can get a better value than is in pg_statistic.
+        * Determine the stadistinct value to use.      There are cases where we
+        * can get an estimate even without a pg_statistic entry, or can get a
+        * better value than is in pg_statistic.
         */
        if (HeapTupleIsValid(vardata->statsTuple))
        {
@@ -3118,21 +3157,21 @@ get_variable_numdistinct(VariableStatData *vardata)
                stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
                stadistinct = stats->stadistinct;
        }
-       else if (vardata->atttype == BOOLOID)
+       else if (vardata->vartype == BOOLOID)
        {
                /*
                 * Special-case boolean columns: presumably, two distinct values.
                 *
-                * Are there any other datatypes we should wire in special
-                * estimates for?
+                * Are there any other datatypes we should wire in special estimates
+                * for?
                 */
                stadistinct = 2.0;
        }
        else
        {
                /*
-                * We don't keep statistics for system columns, but in some
-                * cases we can infer distinctness anyway.
+                * We don't keep statistics for system columns, but in some cases
+                * we can infer distinctness anyway.
                 */
                if (vardata->var && IsA(vardata->var, Var))
                {
@@ -3140,27 +3179,28 @@ get_variable_numdistinct(VariableStatData *vardata)
                        {
                                case ObjectIdAttributeNumber:
                                case SelfItemPointerAttributeNumber:
-                                       stadistinct = -1.0;                     /* unique */
+                                       stadistinct = -1.0; /* unique */
                                        break;
                                case TableOidAttributeNumber:
-                                       stadistinct = 1.0;                      /* only 1 value */
+                                       stadistinct = 1.0;      /* only 1 value */
                                        break;
                                default:
-                                       stadistinct = 0.0;                      /* means "unknown" */
+                                       stadistinct = 0.0;      /* means "unknown" */
                                        break;
                        }
                }
                else
-                       stadistinct = 0.0;                                      /* means "unknown" */
+                       stadistinct = 0.0;      /* means "unknown" */
+
                /*
                 * XXX consider using estimate_num_groups on expressions?
                 */
        }
 
        /*
-        * If there is a unique index for the variable, assume it is unique
-        * no matter what pg_statistic says (the statistics could be out
-        * of date).  Can skip search if we already think it's unique.
+        * If there is a unique index for the variable, assume it is unique no
+        * matter what pg_statistic says (the statistics could be out of
+        * date).  Can skip search if we already think it's unique.
         */
        if (stadistinct != -1.0)
        {
@@ -3168,7 +3208,7 @@ get_variable_numdistinct(VariableStatData *vardata)
                        stadistinct = -1.0;
                else if (vardata->var && IsA(vardata->var, Var) &&
                                 vardata->rel &&
-                                has_unique_index(vardata->rel, 
+                                has_unique_index(vardata->rel,
                                                                  ((Var *) vardata->var)->varattno))
                        stadistinct = -1.0;
        }
@@ -3214,7 +3254,7 @@ get_variable_numdistinct(VariableStatData *vardata)
  * minimum instead of the maximum, just pass the ">" operator instead.)
  */
 static bool
-get_variable_maximum(Query *root, VariableStatData *vardata,
+get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
                                         Oid sortop, Datum *max)
 {
        Datum           tmax = 0;
@@ -3380,7 +3420,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
        }
        else
        {
-               bytea   *bstr = DatumGetByteaP(patt_const->constvalue);
+               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
 
                pattlen = VARSIZE(bstr) - VARHDRSZ;
                if (pattlen > 0)
@@ -3463,6 +3503,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
        char       *match;
        int                     pos,
                                match_pos,
+                               prev_pos,
+                               prev_match_pos,
                                paren_depth;
        char       *patt;
        char       *rest;
@@ -3523,11 +3565,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
 
        /* OK, allocate space for pattern */
        match = palloc(strlen(patt) + 1);
-       match_pos = 0;
+       prev_match_pos = match_pos = 0;
 
        /* note start at pos 1 to skip leading ^ */
-       for (pos = 1; patt[pos]; pos++)
+       for (prev_pos = pos = 1; patt[pos]; )
        {
+               int             len;
+
                /*
                 * Check for characters that indicate multiple possible matches
                 * here. XXX I suspect isalpha() is not an adequately
@@ -3541,6 +3585,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
                        (case_insensitive && isalpha((unsigned char) patt[pos])))
                        break;
 
+               /*
+                * In AREs, backslash followed by alphanumeric is an escape, not
+                * a quoted character.  Must treat it as having multiple possible
+                * matches.
+                */
+               if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
+                       break;
+
                /*
                 * Check for quantifiers.  Except for +, this means the preceding
                 * character is optional, so we must remove it from the prefix
@@ -3550,14 +3602,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
                        patt[pos] == '?' ||
                        patt[pos] == '{')
                {
-                       if (match_pos > 0)
-                               match_pos--;
-                       pos--;
+                       match_pos = prev_match_pos;
+                       pos = prev_pos;
                        break;
                }
                if (patt[pos] == '+')
                {
-                       pos--;
+                       pos = prev_pos;
                        break;
                }
                if (patt[pos] == '\\')
@@ -3567,7 +3618,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
                        if (patt[pos] == '\0')
                                break;
                }
-               match[match_pos++] = patt[pos];
+               /* save position in case we need to back up on next loop cycle */
+               prev_match_pos = match_pos;
+               prev_pos = pos;
+               /* must use encoding-aware processing here */
+               len = pg_mblen(&patt[pos]);
+               memcpy(&match[match_pos], &patt[pos], len);
+               match_pos += len;
+               pos += len;
        }
 
        match[match_pos] = '\0';
@@ -3630,7 +3688,7 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
  * Estimate the selectivity of a fixed prefix for a pattern match.
  *
  * A fixed prefix "foo" is estimated as the selectivity of the expression
- * "variable >= 'foo' AND variable < 'fop'" (see also indxqual.c).
+ * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
  *
  * We use the >= and < operators from the specified btree opclass to do the
  * estimation. The given variable and Const must be of the associated
@@ -3642,7 +3700,7 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
  * more useful to use the upper-bound code than not.
  */
 static Selectivity
-prefix_selectivity(Query *root, VariableStatData *vardata,
+prefix_selectivity(PlannerInfo *root, Node *variable,
                                   Oid opclass, Const *prefixcon)
 {
        Selectivity prefixsel;
@@ -3654,7 +3712,7 @@ prefix_selectivity(Query *root, VariableStatData *vardata,
                                                                BTGreaterEqualStrategyNumber);
        if (cmpopr == InvalidOid)
                elog(ERROR, "no >= operator for opclass %u", opclass);
-       cmpargs = makeList2(vardata->var, prefixcon);
+       cmpargs = list_make2(variable, prefixcon);
        /* Assume scalargtsel is appropriate for all supported types */
        prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel,
                                                                                                   PointerGetDatum(root),
@@ -3676,7 +3734,7 @@ prefix_selectivity(Query *root, VariableStatData *vardata,
                                                                        BTLessStrategyNumber);
                if (cmpopr == InvalidOid)
                        elog(ERROR, "no < operator for opclass %u", opclass);
-               cmpargs = makeList2(vardata->var, greaterstrcon);
+               cmpargs = list_make2(variable, greaterstrcon);
                /* Assume scalarltsel is appropriate for all supported types */
                topsel = DatumGetFloat8(DirectFunctionCall4(scalarltsel,
                                                                                                        PointerGetDatum(root),
@@ -3691,7 +3749,7 @@ prefix_selectivity(Query *root, VariableStatData *vardata,
                prefixsel = topsel + prefixsel - 1.0;
 
                /* Adjust for double-exclusion of NULLs */
-               prefixsel += nulltestsel(root, IS_NULL, vardata->var, 0);
+               prefixsel += nulltestsel(root, IS_NULL, variable, 0);
 
                /*
                 * A zero or slightly negative prefixsel should be converted into
@@ -3767,7 +3825,7 @@ like_selectivity(Const *patt_const, bool case_insensitive)
        }
        else
        {
-               bytea   *bstr = DatumGetByteaP(patt_const->constvalue);
+               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
 
                pattlen = VARSIZE(bstr) - VARHDRSZ;
                if (pattlen > 0)
@@ -3990,7 +4048,7 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
  *
  * NOTE: at present this assumes we are in the C locale, so that simple
  * bytewise comparison applies.  However, we might be in a multibyte
- * encoding such as UTF-8, so we do have to watch out for generating
+ * encoding such as UTF8, so we do have to watch out for generating
  * invalid encoding sequences.
  */
 Const *
@@ -4004,12 +4062,12 @@ make_greater_string(const Const *str_const)
        if (datatype == NAMEOID)
        {
                workstr = DatumGetCString(DirectFunctionCall1(nameout,
-                                                                                                         str_const->constvalue));
+                                                                                                str_const->constvalue));
                len = strlen(workstr);
        }
        else if (datatype == BYTEAOID)
        {
-               bytea   *bstr = DatumGetByteaP(str_const->constvalue);
+               bytea      *bstr = DatumGetByteaP(str_const->constvalue);
 
                len = VARSIZE(bstr) - VARHDRSZ;
                if (len > 0)
@@ -4026,7 +4084,7 @@ make_greater_string(const Const *str_const)
        else
        {
                workstr = DatumGetCString(DirectFunctionCall1(textout,
-                                                                                                         str_const->constvalue));
+                                                                                                str_const->constvalue));
                len = strlen(workstr);
        }
 
@@ -4047,8 +4105,7 @@ make_greater_string(const Const *str_const)
                        if (datatype != BYTEAOID)
                        {
                                /* do not generate invalid encoding sequences */
-                               if (!pg_verifymbstr((const unsigned char *) workstr,
-                                                                       len, true))
+                               if (!pg_verifymbstr(workstr, len, true))
                                        continue;
                                workstr_const = string_to_const(workstr, datatype);
                        }
@@ -4067,7 +4124,7 @@ make_greater_string(const Const *str_const)
                 * byte, depending on the character encoding.
                 */
                if (datatype != BYTEAOID && pg_database_encoding_max_length() > 1)
-                       len = pg_mbcliplen((const unsigned char *) workstr, len, len - 1);
+                       len = pg_mbcliplen(workstr, len, len - 1);
                else
                        len -= 1;
 
@@ -4122,8 +4179,8 @@ string_to_const(const char *str, Oid datatype)
 static Const *
 string_to_bytea_const(const char *str, size_t str_len)
 {
-       bytea  *bstr = palloc(VARHDRSZ + str_len);
-       Datum   conval;
+       bytea      *bstr = palloc(VARHDRSZ + str_len);
+       Datum           conval;
 
        memcpy(VARDATA(bstr), str, str_len);
        VARATT_SIZEP(bstr) = VARHDRSZ + str_len;
@@ -4140,18 +4197,23 @@ string_to_bytea_const(const char *str, size_t str_len)
  * don't have any better idea about how to estimate.  Index-type-specific
  * knowledge can be incorporated in the type-specific routines.
  *
+ * One bit of index-type-specific knowledge we can relatively easily use
+ * in genericcostestimate is the estimate of the number of index tuples
+ * visited.  If numIndexTuples is not 0 then it is used as the estimate,
+ * otherwise we compute a generic estimate.
+ *
  *-------------------------------------------------------------------------
  */
 
 static void
-genericcostestimate(Query *root, RelOptInfo *rel,
+genericcostestimate(PlannerInfo *root,
                                        IndexOptInfo *index, List *indexQuals,
+                                       double numIndexTuples,
                                        Cost *indexStartupCost,
                                        Cost *indexTotalCost,
                                        Selectivity *indexSelectivity,
                                        double *indexCorrelation)
 {
-       double          numIndexTuples;
        double          numIndexPages;
        QualCost        index_qual_cost;
        double          qual_op_cost;
@@ -4161,58 +4223,58 @@ genericcostestimate(Query *root, RelOptInfo *rel,
        /*
         * If the index is partial, AND the index predicate with the
         * explicitly given indexquals to produce a more accurate idea of the
-        * index selectivity.  This may produce redundant clauses.  We get rid
-        * of exact duplicates in the code below.  We expect that most
-        * cases of partial redundancy (such as "x < 4" from the qual and
-        * "x < 5" from the predicate) will be recognized and handled correctly
-        * by clauselist_selectivity().  This assumption is somewhat fragile,
-        * since it depends on pred_test() and clauselist_selectivity() having
-        * similar capabilities, and there are certainly many cases where we will
-        * end up with a too-low selectivity estimate.  This will bias the system
-        * in favor of using partial indexes where possible, which is not
-        * necessarily a bad thing.  But it'd be nice to do better someday.
+        * index selectivity.  This may produce redundant clauses.      We get rid
+        * of exact duplicates in the code below.  We expect that most cases
+        * of partial redundancy (such as "x < 4" from the qual and "x < 5"
+        * from the predicate) will be recognized and handled correctly by
+        * clauselist_selectivity().  This assumption is somewhat fragile,
+        * since it depends on predicate_implied_by() and clauselist_selectivity()
+        * having similar capabilities, and there are certainly many cases where
+        * we will end up with a too-low selectivity estimate.  This will bias the
+        * system in favor of using partial indexes where possible, which is not
+        * necessarily a bad thing. But it'd be nice to do better someday.
         *
         * Note that index->indpred and indexQuals are both in implicit-AND form,
         * so ANDing them together just takes merging the lists.  However,
-        * eliminating duplicates is a bit trickier because indexQuals contains
-        * RestrictInfo nodes and the indpred does not.  It is okay to pass a
-        * mixed list to clauselist_selectivity, but we have to work a bit to
-        * generate a list without logical duplicates.  (We could just set_union
-        * indpred and strippedQuals, but then we'd not get caching of per-qual
-        * selectivity estimates.)
+        * eliminating duplicates is a bit trickier because indexQuals
+        * contains RestrictInfo nodes and the indpred does not.  It is okay
+        * to pass a mixed list to clauselist_selectivity, but we have to work
+        * a bit to generate a list without logical duplicates.  (We could
+        * just list_union indpred and strippedQuals, but then we'd not get
+        * caching of per-qual selectivity estimates.)
         */
        if (index->indpred != NIL)
        {
-               List   *strippedQuals;
-               List   *predExtraQuals;
+               List       *strippedQuals;
+               List       *predExtraQuals;
 
                strippedQuals = get_actual_clauses(indexQuals);
-               predExtraQuals = set_difference(index->indpred, strippedQuals);
-               selectivityQuals = nconc(predExtraQuals, indexQuals);
+               predExtraQuals = list_difference(index->indpred, strippedQuals);
+               selectivityQuals = list_concat(predExtraQuals, indexQuals);
        }
        else
                selectivityQuals = indexQuals;
 
        /* Estimate the fraction of main-table tuples that will be visited */
        *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
-                                                                                          rel->relid,
+                                                                                          index->rel->relid,
                                                                                           JOIN_INNER);
 
        /*
-        * Estimate the number of tuples that will be visited.  We do it in
-        * this rather peculiar-looking way in order to get the right answer
-        * for partial indexes.  We can bound the number of tuples by the
-        * index size, in any case.
+        * If caller didn't give us an estimate, estimate the number of index
+        * tuples that will be visited.  We do it in this rather peculiar-looking
+        * way in order to get the right answer for partial indexes.
         */
-       numIndexTuples = *indexSelectivity * rel->tuples;
-
-       if (numIndexTuples > index->tuples)
-               numIndexTuples = index->tuples;
+       if (numIndexTuples <= 0.0)
+               numIndexTuples = *indexSelectivity * index->rel->tuples;
 
        /*
-        * Always estimate at least one tuple is touched, even when
+        * We can bound the number of tuples by the index size in any case.
+        * Also, always estimate at least one tuple is touched, even when
         * indexSelectivity estimate is tiny.
         */
+       if (numIndexTuples > index->tuples)
+               numIndexTuples = index->tuples;
        if (numIndexTuples < 1.0)
                numIndexTuples = 1.0;
 
@@ -4235,25 +4297,25 @@ genericcostestimate(Query *root, RelOptInfo *rel,
        /*
         * Compute the index access cost.
         *
-        * Disk cost: our generic assumption is that the index pages will be
-        * read sequentially, so they have cost 1.0 each, not random_page_cost.
+        * Disk cost: our generic assumption is that the index pages will be read
+        * sequentially, so they have cost 1.0 each, not random_page_cost.
         */
        *indexTotalCost = numIndexPages;
 
        /*
-        * CPU cost: any complex expressions in the indexquals will need to
-        * be evaluated once at the start of the scan to reduce them to runtime
-        * keys to pass to the index AM (see nodeIndexscan.c).  We model the
-        * per-tuple CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost
-        * per indexqual operator.
+        * CPU cost: any complex expressions in the indexquals will need to be
+        * evaluated once at the start of the scan to reduce them to runtime
+        * keys to pass to the index AM (see nodeIndexscan.c).  We model the
+        * per-tuple CPU costs as cpu_index_tuple_cost plus one
+        * cpu_operator_cost per indexqual operator.
         *
         * Note: this neglects the possible costs of rechecking lossy operators
-        * and OR-clause expressions.  Detecting that that might be needed seems
-        * more expensive than it's worth, though, considering all the other
-        * inaccuracies here ...
+        * and OR-clause expressions.  Detecting that that might be needed
+        * seems more expensive than it's worth, though, considering all the
+        * other inaccuracies here ...
         */
        cost_qual_eval(&index_qual_cost, indexQuals);
-       qual_op_cost = cpu_operator_cost * length(indexQuals);
+       qual_op_cost = cpu_operator_cost * list_length(indexQuals);
        qual_arg_cost = index_qual_cost.startup +
                index_qual_cost.per_tuple - qual_op_cost;
        if (qual_arg_cost < 0)          /* just in case... */
@@ -4272,34 +4334,121 @@ genericcostestimate(Query *root, RelOptInfo *rel,
 Datum
 btcostestimate(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
-       RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
        Oid                     relid;
        AttrNumber      colnum;
        HeapTuple       tuple;
+       double          numIndexTuples;
+       List       *indexBoundQuals;
+       int                     indexcol;
+       bool            eqQualHere;
+       ListCell   *l;
+
+       /*
+        * For a btree scan, only leading '=' quals plus inequality quals
+        * for the immediately next attribute contribute to index selectivity
+        * (these are the "boundary quals" that determine the starting and
+        * stopping points of the index scan).  Additional quals can suppress
+        * visits to the heap, so it's OK to count them in indexSelectivity,
+        * but they should not count for estimating numIndexTuples.  So we must
+        * examine the given indexQuals to find out which ones count as boundary
+        * quals.  We rely on the knowledge that they are given in index column
+        * order.
+        */
+       indexBoundQuals = NIL;
+       indexcol = 0;
+       eqQualHere = false;
+       foreach(l, indexQuals)
+       {
+               RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+               Expr   *clause;
+               Oid             clause_op;
+               int             op_strategy;
+
+               Assert(IsA(rinfo, RestrictInfo));
+               clause = rinfo->clause;
+               Assert(IsA(clause, OpExpr));
+               clause_op = ((OpExpr *) clause)->opno;
+               if (match_index_to_operand(get_leftop(clause), indexcol, index))
+               {
+                       /* clause_op is correct */
+               }
+               else if (match_index_to_operand(get_rightop(clause), indexcol, index))
+               {
+                       /* Must flip operator to get the opclass member */
+                       clause_op = get_commutator(clause_op);
+               }
+               else
+               {
+                       /* Must be past the end of quals for indexcol, try next */
+                       if (!eqQualHere)
+                               break;                  /* done if no '=' qual for indexcol */
+                       indexcol++;
+                       eqQualHere = false;
+                       if (match_index_to_operand(get_leftop(clause), indexcol, index))
+                       {
+                               /* clause_op is correct */
+                       }
+                       else if (match_index_to_operand(get_rightop(clause),
+                                                                                       indexcol, index))
+                       {
+                               /* Must flip operator to get the opclass member */
+                               clause_op = get_commutator(clause_op);
+                       }
+                       else
+                       {
+                               /* No quals for new indexcol, so we are done */
+                               break;
+                       }
+               }
+               op_strategy = get_op_opclass_strategy(clause_op,
+                                                                                         index->classlist[indexcol]);
+               Assert(op_strategy != 0);                       /* not a member of opclass?? */
+               if (op_strategy == BTEqualStrategyNumber)
+                       eqQualHere = true;
+               indexBoundQuals = lappend(indexBoundQuals, rinfo);
+       }
+
+       /*
+        * If index is unique and we found an '=' clause for each column,
+        * we can just assume numIndexTuples = 1 and skip the expensive
+        * clauselist_selectivity calculations.
+        */
+       if (index->unique && indexcol == index->ncolumns - 1 && eqQualHere)
+               numIndexTuples = 1.0;
+       else
+       {
+               Selectivity btreeSelectivity;
+
+               btreeSelectivity = clauselist_selectivity(root, indexBoundQuals,
+                                                                                                 index->rel->relid,
+                                                                                                 JOIN_INNER);
+               numIndexTuples = btreeSelectivity * index->rel->tuples;
+       }
 
-       genericcostestimate(root, rel, index, indexQuals,
+       genericcostestimate(root, index, indexQuals, numIndexTuples,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);
 
        /*
-        * If we can get an estimate of the first column's ordering correlation C
-        * from pg_statistic, estimate the index correlation as C for a single-
-        * column index, or C * 0.75 for multiple columns.  (The idea here is
-        * that multiple columns dilute the importance of the first column's
-        * ordering, but don't negate it entirely.  Before 7.5 we divided the
-        * correlation by the number of columns, but that seems too strong.)
+        * If we can get an estimate of the first column's ordering
+        * correlation C from pg_statistic, estimate the index correlation as
+        * C for a single-column index, or C * 0.75 for multiple columns.
+        * (The idea here is that multiple columns dilute the importance of
+        * the first column's ordering, but don't negate it entirely.  Before
+        * 8.0 we divided the correlation by the number of columns, but that
+        * seems too strong.)
         */
        if (index->indexkeys[0] != 0)
        {
                /* Simple variable --- look to stats for the underlying table */
-               relid = getrelid(rel->relid, root->rtable);
+               relid = getrelid(index->rel->relid, root->parse->rtable);
                Assert(relid != InvalidOid);
                colnum = index->indexkeys[0];
        }
@@ -4351,16 +4500,15 @@ btcostestimate(PG_FUNCTION_ARGS)
 Datum
 rtcostestimate(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
-       RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-       genericcostestimate(root, rel, index, indexQuals,
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+
+       genericcostestimate(root, index, indexQuals, 0.0,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);
 
@@ -4370,16 +4518,15 @@ rtcostestimate(PG_FUNCTION_ARGS)
 Datum
 hashcostestimate(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
-       RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-       genericcostestimate(root, rel, index, indexQuals,
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+
+       genericcostestimate(root, index, indexQuals, 0.0,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);
 
@@ -4389,16 +4536,15 @@ hashcostestimate(PG_FUNCTION_ARGS)
 Datum
 gistcostestimate(PG_FUNCTION_ARGS)
 {
-       Query      *root = (Query *) PG_GETARG_POINTER(0);
-       RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-       List       *indexQuals = (List *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-       genericcostestimate(root, rel, index, indexQuals,
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
+       List       *indexQuals = (List *) PG_GETARG_POINTER(2);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+
+       genericcostestimate(root, index, indexQuals, 0.0,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);