Add support for EUI-64 MAC addresses as macaddr8

[postgresql] / src / backend / utils / adt / selfuncs.c
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index a28868c3130263d709e8631fd33eec602ae0590c..bb9a5446861bde72e1caee147ef7e0c382b8de16 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -7,10 +7,10 @@
   *       Selectivity routines are registered in the pg_operator catalog
   *       in the "oprrest" and "oprjoin" attributes.
   *
- *       Index cost functions are registered in the pg_am catalog
- *       in the "amcostestimate" attribute.
+ *       Index cost functions are located via the index AM's API struct,
+ *       which is obtained from the handler function registered in pg_am.
   *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
@@ -98,13 +98,16 @@
  #include "postgres.h"
  
  #include <ctype.h>
+#include <float.h>
  #include <math.h>
  
  #include "access/gin.h"
  #include "access/htup_details.h"
  #include "access/sysattr.h"
  #include "catalog/index.h"
+#include "catalog/pg_am.h"
  #include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_statistic.h"
  #include "catalog/pg_type.h"
@@ -128,6 +131,7 @@
  #include "utils/date.h"
  #include "utils/datum.h"
  #include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
  #include "utils/lsyscache.h"
  #include "utils/nabstime.h"
  #include "utils/pg_locale.h"
@@ -138,6 +142,7 @@
  #include "utils/timestamp.h"
  #include "utils/tqual.h"
  #include "utils/typcache.h"
+#include "utils/varlena.h"
  
  
  /* Hooks for plugins to get control when we ask for stats */
@@ -1439,6 +1444,50 @@ icnlikesel(PG_FUNCTION_ARGS)
         PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
  }
  
+/*
+ * boolvarsel
+ *             Estimate the selectivity of a bare boolean-valued expression.
+ *
+ * "arg" need not be a plain Var; any boolean expression is acceptable.  A
+ * real estimate is produced only when the expression involves just Vars of
+ * the specified relation and statistics are found for it (an expression can
+ * have statistics if it is indexed).  Otherwise a fixed default is returned.
+ */
+Selectivity
+boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+{
+       VariableStatData vardata;
+       double          result;
+
+       /* Generic fallback when nothing better is known about the expression */
+       result = 0.5;
+
+       examine_variable(root, arg, varRelid, &vardata);
+       if (!HeapTupleIsValid(vardata.statsTuple))
+       {
+               /*
+                * No statistics.  A function call gets 0.3333333 rather than the
+                * generic fallback: the figure has no real justification, but
+                * Postgres has estimated function calls this way since 1992, and
+                * behavior that old should not be changed in a hurry.
+                */
+               if (is_funcclause(arg))
+                       result = 0.3333333;
+       }
+       else
+       {
+               /*
+                * Statistics exist.  The bare boolean V means the same as the
+                * clause V = 't', so estimate it as that equality.
+                */
+               result = var_eq_const(&vardata, BooleanEqualOperator,
+                                                        BoolGetDatum(true), false, true);
+       }
+
+       ReleaseVariableStats(vardata);
+       return result;
+}
+
  /*
   *             booltestsel             - Selectivity of BooleanTest Node.
   */
@@ -2464,10 +2513,24 @@ eqjoinsel_semi(Oid operator,
          * We can apply this clamping both with respect to the base relation from
          * which the join variable comes (if there is just one), and to the
          * immediate inner input relation of the current join.
+        *
+        * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+        * great, maybe, but it didn't come out of nowhere either.  This is most
+        * helpful when the inner relation is empty and consequently has no stats.
          */
         if (vardata2->rel)
-               nd2 = Min(nd2, vardata2->rel->rows);
-       nd2 = Min(nd2, inner_rel->rows);
+       {
+               if (nd2 >= vardata2->rel->rows)
+               {
+                       nd2 = vardata2->rel->rows;
+                       isdefault2 = false;
+               }
+       }
+       if (nd2 >= inner_rel->rows)
+       {
+               nd2 = inner_rel->rows;
+               isdefault2 = false;
+       }
  
         if (HeapTupleIsValid(vardata1->statsTuple))
         {
@@ -3158,6 +3221,8 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
   *     groupExprs - list of expressions being grouped by
   *     input_rows - number of rows estimated to arrive at the group/unique
   *             filter step
+ *     pgset - NULL, or a List** pointing to a grouping set to filter the
+ *             groupExprs against
   *
   * Given the lack of any cross-correlation statistics in the system, it's
   * impossible to do anything really trustworthy with GROUP BY conditions
@@ -3188,15 +3253,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
   *             restriction selectivity of the equality in the next step.
   *     4.  For Vars within a single source rel, we multiply together the numbers
   *             of values, clamp to the number of rows in the rel (divided by 10 if
- *             more than one Var), and then multiply by the selectivity of the
- *             restriction clauses for that rel.  When there's more than one Var,
- *             the initial product is probably too high (it's the worst case) but
- *             clamping to a fraction of the rel's rows seems to be a helpful
- *             heuristic for not letting the estimate get out of hand.  (The factor
- *             of 10 is derived from pre-Postgres-7.4 practice.)  Multiplying
- *             by the restriction selectivity is effectively assuming that the
- *             restriction clauses are independent of the grouping, which is a crummy
- *             assumption, but it's hard to do better.
+ *             more than one Var), and then multiply by a factor based on the
+ *             selectivity of the restriction clauses for that rel.  When there's
+ *             more than one Var, the initial product is probably too high (it's the
+ *             worst case) but clamping to a fraction of the rel's rows seems to be a
+ *             helpful heuristic for not letting the estimate get out of hand.  (The
+ *             factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
+ *             we multiply by to adjust for the restriction selectivity assumes that
+ *             the restriction clauses are independent of the grouping, which may not
+ *             be a valid assumption, but it's hard to do better.
   *     5.  If there are Vars from multiple rels, we repeat step 4 for each such
   *             rel, and multiply the results together.
   * Note that rels not containing grouped Vars are ignored completely, as are
@@ -3205,11 +3270,13 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
   * but we don't have the info to do better).
   */
  double
-estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
+                                       List **pgset)
  {
         List       *varinfos = NIL;
         double          numdistinct;
         ListCell   *l;
+       int                     i;
  
         /*
          * We don't ever want to return an estimate of zero groups, as that tends
@@ -3224,7 +3291,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
          * for normal cases with GROUP BY or DISTINCT, but it is possible for
          * corner cases with set operations.)
          */
-       if (groupExprs == NIL)
+       if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
                 return 1.0;
  
         /*
@@ -3236,6 +3303,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
          */
         numdistinct = 1.0;
  
+       i = 0;
         foreach(l, groupExprs)
         {
                 Node       *groupexpr = (Node *) lfirst(l);
@@ -3243,6 +3311,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                 List       *varshere;
                 ListCell   *l2;
  
+               /* is expression in this grouping set? */
+               if (pgset && !list_member_int(*pgset, i++))
+                       continue;
+
                 /* Short-circuit for expressions returning boolean */
                 if (exprType(groupexpr) == BOOLOID)
                 {
@@ -3272,7 +3344,8 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                  * down to ignoring the possible addition of nulls to the result set).
                  */
                 varshere = pull_var_clause(groupexpr,
-                                                                  PVC_RECURSE_AGGREGATES,
+                                                                  PVC_RECURSE_AGGREGATES |
+                                                                  PVC_RECURSE_WINDOWFUNCS |
                                                                    PVC_RECURSE_PLACEHOLDERS);
  
                 /*
@@ -3382,9 +3455,51 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
                                 reldistinct = clamp;
  
                         /*
-                        * Multiply by restriction selectivity.
+                        * Update the estimate based on the restriction selectivity,
+                        * guarding against division by zero when reldistinct is zero.
+                        * Also skip this if we know that we are returning all rows.
                          */
-                       reldistinct *= rel->rows / rel->tuples;
+                       if (reldistinct > 0 && rel->rows < rel->tuples)
+                       {
+                               /*
+                                * Given a table containing N rows with n distinct values in a
+                                * uniform distribution, if we select p rows at random then
+                                * the expected number of distinct values selected is
+                                *
+                                * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+                                *
+                                * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+                                *
+                                * See "Approximating block accesses in database
+                                * organizations", S. B. Yao, Communications of the ACM,
+                                * Volume 20 Issue 4, April 1977 Pages 260-261.
+                                *
+                                * Alternatively, re-arranging the terms from the factorials,
+                                * this may be written as
+                                *
+                                * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+                                *
+                                * This form of the formula is more efficient to compute in
+                                * the common case where p is larger than N/n.  Additionally,
+                                * as pointed out by Dell'Era, if i << N for all terms in the
+                                * product, it can be approximated by
+                                *
+                                * n * (1 - ((N-p)/N)^(N/n))
+                                *
+                                * See "Expected distinct values when selecting from a bag
+                                * without replacement", Alberto Dell'Era,
+                                * http://www.adellera.it/investigations/distinct_balls/.
+                                *
+                                * The condition i << N is equivalent to n >> 1, so this is a
+                                * good approximation when the number of distinct values in
+                                * the table is large.  It turns out that this formula also
+                                * works well even when n is small.
+                                */
+                               reldistinct *=
+                                       (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+                                                        rel->tuples / reldistinct));
+                       }
+                       reldistinct = clamp_row_est(reldistinct);
  
                         /*
                          * Update estimate of total distinct groups.
@@ -3483,8 +3598,11 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
          * XXX Possibly better way, but much more expensive: multiply by
          * selectivity of rel's restriction clauses that mention the target Var.
          */
-       if (vardata.rel)
+       if (vardata.rel && vardata.rel->tuples > 0)
+       {
                 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+               ndistinct = clamp_row_est(ndistinct);
+       }
  
         /*
          * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
@@ -3620,6 +3738,7 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                 case REGCONFIGOID:
                 case REGDICTIONARYOID:
                 case REGROLEOID:
+               case REGNAMESPACEOID:
                         *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
                         *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
                         *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
@@ -3681,6 +3800,7 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                 case INETOID:
                 case CIDROID:
                 case MACADDROID:
+               case MACADDR8OID:
                         *scaledvalue = convert_network_to_scalar(value, valuetypid);
                         *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
                         *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
@@ -3726,6 +3846,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
                 case REGCONFIGOID:
                 case REGDICTIONARYOID:
                 case REGROLEOID:
+               case REGNAMESPACEOID:
                         /* we can treat OIDs as integers... */
                         return (double) DatumGetObjectId(value);
         }
@@ -3850,10 +3971,16 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
                 return 0.0;                             /* empty string has scalar value 0 */
  
         /*
-        * Since base is at least 10, need not consider more than about 20 chars
+        * There seems little point in considering more than a dozen bytes from
+        * the string.  Since base is at least 10, that will give us nominal
+        * resolution of at least 12 decimal digits, which is surely far more
+        * precision than this estimation technique has got anyway (especially in
+        * non-C locales).  Also, even with the maximum possible base of 256, this
+        * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
+        * overflow on any known machine.
          */
-       if (slen > 20)
-               slen = 20;
+       if (slen > 12)
+               slen = 12;
  
         /* Convert initial characters to fraction */
         base = rangehi - rangelo + 1;
@@ -3921,16 +4048,8 @@ convert_string_datum(Datum value, Oid typid)
                 size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
  
                 /*
-                * Note: originally we guessed at a suitable output buffer size, and
-                * only needed to call strxfrm twice if our guess was too small.
-                * However, it seems that some versions of Solaris have buggy strxfrm
-                * that can write past the specified buffer length in that scenario.
-                * So, do it the dumb way for portability.
-                *
-                * Yet other systems (e.g., glibc) sometimes return a smaller value
-                * from the second call than the first; thus the Assert must be <= not
-                * == as you'd expect.  Can't any of these people program their way
-                * out of a paper bag?
+                * XXX: We could guess at a suitable output buffer size and only call
+                * strxfrm twice if our guess is too small.
                  *
                  * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
                  * bogus data or set an error. This is not really a problem unless it
@@ -3963,6 +4082,11 @@ convert_string_datum(Datum value, Oid typid)
  #endif
                 xfrmstr = (char *) palloc(xfrmlen + 1);
                 xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+
+               /*
+                * Some systems (e.g., glibc) can return a smaller value from the
+                * second call than the first; thus the Assert must be <= not ==.
+                */
                 Assert(xfrmlen2 <= xfrmlen);
                 pfree(val);
                 val = xfrmstr;
@@ -4090,31 +4214,17 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                  * average month length of 365.25/12.0 days.  Not too
                                  * accurate, but plenty good enough for our purposes.
                                  */
-#ifdef HAVE_INT64_TIMESTAMP
                                 return interval->time + interval->day * (double) USECS_PER_DAY +
                                         interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
-                               return interval->time + interval->day * SECS_PER_DAY +
-                                       interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
                         }
                 case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
                         return (DatumGetRelativeTime(value) * 1000000.0);
-#else
-                       return DatumGetRelativeTime(value);
-#endif
                 case TINTERVALOID:
                         {
                                 TimeInterval tinterval = DatumGetTimeInterval(value);
  
-#ifdef HAVE_INT64_TIMESTAMP
                                 if (tinterval->status != 0)
                                         return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
-                               if (tinterval->status != 0)
-                                       return tinterval->data[1] - tinterval->data[0];
-#endif
                                 return 0;               /* for lack of a better idea */
                         }
                 case TIMEOID:
@@ -4124,11 +4234,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                 TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
  
                                 /* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
                                 return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
-                               return (double) (timetz->time + timetz->zone);
-#endif
                         }
         }
  
@@ -4207,7 +4313,7 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
                 return true;
         }
  
-       /* Ooops, clause has wrong structure (probably var op var) */
+       /* Oops, clause has wrong structure (probably var op var) */
         ReleaseVariableStats(*vardata);
         ReleaseVariableStats(rdata);
  
@@ -4624,13 +4730,14 @@ examine_simple_variable(PlannerInfo *root, Var *var,
   * *isdefault: set to TRUE if the result is a default rather than based on
   * anything meaningful.
   *
- * NB: be careful to produce an integral result, since callers may compare
- * the result to exact integer counts.
+ * NB: be careful to produce a positive integral result, since callers may
+ * compare the result to exact integer counts, or might divide by it.
   */
  double
  get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
  {
         double          stadistinct;
+       double          stanullfrac = 0.0;
         double          ntuples;
  
         *isdefault = false;
@@ -4638,7 +4745,8 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
         /*
          * Determine the stadistinct value to use.  There are cases where we can
          * get an estimate even without a pg_statistic entry, or can get a better
-        * value than is in pg_statistic.
+        * value than is in pg_statistic.  Grab stanullfrac too if we can find it
+        * (otherwise, assume no nulls, for lack of any better idea).
          */
         if (HeapTupleIsValid(vardata->statsTuple))
         {
@@ -4647,6 +4755,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
  
                 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
                 stadistinct = stats->stadistinct;
+               stanullfrac = stats->stanullfrac;
         }
         else if (vardata->vartype == BOOLOID)
         {
@@ -4670,7 +4779,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
                         {
                                 case ObjectIdAttributeNumber:
                                 case SelfItemPointerAttributeNumber:
-                                       stadistinct = -1.0; /* unique */
+                                       stadistinct = -1.0; /* unique (and all non null) */
                                         break;
                                 case TableOidAttributeNumber:
                                         stadistinct = 1.0;      /* only 1 value */
@@ -4692,16 +4801,17 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
          * If there is a unique index or DISTINCT clause for the variable, assume
          * it is unique no matter what pg_statistic says; the statistics could be
          * out of date, or we might have found a partial unique index that proves
-        * the var is unique for this query.
+        * the var is unique for this query.  However, we'd better still believe
+        * the null-fraction statistic.
          */
         if (vardata->isunique)
-               stadistinct = -1.0;
+               stadistinct = -1.0 * (1.0 - stanullfrac);
  
         /*
          * If we had an absolute estimate, use that.
          */
         if (stadistinct > 0.0)
-               return stadistinct;
+               return clamp_row_est(stadistinct);
  
         /*
          * Otherwise we need to get the relation size; punt if not available.
@@ -4722,7 +4832,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
          * If we had a relative estimate, use that.
          */
         if (stadistinct < 0.0)
-               return floor((-stadistinct * ntuples) + 0.5);
+               return clamp_row_est(-stadistinct * ntuples);
  
         /*
          * With no data, estimate ndistinct = ntuples if the table is small, else
@@ -4730,7 +4840,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
          * that the behavior isn't discontinuous.
          */
         if (ntuples < DEFAULT_NUM_DISTINCT)
-               return ntuples;
+               return clamp_row_est(ntuples);
  
         *isdefault = true;
         return DEFAULT_NUM_DISTINCT;
@@ -5233,13 +5343,12 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
         }
         else
         {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
  
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
+               pattlen = VARSIZE_ANY_EXHDR(bstr);
                 patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
+               memcpy(patt, VARDATA_ANY(bstr), pattlen);
+               Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
         }
  
         match = palloc(pattlen + 1);
@@ -5749,13 +5858,12 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
         }
         else if (datatype == BYTEAOID)
         {
-               bytea      *bstr = DatumGetByteaP(str_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
  
-               len = VARSIZE(bstr) - VARHDRSZ;
+               len = VARSIZE_ANY_EXHDR(bstr);
                 workstr = (char *) palloc(len);
-               memcpy(workstr, VARDATA(bstr), len);
-               if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
-                       pfree(bstr);
+               memcpy(workstr, VARDATA_ANY(bstr), len);
+               Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
                 cmpstr = str_const->constvalue;
         }
         else
@@ -5951,21 +6059,7 @@ string_to_bytea_const(const char *str, size_t str_len)
   *-------------------------------------------------------------------------
   */
  
-/*
- * deconstruct_indexquals is a simple function to examine the indexquals
- * attached to a proposed IndexPath.  It returns a list of IndexQualInfo
- * structs, one per qual expression.
- */
-typedef struct
-{
-       RestrictInfo *rinfo;            /* the indexqual itself */
-       int                     indexcol;               /* zero-based index column number */
-       bool            varonleft;              /* true if index column is on left of qual */
-       Oid                     clause_op;              /* qual's operator OID, if relevant */
-       Node       *other_operand;      /* non-index operand of qual's operator */
-} IndexQualInfo;
-
-static List *
+List *
  deconstruct_indexquals(IndexPath *path)
  {
         List       *result = NIL;
@@ -5975,14 +6069,13 @@ deconstruct_indexquals(IndexPath *path)
  
         forboth(lcc, path->indexquals, lci, path->indexqualcols)
         {
-               RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
+               RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(lcc));
                 int                     indexcol = lfirst_int(lci);
                 Expr       *clause;
                 Node       *leftop,
                                    *rightop;
                 IndexQualInfo *qinfo;
  
-               Assert(IsA(rinfo, RestrictInfo));
                 clause = rinfo->clause;
  
                 qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
@@ -6115,35 +6208,7 @@ orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
         return qual_arg_cost;
  }
  
-/*
- * genericcostestimate is a general-purpose estimator that can be used for
- * most index types.  In some cases we use genericcostestimate as the base
- * code and then incorporate additional index-type-specific knowledge in
- * the type-specific calling function.  To avoid code duplication, we make
- * genericcostestimate return a number of intermediate values as well as
- * its preliminary estimates of the output cost values.  The GenericCosts
- * struct includes all these values.
- *
- * Callers should initialize all fields of GenericCosts to zero.  In addition,
- * they can set numIndexTuples to some positive value if they have a better
- * than default way of estimating the number of leaf index tuples visited.
- */
-typedef struct
-{
-       /* These are the values the cost estimator must return to the planner */
-       Cost            indexStartupCost;               /* index-related startup cost */
-       Cost            indexTotalCost; /* total index-related scan cost */
-       Selectivity indexSelectivity;           /* selectivity of index */
-       double          indexCorrelation;               /* order correlation of index */
-
-       /* Intermediate values we obtain along the way */
-       double          numIndexPages;  /* number of leaf pages visited */
-       double          numIndexTuples; /* number of leaf tuples visited */
-       double          spc_random_page_cost;   /* relevant random_page_cost value */
-       double          num_sa_scans;   /* # indexscans from ScalarArrayOps */
-} GenericCosts;
-
-static void
+void
  genericcostestimate(PlannerInfo *root,
                                         IndexPath *path,
                                         double loop_count,
@@ -6384,16 +6449,12 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
  }
  
  
-Datum
-btcostestimate(PG_FUNCTION_ARGS)
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                          Cost *indexStartupCost, Cost *indexTotalCost,
+                          Selectivity *indexSelectivity, double *indexCorrelation,
+                          double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *qinfos;
         GenericCosts costs;
@@ -6682,20 +6743,15 @@ btcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
-Datum
-hashcostestimate(PG_FUNCTION_ARGS)
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         List       *qinfos;
         GenericCosts costs;
  
@@ -6735,20 +6791,15 @@ hashcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
-Datum
-gistcostestimate(PG_FUNCTION_ARGS)
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *qinfos;
         GenericCosts costs;
@@ -6801,20 +6852,15 @@ gistcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
-Datum
-spgcostestimate(PG_FUNCTION_ARGS)
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                               Cost *indexStartupCost, Cost *indexTotalCost,
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *qinfos;
         GenericCosts costs;
@@ -6867,8 +6913,7 @@ spgcostestimate(PG_FUNCTION_ARGS)
         *indexTotalCost = costs.indexTotalCost;
         *indexSelectivity = costs.indexSelectivity;
         *indexCorrelation = costs.indexCorrelation;
-
-       PG_RETURN_VOID();
+       *indexPages = costs.numIndexPages;
  }
  
  
@@ -7163,16 +7208,12 @@ gincost_scalararrayopexpr(PlannerInfo *root,
  /*
   * GIN has search behavior completely different from other index types
   */
-Datum
-gincostestimate(PG_FUNCTION_ARGS)
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                               Cost *indexStartupCost, Cost *indexTotalCost,
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *indexQuals = path->indexquals;
         List       *indexOrderBys = path->indexorderbys;
@@ -7187,6 +7228,7 @@ gincostestimate(PG_FUNCTION_ARGS)
                                 numEntries;
         GinQualCounts counts;
         bool            matchPossible;
+       double          partialScale;
         double          entryPagesFetched,
                                 dataPagesFetched,
                                 dataPagesFetchedBySel;
@@ -7201,39 +7243,74 @@ gincostestimate(PG_FUNCTION_ARGS)
         qinfos = deconstruct_indexquals(path);
  
         /*
-        * Obtain statistic information from the meta page
+        * Obtain statistical information from the meta page, if possible.  Else
+        * set ginStats to zeroes, and we'll cope below.
          */
-       indexRel = index_open(index->indexoid, AccessShareLock);
-       ginGetStats(indexRel, &ginStats);
-       index_close(indexRel, AccessShareLock);
-
-       numEntryPages = ginStats.nEntryPages;
-       numDataPages = ginStats.nDataPages;
-       numPendingPages = ginStats.nPendingPages;
-       numEntries = ginStats.nEntries;
+       if (!index->hypothetical)
+       {
+               indexRel = index_open(index->indexoid, AccessShareLock);
+               ginGetStats(indexRel, &ginStats);
+               index_close(indexRel, AccessShareLock);
+       }
+       else
+       {
+               memset(&ginStats, 0, sizeof(ginStats));
+       }
  
         /*
-        * nPendingPages can be trusted, but the other fields are as of the last
-        * VACUUM.  Scale them by the ratio numPages / nTotalPages to account for
-        * growth since then.  If the fields are zero (implying no VACUUM at all,
-        * and an index created pre-9.1), assume all pages are entry pages.
+        * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+        * trusted, but the other fields are data as of the last VACUUM.  We can
+        * scale them up to account for growth since then, but that method only
+        * goes so far; in the worst case, the stats might be for a completely
+        * empty index, and scaling them will produce pretty bogus numbers.
+        * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+        * it's grown more than that, fall back to estimating things only from the
+        * assumed-accurate index size.  But we'll trust nPendingPages in any case
+        * so long as it's not clearly insane, ie, more than the index size.
          */
-       if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
-       {
-               numEntryPages = numPages;
-               numDataPages = 0;
-               numEntries = numTuples; /* bogus, but no other info available */
-       }
+       if (ginStats.nPendingPages < numPages)
+               numPendingPages = ginStats.nPendingPages;
         else
+               numPendingPages = 0;
+
+       if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+               ginStats.nTotalPages > numPages / 4 &&
+               ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
         {
+               /*
+                * OK, the stats seem close enough to sane to be trusted.  But we
+                * still need to scale them by the ratio numPages / nTotalPages to
+                * account for growth since the last VACUUM.
+                */
                 double          scale = numPages / ginStats.nTotalPages;
  
-               numEntryPages = ceil(numEntryPages * scale);
-               numDataPages = ceil(numDataPages * scale);
-               numEntries = ceil(numEntries * scale);
+               numEntryPages = ceil(ginStats.nEntryPages * scale);
+               numDataPages = ceil(ginStats.nDataPages * scale);
+               numEntries = ceil(ginStats.nEntries * scale);
                 /* ensure we didn't round up too much */
-               numEntryPages = Min(numEntryPages, numPages);
-               numDataPages = Min(numDataPages, numPages - numEntryPages);
+               numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+               numDataPages = Min(numDataPages,
+                                                  numPages - numPendingPages - numEntryPages);
+       }
+       else
+       {
+               /*
+                * We might get here because it's a hypothetical index, or an index
+                * created pre-9.1 and never vacuumed since upgrading (in which case
+                * its stats would read as zeroes), or just because it's grown too
+                * much since the last VACUUM for us to put our faith in scaling.
+                *
+                * Invent some plausible internal statistics based on the index page
+                * count (and clamp that to at least 10 pages, just in case).  We
+                * estimate that 90% of the index is entry pages, and the rest is data
+                * pages.  Estimate 100 entries per entry page; this is rather bogus
+                * since it'll depend on the size of the keys, but it's more robust
+                * than trying to predict the number of entries per heap tuple.
+                */
+               numPages = Max(numPages, 10);
+               numEntryPages = floor((numPages - numPendingPages) * 0.90);
+               numDataPages = numPages - numPendingPages - numEntryPages;
+               numEntries = floor(numEntryPages * 100);
         }
  
         /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
@@ -7323,7 +7400,7 @@ gincostestimate(PG_FUNCTION_ARGS)
                 *indexStartupCost = 0;
                 *indexTotalCost = 0;
                 *indexSelectivity = 0;
-               PG_RETURN_VOID();
+               return;
         }
  
         if (counts.haveFullScan || indexQuals == NIL)
@@ -7358,16 +7435,21 @@ gincostestimate(PG_FUNCTION_ARGS)
         /*
          * Add an estimate of entry pages read by partial match algorithm. It's a
          * scan over leaf pages in entry tree.  We haven't any useful stats here,
-        * so estimate it as proportion.
+        * so estimate it as a proportion.  Because counts.partialEntries is really
+        * pretty bogus (see code above), it's possible that it is more than
+        * numEntries; clamp the proportion to ensure sanity.
          */
-       entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
+       partialScale = counts.partialEntries / numEntries;
+       partialScale = Min(partialScale, 1.0);
+
+       entryPagesFetched += ceil(numEntryPages * partialScale);
  
         /*
          * Partial match algorithm reads all data pages before doing actual scan,
-        * so it's a startup cost. Again, we haven't any useful stats here, so,
-        * estimate it as proportion
+        * so it's a startup cost.  Again, we haven't any useful stats here, so
+        * estimate it as a proportion.
          */
-       dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
+       dataPagesFetched = ceil(numDataPages * partialScale);
  
         /*
          * Calculate cache effects if more than one scan due to nestloops or array
@@ -7445,23 +7527,18 @@ gincostestimate(PG_FUNCTION_ARGS)
         *indexStartupCost += qual_arg_cost;
         *indexTotalCost += qual_arg_cost;
         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
-
-       PG_RETURN_VOID();
+       *indexPages = dataPagesFetched;
  }
  
  /*
   * BRIN has search behavior completely different from other index types
   */
-Datum
-brincostestimate(PG_FUNCTION_ARGS)
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                                Cost *indexStartupCost, Cost *indexTotalCost,
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
-       double          loop_count = PG_GETARG_FLOAT8(2);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(6);
         IndexOptInfo *index = path->indexinfo;
         List       *indexQuals = path->indexquals;
         List       *indexOrderBys = path->indexorderbys;
@@ -7512,8 +7589,7 @@ brincostestimate(PG_FUNCTION_ARGS)
         *indexStartupCost += qual_arg_cost;
         *indexTotalCost += qual_arg_cost;
         *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexPages = index->pages;
  
         /* XXX what about pages_per_range? */
-
-       PG_RETURN_VOID();
  }