]> granicus.if.org Git - postgresql/blobdiff - src/backend/utils/adt/selfuncs.c
Add support for EUI-64 MAC addresses as macaddr8
[postgresql] / src / backend / utils / adt / selfuncs.c
index d396ef142f9563ce5f620152983a873c082439c8..bb9a5446861bde72e1caee147ef7e0c382b8de16 100644 (file)
@@ -10,7 +10,7 @@
  *       Index cost functions are located via the index AM's API struct,
  *       which is obtained from the handler function registered in pg_am.
  *
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
@@ -98,6 +98,7 @@
 #include "postgres.h"
 
 #include <ctype.h>
+#include <float.h>
 #include <math.h>
 
 #include "access/gin.h"
 #include "utils/timestamp.h"
 #include "utils/tqual.h"
 #include "utils/typcache.h"
+#include "utils/varlena.h"
 
 
 /* Hooks for plugins to get control when we ask for stats */
@@ -2511,10 +2513,24 @@ eqjoinsel_semi(Oid operator,
         * We can apply this clamping both with respect to the base relation from
         * which the join variable comes (if there is just one), and to the
         * immediate inner input relation of the current join.
+        *
+        * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+        * great, maybe, but it didn't come out of nowhere either.  This is most
+        * helpful when the inner relation is empty and consequently has no stats.
         */
        if (vardata2->rel)
-               nd2 = Min(nd2, vardata2->rel->rows);
-       nd2 = Min(nd2, inner_rel->rows);
+       {
+               if (nd2 >= vardata2->rel->rows)
+               {
+                       nd2 = vardata2->rel->rows;
+                       isdefault2 = false;
+               }
+       }
+       if (nd2 >= inner_rel->rows)
+       {
+               nd2 = inner_rel->rows;
+               isdefault2 = false;
+       }
 
        if (HeapTupleIsValid(vardata1->statsTuple))
        {
@@ -3237,15 +3253,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  *             restriction selectivity of the equality in the next step.
  *     4.  For Vars within a single source rel, we multiply together the numbers
  *             of values, clamp to the number of rows in the rel (divided by 10 if
- *             more than one Var), and then multiply by the selectivity of the
- *             restriction clauses for that rel.  When there's more than one Var,
- *             the initial product is probably too high (it's the worst case) but
- *             clamping to a fraction of the rel's rows seems to be a helpful
- *             heuristic for not letting the estimate get out of hand.  (The factor
- *             of 10 is derived from pre-Postgres-7.4 practice.)  Multiplying
- *             by the restriction selectivity is effectively assuming that the
- *             restriction clauses are independent of the grouping, which is a crummy
- *             assumption, but it's hard to do better.
+ *             more than one Var), and then multiply by a factor based on the
+ *             selectivity of the restriction clauses for that rel.  When there's
+ *             more than one Var, the initial product is probably too high (it's the
+ *             worst case) but clamping to a fraction of the rel's rows seems to be a
+ *             helpful heuristic for not letting the estimate get out of hand.  (The
+ *             factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
+ *             we multiply by to adjust for the restriction selectivity assumes that
+ *             the restriction clauses are independent of the grouping, which may not
+ *             be a valid assumption, but it's hard to do better.
  *     5.  If there are Vars from multiple rels, we repeat step 4 for each such
  *             rel, and multiply the results together.
  * Note that rels not containing grouped Vars are ignored completely, as are
@@ -3439,9 +3455,51 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                                reldistinct = clamp;
 
                        /*
-                        * Multiply by restriction selectivity.
+                        * Update the estimate based on the restriction selectivity,
+                        * guarding against division by zero when reldistinct is zero.
+                        * Also skip this if we know that we are returning all rows.
                         */
-                       reldistinct *= rel->rows / rel->tuples;
+                       if (reldistinct > 0 && rel->rows < rel->tuples)
+                       {
+                               /*
+                                * Given a table containing N rows with n distinct values in a
+                                * uniform distribution, if we select p rows at random then
+                                * the expected number of distinct values selected is
+                                *
+                                * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+                                *
+                                * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+                                *
+                                * See "Approximating block accesses in database
+                                * organizations", S. B. Yao, Communications of the ACM,
+                                * Volume 20 Issue 4, April 1977 Pages 260-261.
+                                *
+                                * Alternatively, re-arranging the terms from the factorials,
+                                * this may be written as
+                                *
+                                * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+                                *
+                                * This form of the formula is more efficient to compute in
+                                * the common case where p is larger than N/n.  Additionally,
+                                * as pointed out by Dell'Era, if i << N for all terms in the
+                                * product, it can be approximated by
+                                *
+                                * n * (1 - ((N-p)/N)^(N/n))
+                                *
+                                * See "Expected distinct values when selecting from a bag
+                                * without replacement", Alberto Dell'Era,
+                                * http://www.adellera.it/investigations/distinct_balls/.
+                                *
+                                * The condition i << N is equivalent to n >> 1, so this is a
+                                * good approximation when the number of distinct values in
+                                * the table is large.  It turns out that this formula also
+                                * works well even when n is small.
+                                */
+                               reldistinct *=
+                                       (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+                                                        rel->tuples / reldistinct));
+                       }
+                       reldistinct = clamp_row_est(reldistinct);
 
                        /*
                         * Update estimate of total distinct groups.
@@ -3540,8 +3598,11 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
         * XXX Possibly better way, but much more expensive: multiply by
         * selectivity of rel's restriction clauses that mention the target Var.
         */
-       if (vardata.rel)
+       if (vardata.rel && vardata.rel->tuples > 0)
+       {
                ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+               ndistinct = clamp_row_est(ndistinct);
+       }
 
        /*
         * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
@@ -3739,6 +3800,7 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                case INETOID:
                case CIDROID:
                case MACADDROID:
+               case MACADDR8OID:
                        *scaledvalue = convert_network_to_scalar(value, valuetypid);
                        *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
                        *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
@@ -4152,31 +4214,17 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                 * average month length of 365.25/12.0 days.  Not too
                                 * accurate, but plenty good enough for our purposes.
                                 */
-#ifdef HAVE_INT64_TIMESTAMP
                                return interval->time + interval->day * (double) USECS_PER_DAY +
                                        interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
-                               return interval->time + interval->day * SECS_PER_DAY +
-                                       interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
                        }
                case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
                        return (DatumGetRelativeTime(value) * 1000000.0);
-#else
-                       return DatumGetRelativeTime(value);
-#endif
                case TINTERVALOID:
                        {
                                TimeInterval tinterval = DatumGetTimeInterval(value);
 
-#ifdef HAVE_INT64_TIMESTAMP
                                if (tinterval->status != 0)
                                        return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
-                               if (tinterval->status != 0)
-                                       return tinterval->data[1] - tinterval->data[0];
-#endif
                                return 0;               /* for lack of a better idea */
                        }
                case TIMEOID:
@@ -4186,11 +4234,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
                                TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
 
                                /* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
                                return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
-                               return (double) (timetz->time + timetz->zone);
-#endif
                        }
        }
 
@@ -4269,7 +4313,7 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
                return true;
        }
 
-       /* Ooops, clause has wrong structure (probably var op var) */
+       /* Oops, clause has wrong structure (probably var op var) */
        ReleaseVariableStats(*vardata);
        ReleaseVariableStats(rdata);
 
@@ -4693,6 +4737,7 @@ double
 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 {
        double          stadistinct;
+       double          stanullfrac = 0.0;
        double          ntuples;
 
        *isdefault = false;
@@ -4700,7 +4745,8 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
        /*
         * Determine the stadistinct value to use.  There are cases where we can
         * get an estimate even without a pg_statistic entry, or can get a better
-        * value than is in pg_statistic.
+        * value than is in pg_statistic.  Grab stanullfrac too if we can find it
+        * (otherwise, assume no nulls, for lack of any better idea).
         */
        if (HeapTupleIsValid(vardata->statsTuple))
        {
@@ -4709,6 +4755,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
 
                stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
                stadistinct = stats->stadistinct;
+               stanullfrac = stats->stanullfrac;
        }
        else if (vardata->vartype == BOOLOID)
        {
@@ -4732,7 +4779,7 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
                        {
                                case ObjectIdAttributeNumber:
                                case SelfItemPointerAttributeNumber:
-                                       stadistinct = -1.0; /* unique */
+                                       stadistinct = -1.0; /* unique (and all non null) */
                                        break;
                                case TableOidAttributeNumber:
                                        stadistinct = 1.0;      /* only 1 value */
@@ -4754,10 +4801,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
         * If there is a unique index or DISTINCT clause for the variable, assume
         * it is unique no matter what pg_statistic says; the statistics could be
         * out of date, or we might have found a partial unique index that proves
-        * the var is unique for this query.
+        * the var is unique for this query.  However, we'd better still believe
+        * the null-fraction statistic.
         */
        if (vardata->isunique)
-               stadistinct = -1.0;
+               stadistinct = -1.0 * (1.0 - stanullfrac);
 
        /*
         * If we had an absolute estimate, use that.
@@ -5295,13 +5343,12 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
        }
        else
        {
-               bytea      *bstr = DatumGetByteaP(patt_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
 
-               pattlen = VARSIZE(bstr) - VARHDRSZ;
+               pattlen = VARSIZE_ANY_EXHDR(bstr);
                patt = (char *) palloc(pattlen);
-               memcpy(patt, VARDATA(bstr), pattlen);
-               if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
-                       pfree(bstr);
+               memcpy(patt, VARDATA_ANY(bstr), pattlen);
+               Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
        }
 
        match = palloc(pattlen + 1);
@@ -5811,13 +5858,12 @@ make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
        }
        else if (datatype == BYTEAOID)
        {
-               bytea      *bstr = DatumGetByteaP(str_const->constvalue);
+               bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
 
-               len = VARSIZE(bstr) - VARHDRSZ;
+               len = VARSIZE_ANY_EXHDR(bstr);
                workstr = (char *) palloc(len);
-               memcpy(workstr, VARDATA(bstr), len);
-               if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
-                       pfree(bstr);
+               memcpy(workstr, VARDATA_ANY(bstr), len);
+               Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
                cmpstr = str_const->constvalue;
        }
        else
@@ -6013,21 +6059,7 @@ string_to_bytea_const(const char *str, size_t str_len)
  *-------------------------------------------------------------------------
  */
 
-/*
- * deconstruct_indexquals is a simple function to examine the indexquals
- * attached to a proposed IndexPath.  It returns a list of IndexQualInfo
- * structs, one per qual expression.
- */
-typedef struct
-{
-       RestrictInfo *rinfo;            /* the indexqual itself */
-       int                     indexcol;               /* zero-based index column number */
-       bool            varonleft;              /* true if index column is on left of qual */
-       Oid                     clause_op;              /* qual's operator OID, if relevant */
-       Node       *other_operand;      /* non-index operand of qual's operator */
-} IndexQualInfo;
-
-static List *
+List *
 deconstruct_indexquals(IndexPath *path)
 {
        List       *result = NIL;
@@ -6037,14 +6069,13 @@ deconstruct_indexquals(IndexPath *path)
 
        forboth(lcc, path->indexquals, lci, path->indexqualcols)
        {
-               RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
+               RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(lcc));
                int                     indexcol = lfirst_int(lci);
                Expr       *clause;
                Node       *leftop,
                                   *rightop;
                IndexQualInfo *qinfo;
 
-               Assert(IsA(rinfo, RestrictInfo));
                clause = rinfo->clause;
 
                qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
@@ -6177,35 +6208,7 @@ orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
        return qual_arg_cost;
 }
 
-/*
- * genericcostestimate is a general-purpose estimator that can be used for
- * most index types.  In some cases we use genericcostestimate as the base
- * code and then incorporate additional index-type-specific knowledge in
- * the type-specific calling function.  To avoid code duplication, we make
- * genericcostestimate return a number of intermediate values as well as
- * its preliminary estimates of the output cost values.  The GenericCosts
- * struct includes all these values.
- *
- * Callers should initialize all fields of GenericCosts to zero.  In addition,
- * they can set numIndexTuples to some positive value if they have a better
- * than default way of estimating the number of leaf index tuples visited.
- */
-typedef struct
-{
-       /* These are the values the cost estimator must return to the planner */
-       Cost            indexStartupCost;               /* index-related startup cost */
-       Cost            indexTotalCost; /* total index-related scan cost */
-       Selectivity indexSelectivity;           /* selectivity of index */
-       double          indexCorrelation;               /* order correlation of index */
-
-       /* Intermediate values we obtain along the way */
-       double          numIndexPages;  /* number of leaf pages visited */
-       double          numIndexTuples; /* number of leaf tuples visited */
-       double          spc_random_page_cost;   /* relevant random_page_cost value */
-       double          num_sa_scans;   /* # indexscans from ScalarArrayOps */
-} GenericCosts;
-
-static void
+void
 genericcostestimate(PlannerInfo *root,
                                        IndexPath *path,
                                        double loop_count,
@@ -6449,7 +6452,8 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
 void
 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                           Cost *indexStartupCost, Cost *indexTotalCost,
-                          Selectivity *indexSelectivity, double *indexCorrelation)
+                          Selectivity *indexSelectivity, double *indexCorrelation,
+                          double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -6739,12 +6743,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 void
 hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                 Cost *indexStartupCost, Cost *indexTotalCost,
-                                Selectivity *indexSelectivity, double *indexCorrelation)
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
        List       *qinfos;
        GenericCosts costs;
@@ -6785,12 +6791,14 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 void
 gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                 Cost *indexStartupCost, Cost *indexTotalCost,
-                                Selectivity *indexSelectivity, double *indexCorrelation)
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -6844,12 +6852,14 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 void
 spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                Cost *indexStartupCost, Cost *indexTotalCost,
-                               Selectivity *indexSelectivity, double *indexCorrelation)
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *qinfos;
@@ -6903,6 +6913,7 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexTotalCost = costs.indexTotalCost;
        *indexSelectivity = costs.indexSelectivity;
        *indexCorrelation = costs.indexCorrelation;
+       *indexPages = costs.numIndexPages;
 }
 
 
@@ -7200,7 +7211,8 @@ gincost_scalararrayopexpr(PlannerInfo *root,
 void
 gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                Cost *indexStartupCost, Cost *indexTotalCost,
-                               Selectivity *indexSelectivity, double *indexCorrelation)
+                               Selectivity *indexSelectivity, double *indexCorrelation,
+                               double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *indexQuals = path->indexquals;
@@ -7515,6 +7527,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexStartupCost += qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
        *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexPages = dataPagesFetched;
 }
 
 /*
@@ -7523,7 +7536,8 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 void
 brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                                 Cost *indexStartupCost, Cost *indexTotalCost,
-                                Selectivity *indexSelectivity, double *indexCorrelation)
+                                Selectivity *indexSelectivity, double *indexCorrelation,
+                                double *indexPages)
 {
        IndexOptInfo *index = path->indexinfo;
        List       *indexQuals = path->indexquals;
@@ -7575,6 +7589,7 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
        *indexStartupCost += qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
        *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexPages = index->pages;
 
        /* XXX what about pages_per_range? */
 }