* Selectivity routines are registered in the pg_operator catalog
* in the "oprrest" and "oprjoin" attributes.
*
- * Index cost functions are registered in the pg_am catalog
- * in the "amcostestimate" attribute.
+ * Index cost functions are located via the index AM's API struct,
+ * which is obtained from the handler function registered in pg_am.
*
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
#include "postgres.h"
#include <ctype.h>
+#include <float.h>
#include <math.h>
#include "access/gin.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/index.h"
+#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
#include "utils/typcache.h"
+#include "utils/varlena.h"
/* Hooks for plugins to get control when we ask for stats */
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
}
+/*
+ * boolvarsel - Selectivity of Boolean variable.
+ *
+ * This can actually be called on any boolean-valued expression. If it
+ * involves only Vars of the specified relation, and if there are statistics
+ * about the Var or expression (the latter is possible if it's indexed) then
+ * we'll produce a real estimate; otherwise it's just a default.
+ */
+Selectivity
+boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+{
+ VariableStatData vardata;
+ double selec;
+
+ /* Look up whatever statistics are available for the expression */
+ examine_variable(root, arg, varRelid, &vardata);
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ /*
+ * A boolean variable V is equivalent to the clause V = 't', so we
+ * compute the selectivity as if that is what we have.
+ *
+ * NOTE(review): the trailing boolean args are presumably
+ * constisnull=false and varonleft=true — confirm against the
+ * var_eq_const signature.
+ */
+ selec = var_eq_const(&vardata, BooleanEqualOperator,
+ BoolGetDatum(true), false, true);
+ }
+ else if (is_funcclause(arg))
+ {
+ /*
+ * If we have no stats and it's a function call, estimate 0.3333333.
+ * This seems a pretty unprincipled choice, but Postgres has been
+ * using that estimate for function calls since 1992. The hoariness
+ * of this behavior suggests that we should not be in too much hurry
+ * to use another value.
+ */
+ selec = 0.3333333;
+ }
+ else
+ {
+ /* Otherwise, the default estimate is 0.5 */
+ selec = 0.5;
+ }
+ /* Free lookup state acquired by examine_variable above */
+ ReleaseVariableStats(vardata);
+ return selec;
+}
+
/*
* booltestsel - Selectivity of BooleanTest Node.
*/
* We can apply this clamping both with respect to the base relation from
* which the join variable comes (if there is just one), and to the
* immediate inner input relation of the current join.
+ *
+ * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+ * great, maybe, but it didn't come out of nowhere either. This is most
+ * helpful when the inner relation is empty and consequently has no stats.
*/
if (vardata2->rel)
- nd2 = Min(nd2, vardata2->rel->rows);
- nd2 = Min(nd2, inner_rel->rows);
+ {
+ if (nd2 >= vardata2->rel->rows)
+ {
+ nd2 = vardata2->rel->rows;
+ isdefault2 = false;
+ }
+ }
+ if (nd2 >= inner_rel->rows)
+ {
+ nd2 = inner_rel->rows;
+ isdefault2 = false;
+ }
if (HeapTupleIsValid(vardata1->statsTuple))
{
* groupExprs - list of expressions being grouped by
* input_rows - number of rows estimated to arrive at the group/unique
* filter step
+ * pgset - NULL, or a List** pointing to a grouping set to filter the
+ * groupExprs against
*
* Given the lack of any cross-correlation statistics in the system, it's
* impossible to do anything really trustworthy with GROUP BY conditions
* restriction selectivity of the equality in the next step.
* 4. For Vars within a single source rel, we multiply together the numbers
* of values, clamp to the number of rows in the rel (divided by 10 if
- * more than one Var), and then multiply by the selectivity of the
- * restriction clauses for that rel. When there's more than one Var,
- * the initial product is probably too high (it's the worst case) but
- * clamping to a fraction of the rel's rows seems to be a helpful
- * heuristic for not letting the estimate get out of hand. (The factor
- * of 10 is derived from pre-Postgres-7.4 practice.) Multiplying
- * by the restriction selectivity is effectively assuming that the
- * restriction clauses are independent of the grouping, which is a crummy
- * assumption, but it's hard to do better.
+ * more than one Var), and then multiply by a factor based on the
+ * selectivity of the restriction clauses for that rel. When there's
+ * more than one Var, the initial product is probably too high (it's the
+ * worst case) but clamping to a fraction of the rel's rows seems to be a
+ * helpful heuristic for not letting the estimate get out of hand. (The
+ * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
+ * we multiply by to adjust for the restriction selectivity assumes that
+ * the restriction clauses are independent of the grouping, which may not
+ * be a valid assumption, but it's hard to do better.
* 5. If there are Vars from multiple rels, we repeat step 4 for each such
* rel, and multiply the results together.
* Note that rels not containing grouped Vars are ignored completely, as are
* but we don't have the info to do better).
*/
double
-estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
+ List **pgset)
{
List *varinfos = NIL;
double numdistinct;
ListCell *l;
+ int i;
/*
* We don't ever want to return an estimate of zero groups, as that tends
* for normal cases with GROUP BY or DISTINCT, but it is possible for
* corner cases with set operations.)
*/
- if (groupExprs == NIL)
+ if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
return 1.0;
/*
*/
numdistinct = 1.0;
+ i = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
List *varshere;
ListCell *l2;
+ /* is expression in this grouping set? */
+ if (pgset && !list_member_int(*pgset, i++))
+ continue;
+
/* Short-circuit for expressions returning boolean */
if (exprType(groupexpr) == BOOLOID)
{
* down to ignoring the possible addition of nulls to the result set).
*/
varshere = pull_var_clause(groupexpr,
- PVC_RECURSE_AGGREGATES,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
PVC_RECURSE_PLACEHOLDERS);
/*
reldistinct = clamp;
/*
- * Multiply by restriction selectivity.
+ * Update the estimate based on the restriction selectivity,
+ * guarding against division by zero when reldistinct is zero.
+ * Also skip this if we know that we are returning all rows.
*/
- reldistinct *= rel->rows / rel->tuples;
+ if (reldistinct > 0 && rel->rows < rel->tuples)
+ {
+ /*
+ * Given a table containing N rows with n distinct values in a
+ * uniform distribution, if we select p rows at random then
+ * the expected number of distinct values selected is
+ *
+ * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+ *
+ * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+ *
+ * See "Approximating block accesses in database
+ * organizations", S. B. Yao, Communications of the ACM,
+ * Volume 20 Issue 4, April 1977 Pages 260-261.
+ *
+ * Alternatively, re-arranging the terms from the factorials,
+ * this may be written as
+ *
+ * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+ *
+ * This form of the formula is more efficient to compute in
+ * the common case where p is larger than N/n. Additionally,
+ * as pointed out by Dell'Era, if i << N for all terms in the
+ * product, it can be approximated by
+ *
+ * n * (1 - ((N-p)/N)^(N/n))
+ *
+ * See "Expected distinct values when selecting from a bag
+ * without replacement", Alberto Dell'Era,
+ * http://www.adellera.it/investigations/distinct_balls/.
+ *
+ * The condition i << N is equivalent to n >> 1, so this is a
+ * good approximation when the number of distinct values in
+ * the table is large. It turns out that this formula also
+ * works well even when n is small.
+ */
+ reldistinct *=
+ (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+ rel->tuples / reldistinct));
+ }
+ reldistinct = clamp_row_est(reldistinct);
/*
* Update estimate of total distinct groups.
* XXX Possibly better way, but much more expensive: multiply by
* selectivity of rel's restriction clauses that mention the target Var.
*/
- if (vardata.rel)
+ if (vardata.rel && vardata.rel->tuples > 0)
+ {
ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+ ndistinct = clamp_row_est(ndistinct);
+ }
/*
* Initial estimate of bucketsize fraction is 1/nbuckets as long as the
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
case INETOID:
case CIDROID:
case MACADDROID:
+ case MACADDR8OID:
*scaledvalue = convert_network_to_scalar(value, valuetypid);
*scaledlobound = convert_network_to_scalar(lobound, boundstypid);
*scaledhibound = convert_network_to_scalar(hibound, boundstypid);
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}
return 0.0; /* empty string has scalar value 0 */
/*
- * Since base is at least 10, need not consider more than about 20 chars
+ * There seems little point in considering more than a dozen bytes from
+ * the string. Since base is at least 10, that will give us nominal
+ * resolution of at least 12 decimal digits, which is surely far more
+ * precision than this estimation technique has got anyway (especially in
+ * non-C locales). Also, even with the maximum possible base of 256, this
+ * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
+ * overflow on any known machine.
*/
- if (slen > 20)
- slen = 20;
+ if (slen > 12)
+ slen = 12;
/* Convert initial characters to fraction */
base = rangehi - rangelo + 1;
size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
/*
- * Note: originally we guessed at a suitable output buffer size, and
- * only needed to call strxfrm twice if our guess was too small.
- * However, it seems that some versions of Solaris have buggy strxfrm
- * that can write past the specified buffer length in that scenario.
- * So, do it the dumb way for portability.
- *
- * Yet other systems (e.g., glibc) sometimes return a smaller value
- * from the second call than the first; thus the Assert must be <= not
- * == as you'd expect. Can't any of these people program their way
- * out of a paper bag?
+ * XXX: We could guess at a suitable output buffer size and only call
+ * strxfrm twice if our guess is too small.
*
* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
* bogus data or set an error. This is not really a problem unless it
#endif
xfrmstr = (char *) palloc(xfrmlen + 1);
xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+
+ /*
+ * Some systems (e.g., glibc) can return a smaller value from the
+ * second call than the first; thus the Assert must be <= not ==.
+ */
Assert(xfrmlen2 <= xfrmlen);
pfree(val);
val = xfrmstr;
* average month length of 365.25/12.0 days. Not too
* accurate, but plenty good enough for our purposes.
*/
-#ifdef HAVE_INT64_TIMESTAMP
return interval->time + interval->day * (double) USECS_PER_DAY +
interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
- return interval->time + interval->day * SECS_PER_DAY +
- interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
}
case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
return (DatumGetRelativeTime(value) * 1000000.0);
-#else
- return DatumGetRelativeTime(value);
-#endif
case TINTERVALOID:
{
TimeInterval tinterval = DatumGetTimeInterval(value);
-#ifdef HAVE_INT64_TIMESTAMP
if (tinterval->status != 0)
return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
- if (tinterval->status != 0)
- return tinterval->data[1] - tinterval->data[0];
-#endif
return 0; /* for lack of a better idea */
}
case TIMEOID:
TimeTzADT *timetz = DatumGetTimeTzADTP(value);
/* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
- return (double) (timetz->time + timetz->zone);
-#endif
}
}
return true;
}
- /* Ooops, clause has wrong structure (probably var op var) */
+ /* Oops, clause has wrong structure (probably var op var) */
ReleaseVariableStats(*vardata);
ReleaseVariableStats(rdata);
* *isdefault: set to TRUE if the result is a default rather than based on
* anything meaningful.
*
- * NB: be careful to produce an integral result, since callers may compare
- * the result to exact integer counts.
+ * NB: be careful to produce a positive integral result, since callers may
+ * compare the result to exact integer counts, or might divide by it.
*/
double
get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
{
double stadistinct;
+ double stanullfrac = 0.0;
double ntuples;
*isdefault = false;
/*
* Determine the stadistinct value to use. There are cases where we can
* get an estimate even without a pg_statistic entry, or can get a better
- * value than is in pg_statistic.
+ * value than is in pg_statistic. Grab stanullfrac too if we can find it
+ * (otherwise, assume no nulls, for lack of any better idea).
*/
if (HeapTupleIsValid(vardata->statsTuple))
{
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
stadistinct = stats->stadistinct;
+ stanullfrac = stats->stanullfrac;
}
else if (vardata->vartype == BOOLOID)
{
{
case ObjectIdAttributeNumber:
case SelfItemPointerAttributeNumber:
- stadistinct = -1.0; /* unique */
+ stadistinct = -1.0; /* unique (and all non null) */
break;
case TableOidAttributeNumber:
stadistinct = 1.0; /* only 1 value */
* If there is a unique index or DISTINCT clause for the variable, assume
* it is unique no matter what pg_statistic says; the statistics could be
* out of date, or we might have found a partial unique index that proves
- * the var is unique for this query.
+ * the var is unique for this query. However, we'd better still believe
+ * the null-fraction statistic.
*/
if (vardata->isunique)
- stadistinct = -1.0;
+ stadistinct = -1.0 * (1.0 - stanullfrac);
/*
* If we had an absolute estimate, use that.
*/
if (stadistinct > 0.0)
- return stadistinct;
+ return clamp_row_est(stadistinct);
/*
* Otherwise we need to get the relation size; punt if not available.
* If we had a relative estimate, use that.
*/
if (stadistinct < 0.0)
- return floor((-stadistinct * ntuples) + 0.5);
+ return clamp_row_est(-stadistinct * ntuples);
/*
* With no data, estimate ndistinct = ntuples if the table is small, else
* that the behavior isn't discontinuous.
*/
if (ntuples < DEFAULT_NUM_DISTINCT)
- return ntuples;
+ return clamp_row_est(ntuples);
*isdefault = true;
return DEFAULT_NUM_DISTINCT;
}
else
{
- bytea *bstr = DatumGetByteaP(patt_const->constvalue);
+ bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
- pattlen = VARSIZE(bstr) - VARHDRSZ;
+ pattlen = VARSIZE_ANY_EXHDR(bstr);
patt = (char *) palloc(pattlen);
- memcpy(patt, VARDATA(bstr), pattlen);
- if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
- pfree(bstr);
+ memcpy(patt, VARDATA_ANY(bstr), pattlen);
+ Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
}
match = palloc(pattlen + 1);
}
else if (datatype == BYTEAOID)
{
- bytea *bstr = DatumGetByteaP(str_const->constvalue);
+ bytea *bstr = DatumGetByteaPP(str_const->constvalue);
- len = VARSIZE(bstr) - VARHDRSZ;
+ len = VARSIZE_ANY_EXHDR(bstr);
workstr = (char *) palloc(len);
- memcpy(workstr, VARDATA(bstr), len);
- if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
- pfree(bstr);
+ memcpy(workstr, VARDATA_ANY(bstr), len);
+ Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
cmpstr = str_const->constvalue;
}
else
*-------------------------------------------------------------------------
*/
-/*
- * deconstruct_indexquals is a simple function to examine the indexquals
- * attached to a proposed IndexPath. It returns a list of IndexQualInfo
- * structs, one per qual expression.
- */
-typedef struct
-{
- RestrictInfo *rinfo; /* the indexqual itself */
- int indexcol; /* zero-based index column number */
- bool varonleft; /* true if index column is on left of qual */
- Oid clause_op; /* qual's operator OID, if relevant */
- Node *other_operand; /* non-index operand of qual's operator */
-} IndexQualInfo;
-
-static List *
+List *
deconstruct_indexquals(IndexPath *path)
{
List *result = NIL;
forboth(lcc, path->indexquals, lci, path->indexqualcols)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
+ RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(lcc));
int indexcol = lfirst_int(lci);
Expr *clause;
Node *leftop,
*rightop;
IndexQualInfo *qinfo;
- Assert(IsA(rinfo, RestrictInfo));
clause = rinfo->clause;
qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
}
else if (IsA(clause, NullTest))
{
- NullTest *nt = (NullTest *) clause;
-
qinfo->clause_op = InvalidOid;
- Assert(match_index_to_operand((Node *) nt->arg, indexcol, index));
+ Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
+ indexcol, index));
qinfo->varonleft = true;
qinfo->other_operand = NULL;
}
return qual_arg_cost;
}
-/*
- * genericcostestimate is a general-purpose estimator that can be used for
- * most index types. In some cases we use genericcostestimate as the base
- * code and then incorporate additional index-type-specific knowledge in
- * the type-specific calling function. To avoid code duplication, we make
- * genericcostestimate return a number of intermediate values as well as
- * its preliminary estimates of the output cost values. The GenericCosts
- * struct includes all these values.
- *
- * Callers should initialize all fields of GenericCosts to zero. In addition,
- * they can set numIndexTuples to some positive value if they have a better
- * than default way of estimating the number of leaf index tuples visited.
- */
-typedef struct
-{
- /* These are the values the cost estimator must return to the planner */
- Cost indexStartupCost; /* index-related startup cost */
- Cost indexTotalCost; /* total index-related scan cost */
- Selectivity indexSelectivity; /* selectivity of index */
- double indexCorrelation; /* order correlation of index */
-
- /* Intermediate values we obtain along the way */
- double numIndexPages; /* number of leaf pages visited */
- double numIndexTuples; /* number of leaf tuples visited */
- double spc_random_page_cost; /* relevant random_page_cost value */
- double num_sa_scans; /* # indexscans from ScalarArrayOps */
-} GenericCosts;
-
-static void
+void
genericcostestimate(PlannerInfo *root,
IndexPath *path,
double loop_count,
}
-Datum
-btcostestimate(PG_FUNCTION_ARGS)
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *qinfos;
GenericCosts costs;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
-Datum
-hashcostestimate(PG_FUNCTION_ARGS)
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
List *qinfos;
GenericCosts costs;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
-Datum
-gistcostestimate(PG_FUNCTION_ARGS)
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *qinfos;
GenericCosts costs;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
-Datum
-spgcostestimate(PG_FUNCTION_ARGS)
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *qinfos;
GenericCosts costs;
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
/*
* GIN has search behavior completely different from other index types
*/
-Datum
-gincostestimate(PG_FUNCTION_ARGS)
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
List *indexOrderBys = path->indexorderbys;
numEntries;
GinQualCounts counts;
bool matchPossible;
+ double partialScale;
double entryPagesFetched,
dataPagesFetched,
dataPagesFetchedBySel;
qinfos = deconstruct_indexquals(path);
/*
- * Obtain statistic information from the meta page
+ * Obtain statistical information from the meta page, if possible. Else
+ * set ginStats to zeroes, and we'll cope below.
*/
- indexRel = index_open(index->indexoid, AccessShareLock);
- ginGetStats(indexRel, &ginStats);
- index_close(indexRel, AccessShareLock);
-
- numEntryPages = ginStats.nEntryPages;
- numDataPages = ginStats.nDataPages;
- numPendingPages = ginStats.nPendingPages;
- numEntries = ginStats.nEntries;
+ if (!index->hypothetical)
+ {
+ indexRel = index_open(index->indexoid, AccessShareLock);
+ ginGetStats(indexRel, &ginStats);
+ index_close(indexRel, AccessShareLock);
+ }
+ else
+ {
+ memset(&ginStats, 0, sizeof(ginStats));
+ }
/*
- * nPendingPages can be trusted, but the other fields are as of the last
- * VACUUM. Scale them by the ratio numPages / nTotalPages to account for
- * growth since then. If the fields are zero (implying no VACUUM at all,
- * and an index created pre-9.1), assume all pages are entry pages.
+ * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+ * trusted, but the other fields are data as of the last VACUUM. We can
+ * scale them up to account for growth since then, but that method only
+ * goes so far; in the worst case, the stats might be for a completely
+ * empty index, and scaling them will produce pretty bogus numbers.
+ * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+ * it's grown more than that, fall back to estimating things only from the
+ * assumed-accurate index size. But we'll trust nPendingPages in any case
+ * so long as it's not clearly insane, ie, more than the index size.
*/
- if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
- {
- numEntryPages = numPages;
- numDataPages = 0;
- numEntries = numTuples; /* bogus, but no other info available */
- }
+ if (ginStats.nPendingPages < numPages)
+ numPendingPages = ginStats.nPendingPages;
else
+ numPendingPages = 0;
+
+ if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+ ginStats.nTotalPages > numPages / 4 &&
+ ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
{
+ /*
+ * OK, the stats seem close enough to sane to be trusted. But we
+ * still need to scale them by the ratio numPages / nTotalPages to
+ * account for growth since the last VACUUM.
+ */
double scale = numPages / ginStats.nTotalPages;
- numEntryPages = ceil(numEntryPages * scale);
- numDataPages = ceil(numDataPages * scale);
- numEntries = ceil(numEntries * scale);
+ numEntryPages = ceil(ginStats.nEntryPages * scale);
+ numDataPages = ceil(ginStats.nDataPages * scale);
+ numEntries = ceil(ginStats.nEntries * scale);
/* ensure we didn't round up too much */
- numEntryPages = Min(numEntryPages, numPages);
- numDataPages = Min(numDataPages, numPages - numEntryPages);
+ numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+ numDataPages = Min(numDataPages,
+ numPages - numPendingPages - numEntryPages);
+ }
+ else
+ {
+ /*
+ * We might get here because it's a hypothetical index, or an index
+ * created pre-9.1 and never vacuumed since upgrading (in which case
+ * its stats would read as zeroes), or just because it's grown too
+ * much since the last VACUUM for us to put our faith in scaling.
+ *
+ * Invent some plausible internal statistics based on the index page
+ * count (and clamp that to at least 10 pages, just in case). We
+ * estimate that 90% of the index is entry pages, and the rest is data
+ * pages. Estimate 100 entries per entry page; this is rather bogus
+ * since it'll depend on the size of the keys, but it's more robust
+ * than trying to predict the number of entries per heap tuple.
+ */
+ numPages = Max(numPages, 10);
+ numEntryPages = floor((numPages - numPendingPages) * 0.90);
+ numDataPages = numPages - numPendingPages - numEntryPages;
+ numEntries = floor(numEntryPages * 100);
}
/* In an empty index, numEntries could be zero. Avoid divide-by-zero */
*indexStartupCost = 0;
*indexTotalCost = 0;
*indexSelectivity = 0;
- PG_RETURN_VOID();
+ return;
}
if (counts.haveFullScan || indexQuals == NIL)
/*
* Add an estimate of entry pages read by partial match algorithm. It's a
* scan over leaf pages in entry tree. We haven't any useful stats here,
- * so estimate it as proportion.
+ * so estimate it as proportion. Because counts.partialEntries is really
+ * pretty bogus (see code above), it's possible that it is more than
+ * numEntries; clamp the proportion to ensure sanity.
*/
- entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
+ partialScale = counts.partialEntries / numEntries;
+ partialScale = Min(partialScale, 1.0);
+
+ entryPagesFetched += ceil(numEntryPages * partialScale);
/*
* Partial match algorithm reads all data pages before doing actual scan,
- * so it's a startup cost. Again, we haven't any useful stats here, so,
- * estimate it as proportion
+ * so it's a startup cost. Again, we haven't any useful stats here, so
+ * estimate it as proportion.
*/
- dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
+ dataPagesFetched = ceil(numDataPages * partialScale);
/*
* Calculate cache effects if more than one scan due to nestloops or array
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
-
- PG_RETURN_VOID();
+ *indexPages = dataPagesFetched;
}
/*
* BRIN has search behavior completely different from other index types
*/
-Datum
-brincostestimate(PG_FUNCTION_ARGS)
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
List *indexOrderBys = path->indexorderbys;
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = index->pages;
/* XXX what about pages_per_range? */
-
- PG_RETURN_VOID();
}