* Selectivity routines are registered in the pg_operator catalog
* in the "oprrest" and "oprjoin" attributes.
*
- * Index cost functions are registered in the pg_am catalog
- * in the "amcostestimate" attribute.
+ * Index cost functions are located via the index AM's API struct,
+ * which is obtained from the handler function registered in pg_am.
*
- * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* float8 oprjoin (internal, oid, internal, int2, internal);
*
* (Before Postgres 8.4, join estimators had only the first four of these
- * parameters. That signature is still allowed, but deprecated.) The
+ * parameters. That signature is still allowed, but deprecated.) The
* relationship between jointype and sjinfo is explained in the comments for
* clause_selectivity() --- the short version is that jointype is usually
* best ignored in favor of examining sjinfo.
* joins, however, the selectivity is defined as the fraction of the left-hand
* side relation's rows that are expected to have a match (ie, at least one
* row with a TRUE result) in the right-hand side.
+ *
+ * For both oprrest and oprjoin functions, the operator's input collation OID
+ * (if any) is passed using the standard fmgr mechanism, so that the estimator
+ * function can fetch it with PG_GET_COLLATION(). Note, however, that all
+ * statistics in pg_statistic are currently built using the database's default
+ * collation. Thus, in most cases where we are looking at statistics, we
+ * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
+ * We expect that the error induced by doing this is usually not large enough
+ * to justify complicating matters.
*----------
*/
#include "postgres.h"
#include <ctype.h>
+#include <float.h>
#include <math.h>
#include "access/gin.h"
+#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/index.h"
+#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "optimizer/predtest.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
+#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
+#include "utils/timestamp.h"
#include "utils/tqual.h"
+#include "utils/typcache.h"
+#include "utils/varlena.h"
/* Hooks for plugins to get control when we ask for stats */
static double eqjoinsel_inner(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2);
static double eqjoinsel_semi(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2);
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ RelOptInfo *inner_rel);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
+static void examine_simple_variable(PlannerInfo *root, Var *var,
+ VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
Oid sortop,
Datum *min, Datum *max);
+static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
-static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+ bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+ bool case_insensitive,
+ int fixed_prefix_len);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
+static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
/*
*
* Note: this routine is also used to estimate selectivity for some
* operators that are not "=" but have comparable selectivity behavior,
- * such as "~=" (geometric approximate-match). Even for "=", we must
+ * such as "~=" (geometric approximate-match). Even for "=", we must
* keep in mind that the left and right datatypes may differ.
*/
Datum
bool varonleft)
{
double selec;
+ bool isdefault;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
return 0.0;
/*
- * If we matched the var to a unique index, assume there is exactly one
- * match regardless of anything else. (This is slightly bogus, since the
- * index's equality operator might be different from ours, but it's more
- * likely to be right than ignoring the information.)
+ * If we matched the var to a unique index or DISTINCT clause, assume
+ * there is exactly one match regardless of anything else. (This is
+ * slightly bogus, since the index or clause's equality operator might be
+ * different from ours, but it's much more likely to be right than
+ * ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
return 1.0 / vardata->rel->tuples;
/*
* Is the constant "=" to any of the column's most common values?
* (Although the given operator may not really be "=", we will assume
- * that seeing whether it returns TRUE is an appropriate test. If you
+ * that seeing whether it returns TRUE is an appropriate test. If you
* don't like this, maybe you shouldn't be using eqsel for your
* operator...)
*/
* all the not-common values share this remaining fraction
* equally, so we divide by the number of other distinct values.
*/
- otherdistinct = get_variable_numdistinct(vardata) - nnumbers;
+ otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
if (otherdistinct > 1)
selec /= otherdistinct;
* of distinct values and assuming they are equally common. (The guess
* is unlikely to be very good, but we do know a few special cases.)
*/
- selec = 1.0 / get_variable_numdistinct(vardata);
+ selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
/* result should be in range, but make sure... */
bool varonleft)
{
double selec;
+ bool isdefault;
/*
- * If we matched the var to a unique index, assume there is exactly one
- * match regardless of anything else. (This is slightly bogus, since the
- * index's equality operator might be different from ours, but it's more
- * likely to be right than ignoring the information.)
+ * If we matched the var to a unique index or DISTINCT clause, assume
+ * there is exactly one match regardless of anything else. (This is
+ * slightly bogus, since the index or clause's equality operator might be
+ * different from ours, but it's much more likely to be right than
+ * ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
return 1.0 / vardata->rel->tuples;
* result averaged over all possible values whether common or
* uncommon. (Essentially, we are assuming that the not-yet-known
* comparison value is equally likely to be any of the possible
- * values, regardless of their frequency in the table. Is that a good
+ * values, regardless of their frequency in the table. Is that a good
* idea?)
*/
selec = 1.0 - stats->stanullfrac;
- ndistinct = get_variable_numdistinct(vardata);
+ ndistinct = get_variable_numdistinct(vardata, &isdefault);
if (ndistinct > 1)
selec /= ndistinct;
* of distinct values and assuming they are equally common. (The guess
* is unlikely to be very good, but we do know a few special cases.)
*/
- selec = 1.0 / get_variable_numdistinct(vardata);
+ selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
/* result should be in range, but make sure... */
* essentially using the histogram just as a representative sample. However,
* small histograms are unlikely to be all that representative, so the caller
* should be prepared to fall back on some other estimation approach when the
- * histogram is missing or very small. It may also be prudent to combine this
+ * histogram is missing or very small. It may also be prudent to combine this
* approach with another one when the histogram is small.
*
* If the actual histogram size is not at least min_hist_size, we won't bother
*
* Note that the result disregards both the most-common-values (if any) and
* null entries. The caller is expected to combine this result with
- * statistics for those portions of the column population. It may also be
+ * statistics for those portions of the column population. It may also be
* prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
*/
double
*
* If the binary search accesses the first or last histogram
* entry, we try to replace that endpoint with the true column min
- * or max as found by get_actual_variable_range(). This
+ * or max as found by get_actual_variable_range(). This
* ameliorates misestimates when the min or max is moving as a
* result of changes since the last ANALYZE. Note that this could
* result in effectively including MCVs into the histogram that
/*
* Watch out for the possibility that we got a NaN or
- * Infinity from the division. This can happen
+ * Infinity from the division. This can happen
* despite the previous checks, if for example "low"
* is -Infinity.
*/
* Ideally we'd produce an error here, on the grounds that
* the given operator shouldn't have scalarXXsel
* registered as its selectivity func unless we can deal
- * with its operand types. But currently, all manner of
+ * with its operand types. But currently, all manner of
* stuff is invoking scalarXXsel, so give a default
* estimate until that can be fixed.
*/
/*
* The histogram boundaries are only approximate to begin with,
- * and may well be out of date anyway. Therefore, don't believe
+ * and may well be out of date anyway. Therefore, don't believe
* extremely small or large selectivity estimates --- unless we
* got actual current endpoint values from the table.
*/
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
Oid vartype;
Oid opfamily;
Pattern_Prefix_Status pstatus;
- Const *patt = NULL;
+ Const *patt;
Const *prefix = NULL;
- Const *rest = NULL;
+ Selectivity rest_selec = 0;
double result;
/*
* If this is for a NOT LIKE or similar operator, get the corresponding
- * positive-match operator and work with that. Set result to the correct
+ * positive-match operator and work with that. Set result to the correct
* default estimate, too.
*/
if (negate)
}
/*
- * Divide pattern into fixed prefix and remainder. XXX we have to assume
- * default collation here, because we don't have access to the actual
- * input collation for the operator. FIXME ...
+ * Pull out any fixed prefix implied by the pattern, and estimate the
+ * fractional selectivity of the remainder of the pattern. Unlike many of
+ * the other functions in this file, we use the pattern operator's actual
+ * collation for this step. This is not because we expect the collation
+ * to make a big difference in the selectivity estimate (it seldom would),
+ * but because we want to be sure we cache compiled regexps under the
+ * right cache key, so that they can be re-used at runtime.
*/
patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
- &prefix, &rest);
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
/*
- * If necessary, coerce the prefix constant to the right type. (The "rest"
- * constant need not be changed.)
+ * If necessary, coerce the prefix constant to the right type.
*/
if (prefix && prefix->consttype != vartype)
{
{
Selectivity heursel;
Selectivity prefixsel;
- Selectivity restsel;
if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(root, &vardata, vartype,
opfamily, prefix);
else
prefixsel = 1.0;
- restsel = pattern_selectivity(rest, ptype);
- heursel = prefixsel * restsel;
+ heursel = prefixsel * rest_selec;
if (selec < 0) /* fewer than 10 histogram entries? */
selec = heursel;
/*
* If we have most-common-values info, add up the fractions of the MCV
* entries that satisfy MCV OP PATTERN. These fractions contribute
- * directly to the result selectivity. Also add up the total fraction
+ * directly to the result selectivity. Also add up the total fraction
* represented by MCV entries.
*/
mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
}
+/*
+ * boolvarsel - Selectivity of Boolean variable.
+ *
+ * This can actually be called on any boolean-valued expression. If it
+ * involves only Vars of the specified relation, and if there are statistics
+ * about the Var or expression (the latter is possible if it's indexed) then
+ * we'll produce a real estimate; otherwise it's just a default.
+ */
+Selectivity
+boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+{
+ VariableStatData vardata;
+ double selec;
+
+ examine_variable(root, arg, varRelid, &vardata);
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ /*
+ * A boolean variable V is equivalent to the clause V = 't', so we
+ * compute the selectivity as if that is what we have.
+ */
+ selec = var_eq_const(&vardata, BooleanEqualOperator,
+ BoolGetDatum(true), false, true);
+ }
+ else if (is_funcclause(arg))
+ {
+ /*
+ * If we have no stats and it's a function call, estimate 0.3333333.
+ * This seems a pretty unprincipled choice, but Postgres has been
+ * using that estimate for function calls since 1992. The hoariness
+ * of this behavior suggests that we should not be in too much hurry
+ * to use another value.
+ */
+ selec = 0.3333333;
+ }
+ else
+ {
+ /* Otherwise, the default estimate is 0.5 */
+ selec = 0.5;
+ }
+ ReleaseVariableStats(vardata);
+ return selec;
+}
+
/*
* booltestsel - Selectivity of BooleanTest Node.
*/
/*
* No most-common-value info available. Still have null fraction
* information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
- * for null fraction and assume an even split for boolean tests.
+ * for null fraction and assume a 50-50 split of TRUE and FALSE.
*/
switch (booltesttype)
{
case IS_UNKNOWN:
-
- /*
- * Use freq_null directly.
- */
+ /* select only NULL values */
selec = freq_null;
break;
case IS_NOT_UNKNOWN:
-
- /*
- * Select not unknown (not null) values. Calculate from
- * freq_null.
- */
+ /* select non-NULL values */
selec = 1.0 - freq_null;
break;
case IS_TRUE:
- case IS_NOT_TRUE:
case IS_FALSE:
- case IS_NOT_FALSE:
+ /* Assume we select half of the non-NULL values */
selec = (1.0 - freq_null) / 2.0;
break;
+ case IS_NOT_TRUE:
+ case IS_NOT_FALSE:
+ /* Assume we select NULLs plus half of the non-NULLs */
+ /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
+ selec = (freq_null + 1.0) / 2.0;
+ break;
default:
elog(ERROR, "unrecognized booltesttype: %d",
(int) booltesttype);
{
Oid operator = clause->opno;
bool useOr = clause->useOr;
+ bool isEquality = false;
+ bool isInequality = false;
Node *leftop;
Node *rightop;
Oid nominal_element_type;
Oid nominal_element_collation;
+ TypeCacheEntry *typentry;
RegProcedure oprsel;
FmgrInfo oprselproc;
Selectivity s1;
+ Selectivity s1disjoint;
- /*
- * First, look up the underlying operator's selectivity estimator. Punt if
- * it hasn't got one.
- */
- if (is_join_clause)
- oprsel = get_oprjoin(operator);
- else
- oprsel = get_oprrest(operator);
- if (!oprsel)
- return (Selectivity) 0.5;
- fmgr_info(oprsel, &oprselproc);
-
- /* deconstruct the expression */
+ /* First, deconstruct the expression */
Assert(list_length(clause->args) == 2);
leftop = (Node *) linitial(clause->args);
rightop = (Node *) lsecond(clause->args);
+ /* aggressively reduce both sides to constants */
+ leftop = estimate_expression_value(root, leftop);
+ rightop = estimate_expression_value(root, rightop);
+
/* get nominal (after relabeling) element type of rightop */
nominal_element_type = get_base_element_type(exprType(rightop));
if (!OidIsValid(nominal_element_type))
/* look through any binary-compatible relabeling of rightop */
rightop = strip_array_coercion(rightop);
+ /*
+ * Detect whether the operator is the default equality or inequality
+ * operator of the array element type.
+ */
+ typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
+ if (OidIsValid(typentry->eq_opr))
+ {
+ if (operator == typentry->eq_opr)
+ isEquality = true;
+ else if (get_negator(operator) == typentry->eq_opr)
+ isInequality = true;
+ }
+
+ /*
+ * If it is equality or inequality, we might be able to estimate this as a
+ * form of array containment; for instance "const = ANY(column)" can be
+ * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
+ * that, and returns the selectivity estimate if successful, or -1 if not.
+ */
+ if ((isEquality || isInequality) && !is_join_clause)
+ {
+ s1 = scalararraysel_containment(root, leftop, rightop,
+ nominal_element_type,
+ isEquality, useOr, varRelid);
+ if (s1 >= 0.0)
+ return s1;
+ }
+
+ /*
+ * Look up the underlying operator's selectivity estimator. Punt if it
+ * hasn't got one.
+ */
+ if (is_join_clause)
+ oprsel = get_oprjoin(operator);
+ else
+ oprsel = get_oprrest(operator);
+ if (!oprsel)
+ return (Selectivity) 0.5;
+ fmgr_info(oprsel, &oprselproc);
+
+ /*
+ * In the array-containment check above, we must only believe that an
+ * operator is equality or inequality if it is the default btree equality
+ * operator (or its negator) for the element type, since those are the
+ * operators that array containment will use. But in what follows, we can
+ * be a little laxer, and also believe that any operators using eqsel() or
+ * neqsel() as selectivity estimator act like equality or inequality.
+ */
+ if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
+ isEquality = true;
+ else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
+ isInequality = true;
+
/*
* We consider three cases:
*
ARR_ELEMTYPE(arrayval),
elmlen, elmbyval, elmalign,
&elem_values, &elem_nulls, &num_elems);
- s1 = useOr ? 0.0 : 1.0;
+
+ /*
+ * For generic operators, we assume the probability of success is
+ * independent for each array element. But for "= ANY" or "<> ALL",
+ * if the array elements are distinct (which'd typically be the case)
+ * then the probabilities are disjoint, and we should just sum them.
+ *
+ * If we were being really tense we would try to confirm that the
+ * elements are all distinct, but that would be expensive and it
+ * doesn't seem to be worth the cycles; it would amount to penalizing
+ * well-written queries in favor of poorly-written ones. However, we
+ * do protect ourselves a little bit by checking whether the
+ * disjointness assumption leads to an impossible (out of range)
+ * probability; if so, we fall back to the normal calculation.
+ */
+ s1 = s1disjoint = (useOr ? 0.0 : 1.0);
+
for (i = 0; i < num_elems; i++)
{
List *args;
elem_nulls[i],
elmbyval));
if (is_join_clause)
- s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
- PointerGetDatum(root),
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
else
- s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
- PointerGetDatum(root),
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
+
if (useOr)
+ {
s1 = s1 + s2 - s1 * s2;
+ if (isEquality)
+ s1disjoint += s2;
+ }
else
+ {
s1 = s1 * s2;
+ if (isInequality)
+ s1disjoint += s2 - 1.0;
+ }
}
+
+ /* accept disjoint-probability estimate if in range */
+ if ((useOr ? isEquality : isInequality) &&
+ s1disjoint >= 0.0 && s1disjoint <= 1.0)
+ s1 = s1disjoint;
}
else if (rightop && IsA(rightop, ArrayExpr) &&
!((ArrayExpr *) rightop)->multidims)
get_typlenbyval(arrayexpr->element_typeid,
&elmlen, &elmbyval);
- s1 = useOr ? 0.0 : 1.0;
+
+ /*
+ * We use the assumption of disjoint probabilities here too, although
+ * the odds of equal array elements are rather higher if the elements
+ * are not all constants (which they won't be, else constant folding
+ * would have reduced the ArrayExpr to a Const). In this path it's
+ * critical to have the sanity check on the s1disjoint estimate.
+ */
+ s1 = s1disjoint = (useOr ? 0.0 : 1.0);
+
foreach(l, arrayexpr->elements)
{
Node *elem = (Node *) lfirst(l);
*/
args = list_make2(leftop, elem);
if (is_join_clause)
- s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
- PointerGetDatum(root),
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
else
- s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
- PointerGetDatum(root),
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
+
if (useOr)
+ {
s1 = s1 + s2 - s1 * s2;
+ if (isEquality)
+ s1disjoint += s2;
+ }
else
+ {
s1 = s1 * s2;
+ if (isInequality)
+ s1disjoint += s2 - 1.0;
+ }
}
+
+ /* accept disjoint-probability estimate if in range */
+ if ((useOr ? isEquality : isInequality) &&
+ s1disjoint >= 0.0 && s1disjoint <= 1.0)
+ s1 = s1disjoint;
}
else
{
dummyexpr->collation = clause->inputcollid;
args = list_make2(leftop, dummyexpr);
if (is_join_clause)
- s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
else
- s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
s1 = useOr ? 0.0 : 1.0;
/*
* Arbitrarily assume 10 elements in the eventual array value (see
- * also estimate_array_length)
+ * also estimate_array_length). We don't risk an assumption of
+ * disjoint probabilities here.
*/
for (i = 0; i < 10; i++)
{
{
Selectivity s1;
Oid opno = linitial_oid(clause->opnos);
+ Oid inputcollid = linitial_oid(clause->inputcollids);
List *opargs;
bool is_join_clause;
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
opargs,
+ inputcollid,
jointype,
sjinfo);
}
/* Estimate selectivity for a restriction clause. */
s1 = restriction_selectivity(root, opno,
opargs,
+ inputcollid,
varRelid);
}
VariableStatData vardata1;
VariableStatData vardata2;
bool join_is_reversed;
+ RelOptInfo *inner_rel;
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
break;
case JOIN_SEMI:
case JOIN_ANTI:
+
+ /*
+ * Look up the join's inner relation. min_righthand is sufficient
+ * information because neither SEMI nor ANTI joins permit any
+ * reassociation into or out of their RHS, so the righthand will
+ * always be exactly that set of rels.
+ */
+ inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
+
if (!join_is_reversed)
- selec = eqjoinsel_semi(operator, &vardata1, &vardata2);
+ selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
+ inner_rel);
else
selec = eqjoinsel_semi(get_commutator(operator),
- &vardata2, &vardata1);
+ &vardata2, &vardata1,
+ inner_rel);
break;
default:
/* other values not expected here */
double selec;
double nd1;
double nd2;
+ bool isdefault1;
+ bool isdefault2;
Form_pg_statistic stats1 = NULL;
Form_pg_statistic stats2 = NULL;
bool have_mcvs1 = false;
float4 *numbers2 = NULL;
int nnumbers2 = 0;
- nd1 = get_variable_numdistinct(vardata1);
- nd2 = get_variable_numdistinct(vardata2);
+ nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(vardata2, &isdefault2);
if (HeapTupleIsValid(vardata1->statsTuple))
{
if (have_mcvs1 && have_mcvs2)
{
/*
- * We have most-common-value lists for both relations. Run through
+ * We have most-common-value lists for both relations. Run through
* the lists to see which MCVs actually join to each other with the
- * given operator. This allows us to determine the exact join
+ * given operator. This allows us to determine the exact join
* selectivity for the portion of the relations represented by the MCV
* lists. We still have to estimate for the remaining population, but
* in a skewed distribution this gives us a big leg up in accuracy.
/*
* Note we assume that each MCV will match at most one member of the
- * other MCV list. If the operator isn't really equality, there could
+ * other MCV list. If the operator isn't really equality, there could
* be multiple matches --- but we don't look for them, both for speed
* and because the math wouldn't add up...
*/
* XXX Can we be smarter if we have an MCV list for just one side? It
* seems that if we assume equal distribution for the other side, we
* end up with the same answer anyway.
- *
- * An additional hack we use here is to clamp the nd1 and nd2 values
- * to not more than what we are estimating the input relation sizes to
- * be, providing a crude correction for the selectivity of restriction
- * clauses on those relations. (We don't do that in the other path
- * since there we are comparing the nd values to stats for the whole
- * relations.)
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
- if (vardata1->rel)
- nd1 = Min(nd1, vardata1->rel->rows);
- if (vardata2->rel)
- nd2 = Min(nd2, vardata2->rel->rows);
-
selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
if (nd1 > nd2)
selec /= nd1;
*/
static double
eqjoinsel_semi(Oid operator,
- VariableStatData *vardata1, VariableStatData *vardata2)
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ RelOptInfo *inner_rel)
{
double selec;
double nd1;
double nd2;
+ bool isdefault1;
+ bool isdefault2;
Form_pg_statistic stats1 = NULL;
bool have_mcvs1 = false;
Datum *values1 = NULL;
float4 *numbers2 = NULL;
int nnumbers2 = 0;
- nd1 = get_variable_numdistinct(vardata1);
- nd2 = get_variable_numdistinct(vardata2);
+ nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+
+ /*
+ * We clamp nd2 to be not more than what we estimate the inner relation's
+ * size to be. This is intuitively somewhat reasonable since obviously
+ * there can't be more than that many distinct values coming from the
+ * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
+ * likewise) is that this is the only pathway by which restriction clauses
+ * applied to the inner rel will affect the join result size estimate,
+ * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
+ * only the outer rel's size. If we clamped nd1 we'd be double-counting
+ * the selectivity of outer-rel restrictions.
+ *
+ * We can apply this clamping both with respect to the base relation from
+ * which the join variable comes (if there is just one), and to the
+ * immediate inner input relation of the current join.
+ *
+ * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+ * great, maybe, but it didn't come out of nowhere either. This is most
+ * helpful when the inner relation is empty and consequently has no stats.
+ */
+ if (vardata2->rel)
+ {
+ if (nd2 >= vardata2->rel->rows)
+ {
+ nd2 = vardata2->rel->rows;
+ isdefault2 = false;
+ }
+ }
+ if (nd2 >= inner_rel->rows)
+ {
+ nd2 = inner_rel->rows;
+ isdefault2 = false;
+ }
if (HeapTupleIsValid(vardata1->statsTuple))
{
if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
{
/*
- * We have most-common-value lists for both relations. Run through
+ * We have most-common-value lists for both relations. Run through
* the lists to see which MCVs actually join to each other with the
- * given operator. This allows us to determine the exact join
+ * given operator. This allows us to determine the exact join
* selectivity for the portion of the relations represented by the MCV
* lists. We still have to estimate for the remaining population, but
* in a skewed distribution this gives us a big leg up in accuracy.
uncertainfrac,
uncertain;
int i,
- nmatches;
+ nmatches,
+ clamped_nvalues2;
+
+ /*
+ * The clamping above could have resulted in nd2 being less than
+ * nvalues2; in which case, we assume that precisely the nd2 most
+ * common values in the relation will appear in the join input, and so
+ * compare to only the first nd2 members of the MCV list. Of course
+ * this is frequently wrong, but it's the best bet we can make.
+ */
+ clamped_nvalues2 = Min(nvalues2, nd2);
fmgr_info(get_opcode(operator), &eqproc);
hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
- hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+ hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
/*
* Note we assume that each MCV will match at most one member of the
- * other MCV list. If the operator isn't really equality, there could
+ * other MCV list. If the operator isn't really equality, there could
* be multiple matches --- but we don't look for them, both for speed
* and because the math wouldn't add up...
*/
{
int j;
- for (j = 0; j < nvalues2; j++)
+ for (j = 0; j < clamped_nvalues2; j++)
{
if (hasmatch2[j])
continue;
/*
* Now we need to estimate the fraction of relation 1 that has at
- * least one join partner. We know for certain that the matched MCVs
+ * least one join partner. We know for certain that the matched MCVs
* do, so that gives us a lower bound, but we're really in the dark
* about everything else. Our crude approach is: if nd1 <= nd2 then
* assume all non-null rel1 rows have join partners, else assume for
* nd2 is default, punt and assume half of the uncertain rows have
* join partners.
*/
- if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+ if (!isdefault1 && !isdefault2)
{
nd1 -= nmatches;
nd2 -= nmatches;
- if (nd1 <= nd2 || nd2 <= 0)
+ if (nd1 <= nd2 || nd2 < 0)
uncertainfrac = 1.0;
else
uncertainfrac = nd2 / nd1;
*/
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
- if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+ if (!isdefault1 && !isdefault2)
{
- if (vardata1->rel)
- nd1 = Min(nd1, vardata1->rel->rows);
- if (vardata2->rel)
- nd2 = Min(nd2, vardata2->rel->rows);
-
- if (nd1 <= nd2 || nd2 <= 0)
+ if (nd1 <= nd2 || nd2 < 0)
selec = 1.0 - nullfrac1;
else
selec = (nd2 / nd1) * (1.0 - nullfrac1);
{
GroupVarInfo *varinfo;
double ndistinct;
+ bool isdefault;
ListCell *lc;
- ndistinct = get_variable_numdistinct(vardata);
+ ndistinct = get_variable_numdistinct(vardata, &isdefault);
/* cannot use foreach here because of possible list_delete */
lc = list_head(varinfos);
* groupExprs - list of expressions being grouped by
* input_rows - number of rows estimated to arrive at the group/unique
* filter step
+ * pgset - NULL, or a List** pointing to a grouping set to filter the
+ * groupExprs against
*
* Given the lack of any cross-correlation statistics in the system, it's
* impossible to do anything really trustworthy with GROUP BY conditions
* case (all possible cross-product terms actually appear as groups) since
* very often the grouped-by Vars are highly correlated. Our current approach
* is as follows:
- * 1. Expressions yielding boolean are assumed to contribute two groups,
+ * 1. Expressions yielding boolean are assumed to contribute two groups,
* independently of their content, and are ignored in the subsequent
- * steps. This is mainly because tests like "col IS NULL" break the
+ * steps. This is mainly because tests like "col IS NULL" break the
* heuristic used in step 2 especially badly.
- * 2. Reduce the given expressions to a list of unique Vars used. For
+ * 2. Reduce the given expressions to a list of unique Vars used. For
* example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
* It is clearly correct not to count the same Var more than once.
* It is also reasonable to treat f(x) the same as x: f() cannot
* As a special case, if a GROUP BY expression can be matched to an
* expressional index for which we have statistics, then we treat the
* whole expression as though it were just a Var.
- * 3. If the list contains Vars of different relations that are known equal
+ * 3. If the list contains Vars of different relations that are known equal
* due to equivalence classes, then drop all but one of the Vars from each
* known-equal set, keeping the one with smallest estimated # of values
* (since the extra values of the others can't appear in joined rows).
* Note the reason we only consider Vars of different relations is that
* if we considered ones of the same rel, we'd be double-counting the
* restriction selectivity of the equality in the next step.
- * 4. For Vars within a single source rel, we multiply together the numbers
+ * 4. For Vars within a single source rel, we multiply together the numbers
* of values, clamp to the number of rows in the rel (divided by 10 if
- * more than one Var), and then multiply by the selectivity of the
- * restriction clauses for that rel. When there's more than one Var,
- * the initial product is probably too high (it's the worst case) but
- * clamping to a fraction of the rel's rows seems to be a helpful
- * heuristic for not letting the estimate get out of hand. (The factor
- * of 10 is derived from pre-Postgres-7.4 practice.) Multiplying
- * by the restriction selectivity is effectively assuming that the
- * restriction clauses are independent of the grouping, which is a crummy
- * assumption, but it's hard to do better.
- * 5. If there are Vars from multiple rels, we repeat step 4 for each such
+ * more than one Var), and then multiply by a factor based on the
+ * selectivity of the restriction clauses for that rel. When there's
+ * more than one Var, the initial product is probably too high (it's the
+ * worst case) but clamping to a fraction of the rel's rows seems to be a
+ * helpful heuristic for not letting the estimate get out of hand. (The
+ * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
+ * we multiply by to adjust for the restriction selectivity assumes that
+ * the restriction clauses are independent of the grouping, which may not
+ * be a valid assumption, but it's hard to do better.
+ * 5. If there are Vars from multiple rels, we repeat step 4 for each such
* rel, and multiply the results together.
* Note that rels not containing grouped Vars are ignored completely, as are
* join clauses. Such rels cannot increase the number of groups, and we
* but we don't have the info to do better).
*/
double
-estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
+ List **pgset)
{
List *varinfos = NIL;
double numdistinct;
ListCell *l;
+ int i;
- /* We should not be called unless query has GROUP BY (or DISTINCT) */
- Assert(groupExprs != NIL);
+ /*
+ * We don't ever want to return an estimate of zero groups, as that tends
+ * to lead to division-by-zero and other unpleasantness. The input_rows
+ * estimate is usually already at least 1, but clamp it just in case it
+ * isn't.
+ */
+ input_rows = clamp_row_est(input_rows);
+
+ /*
+ * If no grouping columns, there's exactly one group. (This can't happen
+ * for normal cases with GROUP BY or DISTINCT, but it is possible for
+ * corner cases with set operations.)
+ */
+ if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
+ return 1.0;
/*
- * Count groups derived from boolean grouping expressions. For other
+ * Count groups derived from boolean grouping expressions. For other
* expressions, find the unique Vars used, treating an expression as a Var
* if we can find stats for it. For each one, record the statistical
* estimate of number of distinct values (total in its table, without
*/
numdistinct = 1.0;
+ i = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
List *varshere;
ListCell *l2;
+ /* is expression in this grouping set? */
+ if (pgset && !list_member_int(*pgset, i++))
+ continue;
+
/* Short-circuit for expressions returning boolean */
if (exprType(groupexpr) == BOOLOID)
{
* PlaceHolderVar doesn't change the number of groups, which boils
* down to ignoring the possible addition of nulls to the result set).
*/
- varshere = pull_var_clause(groupexpr, PVC_RECURSE_PLACEHOLDERS);
+ varshere = pull_var_clause(groupexpr,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_RECURSE_PLACEHOLDERS);
/*
* If we find any variable-free GROUP BY item, then either it is a
* Group Vars by relation and estimate total numdistinct.
*
* For each iteration of the outer loop, we process the frontmost Var in
- * varinfos, plus all other Vars in the same relation. We remove these
+ * varinfos, plus all other Vars in the same relation. We remove these
* Vars from the newvarinfos list for the next iteration. This is the
* easiest way to group Vars of same rel together.
*/
reldistinct = clamp;
/*
- * Multiply by restriction selectivity.
+ * Update the estimate based on the restriction selectivity,
+ * guarding against division by zero when reldistinct is zero.
+ * Also skip this if we know that we are returning all rows.
*/
- reldistinct *= rel->rows / rel->tuples;
+ if (reldistinct > 0 && rel->rows < rel->tuples)
+ {
+ /*
+ * Given a table containing N rows with n distinct values in a
+ * uniform distribution, if we select p rows at random then
+ * the expected number of distinct values selected is
+ *
+ * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+ *
+ * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+ *
+ * See "Approximating block accesses in database
+ * organizations", S. B. Yao, Communications of the ACM,
+ * Volume 20 Issue 4, April 1977 Pages 260-261.
+ *
+ * Alternatively, re-arranging the terms from the factorials,
+ * this may be written as
+ *
+ * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+ *
+ * This form of the formula is more efficient to compute in
+ * the common case where p is larger than N/n. Additionally,
+ * as pointed out by Dell'Era, if i << N for all terms in the
+ * product, it can be approximated by
+ *
+ * n * (1 - ((N-p)/N)^(N/n))
+ *
+ * See "Expected distinct values when selecting from a bag
+ * without replacement", Alberto Dell'Era,
+ * http://www.adellera.it/investigations/distinct_balls/.
+ *
+ * The condition i << N is equivalent to n >> 1, so this is a
+ * good approximation when the number of distinct values in
+ * the table is large. It turns out that this formula also
+ * works well even when n is small.
+ */
+ reldistinct *=
+ (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+ rel->tuples / reldistinct));
+ }
+ reldistinct = clamp_row_est(reldistinct);
/*
* Update estimate of total distinct groups.
* distribution, so this will have to do for now.
*
* We are passed the number of buckets the executor will use for the given
- * input relation. If the data were perfectly distributed, with the same
+ * input relation. If the data were perfectly distributed, with the same
* number of tuples going into each available bucket, then the bucketsize
* fraction would be 1/nbuckets. But this happy state of affairs will occur
* only if (a) there are at least nbuckets distinct data values, and (b)
- * we have a not-too-skewed data distribution. Otherwise the buckets will
+ * we have a not-too-skewed data distribution. Otherwise the buckets will
* be nonuniformly occupied. If the other relation in the join has a key
* distribution similar to this one's, then the most-loaded buckets are
* exactly those that will be probed most often. Therefore, the "average"
stanullfrac,
mcvfreq,
avgfreq;
+ bool isdefault;
float4 *numbers;
int nnumbers;
examine_variable(root, hashkey, 0, &vardata);
- /* Get number of distinct values and fraction that are null */
- ndistinct = get_variable_numdistinct(&vardata);
+ /* Get number of distinct values */
+ ndistinct = get_variable_numdistinct(&vardata, &isdefault);
+
+ /* If ndistinct isn't real, punt and return 0.1, per comments above */
+ if (isdefault)
+ {
+ ReleaseVariableStats(vardata);
+ return (Selectivity) 0.1;
+ }
+ /* Get fraction that are null */
if (HeapTupleIsValid(vardata.statsTuple))
{
Form_pg_statistic stats;
stanullfrac = stats->stanullfrac;
}
else
- {
- /*
- * Believe a default ndistinct only if it came from stats. Otherwise
- * punt and return 0.1, per comments above.
- */
- if (ndistinct == DEFAULT_NUM_DISTINCT)
- {
- ReleaseVariableStats(vardata);
- return (Selectivity) 0.1;
- }
-
stanullfrac = 0.0;
- }
/* Compute avg freq of all distinct data values in raw relation */
avgfreq = (1.0 - stanullfrac) / ndistinct;
* XXX Possibly better way, but much more expensive: multiply by
* selectivity of rel's restriction clauses that mention the target Var.
*/
- if (vardata.rel)
+ if (vardata.rel && vardata.rel->tuples > 0)
+ {
ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+ ndistinct = clamp_row_est(ndistinct);
+ }
/*
* Initial estimate of bucketsize fraction is 1/nbuckets as long as the
* operators to estimate selectivity for the other's. This is outright
* wrong in some cases --- in particular signed versus unsigned
* interpretation could trip us up. But it's useful enough in the
- * majority of cases that we do it anyway. Should think about more
+ * majority of cases that we do it anyway. Should think about more
* rigorous ways to do it.
*/
switch (valuetypid)
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
case INETOID:
case CIDROID:
case MACADDROID:
+ case MACADDR8OID:
*scaledvalue = convert_network_to_scalar(value, valuetypid);
*scaledlobound = convert_network_to_scalar(lobound, boundstypid);
*scaledhibound = convert_network_to_scalar(hibound, boundstypid);
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}
return 0.0; /* empty string has scalar value 0 */
/*
- * Since base is at least 10, need not consider more than about 20 chars
+ * There seems little point in considering more than a dozen bytes from
+ * the string. Since base is at least 10, that will give us nominal
+ * resolution of at least 12 decimal digits, which is surely far more
+ * precision than this estimation technique has got anyway (especially in
+ * non-C locales). Also, even with the maximum possible base of 256, this
+ * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
+ * overflow on any known machine.
*/
- if (slen > 20)
- slen = 20;
+ if (slen > 12)
+ slen = 12;
/* Convert initial characters to fraction */
base = rangehi - rangelo + 1;
{
char *xfrmstr;
size_t xfrmlen;
- size_t xfrmlen2;
+ size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
/*
- * Note: originally we guessed at a suitable output buffer size, and
- * only needed to call strxfrm twice if our guess was too small.
- * However, it seems that some versions of Solaris have buggy strxfrm
- * that can write past the specified buffer length in that scenario.
- * So, do it the dumb way for portability.
- *
- * Yet other systems (e.g., glibc) sometimes return a smaller value
- * from the second call than the first; thus the Assert must be <= not
- * == as you'd expect. Can't any of these people program their way
- * out of a paper bag?
+ * XXX: We could guess at a suitable output buffer size and only call
+ * strxfrm twice if our guess is too small.
*
* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
* bogus data or set an error. This is not really a problem unless it
#endif
xfrmstr = (char *) palloc(xfrmlen + 1);
xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+
+ /*
+ * Some systems (e.g., glibc) can return a smaller value from the
+ * second call than the first; thus the Assert must be <= not ==.
+ */
Assert(xfrmlen2 <= xfrmlen);
pfree(val);
val = xfrmstr;
* average month length of 365.25/12.0 days. Not too
* accurate, but plenty good enough for our purposes.
*/
-#ifdef HAVE_INT64_TIMESTAMP
return interval->time + interval->day * (double) USECS_PER_DAY +
interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
- return interval->time + interval->day * SECS_PER_DAY +
- interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
}
case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
return (DatumGetRelativeTime(value) * 1000000.0);
-#else
- return DatumGetRelativeTime(value);
-#endif
case TINTERVALOID:
{
TimeInterval tinterval = DatumGetTimeInterval(value);
-#ifdef HAVE_INT64_TIMESTAMP
if (tinterval->status != 0)
return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
- if (tinterval->status != 0)
- return tinterval->data[1] - tinterval->data[0];
-#endif
return 0; /* for lack of a better idea */
}
case TIMEOID:
TimeTzADT *timetz = DatumGetTimeTzADTP(value);
/* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
- return (double) (timetz->time + timetz->zone);
-#endif
}
}
right = (Node *) lsecond(args);
/*
- * Examine both sides. Note that when varRelid is nonzero, Vars of other
+ * Examine both sides. Note that when varRelid is nonzero, Vars of other
* relations will be treated as pseudoconstants.
*/
examine_variable(root, left, varRelid, vardata);
return true;
}
- /* Ooops, clause has wrong structure (probably var op var) */
+ /* Oops, clause has wrong structure (probably var op var) */
ReleaseVariableStats(*vardata);
ReleaseVariableStats(rdata);
* atttype, atttypmod: type data to pass to get_attstatsslot(). This is
* commonly the same as the exposed type of the variable argument,
* but can be different in binary-compatible-type cases.
- * isunique: TRUE if we were able to match the var to a unique index,
- * implying its values are unique for this query.
+ * isunique: TRUE if we were able to match the var to a unique index or a
+ * single-column DISTINCT clause, implying its values are unique for
+ * this query. (Caution: this should be trusted for statistical
+ * purposes only, since we do not check indimmediate nor verify that
+ * the exact same definition of equality applies.)
*
* Caller is responsible for doing ReleaseVariableStats() before exiting.
*/
(varRelid == 0 || varRelid == ((Var *) basenode)->varno))
{
Var *var = (Var *) basenode;
- RangeTblEntry *rte;
+ /* Set up result fields other than the stats tuple */
vardata->var = basenode; /* return Var without relabeling */
vardata->rel = find_base_rel(root, var->varno);
vardata->atttype = var->vartype;
vardata->atttypmod = var->vartypmod;
vardata->isunique = has_unique_index(vardata->rel, var->varattno);
- rte = root->simple_rte_array[var->varno];
-
- if (get_relation_stats_hook &&
- (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
- {
- /*
- * The hook took control of acquiring a stats tuple. If it did
- * supply a tuple, it'd better have supplied a freefunc.
- */
- if (HeapTupleIsValid(vardata->statsTuple) &&
- !vardata->freefunc)
- elog(ERROR, "no function provided to release variable stats with");
- }
- else if (rte->rtekind == RTE_RELATION)
- {
- vardata->statsTuple = SearchSysCache3(STATRELATTINH,
- ObjectIdGetDatum(rte->relid),
- Int16GetDatum(var->varattno),
- BoolGetDatum(rte->inh));
- vardata->freefunc = ReleaseSysCache;
- }
- else
- {
- /*
- * XXX This means the Var comes from a JOIN or sub-SELECT. Later
- * add code to dig down into the join etc and see if we can trace
- * the variable to something with stats. (But beware of
- * sub-SELECTs with DISTINCT/GROUP BY/etc. Perhaps there are no
- * cases where this would really be useful, because we'd have
- * flattened the subselect if it is??)
- */
- }
+ /* Try to locate some stats */
+ examine_simple_variable(root, var, vardata);
return;
}
/*
* Okay, it's a more complicated expression. Determine variable
- * membership. Note that when varRelid isn't zero, only vars of that
+ * membership. Note that when varRelid isn't zero, only vars of that
* relation are considered "real" vars.
*/
varnos = pull_varnos(basenode);
if (onerel)
{
/*
- * We have an expression in vars of a single relation. Try to match
+ * We have an expression in vars of a single relation. Try to match
* it to expressional index columns, in hopes of finding some
* statistics.
*
* XXX it's conceivable that there are multiple matches with different
* index opfamilies; if so, we need to pick one that matches the
- * operator we are estimating for. FIXME later.
+ * operator we are estimating for. FIXME later.
*/
ListCell *ilist;
}
/*
- * get_variable_numdistinct
- * Estimate the number of distinct values of a variable.
+ * examine_simple_variable
+ * Handle a simple Var for examine_variable
*
- * vardata: results of examine_variable
+ * This is split out as a subroutine so that we can recurse to deal with
+ * Vars referencing subqueries.
*
- * NB: be careful to produce an integral result, since callers may compare
- * the result to exact integer counts.
+ * We already filled in all the fields of *vardata except for the stats tuple.
*/
-double
-get_variable_numdistinct(VariableStatData *vardata)
+static void
+examine_simple_variable(PlannerInfo *root, Var *var,
+ VariableStatData *vardata)
{
- double stadistinct;
- double ntuples;
+ RangeTblEntry *rte = root->simple_rte_array[var->varno];
- /*
- * Determine the stadistinct value to use. There are cases where we can
- * get an estimate even without a pg_statistic entry, or can get a better
- * value than is in pg_statistic.
- */
- if (HeapTupleIsValid(vardata->statsTuple))
- {
- /* Use the pg_statistic entry */
- Form_pg_statistic stats;
+ Assert(IsA(rte, RangeTblEntry));
- stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
- stadistinct = stats->stadistinct;
+ if (get_relation_stats_hook &&
+ (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it did supply
+ * a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ !vardata->freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
}
- else if (vardata->vartype == BOOLOID)
+ else if (rte->rtekind == RTE_RELATION)
{
/*
- * Special-case boolean columns: presumably, two distinct values.
- *
- * Are there any other datatypes we should wire in special estimates
- * for?
+ * Plain table or parent of an inheritance appendrel, so look up the
+ * column in pg_statistic
*/
- stadistinct = 2.0;
+ vardata->statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(rte->relid),
+ Int16GetDatum(var->varattno),
+ BoolGetDatum(rte->inh));
+ vardata->freefunc = ReleaseSysCache;
}
- else
+ else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
{
/*
- * We don't keep statistics for system columns, but in some cases we
- * can infer distinctness anyway.
+ * Plain subquery (not one that was converted to an appendrel).
*/
- if (vardata->var && IsA(vardata->var, Var))
- {
- switch (((Var *) vardata->var)->varattno)
- {
- case ObjectIdAttributeNumber:
- case SelfItemPointerAttributeNumber:
- stadistinct = -1.0; /* unique */
- break;
- case TableOidAttributeNumber:
- stadistinct = 1.0; /* only 1 value */
- break;
- default:
- stadistinct = 0.0; /* means "unknown" */
- break;
- }
- }
- else
- stadistinct = 0.0; /* means "unknown" */
+ Query *subquery = rte->subquery;
+ RelOptInfo *rel;
+ TargetEntry *ste;
/*
- * XXX consider using estimate_num_groups on expressions?
+ * Punt if it's a whole-row var rather than a plain column reference.
*/
- }
+ if (var->varattno == InvalidAttrNumber)
+ return;
- /*
- * If there is a unique index for the variable, assume it is unique no
- * matter what pg_statistic says; the statistics could be out of date, or
- * we might have found a partial unique index that proves the var is
- * unique for this query.
- */
+ /*
+ * Punt if subquery uses set operations or GROUP BY, as these will
+ * mash underlying columns' stats beyond recognition. (Set ops are
+ * particularly nasty; if we forged ahead, we would return stats
+ * relevant to only the leftmost subselect...) DISTINCT is also
+ * problematic, but we check that later because there is a possibility
+ * of learning something even with it.
+ */
+ if (subquery->setOperations ||
+ subquery->groupClause)
+ return;
+
+ /*
+ * OK, fetch RelOptInfo for subquery. Note that we don't change the
+ * rel returned in vardata, since caller expects it to be a rel of the
+ * caller's query level. Because we might already be recursing, we
+ * can't use that rel pointer either, but have to look up the Var's
+ * rel afresh.
+ */
+ rel = find_base_rel(root, var->varno);
+
+ /* If the subquery hasn't been planned yet, we have to punt */
+ if (rel->subroot == NULL)
+ return;
+ Assert(IsA(rel->subroot, PlannerInfo));
+
+ /*
+ * Switch our attention to the subquery as mangled by the planner. It
+ * was okay to look at the pre-planning version for the tests above,
+ * but now we need a Var that will refer to the subroot's live
+ * RelOptInfos. For instance, if any subquery pullup happened during
+ * planning, Vars in the targetlist might have gotten replaced, and we
+ * need to see the replacement expressions.
+ */
+ subquery = rel->subroot->parse;
+ Assert(IsA(subquery, Query));
+
+ /* Get the subquery output expression referenced by the upper Var */
+ ste = get_tle_by_resno(subquery->targetList, var->varattno);
+ if (ste == NULL || ste->resjunk)
+ elog(ERROR, "subquery %s does not have attribute %d",
+ rte->eref->aliasname, var->varattno);
+ var = (Var *) ste->expr;
+
+ /*
+ * If subquery uses DISTINCT, we can't make use of any stats for the
+ * variable ... but, if it's the only DISTINCT column, we are entitled
+ * to consider it unique. We do the test this way so that it works
+ * for cases involving DISTINCT ON.
+ */
+ if (subquery->distinctClause)
+ {
+ if (list_length(subquery->distinctClause) == 1 &&
+ targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
+ vardata->isunique = true;
+ /* cannot go further */
+ return;
+ }
+
+ /*
+ * If the sub-query originated from a view with the security_barrier
+ * attribute, we must not look at the variable's statistics, though it
+ * seems all right to notice the existence of a DISTINCT clause. So
+ * stop here.
+ *
+ * This is probably a harsher restriction than necessary; it's
+ * certainly OK for the selectivity estimator (which is a C function,
+ * and therefore omnipotent anyway) to look at the statistics. But
+ * many selectivity estimators will happily *invoke the operator
+ * function* to try to work out a good estimate - and that's not OK.
+ * So for now, don't dig down for stats.
+ */
+ if (rte->security_barrier)
+ return;
+
+ /* Can only handle a simple Var of subquery's query level */
+ if (var && IsA(var, Var) &&
+ var->varlevelsup == 0)
+ {
+ /*
+ * OK, recurse into the subquery. Note that the original setting
+ * of vardata->isunique (which will surely be false) is left
+ * unchanged in this situation. That's what we want, since even
+ * if the underlying column is unique, the subquery may have
+ * joined to other tables in a way that creates duplicates.
+ */
+ examine_simple_variable(rel->subroot, var, vardata);
+ }
+ }
+ else
+ {
+ /*
+ * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
+ * won't see RTE_JOIN here because join alias Vars have already been
+ * flattened.) There's not much we can do with function outputs, but
+ * maybe someday try to be smarter about VALUES and/or CTEs.
+ */
+ }
+}
+
+/*
+ * get_variable_numdistinct
+ * Estimate the number of distinct values of a variable.
+ *
+ * vardata: results of examine_variable
+ * *isdefault: set to TRUE if the result is a default rather than based on
+ * anything meaningful.
+ *
+ * NB: be careful to produce a positive integral result, since callers may
+ * compare the result to exact integer counts, or might divide by it.
+ */
+double
+get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
+{
+ double stadistinct;
+ double stanullfrac = 0.0;
+ double ntuples;
+
+ *isdefault = false;
+
+ /*
+ * Determine the stadistinct value to use. There are cases where we can
+ * get an estimate even without a pg_statistic entry, or can get a better
+ * value than is in pg_statistic. Grab stanullfrac too if we can find it
+ * (otherwise, assume no nulls, for lack of any better idea).
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* Use the pg_statistic entry */
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ stadistinct = stats->stadistinct;
+ stanullfrac = stats->stanullfrac;
+ }
+ else if (vardata->vartype == BOOLOID)
+ {
+ /*
+ * Special-case boolean columns: presumably, two distinct values.
+ *
+ * Are there any other datatypes we should wire in special estimates
+ * for?
+ */
+ stadistinct = 2.0;
+ }
+ else
+ {
+ /*
+ * We don't keep statistics for system columns, but in some cases we
+ * can infer distinctness anyway.
+ */
+ if (vardata->var && IsA(vardata->var, Var))
+ {
+ switch (((Var *) vardata->var)->varattno)
+ {
+ case ObjectIdAttributeNumber:
+ case SelfItemPointerAttributeNumber:
+ stadistinct = -1.0; /* unique (and all non null) */
+ break;
+ case TableOidAttributeNumber:
+ stadistinct = 1.0; /* only 1 value */
+ break;
+ default:
+ stadistinct = 0.0; /* means "unknown" */
+ break;
+ }
+ }
+ else
+ stadistinct = 0.0; /* means "unknown" */
+
+ /*
+ * XXX consider using estimate_num_groups on expressions?
+ */
+ }
+
+ /*
+ * If there is a unique index or DISTINCT clause for the variable, assume
+ * it is unique no matter what pg_statistic says; the statistics could be
+ * out of date, or we might have found a partial unique index that proves
+ * the var is unique for this query. However, we'd better still believe
+ * the null-fraction statistic.
+ */
if (vardata->isunique)
- stadistinct = -1.0;
+ stadistinct = -1.0 * (1.0 - stanullfrac);
/*
* If we had an absolute estimate, use that.
*/
if (stadistinct > 0.0)
- return stadistinct;
+ return clamp_row_est(stadistinct);
/*
* Otherwise we need to get the relation size; punt if not available.
*/
if (vardata->rel == NULL)
+ {
+ *isdefault = true;
return DEFAULT_NUM_DISTINCT;
+ }
ntuples = vardata->rel->tuples;
if (ntuples <= 0.0)
+ {
+ *isdefault = true;
return DEFAULT_NUM_DISTINCT;
+ }
/*
* If we had a relative estimate, use that.
*/
if (stadistinct < 0.0)
- return floor((-stadistinct * ntuples) + 0.5);
+ return clamp_row_est(-stadistinct * ntuples);
/*
* With no data, estimate ndistinct = ntuples if the table is small, else
- * use default.
+ * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
+ * that the behavior isn't discontinuous.
*/
if (ntuples < DEFAULT_NUM_DISTINCT)
- return ntuples;
+ return clamp_row_est(ntuples);
+ *isdefault = true;
return DEFAULT_NUM_DISTINCT;
}
/*
* XXX It's very tempting to try to use the actual column min and max, if
- * we can get them relatively-cheaply with an index probe. However, since
+ * we can get them relatively-cheaply with an index probe. However, since
* this function is called many times during join planning, that could
* have unpleasant effects on planning speed. Need more investigation
* before enabling this.
HeapTuple tup;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
+ SnapshotData SnapshotDirty;
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
econtext->ecxt_scantuple = slot;
get_typlenbyval(vardata->atttype, &typLen, &typByVal);
+ InitDirtySnapshot(SnapshotDirty);
/* set up an IS NOT NULL scan key so that we ignore nulls */
ScanKeyEntryInitialize(&scankeys[0],
/* If min is requested ... */
if (min)
{
- index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
+ /*
+ * In principle, we should scan the index with our current
+ * active snapshot, which is the best approximation we've got
+ * to what the query will see when executed. But that won't
+ * be exact if a new snap is taken before running the query,
+ * and it can be very expensive if a lot of uncommitted rows
+ * exist at the end of the index (because we'll laboriously
+ * fetch each one and reject it). What seems like a good
+ * compromise is to use SnapshotDirty. That will accept
+ * uncommitted rows, and thus avoid fetching multiple heap
+ * tuples in this scenario. On the other hand, it will reject
+ * known-dead rows, and thus not give a bogus answer when the
+ * extreme value has been deleted; that case motivates not
+ * using SnapshotAny here.
+ */
+ index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
1, 0);
index_rescan(index_scan, scankeys, 1, NULL, 0);
/* If max is requested, and we didn't find the index is empty */
if (max && have_data)
{
- index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
+ index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
1, 0);
index_rescan(index_scan, scankeys, 1, NULL, 0);
return have_data;
}
+/*
+ * find_join_input_rel
+ * Look up the input relation for a join.
+ *
+ * We assume that the input relation's RelOptInfo must have been constructed
+ * already.
+ */
+static RelOptInfo *
+find_join_input_rel(PlannerInfo *root, Relids relids)
+{
+ RelOptInfo *rel = NULL;
+
+ switch (bms_membership(relids))
+ {
+ case BMS_EMPTY_SET:
+ /* should not happen */
+ break;
+ case BMS_SINGLETON:
+ rel = find_base_rel(root, bms_singleton_member(relids));
+ break;
+ case BMS_MULTIPLE:
+ rel = find_join_rel(root, relids);
+ break;
+ }
+
+ if (rel == NULL)
+ elog(ERROR, "could not find RelOptInfo for given relids");
+
+ return rel;
+}
+
/*-------------------------------------------------------------------------
*
* Check whether char is a letter (and, hence, subject to case-folding)
*
* In multibyte character sets, we can't use isalpha, and it does not seem
- * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
+ * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
* any multibyte char is potentially case-varying.
*/
static int
*
* *prefix is set to a palloc'd prefix string (in the form of a Const node),
* or to NULL if no fixed prefix exists for the pattern.
- * *rest is set to a palloc'd Const representing the remainder of the pattern
- * after the portion describing the fixed prefix.
- * Each of these has the same type (TEXT or BYTEA) as the given pattern Const.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ * selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
*
* The return value distinguishes no fixed prefix, a partial prefix,
* or an exact-match-only pattern.
static Pattern_Prefix_Status
like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
- Const **prefix_const, Const **rest_const)
+ Const **prefix_const, Selectivity *rest_selec)
{
char *match;
char *patt;
int pattlen;
- char *rest;
Oid typeid = patt_const->consttype;
int pos,
match_pos;
}
else
{
- bytea *bstr = DatumGetByteaP(patt_const->constvalue);
+ bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
- pattlen = VARSIZE(bstr) - VARHDRSZ;
+ pattlen = VARSIZE_ANY_EXHDR(bstr);
patt = (char *) palloc(pattlen);
- memcpy(patt, VARDATA(bstr), pattlen);
- if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
- pfree(bstr);
+ memcpy(patt, VARDATA_ANY(bstr), pattlen);
+ Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
}
match = palloc(pattlen + 1);
}
match[match_pos] = '\0';
- rest = &patt[pos];
if (typeid != BYTEAOID)
- {
*prefix_const = string_to_const(match, typeid);
- *rest_const = string_to_const(rest, typeid);
- }
else
- {
*prefix_const = string_to_bytea_const(match, match_pos);
- *rest_const = string_to_bytea_const(rest, pattlen - pos);
- }
+
+ if (rest_selec != NULL)
+ *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+ case_insensitive);
pfree(patt);
pfree(match);
static Pattern_Prefix_Status
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
- Const **prefix_const, Const **rest_const)
+ Const **prefix_const, Selectivity *rest_selec)
{
- char *match;
- int pos,
- match_pos,
- prev_pos,
- prev_match_pos;
- bool have_leading_paren;
- char *patt;
- char *rest;
Oid typeid = patt_const->consttype;
- bool is_multibyte = (pg_database_encoding_max_length() > 1);
- pg_locale_t locale = 0;
- bool locale_is_c = false;
+ char *prefix;
+ bool exact;
/*
* Should be unnecessary, there are no bytea regex operators defined. As
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("regular-expression matching not supported on type bytea")));
- if (case_insensitive)
- {
- /* If case-insensitive, we need locale info */
- if (lc_ctype_is_c(collation))
- locale_is_c = true;
- else if (collation != DEFAULT_COLLATION_OID)
- {
- if (!OidIsValid(collation))
- {
- /*
- * This typically means that the parser could not resolve a
- * conflict of implicit collations, so report it that way.
- */
- ereport(ERROR,
- (errcode(ERRCODE_INDETERMINATE_COLLATION),
- errmsg("could not determine which collation to use for regular expression"),
- errhint("Use the COLLATE clause to set the collation explicitly.")));
- }
- locale = pg_newlocale_from_collation(collation);
- }
- }
-
- /* the right-hand const is type text for all of these */
- patt = TextDatumGetCString(patt_const->constvalue);
-
- /*
- * Check for ARE director prefix. It's worth our trouble to recognize
- * this because similar_escape() used to use it, and some other code might
- * still use it, to force ARE mode.
- */
- pos = 0;
- if (strncmp(patt, "***:", 4) == 0)
- pos = 4;
+ /* Use the regexp machinery to extract the prefix, if any */
+ prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
+ case_insensitive, collation,
+ &exact);
- /* Pattern must be anchored left */
- if (patt[pos] != '^')
+ if (prefix == NULL)
{
- rest = patt;
-
*prefix_const = NULL;
- *rest_const = string_to_const(rest, typeid);
-
- return Pattern_Prefix_None;
- }
- pos++;
- /*
- * If '|' is present in pattern, then there may be multiple alternatives
- * for the start of the string. (There are cases where this isn't so, for
- * instance if the '|' is inside parens, but detecting that reliably is
- * too hard.)
- */
- if (strchr(patt + pos, '|') != NULL)
- {
- rest = patt;
+ if (rest_selec != NULL)
+ {
+ char *patt = TextDatumGetCString(patt_const->constvalue);
- *prefix_const = NULL;
- *rest_const = string_to_const(rest, typeid);
+ *rest_selec = regex_selectivity(patt, strlen(patt),
+ case_insensitive,
+ 0);
+ pfree(patt);
+ }
return Pattern_Prefix_None;
}
- /* OK, allocate space for pattern */
- match = palloc(strlen(patt) + 1);
- prev_match_pos = match_pos = 0;
-
- /*
- * We special-case the syntax '^(...)$' because psql uses it. But beware:
- * sequences beginning "(?" are not what they seem, unless they're "(?:".
- * (We must recognize that because of similar_escape().)
- */
- have_leading_paren = false;
- if (patt[pos] == '(' &&
- (patt[pos + 1] != '?' || patt[pos + 2] == ':'))
- {
- have_leading_paren = true;
- pos += (patt[pos + 1] != '?' ? 1 : 3);
- }
+ *prefix_const = string_to_const(prefix, typeid);
- /* Scan remainder of pattern */
- prev_pos = pos;
- while (patt[pos])
+ if (rest_selec != NULL)
{
- int len;
-
- /*
- * Check for characters that indicate multiple possible matches here.
- * Also, drop out at ')' or '$' so the termination test works right.
- */
- if (patt[pos] == '.' ||
- patt[pos] == '(' ||
- patt[pos] == ')' ||
- patt[pos] == '[' ||
- patt[pos] == '^' ||
- patt[pos] == '$')
- break;
-
- /* Stop if case-varying character (it's sort of a wildcard) */
- if (case_insensitive &&
- pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
- break;
-
- /*
- * Check for quantifiers. Except for +, this means the preceding
- * character is optional, so we must remove it from the prefix too!
- */
- if (patt[pos] == '*' ||
- patt[pos] == '?' ||
- patt[pos] == '{')
+ if (exact)
{
- match_pos = prev_match_pos;
- pos = prev_pos;
- break;
+ /* Exact match, so there's no additional selectivity */
+ *rest_selec = 1.0;
}
- if (patt[pos] == '+')
+ else
{
- pos = prev_pos;
- break;
- }
+ char *patt = TextDatumGetCString(patt_const->constvalue);
- /*
- * Normally, backslash quotes the next character. But in AREs,
- * backslash followed by alphanumeric is an escape, not a quoted
- * character. Must treat it as having multiple possible matches.
- * Note: since only ASCII alphanumerics are escapes, we don't have to
- * be paranoid about multibyte or collations here.
- */
- if (patt[pos] == '\\')
- {
- if (isalnum((unsigned char) patt[pos + 1]))
- break;
- pos++;
- if (patt[pos] == '\0')
- break;
+ *rest_selec = regex_selectivity(patt, strlen(patt),
+ case_insensitive,
+ strlen(prefix));
+ pfree(patt);
}
- /* save position in case we need to back up on next loop cycle */
- prev_match_pos = match_pos;
- prev_pos = pos;
- /* must use encoding-aware processing here */
- len = pg_mblen(&patt[pos]);
- memcpy(&match[match_pos], &patt[pos], len);
- match_pos += len;
- pos += len;
}
- match[match_pos] = '\0';
- rest = &patt[pos];
-
- if (have_leading_paren && patt[pos] == ')')
- pos++;
-
- if (patt[pos] == '$' && patt[pos + 1] == '\0')
- {
- rest = &patt[pos + 1];
-
- *prefix_const = string_to_const(match, typeid);
- *rest_const = string_to_const(rest, typeid);
-
- pfree(patt);
- pfree(match);
+ pfree(prefix);
+ if (exact)
return Pattern_Prefix_Exact; /* pattern specifies exact match */
- }
-
- *prefix_const = string_to_const(match, typeid);
- *rest_const = string_to_const(rest, typeid);
-
- pfree(patt);
- pfree(match);
-
- if (match_pos > 0)
+ else
return Pattern_Prefix_Partial;
-
- return Pattern_Prefix_None;
}
Pattern_Prefix_Status
pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
- Const **prefix, Const **rest)
+ Const **prefix, Selectivity *rest_selec)
{
Pattern_Prefix_Status result;
switch (ptype)
{
case Pattern_Type_Like:
- result = like_fixed_prefix(patt, false, collation, prefix, rest);
+ result = like_fixed_prefix(patt, false, collation,
+ prefix, rest_selec);
break;
case Pattern_Type_Like_IC:
- result = like_fixed_prefix(patt, true, collation, prefix, rest);
+ result = like_fixed_prefix(patt, true, collation,
+ prefix, rest_selec);
break;
case Pattern_Type_Regex:
- result = regex_fixed_prefix(patt, false, collation, prefix, rest);
+ result = regex_fixed_prefix(patt, false, collation,
+ prefix, rest_selec);
break;
case Pattern_Type_Regex_IC:
- result = regex_fixed_prefix(patt, true, collation, prefix, rest);
+ result = regex_fixed_prefix(patt, true, collation,
+ prefix, rest_selec);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
* together with info about MCVs and NULLs.
*
* We use the >= and < operators from the specified btree opfamily to do the
- * estimation. The given variable and Const must be of the associated
+ * estimation. The given variable and Const must be of the associated
* datatype.
*
* XXX Note: we make use of the upper bound to estimate operator selectivity
/*
* Merge the two selectivities in the same way as for a range query
- * (see clauselist_selectivity()). Note that we don't need to worry
+ * (see clauselist_selectivity()). Note that we don't need to worry
* about double-exclusion of nulls, since ineq_histogram_selectivity
* doesn't count those anyway.
*/
/*
* Estimate the selectivity of a pattern of the specified type.
- * Note that any fixed prefix of the pattern will have been removed already.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
*
* For now, we use a very simplistic approach: fixed characters reduce the
* selectivity a good deal, character ranges reduce it a little,
#define PARTIAL_WILDCARD_SEL 2.0
static Selectivity
-like_selectivity(Const *patt_const, bool case_insensitive)
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel = 1.0;
int pos;
- Oid typeid = patt_const->consttype;
- char *patt;
- int pattlen;
-
- /* the right-hand const is type text or bytea */
- Assert(typeid == BYTEAOID || typeid == TEXTOID);
-
- if (typeid == BYTEAOID && case_insensitive)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("case insensitive matching not supported on type bytea")));
-
- if (typeid != BYTEAOID)
- {
- patt = TextDatumGetCString(patt_const->constvalue);
- pattlen = strlen(patt);
- }
- else
- {
- bytea *bstr = DatumGetByteaP(patt_const->constvalue);
-
- pattlen = VARSIZE(bstr) - VARHDRSZ;
- patt = (char *) palloc(pattlen);
- memcpy(patt, VARDATA(bstr), pattlen);
- if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
- pfree(bstr);
- }
/* Skip any leading wildcard; it's already factored into initial sel */
for (pos = 0; pos < pattlen; pos++)
/* Could get sel > 1 if multiple wildcards */
if (sel > 1.0)
sel = 1.0;
-
- pfree(patt);
return sel;
}
static Selectivity
-regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel = 1.0;
int paren_depth = 0;
}
static Selectivity
-regex_selectivity(Const *patt_const, bool case_insensitive)
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
+ int fixed_prefix_len)
{
Selectivity sel;
- char *patt;
- int pattlen;
- Oid typeid = patt_const->consttype;
-
- /*
- * Should be unnecessary, there are no bytea regex operators defined. As
- * such, it should be noted that the rest of this function has *not* been
- * made safe for binary (possibly NULL containing) strings.
- */
- if (typeid == BYTEAOID)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("regular-expression matching not supported on type bytea")));
-
- /* the right-hand const is type text for all of these */
- patt = TextDatumGetCString(patt_const->constvalue);
- pattlen = strlen(patt);
/* If patt doesn't end with $, consider it to have a trailing wildcard */
if (pattlen > 0 && patt[pattlen - 1] == '$' &&
/* no trailing $ */
sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
sel *= FULL_WILDCARD_SEL;
- if (sel > 1.0)
- sel = 1.0;
}
+
+ /* If there's a fixed prefix, discount its selectivity */
+ if (fixed_prefix_len > 0)
+ sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
+
+ /* Make sure result stays in range */
+ CLAMP_PROBABILITY(sel);
return sel;
}
-static Selectivity
-pattern_selectivity(Const *patt, Pattern_Type ptype)
+
+/*
+ * For bytea, the increment function need only increment the current byte
+ * (there are no multibyte characters to worry about).
+ */
+static bool
+byte_increment(unsigned char *ptr, int len)
{
- Selectivity result;
-
- switch (ptype)
- {
- case Pattern_Type_Like:
- result = like_selectivity(patt, false);
- break;
- case Pattern_Type_Like_IC:
- result = like_selectivity(patt, true);
- break;
- case Pattern_Type_Regex:
- result = regex_selectivity(patt, false);
- break;
- case Pattern_Type_Regex_IC:
- result = regex_selectivity(patt, true);
- break;
- default:
- elog(ERROR, "unrecognized ptype: %d", (int) ptype);
- result = 1.0; /* keep compiler quiet */
- break;
- }
- return result;
-}
-
+ if (*ptr >= 255)
+ return false;
+ (*ptr)++;
+ return true;
+}
/*
* Try to generate a string greater than the given string or any
* that is not a bulletproof guarantee that an extension of the string might
* not sort after it; an example is that "foo " is less than "foo!", but it
* is not clear that a "dictionary" sort ordering will consider "foo!" less
- * than "foo bar". CAUTION: Therefore, this function should be used only for
+ * than "foo bar". CAUTION: Therefore, this function should be used only for
* estimation purposes when working in a non-C collation.
*
* To try to catch most cases where an extended string might otherwise sort
* and "9" is seen as largest by the collation, and append that to the given
* prefix before trying to find a string that compares as larger.
*
- * If we max out the righthand byte, truncate off the last character
- * and start incrementing the next. For example, if "z" were the last
- * character in the sort order, then we could produce "foo" as a
- * string greater than "fonz".
+ * To search for a greater string, we repeatedly "increment" the rightmost
+ * character, using an encoding-specific character incrementer function.
+ * When it's no longer possible to increment the last character, we truncate
+ * off that character and start incrementing the next-to-rightmost.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
*
* This could be rather slow in the worst case, but in most cases we
* won't have to try more than one or two strings before succeeding.
+ *
+ * Note that it's important for the character incrementer not to be too anal
+ * about producing every possible character code, since in some cases the only
+ * way to get a larger string is to increment a previous character position.
+ * So we don't want to spend too much time trying every possible character
+ * code at the last position. A good rule of thumb is to be sure that we
+ * don't try more than 256*K values for a K-byte character (and definitely
+ * not 256^K, which is what an exhaustive search would approach).
*/
Const *
make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
int len;
Datum cmpstr;
text *cmptxt = NULL;
+ mbcharacter_incrementer charinc;
/*
* Get a modifiable copy of the prefix string in C-string format, and set
}
else if (datatype == BYTEAOID)
{
- bytea *bstr = DatumGetByteaP(str_const->constvalue);
+ bytea *bstr = DatumGetByteaPP(str_const->constvalue);
- len = VARSIZE(bstr) - VARHDRSZ;
+ len = VARSIZE_ANY_EXHDR(bstr);
workstr = (char *) palloc(len);
- memcpy(workstr, VARDATA(bstr), len);
- if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
- pfree(bstr);
+ memcpy(workstr, VARDATA_ANY(bstr), len);
+ Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
cmpstr = str_const->constvalue;
}
else
}
}
+ /* Select appropriate character-incrementer function */
+ if (datatype == BYTEAOID)
+ charinc = byte_increment;
+ else
+ charinc = pg_database_encoding_character_incrementer();
+
+ /* And search ... */
while (len > 0)
{
- unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
- unsigned char savelastchar = *lastchar;
+ int charlen;
+ unsigned char *lastchar;
+
+ /* Identify the last character --- for bytea, just the last byte */
+ if (datatype == BYTEAOID)
+ charlen = 1;
+ else
+ charlen = len - pg_mbcliplen(workstr, len, len - 1);
+ lastchar = (unsigned char *) (workstr + len - charlen);
/*
- * Try to generate a larger string by incrementing the last byte.
+ * Try to generate a larger string by incrementing the last character
+ * (for BYTEA, we treat each byte as a character).
+ *
+ * Note: the incrementer function is expected to return true if it's
+ * generated a valid-per-the-encoding new character, otherwise false.
+ * The contents of the character on false return are unspecified.
*/
- while (*lastchar < (unsigned char) 255)
+ while (charinc(lastchar, charlen))
{
Const *workstr_const;
- (*lastchar)++;
-
- if (datatype != BYTEAOID)
- {
- /* do not generate invalid encoding sequences */
- if (!pg_verifymbstr(workstr, len, true))
- continue;
- workstr_const = string_to_const(workstr, datatype);
- }
- else
+ if (datatype == BYTEAOID)
workstr_const = string_to_bytea_const(workstr, len);
+ else
+ workstr_const = string_to_const(workstr, datatype);
if (DatumGetBool(FunctionCall2Coll(ltproc,
collation,
pfree(workstr_const);
}
- /* restore last byte so we don't confuse pg_mbcliplen */
- *lastchar = savelastchar;
-
/*
- * Truncate off the last character, which might be more than 1 byte,
- * depending on the character encoding.
+ * No luck here, so truncate off the last character and try to
+ * increment the next one.
*/
- if (datatype != BYTEAOID && pg_database_encoding_max_length() > 1)
- len = pg_mbcliplen(workstr, len, len - 1);
- else
- len -= 1;
-
- if (datatype != BYTEAOID)
- workstr[len] = '\0';
+ len -= charlen;
+ workstr[len] = '\0';
}
/* Failed... */
*
* Index cost estimation functions
*
- * genericcostestimate is a general-purpose estimator for use when we
- * don't have any better idea about how to estimate. Index-type-specific
- * knowledge can be incorporated in the type-specific routines.
- *
- * One bit of index-type-specific knowledge we can relatively easily use
- * in genericcostestimate is the estimate of the number of index tuples
- * visited. If numIndexTuples is not 0 then it is used as the estimate,
- * otherwise we compute a generic estimate.
- *
*-------------------------------------------------------------------------
*/
-static void
+List *
+deconstruct_indexquals(IndexPath *path)
+{
+ List *result = NIL;
+ IndexOptInfo *index = path->indexinfo;
+ ListCell *lcc,
+ *lci;
+
+ forboth(lcc, path->indexquals, lci, path->indexqualcols)
+ {
+ RestrictInfo *rinfo = castNode(RestrictInfo, lfirst(lcc));
+ int indexcol = lfirst_int(lci);
+ Expr *clause;
+ Node *leftop,
+ *rightop;
+ IndexQualInfo *qinfo;
+
+ clause = rinfo->clause;
+
+ qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
+ qinfo->rinfo = rinfo;
+ qinfo->indexcol = indexcol;
+
+ if (IsA(clause, OpExpr))
+ {
+ qinfo->clause_op = ((OpExpr *) clause)->opno;
+ leftop = get_leftop(clause);
+ rightop = get_rightop(clause);
+ if (match_index_to_operand(leftop, indexcol, index))
+ {
+ qinfo->varonleft = true;
+ qinfo->other_operand = rightop;
+ }
+ else
+ {
+ Assert(match_index_to_operand(rightop, indexcol, index));
+ qinfo->varonleft = false;
+ qinfo->other_operand = leftop;
+ }
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ RowCompareExpr *rc = (RowCompareExpr *) clause;
+
+ qinfo->clause_op = linitial_oid(rc->opnos);
+ /* Examine only first columns to determine left/right sides */
+ if (match_index_to_operand((Node *) linitial(rc->largs),
+ indexcol, index))
+ {
+ qinfo->varonleft = true;
+ qinfo->other_operand = (Node *) rc->rargs;
+ }
+ else
+ {
+ Assert(match_index_to_operand((Node *) linitial(rc->rargs),
+ indexcol, index));
+ qinfo->varonleft = false;
+ qinfo->other_operand = (Node *) rc->largs;
+ }
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+
+ qinfo->clause_op = saop->opno;
+ /* index column is always on the left in this case */
+ Assert(match_index_to_operand((Node *) linitial(saop->args),
+ indexcol, index));
+ qinfo->varonleft = true;
+ qinfo->other_operand = (Node *) lsecond(saop->args);
+ }
+ else if (IsA(clause, NullTest))
+ {
+ qinfo->clause_op = InvalidOid;
+ Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
+ indexcol, index));
+ qinfo->varonleft = true;
+ qinfo->other_operand = NULL;
+ }
+ else
+ {
+ elog(ERROR, "unsupported indexqual type: %d",
+ (int) nodeTag(clause));
+ }
+
+ result = lappend(result, qinfo);
+ }
+ return result;
+}
+
+/*
+ * Simple function to compute the total eval cost of the "other operands"
+ * in an IndexQualInfo list. Since we know these will be evaluated just
+ * once per scan, there's no need to distinguish startup from per-row cost.
+ */
+static Cost
+other_operands_eval_cost(PlannerInfo *root, List *qinfos)
+{
+ Cost qual_arg_cost = 0;
+ ListCell *lc;
+
+ foreach(lc, qinfos)
+ {
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
+ QualCost index_qual_cost;
+
+ cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
+ qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ }
+ return qual_arg_cost;
+}
+
+/*
+ * Get other-operand eval cost for an index orderby list.
+ *
+ * Index orderby expressions aren't represented as RestrictInfos (since they
+ * aren't boolean, usually). So we can't apply deconstruct_indexquals to
+ * them. However, they are much simpler to deal with since they are always
+ * OpExprs and the index column is always on the left.
+ */
+static Cost
+orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
+{
+ Cost qual_arg_cost = 0;
+ ListCell *lc;
+
+ foreach(lc, path->indexorderbys)
+ {
+ Expr *clause = (Expr *) lfirst(lc);
+ Node *other_operand;
+ QualCost index_qual_cost;
+
+ if (IsA(clause, OpExpr))
+ {
+ other_operand = get_rightop(clause);
+ }
+ else
+ {
+ elog(ERROR, "unsupported indexorderby type: %d",
+ (int) nodeTag(clause));
+ other_operand = NULL; /* keep compiler quiet */
+ }
+
+ cost_qual_eval_node(&index_qual_cost, other_operand, root);
+ qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ }
+ return qual_arg_cost;
+}
+
+void
genericcostestimate(PlannerInfo *root,
- IndexOptInfo *index,
- List *indexQuals,
- List *indexOrderBys,
- RelOptInfo *outer_rel,
- double numIndexTuples,
- Cost *indexStartupCost,
- Cost *indexTotalCost,
- Selectivity *indexSelectivity,
- double *indexCorrelation)
+ IndexPath *path,
+ double loop_count,
+ List *qinfos,
+ GenericCosts *costs)
{
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = path->indexquals;
+ List *indexOrderBys = path->indexorderbys;
+ Cost indexStartupCost;
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+ double indexCorrelation;
double numIndexPages;
+ double numIndexTuples;
+ double spc_random_page_cost;
double num_sa_scans;
double num_outer_scans;
double num_scans;
- QualCost index_qual_cost;
double qual_op_cost;
double qual_arg_cost;
- double spc_random_page_cost;
List *selectivityQuals;
ListCell *l;
- /*----------
+ /*
* If the index is partial, AND the index predicate with the explicitly
* given indexquals to produce a more accurate idea of the index
- * selectivity. However, we need to be careful not to insert redundant
- * clauses, because clauselist_selectivity() is easily fooled into
- * computing a too-low selectivity estimate. Our approach is to add
- * only the index predicate clause(s) that cannot be proven to be implied
- * by the given indexquals. This successfully handles cases such as a
- * qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
- * There are many other cases where we won't detect redundancy, leading
- * to a too-low selectivity estimate, which will bias the system in favor
- * of using partial indexes where possible. That is not necessarily bad
- * though.
- *
- * Note that indexQuals contains RestrictInfo nodes while the indpred
- * does not. This is OK for both predicate_implied_by() and
- * clauselist_selectivity().
- *----------
+ * selectivity.
*/
- if (index->indpred != NIL)
- {
- List *predExtraQuals = NIL;
-
- foreach(l, index->indpred)
- {
- Node *predQual = (Node *) lfirst(l);
- List *oneQual = list_make1(predQual);
-
- if (!predicate_implied_by(oneQual, indexQuals))
- predExtraQuals = list_concat(predExtraQuals, oneQual);
- }
- /* list_concat avoids modifying the passed-in indexQuals list */
- selectivityQuals = list_concat(predExtraQuals, indexQuals);
- }
- else
- selectivityQuals = indexQuals;
+ selectivityQuals = add_predicate_to_quals(index, indexQuals);
/*
* Check for ScalarArrayOpExpr index quals, and estimate the number of
}
/* Estimate the fraction of main-table tuples that will be visited */
- *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
- index->rel->relid,
- JOIN_INNER,
- NULL);
+ indexSelectivity = clauselist_selectivity(root, selectivityQuals,
+ index->rel->relid,
+ JOIN_INNER,
+ NULL);
/*
* If caller didn't give us an estimate, estimate the number of index
* tuples that will be visited. We do it in this rather peculiar-looking
* way in order to get the right answer for partial indexes.
*/
+ numIndexTuples = costs->numIndexTuples;
if (numIndexTuples <= 0.0)
{
- numIndexTuples = *indexSelectivity * index->rel->tuples;
+ numIndexTuples = indexSelectivity * index->rel->tuples;
/*
* The above calculation counts all the tuples visited across all
*
* We use the simplistic method of taking a pro-rata fraction of the total
* number of index pages. In effect, this counts only leaf pages and not
- * any overhead such as index metapage or upper tree levels. In practice
- * this seems a better approximation than charging for access to the upper
- * levels, perhaps because those tend to stay in cache under load.
+ * any overhead such as index metapage or upper tree levels.
+ *
+ * In practice access to upper index levels is often nearly free because
+ * those tend to stay in cache under load; moreover, the cost involved is
+ * highly dependent on index type. We therefore ignore such costs here
+ * and leave it to the caller to add a suitable charge if needed.
*/
if (index->pages > 1 && index->tuples > 1)
numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
else
numIndexPages = 1.0;
- /* fetch estimated page cost for schema containing index */
+ /* fetch estimated page cost for tablespace containing index */
get_tablespace_page_costs(index->reltablespace,
&spc_random_page_cost,
NULL);
* The above calculations are all per-index-scan. However, if we are in a
* nestloop inner scan, we can expect the scan to be repeated (with
* different search keys) for each row of the outer relation. Likewise,
- * ScalarArrayOpExpr quals result in multiple index scans. This creates
+ * ScalarArrayOpExpr quals result in multiple index scans. This creates
* the potential for cache effects to reduce the number of disk page
- * fetches needed. We want to estimate the average per-scan I/O cost in
+ * fetches needed. We want to estimate the average per-scan I/O cost in
* the presence of caching.
*
* We use the Mackert-Lohman formula (see costsize.c for details) to
* Note that we are counting pages not tuples anymore, so we take N = T =
* index size, as if there were one "tuple" per page.
*/
- if (outer_rel != NULL && outer_rel->rows > 1)
- {
- num_outer_scans = outer_rel->rows;
- num_scans = num_sa_scans * num_outer_scans;
- }
- else
- {
- num_outer_scans = 1;
- num_scans = num_sa_scans;
- }
+ num_outer_scans = loop_count;
+ num_scans = num_sa_scans * num_outer_scans;
if (num_scans > 1)
{
* share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
* since that's internal to the indexscan.)
*/
- *indexTotalCost = (pages_fetched * spc_random_page_cost)
+ indexTotalCost = (pages_fetched * spc_random_page_cost)
/ num_outer_scans;
}
else
* For a single index scan, we just charge spc_random_page_cost per
* page touched.
*/
- *indexTotalCost = numIndexPages * spc_random_page_cost;
+ indexTotalCost = numIndexPages * spc_random_page_cost;
}
- /*
- * A difficulty with the leaf-pages-only cost approach is that for small
- * selectivities (eg, single index tuple fetched) all indexes will look
- * equally attractive because we will estimate exactly 1 leaf page to be
- * fetched. All else being equal, we should prefer physically smaller
- * indexes over larger ones. (An index might be smaller because it is
- * partial or because it contains fewer columns; presumably the other
- * columns in the larger index aren't useful to the query, or the larger
- * index would have better selectivity.)
- *
- * We can deal with this by adding a very small "fudge factor" that
- * depends on the index size. The fudge factor used here is one
- * spc_random_page_cost per 100000 index pages, which should be small
- * enough to not alter index-vs-seqscan decisions, but will prevent
- * indexes of different sizes from looking exactly equally attractive.
- */
- *indexTotalCost += index->pages * spc_random_page_cost / 100000.0;
-
/*
* CPU cost: any complex expressions in the indexquals will need to be
* evaluated once at the start of the scan to reduce them to runtime keys
* to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
* CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
- * indexqual operator. Because we have numIndexTuples as a per-scan
+ * indexqual operator. Because we have numIndexTuples as a per-scan
* number, we have to multiply by num_sa_scans to get the correct result
* for ScalarArrayOpExpr cases. Similarly add in costs for any index
* ORDER BY expressions.
*
- * Note: this neglects the possible costs of rechecking lossy operators
- * and OR-clause expressions. Detecting that that might be needed seems
- * more expensive than it's worth, though, considering all the other
- * inaccuracies here ...
+ * Note: this neglects the possible costs of rechecking lossy operators.
+ * Detecting that that might be needed seems more expensive than it's
+ * worth, though, considering all the other inaccuracies here ...
*/
- cost_qual_eval(&index_qual_cost, indexQuals, root);
- qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
- cost_qual_eval(&index_qual_cost, indexOrderBys, root);
- qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+ orderby_operands_eval_cost(root, path);
qual_op_cost = cpu_operator_cost *
(list_length(indexQuals) + list_length(indexOrderBys));
- qual_arg_cost -= qual_op_cost;
- if (qual_arg_cost < 0) /* just in case... */
- qual_arg_cost = 0;
- *indexStartupCost = qual_arg_cost;
- *indexTotalCost += qual_arg_cost;
- *indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
+ indexStartupCost = qual_arg_cost;
+ indexTotalCost += qual_arg_cost;
+ indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
/*
- * We also add a CPU-cost component to represent the general costs of
- * starting an indexscan, such as analysis of btree index keys and initial
- * tree descent. This is estimated at 100x cpu_operator_cost, which is a
- * bit arbitrary but seems the right order of magnitude. (As noted above,
- * we don't charge any I/O for touching upper tree levels, but charging
- * nothing at all has been found too optimistic.)
- *
- * Although this is startup cost with respect to any one scan, we add it
- * to the "total" cost component because it's only very interesting in the
- * many-ScalarArrayOpExpr-scan case, and there it will be paid over the
- * life of the scan node.
+ * Generic assumption about index correlation: there isn't any.
*/
- *indexTotalCost += num_sa_scans * 100.0 * cpu_operator_cost;
+ indexCorrelation = 0.0;
/*
- * Generic assumption about index correlation: there isn't any.
+ * Return everything to caller.
*/
- *indexCorrelation = 0.0;
+ costs->indexStartupCost = indexStartupCost;
+ costs->indexTotalCost = indexTotalCost;
+ costs->indexSelectivity = indexSelectivity;
+ costs->indexCorrelation = indexCorrelation;
+ costs->numIndexPages = numIndexPages;
+ costs->numIndexTuples = numIndexTuples;
+ costs->spc_random_page_cost = spc_random_page_cost;
+ costs->num_sa_scans = num_sa_scans;
+}
+
+/*
+ * If the index is partial, add its predicate to the given qual list.
+ *
+ * ANDing the index predicate with the explicitly given indexquals produces
+ * a more accurate idea of the index's selectivity. However, we need to be
+ * careful not to insert redundant clauses, because clauselist_selectivity()
+ * is easily fooled into computing a too-low selectivity estimate. Our
+ * approach is to add only the predicate clause(s) that cannot be proven to
+ * be implied by the given indexquals. This successfully handles cases such
+ * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
+ * There are many other cases where we won't detect redundancy, leading to a
+ * too-low selectivity estimate, which will bias the system in favor of using
+ * partial indexes where possible. That is not necessarily bad though.
+ *
+ * Note that indexQuals contains RestrictInfo nodes while the indpred
+ * does not, so the output list will be mixed. This is OK for both
+ * predicate_implied_by() and clauselist_selectivity(), but might be
+ * problematic if the result were passed to other things.
+ */
+static List *
+add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
+{
+ List *predExtraQuals = NIL;
+ ListCell *lc;
+
+ if (index->indpred == NIL)
+ return indexQuals;
+
+ foreach(lc, index->indpred)
+ {
+ Node *predQual = (Node *) lfirst(lc);
+ List *oneQual = list_make1(predQual);
+
+ if (!predicate_implied_by(oneQual, indexQuals))
+ predExtraQuals = list_concat(predExtraQuals, oneQual);
+ }
+ /* list_concat avoids modifying the passed-in indexQuals list */
+ return list_concat(predExtraQuals, indexQuals);
}
-Datum
-btcostestimate(PG_FUNCTION_ARGS)
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
- List *indexQuals = (List *) PG_GETARG_POINTER(2);
- List *indexOrderBys = (List *) PG_GETARG_POINTER(3);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+ IndexOptInfo *index = path->indexinfo;
+ List *qinfos;
+ GenericCosts costs;
Oid relid;
AttrNumber colnum;
VariableStatData vardata;
double numIndexTuples;
+ Cost descentCost;
List *indexBoundQuals;
int indexcol;
bool eqQualHere;
bool found_saop;
bool found_is_null_op;
double num_sa_scans;
- ListCell *l;
+ ListCell *lc;
+
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
/*
* For a btree scan, only leading '=' quals plus inequality quals for the
* the "boundary quals" that determine the starting and stopping points of
* the index scan). Additional quals can suppress visits to the heap, so
* it's OK to count them in indexSelectivity, but they should not count
- * for estimating numIndexTuples. So we must examine the given indexQuals
- * to find out which ones count as boundary quals. We rely on the
+ * for estimating numIndexTuples. So we must examine the given indexquals
+ * to find out which ones count as boundary quals. We rely on the
* knowledge that they are given in index column order.
*
* For a RowCompareExpr, we consider only the first column, just as
found_saop = false;
found_is_null_op = false;
num_sa_scans = 1;
- foreach(l, indexQuals)
+ foreach(lc, qinfos)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Expr *clause;
- Node *leftop,
- *rightop;
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
+ RestrictInfo *rinfo = qinfo->rinfo;
+ Expr *clause = rinfo->clause;
Oid clause_op;
int op_strategy;
- bool is_null_op = false;
- Assert(IsA(rinfo, RestrictInfo));
- clause = rinfo->clause;
- if (IsA(clause, OpExpr))
+ if (indexcol != qinfo->indexcol)
{
- leftop = get_leftop(clause);
- rightop = get_rightop(clause);
- clause_op = ((OpExpr *) clause)->opno;
+ /* Beginning of a new column's quals */
+ if (!eqQualHere)
+ break; /* done if no '=' qual for indexcol */
+ eqQualHere = false;
+ indexcol++;
+ if (indexcol != qinfo->indexcol)
+ break; /* no quals at all for indexcol */
}
- else if (IsA(clause, RowCompareExpr))
- {
- RowCompareExpr *rc = (RowCompareExpr *) clause;
- leftop = (Node *) linitial(rc->largs);
- rightop = (Node *) linitial(rc->rargs);
- clause_op = linitial_oid(rc->opnos);
- }
- else if (IsA(clause, ScalarArrayOpExpr))
+ if (IsA(clause, ScalarArrayOpExpr))
{
- ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+ int alength = estimate_array_length(qinfo->other_operand);
- leftop = (Node *) linitial(saop->args);
- rightop = (Node *) lsecond(saop->args);
- clause_op = saop->opno;
found_saop = true;
+ /* count up number of SA scans induced by indexBoundQuals only */
+ if (alength > 1)
+ num_sa_scans *= alength;
}
else if (IsA(clause, NullTest))
{
NullTest *nt = (NullTest *) clause;
- leftop = (Node *) nt->arg;
- rightop = NULL;
- clause_op = InvalidOid;
if (nt->nulltesttype == IS_NULL)
{
found_is_null_op = true;
- is_null_op = true;
- }
- }
- else
- {
- elog(ERROR, "unsupported indexqual type: %d",
- (int) nodeTag(clause));
- continue; /* keep compiler quiet */
- }
- if (match_index_to_operand(leftop, indexcol, index))
- {
- /* clause_op is correct */
- }
- else if (match_index_to_operand(rightop, indexcol, index))
- {
- /* Must flip operator to get the opfamily member */
- clause_op = get_commutator(clause_op);
- }
- else
- {
- /* Must be past the end of quals for indexcol, try next */
- if (!eqQualHere)
- break; /* done if no '=' qual for indexcol */
- indexcol++;
- eqQualHere = false;
- if (match_index_to_operand(leftop, indexcol, index))
- {
- /* clause_op is correct */
- }
- else if (match_index_to_operand(rightop, indexcol, index))
- {
- /* Must flip operator to get the opfamily member */
- clause_op = get_commutator(clause_op);
- }
- else
- {
- /* No quals for new indexcol, so we are done */
- break;
+ /* IS NULL is like = for selectivity determination purposes */
+ eqQualHere = true;
}
}
+
+ /*
+ * We would need to commute the clause_op if not varonleft, except
+ * that we only care if it's equality or not, so that refinement is
+ * unnecessary.
+ */
+ clause_op = qinfo->clause_op;
+
/* check for equality operator */
if (OidIsValid(clause_op))
{
if (op_strategy == BTEqualStrategyNumber)
eqQualHere = true;
}
- else if (is_null_op)
- {
- /* IS NULL is like = for purposes of selectivity determination */
- eqQualHere = true;
- }
- /* count up number of SA scans induced by indexBoundQuals only */
- if (IsA(clause, ScalarArrayOpExpr))
- {
- ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
- int alength = estimate_array_length(lsecond(saop->args));
- if (alength > 1)
- num_sa_scans *= alength;
- }
indexBoundQuals = lappend(indexBoundQuals, rinfo);
}
numIndexTuples = 1.0;
else
{
+ List *selectivityQuals;
Selectivity btreeSelectivity;
- btreeSelectivity = clauselist_selectivity(root, indexBoundQuals,
+ /*
+ * If the index is partial, AND the index predicate with the
+ * index-bound quals to produce a more accurate idea of the number of
+ * rows covered by the bound conditions.
+ */
+ selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
+
+ btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
index->rel->relid,
JOIN_INNER,
NULL);
numIndexTuples = rint(numIndexTuples / num_sa_scans);
}
- genericcostestimate(root, index, indexQuals, indexOrderBys,
- outer_rel, numIndexTuples,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
+ /*
+ * Now do generic index cost estimation.
+ */
+ MemSet(&costs, 0, sizeof(costs));
+ costs.numIndexTuples = numIndexTuples;
+
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial btree
+ * descent. We don't charge any I/O cost for touching upper btree levels,
+ * since they tend to stay in cache, but we still have to do about log2(N)
+ * comparisons to descend a btree of N leaf tuples. We charge one
+ * cpu_operator_cost per comparison.
+ *
+ * If there are ScalarArrayOpExprs, charge this once per SA scan. The
+ * ones after the first one are not startup cost so far as the overall
+ * plan is concerned, so add them only to "total" cost.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Even though we're not charging I/O cost for touching upper btree pages,
+ * it's still reasonable to charge some CPU cost per page descended
+ * through. Moreover, if we had no such charge at all, bloated indexes
+ * would appear to have the same search cost as unbloated ones, at least
+ * in cases where only a single leaf page is expected to be visited. This
+ * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
+ * touched. The number of such pages is btree tree height plus one (ie,
+ * we charge for the leaf page too). As above, charge once per SA scan.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
/*
* If we can get an estimate of the first column's ordering correlation C
* is that multiple columns dilute the importance of the first column's
* ordering, but don't negate it entirely. Before 8.0 we divided the
* correlation by the number of columns, but that seems too strong.)
- *
- * We can skip all this if we found a ScalarArrayOpExpr, because then the
- * call must be for a bitmap index scan, and the caller isn't going to
- * care what the index correlation is.
*/
- if (found_saop)
- PG_RETURN_VOID();
-
MemSet(&vardata, 0, sizeof(vardata));
if (index->indexkeys[0] != 0)
varCorrelation = -varCorrelation;
if (index->ncolumns > 1)
- *indexCorrelation = varCorrelation * 0.75;
+ costs.indexCorrelation = varCorrelation * 0.75;
else
- *indexCorrelation = varCorrelation;
+ costs.indexCorrelation = varCorrelation;
free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
}
ReleaseVariableStats(vardata);
- PG_RETURN_VOID();
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
-Datum
-hashcostestimate(PG_FUNCTION_ARGS)
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
- List *indexQuals = (List *) PG_GETARG_POINTER(2);
- List *indexOrderBys = (List *) PG_GETARG_POINTER(3);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(8);
-
- genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
-
- PG_RETURN_VOID();
+ List *qinfos;
+ GenericCosts costs;
+
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+ /*
+ * A hash index has no descent costs as such, since the index AM can go
+ * directly to the target bucket after computing the hash value. There
+ * are a couple of other hash-specific costs that we could conceivably add
+ * here, though:
+ *
+ * Ideally we'd charge spc_random_page_cost for each page in the target
+ * bucket, not just the numIndexPages pages that genericcostestimate
+ * thought we'd visit. However in most cases we don't know which bucket
+ * that will be. There's no point in considering the average bucket size
+ * because the hash AM makes sure that's always one page.
+ *
+ * Likewise, we could consider charging some CPU for each index tuple in
+ * the bucket, if we knew how many there were. But the per-tuple cost is
+ * just a hash value comparison, not a general datatype-dependent
+ * comparison, so any such charge ought to be quite a bit less than
+ * cpu_operator_cost; which makes it probably not worth worrying about.
+ *
+ * A bigger issue is that chance hash-value collisions will result in
+ * wasted probes into the heap. We don't currently attempt to model this
+ * cost on the grounds that it's rare, but maybe it's not rare enough.
+ * (Any fix for this ought to consider the generic lossy-operator problem,
+ * though; it's not entirely hash-specific.)
+ */
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
-Datum
-gistcostestimate(PG_FUNCTION_ARGS)
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
- List *indexQuals = (List *) PG_GETARG_POINTER(2);
- List *indexOrderBys = (List *) PG_GETARG_POINTER(3);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(8);
-
- genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
- indexStartupCost, indexTotalCost,
- indexSelectivity, indexCorrelation);
-
- PG_RETURN_VOID();
+ IndexOptInfo *index = path->indexinfo;
+ List *qinfos;
+ GenericCosts costs;
+ Cost descentCost;
+
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+ /*
+ * We model index descent costs similarly to those for btree, but to do
+ * that we first need an idea of the tree height. We somewhat arbitrarily
+ * assume that the fanout is 100, meaning the tree height is at most
+ * log100(index->pages).
+ *
+ * Although this computation isn't really expensive enough to require
+ * caching, we might as well use index->tree_height to cache it.
+ */
+ if (index->tree_height < 0) /* unknown? */
+ {
+ if (index->pages > 1) /* avoid computing log(0) */
+ index->tree_height = (int) (log(index->pages) / log(100.0));
+ else
+ index->tree_height = 0;
+ }
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial descent. We
+ * just use log(N) here not log2(N) since the branching factor isn't
+ * necessarily two anyway. As for btree, charge once per SA scan.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Likewise add a per-page charge, calculated the same as for btrees.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
}
-/* Find the index column matching "op"; return its index, or -1 if no match */
-static int
-find_index_column(Node *op, IndexOptInfo *index)
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
+ IndexOptInfo *index = path->indexinfo;
+ List *qinfos;
+ GenericCosts costs;
+ Cost descentCost;
+
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
+
+ /*
+ * We model index descent costs similarly to those for btree, but to do
+ * that we first need an idea of the tree height. We somewhat arbitrarily
+ * assume that the fanout is 100, meaning the tree height is at most
+ * log100(index->pages).
+ *
+ * Although this computation isn't really expensive enough to require
+ * caching, we might as well use index->tree_height to cache it.
+ */
+ if (index->tree_height < 0) /* unknown? */
+ {
+ if (index->pages > 1) /* avoid computing log(0) */
+ index->tree_height = (int) (log(index->pages) / log(100.0));
+ else
+ index->tree_height = 0;
+ }
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial descent. We
+ * just use log(N) here not log2(N) since the branching factor isn't
+ * necessarily two anyway. As for btree, charge once per SA scan.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Likewise add a per-page charge, calculated the same as for btrees.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
+}
+
+
+/*
+ * Support routines for gincostestimate
+ */
+
+typedef struct
+{
+ bool haveFullScan;
+ double partialEntries;
+ double exactEntries;
+ double searchEntries;
+ double arrayScans;
+} GinQualCounts;
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN query, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_pattern(IndexOptInfo *index, int indexcol,
+ Oid clause_op, Datum query,
+ GinQualCounts *counts)
+{
+ Oid extractProcOid;
+ Oid collation;
+ int strategy_op;
+ Oid lefttype,
+ righttype;
+ int32 nentries = 0;
+ bool *partial_matches = NULL;
+ Pointer *extra_data = NULL;
+ bool *nullFlags = NULL;
+ int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
+ int32 i;
+
+ /*
+ * Get the operator's strategy number and declared input data types within
+ * the index opfamily. (We don't need the latter, but we use
+ * get_op_opfamily_properties because it will throw error if it fails to
+ * find a matching pg_amop entry.)
+ */
+ get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
+ &strategy_op, &lefttype, &righttype);
+
+ /*
+ * GIN always uses the "default" support functions, which are those with
+ * lefttype == righttype == the opclass' opcintype (see
+ * IndexSupportInitialize in relcache.c).
+ */
+ extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
+ index->opcintype[indexcol],
+ index->opcintype[indexcol],
+ GIN_EXTRACTQUERY_PROC);
+
+ if (!OidIsValid(extractProcOid))
+ {
+ /* should not happen; throw same error as index_getprocinfo */
+ elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
+ GIN_EXTRACTQUERY_PROC, indexcol + 1,
+ get_rel_name(index->indexoid));
+ }
+
+ /*
+ * Choose collation to pass to extractProc (should match initGinState).
+ */
+ if (OidIsValid(index->indexcollations[indexcol]))
+ collation = index->indexcollations[indexcol];
+ else
+ collation = DEFAULT_COLLATION_OID;
+
+ OidFunctionCall7Coll(extractProcOid,
+ collation,
+ query,
+ PointerGetDatum(&nentries),
+ UInt16GetDatum(strategy_op),
+ PointerGetDatum(&partial_matches),
+ PointerGetDatum(&extra_data),
+ PointerGetDatum(&nullFlags),
+ PointerGetDatum(&searchMode));
+
+ if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+ {
+ /* No match is possible */
+ return false;
+ }
+
+ for (i = 0; i < nentries; i++)
+ {
+ /*
+ * For partial match we haven't any information to estimate number of
+ * matched entries in index, so, we just estimate it as 100
+ */
+ if (partial_matches && partial_matches[i])
+ counts->partialEntries += 100;
+ else
+ counts->exactEntries++;
+
+ counts->searchEntries++;
+ }
+
+ if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
+ {
+ /* Treat "include empty" like an exact-match item */
+ counts->exactEntries++;
+ counts->searchEntries++;
+ }
+ else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
+ {
+ /* It's GIN_SEARCH_MODE_ALL */
+ counts->haveFullScan = true;
+ }
+
+ return true;
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_opexpr(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexQualInfo *qinfo,
+ GinQualCounts *counts)
+{
+ int indexcol = qinfo->indexcol;
+ Oid clause_op = qinfo->clause_op;
+ Node *operand = qinfo->other_operand;
+
+ if (!qinfo->varonleft)
+ {
+ /* must commute the operator */
+ clause_op = get_commutator(clause_op);
+ }
+
+ /* aggressively reduce to a constant, and look through relabeling */
+ operand = estimate_expression_value(root, operand);
+
+ if (IsA(operand, RelabelType))
+ operand = (Node *) ((RelabelType *) operand)->arg;
+
+ /*
+ * It's impossible to call extractQuery method for unknown operand. So
+ * unless operand is a Const we can't do much; just assume there will be
+ * one ordinary search entry from the operand at runtime.
+ */
+ if (!IsA(operand, Const))
+ {
+ counts->exactEntries++;
+ counts->searchEntries++;
+ return true;
+ }
+
+ /* If Const is null, there can be no matches */
+ if (((Const *) operand)->constisnull)
+ return false;
+
+ /* Otherwise, apply extractQuery and get the actual term counts */
+ return gincost_pattern(index, indexcol, clause_op,
+ ((Const *) operand)->constvalue,
+ counts);
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ *
+ * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
+ * each of which involves one value from the RHS array, plus all the
+ * non-array quals (if any). To model this, we average the counts across
+ * the RHS elements, and add the averages to the counts in *counts (which
+ * correspond to per-indexscan costs). We also multiply counts->arrayScans
+ * by N, causing gincostestimate to scale up its estimates accordingly.
+ */
+static bool
+gincost_scalararrayopexpr(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexQualInfo *qinfo,
+ double numIndexEntries,
+ GinQualCounts *counts)
+{
+ int indexcol = qinfo->indexcol;
+ Oid clause_op = qinfo->clause_op;
+ Node *rightop = qinfo->other_operand;
+ ArrayType *arrayval;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int numElems;
+ Datum *elemValues;
+ bool *elemNulls;
+ GinQualCounts arraycounts;
+ int numPossible = 0;
int i;
- for (i = 0; i < index->ncolumns; i++)
+ Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);
+
+ /* aggressively reduce to a constant, and look through relabeling */
+ rightop = estimate_expression_value(root, rightop);
+
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ /*
+ * It's impossible to call extractQuery method for unknown operand. So
+ * unless operand is a Const we can't do much; just assume there will be
+ * one ordinary search entry from each array entry at runtime, and fall
+ * back on a probably-bad estimate of the number of array entries.
+ */
+ if (!IsA(rightop, Const))
+ {
+ counts->exactEntries++;
+ counts->searchEntries++;
+ counts->arrayScans *= estimate_array_length(rightop);
+ return true;
+ }
+
+ /* If Const is null, there can be no matches */
+ if (((Const *) rightop)->constisnull)
+ return false;
+
+ /* Otherwise, extract the array elements and iterate over them */
+ arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
+ get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+ &elmlen, &elmbyval, &elmalign);
+ deconstruct_array(arrayval,
+ ARR_ELEMTYPE(arrayval),
+ elmlen, elmbyval, elmalign,
+ &elemValues, &elemNulls, &numElems);
+
+ memset(&arraycounts, 0, sizeof(arraycounts));
+
+ for (i = 0; i < numElems; i++)
{
- if (match_index_to_operand(op, i, index))
- return i;
+ GinQualCounts elemcounts;
+
+ /* NULL can't match anything, so ignore, as the executor will */
+ if (elemNulls[i])
+ continue;
+
+ /* Otherwise, apply extractQuery and get the actual term counts */
+ memset(&elemcounts, 0, sizeof(elemcounts));
+
+ if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
+ &elemcounts))
+ {
+ /* We ignore array elements that are unsatisfiable patterns */
+ numPossible++;
+
+ if (elemcounts.haveFullScan)
+ {
+ /*
+ * Full index scan will be required. We treat this as if
+ * every key in the index had been listed in the query; is
+ * that reasonable?
+ */
+ elemcounts.partialEntries = 0;
+ elemcounts.exactEntries = numIndexEntries;
+ elemcounts.searchEntries = numIndexEntries;
+ }
+ arraycounts.partialEntries += elemcounts.partialEntries;
+ arraycounts.exactEntries += elemcounts.exactEntries;
+ arraycounts.searchEntries += elemcounts.searchEntries;
+ }
+ }
+
+ if (numPossible == 0)
+ {
+ /* No satisfiable patterns in the array */
+ return false;
}
- return -1;
+ /*
+ * Now add the averages to the global counts. This will give us an
+ * estimate of the average number of terms searched for in each indexscan,
+ * including contributions from both array and non-array quals.
+ */
+ counts->partialEntries += arraycounts.partialEntries / numPossible;
+ counts->exactEntries += arraycounts.exactEntries / numPossible;
+ counts->searchEntries += arraycounts.searchEntries / numPossible;
+
+ counts->arrayScans *= numPossible;
+
+ return true;
}
/*
* GIN has search behavior completely different from other index types
*/
-Datum
-gincostestimate(PG_FUNCTION_ARGS)
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
- List *indexQuals = (List *) PG_GETARG_POINTER(2);
- List *indexOrderBys = (List *) PG_GETARG_POINTER(3);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = path->indexquals;
+ List *indexOrderBys = path->indexorderbys;
+ List *qinfos;
ListCell *l;
List *selectivityQuals;
double numPages = index->pages,
numDataPages,
numPendingPages,
numEntries;
- bool haveFullScan = false;
- double partialEntriesInQuals = 0.0;
- double searchEntriesInQuals = 0.0;
- double exactEntriesInQuals = 0.0;
+ GinQualCounts counts;
+ bool matchPossible;
+ double partialScale;
double entryPagesFetched,
dataPagesFetched,
dataPagesFetchedBySel;
double qual_op_cost,
qual_arg_cost,
spc_random_page_cost,
- num_scans;
- QualCost index_qual_cost;
+ outer_scans;
Relation indexRel;
GinStatsData ginStats;
- /*
- * Obtain statistic information from the meta page
- */
- indexRel = index_open(index->indexoid, AccessShareLock);
- ginGetStats(indexRel, &ginStats);
- index_close(indexRel, AccessShareLock);
-
- numEntryPages = ginStats.nEntryPages;
- numDataPages = ginStats.nDataPages;
- numPendingPages = ginStats.nPendingPages;
- numEntries = ginStats.nEntries;
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
/*
- * nPendingPages can be trusted, but the other fields are as of the last
- * VACUUM. Scale them by the ratio numPages / nTotalPages to account for
- * growth since then. If the fields are zero (implying no VACUUM at all,
- * and an index created pre-9.1), assume all pages are entry pages.
+ * Obtain statistical information from the meta page, if possible. Else
+ * set ginStats to zeroes, and we'll cope below.
*/
- if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
+ if (!index->hypothetical)
+ {
+ indexRel = index_open(index->indexoid, AccessShareLock);
+ ginGetStats(indexRel, &ginStats);
+ index_close(indexRel, AccessShareLock);
+ }
+ else
{
- numEntryPages = numPages;
- numDataPages = 0;
- numEntries = numTuples; /* bogus, but no other info available */
+ memset(&ginStats, 0, sizeof(ginStats));
}
+
+ /*
+ * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+ * trusted, but the other fields are data as of the last VACUUM. We can
+ * scale them up to account for growth since then, but that method only
+ * goes so far; in the worst case, the stats might be for a completely
+ * empty index, and scaling them will produce pretty bogus numbers.
+ * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+ * it's grown more than that, fall back to estimating things only from the
+ * assumed-accurate index size. But we'll trust nPendingPages in any case
+ * so long as it's not clearly insane, ie, more than the index size.
+ */
+ if (ginStats.nPendingPages < numPages)
+ numPendingPages = ginStats.nPendingPages;
else
+ numPendingPages = 0;
+
+ if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+ ginStats.nTotalPages > numPages / 4 &&
+ ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
{
+ /*
+ * OK, the stats seem close enough to sane to be trusted. But we
+ * still need to scale them by the ratio numPages / nTotalPages to
+ * account for growth since the last VACUUM.
+ */
double scale = numPages / ginStats.nTotalPages;
- numEntryPages = ceil(numEntryPages * scale);
- numDataPages = ceil(numDataPages * scale);
- numEntries = ceil(numEntries * scale);
+ numEntryPages = ceil(ginStats.nEntryPages * scale);
+ numDataPages = ceil(ginStats.nDataPages * scale);
+ numEntries = ceil(ginStats.nEntries * scale);
/* ensure we didn't round up too much */
- numEntryPages = Min(numEntryPages, numPages);
- numDataPages = Min(numDataPages, numPages - numEntryPages);
+ numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+ numDataPages = Min(numDataPages,
+ numPages - numPendingPages - numEntryPages);
+ }
+ else
+ {
+ /*
+ * We might get here because it's a hypothetical index, or an index
+ * created pre-9.1 and never vacuumed since upgrading (in which case
+ * its stats would read as zeroes), or just because it's grown too
+ * much since the last VACUUM for us to put our faith in scaling.
+ *
+ * Invent some plausible internal statistics based on the index page
+ * count (and clamp that to at least 10 pages, just in case). We
+ * estimate that 90% of the index is entry pages, and the rest is data
+ * pages. Estimate 100 entries per entry page; this is rather bogus
+ * since it'll depend on the size of the keys, but it's more robust
+ * than trying to predict the number of entries per heap tuple.
+ */
+ numPages = Max(numPages, 10);
+ numEntryPages = floor((numPages - numPendingPages) * 0.90);
+ numDataPages = numPages - numPendingPages - numEntryPages;
+ numEntries = floor(numEntryPages * 100);
}
/* In an empty index, numEntries could be zero. Avoid divide-by-zero */
JOIN_INNER,
NULL);
- /* fetch estimated page cost for schema containing index */
+ /* fetch estimated page cost for tablespace containing index */
get_tablespace_page_costs(index->reltablespace,
&spc_random_page_cost,
NULL);
/*
* Examine quals to estimate number of search entries & partial matches
*/
- foreach(l, indexQuals)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Expr *clause;
- Node *leftop,
- *rightop,
- *operand;
- Oid extractProcOid;
- Oid clause_op;
- int strategy_op;
- Oid lefttype,
- righttype;
- int32 nentries = 0;
- bool *partial_matches = NULL;
- Pointer *extra_data = NULL;
- bool *nullFlags = NULL;
- int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
- int indexcol;
-
- Assert(IsA(rinfo, RestrictInfo));
- clause = rinfo->clause;
- Assert(IsA(clause, OpExpr));
- leftop = get_leftop(clause);
- rightop = get_rightop(clause);
- clause_op = ((OpExpr *) clause)->opno;
-
- if ((indexcol = find_index_column(leftop, index)) >= 0)
- {
- operand = rightop;
- }
- else if ((indexcol = find_index_column(rightop, index)) >= 0)
- {
- operand = leftop;
- clause_op = get_commutator(clause_op);
- }
- else
- {
- elog(ERROR, "could not match index to operand");
- operand = NULL; /* keep compiler quiet */
- }
+ memset(&counts, 0, sizeof(counts));
+ counts.arrayScans = 1;
+ matchPossible = true;
- if (IsA(operand, RelabelType))
- operand = (Node *) ((RelabelType *) operand)->arg;
-
- /*
- * It's impossible to call extractQuery method for unknown operand. So
- * unless operand is a Const we can't do much; just assume there will
- * be one ordinary search entry from the operand at runtime.
- */
- if (!IsA(operand, Const))
- {
- searchEntriesInQuals++;
- continue;
- }
-
- /* If Const is null, there can be no matches */
- if (((Const *) operand)->constisnull)
- {
- *indexStartupCost = 0;
- *indexTotalCost = 0;
- *indexSelectivity = 0;
- PG_RETURN_VOID();
- }
-
- /*
- * Get the operator's strategy number and declared input data types
- * within the index opfamily. (We don't need the latter, but we use
- * get_op_opfamily_properties because it will throw error if it fails
- * to find a matching pg_amop entry.)
- */
- get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
- &strategy_op, &lefttype, &righttype);
-
- /*
- * GIN always uses the "default" support functions, which are those
- * with lefttype == righttype == the opclass' opcintype (see
- * IndexSupportInitialize in relcache.c).
- */
- extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
- index->opcintype[indexcol],
- index->opcintype[indexcol],
- GIN_EXTRACTQUERY_PROC);
+ foreach(l, qinfos)
+ {
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+ Expr *clause = qinfo->rinfo->clause;
- if (!OidIsValid(extractProcOid))
+ if (IsA(clause, OpExpr))
{
- /* should not happen; throw same error as index_getprocinfo */
- elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
- GIN_EXTRACTQUERY_PROC, indexcol + 1,
- get_rel_name(index->indexoid));
+ matchPossible = gincost_opexpr(root,
+ index,
+ qinfo,
+ &counts);
+ if (!matchPossible)
+ break;
}
-
- OidFunctionCall7(extractProcOid,
- ((Const *) operand)->constvalue,
- PointerGetDatum(&nentries),
- UInt16GetDatum(strategy_op),
- PointerGetDatum(&partial_matches),
- PointerGetDatum(&extra_data),
- PointerGetDatum(&nullFlags),
- PointerGetDatum(&searchMode));
-
- if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+ else if (IsA(clause, ScalarArrayOpExpr))
{
- /* No match is possible */
- *indexStartupCost = 0;
- *indexTotalCost = 0;
- *indexSelectivity = 0;
- PG_RETURN_VOID();
+ matchPossible = gincost_scalararrayopexpr(root,
+ index,
+ qinfo,
+ numEntries,
+ &counts);
+ if (!matchPossible)
+ break;
}
else
{
- int32 i;
-
- for (i = 0; i < nentries; i++)
- {
- /*
- * For partial match we haven't any information to estimate
- * number of matched entries in index, so, we just estimate it
- * as 100
- */
- if (partial_matches && partial_matches[i])
- partialEntriesInQuals += 100;
- else
- exactEntriesInQuals++;
-
- searchEntriesInQuals++;
- }
+ /* shouldn't be anything else for a GIN index */
+ elog(ERROR, "unsupported GIN indexqual type: %d",
+ (int) nodeTag(clause));
}
+ }
- if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
- {
- /* Treat "include empty" like an exact-match item */
- exactEntriesInQuals++;
- searchEntriesInQuals++;
- }
- else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
- {
- /* It's GIN_SEARCH_MODE_ALL */
- haveFullScan = true;
- }
+ /* Fall out if there were any provably-unsatisfiable quals */
+ if (!matchPossible)
+ {
+ *indexStartupCost = 0;
+ *indexTotalCost = 0;
+ *indexSelectivity = 0;
+ return;
}
- if (haveFullScan || indexQuals == NIL)
+ if (counts.haveFullScan || indexQuals == NIL)
{
/*
* Full index scan will be required. We treat this as if every key in
* the index had been listed in the query; is that reasonable?
*/
- searchEntriesInQuals = numEntries;
+ counts.partialEntries = 0;
+ counts.exactEntries = numEntries;
+ counts.searchEntries = numEntries;
}
/* Will we have more than one iteration of a nestloop scan? */
- if (outer_rel != NULL && outer_rel->rows > 1)
- num_scans = outer_rel->rows;
- else
- num_scans = 1;
+ outer_scans = loop_count;
/*
- * cost to begin scan, first of all, pay attention to pending list.
+ * Compute cost to begin scan, first of all, pay attention to pending
+ * list.
*/
entryPagesFetched = numPendingPages;
/*
* Estimate number of entry pages read. We need to do
- * searchEntriesInQuals searches. Use a power function as it should be,
+ * counts.searchEntries searches. Use a power function as it should be,
* but tuples on leaf pages usually is much greater. Here we include all
* searches in entry tree, including search of first entry in partial
* match algorithm
*/
- entryPagesFetched += ceil(searchEntriesInQuals * rint(pow(numEntryPages, 0.15)));
+ entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
/*
* Add an estimate of entry pages read by partial match algorithm. It's a
- * scan over leaf pages in entry tree. We haven't any useful stats here,
- * so estimate it as proportion.
+ * scan over leaf pages in entry tree. We haven't any useful stats here,
+ * so estimate it as proportion. Because counts.partialEntries is really
+ * pretty bogus (see code above), it's possible that it is more than
+ * numEntries; clamp the proportion to ensure sanity.
*/
- entryPagesFetched += ceil(numEntryPages * partialEntriesInQuals / numEntries);
+ partialScale = counts.partialEntries / numEntries;
+ partialScale = Min(partialScale, 1.0);
+
+ entryPagesFetched += ceil(numEntryPages * partialScale);
/*
* Partial match algorithm reads all data pages before doing actual scan,
- * so it's a startup cost. Again, we havn't any useful stats here, so,
- * estimate it as proportion
+ * so it's a startup cost. Again, we haven't any useful stats here, so
+ * estimate it as proportion.
*/
- dataPagesFetched = ceil(numDataPages * partialEntriesInQuals / numEntries);
+ dataPagesFetched = ceil(numDataPages * partialScale);
- /* calculate cache effects */
- if (num_scans > 1 || searchEntriesInQuals > 1)
+ /*
+ * Calculate cache effects if more than one scan due to nestloops or array
+ * quals. The result is pro-rated per nestloop scan, but the array qual
+ * factor shouldn't be pro-rated (compare genericcostestimate).
+ */
+ if (outer_scans > 1 || counts.arrayScans > 1)
{
+ entryPagesFetched *= outer_scans * counts.arrayScans;
entryPagesFetched = index_pages_fetched(entryPagesFetched,
(BlockNumber) numEntryPages,
numEntryPages, root);
+ entryPagesFetched /= outer_scans;
+ dataPagesFetched *= outer_scans * counts.arrayScans;
dataPagesFetched = index_pages_fetched(dataPagesFetched,
(BlockNumber) numDataPages,
numDataPages, root);
+ dataPagesFetched /= outer_scans;
}
/*
*/
*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
- /* cost to scan data pages for each exact (non-partial) matched entry */
- dataPagesFetched = ceil(numDataPages * exactEntriesInQuals / numEntries);
+ /*
+ * Now compute the number of data pages fetched during the scan.
+ *
+ * We assume every entry to have the same number of items, and that there
+ * is no overlap between them. (XXX: tsvector and array opclasses collect
+ * statistics on the frequency of individual keys; it would be nice to use
+ * those here.)
+ */
+ dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
/*
- * Estimate number of data pages read, using selectivity estimation and
- * capacity of data page.
+ * If there is a lot of overlap among the entries, in particular if one of
+ * the entries is very frequent, the above calculation can grossly
+ * under-estimate. As a simple cross-check, calculate a lower bound based
+ * on the overall selectivity of the quals. At a minimum, we must read
+ * one item pointer for each matching entry.
+ *
+ * The width of each item pointer varies, based on the level of
+ * compression. We don't have statistics on that, but an average of
+ * around 3 bytes per item is fairly typical.
*/
dataPagesFetchedBySel = ceil(*indexSelectivity *
- (numTuples / (BLCKSZ / SizeOfIptrData)));
-
+ (numTuples / (BLCKSZ / 3)));
if (dataPagesFetchedBySel > dataPagesFetched)
- {
- /*
- * At least one of entries is very frequent and, unfortunately, we
- * couldn't get statistic about entries (only tsvector has such
- * statistics). So, we obviously have too small estimation of pages
- * fetched from data tree. Re-estimate it from known capacity of data
- * pages
- */
dataPagesFetched = dataPagesFetchedBySel;
- }
- if (num_scans > 1)
+ /* Account for cache effects, the same as above */
+ if (outer_scans > 1 || counts.arrayScans > 1)
+ {
+ dataPagesFetched *= outer_scans * counts.arrayScans;
dataPagesFetched = index_pages_fetched(dataPagesFetched,
(BlockNumber) numDataPages,
numDataPages, root);
+ dataPagesFetched /= outer_scans;
+ }
+
+ /* And apply random_page_cost as the cost per page */
*indexTotalCost = *indexStartupCost +
dataPagesFetched * spc_random_page_cost;
/*
* Add on index qual eval costs, much as in genericcostestimate
*/
- cost_qual_eval(&index_qual_cost, indexQuals, root);
- qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
- cost_qual_eval(&index_qual_cost, indexOrderBys, root);
- qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+ orderby_operands_eval_cost(root, path);
+ qual_op_cost = cpu_operator_cost *
+ (list_length(indexQuals) + list_length(indexOrderBys));
+
+ *indexStartupCost += qual_arg_cost;
+ *indexTotalCost += qual_arg_cost;
+ *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = dataPagesFetched;
+}
+
+/*
+ * BRIN has search behavior completely different from other index types
+ */
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = path->indexquals;
+ List *indexOrderBys = path->indexorderbys;
+ double numPages = index->pages;
+ double numTuples = index->tuples;
+ List *qinfos;
+ Cost spc_seq_page_cost;
+ Cost spc_random_page_cost;
+ double qual_op_cost;
+ double qual_arg_cost;
+
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
+ /* fetch estimated page cost for tablespace containing index */
+ get_tablespace_page_costs(index->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /*
+ * BRIN indexes are always read in full; use that as startup cost.
+ *
+ * XXX maybe only include revmap pages here?
+ */
+ *indexStartupCost = spc_seq_page_cost * numPages * loop_count;
+
+ /*
+ * To read a BRIN index there might be a bit of back and forth over
+ * regular pages, as revmap might point to them out of sequential order;
+ * calculate this as reading the whole index in random order.
+ */
+ *indexTotalCost = spc_random_page_cost * numPages * loop_count;
+
+ *indexSelectivity =
+ clauselist_selectivity(root, indexQuals,
+ path->indexinfo->rel->relid,
+ JOIN_INNER, NULL);
+ *indexCorrelation = 1;
+
+ /*
+ * Add on index qual eval costs, much as in genericcostestimate.
+ */
+ qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+ orderby_operands_eval_cost(root, path);
qual_op_cost = cpu_operator_cost *
(list_length(indexQuals) + list_length(indexOrderBys));
- qual_arg_cost -= qual_op_cost;
- if (qual_arg_cost < 0) /* just in case... */
- qual_arg_cost = 0;
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = index->pages;
- PG_RETURN_VOID();
+ /* XXX what about pages_per_range? */
}