* Selectivity routines are registered in the pg_operator catalog
* in the "oprrest" and "oprjoin" attributes.
*
- * Index cost functions are registered in the pg_am catalog
- * in the "amcostestimate" attribute.
+ * Index cost functions are located via the index AM's API struct,
+ * which is obtained from the handler function registered in pg_am.
*
- * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* float8 oprjoin (internal, oid, internal, int2, internal);
*
* (Before Postgres 8.4, join estimators had only the first four of these
- * parameters. That signature is still allowed, but deprecated.) The
+ * parameters. That signature is still allowed, but deprecated.) The
* relationship between jointype and sjinfo is explained in the comments for
* clause_selectivity() --- the short version is that jointype is usually
* best ignored in favor of examining sjinfo.
#include "postgres.h"
#include <ctype.h>
+#include <float.h>
#include <math.h>
+#include "access/brin.h"
#include "access/gin.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/index.h"
+#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
+#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
+#include "statistics/statistics.h"
+#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
-#include "utils/snapmgr.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
#include "utils/typcache.h"
+#include "utils/varlena.h"
/* Hooks for plugins to get control when we ask for stats */
static double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+ RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
*
* Note: this routine is also used to estimate selectivity for some
* operators that are not "=" but have comparable selectivity behavior,
- * such as "~=" (geometric approximate-match). Even for "=", we must
+ * such as "~=" (geometric approximate-match). Even for "=", we must
* keep in mind that the left and right datatypes may differ.
*/
Datum
{
double selec;
bool isdefault;
+ Oid opfuncoid;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
/*
* If we matched the var to a unique index or DISTINCT clause, assume
- * there is exactly one match regardless of anything else. (This is
+ * there is exactly one match regardless of anything else. (This is
* slightly bogus, since the index or clause's equality operator might be
* different from ours, but it's much more likely to be right than
* ignoring the information.)
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
return 1.0 / vardata->rel->tuples;
- if (HeapTupleIsValid(vardata->statsTuple))
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(operator))))
{
Form_pg_statistic stats;
- Datum *values;
- int nvalues;
- float4 *numbers;
- int nnumbers;
+ AttStatsSlot sslot;
bool match = false;
int i;
/*
* Is the constant "=" to any of the column's most common values?
* (Although the given operator may not really be "=", we will assume
- * that seeing whether it returns TRUE is an appropriate test. If you
+ * that seeing whether it returns TRUE is an appropriate test. If you
* don't like this, maybe you shouldn't be using eqsel for your
* operator...)
*/
- if (get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- NULL,
- &values, &nvalues,
- &numbers, &nnumbers))
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
FmgrInfo eqproc;
- fmgr_info(get_opcode(operator), &eqproc);
+ fmgr_info(opfuncoid, &eqproc);
- for (i = 0; i < nvalues; i++)
+ for (i = 0; i < sslot.nvalues; i++)
{
/* be careful to apply operator right way 'round */
if (varonleft)
match = DatumGetBool(FunctionCall2Coll(&eqproc,
DEFAULT_COLLATION_OID,
- values[i],
+ sslot.values[i],
constval));
else
match = DatumGetBool(FunctionCall2Coll(&eqproc,
DEFAULT_COLLATION_OID,
constval,
- values[i]));
+ sslot.values[i]));
if (match)
break;
}
else
{
/* no most-common-value info available */
- values = NULL;
- numbers = NULL;
- i = nvalues = nnumbers = 0;
+ i = 0; /* keep compiler quiet */
}
if (match)
* Constant is "=" to this common value. We know selectivity
* exactly (or as exactly as ANALYZE could calculate it, anyway).
*/
- selec = numbers[i];
+ selec = sslot.numbers[i];
}
else
{
double sumcommon = 0.0;
double otherdistinct;
- for (i = 0; i < nnumbers; i++)
- sumcommon += numbers[i];
+ for (i = 0; i < sslot.nnumbers; i++)
+ sumcommon += sslot.numbers[i];
selec = 1.0 - sumcommon - stats->stanullfrac;
CLAMP_PROBABILITY(selec);
* all the not-common values share this remaining fraction
* equally, so we divide by the number of other distinct values.
*/
- otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
+ otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
+ sslot.nnumbers;
if (otherdistinct > 1)
selec /= otherdistinct;
* Another cross-check: selectivity shouldn't be estimated as more
* than the least common "most common value".
*/
- if (nnumbers > 0 && selec > numbers[nnumbers - 1])
- selec = numbers[nnumbers - 1];
+ if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
+ selec = sslot.numbers[sslot.nnumbers - 1];
}
- free_attstatsslot(vardata->atttype, values, nvalues,
- numbers, nnumbers);
+ free_attstatsslot(&sslot);
}
else
{
/*
* If we matched the var to a unique index or DISTINCT clause, assume
- * there is exactly one match regardless of anything else. (This is
+ * there is exactly one match regardless of anything else. (This is
* slightly bogus, since the index or clause's equality operator might be
* different from ours, but it's much more likely to be right than
* ignoring the information.)
{
Form_pg_statistic stats;
double ndistinct;
- float4 *numbers;
- int nnumbers;
+ AttStatsSlot sslot;
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
* result averaged over all possible values whether common or
* uncommon. (Essentially, we are assuming that the not-yet-known
* comparison value is equally likely to be any of the possible
- * values, regardless of their frequency in the table. Is that a good
+ * values, regardless of their frequency in the table. Is that a good
* idea?)
*/
selec = 1.0 - stats->stanullfrac;
* Cross-check: selectivity should never be estimated as more than the
* most common value's.
*/
- if (get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- NULL,
- NULL, NULL,
- &numbers, &nnumbers))
+ ATTSTATSSLOT_NUMBERS))
{
- if (nnumbers > 0 && selec > numbers[0])
- selec = numbers[0];
- free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
+ if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
+ selec = sslot.numbers[0];
+ free_attstatsslot(&sslot);
}
}
else
{
double mcv_selec,
sumcommon;
- Datum *values;
- int nvalues;
- float4 *numbers;
- int nnumbers;
+ AttStatsSlot sslot;
int i;
mcv_selec = 0.0;
sumcommon = 0.0;
if (HeapTupleIsValid(vardata->statsTuple) &&
- get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- NULL,
- &values, &nvalues,
- &numbers, &nnumbers))
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
- for (i = 0; i < nvalues; i++)
+ for (i = 0; i < sslot.nvalues; i++)
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
- values[i],
+ sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
constval,
- values[i])))
- mcv_selec += numbers[i];
- sumcommon += numbers[i];
+ sslot.values[i])))
+ mcv_selec += sslot.numbers[i];
+ sumcommon += sslot.numbers[i];
}
- free_attstatsslot(vardata->atttype, values, nvalues,
- numbers, nnumbers);
+ free_attstatsslot(&sslot);
}
*sumcommonp = sumcommon;
* essentially using the histogram just as a representative sample. However,
* small histograms are unlikely to be all that representative, so the caller
* should be prepared to fall back on some other estimation approach when the
- * histogram is missing or very small. It may also be prudent to combine this
+ * histogram is missing or very small. It may also be prudent to combine this
* approach with another one when the histogram is small.
*
* If the actual histogram size is not at least min_hist_size, we won't bother
*
* Note that the result disregards both the most-common-values (if any) and
* null entries. The caller is expected to combine this result with
- * statistics for those portions of the column population. It may also be
+ * statistics for those portions of the column population. It may also be
* prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
*/
double
int *hist_size)
{
double result;
- Datum *values;
- int nvalues;
+ AttStatsSlot sslot;
/* check sanity of parameters */
Assert(n_skip >= 0);
Assert(min_hist_size > 2 * n_skip);
if (HeapTupleIsValid(vardata->statsTuple) &&
- get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, InvalidOid,
- NULL,
- &values, &nvalues,
- NULL, NULL))
+ ATTSTATSSLOT_VALUES))
{
- *hist_size = nvalues;
- if (nvalues >= min_hist_size)
+ *hist_size = sslot.nvalues;
+ if (sslot.nvalues >= min_hist_size)
{
int nmatch = 0;
int i;
- for (i = n_skip; i < nvalues - n_skip; i++)
+ for (i = n_skip; i < sslot.nvalues - n_skip; i++)
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
- values[i],
+ sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
constval,
- values[i])))
+ sslot.values[i])))
nmatch++;
}
- result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));
+ result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
}
else
result = -1;
- free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+ free_attstatsslot(&sslot);
}
else
{
Datum constval, Oid consttype)
{
double hist_selec;
- Oid hist_op;
- Datum *values;
- int nvalues;
+ AttStatsSlot sslot;
hist_selec = -1.0;
* the reverse way if isgt is TRUE.
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
- get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, InvalidOid,
- &hist_op,
- &values, &nvalues,
- NULL, NULL))
+ ATTSTATSSLOT_VALUES))
{
- if (nvalues > 1)
+ if (sslot.nvalues > 1)
{
/*
* Use binary search to find proper location, ie, the first slot
*
* If the binary search accesses the first or last histogram
* entry, we try to replace that endpoint with the true column min
- * or max as found by get_actual_variable_range(). This
+ * or max as found by get_actual_variable_range(). This
* ameliorates misestimates when the min or max is moving as a
* result of changes since the last ANALYZE. Note that this could
* result in effectively including MCVs into the histogram that
*/
double histfrac;
int lobound = 0; /* first possible slot to search */
- int hibound = nvalues; /* last+1 slot to search */
+ int hibound = sslot.nvalues; /* last+1 slot to search */
bool have_end = false;
/*
* one of them to be updated, so we deal with that within the
* loop.)
*/
- if (nvalues == 2)
+ if (sslot.nvalues == 2)
have_end = get_actual_variable_range(root,
vardata,
- hist_op,
- &values[0],
- &values[1]);
+ sslot.staop,
+ &sslot.values[0],
+ &sslot.values[1]);
while (lobound < hibound)
{
* histogram entry, first try to replace it with the actual
* current min or max (unless we already did so above).
*/
- if (probe == 0 && nvalues > 2)
+ if (probe == 0 && sslot.nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
- hist_op,
- &values[0],
+ sslot.staop,
+ &sslot.values[0],
NULL);
- else if (probe == nvalues - 1 && nvalues > 2)
+ else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
- hist_op,
+ sslot.staop,
NULL,
- &values[probe]);
+ &sslot.values[probe]);
ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
DEFAULT_COLLATION_OID,
- values[probe],
+ sslot.values[probe],
constval));
if (isgt)
ltcmp = !ltcmp;
/* Constant is below lower histogram boundary. */
histfrac = 0.0;
}
- else if (lobound >= nvalues)
+ else if (lobound >= sslot.nvalues)
{
/* Constant is above upper histogram boundary. */
histfrac = 1.0;
* interpolation within this bin.
*/
if (convert_to_scalar(constval, consttype, &val,
- values[i - 1], values[i],
+ sslot.values[i - 1], sslot.values[i],
vardata->vartype,
&low, &high))
{
/*
* Watch out for the possibility that we got a NaN or
- * Infinity from the division. This can happen
+ * Infinity from the division. This can happen
* despite the previous checks, if for example "low"
* is -Infinity.
*/
* Ideally we'd produce an error here, on the grounds that
* the given operator shouldn't have scalarXXsel
* registered as its selectivity func unless we can deal
- * with its operand types. But currently, all manner of
+ * with its operand types. But currently, all manner of
* stuff is invoking scalarXXsel, so give a default
* estimate until that can be fixed.
*/
* binfrac partial bin below the constant.
*/
histfrac = (double) (i - 1) + binfrac;
- histfrac /= (double) (nvalues - 1);
+ histfrac /= (double) (sslot.nvalues - 1);
}
/*
/*
* The histogram boundaries are only approximate to begin with,
- * and may well be out of date anyway. Therefore, don't believe
+ * and may well be out of date anyway. Therefore, don't believe
* extremely small or large selectivity estimates --- unless we
* got actual current endpoint values from the table.
*/
}
}
- free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+ free_attstatsslot(&sslot);
}
return hist_selec;
/*
* If this is for a NOT LIKE or similar operator, get the corresponding
- * positive-match operator and work with that. Set result to the correct
+ * positive-match operator and work with that. Set result to the correct
* default estimate, too.
*/
if (negate)
/*
* Pull out any fixed prefix implied by the pattern, and estimate the
- * fractional selectivity of the remainder of the pattern. Unlike many of
+ * fractional selectivity of the remainder of the pattern. Unlike many of
* the other functions in this file, we use the pattern operator's actual
* collation for this step. This is not because we expect the collation
* to make a big difference in the selectivity estimate (it seldom would),
/*
* If we have most-common-values info, add up the fractions of the MCV
* entries that satisfy MCV OP PATTERN. These fractions contribute
- * directly to the result selectivity. Also add up the total fraction
+ * directly to the result selectivity. Also add up the total fraction
* represented by MCV entries.
*/
mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
}
+/*
+ * boolvarsel - Selectivity of Boolean variable.
+ *
+ * This can actually be called on any boolean-valued expression. If it
+ * involves only Vars of the specified relation, and if there are statistics
+ * about the Var or expression (the latter is possible if it's indexed) then
+ * we'll produce a real estimate; otherwise it's just a default.
+ */
+Selectivity
+boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+{
+ VariableStatData vardata;
+ double selec;
+
+ examine_variable(root, arg, varRelid, &vardata);
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ /*
+ * A boolean variable V is equivalent to the clause V = 't', so we
+ * compute the selectivity as if that is what we have.
+ */
+ selec = var_eq_const(&vardata, BooleanEqualOperator,
+ BoolGetDatum(true), false, true);
+ }
+ else if (is_funcclause(arg))
+ {
+ /*
+ * If we have no stats and it's a function call, estimate 0.3333333.
+ * This seems a pretty unprincipled choice, but Postgres has been
+ * using that estimate for function calls since 1992. The hoariness
+ * of this behavior suggests that we should not be in too much hurry
+ * to use another value.
+ */
+ selec = 0.3333333;
+ }
+ else
+ {
+ /* Otherwise, the default estimate is 0.5 */
+ selec = 0.5;
+ }
+ ReleaseVariableStats(vardata);
+ return selec;
+}
+
/*
* booltestsel - Selectivity of BooleanTest Node.
*/
{
Form_pg_statistic stats;
double freq_null;
- Datum *values;
- int nvalues;
- float4 *numbers;
- int nnumbers;
+ AttStatsSlot sslot;
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
freq_null = stats->stanullfrac;
- if (get_attstatsslot(vardata.statsTuple,
- vardata.atttype, vardata.atttypmod,
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- NULL,
- &values, &nvalues,
- &numbers, &nnumbers)
- && nnumbers > 0)
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
+ && sslot.nnumbers > 0)
{
double freq_true;
double freq_false;
/*
* Get first MCV frequency and derive frequency for true.
*/
- if (DatumGetBool(values[0]))
- freq_true = numbers[0];
+ if (DatumGetBool(sslot.values[0]))
+ freq_true = sslot.numbers[0];
else
- freq_true = 1.0 - numbers[0] - freq_null;
+ freq_true = 1.0 - sslot.numbers[0] - freq_null;
/*
* Next derive frequency for false. Then use these as appropriate
break;
}
- free_attstatsslot(vardata.atttype, values, nvalues,
- numbers, nnumbers);
+ free_attstatsslot(&sslot);
}
else
{
leftop = (Node *) linitial(clause->args);
rightop = (Node *) lsecond(clause->args);
+ /* aggressively reduce both sides to constants */
+ leftop = estimate_expression_value(root, leftop);
+ rightop = estimate_expression_value(root, rightop);
+
/* get nominal (after relabeling) element type of rightop */
nominal_element_type = get_base_element_type(exprType(rightop));
if (!OidIsValid(nominal_element_type))
/*
* For generic operators, we assume the probability of success is
- * independent for each array element. But for "= ANY" or "<> ALL",
+ * independent for each array element. But for "= ANY" or "<> ALL",
* if the array elements are distinct (which'd typically be the case)
* then the probabilities are disjoint, and we should just sum them.
*
double nd2;
bool isdefault1;
bool isdefault2;
+ Oid opfuncoid;
Form_pg_statistic stats1 = NULL;
Form_pg_statistic stats2 = NULL;
bool have_mcvs1 = false;
- Datum *values1 = NULL;
- int nvalues1 = 0;
- float4 *numbers1 = NULL;
- int nnumbers1 = 0;
bool have_mcvs2 = false;
- Datum *values2 = NULL;
- int nvalues2 = 0;
- float4 *numbers2 = NULL;
- int nnumbers2 = 0;
+ AttStatsSlot sslot1;
+ AttStatsSlot sslot2;
nd1 = get_variable_numdistinct(vardata1, &isdefault1);
nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+ opfuncoid = get_opcode(operator);
+
+ memset(&sslot1, 0, sizeof(sslot1));
+ memset(&sslot2, 0, sizeof(sslot2));
+
if (HeapTupleIsValid(vardata1->statsTuple))
{
+ /* note we allow use of nullfrac regardless of security check */
stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
- have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
- vardata1->atttype,
- vardata1->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values1, &nvalues1,
- &numbers1, &nnumbers1);
+ if (statistic_proc_security_check(vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
if (HeapTupleIsValid(vardata2->statsTuple))
{
+ /* note we allow use of nullfrac regardless of security check */
stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
- have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
- vardata2->atttype,
- vardata2->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values2, &nvalues2,
- &numbers2, &nnumbers2);
+ if (statistic_proc_security_check(vardata2, opfuncoid))
+ have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
if (have_mcvs1 && have_mcvs2)
{
/*
- * We have most-common-value lists for both relations. Run through
+ * We have most-common-value lists for both relations. Run through
* the lists to see which MCVs actually join to each other with the
- * given operator. This allows us to determine the exact join
+ * given operator. This allows us to determine the exact join
* selectivity for the portion of the relations represented by the MCV
* lists. We still have to estimate for the remaining population, but
* in a skewed distribution this gives us a big leg up in accuracy.
int i,
nmatches;
- fmgr_info(get_opcode(operator), &eqproc);
- hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
- hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
+ fmgr_info(opfuncoid, &eqproc);
+ hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
+ hasmatch2 = (bool *) palloc0(sslot2.nvalues * sizeof(bool));
/*
* Note we assume that each MCV will match at most one member of the
- * other MCV list. If the operator isn't really equality, there could
+ * other MCV list. If the operator isn't really equality, there could
* be multiple matches --- but we don't look for them, both for speed
* and because the math wouldn't add up...
*/
matchprodfreq = 0.0;
nmatches = 0;
- for (i = 0; i < nvalues1; i++)
+ for (i = 0; i < sslot1.nvalues; i++)
{
int j;
- for (j = 0; j < nvalues2; j++)
+ for (j = 0; j < sslot2.nvalues; j++)
{
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
DEFAULT_COLLATION_OID,
- values1[i],
- values2[j])))
+ sslot1.values[i],
+ sslot2.values[j])))
{
hasmatch1[i] = hasmatch2[j] = true;
- matchprodfreq += numbers1[i] * numbers2[j];
+ matchprodfreq += sslot1.numbers[i] * sslot2.numbers[j];
nmatches++;
break;
}
CLAMP_PROBABILITY(matchprodfreq);
/* Sum up frequencies of matched and unmatched MCVs */
matchfreq1 = unmatchfreq1 = 0.0;
- for (i = 0; i < nvalues1; i++)
+ for (i = 0; i < sslot1.nvalues; i++)
{
if (hasmatch1[i])
- matchfreq1 += numbers1[i];
+ matchfreq1 += sslot1.numbers[i];
else
- unmatchfreq1 += numbers1[i];
+ unmatchfreq1 += sslot1.numbers[i];
}
CLAMP_PROBABILITY(matchfreq1);
CLAMP_PROBABILITY(unmatchfreq1);
matchfreq2 = unmatchfreq2 = 0.0;
- for (i = 0; i < nvalues2; i++)
+ for (i = 0; i < sslot2.nvalues; i++)
{
if (hasmatch2[i])
- matchfreq2 += numbers2[i];
+ matchfreq2 += sslot2.numbers[i];
else
- unmatchfreq2 += numbers2[i];
+ unmatchfreq2 += sslot2.numbers[i];
}
CLAMP_PROBABILITY(matchfreq2);
CLAMP_PROBABILITY(unmatchfreq2);
* MCVs plus non-MCV values.
*/
totalsel1 = matchprodfreq;
- if (nd2 > nvalues2)
- totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
+ if (nd2 > sslot2.nvalues)
+ totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2.nvalues);
if (nd2 > nmatches)
totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
(nd2 - nmatches);
/* Same estimate from the point of view of relation 2. */
totalsel2 = matchprodfreq;
- if (nd1 > nvalues1)
- totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
+ if (nd1 > sslot1.nvalues)
+ totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1.nvalues);
if (nd1 > nmatches)
totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
(nd1 - nmatches);
selec /= nd2;
}
- if (have_mcvs1)
- free_attstatsslot(vardata1->atttype, values1, nvalues1,
- numbers1, nnumbers1);
- if (have_mcvs2)
- free_attstatsslot(vardata2->atttype, values2, nvalues2,
- numbers2, nnumbers2);
+ free_attstatsslot(&sslot1);
+ free_attstatsslot(&sslot2);
return selec;
}
*
* (Also used for anti join, which we are supposed to estimate the same way.)
* Caller has ensured that vardata1 is the LHS variable.
+ * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
*/
static double
eqjoinsel_semi(Oid operator,
double nd2;
bool isdefault1;
bool isdefault2;
+ Oid opfuncoid;
Form_pg_statistic stats1 = NULL;
bool have_mcvs1 = false;
- Datum *values1 = NULL;
- int nvalues1 = 0;
- float4 *numbers1 = NULL;
- int nnumbers1 = 0;
bool have_mcvs2 = false;
- Datum *values2 = NULL;
- int nvalues2 = 0;
- float4 *numbers2 = NULL;
- int nnumbers2 = 0;
+ AttStatsSlot sslot1;
+ AttStatsSlot sslot2;
nd1 = get_variable_numdistinct(vardata1, &isdefault1);
nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+ opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
+
+ memset(&sslot1, 0, sizeof(sslot1));
+ memset(&sslot2, 0, sizeof(sslot2));
+
/*
* We clamp nd2 to be not more than what we estimate the inner relation's
- * size to be. This is intuitively somewhat reasonable since obviously
+ * size to be. This is intuitively somewhat reasonable since obviously
* there can't be more than that many distinct values coming from the
* inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
* likewise) is that this is the only pathway by which restriction clauses
* We can apply this clamping both with respect to the base relation from
* which the join variable comes (if there is just one), and to the
* immediate inner input relation of the current join.
+ *
+ * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+ * great, maybe, but it didn't come out of nowhere either. This is most
+ * helpful when the inner relation is empty and consequently has no stats.
*/
if (vardata2->rel)
- nd2 = Min(nd2, vardata2->rel->rows);
- nd2 = Min(nd2, inner_rel->rows);
+ {
+ if (nd2 >= vardata2->rel->rows)
+ {
+ nd2 = vardata2->rel->rows;
+ isdefault2 = false;
+ }
+ }
+ if (nd2 >= inner_rel->rows)
+ {
+ nd2 = inner_rel->rows;
+ isdefault2 = false;
+ }
if (HeapTupleIsValid(vardata1->statsTuple))
{
+ /* note we allow use of nullfrac regardless of security check */
stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
- have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
- vardata1->atttype,
- vardata1->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values1, &nvalues1,
- &numbers1, &nnumbers1);
+ if (statistic_proc_security_check(vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
- if (HeapTupleIsValid(vardata2->statsTuple))
+ if (HeapTupleIsValid(vardata2->statsTuple) &&
+ statistic_proc_security_check(vardata2, opfuncoid))
{
- have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
- vardata2->atttype,
- vardata2->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values2, &nvalues2,
- &numbers2, &nnumbers2);
+ have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES);
+ /* note: currently don't need stanumbers from RHS */
}
if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
{
/*
- * We have most-common-value lists for both relations. Run through
+ * We have most-common-value lists for both relations. Run through
* the lists to see which MCVs actually join to each other with the
- * given operator. This allows us to determine the exact join
+ * given operator. This allows us to determine the exact join
* selectivity for the portion of the relations represented by the MCV
* lists. We still have to estimate for the remaining population, but
* in a skewed distribution this gives us a big leg up in accuracy.
/*
* The clamping above could have resulted in nd2 being less than
- * nvalues2; in which case, we assume that precisely the nd2 most
- * common values in the relation will appear in the join input, and so
- * compare to only the first nd2 members of the MCV list. Of course
- * this is frequently wrong, but it's the best bet we can make.
+ * sslot2.nvalues; in which case, we assume that precisely the nd2
+ * most common values in the relation will appear in the join input,
+ * and so compare to only the first nd2 members of the MCV list. Of
+ * course this is frequently wrong, but it's the best bet we can make.
*/
- clamped_nvalues2 = Min(nvalues2, nd2);
+ clamped_nvalues2 = Min(sslot2.nvalues, nd2);
- fmgr_info(get_opcode(operator), &eqproc);
- hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
+ fmgr_info(opfuncoid, &eqproc);
+ hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
/*
* Note we assume that each MCV will match at most one member of the
- * other MCV list. If the operator isn't really equality, there could
+ * other MCV list. If the operator isn't really equality, there could
* be multiple matches --- but we don't look for them, both for speed
* and because the math wouldn't add up...
*/
nmatches = 0;
- for (i = 0; i < nvalues1; i++)
+ for (i = 0; i < sslot1.nvalues; i++)
{
int j;
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
DEFAULT_COLLATION_OID,
- values1[i],
- values2[j])))
+ sslot1.values[i],
+ sslot2.values[j])))
{
hasmatch1[i] = hasmatch2[j] = true;
nmatches++;
}
/* Sum up frequencies of matched MCVs */
matchfreq1 = 0.0;
- for (i = 0; i < nvalues1; i++)
+ for (i = 0; i < sslot1.nvalues; i++)
{
if (hasmatch1[i])
- matchfreq1 += numbers1[i];
+ matchfreq1 += sslot1.numbers[i];
}
CLAMP_PROBABILITY(matchfreq1);
pfree(hasmatch1);
/*
* Now we need to estimate the fraction of relation 1 that has at
- * least one join partner. We know for certain that the matched MCVs
+ * least one join partner. We know for certain that the matched MCVs
* do, so that gives us a lower bound, but we're really in the dark
* about everything else. Our crude approach is: if nd1 <= nd2 then
* assume all non-null rel1 rows have join partners, else assume for
selec = 0.5 * (1.0 - nullfrac1);
}
- if (have_mcvs1)
- free_attstatsslot(vardata1->atttype, values1, nvalues1,
- numbers1, nnumbers1);
- if (have_mcvs2)
- free_attstatsslot(vardata2->atttype, values2, nvalues2,
- numbers2, nnumbers2);
+ free_attstatsslot(&sslot1);
+ free_attstatsslot(&sslot2);
return selec;
}
* groupExprs - list of expressions being grouped by
* input_rows - number of rows estimated to arrive at the group/unique
* filter step
+ * pgset - NULL, or a List** pointing to a grouping set to filter the
+ * groupExprs against
*
* Given the lack of any cross-correlation statistics in the system, it's
* impossible to do anything really trustworthy with GROUP BY conditions
* case (all possible cross-product terms actually appear as groups) since
* very often the grouped-by Vars are highly correlated. Our current approach
* is as follows:
- * 1. Expressions yielding boolean are assumed to contribute two groups,
+ * 1. Expressions yielding boolean are assumed to contribute two groups,
* independently of their content, and are ignored in the subsequent
- * steps. This is mainly because tests like "col IS NULL" break the
+ * steps. This is mainly because tests like "col IS NULL" break the
* heuristic used in step 2 especially badly.
- * 2. Reduce the given expressions to a list of unique Vars used. For
+ * 2. Reduce the given expressions to a list of unique Vars used. For
* example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
* It is clearly correct not to count the same Var more than once.
* It is also reasonable to treat f(x) the same as x: f() cannot
* As a special case, if a GROUP BY expression can be matched to an
* expressional index for which we have statistics, then we treat the
* whole expression as though it were just a Var.
- * 3. If the list contains Vars of different relations that are known equal
+ * 3. If the list contains Vars of different relations that are known equal
* due to equivalence classes, then drop all but one of the Vars from each
* known-equal set, keeping the one with smallest estimated # of values
* (since the extra values of the others can't appear in joined rows).
* Note the reason we only consider Vars of different relations is that
* if we considered ones of the same rel, we'd be double-counting the
* restriction selectivity of the equality in the next step.
- * 4. For Vars within a single source rel, we multiply together the numbers
+ * 4. For Vars within a single source rel, we multiply together the numbers
* of values, clamp to the number of rows in the rel (divided by 10 if
- * more than one Var), and then multiply by the selectivity of the
- * restriction clauses for that rel. When there's more than one Var,
- * the initial product is probably too high (it's the worst case) but
- * clamping to a fraction of the rel's rows seems to be a helpful
- * heuristic for not letting the estimate get out of hand. (The factor
- * of 10 is derived from pre-Postgres-7.4 practice.) Multiplying
- * by the restriction selectivity is effectively assuming that the
- * restriction clauses are independent of the grouping, which is a crummy
- * assumption, but it's hard to do better.
- * 5. If there are Vars from multiple rels, we repeat step 4 for each such
+ * more than one Var), and then multiply by a factor based on the
+ * selectivity of the restriction clauses for that rel. When there's
+ * more than one Var, the initial product is probably too high (it's the
+ * worst case) but clamping to a fraction of the rel's rows seems to be a
+ * helpful heuristic for not letting the estimate get out of hand. (The
+ * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
+ * we multiply by to adjust for the restriction selectivity assumes that
+ * the restriction clauses are independent of the grouping, which may not
+ * be a valid assumption, but it's hard to do better.
+ * 5. If there are Vars from multiple rels, we repeat step 4 for each such
* rel, and multiply the results together.
* Note that rels not containing grouped Vars are ignored completely, as are
* join clauses. Such rels cannot increase the number of groups, and we
* but we don't have the info to do better).
*/
double
-estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
+ List **pgset)
{
List *varinfos = NIL;
double numdistinct;
ListCell *l;
+ int i;
/*
* We don't ever want to return an estimate of zero groups, as that tends
* for normal cases with GROUP BY or DISTINCT, but it is possible for
* corner cases with set operations.)
*/
- if (groupExprs == NIL)
+ if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
return 1.0;
/*
- * Count groups derived from boolean grouping expressions. For other
+ * Count groups derived from boolean grouping expressions. For other
* expressions, find the unique Vars used, treating an expression as a Var
* if we can find stats for it. For each one, record the statistical
* estimate of number of distinct values (total in its table, without
*/
numdistinct = 1.0;
+ i = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
List *varshere;
ListCell *l2;
+ /* is expression in this grouping set? */
+ if (pgset && !list_member_int(*pgset, i++))
+ continue;
+
/* Short-circuit for expressions returning boolean */
if (exprType(groupexpr) == BOOLOID)
{
* down to ignoring the possible addition of nulls to the result set).
*/
varshere = pull_var_clause(groupexpr,
- PVC_RECURSE_AGGREGATES,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
PVC_RECURSE_PLACEHOLDERS);
/*
* Group Vars by relation and estimate total numdistinct.
*
* For each iteration of the outer loop, we process the frontmost Var in
- * varinfos, plus all other Vars in the same relation. We remove these
+ * varinfos, plus all other Vars in the same relation. We remove these
* Vars from the newvarinfos list for the next iteration. This is the
* easiest way to group Vars of same rel together.
*/
{
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
- double reldistinct = varinfo1->ndistinct;
+ double reldistinct = 1;
double relmaxndistinct = reldistinct;
- int relvarcount = 1;
+ int relvarcount = 0;
List *newvarinfos = NIL;
+ List *relvarinfos = NIL;
/*
- * Get the product of numdistinct estimates of the Vars for this rel.
- * Also, construct new varinfos list of remaining Vars.
+ * Split the list of varinfos in two - one for the current rel, one
+ * for remaining Vars on other rels.
*/
+ relvarinfos = lcons(varinfo1, relvarinfos);
for_each_cell(l, lnext(list_head(varinfos)))
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
{
- reldistinct *= varinfo2->ndistinct;
- if (relmaxndistinct < varinfo2->ndistinct)
- relmaxndistinct = varinfo2->ndistinct;
- relvarcount++;
+ /* varinfos on current rel */
+ relvarinfos = lcons(varinfo2, relvarinfos);
}
else
{
}
}
+ /*
+ * Get the numdistinct estimate for the Vars of this rel. We
+ * iteratively search for multivariate n-distinct with maximum number
+ * of vars; assuming that each var group is independent of the others,
+ * we multiply them together. Any remaining relvarinfos after no more
+ * multivariate matches are found are assumed independent too, so
+ * their individual ndistinct estimates are multiplied also.
+ *
+ * While iterating, count how many separate numdistinct values we
+ * apply. We apply a fudge factor below, but only if we multiplied
+ * more than one such values.
+ */
+ while (relvarinfos)
+ {
+ double mvndistinct;
+
+ if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+ &mvndistinct))
+ {
+ reldistinct *= mvndistinct;
+ if (relmaxndistinct < mvndistinct)
+ relmaxndistinct = mvndistinct;
+ relvarcount++;
+ }
+ else
+ {
+ foreach(l, relvarinfos)
+ {
+ GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+ reldistinct *= varinfo2->ndistinct;
+ if (relmaxndistinct < varinfo2->ndistinct)
+ relmaxndistinct = varinfo2->ndistinct;
+ relvarcount++;
+ }
+
+ /* we're done with this relation */
+ relvarinfos = NIL;
+ }
+ }
+
/*
* Sanity check --- don't divide by zero if empty relation.
*/
- Assert(rel->reloptkind == RELOPT_BASEREL);
+ Assert(IS_SIMPLE_REL(rel));
if (rel->tuples > 0)
{
/*
reldistinct = clamp;
/*
- * Multiply by restriction selectivity.
+ * Update the estimate based on the restriction selectivity,
+ * guarding against division by zero when reldistinct is zero.
+ * Also skip this if we know that we are returning all rows.
*/
- reldistinct *= rel->rows / rel->tuples;
+ if (reldistinct > 0 && rel->rows < rel->tuples)
+ {
+ /*
+ * Given a table containing N rows with n distinct values in a
+ * uniform distribution, if we select p rows at random then
+ * the expected number of distinct values selected is
+ *
+ * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+ *
+ * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+ *
+ * See "Approximating block accesses in database
+ * organizations", S. B. Yao, Communications of the ACM,
+ * Volume 20 Issue 4, April 1977 Pages 260-261.
+ *
+ * Alternatively, re-arranging the terms from the factorials,
+ * this may be written as
+ *
+ * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+ *
+ * This form of the formula is more efficient to compute in
+ * the common case where p is larger than N/n. Additionally,
+ * as pointed out by Dell'Era, if i << N for all terms in the
+ * product, it can be approximated by
+ *
+ * n * (1 - ((N-p)/N)^(N/n))
+ *
+ * See "Expected distinct values when selecting from a bag
+ * without replacement", Alberto Dell'Era,
+ * http://www.adellera.it/investigations/distinct_balls/.
+ *
+ * The condition i << N is equivalent to n >> 1, so this is a
+ * good approximation when the number of distinct values in
+ * the table is large. It turns out that this formula also
+ * works well even when n is small.
+ */
+ reldistinct *=
+ (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+ rel->tuples / reldistinct));
+ }
+ reldistinct = clamp_row_est(reldistinct);
/*
* Update estimate of total distinct groups.
* distribution, so this will have to do for now.
*
* We are passed the number of buckets the executor will use for the given
- * input relation. If the data were perfectly distributed, with the same
+ * input relation. If the data were perfectly distributed, with the same
* number of tuples going into each available bucket, then the bucketsize
* fraction would be 1/nbuckets. But this happy state of affairs will occur
* only if (a) there are at least nbuckets distinct data values, and (b)
- * we have a not-too-skewed data distribution. Otherwise the buckets will
+ * we have a not-too-skewed data distribution. Otherwise the buckets will
* be nonuniformly occupied. If the other relation in the join has a key
* distribution similar to this one's, then the most-loaded buckets are
* exactly those that will be probed most often. Therefore, the "average"
mcvfreq,
avgfreq;
bool isdefault;
- float4 *numbers;
- int nnumbers;
+ AttStatsSlot sslot;
examine_variable(root, hashkey, 0, &vardata);
* XXX Possibly better way, but much more expensive: multiply by
* selectivity of rel's restriction clauses that mention the target Var.
*/
- if (vardata.rel)
+ if (vardata.rel && vardata.rel->tuples > 0)
+ {
ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+ ndistinct = clamp_row_est(ndistinct);
+ }
/*
* Initial estimate of bucketsize fraction is 1/nbuckets as long as the
if (HeapTupleIsValid(vardata.statsTuple))
{
- if (get_attstatsslot(vardata.statsTuple,
- vardata.atttype, vardata.atttypmod,
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- NULL,
- NULL, NULL,
- &numbers, &nnumbers))
+ ATTSTATSSLOT_NUMBERS))
{
/*
* The first MCV stat is for the most common value.
*/
- if (nnumbers > 0)
- mcvfreq = numbers[0];
- free_attstatsslot(vardata.atttype, NULL, 0,
- numbers, nnumbers);
+ if (sslot.nnumbers > 0)
+ mcvfreq = sslot.numbers[0];
+ free_attstatsslot(&sslot);
}
}
*-------------------------------------------------------------------------
*/
+/*
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and update *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches. If a match it found, *varinfos is
+ * updated to remove the list of matched varinfos.
+ *
+ * Varinfos that aren't for simple Vars are ignored.
+ *
+ * Return TRUE if we're able to find a match, FALSE otherwise.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+ List **varinfos, double *ndistinct)
+{
+ ListCell *lc;
+ Bitmapset *attnums = NULL;
+ int nmatches;
+ Oid statOid = InvalidOid;
+ MVNDistinct *stats;
+ Bitmapset *matched = NULL;
+
+ /* bail out immediately if the table has no extended statistics */
+ if (!rel->statlist)
+ return false;
+
+ /* Determine the attnums we're looking for */
+ foreach(lc, *varinfos)
+ {
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+
+ Assert(varinfo->rel == rel);
+
+ if (IsA(varinfo->var, Var))
+ {
+ attnums = bms_add_member(attnums,
+ ((Var *) varinfo->var)->varattno);
+ }
+ }
+
+ /* look for the ndistinct statistics matching the most vars */
+ nmatches = 1; /* we require at least two matches */
+ foreach(lc, rel->statlist)
+ {
+ StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+ Bitmapset *shared;
+ int nshared;
+
+ /* skip statistics of other kinds */
+ if (info->kind != STATS_EXT_NDISTINCT)
+ continue;
+
+ /* compute attnums shared by the vars and the statistics object */
+ shared = bms_intersect(info->keys, attnums);
+ nshared = bms_num_members(shared);
+
+ /*
+ * Does this statistics object match more columns than the currently
+ * best object? If so, use this one instead.
+ *
+ * XXX This should break ties using name of the object, or something
+ * like that, to make the outcome stable.
+ */
+ if (nshared > nmatches)
+ {
+ statOid = info->statOid;
+ nmatches = nshared;
+ matched = shared;
+ }
+ }
+
+ /* No match? */
+ if (statOid == InvalidOid)
+ return false;
+ Assert(nmatches > 1 && matched != NULL);
+
+ stats = statext_ndistinct_load(statOid);
+
+ /*
+ * If we have a match, search it for the specific item that matches (there
+ * must be one), and construct the output values.
+ */
+ if (stats)
+ {
+ int i;
+ List *newlist = NIL;
+ MVNDistinctItem *item = NULL;
+
+ /* Find the specific item that exactly matches the combination */
+ for (i = 0; i < stats->nitems; i++)
+ {
+ MVNDistinctItem *tmpitem = &stats->items[i];
+
+ if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+ {
+ item = tmpitem;
+ break;
+ }
+ }
+
+ /* make sure we found an item */
+ if (!item)
+ elog(ERROR, "corrupt MVNDistinct entry");
+
+ /* Form the output varinfo list, keeping only unmatched ones */
+ foreach(lc, *varinfos)
+ {
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+ AttrNumber attnum;
+
+ if (!IsA(varinfo->var, Var))
+ {
+ newlist = lappend(newlist, varinfo);
+ continue;
+ }
+
+ attnum = ((Var *) varinfo->var)->varattno;
+ if (!bms_is_member(attnum, matched))
+ newlist = lappend(newlist, varinfo);
+ }
+
+ *varinfos = newlist;
+ *ndistinct = item->ndistinct;
+ return true;
+ }
+
+ return false;
+}
+
/*
* convert_to_scalar
* Convert non-NULL values of the indicated types to the comparison
* operators to estimate selectivity for the other's. This is outright
* wrong in some cases --- in particular signed versus unsigned
* interpretation could trip us up. But it's useful enough in the
- * majority of cases that we do it anyway. Should think about more
+ * majority of cases that we do it anyway. Should think about more
* rigorous ways to do it.
*/
switch (valuetypid)
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
case INETOID:
case CIDROID:
case MACADDROID:
+ case MACADDR8OID:
*scaledvalue = convert_network_to_scalar(value, valuetypid);
*scaledlobound = convert_network_to_scalar(lobound, boundstypid);
*scaledhibound = convert_network_to_scalar(hibound, boundstypid);
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}
return 0.0; /* empty string has scalar value 0 */
/*
- * Since base is at least 10, need not consider more than about 20 chars
+ * There seems little point in considering more than a dozen bytes from
+ * the string. Since base is at least 10, that will give us nominal
+ * resolution of at least 12 decimal digits, which is surely far more
+ * precision than this estimation technique has got anyway (especially in
+ * non-C locales). Also, even with the maximum possible base of 256, this
+ * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
+ * overflow on any known machine.
*/
- if (slen > 20)
- slen = 20;
+ if (slen > 12)
+ slen = 12;
/* Convert initial characters to fraction */
base = rangehi - rangelo + 1;
size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
/*
- * Note: originally we guessed at a suitable output buffer size, and
- * only needed to call strxfrm twice if our guess was too small.
- * However, it seems that some versions of Solaris have buggy strxfrm
- * that can write past the specified buffer length in that scenario.
- * So, do it the dumb way for portability.
- *
- * Yet other systems (e.g., glibc) sometimes return a smaller value
- * from the second call than the first; thus the Assert must be <= not
- * == as you'd expect. Can't any of these people program their way
- * out of a paper bag?
+ * XXX: We could guess at a suitable output buffer size and only call
+ * strxfrm twice if our guess is too small.
*
* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
* bogus data or set an error. This is not really a problem unless it
#endif
xfrmstr = (char *) palloc(xfrmlen + 1);
xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+
+ /*
+ * Some systems (e.g., glibc) can return a smaller value from the
+ * second call than the first; thus the Assert must be <= not ==.
+ */
Assert(xfrmlen2 <= xfrmlen);
pfree(val);
val = xfrmstr;
* average month length of 365.25/12.0 days. Not too
* accurate, but plenty good enough for our purposes.
*/
-#ifdef HAVE_INT64_TIMESTAMP
return interval->time + interval->day * (double) USECS_PER_DAY +
interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
-#else
- return interval->time + interval->day * SECS_PER_DAY +
- interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * (double) SECS_PER_DAY);
-#endif
}
case RELTIMEOID:
-#ifdef HAVE_INT64_TIMESTAMP
return (DatumGetRelativeTime(value) * 1000000.0);
-#else
- return DatumGetRelativeTime(value);
-#endif
case TINTERVALOID:
{
TimeInterval tinterval = DatumGetTimeInterval(value);
-#ifdef HAVE_INT64_TIMESTAMP
if (tinterval->status != 0)
return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
-#else
- if (tinterval->status != 0)
- return tinterval->data[1] - tinterval->data[0];
-#endif
return 0; /* for lack of a better idea */
}
case TIMEOID:
TimeTzADT *timetz = DatumGetTimeTzADTP(value);
/* use GMT-equivalent time */
-#ifdef HAVE_INT64_TIMESTAMP
return (double) (timetz->time + (timetz->zone * 1000000.0));
-#else
- return (double) (timetz->time + timetz->zone);
-#endif
}
}
right = (Node *) lsecond(args);
/*
- * Examine both sides. Note that when varRelid is nonzero, Vars of other
+ * Examine both sides. Note that when varRelid is nonzero, Vars of other
* relations will be treated as pseudoconstants.
*/
examine_variable(root, left, varRelid, vardata);
return true;
}
- /* Ooops, clause has wrong structure (probably var op var) */
+ /* Oops, clause has wrong structure (probably var op var) */
ReleaseVariableStats(*vardata);
ReleaseVariableStats(rdata);
* freefunc: pointer to a function to release statsTuple with.
* vartype: exposed type of the expression; this should always match
* the declared input type of the operator we are estimating for.
- * atttype, atttypmod: type data to pass to get_attstatsslot(). This is
+ * atttype, atttypmod: actual type/typmod of the "var" expression. This is
* commonly the same as the exposed type of the variable argument,
* but can be different in binary-compatible-type cases.
* isunique: TRUE if we were able to match the var to a unique index or a
* this query. (Caution: this should be trusted for statistical
* purposes only, since we do not check indimmediate nor verify that
* the exact same definition of equality applies.)
+ * acl_ok: TRUE if current user has permission to read the column(s)
+ * underlying the pg_statistic entry. This is consulted by
+ * statistic_proc_security_check().
*
* Caller is responsible for doing ReleaseVariableStats() before exiting.
*/
/*
* Okay, it's a more complicated expression. Determine variable
- * membership. Note that when varRelid isn't zero, only vars of that
+ * membership. Note that when varRelid isn't zero, only vars of that
* relation are considered "real" vars.
*/
varnos = pull_varnos(basenode);
if (onerel)
{
/*
- * We have an expression in vars of a single relation. Try to match
+ * We have an expression in vars of a single relation. Try to match
* it to expressional index columns, in hopes of finding some
* statistics.
*
* XXX it's conceivable that there are multiple matches with different
* index opfamilies; if so, we need to pick one that matches the
- * operator we are estimating for. FIXME later.
+ * operator we are estimating for. FIXME later.
*/
ListCell *ilist;
Int16GetDatum(pos + 1),
BoolGetDatum(false));
vardata->freefunc = ReleaseSysCache;
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* Get index's table for permission check */
+ RangeTblEntry *rte;
+
+ rte = planner_rt_fetch(index->rel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /*
+ * For simplicity, we insist on the whole
+ * table being selectable, rather than trying
+ * to identify which column(s) the index
+ * depends on.
+ */
+ vardata->acl_ok =
+ (pg_class_aclcheck(rte->relid, GetUserId(),
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ else
+ {
+ /* suppress leakproofness checks later */
+ vardata->acl_ok = true;
+ }
}
if (vardata->statsTuple)
break;
Int16GetDatum(var->varattno),
BoolGetDatum(rte->inh));
vardata->freefunc = ReleaseSysCache;
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* check if user has permission to read this column */
+ vardata->acl_ok =
+ (pg_class_aclcheck(rte->relid, GetUserId(),
+ ACL_SELECT) == ACLCHECK_OK) ||
+ (pg_attribute_aclcheck(rte->relid, var->varattno, GetUserId(),
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ else
+ {
+ /* suppress any possible leakproofness checks later */
+ vardata->acl_ok = true;
+ }
}
else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
{
*
* This is probably a harsher restriction than necessary; it's
* certainly OK for the selectivity estimator (which is a C function,
- * and therefore omnipotent anyway) to look at the statistics. But
+ * and therefore omnipotent anyway) to look at the statistics. But
* many selectivity estimators will happily *invoke the operator
* function* to try to work out a good estimate - and that's not OK.
* So for now, don't dig down for stats.
}
}
+/*
+ * Check whether it is permitted to call func_oid passing some of the
+ * pg_statistic data in vardata. We allow this either if the user has SELECT
+ * privileges on the table or column underlying the pg_statistic data or if
+ * the function is marked leak-proof.
+ */
+bool
+statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
+{
+ if (vardata->acl_ok)
+ return true;
+
+ if (!OidIsValid(func_oid))
+ return false;
+
+ if (get_func_leakproof(func_oid))
+ return true;
+
+ ereport(DEBUG2,
+ (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
+ get_func_name(func_oid))));
+ return false;
+}
+
/*
* get_variable_numdistinct
* Estimate the number of distinct values of a variable.
* *isdefault: set to TRUE if the result is a default rather than based on
* anything meaningful.
*
- * NB: be careful to produce an integral result, since callers may compare
- * the result to exact integer counts.
+ * NB: be careful to produce a positive integral result, since callers may
+ * compare the result to exact integer counts, or might divide by it.
*/
double
get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
{
double stadistinct;
+ double stanullfrac = 0.0;
double ntuples;
*isdefault = false;
/*
- * Determine the stadistinct value to use. There are cases where we can
+ * Determine the stadistinct value to use. There are cases where we can
* get an estimate even without a pg_statistic entry, or can get a better
- * value than is in pg_statistic.
+ * value than is in pg_statistic. Grab stanullfrac too if we can find it
+ * (otherwise, assume no nulls, for lack of any better idea).
*/
if (HeapTupleIsValid(vardata->statsTuple))
{
stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
stadistinct = stats->stadistinct;
+ stanullfrac = stats->stanullfrac;
}
else if (vardata->vartype == BOOLOID)
{
{
case ObjectIdAttributeNumber:
case SelfItemPointerAttributeNumber:
- stadistinct = -1.0; /* unique */
+ stadistinct = -1.0; /* unique (and all non null) */
break;
case TableOidAttributeNumber:
stadistinct = 1.0; /* only 1 value */
* If there is a unique index or DISTINCT clause for the variable, assume
* it is unique no matter what pg_statistic says; the statistics could be
* out of date, or we might have found a partial unique index that proves
- * the var is unique for this query.
+ * the var is unique for this query. However, we'd better still believe
+ * the null-fraction statistic.
*/
if (vardata->isunique)
- stadistinct = -1.0;
+ stadistinct = -1.0 * (1.0 - stanullfrac);
/*
* If we had an absolute estimate, use that.
*/
if (stadistinct > 0.0)
- return stadistinct;
+ return clamp_row_est(stadistinct);
/*
* Otherwise we need to get the relation size; punt if not available.
* If we had a relative estimate, use that.
*/
if (stadistinct < 0.0)
- return floor((-stadistinct * ntuples) + 0.5);
+ return clamp_row_est(-stadistinct * ntuples);
/*
* With no data, estimate ndistinct = ntuples if the table is small, else
* that the behavior isn't discontinuous.
*/
if (ntuples < DEFAULT_NUM_DISTINCT)
- return ntuples;
+ return clamp_row_est(ntuples);
*isdefault = true;
return DEFAULT_NUM_DISTINCT;
bool have_data = false;
int16 typLen;
bool typByVal;
- Datum *values;
- int nvalues;
+ Oid opfuncoid;
+ AttStatsSlot sslot;
int i;
/*
* XXX It's very tempting to try to use the actual column min and max, if
- * we can get them relatively-cheaply with an index probe. However, since
+ * we can get them relatively-cheaply with an index probe. However, since
* this function is called many times during join planning, that could
* have unpleasant effects on planning speed. Need more investigation
* before enabling this.
return false;
}
+ /*
+ * If we can't apply the sortop to the stats data, just fail. In
+ * principle, if there's a histogram and no MCVs, we could return the
+ * histogram endpoints without ever applying the sortop ... but it's
+ * probably not worth trying, because whatever the caller wants to do with
+ * the endpoints would likely fail the security check too.
+ */
+ if (!statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(sortop))))
+ return false;
+
get_typlenbyval(vardata->atttype, &typLen, &typByVal);
/*
* the one we want, fail --- this suggests that there is data we can't
* use.
*/
- if (get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, sortop,
- NULL,
- &values, &nvalues,
- NULL, NULL))
+ ATTSTATSSLOT_VALUES))
{
- if (nvalues > 0)
+ if (sslot.nvalues > 0)
{
- tmin = datumCopy(values[0], typByVal, typLen);
- tmax = datumCopy(values[nvalues - 1], typByVal, typLen);
+ tmin = datumCopy(sslot.values[0], typByVal, typLen);
+ tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
have_data = true;
}
- free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+ free_attstatsslot(&sslot);
}
- else if (get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ else if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, InvalidOid,
- NULL,
- &values, &nvalues,
- NULL, NULL))
+ 0))
{
- free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+ free_attstatsslot(&sslot);
return false;
}
* the MCVs. However, usually the MCVs will not be the extreme values, so
* avoid unnecessary data copying.
*/
- if (get_attstatsslot(vardata->statsTuple,
- vardata->atttype, vardata->atttypmod,
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- NULL,
- &values, &nvalues,
- NULL, NULL))
+ ATTSTATSSLOT_VALUES))
{
bool tmin_is_mcv = false;
bool tmax_is_mcv = false;
FmgrInfo opproc;
- fmgr_info(get_opcode(sortop), &opproc);
+ fmgr_info(opfuncoid, &opproc);
- for (i = 0; i < nvalues; i++)
+ for (i = 0; i < sslot.nvalues; i++)
{
if (!have_data)
{
- tmin = tmax = values[i];
+ tmin = tmax = sslot.values[i];
tmin_is_mcv = tmax_is_mcv = have_data = true;
continue;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
DEFAULT_COLLATION_OID,
- values[i], tmin)))
+ sslot.values[i], tmin)))
{
- tmin = values[i];
+ tmin = sslot.values[i];
tmin_is_mcv = true;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
DEFAULT_COLLATION_OID,
- tmax, values[i])))
+ tmax, sslot.values[i])))
{
- tmax = values[i];
+ tmax = sslot.values[i];
tmax_is_mcv = true;
}
}
tmin = datumCopy(tmin, typByVal, typLen);
if (tmax_is_mcv)
tmax = datumCopy(tmax, typByVal, typLen);
- free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
+ free_attstatsslot(&sslot);
}
*min = tmin;
HeapTuple tup;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
+ SnapshotData SnapshotDirty;
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
econtext->ecxt_scantuple = slot;
get_typlenbyval(vardata->atttype, &typLen, &typByVal);
+ InitDirtySnapshot(SnapshotDirty);
/* set up an IS NOT NULL scan key so that we ignore nulls */
ScanKeyEntryInitialize(&scankeys[0],
/* If min is requested ... */
if (min)
{
- index_scan = index_beginscan(heapRel, indexRel,
- GetActiveSnapshot(), 1, 0);
+ /*
+ * In principle, we should scan the index with our current
+ * active snapshot, which is the best approximation we've got
+ * to what the query will see when executed. But that won't
+ * be exact if a new snap is taken before running the query,
+ * and it can be very expensive if a lot of uncommitted rows
+ * exist at the end of the index (because we'll laboriously
+ * fetch each one and reject it). What seems like a good
+ * compromise is to use SnapshotDirty. That will accept
+ * uncommitted rows, and thus avoid fetching multiple heap
+ * tuples in this scenario. On the other hand, it will reject
+ * known-dead rows, and thus not give a bogus answer when the
+ * extreme value has been deleted; that case motivates not
+ * using SnapshotAny here.
+ */
+ index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
+ 1, 0);
index_rescan(index_scan, scankeys, 1, NULL, 0);
/* Fetch first tuple in sortop's direction */
/* If max is requested, and we didn't find the index is empty */
if (max && have_data)
{
- index_scan = index_beginscan(heapRel, indexRel,
- GetActiveSnapshot(), 1, 0);
+ index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
+ 1, 0);
index_rescan(index_scan, scankeys, 1, NULL, 0);
/* Fetch first tuple in reverse direction */
/*
* Check whether char is a letter (and, hence, subject to case-folding)
*
- * In multibyte character sets, we can't use isalpha, and it does not seem
- * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does not seem
+ * worth trying to convert to wchar_t to use iswalpha. Instead, just assume
* any multibyte char is potentially case-varying.
*/
static int
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
else if (is_multibyte && IS_HIGHBIT_SET(c))
return true;
+ else if (locale && locale->provider == COLLPROVIDER_ICU)
+ return IS_HIGHBIT_SET(c) ? true : false;
#ifdef HAVE_LOCALE_T
- else if (locale)
- return isalpha_l((unsigned char) c, locale);
+ else if (locale && locale->provider == COLLPROVIDER_LIBC)
+ return isalpha_l((unsigned char) c, locale->info.lt);
#endif
else
return isalpha((unsigned char) c);
}
else
{
- bytea *bstr = DatumGetByteaP(patt_const->constvalue);
+ bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
- pattlen = VARSIZE(bstr) - VARHDRSZ;
+ pattlen = VARSIZE_ANY_EXHDR(bstr);
patt = (char *) palloc(pattlen);
- memcpy(patt, VARDATA(bstr), pattlen);
- if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
- pfree(bstr);
+ memcpy(patt, VARDATA_ANY(bstr), pattlen);
+ Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
}
match = palloc(pattlen + 1);
* together with info about MCVs and NULLs.
*
* We use the >= and < operators from the specified btree opfamily to do the
- * estimation. The given variable and Const must be of the associated
+ * estimation. The given variable and Const must be of the associated
* datatype.
*
* XXX Note: we make use of the upper bound to estimate operator selectivity
/*
* Merge the two selectivities in the same way as for a range query
- * (see clauselist_selectivity()). Note that we don't need to worry
+ * (see clauselist_selectivity()). Note that we don't need to worry
* about double-exclusion of nulls, since ineq_histogram_selectivity
* doesn't count those anyway.
*/
* that is not a bulletproof guarantee that an extension of the string might
* not sort after it; an example is that "foo " is less than "foo!", but it
* is not clear that a "dictionary" sort ordering will consider "foo!" less
- * than "foo bar". CAUTION: Therefore, this function should be used only for
+ * than "foo bar". CAUTION: Therefore, this function should be used only for
* estimation purposes when working in a non-C collation.
*
* To try to catch most cases where an extended string might otherwise sort
}
else if (datatype == BYTEAOID)
{
- bytea *bstr = DatumGetByteaP(str_const->constvalue);
+ bytea *bstr = DatumGetByteaPP(str_const->constvalue);
- len = VARSIZE(bstr) - VARHDRSZ;
+ len = VARSIZE_ANY_EXHDR(bstr);
workstr = (char *) palloc(len);
- memcpy(workstr, VARDATA(bstr), len);
- if ((Pointer) bstr != DatumGetPointer(str_const->constvalue))
- pfree(bstr);
+ memcpy(workstr, VARDATA_ANY(bstr), len);
+ Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
cmpstr = str_const->constvalue;
}
else
*-------------------------------------------------------------------------
*/
+List *
+deconstruct_indexquals(IndexPath *path)
+{
+ List *result = NIL;
+ IndexOptInfo *index = path->indexinfo;
+ ListCell *lcc,
+ *lci;
+
+ forboth(lcc, path->indexquals, lci, path->indexqualcols)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc);
+ int indexcol = lfirst_int(lci);
+ Expr *clause;
+ Node *leftop,
+ *rightop;
+ IndexQualInfo *qinfo;
+
+ clause = rinfo->clause;
+
+ qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
+ qinfo->rinfo = rinfo;
+ qinfo->indexcol = indexcol;
+
+ if (IsA(clause, OpExpr))
+ {
+ qinfo->clause_op = ((OpExpr *) clause)->opno;
+ leftop = get_leftop(clause);
+ rightop = get_rightop(clause);
+ if (match_index_to_operand(leftop, indexcol, index))
+ {
+ qinfo->varonleft = true;
+ qinfo->other_operand = rightop;
+ }
+ else
+ {
+ Assert(match_index_to_operand(rightop, indexcol, index));
+ qinfo->varonleft = false;
+ qinfo->other_operand = leftop;
+ }
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ RowCompareExpr *rc = (RowCompareExpr *) clause;
+
+ qinfo->clause_op = linitial_oid(rc->opnos);
+ /* Examine only first columns to determine left/right sides */
+ if (match_index_to_operand((Node *) linitial(rc->largs),
+ indexcol, index))
+ {
+ qinfo->varonleft = true;
+ qinfo->other_operand = (Node *) rc->rargs;
+ }
+ else
+ {
+ Assert(match_index_to_operand((Node *) linitial(rc->rargs),
+ indexcol, index));
+ qinfo->varonleft = false;
+ qinfo->other_operand = (Node *) rc->largs;
+ }
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+
+ qinfo->clause_op = saop->opno;
+ /* index column is always on the left in this case */
+ Assert(match_index_to_operand((Node *) linitial(saop->args),
+ indexcol, index));
+ qinfo->varonleft = true;
+ qinfo->other_operand = (Node *) lsecond(saop->args);
+ }
+ else if (IsA(clause, NullTest))
+ {
+ qinfo->clause_op = InvalidOid;
+ Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
+ indexcol, index));
+ qinfo->varonleft = true;
+ qinfo->other_operand = NULL;
+ }
+ else
+ {
+ elog(ERROR, "unsupported indexqual type: %d",
+ (int) nodeTag(clause));
+ }
+
+ result = lappend(result, qinfo);
+ }
+ return result;
+}
+
/*
- * genericcostestimate is a general-purpose estimator that can be used for
- * most index types. In some cases we use genericcostestimate as the base
- * code and then incorporate additional index-type-specific knowledge in
- * the type-specific calling function. To avoid code duplication, we make
- * genericcostestimate return a number of intermediate values as well as
- * its preliminary estimates of the output cost values. The GenericCosts
- * struct includes all these values.
+ * Simple function to compute the total eval cost of the "other operands"
+ * in an IndexQualInfo list. Since we know these will be evaluated just
+ * once per scan, there's no need to distinguish startup from per-row cost.
+ */
+static Cost
+other_operands_eval_cost(PlannerInfo *root, List *qinfos)
+{
+ Cost qual_arg_cost = 0;
+ ListCell *lc;
+
+ foreach(lc, qinfos)
+ {
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
+ QualCost index_qual_cost;
+
+ cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
+ qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ }
+ return qual_arg_cost;
+}
+
+/*
+ * Get other-operand eval cost for an index orderby list.
*
- * Callers should initialize all fields of GenericCosts to zero. In addition,
- * they can set numIndexTuples to some positive value if they have a better
- * than default way of estimating the number of leaf index tuples visited.
+ * Index orderby expressions aren't represented as RestrictInfos (since they
+ * aren't boolean, usually). So we can't apply deconstruct_indexquals to
+ * them. However, they are much simpler to deal with since they are always
+ * OpExprs and the index column is always on the left.
*/
-typedef struct
+static Cost
+orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
{
- /* These are the values the cost estimator must return to the planner */
- Cost indexStartupCost; /* index-related startup cost */
- Cost indexTotalCost; /* total index-related scan cost */
- Selectivity indexSelectivity; /* selectivity of index */
- double indexCorrelation; /* order correlation of index */
-
- /* Intermediate values we obtain along the way */
- double numIndexPages; /* number of leaf pages visited */
- double numIndexTuples; /* number of leaf tuples visited */
- double spc_random_page_cost; /* relevant random_page_cost value */
- double num_sa_scans; /* # indexscans from ScalarArrayOps */
-} GenericCosts;
+ Cost qual_arg_cost = 0;
+ ListCell *lc;
-static void
+ foreach(lc, path->indexorderbys)
+ {
+ Expr *clause = (Expr *) lfirst(lc);
+ Node *other_operand;
+ QualCost index_qual_cost;
+
+ if (IsA(clause, OpExpr))
+ {
+ other_operand = get_rightop(clause);
+ }
+ else
+ {
+ elog(ERROR, "unsupported indexorderby type: %d",
+ (int) nodeTag(clause));
+ other_operand = NULL; /* keep compiler quiet */
+ }
+
+ cost_qual_eval_node(&index_qual_cost, other_operand, root);
+ qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ }
+ return qual_arg_cost;
+}
+
+void
genericcostestimate(PlannerInfo *root,
IndexPath *path,
double loop_count,
+ List *qinfos,
GenericCosts *costs)
{
IndexOptInfo *index = path->indexinfo;
double num_sa_scans;
double num_outer_scans;
double num_scans;
- QualCost index_qual_cost;
double qual_op_cost;
double qual_arg_cost;
List *selectivityQuals;
*
* In practice access to upper index levels is often nearly free because
* those tend to stay in cache under load; moreover, the cost involved is
- * highly dependent on index type. We therefore ignore such costs here
+ * highly dependent on index type. We therefore ignore such costs here
* and leave it to the caller to add a suitable charge if needed.
*/
if (index->pages > 1 && index->tuples > 1)
else
numIndexPages = 1.0;
- /* fetch estimated page cost for schema containing index */
+ /* fetch estimated page cost for tablespace containing index */
get_tablespace_page_costs(index->reltablespace,
&spc_random_page_cost,
NULL);
* The above calculations are all per-index-scan. However, if we are in a
* nestloop inner scan, we can expect the scan to be repeated (with
* different search keys) for each row of the outer relation. Likewise,
- * ScalarArrayOpExpr quals result in multiple index scans. This creates
+ * ScalarArrayOpExpr quals result in multiple index scans. This creates
* the potential for cache effects to reduce the number of disk page
- * fetches needed. We want to estimate the average per-scan I/O cost in
+ * fetches needed. We want to estimate the average per-scan I/O cost in
* the presence of caching.
*
* We use the Mackert-Lohman formula (see costsize.c for details) to
* evaluated once at the start of the scan to reduce them to runtime keys
* to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
* CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
- * indexqual operator. Because we have numIndexTuples as a per-scan
+ * indexqual operator. Because we have numIndexTuples as a per-scan
* number, we have to multiply by num_sa_scans to get the correct result
* for ScalarArrayOpExpr cases. Similarly add in costs for any index
* ORDER BY expressions.
* Detecting that that might be needed seems more expensive than it's
* worth, though, considering all the other inaccuracies here ...
*/
- cost_qual_eval(&index_qual_cost, indexQuals, root);
- qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
- cost_qual_eval(&index_qual_cost, indexOrderBys, root);
- qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+ orderby_operands_eval_cost(root, path);
qual_op_cost = cpu_operator_cost *
(list_length(indexQuals) + list_length(indexOrderBys));
- qual_arg_cost -= qual_op_cost;
- if (qual_arg_cost < 0) /* just in case... */
- qual_arg_cost = 0;
indexStartupCost = qual_arg_cost;
indexTotalCost += qual_arg_cost;
* ANDing the index predicate with the explicitly given indexquals produces
* a more accurate idea of the index's selectivity. However, we need to be
* careful not to insert redundant clauses, because clauselist_selectivity()
- * is easily fooled into computing a too-low selectivity estimate. Our
+ * is easily fooled into computing a too-low selectivity estimate. Our
* approach is to add only the predicate clause(s) that cannot be proven to
- * be implied by the given indexquals. This successfully handles cases such
+ * be implied by the given indexquals. This successfully handles cases such
* as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
* There are many other cases where we won't detect redundancy, leading to a
* too-low selectivity estimate, which will bias the system in favor of using
- * partial indexes where possible. That is not necessarily bad though.
+ * partial indexes where possible. That is not necessarily bad though.
*
* Note that indexQuals contains RestrictInfo nodes while the indpred
- * does not, so the output list will be mixed. This is OK for both
+ * does not, so the output list will be mixed. This is OK for both
* predicate_implied_by() and clauselist_selectivity(), but might be
* problematic if the result were passed to other things.
*/
}
-Datum
-btcostestimate(PG_FUNCTION_ARGS)
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
+ List *qinfos;
GenericCosts costs;
Oid relid;
AttrNumber colnum;
bool found_saop;
bool found_is_null_op;
double num_sa_scans;
- ListCell *lcc,
- *lci;
+ ListCell *lc;
+
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
/*
* For a btree scan, only leading '=' quals plus inequality quals for the
* the index scan). Additional quals can suppress visits to the heap, so
* it's OK to count them in indexSelectivity, but they should not count
* for estimating numIndexTuples. So we must examine the given indexquals
- * to find out which ones count as boundary quals. We rely on the
+ * to find out which ones count as boundary quals. We rely on the
* knowledge that they are given in index column order.
*
* For a RowCompareExpr, we consider only the first column, just as
found_saop = false;
found_is_null_op = false;
num_sa_scans = 1;
- forboth(lcc, path->indexquals, lci, path->indexqualcols)
+ foreach(lc, qinfos)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc);
- Expr *clause;
- Node *leftop,
- *rightop PG_USED_FOR_ASSERTS_ONLY;
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
+ RestrictInfo *rinfo = qinfo->rinfo;
+ Expr *clause = rinfo->clause;
Oid clause_op;
int op_strategy;
- bool is_null_op = false;
- if (indexcol != lfirst_int(lci))
+ if (indexcol != qinfo->indexcol)
{
/* Beginning of a new column's quals */
if (!eqQualHere)
break; /* done if no '=' qual for indexcol */
eqQualHere = false;
indexcol++;
- if (indexcol != lfirst_int(lci))
+ if (indexcol != qinfo->indexcol)
break; /* no quals at all for indexcol */
}
- Assert(IsA(rinfo, RestrictInfo));
- clause = rinfo->clause;
-
- if (IsA(clause, OpExpr))
- {
- leftop = get_leftop(clause);
- rightop = get_rightop(clause);
- clause_op = ((OpExpr *) clause)->opno;
- }
- else if (IsA(clause, RowCompareExpr))
- {
- RowCompareExpr *rc = (RowCompareExpr *) clause;
-
- leftop = (Node *) linitial(rc->largs);
- rightop = (Node *) linitial(rc->rargs);
- clause_op = linitial_oid(rc->opnos);
- }
- else if (IsA(clause, ScalarArrayOpExpr))
+ if (IsA(clause, ScalarArrayOpExpr))
{
- ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+ int alength = estimate_array_length(qinfo->other_operand);
- leftop = (Node *) linitial(saop->args);
- rightop = (Node *) lsecond(saop->args);
- clause_op = saop->opno;
found_saop = true;
+ /* count up number of SA scans induced by indexBoundQuals only */
+ if (alength > 1)
+ num_sa_scans *= alength;
}
else if (IsA(clause, NullTest))
{
NullTest *nt = (NullTest *) clause;
- leftop = (Node *) nt->arg;
- rightop = NULL;
- clause_op = InvalidOid;
if (nt->nulltesttype == IS_NULL)
{
found_is_null_op = true;
- is_null_op = true;
+ /* IS NULL is like = for selectivity determination purposes */
+ eqQualHere = true;
}
}
- else
- {
- elog(ERROR, "unsupported indexqual type: %d",
- (int) nodeTag(clause));
- continue; /* keep compiler quiet */
- }
- if (match_index_to_operand(leftop, indexcol, index))
- {
- /* clause_op is correct */
- }
- else
- {
- Assert(match_index_to_operand(rightop, indexcol, index));
- /* Must flip operator to get the opfamily member */
- clause_op = get_commutator(clause_op);
- }
+ /*
+ * We would need to commute the clause_op if not varonleft, except
+ * that we only care if it's equality or not, so that refinement is
+ * unnecessary.
+ */
+ clause_op = qinfo->clause_op;
/* check for equality operator */
if (OidIsValid(clause_op))
if (op_strategy == BTEqualStrategyNumber)
eqQualHere = true;
}
- else if (is_null_op)
- {
- /* IS NULL is like = for purposes of selectivity determination */
- eqQualHere = true;
- }
- /* count up number of SA scans induced by indexBoundQuals only */
- if (IsA(clause, ScalarArrayOpExpr))
- {
- ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
- int alength = estimate_array_length(lsecond(saop->args));
- if (alength > 1)
- num_sa_scans *= alength;
- }
indexBoundQuals = lappend(indexBoundQuals, rinfo);
}
MemSet(&costs, 0, sizeof(costs));
costs.numIndexTuples = numIndexTuples;
- genericcostestimate(root, path, loop_count, &costs);
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
/*
* Add a CPU-cost component to represent the costs of initial btree
if (HeapTupleIsValid(vardata.statsTuple))
{
Oid sortop;
- float4 *numbers;
- int nnumbers;
+ AttStatsSlot sslot;
sortop = get_opfamily_member(index->opfamily[0],
index->opcintype[0],
index->opcintype[0],
BTLessStrategyNumber);
if (OidIsValid(sortop) &&
- get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
- STATISTIC_KIND_CORRELATION,
- sortop,
- NULL,
- NULL, NULL,
- &numbers, &nnumbers))
+ get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_CORRELATION, sortop,
+ ATTSTATSSLOT_NUMBERS))
{
double varCorrelation;
- Assert(nnumbers == 1);
- varCorrelation = numbers[0];
+ Assert(sslot.nnumbers == 1);
+ varCorrelation = sslot.numbers[0];
if (index->reverse_sort[0])
varCorrelation = -varCorrelation;
else
costs.indexCorrelation = varCorrelation;
- free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
+ free_attstatsslot(&sslot);
}
}
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
-Datum
-hashcostestimate(PG_FUNCTION_ARGS)
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ List *qinfos;
GenericCosts costs;
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
MemSet(&costs, 0, sizeof(costs));
- genericcostestimate(root, path, loop_count, &costs);
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
/*
* A hash index has no descent costs as such, since the index AM can go
* because the hash AM makes sure that's always one page.
*
* Likewise, we could consider charging some CPU for each index tuple in
- * the bucket, if we knew how many there were. But the per-tuple cost is
+ * the bucket, if we knew how many there were. But the per-tuple cost is
* just a hash value comparison, not a general datatype-dependent
* comparison, so any such charge ought to be quite a bit less than
* cpu_operator_cost; which makes it probably not worth worrying about.
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
-Datum
-gistcostestimate(PG_FUNCTION_ARGS)
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
+ List *qinfos;
GenericCosts costs;
Cost descentCost;
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
MemSet(&costs, 0, sizeof(costs));
- genericcostestimate(root, path, loop_count, &costs);
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
/*
* We model index descent costs similarly to those for btree, but to do
/*
* Add a CPU-cost component to represent the costs of initial descent. We
* just use log(N) here not log2(N) since the branching factor isn't
- * necessarily two anyway. As for btree, charge once per SA scan.
+ * necessarily two anyway. As for btree, charge once per SA scan.
*/
if (index->tuples > 1) /* avoid computing log(0) */
{
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
-Datum
-spgcostestimate(PG_FUNCTION_ARGS)
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
+ List *qinfos;
GenericCosts costs;
Cost descentCost;
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
+
MemSet(&costs, 0, sizeof(costs));
- genericcostestimate(root, path, loop_count, &costs);
+ genericcostestimate(root, path, loop_count, qinfos, &costs);
/*
* We model index descent costs similarly to those for btree, but to do
/*
* Add a CPU-cost component to represent the costs of initial descent. We
* just use log(N) here not log2(N) since the branching factor isn't
- * necessarily two anyway. As for btree, charge once per SA scan.
+ * necessarily two anyway. As for btree, charge once per SA scan.
*/
if (index->tuples > 1) /* avoid computing log(0) */
{
*indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation;
-
- PG_RETURN_VOID();
+ *indexPages = costs.numIndexPages;
}
double arrayScans;
} GinQualCounts;
-/* Find the index column matching "op"; return its index, or -1 if no match */
-static int
-find_index_column(Node *op, IndexOptInfo *index)
-{
- int i;
-
- for (i = 0; i < index->ncolumns; i++)
- {
- if (match_index_to_operand(op, i, index))
- return i;
- }
-
- return -1;
-}
-
/*
* Estimate the number of index terms that need to be searched for while
* testing the given GIN query, and increment the counts in *counts
/*
* Get the operator's strategy number and declared input data types within
- * the index opfamily. (We don't need the latter, but we use
+ * the index opfamily. (We don't need the latter, but we use
* get_op_opfamily_properties because it will throw error if it fails to
* find a matching pg_amop entry.)
*/
* appropriately. If the query is unsatisfiable, return false.
*/
static bool
-gincost_opexpr(IndexOptInfo *index, OpExpr *clause, GinQualCounts *counts)
+gincost_opexpr(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexQualInfo *qinfo,
+ GinQualCounts *counts)
{
- Node *leftop = get_leftop((Expr *) clause);
- Node *rightop = get_rightop((Expr *) clause);
- Oid clause_op = clause->opno;
- int indexcol;
- Node *operand;
+ int indexcol = qinfo->indexcol;
+ Oid clause_op = qinfo->clause_op;
+ Node *operand = qinfo->other_operand;
- /* Locate the operand being compared to the index column */
- if ((indexcol = find_index_column(leftop, index)) >= 0)
+ if (!qinfo->varonleft)
{
- operand = rightop;
- }
- else if ((indexcol = find_index_column(rightop, index)) >= 0)
- {
- operand = leftop;
+ /* must commute the operator */
clause_op = get_commutator(clause_op);
}
- else
- {
- elog(ERROR, "could not match index to operand");
- operand = NULL; /* keep compiler quiet */
- }
+
+ /* aggressively reduce to a constant, and look through relabeling */
+ operand = estimate_expression_value(root, operand);
if (IsA(operand, RelabelType))
operand = (Node *) ((RelabelType *) operand)->arg;
* each of which involves one value from the RHS array, plus all the
* non-array quals (if any). To model this, we average the counts across
* the RHS elements, and add the averages to the counts in *counts (which
- * correspond to per-indexscan costs). We also multiply counts->arrayScans
+ * correspond to per-indexscan costs). We also multiply counts->arrayScans
* by N, causing gincostestimate to scale up its estimates accordingly.
*/
static bool
-gincost_scalararrayopexpr(IndexOptInfo *index, ScalarArrayOpExpr *clause,
+gincost_scalararrayopexpr(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexQualInfo *qinfo,
double numIndexEntries,
GinQualCounts *counts)
{
- Node *leftop = (Node *) linitial(clause->args);
- Node *rightop = (Node *) lsecond(clause->args);
- Oid clause_op = clause->opno;
- int indexcol;
+ int indexcol = qinfo->indexcol;
+ Oid clause_op = qinfo->clause_op;
+ Node *rightop = qinfo->other_operand;
ArrayType *arrayval;
int16 elmlen;
bool elmbyval;
int numPossible = 0;
int i;
- Assert(clause->useOr);
+ Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);
- /* index column must be on the left */
- if ((indexcol = find_index_column(leftop, index)) < 0)
- elog(ERROR, "could not match index to operand");
+ /* aggressively reduce to a constant, and look through relabeling */
+ rightop = estimate_expression_value(root, rightop);
if (IsA(rightop, RelabelType))
rightop = (Node *) ((RelabelType *) rightop)->arg;
/*
* GIN has search behavior completely different from other index types
*/
-Datum
-gincostestimate(PG_FUNCTION_ARGS)
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- double loop_count = PG_GETARG_FLOAT8(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
List *indexOrderBys = path->indexorderbys;
+ List *qinfos;
ListCell *l;
List *selectivityQuals;
double numPages = index->pages,
numEntries;
GinQualCounts counts;
bool matchPossible;
+ double partialScale;
double entryPagesFetched,
dataPagesFetched,
dataPagesFetchedBySel;
qual_arg_cost,
spc_random_page_cost,
outer_scans;
- QualCost index_qual_cost;
Relation indexRel;
GinStatsData ginStats;
- /*
- * Obtain statistic information from the meta page
- */
- indexRel = index_open(index->indexoid, AccessShareLock);
- ginGetStats(indexRel, &ginStats);
- index_close(indexRel, AccessShareLock);
-
- numEntryPages = ginStats.nEntryPages;
- numDataPages = ginStats.nDataPages;
- numPendingPages = ginStats.nPendingPages;
- numEntries = ginStats.nEntries;
+ /* Do preliminary analysis of indexquals */
+ qinfos = deconstruct_indexquals(path);
/*
- * nPendingPages can be trusted, but the other fields are as of the last
- * VACUUM. Scale them by the ratio numPages / nTotalPages to account for
- * growth since then. If the fields are zero (implying no VACUUM at all,
- * and an index created pre-9.1), assume all pages are entry pages.
+ * Obtain statistical information from the meta page, if possible. Else
+ * set ginStats to zeroes, and we'll cope below.
*/
- if (ginStats.nTotalPages == 0 || ginStats.nEntryPages == 0)
+ if (!index->hypothetical)
{
- numEntryPages = numPages;
- numDataPages = 0;
- numEntries = numTuples; /* bogus, but no other info available */
+ indexRel = index_open(index->indexoid, AccessShareLock);
+ ginGetStats(indexRel, &ginStats);
+ index_close(indexRel, AccessShareLock);
}
else
{
+ memset(&ginStats, 0, sizeof(ginStats));
+ }
+
+ /*
+ * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+ * trusted, but the other fields are data as of the last VACUUM. We can
+ * scale them up to account for growth since then, but that method only
+ * goes so far; in the worst case, the stats might be for a completely
+ * empty index, and scaling them will produce pretty bogus numbers.
+ * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+ * it's grown more than that, fall back to estimating things only from the
+ * assumed-accurate index size. But we'll trust nPendingPages in any case
+ * so long as it's not clearly insane, ie, more than the index size.
+ */
+ if (ginStats.nPendingPages < numPages)
+ numPendingPages = ginStats.nPendingPages;
+ else
+ numPendingPages = 0;
+
+ if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+ ginStats.nTotalPages > numPages / 4 &&
+ ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
+ {
+ /*
+ * OK, the stats seem close enough to sane to be trusted. But we
+ * still need to scale them by the ratio numPages / nTotalPages to
+ * account for growth since the last VACUUM.
+ */
double scale = numPages / ginStats.nTotalPages;
- numEntryPages = ceil(numEntryPages * scale);
- numDataPages = ceil(numDataPages * scale);
- numEntries = ceil(numEntries * scale);
+ numEntryPages = ceil(ginStats.nEntryPages * scale);
+ numDataPages = ceil(ginStats.nDataPages * scale);
+ numEntries = ceil(ginStats.nEntries * scale);
/* ensure we didn't round up too much */
- numEntryPages = Min(numEntryPages, numPages);
- numDataPages = Min(numDataPages, numPages - numEntryPages);
+ numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+ numDataPages = Min(numDataPages,
+ numPages - numPendingPages - numEntryPages);
+ }
+ else
+ {
+ /*
+ * We might get here because it's a hypothetical index, or an index
+ * created pre-9.1 and never vacuumed since upgrading (in which case
+ * its stats would read as zeroes), or just because it's grown too
+ * much since the last VACUUM for us to put our faith in scaling.
+ *
+ * Invent some plausible internal statistics based on the index page
+ * count (and clamp that to at least 10 pages, just in case). We
+ * estimate that 90% of the index is entry pages, and the rest is data
+ * pages. Estimate 100 entries per entry page; this is rather bogus
+ * since it'll depend on the size of the keys, but it's more robust
+ * than trying to predict the number of entries per heap tuple.
+ */
+ numPages = Max(numPages, 10);
+ numEntryPages = floor((numPages - numPendingPages) * 0.90);
+ numDataPages = numPages - numPendingPages - numEntryPages;
+ numEntries = floor(numEntryPages * 100);
}
/* In an empty index, numEntries could be zero. Avoid divide-by-zero */
JOIN_INNER,
NULL);
- /* fetch estimated page cost for schema containing index */
+ /* fetch estimated page cost for tablespace containing index */
get_tablespace_page_costs(index->reltablespace,
&spc_random_page_cost,
NULL);
counts.arrayScans = 1;
matchPossible = true;
- foreach(l, indexQuals)
+ foreach(l, qinfos)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Expr *clause;
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+ Expr *clause = qinfo->rinfo->clause;
- Assert(IsA(rinfo, RestrictInfo));
- clause = rinfo->clause;
if (IsA(clause, OpExpr))
{
- matchPossible = gincost_opexpr(index,
- (OpExpr *) clause,
+ matchPossible = gincost_opexpr(root,
+ index,
+ qinfo,
&counts);
if (!matchPossible)
break;
}
else if (IsA(clause, ScalarArrayOpExpr))
{
- matchPossible = gincost_scalararrayopexpr(index,
- (ScalarArrayOpExpr *) clause,
+ matchPossible = gincost_scalararrayopexpr(root,
+ index,
+ qinfo,
numEntries,
&counts);
if (!matchPossible)
*indexStartupCost = 0;
*indexTotalCost = 0;
*indexSelectivity = 0;
- PG_RETURN_VOID();
+ return;
}
if (counts.haveFullScan || indexQuals == NIL)
/*
* Add an estimate of entry pages read by partial match algorithm. It's a
- * scan over leaf pages in entry tree. We haven't any useful stats here,
- * so estimate it as proportion.
+ * scan over leaf pages in entry tree. We haven't any useful stats here,
+ * so estimate it as proportion. Because counts.partialEntries is really
+ * pretty bogus (see code above), it's possible that it is more than
+ * numEntries; clamp the proportion to ensure sanity.
*/
- entryPagesFetched += ceil(numEntryPages * counts.partialEntries / numEntries);
+ partialScale = counts.partialEntries / numEntries;
+ partialScale = Min(partialScale, 1.0);
+
+ entryPagesFetched += ceil(numEntryPages * partialScale);
/*
* Partial match algorithm reads all data pages before doing actual scan,
- * so it's a startup cost. Again, we haven't any useful stats here, so,
- * estimate it as proportion
+ * so it's a startup cost. Again, we haven't any useful stats here, so
+ * estimate it as proportion.
*/
- dataPagesFetched = ceil(numDataPages * counts.partialEntries / numEntries);
+ dataPagesFetched = ceil(numDataPages * partialScale);
/*
* Calculate cache effects if more than one scan due to nestloops or array
*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
/*
- * Now we compute the number of data pages fetched while the scan
- * proceeds.
+ * Now compute the number of data pages fetched during the scan.
+ *
+ * We assume every entry to have the same number of items, and that there
+ * is no overlap between them. (XXX: tsvector and array opclasses collect
+ * statistics on the frequency of individual keys; it would be nice to use
+ * those here.)
*/
-
- /* data pages scanned for each exact (non-partial) matched entry */
dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
/*
- * Estimate number of data pages read, using selectivity estimation and
- * capacity of data page.
+ * If there is a lot of overlap among the entries, in particular if one of
+ * the entries is very frequent, the above calculation can grossly
+ * under-estimate. As a simple cross-check, calculate a lower bound based
+ * on the overall selectivity of the quals. At a minimum, we must read
+ * one item pointer for each matching entry.
+ *
+ * The width of each item pointer varies, based on the level of
+ * compression. We don't have statistics on that, but an average of
+ * around 3 bytes per item is fairly typical.
*/
dataPagesFetchedBySel = ceil(*indexSelectivity *
- (numTuples / (BLCKSZ / SizeOfIptrData)));
-
+ (numTuples / (BLCKSZ / 3)));
if (dataPagesFetchedBySel > dataPagesFetched)
- {
- /*
- * At least one of entries is very frequent and, unfortunately, we
- * couldn't get statistic about entries (only tsvector has such
- * statistics). So, we obviously have too small estimation of pages
- * fetched from data tree. Re-estimate it from known capacity of data
- * pages
- */
dataPagesFetched = dataPagesFetchedBySel;
- }
/* Account for cache effects, the same as above */
if (outer_scans > 1 || counts.arrayScans > 1)
/*
* Add on index qual eval costs, much as in genericcostestimate
*/
- cost_qual_eval(&index_qual_cost, indexQuals, root);
- qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
- cost_qual_eval(&index_qual_cost, indexOrderBys, root);
- qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+ orderby_operands_eval_cost(root, path);
qual_op_cost = cpu_operator_cost *
(list_length(indexQuals) + list_length(indexOrderBys));
- qual_arg_cost -= qual_op_cost;
- if (qual_arg_cost < 0) /* just in case... */
- qual_arg_cost = 0;
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = dataPagesFetched;
+}
+
+/*
+ * BRIN has search behavior completely different from other index types
+ */
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = path->indexquals;
+ double numPages = index->pages;
+ RelOptInfo *baserel = index->rel;
+ RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+ List *qinfos;
+ Cost spc_seq_page_cost;
+ Cost spc_random_page_cost;
+ double qual_arg_cost;
+ double qualSelectivity;
+ BrinStatsData statsData;
+ double indexRanges;
+ double minimalRanges;
+ double estimatedRanges;
+ double selec;
+ Relation indexRel;
+ ListCell *l;
+ VariableStatData vardata;
+
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /* fetch estimated page cost for the tablespace containing the index */
+ get_tablespace_page_costs(index->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /*
+ * Obtain some data from the index itself.
+ */
+ indexRel = index_open(index->indexoid, AccessShareLock);
+ brinGetStats(indexRel, &statsData);
+ index_close(indexRel, AccessShareLock);
+
+ /*
+ * Compute index correlation
+ *
+ * Because we can use all index quals equally when scanning, we can use
+ * the largest correlation (in absolute value) among columns used by the
+ * query. Start at zero, the worst possible case. If we cannot find any
+ * correlation statistics, we will keep it as 0.
+ */
+ *indexCorrelation = 0;
+
+ qinfos = deconstruct_indexquals(path);
+ foreach(l, qinfos)
+ {
+ IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+ AttrNumber attnum = index->indexkeys[qinfo->indexcol];
+
+ /* attempt to lookup stats in relation for this index column */
+ if (attnum != 0)
+ {
+ /* Simple variable -- look to stats for the underlying table */
+ if (get_relation_stats_hook &&
+ (*get_relation_stats_hook) (root, rte, attnum, &vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it
+ * did supply a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
+ elog(ERROR,
+ "no function provided to release variable stats with");
+ }
+ else
+ {
+ vardata.statsTuple =
+ SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(rte->relid),
+ Int16GetDatum(attnum),
+ BoolGetDatum(false));
+ vardata.freefunc = ReleaseSysCache;
+ }
+ }
+ else
+ {
+ /*
+ * Looks like we've found an expression column in the index. Let's
+ * see if there's any stats for it.
+ */
+
+ /* get the attnum from the 0-based index. */
+ attnum = qinfo->indexcol + 1;
+
+ if (get_index_stats_hook &&
+ (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it
+ * did supply a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ !vardata.freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
+ }
+ else
+ {
+ vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(index->indexoid),
+ Int16GetDatum(attnum),
+ BoolGetDatum(false));
+ vardata.freefunc = ReleaseSysCache;
+ }
+ }
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ AttStatsSlot sslot;
+
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_CORRELATION, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ double varCorrelation = 0.0;
+
+ if (sslot.nnumbers > 0)
+ varCorrelation = Abs(sslot.numbers[0]);
+
+ if (varCorrelation > *indexCorrelation)
+ *indexCorrelation = varCorrelation;
+
+ free_attstatsslot(&sslot);
+ }
+ }
+
+ ReleaseVariableStats(vardata);
+ }
+
+ qualSelectivity = clauselist_selectivity(root, indexQuals,
+ baserel->relid,
+ JOIN_INNER, NULL);
+
+ /* work out the actual number of ranges in the index */
+ indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
+ 1.0);
+
+ /*
+ * Now calculate the minimum possible ranges we could match with if all of
+ * the rows were in the perfect order in the table's heap.
+ */
+ minimalRanges = ceil(indexRanges * qualSelectivity);
+
+ /*
+ * Now estimate the number of ranges that we'll touch by using the
+ * indexCorrelation from the stats. Careful not to divide by zero (note
+ * we're using the absolute value of the correlation).
+ */
+ if (*indexCorrelation < 1.0e-10)
+ estimatedRanges = indexRanges;
+ else
+ estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
+
+ /* we expect to visit this portion of the table */
+ selec = estimatedRanges / indexRanges;
+
+ CLAMP_PROBABILITY(selec);
+
+ *indexSelectivity = selec;
+
+ /*
+ * Compute the index qual costs, much as in genericcostestimate, to add to
+ * the index costs.
+ */
+ qual_arg_cost = other_operands_eval_cost(root, qinfos) +
+ orderby_operands_eval_cost(root, path);
+
+ /*
+ * Compute the startup cost as the cost to read the whole revmap
+ * sequentially, including the cost to execute the index quals.
+ */
+ *indexStartupCost =
+ spc_seq_page_cost * statsData.revmapNumPages * loop_count;
+ *indexStartupCost += qual_arg_cost;
+
+ /*
+ * To read a BRIN index there might be a bit of back and forth over
+ * regular pages, as revmap might point to them out of sequential order;
+ * calculate the total cost as reading the whole index in random order.
+ */
+ *indexTotalCost = *indexStartupCost +
+ spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
+
+ /*
+ * Charge a small amount per range tuple which we expect to match to. This
+ * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
+ * will set a bit for each page in the range when we find a matching
+ * range, so we must multiply the charge by the number of pages in the
+ * range.
+ */
+ *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
+ statsData.pagesPerRange;
- PG_RETURN_VOID();
+ *indexPages = index->pages;
}