get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
+static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
static double var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
- bool varonleft);
+ bool varonleft, bool negate);
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
- bool varonleft);
+ bool varonleft, bool negate);
static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt,
*/
Datum
eqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
+}
+
+/*
+ * Common code for eqsel() and neqsel()
+ */
+static double
+eqsel_internal(PG_FUNCTION_ARGS, bool negate)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
bool varonleft;
double selec;
+ /*
+ * When asked about <>, we do the estimation using the corresponding =
+ * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
+ */
+ if (negate)
+ {
+ operator = get_negator(operator);
+ if (!OidIsValid(operator))
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ return 1.0 - DEFAULT_EQ_SEL;
+ }
+ }
+
/*
* If expression is not variable = something or something = variable, then
* punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
- PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+ return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
/*
* We can do a lot better if the something is a constant. (Note: the
selec = var_eq_const(&vardata, operator,
((Const *) other)->constvalue,
((Const *) other)->constisnull,
- varonleft);
+ varonleft, negate);
else
selec = var_eq_non_const(&vardata, operator, other,
- varonleft);
+ varonleft, negate);
ReleaseVariableStats(vardata);
- PG_RETURN_FLOAT8((float8) selec);
+ return selec;
}
/*
static double
var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
- bool varonleft)
+ bool varonleft, bool negate)
{
double selec;
+ double nullfrac = 0.0;
bool isdefault;
Oid opfuncoid;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
- * operator will never return TRUE.
+ * operator will never return TRUE. (It's zero even for a negator op.)
*/
if (constisnull)
return 0.0;
+ /*
+ * Grab the nullfrac for use below. Note we allow use of nullfrac
+ * regardless of security check.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
/*
* If we matched the var to a unique index or DISTINCT clause, assume
* there is exactly one match regardless of anything else. (This is
* ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
- return 1.0 / vardata->rel->tuples;
-
- if (HeapTupleIsValid(vardata->statsTuple) &&
- statistic_proc_security_check(vardata,
- (opfuncoid = get_opcode(operator))))
+ {
+ selec = 1.0 / vardata->rel->tuples;
+ }
+ else if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(operator))))
{
Form_pg_statistic stats;
AttStatsSlot sslot;
for (i = 0; i < sslot.nnumbers; i++)
sumcommon += sslot.numbers[i];
- selec = 1.0 - sumcommon - stats->stanullfrac;
+ selec = 1.0 - sumcommon - nullfrac;
CLAMP_PROBABILITY(selec);
/*
selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
+ /* now adjust if we wanted <> rather than = */
+ if (negate)
+ selec = 1.0 - selec - nullfrac;
+
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
static double
var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
- bool varonleft)
+ bool varonleft, bool negate)
{
double selec;
+ double nullfrac = 0.0;
bool isdefault;
+ /*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
/*
* If we matched the var to a unique index or DISTINCT clause, assume
* there is exactly one match regardless of anything else. (This is
* ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
- return 1.0 / vardata->rel->tuples;
-
- if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ selec = 1.0 / vardata->rel->tuples;
+ }
+ else if (HeapTupleIsValid(vardata->statsTuple))
{
Form_pg_statistic stats;
double ndistinct;
* values, regardless of their frequency in the table. Is that a good
* idea?)
*/
- selec = 1.0 - stats->stanullfrac;
+ selec = 1.0 - nullfrac;
ndistinct = get_variable_numdistinct(vardata, &isdefault);
if (ndistinct > 1)
selec /= ndistinct;
selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
+ /* now adjust if we wanted <> rather than = */
+ if (negate)
+ selec = 1.0 - selec - nullfrac;
+
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
Datum
neqsel(PG_FUNCTION_ARGS)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- Oid operator = PG_GETARG_OID(1);
- List *args = (List *) PG_GETARG_POINTER(2);
- int varRelid = PG_GETARG_INT32(3);
- Oid eqop;
- float8 result;
-
- /*
- * We want 1 - eqsel() where the equality operator is the one associated
- * with this != operator, that is, its negator.
- */
- eqop = get_negator(operator);
- if (eqop)
- {
- result = DatumGetFloat8(DirectFunctionCall4(eqsel,
- PointerGetDatum(root),
- ObjectIdGetDatum(eqop),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
- }
- else
- {
- /* Use default selectivity (should we raise an error instead?) */
- result = DEFAULT_EQ_SEL;
- }
- result = 1.0 - result;
- PG_RETURN_FLOAT8(result);
+ PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
}
/*
Const *patt;
Const *prefix = NULL;
Selectivity rest_selec = 0;
+ double nullfrac = 0.0;
double result;
/*
return result;
}
+ /*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
/*
* Pull out any fixed prefix implied by the pattern, and estimate the
* fractional selectivity of the remainder of the pattern. Unlike many of
if (eqopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
result = var_eq_const(&vardata, eqopr, prefix->constvalue,
- false, true);
+ false, true, false);
}
else
{
Selectivity selec;
int hist_size;
FmgrInfo opproc;
- double nullfrac,
- mcv_selec,
+ double mcv_selec,
sumcommon;
/* Try to use the histogram entries to get selectivity */
mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
&sumcommon);
- if (HeapTupleIsValid(vardata.statsTuple))
- nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
- else
- nullfrac = 0.0;
-
/*
* Now merge the results from the MCV and histogram calculations,
* realizing that the histogram covers only the non-null values that
*/
selec *= 1.0 - nullfrac - sumcommon;
selec += mcv_selec;
-
- /* result should be in range, but make sure... */
- CLAMP_PROBABILITY(selec);
result = selec;
}
+ /* now adjust if we wanted not-match rather than match */
+ if (negate)
+ result = 1.0 - result - nullfrac;
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(result);
+
if (prefix)
{
pfree(DatumGetPointer(prefix->constvalue));
ReleaseVariableStats(vardata);
- return negate ? (1.0 - result) : result;
+ return result;
}
/*
* compute the selectivity as if that is what we have.
*/
selec = var_eq_const(&vardata, BooleanEqualOperator,
- BoolGetDatum(true), false, true);
+ BoolGetDatum(true), false, true, false);
}
else if (is_funcclause(arg))
{
if (cmpopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
- false, true);
+ false, true, false);
prefixsel = Max(prefixsel, eq_sel);