From 39df0f150ca69fac1c89537065ddc97af18921b8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 24 Sep 2015 18:35:46 -0400 Subject: [PATCH] Allow planner to use expression-index stats for function calls in WHERE. Previously, a function call appearing at the top level of WHERE had a hard-wired selectivity estimate of 0.3333333, a kludge conveniently dated in the source code itself to July 1992. The expectation at the time was that somebody would soon implement estimator support functions analogous to those for operators; but no such code has appeared, nor does it seem likely to in the near future. We do have an alternative solution though, at least for immutable functions on single relations: creating an expression index on the function call will allow ANALYZE to gather stats about the function's selectivity. But the code in clause_selectivity() failed to make use of such data even if it exists. Refactor so that that will happen. I chose to make it try this technique for any clause type for which clause_selectivity() doesn't have a special case, not just functions. To avoid adding unnecessary overhead in the common case where we don't learn anything new, make selfuncs.c provide an API that hooks directly to examine_variable() and then var_eq_const(), rather than the previous coding which laboriously constructed an OpExpr only so that it could be expensively deconstructed again. I preserved the behavior that the default estimate for a function call is 0.3333333. (For any other expression node type, it's 0.5, as before.) I had originally thought to make the default be 0.5 across the board, but changing a default estimate that's survived for twenty-three years seems like something not to do without a lot more testing than I care to put into it right now. Per a complaint from Jehan-Guillaume de Rorthais. Back-patch into 9.5, but not further, at least for the moment. --- src/backend/optimizer/path/clausesel.c | 45 ++++++++------------------ src/backend/utils/adt/selfuncs.c | 45 ++++++++++++++++++++++++++ src/include/utils/selfuncs.h | 1 + 3 files changed, 59 insertions(+), 32 deletions(-) diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index dcac1c1e7d..6ce2726a7e 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -14,7 +14,6 @@ */ #include "postgres.h" -#include "catalog/pg_operator.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" @@ -568,18 +567,8 @@ clause_selectivity(PlannerInfo *root, if (var->varlevelsup == 0 && (varRelid == 0 || varRelid == (int) var->varno)) { - /* - * A Var at the top of a clause must be a bool Var. This is - * equivalent to the clause reln.attribute = 't', so we compute - * the selectivity as if that is what we have. - */ - s1 = restriction_selectivity(root, - BooleanEqualOperator, - list_make2(var, - makeBoolConst(true, - false)), - InvalidOid, - varRelid); + /* Use the restriction selectivity function for a bool Var */ + s1 = boolvarsel(root, (Node *) var, varRelid); } } else if (IsA(clause, Const)) @@ -680,25 +669,6 @@ clause_selectivity(PlannerInfo *root, if (IsA(clause, DistinctExpr)) s1 = 1.0 - s1; } - else if (is_funcclause(clause)) - { - /* - * This is not an operator, so we guess at the selectivity. THIS IS A - * HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE - * SELECTIVITIES THEMSELVES. -- JMH 7/9/92 - */ - s1 = (Selectivity) 0.3333333; - } -#ifdef NOT_USED - else if (IsA(clause, SubPlan) || - IsA(clause, AlternativeSubPlan)) - { - /* - * Just for the moment! FIX ME! - vadim 02/04/98 - */ - s1 = (Selectivity) 0.5; - } -#endif else if (IsA(clause, ScalarArrayOpExpr)) { /* Use node specific selectivity calculation function */ @@ -766,6 +736,17 @@ clause_selectivity(PlannerInfo *root, jointype, sjinfo); } + else + { + /* + * For anything else, see if we can consider it as a boolean variable. + * This only works if it's an immutable expression in Vars of a single + * relation; but there's no point in us checking that here because + * boolvarsel() will do it internally, and return a suitable default + * selectivity if not. + */ + s1 = boolvarsel(root, clause, varRelid); + } /* Cache the result if possible */ if (cacheable) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 72bc502522..37fad8692a 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -105,6 +105,7 @@ #include "access/sysattr.h" #include "catalog/index.h" #include "catalog/pg_collation.h" +#include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" @@ -1439,6 +1440,50 @@ icnlikesel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true)); } +/* + * boolvarsel - Selectivity of Boolean variable. + * + * This can actually be called on any boolean-valued expression. If it + * involves only Vars of the specified relation, and if there are statistics + * about the Var or expression (the latter is possible if it's indexed) then + * we'll produce a real estimate; otherwise it's just a default. + */ +Selectivity +boolvarsel(PlannerInfo *root, Node *arg, int varRelid) +{ + VariableStatData vardata; + double selec; + + examine_variable(root, arg, varRelid, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + { + /* + * A boolean variable V is equivalent to the clause V = 't', so we + * compute the selectivity as if that is what we have. + */ + selec = var_eq_const(&vardata, BooleanEqualOperator, + BoolGetDatum(true), false, true); + } + else if (is_funcclause(arg)) + { + /* + * If we have no stats and it's a function call, estimate 0.3333333. + * This seems a pretty unprincipled choice, but Postgres has been + * using that estimate for function calls since 1992. The hoariness + * of this behavior suggests that we should not be in too much hurry + * to use another value. + */ + selec = 0.3333333; + } + else + { + /* Otherwise, the default estimate is 0.5 */ + selec = 0.5; + } + ReleaseVariableStats(vardata); + return selec; +} + /* * booltestsel - Selectivity of BooleanTest Node. */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index b3d8017b8b..a7433d9c0f 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -164,6 +164,7 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); +extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid); extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); -- 2.40.0