From b6108fe59b997aafe6a8003b38a34d91c073618c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 8 Jul 2012 23:51:19 -0400 Subject: [PATCH] Fix planner to pass correct collation to operator selectivity estimators. We can do this without creating an API break for estimation functions by passing the collation using the existing fmgr functionality for passing an input collation as a hidden parameter. The need for this was foreseen at the outset, but we didn't get around to making it happen in 9.1 because of the decision to sort all pg_statistic histograms according to the database's default collation. That meant that selectivity estimators generally need to use the default collation too, even if they're estimating for an operator that will do something different. The reason it's suddenly become more interesting is that regexp interpretation also uses a collation (for its LC_TYPE not LC_COLLATE property), and we no longer want to use the wrong collation when examining regexps during planning. It's not that the selectivity estimate is likely to change much from this; rather that we are thinking of caching compiled regexps during planner estimation, and we won't get the intended benefit if we cache them with a different collation than the executor will use. Back-patch to 9.1, both because the regexp change is likely to get back-patched and because we might as well get this right in all collation-supporting branches, in case any third-party code wants to rely on getting the collation. The patch turns out to be minuscule now that I've done it ... --- src/backend/optimizer/path/clausesel.c | 10 ++- src/backend/optimizer/util/plancat.c | 26 ++++--- src/backend/utils/adt/selfuncs.c | 96 ++++++++++++++++---------- src/include/optimizer/plancat.h | 2 + 4 files changed, 83 insertions(+), 51 deletions(-) diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index ca068991c1..b9e97d414d 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -579,6 +579,7 @@ clause_selectivity(PlannerInfo *root, list_make2(var, makeBoolConst(true, false)), + InvalidOid, varRelid); } } @@ -650,13 +651,15 @@ clause_selectivity(PlannerInfo *root, } else if (is_opclause(clause) || IsA(clause, DistinctExpr)) { - Oid opno = ((OpExpr *) clause)->opno; + OpExpr *opclause = (OpExpr *) clause; + Oid opno = opclause->opno; if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, - ((OpExpr *) clause)->args, + opclause->args, + opclause->inputcollid, jointype, sjinfo); } @@ -664,7 +667,8 @@ clause_selectivity(PlannerInfo *root, { /* Estimate selectivity for a restriction clause. */ s1 = restriction_selectivity(root, opno, - ((OpExpr *) clause)->args, + opclause->args, + opclause->inputcollid, varRelid); } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index a92d248528..77e701d2fc 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -907,6 +907,7 @@ Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, int varRelid) { RegProcedure oprrest = get_oprrest(operatorid); @@ -919,11 +920,12 @@ restriction_selectivity(PlannerInfo *root, if (!oprrest) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall4(oprrest, - PointerGetDatum(root), - ObjectIdGetDatum(operatorid), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + result = DatumGetFloat8(OidFunctionCall4Coll(oprrest, + inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operatorid), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid restriction selectivity: %f", result); @@ -942,6 +944,7 @@ Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo) { @@ -955,12 +958,13 @@ join_selectivity(PlannerInfo *root, if (!oprjoin) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall5(oprjoin, - PointerGetDatum(root), - ObjectIdGetDatum(operatorid), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin, + inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operatorid), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid join selectivity: %f", result); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d700b86b93..acbdad9562 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -83,6 +83,15 @@ * joins, however, the selectivity is defined as the fraction of the left-hand * side relation's rows that are expected to have a match (ie, at least one * row with a TRUE result) in the right-hand side. + * + * For both oprrest and oprjoin functions, the operator's input collation OID + * (if any) is passed using the standard fmgr mechanism, so that the estimator + * function can fetch it with PG_GET_COLLATION(). Note, however, that all + * statistics in pg_statistic are currently built using the database's default + * collation. Thus, in most cases where we are looking at statistics, we + * should ignore the actual operator collation and use DEFAULT_COLLATION_OID. + * We expect that the error induced by doing this is usually not large enough + * to justify complicating matters. *---------- */ @@ -1087,6 +1096,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -1187,12 +1197,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) } /* - * Divide pattern into fixed prefix and remainder. XXX we have to assume - * default collation here, because we don't have access to the actual - * input collation for the operator. FIXME ... + * Divide pattern into fixed prefix and remainder. Unlike many of the + * other functions in this file, we use the pattern operator's actual + * collation for this step. This is not because we expect the collation + * to make a big difference in the selectivity estimate (it seldom would), + * but because we want to be sure we cache compiled regexps under the + * right cache key, so that they can be re-used at runtime. */ patt = (Const *) other; - pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID, + pstatus = pattern_fixed_prefix(patt, ptype, collation, &prefix, &rest); /* @@ -1776,18 +1789,20 @@ scalararraysel(PlannerInfo *root, elem_nulls[i], elmbyval)); if (is_join_clause) - s2 = DatumGetFloat8(FunctionCall5(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); else - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) s1 = s1 + s2 - s1 * s2; else @@ -1818,18 +1833,20 @@ scalararraysel(PlannerInfo *root, */ args = list_make2(leftop, elem); if (is_join_clause) - s2 = DatumGetFloat8(FunctionCall5(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); else - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) s1 = s1 + s2 - s1 * s2; else @@ -1854,18 +1871,20 @@ scalararraysel(PlannerInfo *root, dummyexpr->collation = clause->inputcollid; args = list_make2(leftop, dummyexpr); if (is_join_clause) - s2 = DatumGetFloat8(FunctionCall5(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); else - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); s1 = useOr ? 0.0 : 1.0; /* @@ -1937,6 +1956,7 @@ rowcomparesel(PlannerInfo *root, { Selectivity s1; Oid opno = linitial_oid(clause->opnos); + Oid inputcollid = linitial_oid(clause->inputcollids); List *opargs; bool is_join_clause; @@ -1977,6 +1997,7 @@ rowcomparesel(PlannerInfo *root, /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, opargs, + inputcollid, jointype, sjinfo); } @@ -1985,6 +2006,7 @@ rowcomparesel(PlannerInfo *root, /* Estimate selectivity for a restriction clause. */ s1 = restriction_selectivity(root, opno, opargs, + inputcollid, varRelid); } diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index c0b8eda813..5138a8d97c 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -43,11 +43,13 @@ extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno); extern Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, int varRelid); extern Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo); -- 2.40.0