From e7ef6d7e248cd39b8a4a7630776ec3924feeafa6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 8 Jul 2012 23:51:08 -0400 Subject: [PATCH] Fix planner to pass correct collation to operator selectivity estimators. We can do this without creating an API break for estimation functions by passing the collation using the existing fmgr functionality for passing an input collation as a hidden parameter. The need for this was foreseen at the outset, but we didn't get around to making it happen in 9.1 because of the decision to sort all pg_statistic histograms according to the database's default collation. That meant that selectivity estimators generally need to use the default collation too, even if they're estimating for an operator that will do something different. The reason it's suddenly become more interesting is that regexp interpretation also uses a collation (for its LC_CTYPE not LC_COLLATE property), and we no longer want to use the wrong collation when examining regexps during planning. It's not that the selectivity estimate is likely to change much from this; rather that we are thinking of caching compiled regexps during planner estimation, and we won't get the intended benefit if we cache them with a different collation than the executor will use. Back-patch to 9.1, both because the regexp change is likely to get back-patched and because we might as well get this right in all collation-supporting branches, in case any third-party code wants to rely on getting the collation. The patch turns out to be minuscule now that I've done it ... 
--- src/backend/optimizer/path/clausesel.c | 10 ++- src/backend/optimizer/util/plancat.c | 26 ++++--- src/backend/utils/adt/selfuncs.c | 96 ++++++++++++++++---------- src/include/optimizer/plancat.h | 2 + 4 files changed, 83 insertions(+), 51 deletions(-) diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index bb148d7b07..6b8d92773f 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -578,6 +578,7 @@ clause_selectivity(PlannerInfo *root, list_make2(var, makeBoolConst(true, false)), + InvalidOid, varRelid); } } @@ -649,13 +650,15 @@ clause_selectivity(PlannerInfo *root, } else if (is_opclause(clause) || IsA(clause, DistinctExpr)) { - Oid opno = ((OpExpr *) clause)->opno; + OpExpr *opclause = (OpExpr *) clause; + Oid opno = opclause->opno; if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) { /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, - ((OpExpr *) clause)->args, + opclause->args, + opclause->inputcollid, jointype, sjinfo); } @@ -663,7 +666,8 @@ clause_selectivity(PlannerInfo *root, { /* Estimate selectivity for a restriction clause. 
*/ s1 = restriction_selectivity(root, opno, - ((OpExpr *) clause)->args, + opclause->args, + opclause->inputcollid, varRelid); } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 38b81a05ff..1818a2a871 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1010,6 +1010,7 @@ Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, int varRelid) { RegProcedure oprrest = get_oprrest(operatorid); @@ -1022,11 +1023,12 @@ restriction_selectivity(PlannerInfo *root, if (!oprrest) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall4(oprrest, - PointerGetDatum(root), - ObjectIdGetDatum(operatorid), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + result = DatumGetFloat8(OidFunctionCall4Coll(oprrest, + inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operatorid), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid restriction selectivity: %f", result); @@ -1045,6 +1047,7 @@ Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo) { @@ -1058,12 +1061,13 @@ join_selectivity(PlannerInfo *root, if (!oprjoin) return (Selectivity) 0.5; - result = DatumGetFloat8(OidFunctionCall5(oprjoin, - PointerGetDatum(root), - ObjectIdGetDatum(operatorid), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin, + inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operatorid), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); if (result < 0.0 || result > 1.0) elog(ERROR, "invalid join selectivity: %f", result); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 95e46276f0..dc38034104 100644 --- a/src/backend/utils/adt/selfuncs.c +++ 
b/src/backend/utils/adt/selfuncs.c @@ -83,6 +83,15 @@ * joins, however, the selectivity is defined as the fraction of the left-hand * side relation's rows that are expected to have a match (ie, at least one * row with a TRUE result) in the right-hand side. + * + * For both oprrest and oprjoin functions, the operator's input collation OID + * (if any) is passed using the standard fmgr mechanism, so that the estimator + * function can fetch it with PG_GET_COLLATION(). Note, however, that all + * statistics in pg_statistic are currently built using the database's default + * collation. Thus, in most cases where we are looking at statistics, we + * should ignore the actual operator collation and use DEFAULT_COLLATION_OID. + * We expect that the error induced by doing this is usually not large enough + * to justify complicating matters. *---------- */ @@ -1097,6 +1106,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -1197,12 +1207,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) } /* - * Divide pattern into fixed prefix and remainder. XXX we have to assume - * default collation here, because we don't have access to the actual - * input collation for the operator. FIXME ... + * Divide pattern into fixed prefix and remainder. Unlike many of the + * other functions in this file, we use the pattern operator's actual + * collation for this step. This is not because we expect the collation + * to make a big difference in the selectivity estimate (it seldom would), + * but because we want to be sure we cache compiled regexps under the + * right cache key, so that they can be re-used at runtime. 
*/ patt = (Const *) other; - pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID, + pstatus = pattern_fixed_prefix(patt, ptype, collation, &prefix, &rest); /* @@ -1847,18 +1860,20 @@ scalararraysel(PlannerInfo *root, elem_nulls[i], elmbyval)); if (is_join_clause) - s2 = DatumGetFloat8(FunctionCall5(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); else - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) { @@ -1912,18 +1927,20 @@ scalararraysel(PlannerInfo *root, */ args = list_make2(leftop, elem); if (is_join_clause) - s2 = DatumGetFloat8(FunctionCall5(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); else - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); if (useOr) { @@ -1962,18 +1979,20 @@ scalararraysel(PlannerInfo *root, dummyexpr->collation = 
clause->inputcollid; args = list_make2(leftop, dummyexpr); if (is_join_clause) - s2 = DatumGetFloat8(FunctionCall5(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int16GetDatum(jointype), - PointerGetDatum(sjinfo))); + s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int16GetDatum(jointype), + PointerGetDatum(sjinfo))); else - s2 = DatumGetFloat8(FunctionCall4(&oprselproc, - PointerGetDatum(root), - ObjectIdGetDatum(operator), - PointerGetDatum(args), - Int32GetDatum(varRelid))); + s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc, + clause->inputcollid, + PointerGetDatum(root), + ObjectIdGetDatum(operator), + PointerGetDatum(args), + Int32GetDatum(varRelid))); s1 = useOr ? 0.0 : 1.0; /* @@ -2046,6 +2065,7 @@ rowcomparesel(PlannerInfo *root, { Selectivity s1; Oid opno = linitial_oid(clause->opnos); + Oid inputcollid = linitial_oid(clause->inputcollids); List *opargs; bool is_join_clause; @@ -2086,6 +2106,7 @@ rowcomparesel(PlannerInfo *root, /* Estimate selectivity for a join clause. */ s1 = join_selectivity(root, opno, opargs, + inputcollid, jointype, sjinfo); } @@ -2094,6 +2115,7 @@ rowcomparesel(PlannerInfo *root, /* Estimate selectivity for a restriction clause. */ s1 = restriction_selectivity(root, opno, opargs, + inputcollid, varRelid); } diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index cf4a3f2b99..e0d04dbecd 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -43,11 +43,13 @@ extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno); extern Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, int varRelid); extern Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, + Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo); -- 2.40.0